init
This commit is contained in:
@ -0,0 +1,228 @@
|
||||
#ifndef __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
|
||||
#define __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
// Wrapper around cudaFree: releases the device allocation referenced by
// *devptr and resets the caller's pointer to NULL.
//
// devptr : address of the device pointer to release. It is dereferenced
//          unconditionally, so devptr itself must not be NULL.
//
// Returns:
//    0 : *devptr was already NULL, nothing was done
//    1 : cudaFree reported success
//   -1 : cudaFree reported an error
// In every case *devptr is NULL when the function returns.
template<typename T> int cuda_free(T **devptr)
{
    // Nothing allocated (or already freed): report "no work done".
    if(*devptr == NULL)
    {
        return 0;
    }

    const cudaError_t status = cudaFree(*devptr);

    // The caller's pointer is cleared whether or not cudaFree succeeded.
    *devptr = NULL;

    return (status == cudaSuccess) ? 1 : -1;
}
|
||||
|
||||
// Copies N elements of type T from a host array into device memory.
//
// hostbuffer : host array to read from (source of the cudaMemcpy).
// devbuffer  : address of the caller's device pointer. If *devbuffer is NULL
//              a device allocation of N elements is created with cudaMalloc
//              and stored there. devbuffer itself must not be NULL.
// N          : number of elements to copy; values <= 0 make the call a no-op.
// overwrite  : when true, an existing *devbuffer is released through
//              cuda_free and reallocated for N elements (for resizing).
//              When false, an existing *devbuffer is written to as-is; its
//              size is not checked here, so the caller must make sure it
//              holds at least N elements.
//
// Returns:
//    1 : copy succeeded
//    0 : nothing to do (N <= 0)
//   -2 : hostbuffer is NULL
//   -3 : cudaMalloc failed (*devbuffer is set to NULL)
//   -4 : cudaMemcpy reported an error
template<typename T> int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite)
{
    if(N <= 0)
    {
        return 0;
    }

    if(hostbuffer == NULL)
    {
        return -2; //host buffer is NULL
    }

    // Resizing was requested: drop the current device allocation first.
    // The result of cuda_free is not inspected; it clears *devbuffer either way.
    if(overwrite && *devbuffer != NULL)
    {
        cuda_free(devbuffer);
    }

    cudaError_t status = cudaSuccess;

    // Allocate only when there is no device buffer to reuse.
    if(*devbuffer == NULL)
    {
        status = cudaMalloc(devbuffer, sizeof(T)*N);
        if(status != cudaSuccess)
        {
            *devbuffer = NULL;
            return -3; //failed to allocate
        }
    }

    status = cudaMemcpy(*devbuffer, hostbuffer, sizeof(T)*N, cudaMemcpyHostToDevice);

    return (status == cudaSuccess) ? 1 : -4; //-4: failed to copy
}
|
||||
|
||||
// Copies N elements of type T from device memory into a host array.
//
// devbuffer  : device pointer to read from (source of the cudaMemcpy).
// hostbuffer : address of the caller's host array pointer. If *hostbuffer is
//              NULL, an array of N elements is allocated with
//              new(std::nothrow) T[N] and stored there. hostbuffer itself
//              must not be NULL.
// N          : number of elements to copy; values <= 0 make the call a no-op.
// overwrite  : when true, an existing *hostbuffer is released with delete[]
//              (so it must have come from new[]) and a fresh array of N
//              elements is allocated (for resizing). When false, an existing
//              *hostbuffer is written to as-is; its size is not checked here,
//              so the caller must make sure it holds at least N elements.
//
// Returns:
//    1 : copy succeeded
//    0 : nothing to do (N <= 0)
//   -5 : devbuffer is NULL
//   -6 : host allocation failed (*hostbuffer is left NULL)
//   -7 : cudaMemcpy reported an error
template<typename T> int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite)
{
    cudaError_t err = cudaSuccess;

    if(N<=0)
    {
        return 0;
    }

    if(devbuffer==NULL)
    {
        return -5; //null dev buffer
    }

    if(overwrite && *hostbuffer!=NULL)
    {
        delete[] (*hostbuffer);
        // Clear the caller's pointer (*hostbuffer), not the local handle:
        // nulling the handle itself would make the dereference below crash
        // and would leave the caller holding a dangling pointer.
        *hostbuffer = NULL;
    }

    // Allocate only when there is no host array to reuse.
    if(*hostbuffer==NULL)
    {
        *hostbuffer = new(std::nothrow) T[N];
        if(*hostbuffer==NULL)
        {
            return -6; //failed to allocate host buffer
        }
    }

    err = cudaMemcpy(*hostbuffer, devbuffer, sizeof(T)*N, cudaMemcpyDeviceToHost);
    if(err!=cudaSuccess)
    {
        return -7; //failed to copy
    }

    return 1;
}
|
||||
|
||||
// Wrapper for cudaMemcpy: copies a single item or struct (count 1) of type T
// from the host to the device.
//
// hostptr : host object to read from.
// devptr  : address of the caller's device pointer. Any existing allocation
//           in *devptr is released through cuda_free first, then a new
//           allocation of sizeof(T) bytes is made with cudaMalloc when
//           *devptr is NULL. devptr itself must not be NULL.
//
// Returns:
//    1 : copy succeeded
//   -2 : hostptr is NULL
//   -3 : cudaMalloc failed (*devptr is set to NULL)
//   -4 : cudaMemcpy reported an error
template<typename T> int cuda_copytodevice(T *hostptr, T **devptr)
{
    if(hostptr == NULL)
    {
        return -2; //host buffer is NULL
    }

    // This wrapper always replaces an existing device allocation.
    // The result of cuda_free is not inspected.
    if(*devptr != NULL)
    {
        cuda_free(devptr);
    }

    cudaError_t status = cudaSuccess;

    if(*devptr == NULL)
    {
        status = cudaMalloc(devptr, sizeof(T));
        if(status != cudaSuccess)
        {
            *devptr = NULL;
            return -3; //failed to allocate
        }
    }

    status = cudaMemcpy(*devptr, hostptr, sizeof(T), cudaMemcpyHostToDevice);

    return (status == cudaSuccess) ? 1 : -4; //-4: failed to copy
}
|
||||
|
||||
// Wrapper for cudaMemcpy: copies a single item or struct (count 1) of type T
// from the device to the host.
//
// devptr  : device pointer to read from.
// hostptr : address of the caller's host pointer. An existing object in
//           *hostptr is released with delete (so it must have come from a
//           scalar new), and a fresh T is allocated with new(std::nothrow)
//           and stored in *hostptr. hostptr itself must not be NULL.
//
// Returns:
//    1 : copy succeeded
//   -5 : devptr is NULL
//   -6 : host allocation failed (*hostptr is left NULL)
//   -7 : cudaMemcpy reported an error
template<typename T> int cuda_copyfromdevice(T *devptr, T **hostptr)
{
    cudaError_t err = cudaSuccess;

    if(devptr==NULL)
    {
        return -5; //null dev buffer
    }

    // This wrapper always replaces an existing host object.
    if(*hostptr!=NULL)
    {
        delete (*hostptr);
        // Clear the caller's pointer (*hostptr), not the local handle:
        // nulling the handle itself would make the dereference below crash
        // and would leave the caller holding a dangling pointer.
        *hostptr = NULL;
    }

    *hostptr = new(std::nothrow) T;
    if(*hostptr==NULL)
    {
        return -6; //failed to allocate host buffer
    }

    err = cudaMemcpy(*hostptr, devptr, sizeof(T), cudaMemcpyDeviceToHost);
    if(err!=cudaSuccess)
    {
        return -7; //failed to copy
    }

    return 1;
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user