229 lines
4.7 KiB
C++
229 lines
4.7 KiB
C++
#ifndef __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
|
|
#define __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
|
|
|
|
namespace amscuda
|
|
{
|
|
|
|
//Releases the device allocation referenced by *devptr (thin wrapper over cudaFree).
//*devptr is reset to NULL afterwards, whether or not cudaFree succeeded.
//
//devptr itself must not be NULL: it is dereferenced unconditionally.
//
//returns:
//   0 : *devptr was already NULL, nothing was done
//   1 : cudaFree succeeded
//  -1 : cudaFree reported an error (the pointer is cleared regardless)
template<typename T> int cuda_free(T **devptr)
{
    if(*devptr == NULL)
    {
        return 0; //already NULL/freed
    }

    const cudaError_t status = cudaFree(*devptr);

    //Clear the handle on both outcomes so the same address is never passed
    //to cudaFree a second time through this pointer.
    *devptr = NULL;

    return (status == cudaSuccess) ? 1 : -1;
}
|
|
|
|
//Copies N elements of type T from hostbuffer (host memory) into *devbuffer (device memory).
//
//If *devbuffer is NULL, a device buffer of N elements is allocated with cudaMalloc first.
//If overwrite is true and *devbuffer is not NULL, the existing device buffer is released
//through cuda_free and a new one of N elements is allocated (use this for resizing).
//If overwrite is false and *devbuffer is not NULL, the existing allocation is reused as-is;
//nothing here verifies that it can hold N elements.
//
//devbuffer itself must not be NULL: it is dereferenced unconditionally.
//
//returns:
//   0 : N <= 0, nothing was done
//   1 : copy succeeded
//  -2 : hostbuffer is NULL
//  -3 : cudaMalloc failed (*devbuffer is set to NULL)
//  -4 : cudaMemcpy failed
template<typename T> int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite)
{
    if(N <= 0)
    {
        return 0; //nothing to transfer
    }

    if(hostbuffer == NULL)
    {
        return -2; //host buffer is NULL
    }

    if(overwrite && *devbuffer != NULL)
    {
        //cuda_free leaves *devbuffer NULL, which forces the allocation below
        cuda_free(devbuffer);
    }

    if(*devbuffer == NULL)
    {
        if(cudaMalloc(devbuffer, sizeof(T)*N) != cudaSuccess)
        {
            *devbuffer = NULL;
            return -3; //failed to allocate
        }
    }

    const cudaError_t copystatus =
        cudaMemcpy(*devbuffer, hostbuffer, sizeof(T)*N, cudaMemcpyHostToDevice);

    return (copystatus == cudaSuccess) ? 1 : -4; //-4: failed to copy
}
|
|
|
|
//Copies N elements of type T from devbuffer (device memory) into *hostbuffer (host memory).
//
//If *hostbuffer is NULL, a host array of N elements is allocated with new(std::nothrow) T[N].
//If overwrite is true and *hostbuffer is not NULL, the existing host array is released with
//delete[] and a new one of N elements is allocated (use this for resizing). The existing
//array must therefore have been allocated with new[].
//If overwrite is false and *hostbuffer is not NULL, the existing array is reused as-is;
//nothing here verifies that it can hold N elements.
//
//hostbuffer itself must not be NULL: it is dereferenced unconditionally.
//
//returns:
//   0 : N <= 0, nothing was done
//   1 : copy succeeded
//  -5 : devbuffer is NULL
//  -6 : host allocation failed (*hostbuffer is NULL)
//  -7 : cudaMemcpy failed
template<typename T> int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite)
{
    cudaError_t err = cudaSuccess;

    if(N<=0)
    {
        return 0; //nothing to transfer
    }

    if(devbuffer==NULL)
    {
        return -5; //null dev buffer
    }

    if(overwrite && *hostbuffer!=NULL)
    {
        delete[] (*hostbuffer);
        //Reset the caller's pointer, not the local parameter: nulling the parameter
        //would leave the caller with a dangling pointer and make the dereference
        //below a null-pointer access.
        *hostbuffer = NULL;
    }

    if(*hostbuffer==NULL)
    {
        *hostbuffer = new(std::nothrow) T[N];
        if(*hostbuffer==NULL)
        {
            return -6; //failed to allocate host buffer
        }
    }

    err = cudaMemcpy(*hostbuffer, devbuffer, sizeof(T)*N, cudaMemcpyDeviceToHost);

    return (err==cudaSuccess) ? 1 : -7; //-7: failed to copy
}
|
|
|
|
//Copies a single item or struct of type T (count 1) from host memory to device memory.
//
//Any device allocation already referenced by *devptr is always released through cuda_free
//and replaced by a fresh cudaMalloc of sizeof(T) bytes before the copy; if *devptr is NULL
//it is simply allocated.
//
//devptr itself must not be NULL: it is dereferenced unconditionally.
//
//returns:
//   1 : copy succeeded
//  -2 : hostptr is NULL
//  -3 : cudaMalloc failed (*devptr is set to NULL)
//  -4 : cudaMemcpy failed
template<typename T> int cuda_copytodevice(T *hostptr, T **devptr)
{
    if(hostptr == NULL)
    {
        return -2; //host buffer is NULL
    }

    //An existing device object is unconditionally discarded.
    if(*devptr != NULL)
    {
        cuda_free(devptr);
    }

    if(*devptr == NULL)
    {
        const cudaError_t allocstatus = cudaMalloc(devptr, sizeof(T));
        if(allocstatus != cudaSuccess)
        {
            *devptr = NULL;
            return -3; //failed to allocate
        }
    }

    if(cudaMemcpy(*devptr, hostptr, sizeof(T), cudaMemcpyHostToDevice) != cudaSuccess)
    {
        return -4; //failed to copy
    }

    return 1;
}
|
|
|
|
//Copies a single item or struct of type T (count 1) from device memory to host memory.
//
//Any host object already referenced by *hostptr is always released with delete and replaced
//by a fresh new(std::nothrow) T before the copy; if *hostptr is NULL it is simply allocated.
//A non-NULL *hostptr must therefore have been allocated with scalar new.
//
//hostptr itself must not be NULL: it is dereferenced unconditionally.
//
//returns:
//   1 : copy succeeded
//  -5 : devptr is NULL
//  -6 : host allocation failed (*hostptr is NULL)
//  -7 : cudaMemcpy failed
template<typename T> int cuda_copyfromdevice(T *devptr, T **hostptr)
{
    cudaError_t err = cudaSuccess;

    if(devptr==NULL)
    {
        return -5; //null dev buffer
    }

    if(*hostptr!=NULL)
    {
        delete (*hostptr);
        //Reset the caller's pointer, not the local parameter: nulling the parameter
        //would leave the caller with a dangling pointer and make the dereference
        //below a null-pointer access.
        *hostptr = NULL;
    }

    if(*hostptr==NULL)
    {
        *hostptr = new(std::nothrow) T;
        if(*hostptr==NULL)
        {
            return -6; //failed to allocate host buffer
        }
    }

    err = cudaMemcpy(*hostptr, devptr, sizeof(T), cudaMemcpyDeviceToHost);

    return (err==cudaSuccess) ? 1 : -7; //-7: failed to copy
}
|
|
|
|
|
|
};
|
|
|
|
#endif
|
|
|