Files
amsculib3/old/9apr26_prerefactor/include/amsculib2/amscu_cudafunctions_impl.hpp
2026-04-10 13:29:50 -04:00

229 lines
4.7 KiB
C++

#ifndef __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
#define __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
namespace amscuda
{
//frees devbuffer if it is not already NULL, and sets devbuffer to NULL
//wrapper to cudaFree
template<typename T> int cuda_free(T **devptr)
{
int ret = 0;
cudaError_t err = cudaSuccess;
if(*devptr==NULL)
{
return ret; //devbuffer is already NULL/freed
}
err = cudaFree(*devptr);
if(err!=cudaSuccess)
{
ret = -1; //failed to free device pointer
*devptr = NULL; // - ? should only happen if I'm trying to double-free something
}
else
{
ret = 1;
*devptr = NULL;
}
return ret;
}
//copies hostbuffer to devbuffer
//initializes devbuffer from NULL if devbuffer is NULL
//if overwrite is true, deletes and reallocates devbuffer on device (for resizing)
template<typename T> int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite)
{
int ret = 0;
cudaError_t err = cudaSuccess;
if(N<=0)
{
ret = 0;
return ret;
}
if(hostbuffer==NULL)
{
ret = -2; //host buffer is NULL
return ret;
}
if(overwrite==1)
{
if(*devbuffer !=NULL)
{
cuda_free(devbuffer);
}
}
if(*devbuffer==NULL)
{
err = cudaMalloc(devbuffer,sizeof(T)*N);
if(err!=cudaSuccess)
{
ret = -3; //failed to allocate
*devbuffer = NULL;
return ret;
}
}
err = cudaMemcpy(*devbuffer,hostbuffer,sizeof(T)*N,cudaMemcpyHostToDevice);
if(err!=cudaSuccess)
{
ret = -4; //failed to copy
}
else
{
ret = 1;
}
return ret;
}
//Copies N elements of type T from devbuffer (device) into *hostbuffer (host).
//  devbuffer  : source array on the device
//  hostbuffer : address of the host pointer; allocated with new[] when
//               *hostbuffer is NULL. hostbuffer itself must not be NULL.
//  N          : element count; N<=0 is treated as "nothing to do"
//  overwrite  : when true, an existing *hostbuffer is released with delete[]
//               and reallocated (for resizing); when false, an existing
//               *hostbuffer is reused as-is and no size check is performed here
//returns:
//   0 : N<=0, nothing done
//   1 : success
//  -5 : devbuffer is NULL
//  -6 : host allocation failed (*hostbuffer is left NULL)
//  -7 : cudaMemcpy failed
template<typename T> int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite)
{
    int ret = 0;
    cudaError_t err = cudaSuccess;

    if(N<=0)
    {
        return 0;
    }
    if(devbuffer==NULL)
    {
        return -5; //null dev buffer
    }

    if(overwrite && *hostbuffer!=NULL)
    {
        delete[] (*hostbuffer);
        //Reset the caller's pointer, not the local T** parameter: nulling the
        //parameter would make the check below dereference NULL and would leave
        //the caller holding a pointer to the deleted array.
        *hostbuffer = NULL;
    }

    if(*hostbuffer==NULL)
    {
        *hostbuffer = new(std::nothrow) T[N];
        if(*hostbuffer==NULL)
        {
            return -6; //failed to allocate host buffer
        }
    }

    err = cudaMemcpy(*hostbuffer, devbuffer, sizeof(T)*N, cudaMemcpyDeviceToHost);
    if(err!=cudaSuccess)
    {
        ret = -7; //failed to copy
    }
    else
    {
        ret = 1;
    }
    return ret;
}
//wrapper for cudaMemcpy - copies an item or struct (count 1) to the device
//initializes devptr from NULL if not already initialized
template<typename T> int cuda_copytodevice(T *hostptr, T **devptr)
{
int ret = 0;
cudaError_t err = cudaSuccess;
bool overwrite = 1;
if(hostptr==NULL)
{
ret = -2; //host buffer is NULL
return ret;
}
if(overwrite==1)
{
if(*devptr !=NULL)
{
cuda_free(devptr);
}
}
if(*devptr==NULL)
{
err = cudaMalloc(devptr,sizeof(T));
if(err!=cudaSuccess)
{
ret = -3; //failed to allocate
*devptr = NULL;
return ret;
}
}
err = cudaMemcpy(*devptr,hostptr,sizeof(T),cudaMemcpyHostToDevice);
if(err!=cudaSuccess)
{
ret = -4; //failed to copy
}
else
{
ret = 1;
}
return ret;
}
//Copies a single item or struct (count 1) of type T from the device to the host.
//Wrapper around cudaMemcpy.
//  devptr  : source object on the device
//  hostptr : address of the host pointer; any existing *hostptr is always
//            released with delete and replaced by a freshly new'd T.
//            hostptr itself must not be NULL.
//returns:
//   1 : success
//  -5 : devptr is NULL
//  -6 : host allocation failed (*hostptr is left NULL)
//  -7 : cudaMemcpy failed
template<typename T> int cuda_copyfromdevice(T *devptr, T **hostptr)
{
    int ret = 0;
    cudaError_t err = cudaSuccess;

    if(devptr==NULL)
    {
        return -5; //null dev buffer
    }

    //This function always replaces the previous host object.
    if(*hostptr!=NULL)
    {
        delete (*hostptr);
        //Reset the caller's pointer, not the local T** parameter: nulling the
        //parameter would make the check below dereference NULL and would leave
        //the caller holding a pointer to the deleted object.
        *hostptr = NULL;
    }

    if(*hostptr==NULL)
    {
        *hostptr = new(std::nothrow) T;
        if(*hostptr==NULL)
        {
            return -6; //failed to allocate host buffer
        }
    }

    err = cudaMemcpy(*hostptr, devptr, sizeof(T), cudaMemcpyDeviceToHost);
    if(err!=cudaSuccess)
    {
        ret = -7; //failed to copy
    }
    else
    {
        ret = 1;
    }
    return ret;
}
};
#endif