#ifndef __AMSCU_CUDAFUNCTIONS_IMPL_HPP__ #define __AMSCU_CUDAFUNCTIONS_IMPL_HPP__ namespace amscuda { //frees devbuffer if it is not already NULL, and sets devbuffer to NULL //wrapper to cudaFree template int cuda_free(T **devptr) { int ret = 0; cudaError_t err = cudaSuccess; if(*devptr==NULL) { return ret; //devbuffer is already NULL/freed } err = cudaFree(*devptr); if(err!=cudaSuccess) { ret = -1; //failed to free device pointer *devptr = NULL; // - ? should only happen if I'm trying to double-free something } else { ret = 1; *devptr = NULL; } return ret; } //copies hostbuffer to devbuffer //initializes devbuffer from NULL if devbuffer is NULL //if overwrite is true, deletes and reallocates devbuffer on device (for resizing) template int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite) { int ret = 0; cudaError_t err = cudaSuccess; if(N<=0) { ret = 0; return ret; } if(hostbuffer==NULL) { ret = -2; //host buffer is NULL return ret; } if(overwrite==1) { if(*devbuffer !=NULL) { cuda_free(devbuffer); } } if(*devbuffer==NULL) { err = cudaMalloc(devbuffer,sizeof(T)*N); if(err!=cudaSuccess) { ret = -3; //failed to allocate *devbuffer = NULL; return ret; } } err = cudaMemcpy(*devbuffer,hostbuffer,sizeof(T)*N,cudaMemcpyHostToDevice); if(err!=cudaSuccess) { ret = -4; //failed to copy } else { ret = 1; } return ret; } //copies info from devbuffer to hostbuffer //initialzies hostbuffer from NULL if NULL //if overwrite is true, deletes and reallocates hostbuffer on host (for resizing) template int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite) { int ret = 0; cudaError_t err = cudaSuccess; if(N<=0) { ret = 0; return ret; } if(devbuffer==NULL) { ret = -5; //null dev buffer return ret; } if(overwrite==1 && *hostbuffer!=NULL) { delete[] (*hostbuffer); hostbuffer = NULL; } if(*hostbuffer==NULL) { *hostbuffer = new(std::nothrow) T[N]; if(*hostbuffer==NULL) { ret = -6; //failed to allocate host buffer return ret; } } err = cudaMemcpy(*hostbuffer, devbuffer, sizeof(T)*N, cudaMemcpyDeviceToHost); if(err!=cudaSuccess) { ret = -7; //failed to copy } else { ret = 1; } return ret; } //wrapper for cudaMemcpy - copies an item or struct (count 1) to the device //initializes devptr from NULL if not already initialized template int cuda_copytodevice(T *hostptr, T **devptr) { int ret = 0; cudaError_t err = cudaSuccess; bool overwrite = 1; if(hostptr==NULL) { ret = -2; //host buffer is NULL return ret; } if(overwrite==1) { if(*devptr !=NULL) { cuda_free(devptr); } } if(*devptr==NULL) { err = cudaMalloc(devptr,sizeof(T)); if(err!=cudaSuccess) { ret = -3; //failed to allocate *devptr = NULL; return ret; } } err = cudaMemcpy(*devptr,hostptr,sizeof(T),cudaMemcpyHostToDevice); if(err!=cudaSuccess) { ret = -4; //failed to copy } else { ret = 1; } return ret; } //wrapper for cudaMemcpy - copies an item or struct (count 1) from device //initializes hostptr from NULL with new if not already initialized template int cuda_copyfromdevice(T *devptr, T **hostptr) { int ret = 0; cudaError_t err = cudaSuccess; bool overwrite = 1; if(devptr==NULL) { ret = -5; //null dev buffer return ret; } if(overwrite==1 && *hostptr!=NULL) { delete (*hostptr); hostptr = NULL; } if(*hostptr==NULL) { *hostptr = new(std::nothrow) T; if(*hostptr==NULL) { ret = -6; //failed to allocate host buffer return ret; } } err = cudaMemcpy(*hostptr, devptr, sizeof(T), cudaMemcpyDeviceToHost); if(err!=cudaSuccess) { ret = -7; //failed to copy } else { ret = 1; } return ret; } }; #endif