#ifndef __CUARRAY_IMPL_HPP__
#define __CUARRAY_IMPL_HPP__

namespace amscuda
{

// New version of cuarray: simpler, with less going on.
//
// Implementation of the cuarray<T> member functions. The class template itself
// is declared elsewhere; this file only relies on its two data members:
//   T  *data   - element buffer owned by the object (allocated with new[])
//   int length - number of elements in data
//
// Device-side images: the device_* functions below manage a cuarray<T> object
// that lives in device memory. Such an object is read and written from the host
// by copying the whole struct into/out of a temporary host cuarray<T>. Because
// that temporary then holds a *device* pointer in its data member, every
// function clears the temporary's data pointer before the temporary goes out of
// scope, so that ~cuarray() never calls delete[] on a device address.

// Constructs an empty array (no buffer, length 0).
template<typename T> __device__ __host__ cuarray<T>::cuarray()
{
    length = 0;
    data = NULL;
}

// Releases the owned buffer, if any.
template<typename T> __device__ __host__ cuarray<T>::~cuarray()
{
    if(data!=NULL)
    {
        delete[] data;
        data = NULL;
    }
    length = 0;
}

// Resizes the array to _length elements.
//
// Existing elements are kept up to min(old length, _length); any new elements
// are set to a value-initialized T. A non-positive _length empties the array.
//
// Returns  1 on success (including the no-op case length==_length),
//         -1 if the new buffer could not be allocated.
//
// NOTE(review): the tail of this function (the copy/fill loops and the buffer
// swap) was unreadable in the reviewed source and has been reconstructed from
// the visible head of the function - confirm against the intended behavior.
template<typename T> __device__ __host__ int cuarray<T>::resize(const int _length)
{
    if(length==_length)
    {
        //do nothing
        return 1;
    }

    if(_length<=0)
    {
        if(data!=NULL)
        {
            delete[] data;
            data = NULL;
        }
        length = 0;
        // Return here: falling through would call new T[_length] with a
        // non-positive element count.
        return 1;
    }

    T *newbuffer = new T[_length];
    if(newbuffer==NULL)
    {
        return -1; //failed to allocate memory
    }

    int I;
    // Value-initialized so that fill elements of built-in types are zero
    // instead of indeterminate.
    const T def = T();

    // Number of old elements that survive the resize.
    int keep = 0;
    if(data!=NULL)
    {
        keep = (length<_length) ? length : _length;
    }

    for(I=0;I<keep;I++)
    {
        newbuffer[I] = data[I];
    }
    for(I=keep;I<_length;I++)
    {
        newbuffer[I] = def;
    }

    if(data!=NULL)
    {
        delete[] data;
    }
    data = newbuffer;
    length = _length;

    return 1;
}

// Sends this array to the device object referenced by *dptr.
//
// If *dptr is NULL, or the device object's length differs from this array's
// length, the device object is (re)created via _device_send_overwrite().
// Otherwise the existing device buffer is reused via _device_send_copy().
//
// Returns  1 on success,
//         -5 if dptr itself is NULL,
//         otherwise the error code of the helper that was called.
template<typename T> __host__ int cuarray<T>::device_send(cuarray<T> **dptr)
{
    if(dptr==NULL)
    {
        return -5; // no handle to write the device pointer into
    }

    if(*dptr==NULL)
    {
        return _device_send_overwrite(dptr);
    }

    const int dlength = device_length(*dptr);
    // Comparison, not assignment: the device buffer may only be reused when
    // its size matches exactly, otherwise the copy would overrun it.
    if(dlength==length)
    {
        return _device_send_copy(*dptr);
    }
    return _device_send_overwrite(dptr);
}

// Frees whatever *dptr currently refers to and builds a fresh device object
// holding a copy of this array.
//
// On any failure all device memory allocated by this call is released and
// *dptr is left NULL, so the caller never holds a half-built device object.
//
// Returns  1 on success,
//         -1 device object allocation failed (non-empty array),
//         -2 device data buffer allocation failed,
//         -3 a host-to-device copy failed (non-empty array),
//         -4 creating the device object for an empty array failed,
//         -5 dptr itself is NULL.
template<typename T> __host__ int cuarray<T>::_device_send_overwrite(cuarray<T> **dptr)
{
    if(dptr==NULL)
    {
        return -5;
    }

    int ret = 0;
    cudaError_t err = cudaSuccess;
    T *ddata = NULL;
    const bool has_payload = (length>0 && data!=NULL);

    device_free(dptr);

    err = cudaMalloc((void**)dptr,sizeof(cuarray<T>));
    if(err!=cudaSuccess)
    {
        *dptr = NULL;
        return has_payload ? -1 : -4;
    }

    if(has_payload)
    {
        err = cudaMalloc((void**)&ddata,sizeof(T)*length);
        if(err!=cudaSuccess)
        {
            ddata = NULL;
            ret = -2;
        }
        else
        {
            err = cudaMemcpy(ddata,data,sizeof(T)*length,cudaMemcpyHostToDevice);
            if(err!=cudaSuccess)
            {
                ret = -3;
            }
        }
    }

    if(ret==0)
    {
        // Host-side image of the device object. It temporarily carries a
        // device pointer, which is cleared again before the image is destroyed.
        cuarray<T> dlocal;
        dlocal.data = ddata;
        dlocal.length = has_payload ? length : 0;
        err = cudaMemcpy(*dptr,&dlocal,sizeof(cuarray<T>),cudaMemcpyHostToDevice);
        dlocal.data = NULL;
        dlocal.length = 0;

        if(err==cudaSuccess)
        {
            ret = 1;
        }
        else
        {
            ret = has_payload ? -3 : -4;
        }
    }

    if(ret!=1)
    {
        // Roll back so *dptr never refers to an uninitialized device object.
        if(ddata!=NULL)
        {
            cudaFree(ddata);
        }
        cudaFree(*dptr);
        *dptr = NULL;
    }

    return ret;
}

// Copies this array's elements into the data buffer of an existing device
// object. The caller must have verified that the device object's length
// equals this array's length.
//
// Returns  1 on success (including when there is nothing to copy),
//         -1 if the device buffer is missing or the copy failed.
template<typename T> __host__ int cuarray<T>::_device_send_copy(cuarray<T> *dptr)
{
    if(length<=0 || data==NULL)
    {
        return 1; // nothing to transfer
    }

    T *ddata = device_data_ptr(dptr);
    if(ddata==NULL)
    {
        return -1;
    }

    const cudaError_t err = cudaMemcpy(ddata,data,sizeof(T)*length,cudaMemcpyHostToDevice);
    return (err==cudaSuccess) ? 1 : -1;
}

// Pulls the contents of the device object dptr into this (host) array,
// resizing this array to the device object's length first.
//
// Returns  1 if elements were copied,
//          0 if there was nothing to copy (empty device array),
//         -1 if dptr is NULL,
//         -2 if reading from the device failed,
//         -3 if this array could not be resized to the device length.
template<typename T> __host__ int cuarray<T>::device_pull(cuarray<T> *dptr)
{
    if(dptr==NULL)
    {
        return -1; // null d pointer
    }

    const int dlength = device_length(dptr);
    if(dlength<0)
    {
        return -2; // could not read the device object
    }

    if(dlength!=length)
    {
        this->resize(dlength);
        if(length!=dlength)
        {
            // Copying with a mismatched length would overrun one of the buffers.
            return -3;
        }
    }

    T *ddata = device_data_ptr(dptr);
    if(length<=0 || data==NULL || ddata==NULL)
    {
        return 0;
    }

    // Source is the device data buffer, not the device object itself.
    const cudaError_t err = cudaMemcpy(data,ddata,length*sizeof(T),cudaMemcpyDeviceToHost);
    return (err==cudaSuccess) ? 1 : -2;
}

// Frees the device object referenced by *dptr together with its data buffer
// and sets *dptr to NULL.
//
// Returns 1 if a device object was freed, 0 if there was nothing to free.
template<typename T> __host__ int cuarray<T>::device_free(cuarray<T> **dptr)
{
    if(dptr==NULL || *dptr==NULL)
    {
        return 0;
    }

    cuarray<T> dlocal;
    // Read the device object (*dptr), not the host-side handle (dptr), to
    // learn the address of its data buffer.
    const cudaError_t err = cudaMemcpy(&dlocal,*dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
    T *ddata = (err==cudaSuccess) ? dlocal.data : NULL;

    // Clear the image before it is destroyed: it may hold a device pointer.
    dlocal.data = NULL;
    dlocal.length = 0;

    if(ddata!=NULL)
    {
        cudaFree(ddata);
    }
    cudaFree(*dptr);
    *dptr = NULL;

    return 1;
}

// Returns the length stored in the device object dptr, or -1 if dptr is NULL
// or the device object could not be read.
template<typename T> __host__ int cuarray<T>::device_length(cuarray<T> *dptr)
{
    if(dptr==NULL)
    {
        return -1;
    }

    cuarray<T> dlocal;
    const cudaError_t err = cudaMemcpy(&dlocal,dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
    const int ret = (err==cudaSuccess) ? dlocal.length : -1;

    // Clear the image before it is destroyed: it may hold a device pointer.
    dlocal.data = NULL;
    dlocal.length = 0;

    return ret;
}

// Returns the (device) data pointer stored in the device object dptr, or NULL
// if dptr is NULL or the device object could not be read.
template<typename T> __host__ T* cuarray<T>::device_data_ptr(cuarray<T> *dptr)
{
    if(dptr==NULL)
    {
        return NULL;
    }

    cuarray<T> dlocal;
    const cudaError_t err = cudaMemcpy(&dlocal,dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
    T *ret = (err==cudaSuccess) ? dlocal.data : NULL;

    // Clear the image before it is destroyed: it may hold a device pointer.
    dlocal.data = NULL;
    dlocal.length = 0;

    return ret;
}

// Number of elements in the array.
template<typename T> __device__ __host__ int cuarray<T>::size() const
{
    return this->length;
}

// Element access. No bounds checking is performed.
template<typename T> __device__ __host__ T& cuarray<T>::at(const int I)
{
    return this->data[I];
}

// Element access (const). No bounds checking is performed.
template<typename T> __device__ __host__ const T& cuarray<T>::at(const int I) const
{
    return this->data[I];
}

// Element access. No bounds checking is performed.
template<typename T> __device__ __host__ T& cuarray<T>::operator[](const int I)
{
    return this->data[I];
}

// Element access (const). No bounds checking is performed.
template<typename T> __device__ __host__ const T& cuarray<T>::operator[](const int I) const
{
    return this->data[I];
}

} // namespace amscuda

#endif