529 lines
14 KiB
Plaintext
529 lines
14 KiB
Plaintext
#ifndef __AMSCURARRAY_IMPL_HPP__
|
|
#define __AMSCURARRAY_IMPL_HPP__
|
|
|
|
namespace amscuda
|
|
{
|
|
|
|
template<typename T> curarray<T>::curarray()
|
|
{
|
|
device = -1;
|
|
devptr = NULL;
|
|
Narrays = 0;
|
|
N = NULL;
|
|
hostarrayptrs = NULL;
|
|
devarrayptrs = NULL;
|
|
|
|
}
|
|
|
|
template<typename T> curarray<T>::~curarray()
|
|
{
|
|
device = -1;
|
|
devptr = NULL;
|
|
Narrays = 0;
|
|
N = NULL;
|
|
hostarrayptrs = NULL;
|
|
devarrayptrs = NULL;
|
|
|
|
}
|
|
|
|
template<typename T> int curarray_new(curarray<T>** ptr, int Narrays)
|
|
{
|
|
int ret = 0;
|
|
int device;
|
|
curarray<T> *lhptr = *ptr;
|
|
|
|
cudaGetDevice(&device);
|
|
|
|
if(lhptr!=NULL)
|
|
{
|
|
curarray_delete(ptr);
|
|
}
|
|
|
|
*ptr = new(std::nothrow) curarray<T>();
|
|
lhptr = *ptr;
|
|
|
|
int I;
|
|
|
|
if(Narrays<0) Narrays=0;
|
|
|
|
lhptr->Narrays = Narrays;
|
|
lhptr->device = device;
|
|
lhptr->N = new(std::nothrow) int[Narrays];
|
|
lhptr->hostarrayptrs = new(std::nothrow) T*[Narrays];
|
|
lhptr->devarrayptrs = new(std::nothrow) T*[Narrays];
|
|
|
|
for(I=0;I<Narrays;I++)
|
|
{
|
|
lhptr->N[I] = 0;
|
|
lhptr->hostarrayptrs[I] = NULL;
|
|
lhptr->devarrayptrs[I] = NULL;
|
|
}
|
|
|
|
curarray_device_new(lhptr);
|
|
|
|
return ret;
|
|
}
|
|
|
|
template<typename T> int curarray_delete(curarray<T>** ptr)
|
|
{
|
|
int ret = 0;
|
|
curarray<T> *lptr = NULL;
|
|
int olddev;
|
|
|
|
int I;
|
|
|
|
if(*ptr==NULL)
|
|
{
|
|
return 0;
|
|
}
|
|
lptr = *ptr;
|
|
|
|
cudaGetDevice(&olddev);
|
|
cudaSetDevice(lptr->device);
|
|
|
|
|
|
if(lptr->devptr!=NULL)
|
|
{
|
|
curarray_device_delete(lptr);
|
|
}
|
|
|
|
lptr->device = -1;
|
|
|
|
for(I=0;I<lptr->Narrays;I++)
|
|
{
|
|
if(lptr->hostarrayptrs!=NULL)
|
|
{
|
|
if(lptr->hostarrayptrs[I]!=NULL)
|
|
{
|
|
delete[] lptr->hostarrayptrs[I];
|
|
lptr->hostarrayptrs[I] = NULL;
|
|
}
|
|
}
|
|
|
|
if(lptr->devarrayptrs!=NULL)
|
|
{
|
|
if(lptr->devarrayptrs[I]!=NULL)
|
|
{
|
|
//erasing device memory should have been handled in curarray_device_delete
|
|
lptr->devarrayptrs[I] = NULL;
|
|
}
|
|
}
|
|
|
|
lptr->N[I] = 0;
|
|
}
|
|
|
|
if(lptr->N != NULL) {delete[] lptr->N; lptr->N = NULL;}
|
|
if(lptr->hostarrayptrs!=NULL) {delete[] lptr->hostarrayptrs; lptr->hostarrayptrs=NULL;}
|
|
if(lptr->devarrayptrs!=NULL) {delete[] lptr->devarrayptrs; lptr->devarrayptrs=NULL;}
|
|
|
|
if(*ptr!=NULL) {delete *ptr; *ptr = NULL;}
|
|
|
|
cudaSetDevice(olddev);
|
|
|
|
return ret;
|
|
}
|
|
|
|
template<typename T> int curarray_device_new(curarray<T> *hostptr)
|
|
{
|
|
int ret = 0;
|
|
curarray<T> ldevdata;
|
|
|
|
if(hostptr==NULL) return -1;
|
|
if(hostptr->devptr!=NULL)
|
|
{
|
|
curarray_device_delete(hostptr);
|
|
}
|
|
|
|
int I;
|
|
|
|
cudaGetDevice(&(hostptr->device));
|
|
|
|
ldevdata.device = hostptr->device;
|
|
ldevdata.Narrays = hostptr->Narrays;
|
|
int Narrays = hostptr->Narrays;
|
|
|
|
cudaMalloc(&(ldevdata.N),sizeof(int)*Narrays);
|
|
cudaMemcpy(ldevdata.N,hostptr->N,sizeof(int)*Narrays,cudaMemcpyHostToDevice);
|
|
|
|
ldevdata.hostarrayptrs = NULL;
|
|
|
|
for(I=0;I<Narrays;I++)
|
|
{
|
|
if(hostptr->N[I]>0)
|
|
{
|
|
if(hostptr->devarrayptrs[I]!=NULL)
|
|
{
|
|
cudaFree(hostptr->devarrayptrs[I]);
|
|
hostptr->devarrayptrs[I] = NULL;
|
|
}
|
|
|
|
cudaMalloc(&(hostptr->devarrayptrs[I]),sizeof(T)*hostptr->N[I]);
|
|
cudaMemcpy(hostptr->devarrayptrs[I],hostptr->hostarrayptrs[I],sizeof(T)*hostptr->N[I],cudaMemcpyHostToDevice);
|
|
}
|
|
else
|
|
{
|
|
if(hostptr->devarrayptrs[I]!=NULL)
|
|
{
|
|
cudaFree(hostptr->devarrayptrs[I]);
|
|
hostptr->devarrayptrs[I] = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
cudaMalloc(&(ldevdata.devarrayptrs),sizeof(T*)*Narrays);
|
|
cudaMemcpy(ldevdata.devarrayptrs,hostptr->devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyHostToDevice);
|
|
|
|
cudaMalloc(&(hostptr->devptr),sizeof(curarray<T>));
|
|
cudaMemcpy(hostptr->devptr,&ldevdata,sizeof(curarray<T>),cudaMemcpyHostToDevice);
|
|
|
|
ret = 1;
|
|
|
|
return ret;
|
|
}
|
|
|
|
template<typename T> int curarray_device_delete(curarray<T> *hostptr)
|
|
{
|
|
int ret = 0;
|
|
|
|
curarray<T> ldevdata;
|
|
int olddev;
|
|
|
|
if(hostptr->devptr==NULL)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
cudaGetDevice(&olddev);
|
|
cudaSetDevice(hostptr->device);
|
|
|
|
cudaMemcpy(&ldevdata,hostptr->devptr,sizeof(curarray<T>),cudaMemcpyDeviceToHost);
|
|
|
|
int I;
|
|
for(I=0;I<hostptr->Narrays;I++)
|
|
{
|
|
if(hostptr->devarrayptrs[I]!=NULL)
|
|
{
|
|
cudaFree(hostptr->devarrayptrs[I]);
|
|
hostptr->devarrayptrs[I] = NULL;
|
|
}
|
|
}
|
|
|
|
cudaFree(ldevdata.devarrayptrs);
|
|
cudaFree(ldevdata.N);
|
|
|
|
cudaFree(hostptr->devptr);
|
|
hostptr->devptr = NULL;
|
|
hostptr->device = -1;
|
|
|
|
cudaSetDevice(olddev);
|
|
|
|
ret = 1;
|
|
return ret;
|
|
}
|
|
|
|
template<typename T> int curarray_push(curarray<T> *hostptr)
|
|
{
|
|
int ret = 0;
|
|
|
|
int olddev;
|
|
|
|
curarray<T> ldevdata;
|
|
T** ldevarrayptrs = NULL;
|
|
int *devN = NULL;
|
|
|
|
if(hostptr==NULL) return -1;
|
|
|
|
cudaGetDevice(&olddev);
|
|
cudaSetDevice(hostptr->device);
|
|
|
|
int Narrays = hostptr->Narrays;
|
|
|
|
cudaMemcpy(&ldevdata,hostptr->devptr,sizeof(curarray<T>),cudaMemcpyDeviceToHost);
|
|
ldevarrayptrs = new(std::nothrow) T*[Narrays];
|
|
devN = new(std::nothrow) int[Narrays];
|
|
|
|
cudaMemcpy(ldevarrayptrs,ldevdata.devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyDeviceToHost);
|
|
cudaMemcpy(devN,ldevdata.N,sizeof(int)*Narrays,cudaMemcpyDeviceToHost);
|
|
|
|
|
|
int I;
|
|
|
|
for(I=0;I<Narrays;I++)
|
|
{
|
|
//check to see that host size is the same as device size, and that
|
|
//the host device pointer is the same as the device device pointer
|
|
if( (hostptr->N[I]!=devN[I]) ||
|
|
(hostptr->devarrayptrs[I] != ldevarrayptrs[I])
|
|
)
|
|
{
|
|
cudaFree(ldevarrayptrs[I]);
|
|
ldevarrayptrs[I] = NULL;
|
|
hostptr->devarrayptrs[I] = NULL;
|
|
|
|
if(hostptr->N[I]>0)
|
|
{
|
|
cudaMalloc(&(hostptr->devarrayptrs[I]),sizeof(T)*hostptr->N[I]);
|
|
ldevarrayptrs[I] = hostptr->devarrayptrs[I];
|
|
devN[I] = hostptr->N[I];
|
|
}
|
|
else
|
|
{
|
|
devN[I] = 0;
|
|
}
|
|
}
|
|
if(hostptr->N[I]>0)
|
|
{
|
|
//copy host data to device
|
|
cudaMemcpy(hostptr->devarrayptrs[I],hostptr->hostarrayptrs[I],sizeof(T)*hostptr->N[I],cudaMemcpyHostToDevice);
|
|
}
|
|
} //for each array
|
|
|
|
//rectify and copy device data structure to device
|
|
ldevdata.device = hostptr->device;
|
|
ldevdata.devptr = NULL;
|
|
ldevdata.Narrays = hostptr->Narrays; //later - logic for dealing with when this is not true
|
|
ldevdata.hostarrayptrs = NULL;
|
|
|
|
|
|
cudaMemcpy(ldevdata.N,hostptr->N,sizeof(int)*Narrays,cudaMemcpyHostToDevice);
|
|
cudaMemcpy(ldevdata.devarrayptrs,hostptr->devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyHostToDevice);
|
|
|
|
cudaMemcpy(hostptr->devptr,&ldevdata,sizeof(curarray<T>),cudaMemcpyHostToDevice);
|
|
|
|
cuda_errortrap("curarray_push cuda error:");
|
|
|
|
cudaSetDevice(olddev);
|
|
|
|
delete[] ldevarrayptrs;
|
|
delete[] devN;
|
|
|
|
return ret;
|
|
}
|
|
|
|
template<typename T> int curarray_pull(curarray<T> *hostptr)
|
|
{
|
|
int ret = 0;
|
|
|
|
int olddev;
|
|
|
|
curarray<T> ldevdata;
|
|
T** ldevarrayptrs = NULL;
|
|
int *devN = NULL;
|
|
|
|
if(hostptr==NULL) return -1;
|
|
|
|
cudaGetDevice(&olddev);
|
|
cudaSetDevice(hostptr->device);
|
|
|
|
cuda_errortrap("dbg1");
|
|
|
|
int Narrays = hostptr->Narrays;
|
|
|
|
cudaMemcpy(&ldevdata,hostptr->devptr,sizeof(curarray<T>),cudaMemcpyDeviceToHost);
|
|
ldevarrayptrs = new(std::nothrow) T*[Narrays];
|
|
devN = new(std::nothrow) int[Narrays];
|
|
|
|
cuda_errortrap("dbg2");
|
|
|
|
cudaMemcpy(ldevarrayptrs,ldevdata.devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyDeviceToHost);
|
|
cudaMemcpy(devN,ldevdata.N,sizeof(int)*Narrays,cudaMemcpyDeviceToHost);
|
|
|
|
cuda_errortrap("dbg3");
|
|
char dbgjnk[50];
|
|
|
|
int I;
|
|
for(I=0;I<Narrays;I++)
|
|
{
|
|
//check to see that host size is the same as device size, and that
|
|
//the host device pointer is the same as the device device pointer
|
|
if(hostptr->devarrayptrs[I] != ldevarrayptrs[I])
|
|
{
|
|
hostptr->devarrayptrs[I] = ldevarrayptrs[I];
|
|
}
|
|
|
|
if(hostptr->N[I]!=devN[I])
|
|
{
|
|
if(hostptr->hostarrayptrs[I]!=NULL)
|
|
{
|
|
delete[] hostptr->hostarrayptrs[I];
|
|
hostptr->hostarrayptrs[I] = NULL;
|
|
}
|
|
|
|
if(devN[I]>0)
|
|
{
|
|
hostptr->hostarrayptrs[I] = new(std::nothrow) T[devN[I]];
|
|
hostptr->N[I] = devN[I];
|
|
}
|
|
else
|
|
{
|
|
hostptr->N[I] = 0;
|
|
}
|
|
}
|
|
|
|
if(hostptr->hostarrayptrs[I]!=NULL && hostptr->devarrayptrs[I]!=NULL)
|
|
{
|
|
cudaMemcpy(hostptr->hostarrayptrs[I],hostptr->devarrayptrs[I],sizeof(T)*hostptr->N[I],cudaMemcpyDeviceToHost);
|
|
sprintf(dbgjnk,"%d dbg %d",I,hostptr->N[I]);
|
|
cuda_errortrap(dbgjnk);
|
|
}
|
|
} //for each array
|
|
|
|
//for the pull operation, I don't think any update of the device data structure is necessary
|
|
|
|
cudaSetDevice(olddev);
|
|
|
|
delete[] ldevarrayptrs;
|
|
delete[] devN;
|
|
|
|
return ret;
|
|
}
|
|
|
|
template<typename T> __host__ int curarray<T>::push()
|
|
{
|
|
return curarray_push(this);
|
|
}
|
|
template<typename T> __host__ int curarray<T>::pull()
|
|
{
|
|
return curarray_pull(this);
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#memory-allocation-and-lifetime%5B/url%5D
|
|
|
|
cudaMalloc() and cudaFree() have distinct semantics between the host and
|
|
device environments. When invoked from the host, cudaMalloc() allocates a
|
|
new region from unused device memory. When invoked from the device runtime
|
|
these functions map to device-side malloc() and free(). This implies that
|
|
within the device environment the total allocatable memory is limited to the
|
|
device malloc() heap size, which may be smaller than the available unused
|
|
device memory. Also, it is an error to invoke cudaFree() from the host
|
|
program on a pointer which was allocated by cudaMalloc() on the device
|
|
or vice-versa.
|
|
|
|
So, basically this entire function is not going to work. I'll be unable to resize within
|
|
a kernel.
|
|
*/
|
|
|
|
/*
|
|
|
|
template<typename T> __device__ int curarray<T>::dev_resizearray(int arraynum, int arraysize)
|
|
{
|
|
int ret = 0;
|
|
T* newptr = NULL;
|
|
int I;
|
|
T def;
|
|
|
|
if(arraynum>=0 && arraynum<Narrays)
|
|
{
|
|
if(N[arraynum]!=arraysize)
|
|
{
|
|
if(arraysize<=0)
|
|
{
|
|
if(devarrayptrs[arraynum]!=NULL) cudaFree(devarrayptrs[arraynum]);
|
|
devarrayptrs[arraynum] = NULL;
|
|
N[arraynum] = 0;
|
|
ret = 1;
|
|
return ret;
|
|
}
|
|
|
|
cudaMalloc(&newptr,arraysize*sizeof(T));
|
|
if(newptr!=NULL)
|
|
{
|
|
//do I want to assume there is a copy operator? (operator=)
|
|
//for now, yes - write a more restrictive class later if I don't want it
|
|
if(devarrayptrs[arraynum]!=NULL)
|
|
{
|
|
for(I=0;I<N[arraynum]&&I<arraysize;I++)
|
|
{
|
|
newptr[I] = devarrayptrs[arraynum][I];
|
|
}
|
|
}
|
|
for(I=N[arraynum];I<arraysize;I++)
|
|
{
|
|
newptr[I] = def;
|
|
}
|
|
|
|
if(devarrayptrs[arraynum]!=NULL) cudaFree(devarrayptrs[arraynum]);
|
|
devarrayptrs[arraynum] = newptr;
|
|
N[arraynum] = arraysize;
|
|
ret = 1;
|
|
}
|
|
else
|
|
{
|
|
ret = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ret = 1;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
*/
|
|
|
|
template<typename T> __host__ int curarray<T>::resizearray(int arraynum, int arraysize)
|
|
{
|
|
int ret = 0;
|
|
T* newptr = NULL;
|
|
int I;
|
|
T def;
|
|
|
|
if(arraynum>=0 && arraynum<Narrays)
|
|
{
|
|
if(N[arraynum]!=arraysize)
|
|
{
|
|
if(arraysize<=0)
|
|
{
|
|
delete[] hostarrayptrs[arraynum];
|
|
hostarrayptrs[arraynum] = NULL;
|
|
N[arraynum] = 0;
|
|
ret = 1;
|
|
return ret;
|
|
}
|
|
|
|
newptr = new(std::nothrow) T[arraysize];
|
|
if(newptr!=NULL)
|
|
{
|
|
//do I want to assume there is a copy operator? (operator=)
|
|
//for now, yes - write a more restrictive class later if I don't want it
|
|
|
|
if(hostarrayptrs[arraynum]!=NULL)
|
|
{
|
|
for(I=0;I<N[arraynum]&&I<arraysize;I++)
|
|
{
|
|
newptr[I] = hostarrayptrs[arraynum][I];
|
|
}
|
|
}
|
|
for(I=N[arraynum];I<arraysize;I++)
|
|
{
|
|
newptr[I] = def;
|
|
}
|
|
|
|
//cudaFree(hostarrayptrs[arraynum]);
|
|
delete[] hostarrayptrs[arraynum];
|
|
hostarrayptrs[arraynum] = newptr;
|
|
N[arraynum] = arraysize;
|
|
ret = 1;
|
|
}
|
|
else
|
|
{
|
|
ret = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ret = 1;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
};
|
|
|
|
#endif |