init

2026-04-10 13:29:50 -04:00
commit a8ed51a904
162 changed files with 19902 additions and 0 deletions
--- a/old/9apr26_prerefactor/include/amsculib2/amscurarray_impl.cuh
+++ b/old/9apr26_prerefactor/include/amsculib2/amscurarray_impl.cuh
@ -0,0 +1,529 @@
+#ifndef __AMSCURARRAY_IMPL_HPP__
+#define __AMSCURARRAY_IMPL_HPP__
+
+namespace amscuda
+{
+
+template<typename T> curarray<T>::curarray()
+{
+    device = -1;
+    devptr = NULL;
+    Narrays = 0;
+    N = NULL;
+    hostarrayptrs = NULL;
+    devarrayptrs = NULL;
+
+}
+
+template<typename T> curarray<T>::~curarray()
+{
+    device = -1;
+    devptr = NULL;
+    Narrays = 0;
+    N = NULL;
+    hostarrayptrs = NULL;
+    devarrayptrs = NULL;
+    
+}
+
+template<typename T> int curarray_new(curarray<T>** ptr, int Narrays)
+{
+    int ret = 0;
+    int device;
+    curarray<T> *lhptr = *ptr;
+
+    cudaGetDevice(&device);
+
+    if(lhptr!=NULL)
+    {
+        curarray_delete(ptr);
+    }
+
+    *ptr = new(std::nothrow) curarray<T>();
+    lhptr = *ptr;
+
+    int I;
+
+    if(Narrays<0) Narrays=0;
+
+    lhptr->Narrays = Narrays;
+    lhptr->device = device;
+    lhptr->N = new(std::nothrow) int[Narrays];
+    lhptr->hostarrayptrs = new(std::nothrow) T*[Narrays];
+    lhptr->devarrayptrs = new(std::nothrow) T*[Narrays];
+    
+    for(I=0;I<Narrays;I++)
+    {
+        lhptr->N[I] = 0;
+        lhptr->hostarrayptrs[I] = NULL;
+        lhptr->devarrayptrs[I] = NULL;
+    }
+
+    curarray_device_new(lhptr);
+
+    return ret;
+}
+
+template<typename T> int curarray_delete(curarray<T>** ptr)
+{
+    int ret = 0;
+    curarray<T> *lptr = NULL;
+    int olddev;
+
+    int I;
+
+    if(*ptr==NULL)
+    {
+        return 0;
+    }
+    lptr = *ptr;
+
+    cudaGetDevice(&olddev);
+    cudaSetDevice(lptr->device);
+
+    
+    if(lptr->devptr!=NULL)
+    {
+        curarray_device_delete(lptr);
+    }
+
+    lptr->device = -1;
+
+    for(I=0;I<lptr->Narrays;I++)
+    {
+        if(lptr->hostarrayptrs!=NULL)
+        {
+            if(lptr->hostarrayptrs[I]!=NULL)
+            {
+                delete[] lptr->hostarrayptrs[I];
+                lptr->hostarrayptrs[I] = NULL;
+            }
+        }
+
+        if(lptr->devarrayptrs!=NULL)
+        {
+            if(lptr->devarrayptrs[I]!=NULL)
+            {
+                //erasing device memory should have been handled in curarray_device_delete
+                lptr->devarrayptrs[I] = NULL;
+            }
+        }
+
+        lptr->N[I] = 0;
+    }
+
+    if(lptr->N != NULL) {delete[] lptr->N; lptr->N = NULL;}
+    if(lptr->hostarrayptrs!=NULL) {delete[] lptr->hostarrayptrs; lptr->hostarrayptrs=NULL;}
+    if(lptr->devarrayptrs!=NULL) {delete[] lptr->devarrayptrs; lptr->devarrayptrs=NULL;}
+
+    if(*ptr!=NULL) {delete *ptr; *ptr = NULL;}
+
+    cudaSetDevice(olddev);
+
+    return ret;
+}
+
+template<typename T> int curarray_device_new(curarray<T> *hostptr)
+{
+    int ret = 0;
+    curarray<T> ldevdata;
+
+    if(hostptr==NULL) return -1;
+    if(hostptr->devptr!=NULL)
+    {
+        curarray_device_delete(hostptr);
+    }
+
+    int I;
+
+    cudaGetDevice(&(hostptr->device));
+
+    ldevdata.device = hostptr->device;
+    ldevdata.Narrays = hostptr->Narrays;
+    int Narrays = hostptr->Narrays;
+
+    cudaMalloc(&(ldevdata.N),sizeof(int)*Narrays);
+    cudaMemcpy(ldevdata.N,hostptr->N,sizeof(int)*Narrays,cudaMemcpyHostToDevice);
+
+    ldevdata.hostarrayptrs = NULL;
+
+    for(I=0;I<Narrays;I++)
+    {
+        if(hostptr->N[I]>0)
+        {
+            if(hostptr->devarrayptrs[I]!=NULL)
+            {
+                cudaFree(hostptr->devarrayptrs[I]);
+                hostptr->devarrayptrs[I] = NULL;
+            }
+
+            cudaMalloc(&(hostptr->devarrayptrs[I]),sizeof(T)*hostptr->N[I]);
+            cudaMemcpy(hostptr->devarrayptrs[I],hostptr->hostarrayptrs[I],sizeof(T)*hostptr->N[I],cudaMemcpyHostToDevice);
+        }
+        else
+        {
+            if(hostptr->devarrayptrs[I]!=NULL)
+            {
+                cudaFree(hostptr->devarrayptrs[I]);
+                hostptr->devarrayptrs[I] = NULL;
+            }
+        }
+    }
+
+    cudaMalloc(&(ldevdata.devarrayptrs),sizeof(T*)*Narrays);
+    cudaMemcpy(ldevdata.devarrayptrs,hostptr->devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyHostToDevice);
+
+    cudaMalloc(&(hostptr->devptr),sizeof(curarray<T>));
+    cudaMemcpy(hostptr->devptr,&ldevdata,sizeof(curarray<T>),cudaMemcpyHostToDevice);    
+    
+    ret = 1;
+
+    return ret;
+}
+
+template<typename T> int curarray_device_delete(curarray<T> *hostptr)
+{
+    int ret = 0;
+
+    curarray<T> ldevdata;
+    int olddev;
+
+    if(hostptr->devptr==NULL)
+    {
+        return 0;
+    }
+
+    cudaGetDevice(&olddev);
+    cudaSetDevice(hostptr->device);
+
+    cudaMemcpy(&ldevdata,hostptr->devptr,sizeof(curarray<T>),cudaMemcpyDeviceToHost);
+
+    int I;
+    for(I=0;I<hostptr->Narrays;I++)
+    {
+        if(hostptr->devarrayptrs[I]!=NULL)
+        {
+            cudaFree(hostptr->devarrayptrs[I]);
+            hostptr->devarrayptrs[I] = NULL;
+        }
+    }
+
+    cudaFree(ldevdata.devarrayptrs);
+    cudaFree(ldevdata.N);
+
+    cudaFree(hostptr->devptr);
+    hostptr->devptr = NULL;
+    hostptr->device = -1;
+
+    cudaSetDevice(olddev);
+
+    ret = 1;
+    return ret;
+}
+
+template<typename T> int curarray_push(curarray<T> *hostptr)
+{
+    int ret = 0;
+
+    int olddev;
+
+    curarray<T> ldevdata;
+    T** ldevarrayptrs = NULL;
+    int *devN = NULL;
+
+    if(hostptr==NULL) return -1;
+
+    cudaGetDevice(&olddev);
+    cudaSetDevice(hostptr->device);
+
+    int Narrays = hostptr->Narrays;
+
+    cudaMemcpy(&ldevdata,hostptr->devptr,sizeof(curarray<T>),cudaMemcpyDeviceToHost);
+    ldevarrayptrs = new(std::nothrow) T*[Narrays];
+    devN = new(std::nothrow) int[Narrays];
+
+    cudaMemcpy(ldevarrayptrs,ldevdata.devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyDeviceToHost);
+    cudaMemcpy(devN,ldevdata.N,sizeof(int)*Narrays,cudaMemcpyDeviceToHost);
+
+
+    int I;
+
+    for(I=0;I<Narrays;I++)
+    {
+        //check to see that host size is the same as device size, and that 
+        //the host device pointer is the same as the device device pointer
+        if( (hostptr->N[I]!=devN[I]) || 
+            (hostptr->devarrayptrs[I] != ldevarrayptrs[I])
+        )
+        {
+            cudaFree(ldevarrayptrs[I]);
+            ldevarrayptrs[I] = NULL;
+            hostptr->devarrayptrs[I] = NULL;
+
+            if(hostptr->N[I]>0)
+            {
+                cudaMalloc(&(hostptr->devarrayptrs[I]),sizeof(T)*hostptr->N[I]);
+                ldevarrayptrs[I] = hostptr->devarrayptrs[I];
+                devN[I] = hostptr->N[I];
+            }
+            else
+            {
+                devN[I] = 0;
+            }
+        }
+        if(hostptr->N[I]>0)
+        {
+            //copy host data to device
+            cudaMemcpy(hostptr->devarrayptrs[I],hostptr->hostarrayptrs[I],sizeof(T)*hostptr->N[I],cudaMemcpyHostToDevice);
+        }
+    } //for each array
+
+    //rectify and copy device data structure to device
+    ldevdata.device = hostptr->device;
+    ldevdata.devptr = NULL;
+    ldevdata.Narrays = hostptr->Narrays; //later - logic for dealing with when this is not true
+    ldevdata.hostarrayptrs = NULL;
+
+
+    cudaMemcpy(ldevdata.N,hostptr->N,sizeof(int)*Narrays,cudaMemcpyHostToDevice);
+    cudaMemcpy(ldevdata.devarrayptrs,hostptr->devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyHostToDevice);
+
+    cudaMemcpy(hostptr->devptr,&ldevdata,sizeof(curarray<T>),cudaMemcpyHostToDevice);
+
+    cuda_errortrap("curarray_push cuda error:");
+
+    cudaSetDevice(olddev);
+
+    delete[] ldevarrayptrs;
+    delete[] devN;
+
+    return ret;
+}
+
+template<typename T> int curarray_pull(curarray<T> *hostptr)
+{
+    int ret = 0;
+
+    int olddev;
+
+    curarray<T> ldevdata;
+    T** ldevarrayptrs = NULL;
+    int *devN = NULL;
+
+    if(hostptr==NULL) return -1;
+
+    cudaGetDevice(&olddev);
+    cudaSetDevice(hostptr->device);
+
+    cuda_errortrap("dbg1");
+
+    int Narrays = hostptr->Narrays;
+
+    cudaMemcpy(&ldevdata,hostptr->devptr,sizeof(curarray<T>),cudaMemcpyDeviceToHost);
+    ldevarrayptrs = new(std::nothrow) T*[Narrays];
+    devN = new(std::nothrow) int[Narrays];
+
+    cuda_errortrap("dbg2");
+
+    cudaMemcpy(ldevarrayptrs,ldevdata.devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyDeviceToHost);
+    cudaMemcpy(devN,ldevdata.N,sizeof(int)*Narrays,cudaMemcpyDeviceToHost);
+
+    cuda_errortrap("dbg3");
+    char dbgjnk[50];
+
+    int I;
+    for(I=0;I<Narrays;I++)
+    {
+        //check to see that host size is the same as device size, and that 
+        //the host device pointer is the same as the device device pointer
+        if(hostptr->devarrayptrs[I] != ldevarrayptrs[I])
+        {
+            hostptr->devarrayptrs[I] = ldevarrayptrs[I];
+        }
+
+        if(hostptr->N[I]!=devN[I])
+        {
+            if(hostptr->hostarrayptrs[I]!=NULL) 
+            {
+                delete[] hostptr->hostarrayptrs[I];
+                hostptr->hostarrayptrs[I] = NULL;
+            }
+
+            if(devN[I]>0)
+            {
+                hostptr->hostarrayptrs[I] = new(std::nothrow) T[devN[I]];
+                hostptr->N[I] = devN[I];
+            }
+            else
+            {
+                hostptr->N[I] = 0;
+            }
+        }
+
+        if(hostptr->hostarrayptrs[I]!=NULL && hostptr->devarrayptrs[I]!=NULL)
+        {
+            cudaMemcpy(hostptr->hostarrayptrs[I],hostptr->devarrayptrs[I],sizeof(T)*hostptr->N[I],cudaMemcpyDeviceToHost);
+            sprintf(dbgjnk,"%d dbg %d",I,hostptr->N[I]);
+            cuda_errortrap(dbgjnk);
+        }
+    } //for each array
+
+    //for the pull operation, I don't think any update of the device data structure is necessary
+
+    cudaSetDevice(olddev);
+
+    delete[] ldevarrayptrs;
+    delete[] devN;
+
+    return ret;
+}
+
+template<typename T> __host__ int curarray<T>::push()
+{
+    return curarray_push(this);
+}
+template<typename T> __host__ int curarray<T>::pull()
+{
+    return curarray_pull(this);
+}
+
+
+
+/*
+https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#memory-allocation-and-lifetime%5B/url%5D
+
+cudaMalloc() and cudaFree() have distinct semantics between the host and 
+device environments. When invoked from the host, cudaMalloc() allocates a 
+new region from unused device memory. When invoked from the device runtime 
+these functions map to device-side malloc() and free(). This implies that 
+within the device environment the total allocatable memory is limited to the 
+device malloc() heap size, which may be smaller than the available unused 
+device memory. Also, it is an error to invoke cudaFree() from the host 
+program on a pointer which was allocated by cudaMalloc() on the device 
+or vice-versa.
+
+So, basically this entire function is not going to work. I'll be unable to resize within 
+a kernel.
+*/
+
+/*
+
+template<typename T> __device__ int curarray<T>::dev_resizearray(int arraynum, int arraysize)
+{
+    int ret = 0;
+    T* newptr = NULL;
+    int I;
+    T def;
+
+    if(arraynum>=0 && arraynum<Narrays)
+    {
+        if(N[arraynum]!=arraysize)
+        {
+            if(arraysize<=0)
+            {
+                if(devarrayptrs[arraynum]!=NULL) cudaFree(devarrayptrs[arraynum]);
+                devarrayptrs[arraynum] = NULL;
+                N[arraynum] = 0;
+                ret = 1;
+                return ret;
+            }
+
+            cudaMalloc(&newptr,arraysize*sizeof(T));
+            if(newptr!=NULL)
+            {
+                //do I want to assume there is a copy operator? (operator=)
+                //for now, yes - write a more restrictive class later if I don't want it
+                if(devarrayptrs[arraynum]!=NULL)
+                {
+                    for(I=0;I<N[arraynum]&&I<arraysize;I++)
+                    {
+                        newptr[I] = devarrayptrs[arraynum][I];
+                    }
+                }
+                for(I=N[arraynum];I<arraysize;I++)
+                {
+                    newptr[I] = def;
+                }
+
+                if(devarrayptrs[arraynum]!=NULL) cudaFree(devarrayptrs[arraynum]);
+                devarrayptrs[arraynum] = newptr;
+                N[arraynum] = arraysize;
+                ret = 1;
+            }
+            else
+            {
+                ret = -1;
+            }
+        }
+        else
+        {
+            ret = 1;
+        }
+    }
+
+    return ret;
+}
+
+*/
+
+template<typename T> __host__ int curarray<T>::resizearray(int arraynum, int arraysize)
+{
+    int ret = 0;
+    T* newptr = NULL;
+    int I;
+    T def;
+
+    if(arraynum>=0 && arraynum<Narrays)
+    {
+        if(N[arraynum]!=arraysize)
+        {
+            if(arraysize<=0)
+            {
+                delete[] hostarrayptrs[arraynum];
+                hostarrayptrs[arraynum] = NULL;
+                N[arraynum] = 0;
+                ret = 1;
+                return ret;
+            }
+
+            newptr = new(std::nothrow) T[arraysize];
+            if(newptr!=NULL)
+            {
+                //do I want to assume there is a copy operator? (operator=)
+                //for now, yes - write a more restrictive class later if I don't want it
+                
+                if(hostarrayptrs[arraynum]!=NULL)
+                {
+                    for(I=0;I<N[arraynum]&&I<arraysize;I++)
+                    {
+                        newptr[I] = hostarrayptrs[arraynum][I];
+                    }
+                }
+                for(I=N[arraynum];I<arraysize;I++)
+                {
+                    newptr[I] = def;
+                }
+
+                //cudaFree(hostarrayptrs[arraynum]);
+                delete[] hostarrayptrs[arraynum];
+                hostarrayptrs[arraynum] = newptr;
+                N[arraynum] = arraysize;
+                ret = 1;
+            }
+            else
+            {
+                ret = -1;
+            }
+        }
+        else
+        {
+            ret = 1;
+        }
+    }
+
+    return ret;
+}
+
+};
+
+#endif