update

2026-05-04 08:57:06 -04:00
parent fdfd239962
commit f07c1190ee
18 changed files with 250 additions and 42 deletions
--- a/build_linux64/default.inp
+++ b/build_linux64/default.inp
@ -6,32 +6,33 @@ avariable = bla

 anothervar = 1.2,1.3, 1.5 //this is a comment

-operatingmode = 1
+operatingmode = 3
 //1 - STL output
 //2 - count triangles
 //3 - count area

-verbosity = 0
+verbosity = 2

 //domain
-xyz_min = -1.2,-1.2,-1.2
-xyz_max = 1.2,1.2,1.2
-Nx = 32
-Ny = 32
-Nz = 32
+xyz_min = -2.0,-2.0,-2.0
+xyz_max = 2.0,2.0,2.0
+Nx = 128
+Ny = 128
+Nz = 128

 //execution parallelism pars
-nblockdiv = 32
+nblockdiv = 64
 nthreads = 256

 //function specific parameters
 //not all pars apply to all functions

-isjulia = 0
-juliac = 0.0, 0.0, 0.0
-exponent = 8.0
-fractaliter = 64
-threshhold = 2.0
+isjulia = 1
+juliac = 0.0,0.0,0.0
+exponent = 4.0
+fractaliter = 100
 freezethresh = 10.0
+threshold = 0.2
+scale = 0.5


--- a/build_linux64/libamscudafractallevelset.linux64.a
+++ b/build_linux64/libamscudafractallevelset.linux64.a
--- a/build_linux64/objstore/amscufracls_bagoftriangles.o
+++ b/build_linux64/objstore/amscufracls_bagoftriangles.o
--- a/build_linux64/objstore/amscufracls_function.o
+++ b/build_linux64/objstore/amscufracls_function.o
--- a/build_linux64/objstore/amscufracls_marchingtets.o
+++ b/build_linux64/objstore/amscufracls_marchingtets.o
--- a/build_linux64/objstore/amscufracls_misc.o
+++ b/build_linux64/objstore/amscufracls_misc.o
--- a/build_linux64/objstore/amscufracls_mtcube.o
+++ b/build_linux64/objstore/amscufracls_mtcube.o
--- a/build_linux64/objstore/amscufracls_parseinputfile.o
+++ b/build_linux64/objstore/amscufracls_parseinputfile.o
--- a/build_linux64/objstore/amscufracls_template.o
+++ b/build_linux64/objstore/amscufracls_template.o
--- a/build_linux64/objstore/amscufracls_tetrahedron.o
+++ b/build_linux64/objstore/amscufracls_tetrahedron.o
--- a/build_linux64/template.blend
+++ b/build_linux64/template.blend
--- a/build_linux64/template.blend1
+++ b/build_linux64/template.blend1
--- a/build_linux64/test
+++ b/build_linux64/test
--- a/include/amscudafractallevelset/amscfls_parseinput.cuh
+++ b/include/amscudafractallevelset/amscfls_parseinput.cuh
@ -33,8 +33,11 @@ namespace fractallevelset
        int verbosity;
        
        __host__ __device__ inputpars();
+        void __host__ print();
    };

+
+
    int parse_input(const char *fname, inputpars *pars);

    void inputmain(int argc, char *argv[]);
--- a/include/amscudafractallevelset/amscudafractallevelset.cuh
+++ b/include/amscudafractallevelset/amscudafractallevelset.cuh
@ -38,8 +38,8 @@ class cuvec4f;
 // #define AMSCU_CONST
 // #endif

-#define AMSCLS_MANDELBROT
-//#define AMSCLS_PPKFRACTAL
+//#define AMSCLS_MANDELBROT
+#define AMSCLS_PPKFRACTAL


 namespace amscuda
@ -156,6 +156,13 @@ struct marchingtet_pars
    __device__ __host__ marchingtet_pars();
 };

+
+void marchingtets_gpu(marchingtet_pars mtpars, bagoftriangles *mesh, int nblockdiv=32, int nthreads=256, int verbose=0);
+
+int64_t marchingtets_tricount_gpu(marchingtet_pars mtpars, int nblockdiv=32, int nthreads=256, int verbose=0);
+
+float marchingtets_area_gpu(marchingtet_pars mtpars, int nblockdiv=32, int nthreads=256, int verbose=0);
+
 }; //end namespaces
 };

--- a/src/amscudafractallevelset/amscufracls_function.cu
+++ b/src/amscudafractallevelset/amscufracls_function.cu
@ -20,10 +20,14 @@ __host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars)
 {
    float ret = 0.0f;
    float wr,wtheta,wphi;
-    cuvec3f w;
+    cuvec3f w,c;
    int I;

    w = pos;
+    if(pars.isjulia) 
+        c = pars.juliac;
+    else
+        c = pos;

    for(I=0;I<pars.fractaliter;I++)
    {
@ -31,11 +35,11 @@ __host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars)
        wtheta = ::acos(w.y/wr);
        wphi = ::atan2(w.x,w.z);
        if(wr>pars.freezethresh) break;
-        wr = ::pow(wr,8.0f);
-        wtheta = wtheta*8.0f;
-        wphi = wphi*8.0f;
+        wr = ::pow(wr,pars.exponent);
+        wtheta = wtheta*pars.exponent;
+        wphi = wphi*pars.exponent;
        w = cuvec3f(wr*sin(wtheta)*sin(wphi),wr*cos(wtheta),wr*sin(wtheta)*cos(wphi));
-        w = w + pos;
+        w = w + c;
    }

    ret = cuvec3f_norm(w)-pars.threshold;
@ -44,22 +48,89 @@ __host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars)
 };
 #endif

-#ifdef AMSCLS_JULIA
-__host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars)
-{
-    float ret = 0.0f;
-
-    return ret;
-};
-#endif

 #ifdef AMSCLS_PPKFRACTAL
 __host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars)
 {
-    float ret = 0.0f;
+    float ret = 1E6;
+    cuvec3f z;
+    float offset;
+    cuvec3f c;
+    float dr = 1.0f;
+    float signfactor;
+    int I;
+
+    if(pars.isjulia)
+    {
+        c = pars.juliac;
+        offset = 0.0;
+    }
+    else
+    {
+        c = pos;
+        offset = 1.0;
+    }
+    signfactor = (pars.scale<0.0f)? 0.95f : 1.0f;
+
+    z = pos;
+
+    for(I=0;I<pars.fractaliter;I++)
+    {
+        if(::fabs(z.y)>8.0f)
+        {
+            break;
+        }
+
+        float sx,cx,sz,cz,shy,chy;
+        sx = sinf(z.x);
+        cx = cosf(z.x);
+        sz = sinf(z.z);
+        cz = cosf(z.z);
+        shy = sinhf(z.y);
+        chy = coshf(z.y);
+
+        z = cuvec3f(sx*chy,cx*cz*shy,sz*chy)*pars.scale + c;
+        float stretch = 0.75f*::sqrtf(chy*chy + shy*shy);
+        if(stretch>0.9f) stretch = 0.9f;
+        dr = ::fabs(pars.scale)*signfactor*stretch*dr + offset;
+        float q = 1.0f/dr;
+        if(q<ret) ret = q;
+    }

    return ret;
 };
+
+/*
+float DE(vec3 pos) {
+    vec3 z = pos;
+    vec3 juliaC = Julia ? JuliaC : pos;
+    float offset = Julia ? 0.0 : 1.0;
+    float dr = 1.0;  
+    // negative scale swaps sign each iteration, which affects the dr growth
+    float signFactor = Scale < 0.0 ? 0.95 : 1.0; 
+    float de = 1e20;
+
+    for (int i = 0; i < Iterations; i++) {
+        // just for stability, not a real escape condition
+        if (abs(z.y) > 8.) break; 
+
+        float sx = sin(z.x), cx = cos(z.x);
+        float sz = sin(z.z), cz = cos(z.z);
+        // shy and chy can be replaced with taylor approximations to save cycles
+        float shy = sinh(z.y), chy = cosh(z.y);
+        
+        z = vec3(sx * chy, cx * cz * shy, sz * chy) * Scale + juliaC; 
+
+        // 0.75 and 0.9 are magic constants
+        float stretch = 0.75 * sqrt(chy * chy + shy * shy);
+        stretch = max(0.9, stretch);
+        
+        dr = abs(Scale) * signFactor * stretch * dr + offset;
+        de = min(de, 1.0 / dr);
+    }
+    return DEMultiplier* de;  
+}
+*/
 #endif
    
 };//end namespace fractallevelset
--- a/src/amscudafractallevelset/amscufracls_marchingtets.cu
+++ b/src/amscudafractallevelset/amscufracls_marchingtets.cu
@ -104,6 +104,31 @@ namespace fractallevelset
        }
    }

+    __global__ void marchingtets_gpu_debug2(marchingtet_pars mtpars, float *outbuffer)
+    {
+        int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz;
+        int64_t nt = blockDim.x*gridDim.x;
+        int64_t tn = threadIdx.x + blockIdx.x*blockDim.x;
+        int64_t I;
+        int Ix,Iy,Iz;
+
+
+        mtcube cube;
+
+        for(I=tn;I<psize;I+=nt)
+        {
+            Ix = I%(mtpars.Nx);
+            Iy = (I/(mtpars.Nx))%mtpars.Ny;
+            Iz = (I/(mtpars.Nx*mtpars.Ny))%mtpars.Nz;
+
+            if(Iz==(mtpars.Nz/2))
+            {
+                mtgetcube(mtpars,I,cube);
+                cube.eval_function(mtpars.pars);
+                outbuffer[Ix + Iy*mtpars.Nx] = cube.fevals[8];
+            }
+        }
+    }

    __global__ void marchingtets_gpu_areacount(marchingtet_pars mtpars, float *areabuffer)
    {
@ -173,21 +198,20 @@ namespace fractallevelset
        return;
    }

-    void marchingtets_gpu(marchingtet_pars mtpars, bagoftriangles *mesh)
+    void marchingtets_gpu(marchingtet_pars mtpars, bagoftriangles *mesh, int nblockdiv, int nthreads, int verbose)
    {
        //marchingtet_pars *dmtpars = NULL;
        int *dtricountbuffer = NULL;
        float *dvertexbuffer = NULL;
        int64_t *hoffsetbuffer = NULL;
        int64_t *doffsetbuffer = NULL;
-        int nblocks,nthreads;
+        int nblocks;

        int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz;
        int *htricountbuffer = NULL;
        int64_t tricount;
        
-        nthreads = 256;
-        nblocks = (int)(((psize/(int64_t)nthreads)+1L)/16L+1L);
+        nblocks = (int)(((psize/(int64_t)nthreads)+1L)/(int64_t)nblockdiv+1L);

        cudaMalloc(&dtricountbuffer, nblocks*nthreads*sizeof(int));
        htricountbuffer = new(std::nothrow) int[nblocks*nthreads];
@ -211,6 +235,7 @@ namespace fractallevelset
        // cudaMalloc(&dmtpars,sizeof(marchingtet_pars));
        // cudaMemcpy(dmtpars,&mtpars,sizeof(marchingtet_pars),cudaMemcpyHostToDevice);

+        if(verbose>=2)
            printf("debug: launching marchingtets_gpu_tricount with <%d,%d>\n",nblocks,nthreads);
        cudaError_t err = cudaSuccess;
        marchingtets_gpu_tricount<<<nblocks,nthreads>>>(mtpars,dtricountbuffer);
@ -226,6 +251,7 @@ namespace fractallevelset

        tricount = mt_count_tris_and_accum_offset(htricountbuffer,dtricountbuffer,hoffsetbuffer,doffsetbuffer,nblocks,nthreads);

+        if(verbose>=1)
            printf("marchingtets_gpu: triangle count = %ld\n",(long)tricount);

        mesh->vertices.resize(tricount*9);
@ -257,6 +283,67 @@ namespace fractallevelset
        return;
    }

+    int64_t marchingtets_tricount_gpu(marchingtet_pars mtpars, int nblockdiv, int nthreads, int verbose)
+    {
+        //marchingtet_pars *dmtpars = NULL;
+        int *dtricountbuffer = NULL;
+        int64_t *hoffsetbuffer = NULL;
+        int64_t *doffsetbuffer = NULL;
+        float *dvertexbuffer = NULL;
+        int nblocks;
+
+        int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz;
+        int *htricountbuffer = NULL;
+        int64_t tricount = 0;
+        
+        nblocks = (int)(((psize/(int64_t)nthreads)+1L)/(int64_t)nblockdiv+1L);
+
+        cudaMalloc(&dtricountbuffer, nblocks*nthreads*sizeof(int));
+        htricountbuffer = new(std::nothrow) int[nblocks*nthreads];
+        cudaMalloc(&doffsetbuffer, nblocks*nthreads*sizeof(int64_t));
+        hoffsetbuffer = new(std::nothrow) int64_t[nblocks*nthreads];
+
+        //DEBUG
+        // float *debugbuffer = NULL;
+        // float *ddebugbuffer = NULL;
+        // debugbuffer = new(std::nothrow) float[nblocks*nthreads*25];
+        // cudaMalloc(&ddebugbuffer,sizeof(float)*nblocks*nthreads*25);
+
+
+        if(dtricountbuffer==NULL || htricountbuffer==NULL)
+        {
+            printf("marchingtets_gpu error: failed to allocate counting buffers\n");
+            marchingtets_gpu_cleanup(&dtricountbuffer,&htricountbuffer,&dvertexbuffer,&doffsetbuffer,&hoffsetbuffer);
+            return 0;
+        }
+
+        // cudaMalloc(&dmtpars,sizeof(marchingtet_pars));
+        // cudaMemcpy(dmtpars,&mtpars,sizeof(marchingtet_pars),cudaMemcpyHostToDevice);
+
+        //printf("debug: launching marchingtets_gpu_tricount with <%d,%d>\n",nblocks,nthreads);
+        cudaError_t err = cudaSuccess;
+        marchingtets_gpu_tricount<<<nblocks,nthreads>>>(mtpars,dtricountbuffer);
+        cudaDeviceSynchronize();
+        err = cudaGetLastError();
+        if(err!=cudaSuccess)
+        {
+            printf("marchingtets_gpu_tricount error: %s\n",cudaGetErrorString(err));
+        }
+
+        //mt_debug1(debugbuffer,ddebugbuffer,nblocks,nthreads);
+
+        tricount = mt_count_tris_and_accum_offset(htricountbuffer,dtricountbuffer,hoffsetbuffer,doffsetbuffer,nblocks,nthreads);
+
+        if(verbose>=1)
+            printf("marchingtets_gpu_tricount: triangle count = %ld\n",(long)tricount);
+
+        marchingtets_gpu_cleanup(&dtricountbuffer,&htricountbuffer,&dvertexbuffer,&doffsetbuffer,&hoffsetbuffer);
+        
+
+        return tricount;
+    }
+
+
    void marchingtets_area_gpu_cleanup(
        float **hareabuffer,
        float **dareabuffer
@ -266,18 +353,17 @@ namespace fractallevelset
        if(*hareabuffer!=NULL) {delete[] *hareabuffer; *hareabuffer=NULL;}
    }

-    float marchingtets_area_gpu(marchingtet_pars mtpars)
+    float marchingtets_area_gpu(marchingtet_pars mtpars, int nblockdiv, int nthreads, int verbose)
    {
        //marchingtet_pars *dmtpars = NULL;
-        int nblocks,nthreads;
+        int nblocks;
        float *hareabuffer = NULL;
        float *dareabuffer = NULL;
        int I;
        float areasum = 0.0f;
        int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz;
        
-        nthreads = 256;
-        nblocks = (int)(((psize/(int64_t)nthreads)+1L)/16L+1L);
+        nblocks = (int)(((psize/(int64_t)nthreads)+1L)/(int64_t)nblockdiv+1L);

        cudaMalloc(&dareabuffer, nblocks*nthreads*sizeof(float));
        hareabuffer = new(std::nothrow) float[nblocks*nthreads];
--- a/src/amscudafractallevelset/amscufracls_parseinputfile.cu
+++ b/src/amscudafractallevelset/amscufracls_parseinputfile.cu
@ -96,6 +96,10 @@ namespace fractallevelset
        amsarray<amsstring> alphanums;
        
        alphanums = s.splitalphanum();
+        // printf("debug: alphanum[0]='%s'\n",alphanums[0].cstring);
+        // printf("debug: alphanum[1]='%s'\n",alphanums[1].cstring);
+        // printf("debug: alphanum[2]='%s'\n",alphanums[2].cstring);
+        
        for(I=0;I<alphanums.length&&I<3;I++)
        {
            ret[I] = (float)(alphanums[I].strtonum());
@ -124,6 +128,8 @@ namespace fractallevelset
        {
            if(varname==stringpars->at(I)[0])
            {
+                // printf("debug: par[0]='%s'\n",stringpars->at(I)[0].cstring);
+                // printf("debug: par[1]='%s'\n",stringpars->at(I)[1].cstring);
                *val = (float) stringpars->at(I)[1].strtonum();
            }
        }
@ -193,6 +199,7 @@ namespace fractallevelset
        inputpars pars;
        marchingtet_pars mtpars;
        main_functionpars mfpars;
+        bagoftriangles mesh;

        if(argc<3)
        {
@ -241,22 +248,55 @@ namespace fractallevelset
        mtpars.pars = mfpars;

        //execute model based on operatingmode
+        if(pars.verbosity>=1)
+            pars.print();
        if(pars.operatingmode==omode_stloutput)
        {
-
+            marchingtets_gpu(mtpars,&mesh,pars.nblockdiv,pars.nthreads,pars.verbosity);
+            mesh.save_stl(fnameout.cstring);
        }
        else if(pars.operatingmode==omode_counttriangles)
        {
-
+            int64_t tricount = 0;
+            tricount = marchingtets_tricount_gpu(mtpars,pars.nblockdiv,pars.nthreads,pars.verbosity);
+            printf("triangle count = %ld\n",(long)tricount);
        }
        else if(pars.operatingmode==omode_countarea)
        {
-
+            float area = 0.0f;
+            area = marchingtets_area_gpu(mtpars,pars.nblockdiv,pars.nthreads,pars.verbosity);
+            printf("area = %1.5g\n",area);
        }


    }

+    __host__ void inputpars::print()
+    {
+        printf("inputpars:\n");
+        {
+            printf("\t operatingmode = %d \n",operatingmode);
+            printf("\t verbosity = %d\n",verbosity);
+            printf("\t xyz_min = (%1.3g,%1.3g,%1.3g)\n",xyz_min[0],xyz_min[1],xyz_min[2]);
+            printf("\t xyz_max = (%1.3g,%1.3g,%1.3g)\n",xyz_max[0],xyz_max[1],xyz_max[2]);
+            printf("\t Nx = %d\n",Nx);
+            printf("\t Ny = %d\n",Ny);
+            printf("\t Nz = %d\n",Nz);
+            printf("\t nblockdiv = %d\n",nblockdiv);
+            printf("\t nthreads = %d\n",nthreads);
+            printf("\t isjulia = %d\n",(int)isjulia);
+            printf("\t juliac = (%1.3g,%1.3g,%1.3g)\n",juliac[0],juliac[1],juliac[2]);
+            printf("\t exponent = %1.3f\n",exponent);
+            printf("\t fractaliter = %d\n",fractaliter);
+            printf("\t threshold = %1.3f\n",threshold);
+            printf("\t freezethresh = %1.3f\n",freezethresh);
+            printf("\t scale = %1.3f\n",scale);
+            
+
+
+        }
+    }
+

    
 };//end namespace fractallevelset