diff --git a/build_linux64/default.inp b/build_linux64/default.inp index 7cf528d..38da61e 100644 --- a/build_linux64/default.inp +++ b/build_linux64/default.inp @@ -6,32 +6,33 @@ avariable = bla anothervar = 1.2,1.3, 1.5 //this is a comment -operatingmode = 1 +operatingmode = 3 //1 - STL output //2 - count triangles //3 - count area -verbosity = 0 +verbosity = 2 //domain -xyz_min = -1.2,-1.2,-1.2 -xyz_max = 1.2,1.2,1.2 -Nx = 32 -Ny = 32 -Nz = 32 +xyz_min = -2.0,-2.0,-2.0 +xyz_max = 2.0,2.0,2.0 +Nx = 128 +Ny = 128 +Nz = 128 //execution parallelism pars -nblockdiv = 32 +nblockdiv = 64 nthreads = 256 //function specific parameters //not all pars apply to all functions -isjulia = 0 -juliac = 0.0, 0.0, 0.0 -exponent = 8.0 -fractaliter = 64 -threshhold = 2.0 +isjulia = 1 +juliac = 0.0,0.0,0.0 +exponent = 4.0 +fractaliter = 100 freezethresh = 10.0 +threshold = 0.2 +scale = 0.5 diff --git a/build_linux64/libamscudafractallevelset.linux64.a b/build_linux64/libamscudafractallevelset.linux64.a index 7ad5794..aada216 100644 Binary files a/build_linux64/libamscudafractallevelset.linux64.a and b/build_linux64/libamscudafractallevelset.linux64.a differ diff --git a/build_linux64/objstore/amscufracls_bagoftriangles.o b/build_linux64/objstore/amscufracls_bagoftriangles.o index f9ab147..ecfb81b 100644 Binary files a/build_linux64/objstore/amscufracls_bagoftriangles.o and b/build_linux64/objstore/amscufracls_bagoftriangles.o differ diff --git a/build_linux64/objstore/amscufracls_function.o b/build_linux64/objstore/amscufracls_function.o index e7f3de4..5c8a55a 100644 Binary files a/build_linux64/objstore/amscufracls_function.o and b/build_linux64/objstore/amscufracls_function.o differ diff --git a/build_linux64/objstore/amscufracls_marchingtets.o b/build_linux64/objstore/amscufracls_marchingtets.o index d205be5..a1b7abf 100644 Binary files a/build_linux64/objstore/amscufracls_marchingtets.o and b/build_linux64/objstore/amscufracls_marchingtets.o differ diff --git a/build_linux64/objstore/amscufracls_misc.o b/build_linux64/objstore/amscufracls_misc.o index 9b34f28..af664b7 100644 Binary files a/build_linux64/objstore/amscufracls_misc.o and b/build_linux64/objstore/amscufracls_misc.o differ diff --git a/build_linux64/objstore/amscufracls_mtcube.o b/build_linux64/objstore/amscufracls_mtcube.o index d2807bc..fec80a1 100644 Binary files a/build_linux64/objstore/amscufracls_mtcube.o and b/build_linux64/objstore/amscufracls_mtcube.o differ diff --git a/build_linux64/objstore/amscufracls_parseinputfile.o b/build_linux64/objstore/amscufracls_parseinputfile.o index d3e069c..d405259 100644 Binary files a/build_linux64/objstore/amscufracls_parseinputfile.o and b/build_linux64/objstore/amscufracls_parseinputfile.o differ diff --git a/build_linux64/objstore/amscufracls_template.o b/build_linux64/objstore/amscufracls_template.o index c0862fc..182e159 100644 Binary files a/build_linux64/objstore/amscufracls_template.o and b/build_linux64/objstore/amscufracls_template.o differ diff --git a/build_linux64/objstore/amscufracls_tetrahedron.o b/build_linux64/objstore/amscufracls_tetrahedron.o index 4ce75a7..bcee4de 100644 Binary files a/build_linux64/objstore/amscufracls_tetrahedron.o and b/build_linux64/objstore/amscufracls_tetrahedron.o differ diff --git a/build_linux64/template.blend b/build_linux64/template.blend index cb5fbd9..b7c556e 100644 Binary files a/build_linux64/template.blend and b/build_linux64/template.blend differ diff --git a/build_linux64/template.blend1 b/build_linux64/template.blend1 new file mode 100644 index 0000000..cb5fbd9 Binary files /dev/null and b/build_linux64/template.blend1 differ diff --git a/build_linux64/test b/build_linux64/test index b90562a..06ea39d 100644 Binary files a/build_linux64/test and b/build_linux64/test differ diff --git a/include/amscudafractallevelset/amscfls_parseinput.cuh b/include/amscudafractallevelset/amscfls_parseinput.cuh index b379e13..0e84637 100644 --- a/include/amscudafractallevelset/amscfls_parseinput.cuh +++ b/include/amscudafractallevelset/amscfls_parseinput.cuh @@ -33,8 +33,11 @@ namespace fractallevelset int verbosity; __host__ __device__ inputpars(); + void __host__ print(); }; + + int parse_input(const char *fname, inputpars *pars); void inputmain(int argc, char *argv[]); diff --git a/include/amscudafractallevelset/amscudafractallevelset.cuh b/include/amscudafractallevelset/amscudafractallevelset.cuh index e0f5ec3..451daa8 100644 --- a/include/amscudafractallevelset/amscudafractallevelset.cuh +++ b/include/amscudafractallevelset/amscudafractallevelset.cuh @@ -38,8 +38,8 @@ class cuvec4f; // #define AMSCU_CONST // #endif -#define AMSCLS_MANDELBROT -//#define AMSCLS_PPKFRACTAL +//#define AMSCLS_MANDELBROT +#define AMSCLS_PPKFRACTAL namespace amscuda @@ -156,6 +156,13 @@ struct marchingtet_pars __device__ __host__ marchingtet_pars(); }; + +void marchingtets_gpu(marchingtet_pars mtpars, bagoftriangles *mesh, int nblockdiv=32, int nthreads=256, int verbose=0); + +int64_t marchingtets_tricount_gpu(marchingtet_pars mtpars, int nblockdiv=32, int nthreads=256, int verbose=0); + +float marchingtets_area_gpu(marchingtet_pars mtpars, int nblockdiv=32, int nthreads=256, int verbose=0); + }; //end namespaces }; diff --git a/src/amscudafractallevelset/amscufracls_function.cu b/src/amscudafractallevelset/amscufracls_function.cu index 9688b8d..af5470a 100644 --- a/src/amscudafractallevelset/amscufracls_function.cu +++ b/src/amscudafractallevelset/amscufracls_function.cu @@ -20,10 +20,14 @@ __host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars) { float ret = 0.0f; float wr,wtheta,wphi; - cuvec3f w; + cuvec3f w,c; int I; w = pos; + if(pars.isjulia) + c = pars.juliac; + else + c = pos; for(I=0;Ipars.freezethresh) break; - wr = ::pow(wr,8.0f); - wtheta = wtheta*8.0f; - wphi = wphi*8.0f; + wr = ::pow(wr,pars.exponent); + wtheta = wtheta*pars.exponent; + wphi = wphi*pars.exponent; w = cuvec3f(wr*sin(wtheta)*sin(wphi),wr*cos(wtheta),wr*sin(wtheta)*cos(wphi)); - w = w + pos; + w = w + c; } ret = cuvec3f_norm(w)-pars.threshold; @@ -44,22 +48,89 @@ __host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars) }; #endif -#ifdef AMSCLS_JULIA -__host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars) -{ - float ret = 0.0f; - - return ret; -}; -#endif #ifdef AMSCLS_PPKFRACTAL __host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars) { - float ret = 0.0f; + float ret = 1E6; + cuvec3f z; + float offset; + cuvec3f c; + float dr = 1.0f; + float signfactor; + int I; + + if(pars.isjulia) + { + c = pars.juliac; + offset = 0.0; + } + else + { + c = pos; + offset = 1.0; + } + signfactor = (pars.scale<0.0f)? 0.95f : 1.0f; + + z = pos; + + for(I=0;I8.0f) + { + break; + } + + float sx,cx,sz,cz,shy,chy; + sx = sinf(z.x); + cx = cosf(z.x); + sz = sinf(z.z); + cz = cosf(z.z); + shy = sinhf(z.y); + chy = coshf(z.y); + + z = cuvec3f(sx*chy,cx*cz*shy,sz*chy)*pars.scale + c; + float stretch = 0.75f*::sqrtf(chy*chy + shy*shy); + if(stretch>0.9f) stretch = 0.9f; + dr = ::fabs(pars.scale)*signfactor*stretch*dr + offset; + float q = 1.0f/dr; + if(q 8.) break; + + float sx = sin(z.x), cx = cos(z.x); + float sz = sin(z.z), cz = cos(z.z); + // shy and chy can be replaced with taylor approximations to save cycles + float shy = sinh(z.y), chy = cosh(z.y); + + z = vec3(sx * chy, cx * cz * shy, sz * chy) * Scale + juliaC; + + // 0.75 and 0.9 are magic constants + float stretch = 0.75 * sqrt(chy * chy + shy * shy); + stretch = max(0.9, stretch); + + dr = abs(Scale) * signFactor * stretch * dr + offset; + de = min(de, 1.0 / dr); + } + return DEMultiplier* de; +} +*/ #endif };//end namespace fractallevelset diff --git a/src/amscudafractallevelset/amscufracls_marchingtets.cu b/src/amscudafractallevelset/amscufracls_marchingtets.cu index 282584a..74c0e09 100644 --- a/src/amscudafractallevelset/amscufracls_marchingtets.cu +++ b/src/amscudafractallevelset/amscufracls_marchingtets.cu @@ -104,6 +104,31 @@ namespace fractallevelset } } + __global__ void marchingtets_gpu_debug2(marchingtet_pars mtpars, float *outbuffer) + { + int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz; + int64_t nt = blockDim.x*gridDim.x; + int64_t tn = threadIdx.x + blockIdx.x*blockDim.x; + int64_t I; + int Ix,Iy,Iz; + + + mtcube cube; + + for(I=tn;I\n",nblocks,nthreads); + if(verbose>=2) + printf("debug: launching marchingtets_gpu_tricount with <%d,%d>\n",nblocks,nthreads); cudaError_t err = cudaSuccess; marchingtets_gpu_tricount<<>>(mtpars,dtricountbuffer); //marchingtets_gpu_debug<<>>(mtpars,ddebugbuffer); @@ -226,7 +251,8 @@ namespace fractallevelset tricount = mt_count_tris_and_accum_offset(htricountbuffer,dtricountbuffer,hoffsetbuffer,doffsetbuffer,nblocks,nthreads); - printf("marchingtets_gpu: triangle count = %ld\n",(long)tricount); + if(verbose>=1) + printf("marchingtets_gpu: triangle count = %ld\n",(long)tricount); mesh->vertices.resize(tricount*9); cudaMalloc(&dvertexbuffer,sizeof(float)*tricount*9); @@ -257,6 +283,67 @@ namespace fractallevelset return; } + int64_t marchingtets_tricount_gpu(marchingtet_pars mtpars, int nblockdiv, int nthreads, int verbose) + { + //marchingtet_pars *dmtpars = NULL; + int *dtricountbuffer = NULL; + int64_t *hoffsetbuffer = NULL; + int64_t *doffsetbuffer = NULL; + float *dvertexbuffer = NULL; + int nblocks; + + int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz; + int *htricountbuffer = NULL; + int64_t tricount = 0; + + nblocks = (int)(((psize/(int64_t)nthreads)+1L)/(int64_t)nblockdiv+1L); + + cudaMalloc(&dtricountbuffer, nblocks*nthreads*sizeof(int)); + htricountbuffer = new(std::nothrow) int[nblocks*nthreads]; + cudaMalloc(&doffsetbuffer, nblocks*nthreads*sizeof(int64_t)); + hoffsetbuffer = new(std::nothrow) int64_t[nblocks*nthreads]; + + //DEBUG + // float *debugbuffer = NULL; + // float *ddebugbuffer = NULL; + // debugbuffer = new(std::nothrow) float[nblocks*nthreads*25]; + // cudaMalloc(&ddebugbuffer,sizeof(float)*nblocks*nthreads*25); + + + if(dtricountbuffer==NULL || htricountbuffer==NULL) + { + printf("marchingtets_gpu error: failed to allocate counting buffers\n"); + marchingtets_gpu_cleanup(&dtricountbuffer,&htricountbuffer,&dvertexbuffer,&doffsetbuffer,&hoffsetbuffer); + return 0; + } + + // cudaMalloc(&dmtpars,sizeof(marchingtet_pars)); + // cudaMemcpy(dmtpars,&mtpars,sizeof(marchingtet_pars),cudaMemcpyHostToDevice); + + //printf("debug: launching marchingtets_gpu_tricount with <%d,%d>\n",nblocks,nthreads); + cudaError_t err = cudaSuccess; + marchingtets_gpu_tricount<<>>(mtpars,dtricountbuffer); + cudaDeviceSynchronize(); + err = cudaGetLastError(); + if(err!=cudaSuccess) + { + printf("marchingtets_gpu_tricount error: %s\n",cudaGetErrorString(err)); + } + + //mt_debug1(debugbuffer,ddebugbuffer,nblocks,nthreads); + + tricount = mt_count_tris_and_accum_offset(htricountbuffer,dtricountbuffer,hoffsetbuffer,doffsetbuffer,nblocks,nthreads); + + if(verbose>=1) + printf("marchingtets_gpu_tricount: triangle count = %ld\n",(long)tricount); + + marchingtets_gpu_cleanup(&dtricountbuffer,&htricountbuffer,&dvertexbuffer,&doffsetbuffer,&hoffsetbuffer); + + + return tricount; + } + + void marchingtets_area_gpu_cleanup( float **hareabuffer, float **dareabuffer @@ -266,18 +353,17 @@ namespace fractallevelset if(*hareabuffer!=NULL) {delete[] *hareabuffer; *hareabuffer=NULL;} } - float marchingtets_area_gpu(marchingtet_pars mtpars) + float marchingtets_area_gpu(marchingtet_pars mtpars, int nblockdiv, int nthreads, int verbose) { //marchingtet_pars *dmtpars = NULL; - int nblocks,nthreads; + int nblocks; float *hareabuffer = NULL; float *dareabuffer = NULL; int I; float areasum = 0.0f; int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz; - nthreads = 256; - nblocks = (int)(((psize/(int64_t)nthreads)+1L)/16L+1L); + nblocks = (int)(((psize/(int64_t)nthreads)+1L)/(int64_t)nblockdiv+1L); cudaMalloc(&dareabuffer, nblocks*nthreads*sizeof(float)); hareabuffer = new(std::nothrow) float[nblocks*nthreads]; diff --git a/src/amscudafractallevelset/amscufracls_parseinputfile.cu b/src/amscudafractallevelset/amscufracls_parseinputfile.cu index 2b071b0..048992e 100644 --- a/src/amscudafractallevelset/amscufracls_parseinputfile.cu +++ b/src/amscudafractallevelset/amscufracls_parseinputfile.cu @@ -96,6 +96,10 @@ namespace fractallevelset amsarray alphanums; alphanums = s.splitalphanum(); + // printf("debug: alphanum[0]='%s'\n",alphanums[0].cstring); + // printf("debug: alphanum[1]='%s'\n",alphanums[1].cstring); + // printf("debug: alphanum[2]='%s'\n",alphanums[2].cstring); + for(I=0;Iat(I)[0]) { + // printf("debug: par[0]='%s'\n",stringpars->at(I)[0].cstring); + // printf("debug: par[1]='%s'\n",stringpars->at(I)[1].cstring); *val = (float) stringpars->at(I)[1].strtonum(); } } @@ -193,6 +199,7 @@ namespace fractallevelset inputpars pars; marchingtet_pars mtpars; main_functionpars mfpars; + bagoftriangles mesh; if(argc<3) { @@ -241,22 +248,55 @@ namespace fractallevelset mtpars.pars = mfpars; //execute model based on operatingmode + if(pars.verbosity>=1) + pars.print(); if(pars.operatingmode==omode_stloutput) { - + marchingtets_gpu(mtpars,&mesh,pars.nblockdiv,pars.nthreads,pars.verbosity); + mesh.save_stl(fnameout.cstring); } else if(pars.operatingmode==omode_counttriangles) { - + int64_t tricount = 0; + tricount = marchingtets_tricount_gpu(mtpars,pars.nblockdiv,pars.nthreads,pars.verbosity); + printf("triangle count = %ld\n",(long)tricount); } else if(pars.operatingmode==omode_countarea) { - + float area = 0.0f; + area = marchingtets_area_gpu(mtpars,pars.nblockdiv,pars.nthreads,pars.verbosity); + printf("area = %1.5g\n",area); } } + __host__ void inputpars::print() + { + printf("inputpars:\n"); + { + printf("\t operatingmode = %d \n",operatingmode); + printf("\t verbosity = %d\n",verbosity); + printf("\t xyz_min = (%1.3g,%1.3g,%1.3g)\n",xyz_min[0],xyz_min[1],xyz_min[2]); + printf("\t xyz_max = (%1.3g,%1.3g,%1.3g)\n",xyz_max[0],xyz_max[1],xyz_max[2]); + printf("\t Nx = %d\n",Nx); + printf("\t Ny = %d\n",Ny); + printf("\t Nz = %d\n",Nz); + printf("\t nblockdiv = %d\n",nblockdiv); + printf("\t nthreads = %d\n",nthreads); + printf("\t isjulia = %d\n",(int)isjulia); + printf("\t juliac = (%1.3g,%1.3g,%1.3g)\n",juliac[0],juliac[1],juliac[2]); + printf("\t exponent = %1.3f\n",exponent); + printf("\t fractaliter = %d\n",fractaliter); + printf("\t threshold = %1.3f\n",threshold); + printf("\t freezethresh = %1.3f\n",freezethresh); + printf("\t scale = %1.3f\n",scale); + + + + } + } + };//end namespace fractallevelset