This commit is contained in:
2026-04-28 15:40:11 -04:00
parent 23fec20140
commit 739c8d3bb4
16 changed files with 2554447 additions and 16 deletions

Binary file not shown.

Binary file not shown.

2554106
build_linux64/testout.stl Normal file

File diff suppressed because it is too large Load Diff

View File

@ -53,7 +53,7 @@ struct main_functionpars
float freezethresh;
};
float main_function(cuvec3f pos, main_functionpars &pars);
__host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars);
@ -139,7 +139,7 @@ class mtcube
__device__ __host__ void eval_function(main_functionpars &pars); //calls a function under consideration at each of the 15 node points
__device__ __host__ int count_triangles();
__device__ __host__ void fill_trianglebuffer(float *buffer, int &offset); //fills from offset to offset+ntris*3*3 with the triangles calculated from fevals. Increments offset.
__device__ __host__ void fill_trianglebuffer(float *buffer, int64_t &offset); //fills from offset to offset+ntris*3*3 with the triangles calculated from fevals. Increments offset.
__device__ __host__ float count_triarea(); //counts triangle area without filling a buffer or retaining the mesh
__device__ __host__ mtcube();

View File

@ -46,7 +46,7 @@ namespace fractallevelset
N = vertices.length/9;
for(I=0;I<N;I++)
{
_local_get_tri(vertices.data,I*9);
tri = _local_get_tri(vertices.data,I*9);
normal = tri.normal();
fprintf(fp,"facet normal %1.6g %1.6g %1.6g\n",normal[0],normal[1],normal[2]);
fprintf(fp,"\touter loop\n");

View File

@ -5,13 +5,14 @@ namespace amscuda
namespace fractallevelset
{
// Level-set sample used by the marching-tets mesher.
// Returns a signed value that is positive inside the implicit surface and
// negative outside. With this formula the surface is a sphere of radius
// sqrt(pars.threshold) centered at the origin; only pars.threshold is read here.
//  pos  - evaluation point
//  pars - function parameters (threshold is the squared radius)
__host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars)
{
    // |pos|^2 - threshold, then sign-flipped so the interior is positive.
    // fix: an earlier unconditional "return ret;" made the negated return dead code.
    float ret = pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2] - pars.threshold;
    return -ret;
};

View File

@ -9,26 +9,325 @@ namespace fractallevelset
}
// Computes the axis-aligned bounds of grid cell I within the marching-tets
// sampling grid and writes them to cubeout.xyz_min / cubeout.xyz_max.
// Cells are laid out x-fastest: I = Ix + Nx*(Iy + Ny*Iz).
// fevals are NOT filled here; mtcube::eval_function does that afterwards.
__device__ __host__ void mtgetcube(const marchingtet_pars &mtpars, int64_t I, mtcube &cubeout)
{
    // fix: promote Nx*Ny to 64-bit before multiplying so the product cannot
    // overflow int for large grids (matches the psize computation in the kernels).
    int64_t nxy = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny;
    int Ix = (int)(I%mtpars.Nx);
    int Iy = (int)((I/mtpars.Nx)%mtpars.Ny);
    int Iz = (int)((I/nxy) % mtpars.Nz);
    // Per-axis cell sizes derived from the overall bounding box.
    float dx = (mtpars.xyz_max[0]-mtpars.xyz_min[0])/((float)mtpars.Nx);
    float dy = (mtpars.xyz_max[1]-mtpars.xyz_min[1])/((float)mtpars.Ny);
    float dz = (mtpars.xyz_max[2]-mtpars.xyz_min[2])/((float)mtpars.Nz);
    cubeout.xyz_min = cuvec3f(mtpars.xyz_min[0] + Ix*dx, mtpars.xyz_min[1] + Iy*dy, mtpars.xyz_min[2] + Iz*dz);
    cubeout.xyz_max = cuvec3f(mtpars.xyz_min[0] + (Ix+1)*dx, mtpars.xyz_min[1] + (Iy+1)*dy, mtpars.xyz_min[2] + (Iz+1)*dz);
}
//Grid-stride counting pass: each thread visits cells I = tn, tn+nt, ... and
//accumulates the number of triangles those cells would emit into countbuffer[tn].
//countbuffer must hold one int per launched thread (gridDim.x*blockDim.x).
//The host later prefix-sums these counts into per-thread output offsets
//(see mt_count_tris_and_accum_offset).
__global__ void marchingtets_gpu_tricount(marchingtet_pars mtpars, int *countbuffer)
{
int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz; //total number of grid cells
int64_t nt = blockDim.x*gridDim.x; //total launched threads; NOTE(review): product is formed in 32-bit before widening
int64_t tn = threadIdx.x + blockIdx.x*blockDim.x; //this thread's flat index
int64_t I;
mtcube cube;
countbuffer[tn] = 0;
for(I=tn;I<psize;I+=nt)
{
mtgetcube(mtpars,I,cube);
cube.eval_function(mtpars.pars); //sample the level-set function at the cube's node points
countbuffer[tn] += cube.count_triangles();
}
}
__global__ void marchingtets_gpu_meshfill(marchingtet_pars mtpars, float *vertexbuffer)
//Debug kernel: for each cell a thread visits, dumps the 15 node-point function
//evaluations into debugbuffer[tn*15 .. tn*15+14]. Later cells overwrite
//earlier ones, so only the last cell a thread touches survives.
//debugbuffer must hold 15 floats per launched thread.
__global__ void marchingtets_gpu_debug(marchingtet_pars mtpars, float *debugbuffer)
{
int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz; //total number of grid cells
int64_t nt = blockDim.x*gridDim.x;
int64_t tn = threadIdx.x + blockIdx.x*blockDim.x;
int64_t I;
int J;
mtcube cube;
//zero this thread's slots so unvisited threads read as 0
for(J=0;J<15;J++)
debugbuffer[tn*15+J] = 0;
for(I=tn;I<psize;I+=nt)
{
mtgetcube(mtpars,I,cube);
cube.eval_function(mtpars.pars);
// debugbuffer[tn*15+0] = cube.xyz_min[0];
// debugbuffer[tn*15+1] = cube.xyz_min[1];
// debugbuffer[tn*15+2] = cube.xyz_min[2];
// debugbuffer[tn*15+3] = cube.xyz_max[0];
// debugbuffer[tn*15+4] = cube.xyz_max[1];
// debugbuffer[tn*15+5] = cube.xyz_max[2];
// debugbuffer[tn*15+6] = mtpars.pars.threshold;
// debugbuffer[tn*15+7] = mtpars.Nx;
// debugbuffer[tn*15+8] = I;
// debugbuffer[tn*15+9] = psize;
for(J=0;J<15;J++)
{
debugbuffer[tn*15+J] = cube.fevals[J];
}
}
}
//Grid-stride fill pass: re-evaluates each cell exactly as the counting pass
//did, then writes its triangles into vertexbuffer starting at this thread's
//precomputed float offset (offsetbuffer[tn]). fill_trianglebuffer advances
//offset, so a thread's cells pack contiguously in its region.
//Must be launched with the same grid/block shape as marchingtets_gpu_tricount
//or the per-thread offsets will not line up.
__global__ void marchingtets_gpu_meshfill(marchingtet_pars mtpars, const int64_t *offsetbuffer, float *vertexbuffer)
{
int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz; //total number of grid cells
int64_t nt = blockDim.x*gridDim.x;
int64_t tn = threadIdx.x + blockIdx.x*blockDim.x;
int64_t I;
mtcube cube;
int64_t offset = offsetbuffer[tn]; //starting float index for this thread's output region
for(I=tn;I<psize;I+=nt)
{
mtgetcube(mtpars,I,cube);
cube.eval_function(mtpars.pars);
cube.fill_trianglebuffer(vertexbuffer,offset); //this should be incrementing the offset
}
}
//Grid-stride area pass: accumulates the total triangle area produced by each
//thread's cells into areabuffer[tn], without writing any mesh data.
//areabuffer must hold one float per launched thread; the host sums the
//per-thread partials afterwards (see marchingtets_area_gpu).
__global__ void marchingtets_gpu_areacount(marchingtet_pars mtpars, float *areabuffer)
{
int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz; //total number of grid cells
int64_t nt = blockDim.x*gridDim.x;
int64_t tn = threadIdx.x + blockIdx.x*blockDim.x;
int64_t I;
mtcube cube;
areabuffer[tn] = 0.0f;
for(I=tn;I<psize;I+=nt)
{
mtgetcube(mtpars,I,cube);
cube.eval_function(mtpars.pars);
areabuffer[tn] += cube.count_triarea();
}
}
//Releases every buffer used by marchingtets_gpu and nulls the caller's
//pointers, so calling this more than once (or on partially-allocated state)
//is harmless. Device buffers go through cudaFree, host buffers through delete[].
void marchingtets_gpu_cleanup(int **dtricountbuffer, int **htricountbuffer, float **dvertexbuffer, int64_t **doffsetbuffer, int64_t **hoffsetbuffer)
{
    if(*dtricountbuffer!=NULL)
    {
        cudaFree(*dtricountbuffer);
        *dtricountbuffer = NULL;
    }
    if(*htricountbuffer!=NULL)
    {
        delete[] *htricountbuffer;
        *htricountbuffer = NULL;
    }
    if(*doffsetbuffer!=NULL)
    {
        cudaFree(*doffsetbuffer);
        *doffsetbuffer = NULL;
    }
    if(*hoffsetbuffer!=NULL)
    {
        delete[] *hoffsetbuffer;
        *hoffsetbuffer = NULL;
    }
    if(*dvertexbuffer!=NULL)
    {
        cudaFree(*dvertexbuffer);
        *dvertexbuffer = NULL;
    }
}
//Downloads the per-thread triangle counts, converts them to an exclusive
//prefix sum of float offsets (9 floats per triangle: 3 vertices x 3
//components), uploads the offsets back to the device, and returns the total
//triangle count across all threads.
int64_t mt_count_tris_and_accum_offset(
    int *htricountbuffer,
    int *dtricountbuffer,
    int64_t *hoffsetbuffer,
    int64_t *doffsetbuffer,
    int nblocks, int nthreads
)
{
    const int nentries = nblocks*nthreads;
    int64_t total = 0;
    cudaMemcpy(htricountbuffer,dtricountbuffer,sizeof(int)*nentries,cudaMemcpyDeviceToHost);
    for(int J=0;J<nentries;J++)
    {
        //offset for thread J is the float count of everything before it
        hoffsetbuffer[J] = total*9;
        total += htricountbuffer[J];
    }
    cudaMemcpy(doffsetbuffer,hoffsetbuffer,sizeof(int64_t)*nentries,cudaMemcpyHostToDevice);
    return total;
}
//Debug helper: copies nblocks*nthreads floats from the device buffer into the
//host buffer and prints them one per line.
void mt_debug1(float *debugbuffer, float *ddebugbuffer, int nblocks, int nthreads)
{
    const int n = nblocks*nthreads;
    cudaMemcpy(debugbuffer,ddebugbuffer,sizeof(float)*n,cudaMemcpyDeviceToHost);
    for(int J=0;J<n;J++)
        printf("debug buffer [%d]: %1.3f\n",J,debugbuffer[J]);
    return;
}
//Extracts an isosurface mesh of main_function over the grid described by
//mtpars, using marching tetrahedra on the GPU.
//Pipeline:
//  1) counting kernel tallies triangles per thread,
//  2) host builds an exclusive prefix sum of per-thread float offsets,
//  3) fill kernel writes each thread's triangles at its precomputed offset,
//  4) vertex data (9 floats per triangle) is copied into mesh->vertices.
//On allocation failure the function prints an error, frees whatever it
//allocated, and returns with *mesh unchanged.
void marchingtets_gpu(marchingtet_pars mtpars, bagoftriangles *mesh)
{
    int *dtricountbuffer = NULL;
    int *htricountbuffer = NULL;
    float *dvertexbuffer = NULL;
    int64_t *hoffsetbuffer = NULL;
    int64_t *doffsetbuffer = NULL;
    int nblocks,nthreads;
    int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz;
    int64_t tricount;
    cudaError_t err = cudaSuccess;
    nthreads = 256;
    //aim for roughly 16 cells per thread via the grid-stride loops
    nblocks = (int)(((psize/(int64_t)nthreads)+1L)/16L+1L);
    cudaMalloc(&dtricountbuffer, nblocks*nthreads*sizeof(int));
    htricountbuffer = new(std::nothrow) int[nblocks*nthreads];
    cudaMalloc(&doffsetbuffer, nblocks*nthreads*sizeof(int64_t));
    hoffsetbuffer = new(std::nothrow) int64_t[nblocks*nthreads];
    //fix: check all four buffers -- previously only the counting pair was
    //checked, so a failed offset allocation was dereferenced later
    if(dtricountbuffer==NULL || htricountbuffer==NULL || doffsetbuffer==NULL || hoffsetbuffer==NULL)
    {
        printf("marchingtets_gpu error: failed to allocate counting buffers\n");
        marchingtets_gpu_cleanup(&dtricountbuffer,&htricountbuffer,&dvertexbuffer,&doffsetbuffer,&hoffsetbuffer);
        return;
    }
    printf("debug: launching marchingtets_gpu_tricount with <%d,%d>\n",nblocks,nthreads);
    marchingtets_gpu_tricount<<<nblocks,nthreads>>>(mtpars,dtricountbuffer);
    cudaDeviceSynchronize();
    err = cudaGetLastError();
    if(err!=cudaSuccess)
    {
        printf("marchingtets_gpu_tricount error: %s\n",cudaGetErrorString(err));
    }
    tricount = mt_count_tris_and_accum_offset(htricountbuffer,dtricountbuffer,hoffsetbuffer,doffsetbuffer,nblocks,nthreads);
    printf("marchingtets_gpu: triangle count = %ld\n",(long)tricount);
    mesh->vertices.resize(tricount*9);
    if(tricount>0)
    {
        cudaMalloc(&dvertexbuffer,sizeof(float)*tricount*9);
        //fix: the vertex-buffer allocation was previously unchecked
        if(dvertexbuffer==NULL)
        {
            printf("marchingtets_gpu error: failed to allocate vertex buffer\n");
            marchingtets_gpu_cleanup(&dtricountbuffer,&htricountbuffer,&dvertexbuffer,&doffsetbuffer,&hoffsetbuffer);
            return;
        }
        marchingtets_gpu_meshfill<<<nblocks,nthreads>>>(mtpars,doffsetbuffer,dvertexbuffer);
        cudaDeviceSynchronize();
        err = cudaGetLastError();
        if(err!=cudaSuccess)
        {
            printf("marchingtets_gpu_meshfill error: %s\n",cudaGetErrorString(err));
        }
        //blocking copy: safe to read mesh->vertices after this returns
        cudaMemcpy(mesh->vertices.data,dvertexbuffer,sizeof(float)*tricount*9,cudaMemcpyDeviceToHost);
    }
    marchingtets_gpu_cleanup(&dtricountbuffer,&htricountbuffer,&dvertexbuffer,&doffsetbuffer,&hoffsetbuffer);
    return;
}
//Frees the area-accumulation buffers (device via cudaFree, host via delete[])
//and nulls the caller's pointers so repeated cleanup is harmless.
void marchingtets_area_gpu_cleanup(
    float **hareabuffer,
    float **dareabuffer
)
{
    if(*dareabuffer!=NULL)
    {
        cudaFree(*dareabuffer);
        *dareabuffer = NULL;
    }
    if(*hareabuffer!=NULL)
    {
        delete[] *hareabuffer;
        *hareabuffer = NULL;
    }
}
//Computes the total isosurface area of main_function over the grid in mtpars
//by launching marchingtets_gpu_areacount and summing the per-thread partial
//areas on the host. Returns 0.0f on allocation failure.
float marchingtets_area_gpu(marchingtet_pars mtpars)
{
    int nblocks,nthreads;
    float *hareabuffer = NULL;
    float *dareabuffer = NULL;
    int I;
    float areasum = 0.0f;
    cudaError_t err = cudaSuccess;
    int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz;
    nthreads = 256;
    //aim for roughly 16 cells per thread via the grid-stride loop
    nblocks = (int)(((psize/(int64_t)nthreads)+1L)/16L+1L);
    cudaMalloc(&dareabuffer, nblocks*nthreads*sizeof(float));
    hareabuffer = new(std::nothrow) float[nblocks*nthreads];
    if(dareabuffer==NULL || hareabuffer==NULL)
    {
        //fix: error message previously named marchingtets_gpu
        printf("marchingtets_area_gpu error: failed to allocate counting buffers\n");
        marchingtets_area_gpu_cleanup(&hareabuffer,&dareabuffer);
        return 0.0f;
    }
    //fix: announce the launch before launching (the printf used to come after)
    printf("debug: launching marchingtets_gpu_areacount with <%d,%d>\n",nblocks,nthreads);
    marchingtets_gpu_areacount<<<nblocks,nthreads>>>(mtpars,dareabuffer);
    cudaDeviceSynchronize();
    err = cudaGetLastError();
    if(err!=cudaSuccess)
    {
        printf("marchingtets_gpu_areacount error: %s\n",cudaGetErrorString(err));
    }
    cudaMemcpy(hareabuffer,dareabuffer,nblocks*nthreads*sizeof(float),cudaMemcpyDeviceToHost);
    for(I=0;I<nblocks*nthreads;I++)
    {
        areasum += hareabuffer[I];
    }
    marchingtets_area_gpu_cleanup(&hareabuffer,&dareabuffer);
    return areasum;
}
void marchingtets_cpu_tricount(int thread, int nthreads, marchingtet_pars mtpars, int *countbuffer)
@ -60,19 +359,33 @@ namespace fractallevelset
bagoftriangles mesh;
main_functionpars fpars;
fpars.fractaliter = 64;
fpars.threshold = 2.0f;
fpars.threshold = 0.5f;
fpars.freezethresh = 10.0f;
marchingtet_pars mtpars;
mtpars.xyz_min = cuvec3f(-1.2,-1.2,-1.2);
mtpars.xyz_max = cuvec3f(1.2,1.2,1.2);
mtpars.Nx = 5;
mtpars.Ny = 5;
mtpars.Nz = 5;
mtpars.Nx = 125;
mtpars.Ny = 125;
mtpars.Nz = 125;
mtpars.pars = fpars;
float t0,t1, area;
t0 = util::time_msec();
marchingtets_gpu(mtpars,&mesh);
t1 = util::time_msec();
printf("marchingtets_gpu: %1.3f msec runtime %d psize\n",t1-t0,mtpars.Nx*mtpars.Ny*mtpars.Nz);
t0 = util::time_msec();
area = marchingtets_area_gpu(mtpars);
t1 = util::time_msec();
printf("marchingtets_area_gpu: %1.3f msec runtime %d psize\n",t1-t0,mtpars.Nx*mtpars.Ny*mtpars.Nz);
printf("area=%1.3f, area/4pi=%1.3f\n",area,area/4.0f/pif);
mesh.save_stl(fnameout);
}

View File

@ -600,12 +600,13 @@ namespace fractallevelset
ret = 2;
tri1.p0 = mtlmpoint(tet.p0,tet.p1,fevals.eval0,fevals.eval1);
tri1.p1 = mtlmpoint(tet.p0,tet.p3,fevals.eval0,fevals.eval3);
tri1.p2 = mtlmpoint(tet.p3,tet.p2,fevals.eval3,fevals.eval2);
tri1.p1 = mtlmpoint(tet.p3,tet.p2,fevals.eval3,fevals.eval2);
tri1.p2 = mtlmpoint(tet.p0,tet.p3,fevals.eval0,fevals.eval3);
tri2.p0 = mtlmpoint(tet.p1,tet.p2,fevals.eval1,fevals.eval2);
tri2.p1 = mtlmpoint(tet.p0,tet.p1,fevals.eval0,fevals.eval1);
tri2.p2 = mtlmpoint(tet.p3,tet.p2,fevals.eval3,fevals.eval2);
tri2.p1 = mtlmpoint(tet.p3,tet.p2,fevals.eval3,fevals.eval2);
tri2.p2 = mtlmpoint(tet.p0,tet.p1,fevals.eval0,fevals.eval1);
}
else if(!os1 && !os2 && os3)
{
@ -667,7 +668,7 @@ namespace fractallevelset
//fills from offset to offset+ntris*3*3 with the triangles calculated from fevals
//increments offset
__device__ __host__ void mtcube::fill_trianglebuffer(float *buffer, int &offset)
__device__ __host__ void mtcube::fill_trianglebuffer(float *buffer, int64_t &offset)
{
int I;
triangle t1,t2;
@ -818,6 +819,16 @@ namespace fractallevelset
return;
}
//Samples main_function at each of this cube's 15 node points and caches the
//results in fevals for the subsequent triangle counting/filling passes.
__device__ __host__ void mtcube::eval_function(main_functionpars &pars)
{
    for(int node=0;node<15;node++)
        fevals[node] = main_function(get_point(node),pars);
}
};//end namespace fractallevelset
};// end namespace amscuda