bla
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
build_linux64/template.blend
Normal file
BIN
build_linux64/template.blend
Normal file
Binary file not shown.
Binary file not shown.
2554106
build_linux64/testout.stl
Normal file
2554106
build_linux64/testout.stl
Normal file
File diff suppressed because it is too large
Load Diff
@ -53,7 +53,7 @@ struct main_functionpars
|
||||
float freezethresh;
|
||||
};
|
||||
|
||||
float main_function(cuvec3f pos, main_functionpars &pars);
|
||||
__host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars);
|
||||
|
||||
|
||||
|
||||
@ -139,7 +139,7 @@ class mtcube
|
||||
__device__ __host__ void eval_function(main_functionpars &pars); //calls a function under consideration at each of the 15 node points
|
||||
|
||||
__device__ __host__ int count_triangles();
|
||||
__device__ __host__ void fill_trianglebuffer(float *buffer, int &offset); //fills from offset to offset+ntris*3*3 with the triangles calculated from fevals. Increments offset.
|
||||
__device__ __host__ void fill_trianglebuffer(float *buffer, int64_t &offset); //fills from offset to offset+ntris*3*3 with the triangles calculated from fevals. Increments offset.
|
||||
__device__ __host__ float count_triarea(); //counts triangle area without filling a buffer or retaining the mesh
|
||||
|
||||
__device__ __host__ mtcube();
|
||||
|
||||
@ -46,7 +46,7 @@ namespace fractallevelset
|
||||
N = vertices.length/9;
|
||||
for(I=0;I<N;I++)
|
||||
{
|
||||
_local_get_tri(vertices.data,I*9);
|
||||
tri = _local_get_tri(vertices.data,I*9);
|
||||
normal = tri.normal();
|
||||
fprintf(fp,"facet normal %1.6g %1.6g %1.6g\n",normal[0],normal[1],normal[2]);
|
||||
fprintf(fp,"\touter loop\n");
|
||||
|
||||
@ -5,13 +5,14 @@ namespace amscuda
|
||||
namespace fractallevelset
|
||||
{
|
||||
|
||||
float main_function(cuvec3f pos, main_functionpars &pars)
|
||||
|
||||
__host__ __device__ float main_function(cuvec3f pos, main_functionpars &pars)
|
||||
{
|
||||
float ret = 0.0f;
|
||||
|
||||
ret = pos[0]*pos[0]+pos[1]*pos[1]+pos[2]*pos[2]-pars.threshold;
|
||||
|
||||
|
||||
return ret;
|
||||
return -ret;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -9,26 +9,325 @@ namespace fractallevelset
|
||||
|
||||
}
|
||||
|
||||
__device__ __host__ void mtgetcube(const marchingtet_pars &mtpars, int64_t I, mtcube &cubeout)
|
||||
{
|
||||
int Ix,Iy,Iz;
|
||||
float dx,dy,dz;
|
||||
Ix = (int)(I%mtpars.Nx);
|
||||
Iy = (int)((I/mtpars.Nx)%mtpars.Ny);
|
||||
Iz = (int)((I/(mtpars.Nx*mtpars.Ny)) % mtpars.Nz);
|
||||
|
||||
dx = (mtpars.xyz_max[0]-mtpars.xyz_min[0])/((float)mtpars.Nx);
|
||||
dy = (mtpars.xyz_max[1]-mtpars.xyz_min[1])/((float)mtpars.Ny);
|
||||
dz = (mtpars.xyz_max[2]-mtpars.xyz_min[2])/((float)mtpars.Nz);
|
||||
|
||||
cubeout.xyz_min = cuvec3f(mtpars.xyz_min[0] + Ix*dx, mtpars.xyz_min[1] + Iy*dy, mtpars.xyz_min[2] + Iz*dz);
|
||||
cubeout.xyz_max = cuvec3f(mtpars.xyz_min[0] + (Ix+1)*dx, mtpars.xyz_min[1] + (Iy+1)*dy, mtpars.xyz_min[2] + (Iz+1)*dz);
|
||||
}
|
||||
|
||||
|
||||
__global__ void marchingtets_gpu_tricount(marchingtet_pars mtpars, int *countbuffer)
|
||||
{
|
||||
int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz;
|
||||
int64_t nt = blockDim.x*gridDim.x;
|
||||
int64_t tn = threadIdx.x + blockIdx.x*blockDim.x;
|
||||
int64_t I;
|
||||
|
||||
|
||||
mtcube cube;
|
||||
|
||||
countbuffer[tn] = 0;
|
||||
|
||||
for(I=tn;I<psize;I+=nt)
|
||||
{
|
||||
mtgetcube(mtpars,I,cube);
|
||||
cube.eval_function(mtpars.pars);
|
||||
countbuffer[tn] += cube.count_triangles();
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void marchingtets_gpu_meshfill(marchingtet_pars mtpars, float *vertexbuffer)
|
||||
__global__ void marchingtets_gpu_debug(marchingtet_pars mtpars, float *debugbuffer)
|
||||
{
|
||||
int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz;
|
||||
int64_t nt = blockDim.x*gridDim.x;
|
||||
int64_t tn = threadIdx.x + blockIdx.x*blockDim.x;
|
||||
int64_t I;
|
||||
int J;
|
||||
|
||||
|
||||
mtcube cube;
|
||||
|
||||
for(J=0;J<15;J++)
|
||||
debugbuffer[tn*15+J] = 0;
|
||||
|
||||
for(I=tn;I<psize;I+=nt)
|
||||
{
|
||||
mtgetcube(mtpars,I,cube);
|
||||
cube.eval_function(mtpars.pars);
|
||||
|
||||
// debugbuffer[tn*15+0] = cube.xyz_min[0];
|
||||
// debugbuffer[tn*15+1] = cube.xyz_min[1];
|
||||
// debugbuffer[tn*15+2] = cube.xyz_min[2];
|
||||
// debugbuffer[tn*15+3] = cube.xyz_max[0];
|
||||
// debugbuffer[tn*15+4] = cube.xyz_max[1];
|
||||
// debugbuffer[tn*15+5] = cube.xyz_max[2];
|
||||
// debugbuffer[tn*15+6] = mtpars.pars.threshold;
|
||||
// debugbuffer[tn*15+7] = mtpars.Nx;
|
||||
// debugbuffer[tn*15+8] = I;
|
||||
// debugbuffer[tn*15+9] = psize;
|
||||
for(J=0;J<15;J++)
|
||||
{
|
||||
debugbuffer[tn*15+J] = cube.fevals[J];
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void marchingtets_gpu_meshfill(marchingtet_pars mtpars, const int64_t *offsetbuffer, float *vertexbuffer)
|
||||
{
|
||||
int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz;
|
||||
int64_t nt = blockDim.x*gridDim.x;
|
||||
int64_t tn = threadIdx.x + blockIdx.x*blockDim.x;
|
||||
int64_t I;
|
||||
|
||||
mtcube cube;
|
||||
|
||||
int64_t offset = offsetbuffer[tn];
|
||||
|
||||
for(I=tn;I<psize;I+=nt)
|
||||
{
|
||||
mtgetcube(mtpars,I,cube);
|
||||
cube.eval_function(mtpars.pars);
|
||||
cube.fill_trianglebuffer(vertexbuffer,offset); //this should be incrementing the offset
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__global__ void marchingtets_gpu_areacount(marchingtet_pars mtpars, float *areabuffer)
|
||||
{
|
||||
int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz;
|
||||
int64_t nt = blockDim.x*gridDim.x;
|
||||
int64_t tn = threadIdx.x + blockIdx.x*blockDim.x;
|
||||
int64_t I;
|
||||
|
||||
mtcube cube;
|
||||
|
||||
areabuffer[tn] = 0.0f;
|
||||
|
||||
for(I=tn;I<psize;I+=nt)
|
||||
{
|
||||
mtgetcube(mtpars,I,cube);
|
||||
cube.eval_function(mtpars.pars);
|
||||
areabuffer[tn] += cube.count_triarea();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void marchingtets_gpu_cleanup(int **dtricountbuffer, int **htricountbuffer, float **dvertexbuffer, int64_t **doffsetbuffer, int64_t **hoffsetbuffer)
|
||||
{
|
||||
if(*dtricountbuffer!=NULL) {cudaFree(*dtricountbuffer); *dtricountbuffer=NULL;}
|
||||
if(*htricountbuffer!=NULL) {delete[] *htricountbuffer; *htricountbuffer=NULL;}
|
||||
if(*doffsetbuffer!=NULL) {cudaFree(*doffsetbuffer); *doffsetbuffer=NULL;}
|
||||
if(*hoffsetbuffer!=NULL) {delete[] *hoffsetbuffer; *hoffsetbuffer=NULL;}
|
||||
if(*dvertexbuffer!=NULL) {cudaFree(*dvertexbuffer); *dvertexbuffer=NULL;}
|
||||
//if(*dmtpars!=NULL) {cudaFree(*dmtpars); *dmtpars=NULL;}
|
||||
}
|
||||
|
||||
int64_t mt_count_tris_and_accum_offset(
|
||||
int *htricountbuffer,
|
||||
int *dtricountbuffer,
|
||||
int64_t *hoffsetbuffer,
|
||||
int64_t *doffsetbuffer,
|
||||
int nblocks, int nthreads
|
||||
)
|
||||
{
|
||||
int I;
|
||||
int64_t cnt;
|
||||
cudaMemcpy(htricountbuffer,dtricountbuffer,sizeof(int)*nblocks*nthreads,cudaMemcpyDeviceToHost);
|
||||
cnt = 0;
|
||||
for(I=0;I<nblocks*nthreads;I++)
|
||||
{
|
||||
hoffsetbuffer[I] = cnt*9;
|
||||
cnt += htricountbuffer[I];
|
||||
|
||||
//printf("debug: hoffsetbuffer[%d]=%ld\n",I,hoffsetbuffer[I]);
|
||||
}
|
||||
|
||||
cudaMemcpy(doffsetbuffer,hoffsetbuffer,sizeof(int64_t)*nblocks*nthreads,cudaMemcpyHostToDevice);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
void mt_debug1(float *debugbuffer, float *ddebugbuffer, int nblocks, int nthreads)
|
||||
{
|
||||
int I;
|
||||
cudaMemcpy(debugbuffer,ddebugbuffer,sizeof(float)*nblocks*nthreads,cudaMemcpyDeviceToHost);
|
||||
for(I=0;I<nblocks*nthreads;I++)
|
||||
{
|
||||
printf("debug buffer [%d]: %1.3f\n",I,debugbuffer[I]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void marchingtets_gpu(marchingtet_pars mtpars, bagoftriangles *mesh)
|
||||
{
|
||||
//marchingtet_pars *dmtpars = NULL;
|
||||
int *dtricountbuffer = NULL;
|
||||
float *dvertexbuffer = NULL;
|
||||
int64_t *hoffsetbuffer = NULL;
|
||||
int64_t *doffsetbuffer = NULL;
|
||||
int nblocks,nthreads;
|
||||
|
||||
int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz;
|
||||
int *htricountbuffer = NULL;
|
||||
int64_t tricount;
|
||||
|
||||
nthreads = 256;
|
||||
nblocks = (int)(((psize/(int64_t)nthreads)+1L)/16L+1L);
|
||||
|
||||
cudaMalloc(&dtricountbuffer, nblocks*nthreads*sizeof(int));
|
||||
htricountbuffer = new(std::nothrow) int[nblocks*nthreads];
|
||||
cudaMalloc(&doffsetbuffer, nblocks*nthreads*sizeof(int64_t));
|
||||
hoffsetbuffer = new(std::nothrow) int64_t[nblocks*nthreads];
|
||||
|
||||
//DEBUG
|
||||
// float *debugbuffer = NULL;
|
||||
// float *ddebugbuffer = NULL;
|
||||
// debugbuffer = new(std::nothrow) float[nblocks*nthreads*25];
|
||||
// cudaMalloc(&ddebugbuffer,sizeof(float)*nblocks*nthreads*25);
|
||||
|
||||
|
||||
if(dtricountbuffer==NULL || htricountbuffer==NULL)
|
||||
{
|
||||
printf("marchingtets_gpu error: failed to allocate counting buffers\n");
|
||||
marchingtets_gpu_cleanup(&dtricountbuffer,&htricountbuffer,&dvertexbuffer,&doffsetbuffer,&hoffsetbuffer);
|
||||
return;
|
||||
}
|
||||
|
||||
// cudaMalloc(&dmtpars,sizeof(marchingtet_pars));
|
||||
// cudaMemcpy(dmtpars,&mtpars,sizeof(marchingtet_pars),cudaMemcpyHostToDevice);
|
||||
|
||||
printf("debug: launching marchingtets_gpu_tricount with <%d,%d>\n",nblocks,nthreads);
|
||||
cudaError_t err = cudaSuccess;
|
||||
marchingtets_gpu_tricount<<<nblocks,nthreads>>>(mtpars,dtricountbuffer);
|
||||
//marchingtets_gpu_debug<<<nblocks,nthreads>>>(mtpars,ddebugbuffer);
|
||||
cudaDeviceSynchronize();
|
||||
err = cudaGetLastError();
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
printf("marchingtets_gpu_tricount error: %s\n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
//mt_debug1(debugbuffer,ddebugbuffer,nblocks,nthreads);
|
||||
|
||||
tricount = mt_count_tris_and_accum_offset(htricountbuffer,dtricountbuffer,hoffsetbuffer,doffsetbuffer,nblocks,nthreads);
|
||||
|
||||
printf("marchingtets_gpu: triangle count = %ld\n",(long)tricount);
|
||||
|
||||
mesh->vertices.resize(tricount*9);
|
||||
cudaMalloc(&dvertexbuffer,sizeof(float)*tricount*9);
|
||||
|
||||
marchingtets_gpu_meshfill<<<nblocks,nthreads>>>(mtpars,doffsetbuffer,dvertexbuffer);
|
||||
cudaDeviceSynchronize();
|
||||
err = cudaGetLastError();
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
printf("marchingtets_gpu_meshfill error: %s\n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
//DEBUG
|
||||
// int I;
|
||||
// for(I=0;I<tricount*3;I++)
|
||||
// {
|
||||
// printf("vertex[%d]: (%1.3f, %1.3f, %1.3f)\n",I,
|
||||
// mesh->vertices[I*3+0],mesh->vertices[I*3+1],mesh->vertices[I*3+2]);
|
||||
// }
|
||||
|
||||
cudaMemcpy(mesh->vertices.data,dvertexbuffer,sizeof(float)*tricount*9,cudaMemcpyDeviceToHost);
|
||||
|
||||
//DEBUG
|
||||
// delete[] debugbuffer; debugbuffer=NULL;
|
||||
// cudaFree(ddebugbuffer); ddebugbuffer = NULL;
|
||||
|
||||
marchingtets_gpu_cleanup(&dtricountbuffer,&htricountbuffer,&dvertexbuffer,&doffsetbuffer,&hoffsetbuffer);
|
||||
return;
|
||||
}
|
||||
|
||||
void marchingtets_area_gpu_cleanup(
|
||||
float **hareabuffer,
|
||||
float **dareabuffer
|
||||
)
|
||||
{
|
||||
if(*dareabuffer!=NULL) {cudaFree(*dareabuffer); *dareabuffer=NULL;}
|
||||
if(*hareabuffer!=NULL) {delete[] *hareabuffer; *hareabuffer=NULL;}
|
||||
}
|
||||
|
||||
float marchingtets_area_gpu(marchingtet_pars mtpars)
|
||||
{
|
||||
//marchingtet_pars *dmtpars = NULL;
|
||||
int nblocks,nthreads;
|
||||
float *hareabuffer = NULL;
|
||||
float *dareabuffer = NULL;
|
||||
int I;
|
||||
float areasum = 0.0f;
|
||||
int64_t psize = (int64_t)mtpars.Nx*(int64_t)mtpars.Ny*(int64_t)mtpars.Nz;
|
||||
|
||||
nthreads = 256;
|
||||
nblocks = (int)(((psize/(int64_t)nthreads)+1L)/16L+1L);
|
||||
|
||||
cudaMalloc(&dareabuffer, nblocks*nthreads*sizeof(float));
|
||||
hareabuffer = new(std::nothrow) float[nblocks*nthreads];
|
||||
|
||||
|
||||
//DEBUG
|
||||
// float *debugbuffer = NULL;
|
||||
// float *ddebugbuffer = NULL;
|
||||
// debugbuffer = new(std::nothrow) float[nblocks*nthreads*25];
|
||||
// cudaMalloc(&ddebugbuffer,sizeof(float)*nblocks*nthreads*25);
|
||||
|
||||
|
||||
if(dareabuffer==NULL || hareabuffer==NULL)
|
||||
{
|
||||
printf("marchingtets_gpu error: failed to allocate counting buffers\n");
|
||||
marchingtets_area_gpu_cleanup(&hareabuffer,&dareabuffer);
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
marchingtets_gpu_areacount<<<nblocks,nthreads>>>(mtpars,dareabuffer);
|
||||
printf("debug: launching marchingtets_gpu_areacount with <%d,%d>\n",nblocks,nthreads);
|
||||
cudaError_t err = cudaSuccess;
|
||||
cudaDeviceSynchronize();
|
||||
err = cudaGetLastError();
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
printf("marchingtets_gpu_areacount error: %s\n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
cudaMemcpy(hareabuffer,dareabuffer,nblocks*nthreads*sizeof(float),cudaMemcpyDeviceToHost);
|
||||
for(I=0;I<nblocks*nthreads;I++)
|
||||
{
|
||||
areasum += hareabuffer[I];
|
||||
}
|
||||
|
||||
//DEBUG
|
||||
// int I;
|
||||
// for(I=0;I<tricount*3;I++)
|
||||
// {
|
||||
// printf("vertex[%d]: (%1.3f, %1.3f, %1.3f)\n",I,
|
||||
// mesh->vertices[I*3+0],mesh->vertices[I*3+1],mesh->vertices[I*3+2]);
|
||||
// }
|
||||
|
||||
|
||||
//DEBUG
|
||||
// delete[] debugbuffer; debugbuffer=NULL;
|
||||
// cudaFree(ddebugbuffer); ddebugbuffer = NULL;
|
||||
|
||||
marchingtets_area_gpu_cleanup(&hareabuffer,&dareabuffer);
|
||||
return areasum;
|
||||
}
|
||||
|
||||
void marchingtets_cpu_tricount(int thread, int nthreads, marchingtet_pars mtpars, int *countbuffer)
|
||||
@ -60,19 +359,33 @@ namespace fractallevelset
|
||||
bagoftriangles mesh;
|
||||
main_functionpars fpars;
|
||||
fpars.fractaliter = 64;
|
||||
fpars.threshold = 2.0f;
|
||||
fpars.threshold = 0.5f;
|
||||
fpars.freezethresh = 10.0f;
|
||||
|
||||
marchingtet_pars mtpars;
|
||||
mtpars.xyz_min = cuvec3f(-1.2,-1.2,-1.2);
|
||||
mtpars.xyz_max = cuvec3f(1.2,1.2,1.2);
|
||||
mtpars.Nx = 5;
|
||||
mtpars.Ny = 5;
|
||||
mtpars.Nz = 5;
|
||||
mtpars.Nx = 125;
|
||||
mtpars.Ny = 125;
|
||||
mtpars.Nz = 125;
|
||||
mtpars.pars = fpars;
|
||||
|
||||
float t0,t1, area;
|
||||
|
||||
t0 = util::time_msec();
|
||||
marchingtets_gpu(mtpars,&mesh);
|
||||
t1 = util::time_msec();
|
||||
|
||||
printf("marchingtets_gpu: %1.3f msec runtime %d psize\n",t1-t0,mtpars.Nx*mtpars.Ny*mtpars.Nz);
|
||||
|
||||
t0 = util::time_msec();
|
||||
area = marchingtets_area_gpu(mtpars);
|
||||
t1 = util::time_msec();
|
||||
|
||||
printf("marchingtets_area_gpu: %1.3f msec runtime %d psize\n",t1-t0,mtpars.Nx*mtpars.Ny*mtpars.Nz);
|
||||
printf("area=%1.3f, area/4pi=%1.3f\n",area,area/4.0f/pif);
|
||||
|
||||
mesh.save_stl(fnameout);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -600,12 +600,13 @@ namespace fractallevelset
|
||||
|
||||
ret = 2;
|
||||
tri1.p0 = mtlmpoint(tet.p0,tet.p1,fevals.eval0,fevals.eval1);
|
||||
tri1.p1 = mtlmpoint(tet.p0,tet.p3,fevals.eval0,fevals.eval3);
|
||||
tri1.p2 = mtlmpoint(tet.p3,tet.p2,fevals.eval3,fevals.eval2);
|
||||
tri1.p1 = mtlmpoint(tet.p3,tet.p2,fevals.eval3,fevals.eval2);
|
||||
tri1.p2 = mtlmpoint(tet.p0,tet.p3,fevals.eval0,fevals.eval3);
|
||||
|
||||
tri2.p0 = mtlmpoint(tet.p1,tet.p2,fevals.eval1,fevals.eval2);
|
||||
tri2.p1 = mtlmpoint(tet.p0,tet.p1,fevals.eval0,fevals.eval1);
|
||||
tri2.p2 = mtlmpoint(tet.p3,tet.p2,fevals.eval3,fevals.eval2);
|
||||
tri2.p1 = mtlmpoint(tet.p3,tet.p2,fevals.eval3,fevals.eval2);
|
||||
tri2.p2 = mtlmpoint(tet.p0,tet.p1,fevals.eval0,fevals.eval1);
|
||||
|
||||
}
|
||||
else if(!os1 && !os2 && os3)
|
||||
{
|
||||
@ -667,7 +668,7 @@ namespace fractallevelset
|
||||
|
||||
//fills from offset to offset+ntris*3*3 with the triangles calculated from fevals
|
||||
//increments offset
|
||||
__device__ __host__ void mtcube::fill_trianglebuffer(float *buffer, int &offset)
|
||||
__device__ __host__ void mtcube::fill_trianglebuffer(float *buffer, int64_t &offset)
|
||||
{
|
||||
int I;
|
||||
triangle t1,t2;
|
||||
@ -819,5 +820,15 @@ namespace fractallevelset
|
||||
return;
|
||||
}
|
||||
|
||||
__device__ __host__ void mtcube::eval_function(main_functionpars &pars)
|
||||
{
|
||||
//calls a function under consideration at each of the 15 node points
|
||||
int I;
|
||||
for(I=0;I<15;I++)
|
||||
{
|
||||
fevals[I] = main_function(get_point(I),pars);
|
||||
}
|
||||
}
|
||||
|
||||
};//end namespace fractallevelset
|
||||
};// end namespace amscuda
|
||||
|
||||
Reference in New Issue
Block a user