Making some of my library code public.
commit
64a7bfc851
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,639 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
#Python3 compilation library
|
||||
#Aaron M. Schinder
|
||||
#29 Dec 2020
|
||||
#
|
||||
#Cleanup and refactor from 2017 python2 version compilation libraries
|
||||
|
||||
import os,sys,math,subprocess
|
||||
|
||||
#####################
|
||||
#Directory Functions#
|
||||
#####################
|
||||
|
||||
##flist - list all files in a given directory pth
|
||||
##optional arguments:
|
||||
# recurse - (T/F): Whether to recursively search for files in directory tree
|
||||
# exts - (list): A list of file extensions to filter on
|
||||
# normpath (T/F): whether to normalize path variables after
|
||||
#filelist = flist(pth,**kwargs):
|
||||
def flist(pth, **kwargs):
    """Return the files found in directory ``pth``.

    Keyword arguments:
        recurse (bool): also descend into subdirectories (default False).
        exts (str or list): keep only files with these extensions; the
            filtering is delegated to filterexts().
        normpath (bool): run every result through os.path.normpath
            (default True).
        followlinks (bool): also descend into symlinked directories
            (default False).
        linuxpath: accepted for backward compatibility; it has no effect.

    Returns:
        list of str: paths built with os.path.join(pth, name). Files of
        ``pth`` come first (in os.listdir order), followed by the files of
        each subdirectory when recursing.
    """
    recurse_ = kwargs.get('recurse', False)
    normpath_ = kwargs.get('normpath', True)
    followlinks_ = kwargs.get('followlinks', False)

    flst = []
    dirlist = []
    for name in os.listdir(pth):
        full = os.path.join(pth, name)
        if os.path.isdir(full):
            # Symlinked directories are only kept when followlinks is set.
            if followlinks_ or not os.path.islink(full):
                dirlist.append(full)
        elif os.path.isfile(full):
            flst.append(full)

    # Recurse through directories
    if recurse_:
        for sub in dirlist:
            flst.extend(flist(sub, **kwargs))

    # Filter out all extensions except the selected ext list
    if 'exts' in kwargs:
        flst = filterexts(flst, kwargs['exts'])

    # Normalize filename path according to os
    if normpath_:
        flst = [os.path.normpath(f) for f in flst]

    return flst
|
||||
|
||||
#Filters by extensions in a list of files
|
||||
#flst = def filterexts(flst,exts):
|
||||
def filterexts(flst, exts):
    """Return the entries of ``flst`` whose extension is listed in ``exts``.

    Args:
        flst: iterable of file names / paths.
        exts: a single extension string or a list of them; a leading '.'
            is optional ('c' and '.c' are equivalent). Empty extension
            strings are ignored instead of raising IndexError.

    Returns:
        list of str: matching entries, in their original order. Files
        without an extension never match.
    """
    if isinstance(exts, str):
        exts = [exts]
    # Normalise once, up front, rather than once per file.
    wanted = {e if e.startswith('.') else '.' + e for e in exts if e}
    return [f for f in flst if os.path.splitext(f)[1] in wanted]
|
||||
|
||||
#Find a file fname, starting in pth and recursing
|
||||
#Used for finding library files to link
|
||||
def findfile(fname, pth, **kwargs):
    """Search ``pth`` recursively for a file whose base name is ``fname``.

    Returns the path of the last match in flist() order, or "" when no
    file matches. ``kwargs`` is accepted but not used.
    """
    match = ""
    for candidate in flist(pth, recurse=True):
        if os.path.split(candidate)[1] == fname:
            match = candidate
    return match
|
||||
|
||||
#List to space-separated-string
|
||||
def list_to_sss(lst):
    """Join the strings in ``lst`` into one space-separated string.

    Returns "" for an empty list; there is no leading or trailing space.
    """
    return " ".join(lst)
|
||||
|
||||
def strip_whitespace(strin):
    """Return ``strin`` without leading/trailing spaces, tabs, CRs and LFs.

    Only these four characters are stripped (not every Unicode space).
    A string made up solely of them yields "".
    """
    return strin.strip(' \t\r\n')
|
||||
|
||||
def sss_to_list(sss):
    """Split a space-separated string into a list of stripped tokens.

    The string is split on single spaces, so consecutive spaces (and an
    empty input) produce empty-string entries in the result.
    """
    return [strip_whitespace(token) for token in sss.split(' ')]
|
||||
|
||||
|
||||
def replaceext(fname, ext):
    """Return ``fname`` with its extension replaced by ``ext``.

    ``ext`` may be given with or without the leading '.'. An empty ``ext``
    simply removes the current extension.
    """
    stem = os.path.splitext(fname)[0]
    if len(ext) == 0:
        return stem
    suffix = ext if ext[0] == '.' else '.' + ext
    return stem + suffix
|
||||
|
||||
def replaceexts(fnamelist, ext):
    """Apply replaceext() to every name in ``fnamelist``; return the new list."""
    return [replaceext(name, ext) for name in fnamelist]
|
||||
|
||||
# def except_contains_oldv(lst1,exc):
|
||||
# lst2 = []
|
||||
# for item in lst1:
|
||||
# b = 1
|
||||
# for item2 in exc:
|
||||
# if(item.find(item2)>=0):
|
||||
# b = 0
|
||||
# break
|
||||
# if(b==1):
|
||||
# lst2.append(item)
|
||||
# return lst2
|
||||
|
||||
#filenames must match
|
||||
def except_contains(lst1, exc):
    """Return the paths in ``lst1`` whose base name is not listed in ``exc``.

    Args:
        lst1: iterable of file paths.
        exc: iterable of file names (no directory part) to exclude; the
            comparison is an exact match on the last path component.

    Returns:
        list of str: the surviving paths, in their original order.
    """
    # Build the lookup once; membership is then O(1) per path.
    excluded = set(exc)
    return [item for item in lst1 if os.path.split(item)[1] not in excluded]
|
||||
|
||||
##########################
|
||||
##System Call Procedures##
|
||||
##########################
|
||||
|
||||
def callproc(cmd, **kwargs):
    """Run ``cmd`` through the shell, capturing stdout and stderr together.

    Keyword arguments:
        logfile (str): when non-empty, the command and its output are
            appended to this file.
        echo (bool): print the command and its output (default True).

    Returns None. The exit status of the command is not inspected.
    """
    logfile = kwargs.get('logfile', "")
    echo = kwargs.get('echo', True)

    if echo:
        print(cmd)

    # encoding/decoding to/from bytes is necessary to use the subprocess
    # command in python3.7; however, only do this on non-Windows platforms.
    if sys.platform != 'win32':
        cmd2 = cmd.encode(encoding='utf-8')
    else:
        cmd2 = cmd
    proc = subprocess.Popen(cmd2, stderr=subprocess.STDOUT,
                            stdout=subprocess.PIPE, shell=True)
    out, _ = proc.communicate()

    # Tools may emit bytes that are not valid UTF-8; never crash on them.
    out = out.decode(encoding='utf-8', errors='replace')

    if echo:
        print(out)

    if logfile:
        # Context manager guarantees the log handle is closed.
        with open(logfile, 'a+') as fp:
            fp.write(cmd + '\n')
            fp.write(out + '\n')
|
||||
|
||||
#######################################
|
||||
##Compiler, Archive, and Linker Calls##
|
||||
#######################################
|
||||
|
||||
def smartcompile(srcfile, objext='.o'):
    """Tell whether ``srcfile`` needs to be (re)compiled.

    The object file is expected next to the source, with the extension
    replaced by ``objext``. Returns False only when that object file
    exists and its modification time is not older than the source's.
    """
    src_mtime = os.path.getmtime(srcfile)
    objfile = replaceext(srcfile, objext)
    if not os.path.exists(objfile):
        return True
    return not (os.path.getmtime(objfile) >= src_mtime)
|
||||
|
||||
#MSVC compiler wrapper
|
||||
def msvc_compile(compilername, srcfile, **kwargs):
    """Compile one source file with an MSVC-style command line.

    Keyword arguments (defaults in parentheses):
        include (''), flags (''): string or list of strings.
        objext ('.obj'): extension of the generated object file.
        srcfileflag ('/c'), outfileflag ('/Fo:'): compiler switches.
        logfile (""): forwarded to callproc().

    The object file is written next to the source file.
    """
    include = kwargs.get('include', '')
    if isinstance(include, list):
        include = list_to_sss(include)

    flags = kwargs.get('flags', '')
    if isinstance(flags, list):
        flags = list_to_sss(flags)

    objext = kwargs.get('objext', '.obj')
    srcfileflag = kwargs.get('srcfileflag', '/c')
    outfileflag = kwargs.get('outfileflag', '/Fo:')
    logfile = kwargs.get('logfile', "")

    outfile = replaceext(srcfile, objext)
    # The empty element reproduces the double space after the flags; the
    # output switch and the object path are deliberately not separated.
    ln = " ".join([compilername, flags, "", srcfileflag, srcfile,
                   outfileflag + outfile, include])

    callproc(ln, echo=True, logfile=logfile)

    return
|
||||
|
||||
#MSVC compiler wrapper
|
||||
def msvc_compile_list(compiler, srclist, **kwargs):
    """Run msvc_compile() on every source file in ``srclist``, in order."""
    for src in srclist:
        msvc_compile(compiler, src, **kwargs)
|
||||
|
||||
#gnu-style compiler compile: Should work with gcc, g++, gfortran
|
||||
def gs_compile(compiler, srcfile, **kwargs):
    """Compile one source file with a gnu-style compiler command line.

    Keyword arguments (defaults in parentheses):
        include (''), flags (''): string or list of strings.
        objext ('.o'): extension of the generated object file.
        srcfileflag ('-c'), outfileflag ('-o'): compiler switches.
        logfile (""): forwarded to callproc().
        smartcompile (True): when true, skip the compile if smartcompile()
            reports the object file as up to date.

    The object file is written next to the source file.
    """
    include = kwargs.get('include', '')
    if isinstance(include, list):
        include = list_to_sss(include)

    flags = kwargs.get('flags', '')
    if isinstance(flags, list):
        flags = list_to_sss(flags)

    objext = kwargs.get('objext', '.o')
    srcfileflag = kwargs.get('srcfileflag', '-c')
    outfileflag = kwargs.get('outfileflag', '-o')
    logfile = kwargs.get('logfile', "")
    use_timestamps = kwargs.get('smartcompile', True)

    # Nothing to do when timestamp checking is on and the object is fresh.
    if use_timestamps and not smartcompile(srcfile, objext):
        return

    outfile = replaceext(srcfile, objext)
    ln = " ".join([compiler, flags, outfileflag, outfile,
                   srcfileflag, srcfile, include])

    callproc(ln, echo=True, logfile=logfile)

    return
|
||||
|
||||
def gs_compile_list(compiler, srclist, **kwargs):
    """Run gs_compile() on every source file in ``srclist``, in order."""
    for src in srclist:
        gs_compile(compiler, src, **kwargs)
|
||||
|
||||
def gs_compile_all(compiler, srcdir, srcexts, **kwargs):
    """Compile every file under ``srcdir`` whose extension is in ``srcexts``.

    ``recurse`` (default True) controls whether subdirectories are
    searched; all keyword arguments are forwarded to gs_compile().
    """
    recurse = kwargs.get('recurse', True)
    for src in flist(srcdir, exts=srcexts, recurse=recurse):
        gs_compile(compiler, src, **kwargs)
|
||||
|
||||
def gs_link_all(linker, srcpath, target, **kwargs):
    """Link every object file found under ``srcpath`` into ``target``.

    ``objext`` (default '.o') selects the object files and ``recurse``
    (default True) controls the directory search; all keyword arguments
    are forwarded to gs_link_list().
    """
    objext = kwargs.get('objext', '.o')
    recurse = kwargs.get('recurse', True)

    objfils = flist(srcpath, exts=objext, recurse=recurse)
    gs_link_list(linker, list_to_sss(objfils), target, **kwargs)
|
||||
|
||||
def gs_link_list(linker, objlist, target, **kwargs):
    """Link object files into ``target`` with a gnu-style linker command.

    Args:
        linker: linker executable name.
        objlist: space-separated string of object files; a list or tuple
            of file names is also accepted and joined with spaces.
        target: output file name (passed after ``-o``).

    Keyword arguments (all default to ''): libdir, staticlibs, libflags,
    linkerflags are appended to the command line as given; logfile is
    forwarded to callproc(). Other keys (objext, recurse) are ignored.
    """
    if isinstance(objlist, (list, tuple)):
        objlist = list_to_sss(objlist)

    libdir = kwargs.get('libdir', '')
    staticlibs = kwargs.get('staticlibs', '')
    libflags = kwargs.get('libflags', '')
    linkerflags = kwargs.get('linkerflags', '')
    logfile = kwargs.get('logfile', '')

    ln = linker+" -o "+target+" "+libdir
    ln = ln+" "+objlist+" "+staticlibs+" "+libflags+" "+linkerflags

    callproc(ln, logfile=logfile)
    return
|
||||
|
||||
def msvc_link_list(objlist, target, **kwargs):
    """Link object files into ``target`` using the MSVC ``link`` tool.

    Args:
        objlist: space-separated string of object files; a list or tuple
            of file names is also accepted and joined with spaces.
        target: output file name (passed as ``/out:target``).

    Keyword arguments (all default to ''): libdir, staticlibs, libflags,
    linkerflags are appended to the command line as given; logfile is
    forwarded to callproc(). Other keys (objext, recurse) are ignored.
    """
    linker = 'link'

    if isinstance(objlist, (list, tuple)):
        objlist = list_to_sss(objlist)

    libdir = kwargs.get('libdir', '')
    staticlibs = kwargs.get('staticlibs', '')
    libflags = kwargs.get('libflags', '')
    linkerflags = kwargs.get('linkerflags', '')
    logfile = kwargs.get('logfile', '')

    ln = linker+" "+libdir
    ln = ln+" "+objlist+" "+staticlibs+" "+linkerflags
    ln = ln+" /out:"+target+" "+libflags

    callproc(ln, logfile=logfile)

    return
|
||||
|
||||
def ar_all(srcpath, arname, **kwargs):
    """Archive every object file found under ``srcpath`` into ``arname``.

    ``objext`` (default '.o') selects the object files and ``recurse``
    (default True) controls the directory search; all keyword arguments
    are forwarded to ar_list().
    """
    recurse = kwargs.get('recurse', True)
    objext = kwargs.get('objext', '.o')

    ar_list(flist(srcpath, exts=objext, recurse=recurse), arname, **kwargs)
|
||||
|
||||
def msvc_lib_list(objlist, arname, **kwargs):
    """Run the MSVC ``lib`` tool on the object files in ``objlist``.

    ``objlist`` is a list of file names; the library is written to
    ``arname``. ``kwargs`` is accepted but not used.
    """
    callproc("lib " + list_to_sss(objlist) + " /out:" + arname)
|
||||
|
||||
def ar_list(objlist, arname, **kwargs):
    """Run ``ar cr`` to put the object files of ``objlist`` into ``arname``.

    ``objlist`` is a list of file names. ``kwargs`` is accepted but not
    used.
    """
    callproc("ar cr " + arname + " " + list_to_sss(objlist))
|
||||
|
||||
def ar_add_list(objlist, arname, **kwargs):
    """Add the object files in ``objlist`` to the archive ``arname``.

    Uses ``ar r`` (insert members, replacing existing ones of the same
    name). The previous ``ar t`` only printed the archive's table of
    contents and never modified the archive. ``kwargs`` is accepted but
    not used.
    """
    objlist2 = list_to_sss(objlist)

    ln = "ar r "+arname+" "+objlist2
    callproc(ln)
    return
|
||||
|
||||
##############################
|
||||
##Derived Compiler Functions##
|
||||
##############################
|
||||
|
||||
def gcc_compile(srcfile, **kwargs):
    """Compile ``srcfile`` with gcc through gs_compile(); objext is forced to '.o'."""
    options = dict(kwargs, objext='.o')
    gs_compile('gcc', srcfile, **options)
|
||||
|
||||
def gcc_compile_all(srcdir, **kwargs):
    """Compile all '.c' files under ``srcdir`` with gcc; objext is forced to '.o'."""
    options = dict(kwargs, objext='.o')
    gs_compile_all('gcc', srcdir, ['.c'], **options)
|
||||
|
||||
def gcc_compile_list(srclist, **kwargs):
    """Compile every file in ``srclist`` with gcc; objext is forced to '.o'."""
    options = dict(kwargs, objext='.o')
    gs_compile_list('gcc', srclist, **options)
|
||||
|
||||
def gpp_compile(srcfile, **kwargs):
    """Compile ``srcfile`` with g++ through gs_compile(); objext is forced to '.o'."""
    options = dict(kwargs, objext='.o')
    gs_compile('g++', srcfile, **options)
|
||||
|
||||
def gpp_compile_all(srcdir, **kwargs):
    """Compile all '.c' and '.cpp' files under ``srcdir`` with g++; objext is forced to '.o'."""
    options = dict(kwargs, objext='.o')
    gs_compile_all('g++', srcdir, ['.c', '.cpp'], **options)
|
||||
|
||||
def gpp_compile_list(srclist, **kwargs):
    """Compile every file in ``srclist`` with g++; objext is forced to '.o'."""
    options = dict(kwargs, objext='.o')
    gs_compile_list('g++', srclist, **options)
|
||||
|
||||
def gfortran_compile(srcfile, **kwargs):
    """Compile ``srcfile`` with gfortran through gs_compile(); objext is forced to '.o'."""
    options = dict(kwargs, objext='.o')
    gs_compile('gfortran', srcfile, **options)
|
||||
|
||||
def gfortran_compile_all(srcdir, **kwargs):
    """Compile all '.f', '.f90' and '.f77' files under ``srcdir`` with gfortran; objext is forced to '.o'."""
    options = dict(kwargs, objext='.o')
    gs_compile_all('gfortran', srcdir, ['.f', '.f90', '.f77'], **options)
|
||||
|
||||
def gfortran_compile_list(srclist, **kwargs):
    """Compile every file in ``srclist`` with gfortran; objext is forced to '.o'."""
    options = dict(kwargs, objext='.o')
    gs_compile_list('gfortran', srclist, **options)
|
||||
|
||||
def clang_compile(srcfile, **kwargs):
    """Compile ``srcfile`` with clang++ through gs_compile(); objext is forced to '.o'."""
    options = dict(kwargs, objext='.o')
    gs_compile('clang++', srcfile, **options)
|
||||
|
||||
def clang_compile_all(srcdir, **kwargs):
    """Compile all '.c' and '.cpp' files under ``srcdir`` with clang++; objext is forced to '.o'."""
    options = dict(kwargs, objext='.o')
    gs_compile_all('clang++', srcdir, ['.c', '.cpp'], **options)
|
||||
|
||||
def clang_compile_list(srclist, **kwargs):
    """Compile every file in ``srclist`` with clang++; objext is forced to '.o'."""
    options = dict(kwargs, objext='.o')
    gs_compile_list('clang++', srclist, **options)
|
@ -0,0 +1,524 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import os,sys,math
|
||||
import subprocess
|
||||
|
||||
##flist - list all files in a given directory pth
|
||||
##optional arguments:
|
||||
# recurse - (T/F): Whether to recursively search for files in directory tree
|
||||
# exts - (list): A list of file extensions to filter on
|
||||
# normpath (T/F): whether to normalize path variables after
|
||||
#filelist = flist(pth,**kwargs):
|
||||
def flist(pth, **kwargs):
    """Return the files found in directory ``pth``.

    Keyword arguments:
        recurse (bool): also descend into subdirectories (default False).
        exts (str or list): keep only files with these extensions; the
            filtering is delegated to filterexts().
        normpath (bool): run every result through os.path.normpath
            (default True).
        followlinks (bool): also descend into symlinked directories
            (default False).
        linuxpath: accepted for backward compatibility; it has no effect.

    Returns:
        list of str: paths built with os.path.join(pth, name). Files of
        ``pth`` come first (in os.listdir order), followed by the files of
        each subdirectory when recursing.
    """
    recurse_ = kwargs.get('recurse', False)
    normpath_ = kwargs.get('normpath', True)
    followlinks_ = kwargs.get('followlinks', False)

    flst = []
    dirlist = []
    for name in os.listdir(pth):
        full = os.path.join(pth, name)
        if os.path.isdir(full):
            # Symlinked directories are only kept when followlinks is set.
            if followlinks_ or not os.path.islink(full):
                dirlist.append(full)
        elif os.path.isfile(full):
            flst.append(full)

    # Recurse through directories
    if recurse_:
        for sub in dirlist:
            flst.extend(flist(sub, **kwargs))

    # Filter out all extensions except the selected ext list
    if 'exts' in kwargs:
        flst = filterexts(flst, kwargs['exts'])

    # Normalize filename path according to os
    if normpath_:
        flst = [os.path.normpath(f) for f in flst]

    return flst
|
||||
|
||||
#Filters by extensions in a list of files
|
||||
#flst = def filterexts(flst,exts):
|
||||
def filterexts(flst, exts):
    """Return the entries of ``flst`` whose extension is listed in ``exts``.

    Args:
        flst: iterable of file names / paths.
        exts: a single extension string or a list of them; a leading '.'
            is optional ('c' and '.c' are equivalent). Empty extension
            strings are ignored instead of raising IndexError.

    Returns:
        list of str: matching entries, in their original order. Files
        without an extension never match.
    """
    if isinstance(exts, str):
        exts = [exts]
    # Normalise once, up front, rather than once per file.
    wanted = {e if e.startswith('.') else '.' + e for e in exts if e}
    return [f for f in flst if os.path.splitext(f)[1] in wanted]
|
||||
|
||||
#Find a file fname, starting in pth and recursing
|
||||
#Used for finding library files to link
|
||||
def findfile(fname, pth, **kwargs):
    """Search ``pth`` recursively for a file whose base name is ``fname``.

    Returns the path of the last match in flist() order, or "" when no
    file matches. ``kwargs`` is accepted but not used.
    """
    match = ""
    for candidate in flist(pth, recurse=True):
        if os.path.split(candidate)[1] == fname:
            match = candidate
    return match
|
||||
|
||||
def replaceext(fname, ext):
    """Return ``fname`` with its extension replaced by ``ext``.

    ``ext`` may be given with or without the leading '.'. An empty ``ext``
    simply removes the current extension.
    """
    stem = os.path.splitext(fname)[0]
    if len(ext) == 0:
        return stem
    suffix = ext if ext[0] == '.' else '.' + ext
    return stem + suffix
|
||||
|
||||
def replaceexts(fnamelist, ext):
    """Apply replaceext() to every name in ``fnamelist``; return the new list."""
    return [replaceext(name, ext) for name in fnamelist]
|
||||
|
||||
#filenames must match
|
||||
def except_contains(lst1, exc):
    """Return the paths in ``lst1`` whose base name is not listed in ``exc``.

    Args:
        lst1: iterable of file paths.
        exc: iterable of file names (no directory part) to exclude; the
            comparison is an exact match on the last path component.

    Returns:
        list of str: the surviving paths, in their original order.
    """
    # Build the lookup once; membership is then O(1) per path.
    excluded = set(exc)
    return [item for item in lst1 if os.path.split(item)[1] not in excluded]
|
||||
|
||||
##########################
|
||||
##System Call Procedures##
|
||||
##########################
|
||||
|
||||
def callproc(cmd, **kwargs):
    """Run ``cmd`` through the shell, capturing stdout and stderr together.

    Keyword arguments:
        logfile (str): when non-empty, the command and its output are
            appended to this file.
        echo (bool): print the command and its output (default True).

    Returns None. The exit status of the command is not inspected.
    """
    logfile = kwargs.get('logfile', "")
    echo = kwargs.get('echo', True)

    if echo:
        print(cmd)

    # encoding/decoding to/from bytes is necessary to use the subprocess
    # command in python3.7; however, only do this on non-Windows platforms.
    if sys.platform != 'win32':
        cmd2 = cmd.encode(encoding='utf-8')
    else:
        cmd2 = cmd
    proc = subprocess.Popen(cmd2, stderr=subprocess.STDOUT,
                            stdout=subprocess.PIPE, shell=True)
    out, _ = proc.communicate()

    # Tools may emit bytes that are not valid UTF-8; never crash on them.
    out = out.decode(encoding='utf-8', errors='replace')

    if echo:
        print(out)

    if logfile:
        # Context manager guarantees the log handle is closed.
        with open(logfile, 'a+') as fp:
            fp.write(cmd + '\n')
            fp.write(out + '\n')
|
||||
|
||||
#List to space-separated-string
|
||||
def list_to_sss(lst):
    """Join the strings in ``lst`` into one space-separated string.

    Returns "" for an empty list; there is no leading or trailing space.
    """
    return " ".join(lst)
|
||||
|
||||
#####################################
|
||||
## Incremental Compilation Library ##
|
||||
#####################################
|
||||
|
||||
#silently read lines from a text file if exists
|
||||
def readtextlines(fname):
    """Silently read the lines of a text file, if it exists.

    Returns:
        list of str: the file's lines, each keeping its line terminator.
        An empty list is returned when ``fname`` is not a regular file or
        cannot be opened/read. No spurious trailing "" element is added.
    """
    if not os.path.isfile(fname):
        return []

    try:
        # errors="replace": a stray non-decodable byte in a source file
        # must not abort the scan.
        with open(fname, "r", errors="replace") as fp:
            return fp.readlines()
    except OSError:
        return []
|
||||
|
||||
def getincludefnfrage(includeline):
    """Extract the file name from an ``#include`` line.

    Handles both ``#include <name>`` and ``#include "name"``. The closing
    delimiter is searched for strictly after the opening one, so an
    opening quote is no longer mistaken for its own closing quote (which
    made every quoted include yield "").

    Returns:
        str: the text between the delimiters, or "" when no complete
        delimiter pair is found.
    """
    start = -1
    for idx, ch in enumerate(includeline):
        if ch == '<' or ch == '"':
            start = idx
            break
    if start < 0:
        return ""

    closer = '>' if includeline[start] == '<' else '"'
    end = includeline.find(closer, start + 1)
    if end < 0:
        return ""

    return includeline[start + 1:end]
|
||||
|
||||
#Returns the name of the source file fname (if it exists)
|
||||
#and all included filenames
|
||||
def getsrcandincludes(fname, incdirs):
    """Return ``fname`` (if it exists) plus every file it includes.

    Each listed file is scanned with readtextlines() for lines containing
    "#include"; the name extracted by getincludefnfrage() is looked up in
    ``incdirs`` in order and the first existing match is added, then
    scanned in turn.

    Every file is listed (and scanned) only once, so headers that include
    each other no longer make the scan loop forever.

    Returns:
        list of str: the source file first, then the included files in
        discovery order. Empty when ``fname`` is not a file.
    """
    found = []
    seen = set()
    if os.path.isfile(fname):
        found.append(fname)
        seen.add(os.path.normpath(fname))

    idx = 0
    while idx < len(found):
        # The list grows while it is walked: this is the recursion.
        for line in readtextlines(found[idx]):
            if line.find("#include") < 0:
                continue
            fnfrag = getincludefnfrage(line)
            if fnfrag == "":
                continue
            for incdir in incdirs:
                tfn = os.path.join(incdir, fnfrag)
                if os.path.isfile(tfn):
                    key = os.path.normpath(tfn)
                    if key not in seen:
                        seen.add(key)
                        found.append(tfn)
                    break
        idx = idx + 1

    return found
|
||||
|
||||
#Returns the name of the object file associated with the source file
|
||||
#within the object store folder (if it exists)
|
||||
def getobjfile(fname, objstore, objext=".o"):
    """Return the object file in ``objstore`` that belongs to source ``fname``.

    The object name is the source's base name with all extensions removed
    ('a.tar.cpp' -> 'a'), followed by '.' and ``objext`` with its
    leading/trailing dots stripped. Returns "" when that file does not
    exist.
    """
    stem = os.path.split(fname)[1]
    root, ext = os.path.splitext(stem)
    while ext != "":
        stem = root
        root, ext = os.path.splitext(stem)

    candidate = os.path.join(objstore, "{}.{}".format(stem, objext.strip('.')))
    if os.path.exists(candidate):
        return candidate
    return ""
|
||||
|
||||
def getsrctimes(fname, incdirs):
    """Return the modification times of the files listed by getsrcandincludes().

    The result is in the same order as getsrcandincludes(fname, incdirs).
    """
    return [os.path.getmtime(path) for path in getsrcandincludes(fname, incdirs)]
|
||||
|
||||
def getobjtime(fname, objstore, objext=".o"):
    """Return the modification time of the object file for ``fname``.

    The object file is located with getobjfile(); -1 is returned when it
    does not exist.
    """
    objfile = getobjfile(fname, objstore, objext)
    if objfile == "":
        return -1
    return os.path.getmtime(objfile)
|
||||
|
||||
#Decide whether or not to compile source file
|
||||
def decidecompile(fname, **kwargs):
    """Decide whether source file ``fname`` has to be compiled.

    Keyword arguments:
        searchincdirs (list): directories searched for included files
            (default ["./include"]).
        objext (str): object file extension (default ".o").
        objstore (str): directory holding the object files
            (default "./objstore").

    Returns:
        bool: False when ``fname`` is not a file. True when no object
        file exists yet, or when the source or any of its includes is
        strictly newer than the object file. False otherwise.
    """
    if not os.path.isfile(fname):
        return False

    incdirs = kwargs.get("searchincdirs", ["./include"])
    objext = kwargs.get("objext", ".o")
    objstore = kwargs.get("objstore", "./objstore")

    obj = getobjfile(fname, objstore, objext)
    if obj == "":
        # No object yet: always compile, no need to scan the includes.
        return True

    objt = os.path.getmtime(obj)
    # The include tree is scanned once (it used to be parsed twice).
    return any(srct > objt for srct in getsrctimes(fname, incdirs))
|
||||
|
||||
def gs_incremental_compile(compiler, srcfile, **kwargs):
    """Compile ``srcfile`` into the object store, only when needed.

    decidecompile() is consulted first; when it returns False nothing is
    run. The object file is written to ``objstore`` using the same
    "<stem>.<ext>" name that getobjfile() looks up, so an ``objext``
    given without a leading dot still matches on the next run.

    Keyword arguments (defaults in parentheses):
        include (''), flags (''): string or list of strings.
        objext ('.o'), srcfileflag ('-c'), outfileflag ('-o').
        logfile (""): forwarded to callproc().
        objstore ("./objstore"): output directory, created if missing.
        searchincdirs: used by decidecompile().
    """
    include = kwargs.get('include', '')
    if isinstance(include, list):
        include = list_to_sss(include)

    flags = kwargs.get('flags', '')
    if isinstance(flags, list):
        flags = list_to_sss(flags)

    objext = kwargs.get('objext', '.o')
    srcfileflag = kwargs.get('srcfileflag', '-c')
    outfileflag = kwargs.get('outfileflag', '-o')
    logfile = kwargs.get('logfile', "")
    objstore = kwargs.get('objstore', "./objstore")

    if not decidecompile(srcfile, **kwargs):
        return

    # Strip every extension from the base name ('a.tar.cpp' -> 'a').
    stem = os.path.split(srcfile)[1]
    while os.path.splitext(stem)[1] != "":
        stem = os.path.splitext(stem)[0]
    outfile = os.path.join(objstore, "{}.{}".format(stem, objext.strip('.')))

    # The compiler cannot create the output directory itself.
    if objstore:
        os.makedirs(objstore, exist_ok=True)

    ln = compiler+" "+flags+" " + outfileflag+" "+outfile+" "+srcfileflag+" "+srcfile
    ln = ln + " " + include

    callproc(ln, echo=True, logfile=logfile)

    return
|
||||
|
||||
def gs_incremental_compile_list(compiler, srclist, **kwargs):
    """Run gs_incremental_compile() on every source file in ``srclist``, in order."""
    for src in srclist:
        gs_incremental_compile(compiler, src, **kwargs)
|
||||
|
||||
#MSVC compiler wrapper
|
||||
|
||||
def msvc_incremental_compile(compilername, srcfile, **kwargs):
    """Compile ``srcfile`` into the object store with MSVC, only when needed.

    Fixes over the previous version:
      * the compile command is issued once (it used to be passed to
        callproc() a second time after the first call);
      * decidecompile() is told the effective object extension, so the
        '.obj' default is used for the up-to-date check instead of
        decidecompile()'s own '.o' default;
      * the output name uses the same "<stem>.<ext>" form that
        getobjfile() looks up.

    Keyword arguments (defaults in parentheses):
        include (''), flags (''): string or list of strings.
        objext ('.obj'), srcfileflag ('/c'), outfileflag ('/Fo:').
        logfile (""): forwarded to callproc().
        objstore ("./objstore"): output directory, created if missing.
        searchincdirs: used by decidecompile().
    """
    include = kwargs.get('include', '')
    if isinstance(include, list):
        include = list_to_sss(include)

    flags = kwargs.get('flags', '')
    if isinstance(flags, list):
        flags = list_to_sss(flags)

    objext = kwargs.get('objext', '.obj')
    srcfileflag = kwargs.get('srcfileflag', '/c')
    outfileflag = kwargs.get('outfileflag', '/Fo:')
    logfile = kwargs.get('logfile', "")
    objstore = kwargs.get('objstore', "./objstore")

    # Pass objext explicitly so the freshness check looks for the same
    # file this function is about to write.
    if not decidecompile(srcfile, **dict(kwargs, objext=objext)):
        return

    # Strip every extension from the base name ('a.tar.cpp' -> 'a').
    stem = os.path.split(srcfile)[1]
    while os.path.splitext(stem)[1] != "":
        stem = os.path.splitext(stem)[0]
    outfile = os.path.join(objstore, "{}.{}".format(stem, objext.strip('.')))

    # The compiler cannot create the output directory itself.
    if objstore:
        os.makedirs(objstore, exist_ok=True)

    ln = compilername+" "+flags+" "+srcfileflag+" "+srcfile+" "+outfileflag+" "+outfile
    ln = ln + " " + include

    callproc(ln, echo=True, logfile=logfile)

    return
|
||||
|
||||
def msvc_incremental_compile_list(compiler, srclist, **kwargs):
    """Run msvc_incremental_compile() on every source file in ``srclist``, in order."""
    for src in srclist:
        msvc_incremental_compile(compiler, src, **kwargs)
|
||||
|
||||
#######################
|
||||
## Main Script Tests ##
|
||||
#######################
|
||||
|
||||
def testtimes(args):
    """Print the dependency/timestamp report for the source file ``args[1]``.

    Lists the source and its includes (searched in "./include") with
    their modification times, the matching object file in "./objstore"
    (if any), and the decision returned by decidecompile(). Does nothing
    when fewer than two arguments are given.
    """
    if len(args) < 2:
        return

    src = args[1]
    files = getsrcandincludes(src, ["./include"])
    times = getsrctimes(src, ["./include"])
    for idx, path in enumerate(files):
        print("{}\t\t{}".format(path, times[idx]))

    print("associated obj file:")
    fobj = getobjfile(src, "./objstore")
    ftobj = getobjtime(src, "./objstore")
    if fobj == "":
        print("none found")
    else:
        print("{}\t\t{}".format(fobj, ftobj))

    cflag = decidecompile(src)
    print("compile? : {}".format(cflag))

    return
|
||||
|
||||
# if(__name__ == "__main__"):
|
||||
|
||||
# args = sys.argv
|
||||
# testtimes(args)
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,52 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import os,sys,subprocess,math
|
||||
from complib2 import *
|
||||
from complib3 import gs_incremental_compile, gs_incremental_compile_list
|
||||
|
||||
import shutil
|
||||
#from distutils.dir_util import copy_tree as copy_tree #this version does overwrites
|
||||
from shutil import copytree
|
||||
|
||||
# ---- project configuration ----
libname = 'amsculib2.linux64'  # prefix static library name to generate
targetname = 'test'  # create this executable when compiling tests
commonincdir = "../../linux64/include"
commonlibdir = "../../linux64/lib"
localbindir = "./bin_linux64"
cc = 'nvcc'  # compiler
srcexts = ['.c', '.cpp', '.cu']
mainsrc = ['main.cu']  # ignore these files when compiling the static library

include = "-I./include -I{}".format(commonincdir)
kwargs = {
    'include': include,
    # -dc flag: relocatable device code - needed for device functions to
    # link in different "execution units"
    # (optional extra: --ptxas-options=-v)
    'flags': "-dc --compiler-options '-fPIC -O3'",
    'libdir': "-L{} -L{}".format(localbindir, commonlibdir),
    'libflags': "-l{}".format(libname),
    'linkerflags': "",
    'recurse': True,
    'objstore': "./objstore",
    'searchincdirs': ['./include'],
}

# find all source files, except the main project files
files = except_contains(flist('./src', exts=srcexts, recurse=True), mainsrc)
objfiles = replaceexts(files, '.o')
objfiles_sss = list_to_sss(objfiles)

# compile all the source files in the list (only those that are out of date)
gs_incremental_compile_list(cc, files, **kwargs)

# archive all the object files of the object store into a static library
objlist = flist(kwargs['objstore'], exts='.o', recurse=True)
ar_list(objlist, '{}/lib{}.a'.format(localbindir, libname))

# Push any libraries to the common lib folder
shutil.copy('{}/lib{}.a'.format(localbindir, libname), commonlibdir)

# Copy include files to the common include folder
copytree('./include/', commonincdir+'/', dirs_exist_ok=True)
|
@ -0,0 +1,43 @@
|
||||
#!/usr/bin/python3
"""Compile ./src/main.cu with nvcc and link it into the linux64 test executable."""

import os
import sys
import subprocess
import math
from complib2 import *
from complib3 import gs_incremental_compile, gs_incremental_compile_list

import shutil

libname = 'amsculib2.linux64'  #prefix static library name to generate
targetname = 'test'  #create this executable when compiling tests
commonincdir = "../../linux64/include"
commonlibdir = "../../linux64/lib"
localbindir = "./bin_linux64"
cc = 'nvcc'  #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.cu']  #ignore these files when compiling the static library

include = "-I./include -I" + commonincdir
kwargs = {
    'include': include,
    #-dc flag: relocatable device code - needed for device functions to link in different "execution units"
    'flags': "-dc --compiler-options '-fPIC'",
    'libdir': "-L" + localbindir + " -L" + commonlibdir,
    'libflags': "-l" + libname + " -lamsculib2.linux64",
    'linkerflags': "",
    'recurse': True,
    'objstore': "./objstore",
    'searchincdirs': ['./include'],
}

#Other libraries that have been linked from this template:
#-lamsmathlib3.linux64 -lamsstring3.linux64 -lamsmatrix_cpp.linux64 -llapack -lblas -lgfortran -lamsmathutilthread.linux64 -lamsmathutil2.linux64

#Designate source files for main test program
fsrc = ['./src/main.cu']
fobj = replaceexts(fsrc,'.o')

#Compile and link the test program
gs_compile_list(cc,fsrc,**kwargs)
gs_link_list(cc,list_to_sss(fobj),localbindir + "/" + targetname,**kwargs)
|
@ -0,0 +1,639 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
#Python3 compilation library
|
||||
#Aaron M. Schinder
|
||||
#29 Dec 2020
|
||||
#
|
||||
#Cleanup and refactor from 2017 python2 version compilation libraries
|
||||
|
||||
import os,sys,math,subprocess
|
||||
|
||||
#####################
|
||||
#Directory Functions#
|
||||
#####################
|
||||
|
||||
##flist - list all files in a given directory pth
|
||||
##optional arguments:
|
||||
# recurse - (T/F): Whether to recursively search for files in directory tree
|
||||
# exts - (list): A list of file extensions to filter on
|
||||
# normpath (T/F): whether to normalize path variables after
|
||||
#filelist = flist(pth,**kwargs):
|
||||
def flist(pth,**kwargs):
    """Return the paths of the files found in directory pth.

    Keyword options:
      recurse     - descend into subdirectories (default False)
      exts        - extension or list of extensions handed to filterexts
      normpath    - run os.path.normpath on every result (default True)
      followlinks - descend into directories that are symlinks (default False)
      linuxpath   - accepted but currently has no effect
    """
    recurse_ = kwargs.get('recurse',False)
    normpath_ = kwargs.get('normpath',True)
    followlinks_ = kwargs.get('followlinks',False)

    found = []
    subdirs = []
    for entry in os.listdir(pth):
        full = os.path.join(pth,entry)
        if os.path.isdir(full):
            #symlinked directories are skipped unless followlinks is set
            if not followlinks_ and os.path.islink(full):
                continue
            if full not in (".",".."):
                subdirs.append(full)
        elif os.path.isfile(full):
            found.append(full)

    #Recurse through directories, forwarding the same options
    if recurse_:
        for sub in subdirs:
            found.extend(flist(sub,**kwargs))

    #Keep only the requested extensions
    if 'exts' in kwargs:
        found = filterexts(found,kwargs['exts'])

    #Normalize filename path according to os
    if normpath_:
        found = [os.path.normpath(name) for name in found]

    return found
|
||||
|
||||
#Filters by extensions in a list of files
|
||||
#flst2 = filterexts(flst,exts)
|
||||
def filterexts(flst,exts):
    """Return the entries of flst whose extension is one of exts.

    exts may be a single extension string or a list of them; each may be
    given with or without its leading dot. Order of flst is preserved.
    """
    if isinstance(exts,str):
        exts = [exts]

    kept = []
    for fname in flst:
        suffix = os.path.splitext(fname)[1]
        #normalise the wanted extensions so they all carry a leading dot
        wanted = [e if e[0]=='.' else '.'+e for e in exts]
        if suffix in wanted:
            kept.append(fname)
    return kept
|
||||
|
||||
#Find a file fname, starting in pth and recursing
|
||||
#Used for finding library files to link
|
||||
def findfile(fname,pth,**kwargs):
    """Search pth recursively for a file named fname.

    Returns the full path of the last match in flist order, or "" when
    nothing matches. Extra keyword arguments are accepted but ignored.
    """
    match = ""
    for candidate in flist(pth,recurse=True):
        if os.path.basename(candidate) == fname:
            match = candidate
    return match
|
||||
|
||||
#List to space-separated string
|
||||
def list_to_sss(lst):
    """Join the strings in lst into one string separated by single spaces."""
    return " ".join(lst)
|
||||
|
||||
def strip_whitespace(strin):
    """Return strin without leading/trailing spaces, tabs, CRs and newlines.

    Only those four characters are stripped; a string made up entirely of
    them yields "".
    """
    return strin.strip(' \t\r\n')
|
||||
|
||||
def sss_to_list(sss):
    """Split a space-separated string into a list of its items.

    Each item is stripped of surrounding spaces, tabs, CRs and newlines.
    Empty items - produced by repeated, leading or trailing spaces, or by an
    empty input string - are dropped, so the result never contains "".
    """
    items = []
    for piece in sss.split(' '):
        piece = piece.strip(' \t\r\n')
        #skip the empty strings that split(' ') yields between adjacent spaces
        if piece:
            items.append(piece)
    return items
|
||||
|
||||
|
||||
def replaceext(fname,ext):
    """Return fname with its extension replaced by ext.

    ext may be given with or without the leading dot; an empty ext simply
    removes the existing extension.
    """
    if len(ext) == 0:
        return os.path.splitext(fname)[0]
    suffix = ext if ext[0] == '.' else '.' + ext
    return os.path.splitext(fname)[0] + suffix
|
||||
|
||||
def replaceexts(fnamelist,ext):
    """Apply replaceext(name, ext) to every name in fnamelist; returns a new list."""
    return [replaceext(name,ext) for name in fnamelist]
|
||||
|
||||
# def except_contains_oldv(lst1,exc):
|
||||
# lst2 = []
|
||||
# for item in lst1:
|
||||
# b = 1
|
||||
# for item2 in exc:
|
||||
# if(item.find(item2)>=0):
|
||||
# b = 0
|
||||
# break
|
||||
# if(b==1):
|
||||
# lst2.append(item)
|
||||
# return lst2
|
||||
|
||||
#filenames must match
|
||||
def except_contains(lst1,exc):
    """Return the paths in lst1 whose file name is not listed in exc.

    Only the final path component is compared, and it must equal an entry
    of exc exactly for the path to be dropped.
    """
    kept = []
    for path in lst1:
        name = os.path.basename(path)
        if any(name == excluded for excluded in exc):
            continue
        kept.append(path)
    return kept
|
||||
|
||||
##########################
|
||||
##System Call Procedures##
|
||||
##########################
|
||||
|
||||
def callproc(cmd, **kwargs):
    """Run cmd through the shell, echoing and optionally logging its output.

    stderr is merged into stdout and the combined output is captured.

    Keyword options:
      echo    - print the command and its output (default True)
      logfile - path of a file to append the command and its output to;
                "" (the default) disables logging

    Returns None. The exit status of the command is not checked.

    NOTE: cmd is handed to the shell as a single string, so callers are
    responsible for quoting any paths that contain spaces or shell
    metacharacters.
    """
    logfile = kwargs.get('logfile',"")
    echo = kwargs.get('echo',True)

    #open the log before running so an unwritable log path fails up front
    fp = open(logfile,'a+') if logfile else None
    try:
        if echo:
            print(cmd)

        proc = subprocess.Popen(cmd,stderr=subprocess.STDOUT,stdout=subprocess.PIPE,shell=True)
        (out, err) = proc.communicate()

        #tool output is not guaranteed to be valid UTF-8; never crash on it
        out = out.decode(encoding='utf-8',errors='replace')

        if echo:
            print(out)

        if fp is not None:
            fp.write(cmd+'\n')
            fp.write(out+'\n')
    finally:
        #close the log even when the subprocess call raises
        if fp is not None:
            fp.close()
|
||||
|
||||
#######################################
|
||||
##Compiler, Archive, and Linker Calls##
|
||||
#######################################
|
||||
|
||||
def smartcompile(srcfile,objext='.o'):
    """Decide whether srcfile needs to be recompiled.

    Returns True when the object file (srcfile with extension objext) does
    not exist or is older than srcfile, False otherwise.
    """
    src_mtime = os.path.getmtime(srcfile)
    objfile = replaceext(srcfile,objext)
    if not os.path.exists(objfile):
        return True
    #an object at least as new as its source is considered up to date
    return os.path.getmtime(objfile) < src_mtime
|
||||
|
||||
#MSVC compiler wrapper
|
||||
def msvc_compile(compilername, srcfile, **kwargs):
    """Compile one source file with an MSVC-style command line.

    Keyword options (defaults in brackets):
      include     - include flags, string or list of strings ['']
      flags       - compiler flags, string or list of strings ['']
      objext      - extension of the object file ['.obj']
      srcfileflag - flag placed before the source file ['/c']
      outfileflag - flag joined directly to the output file ['/Fo:']
      logfile     - log file passed on to callproc ['']
    """
    include = kwargs.get('include','')
    if isinstance(include,list):
        include = list_to_sss(include)

    flags = kwargs.get('flags','')
    if isinstance(flags,list):
        flags = list_to_sss(flags)

    objext = kwargs.get('objext','.obj')
    srcfileflag = kwargs.get('srcfileflag','/c')
    outfileflag = kwargs.get('outfileflag','/Fo:')
    logfile = kwargs.get('logfile',"")

    outfile = replaceext(srcfile,objext)
    #the empty element reproduces the double space after the flags
    ln = " ".join([compilername,flags,"",srcfileflag,srcfile,outfileflag+outfile,include])

    callproc(ln,echo=True,logfile=logfile)
    return
|
||||
|
||||
#MSVC compiler wrapper
|
||||
def msvc_compile_list(compiler,srclist,**kwargs):
    """Call msvc_compile on every file in srclist with the same options."""
    for src in srclist:
        msvc_compile(compiler,src,**kwargs)
|
||||
|
||||
#gnu-style compiler compile: Should work with gcc, g++, gfortran
|
||||
def gs_compile(compiler,srcfile,**kwargs):
    """Compile one source file with a gnu-style command line.

    Keyword options (defaults in brackets):
      include      - include flags, string or list of strings ['']
      flags        - compiler flags, string or list of strings ['']
      objext       - extension of the object file ['.o']
      srcfileflag  - flag placed before the source file ['-c']
      outfileflag  - flag placed before the output file ['-o']
      logfile      - log file passed on to callproc ['']
      smartcompile - skip the compile when smartcompile() reports the
                     object file is up to date [True]
    """
    include = kwargs.get('include','')
    if isinstance(include,list):
        include = list_to_sss(include)

    flags = kwargs.get('flags','')
    if isinstance(flags,list):
        flags = list_to_sss(flags)

    objext = kwargs.get('objext','.o')
    srcfileflag = kwargs.get('srcfileflag','-c')
    outfileflag = kwargs.get('outfileflag','-o')
    logfile = kwargs.get('logfile',"")
    _smartcompile = kwargs.get('smartcompile',True)

    #nothing to do when the existing object is already current
    if _smartcompile and not smartcompile(srcfile,objext):
        return

    outfile = replaceext(srcfile,objext)
    ln = " ".join([compiler,flags,outfileflag,outfile,srcfileflag,srcfile,include])
    callproc(ln,echo=True,logfile=logfile)
    return
|
||||
|
||||
def gs_compile_list(compiler,srclist,**kwargs):
    """Call gs_compile on every file in srclist with the same options."""
    for src in srclist:
        gs_compile(compiler,src,**kwargs)
|
||||
|
||||
def gs_compile_all(compiler,srcdir,srcexts,**kwargs):
    """Compile every file under srcdir whose extension is in srcexts.

    The 'recurse' option (default True) controls whether subdirectories are
    searched; all options are also forwarded to gs_compile.
    """
    recurse = kwargs.get('recurse',True)
    for src in flist(srcdir,exts=srcexts,recurse=recurse):
        gs_compile(compiler,src,**kwargs)
    return
|
||||
|
||||
def gs_link_all(linker,srcpath,target,**kwargs):
    """Link every object file found under srcpath into target.

    'objext' (default '.o') selects the object files and 'recurse'
    (default True) controls the directory search; all options are also
    forwarded to gs_link_list.
    """
    objext = kwargs.get('objext','.o')
    recurse = kwargs.get('recurse',True)

    objects = flist(srcpath,exts=objext,recurse=recurse)
    gs_link_list(linker,list_to_sss(objects),target,**kwargs)
    return
|
||||
|
||||
def gs_link_list(linker,objlist,target,**kwargs):
    """Link object files into target with a gnu-style linker command.

    objlist is the space-separated string of object files; a list or tuple
    of file names is also accepted and joined with spaces.

    Keyword options (all default to ''):
      libdir      - library search path flags (e.g. "-L./lib")
      staticlibs  - static libraries named directly on the command line
      libflags    - library flags (e.g. "-lfoo")
      linkerflags - any further linker flags
      logfile     - log file passed on to callproc
    Other keywords (such as 'objext' or 'recurse') are accepted and ignored.
    """
    #callers used to have to pre-join the objects; accept real lists too
    if isinstance(objlist,(list,tuple)):
        objlist = list_to_sss(objlist)

    libdir = kwargs.get('libdir','')
    staticlibs = kwargs.get('staticlibs','')
    libflags = kwargs.get('libflags','')
    linkerflags = kwargs.get('linkerflags','')
    logfile = kwargs.get('logfile','')

    ln = linker+" -o "+target+" "+libdir
    ln = ln+" "+objlist+" "+staticlibs+" "+libflags+" "+linkerflags

    callproc(ln,logfile=logfile)
    return
|
||||
|
||||
def msvc_link_list(objlist,target,**kwargs):
    """Link object files into target with the MSVC 'link' tool.

    objlist is the space-separated string of object files; a list or tuple
    of file names is also accepted and joined with spaces.

    Keyword options (all default to ''):
      libdir      - library search path flags (e.g. "/LIBPATH:./lib")
      staticlibs  - static libraries named directly on the command line
      libflags    - libraries appended after the /out: argument
      linkerflags - any further linker flags
      logfile     - log file passed on to callproc
    Other keywords (such as 'objext' or 'recurse') are accepted and ignored.
    """
    linker = 'link'

    #callers used to have to pre-join the objects; accept real lists too
    if isinstance(objlist,(list,tuple)):
        objlist = list_to_sss(objlist)

    libdir = kwargs.get('libdir','')
    staticlibs = kwargs.get('staticlibs','')
    libflags = kwargs.get('libflags','')
    linkerflags = kwargs.get('linkerflags','')
    logfile = kwargs.get('logfile','')

    ln = linker+" "+libdir
    ln = ln+" "+objlist+" "+staticlibs+" "+linkerflags
    ln = ln+" /out:"+target+" "+libflags

    callproc(ln,logfile=logfile)
    return
|
||||
|
||||
def ar_all(srcpath,arname,**kwargs):
    """Archive every object file found under srcpath into arname.

    'objext' (default '.o') selects the object files and 'recurse'
    (default True) controls the directory search; all options are also
    forwarded to ar_list.
    """
    recurse = kwargs.get('recurse',True)
    objext = kwargs.get('objext','.o')

    ar_list(flist(srcpath,exts=objext,recurse=recurse),arname,**kwargs)
    return
|
||||
|
||||
def msvc_lib_list(objlist,arname,**kwargs):
    """Build the static library arname from objlist with the MSVC 'lib' tool.

    Extra keyword arguments are accepted but ignored.
    """
    callproc("lib "+list_to_sss(objlist)+" /out:"+arname)
    return
|
||||
|
||||
def ar_list(objlist,arname,**kwargs):
    """Archive the object files in objlist into arname by running 'ar cr'.

    Extra keyword arguments are accepted but ignored.
    """
    callproc("ar cr "+arname+" "+list_to_sss(objlist))
    return
|
||||
|
||||
def ar_add_list(objlist,arname,**kwargs):
    """Add the object files in objlist to the archive arname.

    Runs 'ar r', which inserts the files into the archive and replaces any
    members of the same name. Extra keyword arguments are accepted but
    ignored.
    """
    objlist2 = list_to_sss(objlist)

    #the 't' key used previously only prints the archive's table of
    #contents; 'r' is the key that actually adds members
    ln = "ar r "+arname+" "+objlist2
    callproc(ln)
    return
|
||||
|
||||
##############################
|
||||
##Derived Compiler Functions##
|
||||
##############################
|
||||
|
||||
def gcc_compile(srcfile,**kwargs):
    """Compile one source file with gcc; the object extension is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile('gcc',srcfile,**kwargs)
|
||||
|
||||
def gcc_compile_all(srcdir,**kwargs):
    """Compile every '.c' file under srcdir with gcc; objects get the '.o' extension."""
    kwargs.update(objext='.o')
    gs_compile_all('gcc',srcdir,['.c'],**kwargs)
|
||||
|
||||
def gcc_compile_list(srclist,**kwargs):
    """Compile each file in srclist with gcc; the object extension is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile_list('gcc',srclist,**kwargs)
|
||||
|
||||
def gpp_compile(srcfile,**kwargs):
    """Compile one source file with g++; the object extension is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile('g++',srcfile,**kwargs)
|
||||
|
||||
def gpp_compile_all(srcdir,**kwargs):
    """Compile every '.c' and '.cpp' file under srcdir with g++; objects get the '.o' extension."""
    kwargs.update(objext='.o')
    gs_compile_all('g++',srcdir,['.c','.cpp'],**kwargs)
|
||||
|
||||
def gpp_compile_list(srclist,**kwargs):
    """Compile each file in srclist with g++; the object extension is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile_list('g++',srclist,**kwargs)
|
||||
|
||||
def gfortran_compile(srcfile,**kwargs):
    """Compile one source file with gfortran; the object extension is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile('gfortran',srcfile,**kwargs)
|
||||
|
||||
def gfortran_compile_all(srcdir,**kwargs):
    """Compile every '.f', '.f90' and '.f77' file under srcdir with gfortran; objects get the '.o' extension."""
    kwargs.update(objext='.o')
    gs_compile_all('gfortran',srcdir,['.f','.f90','.f77'],**kwargs)
|
||||
|
||||
def gfortran_compile_list(srclist,**kwargs):
    """Compile each file in srclist with gfortran; the object extension is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile_list('gfortran',srclist,**kwargs)
|
||||
|
||||
def clang_compile(srcfile,**kwargs):
    """Compile one source file with clang++; the object extension is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile('clang++',srcfile,**kwargs)
|
||||
|
||||
def clang_compile_all(srcdir,**kwargs):
    """Compile every '.c' and '.cpp' file under srcdir with clang++; objects get the '.o' extension."""
    kwargs.update(objext='.o')
    gs_compile_all('clang++',srcdir,['.c','.cpp'],**kwargs)
|
||||
|
||||
def clang_compile_list(srclist,**kwargs):
    """Compile each file in srclist with clang++; the object extension is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile_list('clang++',srclist,**kwargs)
|
@ -0,0 +1,45 @@
|
||||
#!/usr/bin/python3
"""Build the amsculib2 static library for linux64 and publish it.

Every source under ./src except the main program files is compiled with
nvcc, the resulting objects are archived into a static library, and the
library and ./include are copied to the common folders.
"""

import os
import sys
import subprocess
import math
from complib2 import *

import shutil
from shutil import copytree as copytree

libname = 'amsculib2.linux64'  #prefix static library name to generate
targetname = 'test'  #create this executable when compiling tests
commonincdir = "../../linux64/include"
commonlibdir = "../../linux64/lib"
localbindir = "./bin_linux64"
cc = 'nvcc'  #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.c','main.cpp','main.cu']  #ignore these files when compiling the static library

include = "-I./include -I" + commonincdir
kwargs = {
    'include': include,
    #-dc flag: relocatable device code - needed for device functions to link in different "execution units"
    'flags': "-dc",
    'libdir': "-L" + localbindir + " -L" + commonlibdir,
    'libflags': "-l" + libname,
    'linkerflags': "",
    'recurse': True,
}

#find all source files, except the main project files
files = except_contains(flist('./src',exts = srcexts, recurse=True),mainsrc)
objfiles = replaceexts(files,'.o')
objfiles_sss = list_to_sss(objfiles)

#compile all the source files in the list
gs_compile_list(cc,files,**kwargs)

#archive the compiled objects into a static library
libfile = localbindir + "/lib" + libname + ".a"
ar_list(objfiles,libfile)

#Push the library to the common lib folder
shutil.copy(libfile,commonlibdir)

#Copy include files to the common include folder
copytree('./include/',commonincdir+'/',dirs_exist_ok=True)
|
@ -0,0 +1,38 @@
|
||||
#!/usr/bin/python3
"""Compile ./src/main.cu with nvcc and link it into the linux64 test executable."""

import os
import sys
import subprocess
import math
from complib2 import *

import shutil

libname = 'amsculib2.linux64'  #prefix static library name to generate
targetname = 'test'  #create this executable when compiling tests
commonincdir = "../../linux64/include"
commonlibdir = "../../linux64/lib"
localbindir = "./bin_linux64"
cc = 'nvcc'  #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.c','main.cpp','main.cu']  #ignore these files when compiling the static library

include = "-I./include -I" + commonincdir
kwargs = {
    'include': include,
    #-dc flag: relocatable device code - needed for device functions to link in different "execution units"
    'flags': "-dc",
    'libdir': "-L" + localbindir + " -L" + commonlibdir,
    'libflags': "-l" + libname,
    'linkerflags': "",
    'recurse': True,
}

#Designate source files for main test program
fsrc = ['./src/main.cu']
fobj = replaceexts(fsrc,'.o')

#Compile and link the test program
gs_compile_list(cc,fsrc,**kwargs)
gs_link_list(cc,list_to_sss(fobj),localbindir + "/" + targetname,**kwargs)
|
@ -0,0 +1,45 @@
|
||||
#!/usr/bin/python3
"""Build the assetcuda static library for winx64 and publish it.

Every source under ./src except the main program files is compiled through
msvc_compile_list, the objects are combined with msvc_lib_list, and the
library and ./include are copied to the common folders.
"""

import os
import sys
import subprocess
import math
from complib2 import *

import shutil
from shutil import copytree as copytree

libname = 'assetcuda.msvc64'  #prefix static library name to generate
targetname = 'main'  #create this executable when compiling tests
commonincdir = "../../winx64/include"
commonlibdir = "../../winx64/lib"
localbindir = "./bin_winx64"
cc = 'nvcc'  #compiler
srcexts = ['.c','.cpp']
mainsrc = ['main.c','main.cpp','main.cu']  #ignore these files when compiling the static library

include = "-I./include -I" + commonincdir
kwargs = {
    'include': include,
    'flags': "/O2",
    'libdir': "/LIBPATH:" + localbindir + " /LIBPATH:" + commonlibdir,
    'libflags': "-l" + libname,
    'linkerflags': "",
    'recurse': True,
}

#find all source files, except the main project files
files = except_contains(flist('./src',exts = srcexts, recurse=True),mainsrc)
objfiles = replaceexts(files,'.obj')
objfiles_sss = list_to_sss(objfiles)

#compile all the source files in the list
msvc_compile_list(cc,files,**kwargs)

#combine the compiled objects into a static library
libfile = localbindir + "/lib" + libname + ".lib"
msvc_lib_list(objfiles,libfile)

#Push the library to the common lib folder
shutil.copy(libfile,commonlibdir)

#Copy include files to the common include folder
copytree('./include/',commonincdir+'/',dirs_exist_ok=True)
|
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/python3
"""Compile ./src/main.cu and link it into the winx64 tests executable."""

import os,sys,subprocess,math
from complib2 import *

#NOTE: the unused `distutils.dir_util.copy_tree` import was dropped here;
#distutils no longer ships with Python 3.12+, so the import alone made this
#script fail before doing any work.
import shutil

libname = 'assetcuda.msvc64' #prefix static library name to generate
targetname = 'tests.exe' #create this executable when compiling tests
commonincdir = "../../winx64/include"
commonlibdir = "../../winx64/lib"
localbindir = "./bin_winx64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp']
mainsrc = ['main.c','main.cpp','main.cu'] #ignore these files when compiling the static library

kwargs = dict()
include = "-I./include -I{}".format(commonincdir)
kwargs['include'] = include
kwargs['flags'] = "/O2"
kwargs['libdir'] = "/LIBPATH:{} /LIBPATH:{}".format(localbindir,commonlibdir)
#Fuller library list used by other projects built from this template:
#kwargs['libflags'] = "lib{}.lib libamsearthtools.msvc64.lib libamsmeshtools.msvc64.lib libamsmathlib3.msvc64.lib libamsmatrix_cpp.msvc64.lib liblapack.a libblas.a libamsstring3.msvc64.lib libamsmathutil2.msvc64.lib".format(libname)
kwargs['libflags'] = "lib{}.lib".format(libname)
kwargs['linkerflags'] = ""
kwargs['recurse'] = True

#Designate source files for main test program
fsrc = ['./src/main.cu']
fobj = replaceexts(fsrc,'.obj')

#Compile test programs
msvc_compile_list(cc,fsrc,**kwargs)
msvc_link_list(list_to_sss(fobj),'{}/{}'.format(localbindir,targetname),**kwargs)
|
@ -0,0 +1,44 @@
|
||||
#!/usr/bin/python3
"""Build the amsculib2 static library for winx64 and publish it.

Every source under ./src except the main program files is compiled with
nvcc, the objects are combined with msvc_lib_list, and the library and
./include are copied to the common folders.
"""

import os,sys,subprocess,math
from complib2 import *

import shutil
#shutil.copytree replaces distutils.dir_util.copy_tree, which no longer
#ships with Python 3.12+; dirs_exist_ok=True keeps the overwrite behaviour.
from shutil import copytree

libname = 'amsculib2.msvc64' #prefix static library name to generate
targetname = 'test' #create this executable when compiling tests
commonincdir = "../../winx64/include"
commonlibdir = "../../winx64/lib"
localbindir = "./bin_winx64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.c','main.cpp'] #ignore these files when compiling the static library

kwargs = dict()
include = "-I./include -I{}".format(commonincdir)
kwargs['include'] = include
kwargs['flags'] = "-dc"
kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir)
kwargs['libflags'] = "-l{}".format(libname)
kwargs['linkerflags'] = ""
kwargs['recurse'] = True

#find all source files, except the main project files
files = flist('./src',exts = srcexts, recurse=True)
files = except_contains(files,mainsrc)
objfiles = replaceexts(files,'.o')
objfiles_sss = list_to_sss(objfiles)

#compile all the source files in the list
gs_compile_list(cc,files,**kwargs)

#combine the compiled objects into a static library
msvc_lib_list(objfiles,'{}/lib{}.lib'.format(localbindir,libname))

#Push any libraries to the common lib folder
shutil.copy('{}/lib{}.lib'.format(localbindir,libname),commonlibdir)

#Copy include files to the common include folder (existing files are overwritten)
copytree('./include/',commonincdir+'/',dirs_exist_ok=True)
|
@ -0,0 +1,38 @@
|
||||
#!/usr/bin/python3
"""Compile ./src/main.cpp with nvcc and link it into the winx64 test executable."""

import os,sys,subprocess,math
from complib2 import *

#NOTE: the unused `distutils.dir_util.copy_tree` import was dropped here;
#distutils no longer ships with Python 3.12+, so the import alone made this
#script fail before doing any work.
import shutil

libname = 'amsculib2.msvc64' #prefix static library name to generate
targetname = 'test' #create this executable when compiling tests
commonincdir = "../../winx64/include"
commonlibdir = "../../winx64/lib"
localbindir = "./bin_winx64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.c','main.cpp'] #ignore these files when compiling the static library

kwargs = dict()
include = "-I./include -I{}".format(commonincdir)
kwargs['include'] = include
kwargs['flags'] = "-dc"
kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir)
kwargs['libflags'] = "-llib{}".format(libname)
kwargs['linkerflags'] = ""
kwargs['recurse'] = True

#Designate source files for main test program
fsrc = ['./src/main.cpp']
fobj = replaceexts(fsrc,'.o')

#Compile test programs
gs_compile_list(cc,fsrc,**kwargs)
gs_link_list(cc,list_to_sss(fobj),'{}/{}'.format(localbindir,targetname),**kwargs)
|
@ -0,0 +1,49 @@
|
||||
#!/usr/bin/python3
"""Build the amsculib2 static library for winx64 and publish it.

Every source under ./src except the main program is compiled incrementally
with nvcc, the objects found in the object store are combined with
msvc_lib_list, and the library and ./include are copied to the common
folders.
"""

import os
import sys
import subprocess
import math
from complib2 import *
from complib3 import gs_incremental_compile, gs_incremental_compile_list

import shutil
from shutil import copytree

libname = 'amsculib2.msvc64'  #prefix static library name to generate
targetname = 'test'  #create this executable when compiling tests
commonincdir = "../../winx64/include"
commonlibdir = "../../winx64/lib"
localbindir = "./bin_winx64"
cc = 'nvcc'  #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.cu']  #ignore these files when compiling the static library

include = "-I./include -I" + commonincdir
kwargs = {
    'include': include,
    'flags': "-dc",
    'libdir': "-L" + localbindir + " -L" + commonlibdir,
    'libflags': "-l" + libname,
    'linkerflags': "",
    'recurse': True,
    'objstore': "./objstore",
    'searchincdirs': ['./include'],
}

#find all source files, except the main project files
files = except_contains(flist('./src',exts = srcexts, recurse=True),mainsrc)
objfiles = replaceexts(files,'.o')
objfiles_sss = list_to_sss(objfiles)

#compile all the source files in the list
gs_incremental_compile_list(cc,files,**kwargs)

#combine every object in the object store into a static library
objlist = flist(kwargs['objstore'],exts='.o',recurse=True)
libfile = localbindir + "/lib" + libname + ".lib"
msvc_lib_list(objlist,libfile)

#Push the library to the common lib folder
shutil.copy(libfile,commonlibdir)

#Copy include files to the common include folder
copytree('./include/',commonincdir+'/',dirs_exist_ok=True)
|
@ -0,0 +1,43 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import os,sys,subprocess,math
|
||||
from complib2 import *
|
||||
from complib3 import gs_incremental_compile, gs_incremental_compile_list
|
||||
|
||||
import shutil
|
||||
from shutil import copytree
|
||||
|
||||
#Build script: compiles ./src/main.cu with nvcc and links it against the
#amsculib2 static library to produce the test executable in the local bin folder.

libname = 'amsculib2.msvc64' #prefix static library name to generate
targetname = 'test' #create this executable when compiling tests
commonincdir = "../../winx64/include"
commonlibdir = "../../winx64/lib"
localbindir = "./bin_winx64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.cu'] #ignore these files when compiling the static library

#Options forwarded to the compile and link helpers
include = "-I./include -I{}".format(commonincdir)
kwargs = {
    'include': include,
    'flags': "-dc",
    'libdir': "-L{} -L{}".format(localbindir,commonlibdir),
    'libflags': "-llib{} -llibamsculib2.msvc64".format(libname),
    'linkerflags': "",
    'recurse': True,
    'objstore': "./objstore",
    'searchincdirs': ['./include'],
}

#No dynamic libraries are pulled into the bin folder for this target.

#Designate source files for main test program
fsrc = ['./src/main.cu']
fobj = replaceexts(fsrc,'.o')

#Compile and link the test program
gs_compile_list(cc,fsrc,**kwargs)
gs_link_list(
    cc,
    list_to_sss(fobj),
    '{}/{}'.format(localbindir,targetname),
    **kwargs
)
|
@ -0,0 +1,89 @@
|
||||
#ifndef __AMSCU_COMP128_HPP__
|
||||
#define __AMSCU_COMP128_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
namespace cmp
|
||||
{
|
||||
|
||||
class cucomp128
|
||||
{
|
||||
public:
|
||||
double real;
|
||||
double imag;
|
||||
|
||||
__host__ __device__ cucomp128();
|
||||
__host__ __device__ ~cucomp128();
|
||||
__host__ __device__ cucomp128(const cucomp128 &other);
|
||||
__host__ __device__ cucomp128(const double &other);
|
||||
|
||||
__host__ __device__ cucomp128& operator=(cucomp128& other);
|
||||
__host__ __device__ const cucomp128& operator=(const cucomp128& other);
|
||||
__host__ __device__ cucomp128& operator=(double& other);
|
||||
__host__ __device__ const cucomp128& operator=(const double& other);
|
||||
|
||||
__host__ __device__ double& operator[](int& ind);
|
||||
__host__ __device__ const double& operator[](const int& ind) const;
|
||||
|
||||
__host__ __device__ cucomp128 operator+(const cucomp128& z);
|
||||
__host__ __device__ cucomp128 operator-(const cucomp128& z);
|
||||
__host__ __device__ cucomp128 operator*(const cucomp128& z);
|
||||
__host__ __device__ cucomp128 operator/(const cucomp128& z);
|
||||
|
||||
__host__ __device__ cucomp128 operator+(const double& z);
|
||||
__host__ __device__ cucomp128 operator-(const double& z);
|
||||
__host__ __device__ cucomp128 operator*(const double& z);
|
||||
__host__ __device__ cucomp128 operator/(const double& z);
|
||||
|
||||
__host__ __device__ friend cucomp128 operator-(const cucomp128& z); //negation sign
|
||||
|
||||
//comparison operators
|
||||
__host__ __device__ bool operator==(const cucomp128& z) const;
|
||||
__host__ __device__ bool operator!=(const cucomp128& z) const;
|
||||
__host__ __device__ bool operator>(const cucomp128& z) const;
|
||||
__host__ __device__ bool operator<(const cucomp128& z) const;
|
||||
__host__ __device__ bool operator>=(const cucomp128& z) const;
|
||||
__host__ __device__ bool operator<=(const cucomp128& z) const;
|
||||
|
||||
__host__ __device__ bool isnan() const;
|
||||
__host__ __device__ bool isinf() const;
|
||||
|
||||
__host__ __device__ bool isreal() const;
|
||||
__host__ __device__ bool isimag() const;
|
||||
__host__ __device__ bool iszero() const;
|
||||
__host__ __device__ double arg() const;
|
||||
__host__ __device__ double mag() const;
|
||||
__host__ __device__ cucomp128 conj() const;
|
||||
};
|
||||
|
||||
// Free functions operating on cucomp128 values; all take their arguments by value.

// argument of z
__host__ __device__ double arg(cucomp128 z);

// builds a complex value from a (_r, _i) pair of doubles
__host__ __device__ cucomp128 dtocomp(double _r, double _i);

// component accessors
__host__ __device__ double real(cucomp128 z);
__host__ __device__ double imag(cucomp128 z);

// elementary functions
__host__ __device__ cucomp128 sin(cucomp128 z);
__host__ __device__ cucomp128 cos(cucomp128 z);
__host__ __device__ cucomp128 tan(cucomp128 z);
__host__ __device__ cucomp128 exp(cucomp128 z);
__host__ __device__ cucomp128 log(cucomp128 z);
__host__ __device__ double abs(cucomp128 z);
__host__ __device__ cucomp128 conj(cucomp128 z);

// hyperbolic trig functions
__host__ __device__ cucomp128 cosh(cucomp128 z);
__host__ __device__ cucomp128 sinh(cucomp128 z);
__host__ __device__ cucomp128 tanh(cucomp128 z);

// power function of two complex arguments
__host__ __device__ cucomp128 pow(cucomp128 z1, cucomp128 z2);

// returns "complex sign" of complex number - 0, or a unit number with same argument
__host__ __device__ cucomp128 csgn(cucomp128 z);

// host-only test routine for cucomp128
void test_cucomp128_1();
|
||||
|
||||
|
||||
}; //end namespace cmp
|
||||
}; //end namespace amscuda
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,88 @@
|
||||
#ifndef __AMSCU_COMP64_HPP__
|
||||
#define __AMSCU_COMP64_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
namespace cmp
|
||||
{
|
||||
|
||||
class cucomp64
|
||||
{
|
||||
public:
|
||||
float real;
|
||||
float imag;
|
||||
|
||||
__host__ __device__ cucomp64();
|
||||
__host__ __device__ ~cucomp64();
|
||||
__host__ __device__ cucomp64(const cucomp64 &other);
|
||||
__host__ __device__ cucomp64(const float &other);
|
||||
|
||||
__host__ __device__ cucomp64& operator=(cucomp64& other);
|
||||
__host__ __device__ const cucomp64& operator=(const cucomp64& other);
|
||||
__host__ __device__ cucomp64& operator=(float& other);
|
||||
__host__ __device__ const cucomp64& operator=(const float& other);
|
||||
|
||||
__host__ __device__ float& operator[](int& ind);
|
||||
__host__ __device__ const float& operator[](const int& ind) const;
|
||||
|
||||
__host__ __device__ cucomp64 operator+(const cucomp64& z);
|
||||
__host__ __device__ cucomp64 operator-(const cucomp64& z);
|
||||
__host__ __device__ cucomp64 operator*(const cucomp64& z);
|
||||
__host__ __device__ cucomp64 operator/(const cucomp64& z);
|
||||
|
||||
__host__ __device__ cucomp64 operator+(const float& z);
|
||||
__host__ __device__ cucomp64 operator-(const float& z);
|
||||
__host__ __device__ cucomp64 operator*(const float& z);
|
||||
__host__ __device__ cucomp64 operator/(const float& z);
|
||||
|
||||
__host__ __device__ friend cucomp64 operator-(const cucomp64& z); //negation sign
|
||||
|
||||
//comparison operators
|
||||
__host__ __device__ bool operator==(const cucomp64& z) const;
|
||||
__host__ __device__ bool operator!=(const cucomp64& z) const;
|
||||
__host__ __device__ bool operator>(const cucomp64& z) const;
|
||||
__host__ __device__ bool operator<(const cucomp64& z) const;
|
||||
__host__ __device__ bool operator>=(const cucomp64& z) const;
|
||||
__host__ __device__ bool operator<=(const cucomp64& z) const;
|
||||
|
||||
__host__ __device__ bool isnan() const;
|
||||
__host__ __device__ bool isinf() const;
|
||||
|
||||
__host__ __device__ bool isreal() const;
|
||||
__host__ __device__ bool isimag() const;
|
||||
__host__ __device__ bool iszero() const;
|
||||
__host__ __device__ float arg() const;
|
||||
__host__ __device__ float mag() const;
|
||||
__host__ __device__ cucomp64 conj() const;
|
||||
};
|
||||
|
||||
// Free functions operating on cucomp64 values; all take their arguments by value.

// argument of z
__host__ __device__ float arg(cucomp64 z);

// builds a complex value from a (_r, _i) pair of floats
__host__ __device__ cucomp64 dtocomp64(float _r, float _i);

// component accessors
__host__ __device__ float real(cucomp64 z);
__host__ __device__ float imag(cucomp64 z);

// elementary functions
__host__ __device__ cucomp64 sin(cucomp64 z);
__host__ __device__ cucomp64 cos(cucomp64 z);
__host__ __device__ cucomp64 tan(cucomp64 z);
__host__ __device__ cucomp64 exp(cucomp64 z);
__host__ __device__ cucomp64 log(cucomp64 z);
__host__ __device__ float abs(cucomp64 z);
__host__ __device__ cucomp64 conj(cucomp64 z);

// hyperbolic trig functions
__host__ __device__ cucomp64 cosh(cucomp64 z);
__host__ __device__ cucomp64 sinh(cucomp64 z);
__host__ __device__ cucomp64 tanh(cucomp64 z);

// power function of two complex arguments
__host__ __device__ cucomp64 pow(cucomp64 z1, cucomp64 z2);

// returns "complex sign" of complex number - 0, or a unit number with same argument
__host__ __device__ cucomp64 csgn(cucomp64 z);

// host-only test routine for cucomp64
void test_cucomp64_1();
|
||||
|
||||
}; //end namespace cmp
|
||||
}; //end namespace amscuda
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,40 @@
|
||||
#ifndef __AMSCU_CUDAFUNCTIONS_HPP__
|
||||
#define __AMSCU_CUDAFUNCTIONS_HPP__
|
||||
|
||||
|
||||
namespace amscuda
{
    // Device memory operations.
    // These wrappers exist to cut down the boilerplate involved in calling
    // the CUDA runtime and checking its errors. Each returns an int status:
    // 1 on success and a negative code on failure.

    // Wrapper to cudaFree: frees *devptr if it is not already NULL, and sets
    // *devptr to NULL. Returns 0 when there was nothing to free.
    template<typename T>
    int cuda_free(T **devptr);

    // Copies N elements of hostbuffer to *devbuffer.
    // Allocates *devbuffer when it is NULL.
    // If overwrite is true, frees and reallocates *devbuffer on the device
    // (for resizing). Returns 0 when N<=0.
    template<typename T>
    int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite);

    // Copies N elements from devbuffer to *hostbuffer.
    // Allocates *hostbuffer with new[] when it is NULL.
    // If overwrite is true, deletes and reallocates *hostbuffer on the host
    // with new[] (for resizing). Returns 0 when N<=0.
    template<typename T>
    int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite);

    // Wrapper for cudaMemcpy: copies a single item or struct (count 1) to the
    // device, allocating *devptr when it is not already initialized.
    template<typename T>
    int cuda_copytodevice(T *hostptr, T **devptr);

    // Wrapper for cudaMemcpy: copies a single item or struct (count 1) from
    // the device, allocating *hostptr with new when it is not already initialized.
    template<typename T>
    int cuda_copyfromdevice(T *devptr, T **hostptr);

    // NOTE(review): implemented outside this header - presumably reports any
    // pending CUDA error prefixed with msgheader; confirm against the definition.
    int cuda_errortrap(const char *msgheader);

};
|
||||
|
||||
#include <amsculib2/amscu_cudafunctions_impl.hpp>
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,228 @@
|
||||
#ifndef __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
|
||||
#define __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
//frees devbuffer if it is not already NULL, and sets devbuffer to NULL
|
||||
//wrapper to cudaFree
|
||||
template<typename T> int cuda_free(T **devptr)
|
||||
{
|
||||
int ret = 0;
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
if(*devptr==NULL)
|
||||
{
|
||||
return ret; //devbuffer is already NULL/freed
|
||||
}
|
||||
|
||||
err = cudaFree(*devptr);
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
ret = -1; //failed to free device pointer
|
||||
*devptr = NULL; // - ? should only happen if I'm trying to double-free something
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = 1;
|
||||
*devptr = NULL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
//copies hostbuffer to devbuffer
|
||||
//initializes devbuffer from NULL if devbuffer is NULL
|
||||
//if overwrite is true, deletes and reallocates devbuffer on device (for resizing)
|
||||
template<typename T> int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite)
|
||||
{
|
||||
int ret = 0;
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
if(N<=0)
|
||||
{
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if(hostbuffer==NULL)
|
||||
{
|
||||
ret = -2; //host buffer is NULL
|
||||
return ret;
|
||||
}
|
||||
|
||||
if(overwrite==1)
|
||||
{
|
||||
if(*devbuffer !=NULL)
|
||||
{
|
||||
cuda_free(devbuffer);
|
||||
}
|
||||
}
|
||||
|
||||
if(*devbuffer==NULL)
|
||||
{
|
||||
err = cudaMalloc(devbuffer,sizeof(T)*N);
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
ret = -3; //failed to allocate
|
||||
*devbuffer = NULL;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
err = cudaMemcpy(*devbuffer,hostbuffer,sizeof(T)*N,cudaMemcpyHostToDevice);
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
ret = -4; //failed to copy
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
//copies info from devbuffer to hostbuffer
|
||||
//initialzies hostbuffer from NULL if NULL
|
||||
//if overwrite is true, deletes and reallocates hostbuffer on host (for resizing)
|
||||
template<typename T> int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite)
|
||||
{
|
||||
int ret = 0;
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
if(N<=0)
|
||||
{
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if(devbuffer==NULL)
|
||||
{
|
||||
ret = -5; //null dev buffer
|
||||
return ret;
|
||||
}
|
||||
|
||||
if(overwrite==1 && *hostbuffer!=NULL)
|
||||
{
|
||||
delete[] (*hostbuffer); hostbuffer = NULL;
|
||||
}
|
||||
|
||||
if(*hostbuffer==NULL)
|
||||
{
|
||||
*hostbuffer = new(std::nothrow) T[N];
|
||||
if(*hostbuffer==NULL)
|
||||
{
|
||||
ret = -6; //failed to allocate host buffer
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
err = cudaMemcpy(*hostbuffer, devbuffer, sizeof(T)*N, cudaMemcpyDeviceToHost);
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
ret = -7; //failed to copy
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
//wrapper for cudaMemcpy - copies an item or struct (count 1) to the device
|
||||
//initializes devptr from NULL if not already initialized
|
||||
template<typename T> int cuda_copytodevice(T *hostptr, T **devptr)
|
||||
{
|
||||
int ret = 0;
|
||||
cudaError_t err = cudaSuccess;
|
||||
bool overwrite = 1;
|
||||
|
||||
if(hostptr==NULL)
|
||||
{
|
||||
ret = -2; //host buffer is NULL
|
||||
return ret;
|
||||
}
|
||||
|
||||
if(overwrite==1)
|
||||
{
|
||||
if(*devptr !=NULL)
|
||||
{
|
||||
cuda_free(devptr);
|
||||
}
|
||||
}
|
||||
|
||||
if(*devptr==NULL)
|
||||
{
|
||||
err = cudaMalloc(devptr,sizeof(T));
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
ret = -3; //failed to allocate
|
||||
*devptr = NULL;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
err = cudaMemcpy(*devptr,hostptr,sizeof(T),cudaMemcpyHostToDevice);
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
ret = -4; //failed to copy
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
//wrapper for cudaMemcpy - copies an item or struct (count 1) from device
|
||||
//initializes hostptr from NULL with new if not already initialized
|
||||
template<typename T> int cuda_copyfromdevice(T *devptr, T **hostptr)
|
||||
{
|
||||
int ret = 0;
|
||||
cudaError_t err = cudaSuccess;
|
||||
bool overwrite = 1;
|
||||
|
||||
if(devptr==NULL)
|
||||
{
|
||||
ret = -5; //null dev buffer
|
||||
return ret;
|
||||
}
|
||||
|
||||
if(overwrite==1 && *hostptr!=NULL)
|
||||
{
|
||||
delete (*hostptr); hostptr = NULL;
|
||||
}
|
||||
|
||||
if(*hostptr==NULL)
|
||||
{
|
||||
*hostptr = new(std::nothrow) T;
|
||||
if(*hostptr==NULL)
|
||||
{
|
||||
ret = -6; //failed to allocate host buffer
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
err = cudaMemcpy(*hostptr, devptr, sizeof(T), cudaMemcpyDeviceToHost);
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
ret = -7; //failed to copy
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,55 @@
|
||||
#ifndef __AMSCU_RANDOM_HPP__
|
||||
#define __AMSCU_RANDOM_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
// Random Number Gerneators
|
||||
|
||||
|
||||
// faster floating point hash function used in fractal generators
|
||||
__device__ __host__ float fhash1d_su(float x);
|
||||
|
||||
__device__ __host__ float fhash3d_su(float x, float y, float z);
|
||||
|
||||
__device__ __host__ float fhash4d_su(float x, float y, float z, float w);
|
||||
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// Deterministic Pseudorandom int32_t Generator //
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
//Next seed in simple 32 bit integer deterministic psuedo-rand generator
|
||||
__host__ __device__ void dpr32_nextseed(int32_t *rseed_inout);
|
||||
|
||||
//Simple 32 bit integer deterministic pseudo-random generator
|
||||
// *not* for cryptography
|
||||
// Frequency of generated floats should be uniform [0,1)
|
||||
__host__ __device__ float dpr32_randf(int32_t *rseed_inout);
|
||||
|
||||
//box muller standard normal pseudorandom variable
|
||||
__host__ __device__ float dpr32_randnf(int32_t *rseed_inout);
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// Deterministic Pseudorandom int64_t Generator //
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
//operates without side-effects on explicit seed for threaded use
|
||||
//deterministic pseudorandom number generator - takes seed and returns next seed
|
||||
__host__ __device__ void dpr64_nextseed(int64_t *seedinout);
|
||||
|
||||
//deterministic pseudorandom number generator - takes seed and returns next seed
|
||||
//returns uniformly distributed double
|
||||
__host__ __device__ double dpr64_randd(int64_t *seedinout);
|
||||
|
||||
__host__ __device__ float dpr64_randf(int64_t *seedinout);
|
||||
|
||||
|
||||
void test_dprg64();
|
||||
void test_dprg32();
|
||||
|
||||
|
||||
}; //end namespace amscuda
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,47 @@
|
||||
#ifndef __CUARRAY_HPP__
|
||||
#define __CUARRAY_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
template<typename T> class cuarray
|
||||
{
|
||||
public:
|
||||
int length;
|
||||
T* data;
|
||||
|
||||
__device__ __host__ cuarray();
|
||||
__device__ __host__ ~cuarray();
|
||||
|
||||
//Only call this on the device for thread/block local
|
||||
// dynamic arrays
|
||||
__device__ __host__ int resize(const int _length);
|
||||
|
||||
__device__ __host__ int size() const;
|
||||
__device__ __host__ T& at(const int I);
|
||||
__device__ __host__ const T& at(const int I) const;
|
||||
|
||||
__device__ __host__ T& operator[](const int I);
|
||||
__device__ __host__ const T& operator[](const int I) const;
|
||||
|
||||
|
||||
|
||||
__host__ int device_send(cuarray<T> **dptr);
|
||||
__host__ int _device_send_overwrite(cuarray<T> **dptr);
|
||||
__host__ int _device_send_copy(cuarray<T> *dptr);
|
||||
|
||||
__host__ int device_pull(cuarray<T> *dptr);
|
||||
__host__ int device_free(cuarray<T> **dptr);
|
||||
|
||||
__host__ int device_length(cuarray<T> *dptr);
|
||||
__host__ T* device_data_ptr(cuarray<T> *dptr);
|
||||
|
||||
};
|
||||
|
||||
void test_cuarray();
|
||||
|
||||
};
|
||||
|
||||
#include <amsculib2/amscuarray_impl.hpp>
|
||||
|
||||
#endif
|
@ -0,0 +1,76 @@
|
||||
#ifndef __AMSCUARRAY_DOPS_HPP__
|
||||
#define __AMSCUARRAY_DOPS_HPP__
|
||||
|
||||
//Device Operations on Arrays
|
||||
//
|
||||
|
||||
//Device Operations on Device Buffers
|
||||
// dodb
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
|
||||
//sum
|
||||
template<typename T> T devcuarray_sum(cuarray<T> *devptr);
|
||||
|
||||
template<typename T> T dbuff_sum(T *devbuffer, int N);
|
||||
|
||||
|
||||
struct dbuff_statstruct
|
||||
{
|
||||
public:
|
||||
float min;
|
||||
float max;
|
||||
float mean;
|
||||
float stdev;
|
||||
float sum;
|
||||
};
|
||||
|
||||
//stats (min,max,mean,stdev)
|
||||
|
||||
template<typename T> void dbuff_minmax(T *devbuffer, int N, T *min, T *max);
|
||||
|
||||
template<typename T> dbuff_statstruct dbuff_stats(T *devbuffer, int N); //
|
||||
|
||||
//sets all elements to setto
|
||||
template<typename T> void dbuff_setall(T *devbuffer, int N, T setto, int nblocks, int nthreads);
|
||||
|
||||
//random device buffer functions
|
||||
void dbuff_rand_dpr32(float *devbuffer, int N, int32_t *rseedinout, int nblocks, int nthreads); //
|
||||
void dbuff_rand_dpr32n(float *devbuffer, int N, int32_t *rseedinout, int nblocks, int nthreads); //
|
||||
|
||||
|
||||
void dbuff_rand_dpr64(float *devbuffer, int N, int64_t *rseedinout, int nblocks, int nthreads); //
|
||||
|
||||
//Elementwise device-buffer vector binary operation
|
||||
//takes two input arrays ( , ) --> one output array
|
||||
template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads);
|
||||
|
||||
//Elementwise device-buffer vector two-parameter operation
|
||||
//takes one input array, and a constant paramter ( ) ---> one output array
|
||||
template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads);
|
||||
|
||||
|
||||
//vector_add
|
||||
template<typename T> void dbuff_add(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
|
||||
template<typename T> void dbuff_add(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads);
|
||||
template<typename T> void dbuff_sub(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
|
||||
template<typename T> void dbuff_sub(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads);
|
||||
template<typename T> void dbuff_mult(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
|
||||
template<typename T> void dbuff_mult(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads);
|
||||
template<typename T> void dbuff_div(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
|
||||
template<typename T> void dbuff_div(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads);
|
||||
template<typename T> void dbuff_div(T par_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
|
||||
|
||||
|
||||
// Tests //
|
||||
|
||||
void test_dbuff_rand_dpr32();
|
||||
|
||||
};
|
||||
|
||||
#include <amsculib2/amscuarray_dops_impl.hpp>
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,404 @@
|
||||
#ifndef __AMSCUARRAY_DOPS_IMPL_HPP__
|
||||
#define __AMSCUARRAY_DOPS_IMPL_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
// Kernel: each thread accumulates a partial sum over devbuffer with a
// grid-stride loop and stores it in rets[global thread index].
// rets must hold at least gridDim.x*blockDim.x elements.
template<typename T> __global__ void dbuff_sum_kf(T *devbuffer, int N, T *rets)
{
    const int first  = threadIdx.x + blockIdx.x*blockDim.x;
    const int stride = blockDim.x*gridDim.x;

    T partial = (T) 0;
    for(int idx=first; idx<N; idx=idx+stride)
    {
        // written as x = x + y: only operator+ is required of T
        partial = partial + devbuffer[idx];
    }

    // every thread writes its slot, even when it summed nothing
    rets[first] = partial;
}
|
||||
|
||||
template<typename T> T devcuarray_sum(cuarray<T> *devptr)
|
||||
{
|
||||
T ret = T();
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
cuarray<T> ldptr;
|
||||
|
||||
cudaMemcpy(&ldptr,devptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
|
||||
|
||||
ret = devbuffer_sum(ldptr.data,ldptr.length);
|
||||
|
||||
ldptr.data = NULL;
|
||||
ldptr.length=0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T> T dbuff_sum(T *dbuff, int N)
|
||||
{
|
||||
int I;
|
||||
T ret = T();
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
int nblocks;
|
||||
int nthreads;
|
||||
|
||||
if(dbuff==NULL || N<=0)
|
||||
{
|
||||
return ret;
|
||||
}
|
||||
|
||||
if(N>100)
|
||||
{
|
||||
nblocks = 10;
|
||||
nthreads = (int)sqrt((float) (N/nblocks));
|
||||
if(nthreads<=0) nthreads=1;
|
||||
if(nthreads>512) nthreads=512;
|
||||
}
|
||||
else
|
||||
{
|
||||
nblocks = 1;
|
||||
nthreads = 1;
|
||||
}
|
||||
|
||||
T *rets = NULL;
|
||||
T *devrets = NULL;
|
||||
|
||||
rets = new T[nblocks*nthreads];
|
||||
cudaMalloc(&devrets,sizeof(T)*nblocks*nthreads);
|
||||
|
||||
dbuff_sum_kf<<<nblocks,nthreads>>>(dbuff,N,devrets);
|
||||
cudaDeviceSynchronize();
|
||||
err = cudaGetLastError();
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
printf("amscu::dbuff_sum error: %s\n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
cudaMemcpy(rets,devrets,sizeof(T)*nblocks*nthreads,cudaMemcpyDeviceToHost);
|
||||
|
||||
ret = (T)0;
|
||||
for(I=0;I<nblocks*nthreads;I++)
|
||||
{
|
||||
ret = ret + rets[I];
|
||||
}
|
||||
|
||||
cudaFree(devrets); devrets = NULL;
|
||||
delete[] rets;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
// Kernel: each thread scans devbuffer with a grid-stride loop and records the
// largest and smallest element it saw in maxs[thread] and mins[thread].
// A thread whose first index is already >= N writes nothing to its slots.
template<typename T> __global__ void dbuff_minmax_kf(T *devbuffer, int N, T *maxs, T *mins)
{
    const int first  = threadIdx.x + blockIdx.x*blockDim.x;
    const int stride = blockDim.x*gridDim.x;

    if(first>=N)
    {
        return;
    }

    // seed both slots with this thread's first element
    maxs[first] = devbuffer[first];
    mins[first] = devbuffer[first];

    for(int idx=first+stride; idx<N; idx=idx+stride)
    {
        if(devbuffer[idx]>maxs[first])
        {
            maxs[first] = devbuffer[idx];
        }
        if(devbuffer[idx]<mins[first])
        {
            mins[first] = devbuffer[idx];
        }
    }
}
|
||||
|
||||
template<typename T> void dbuff_minmax(T *devbuffer, int N, T *min, T *max)
|
||||
{
|
||||
cudaError_t err = cudaSuccess;
|
||||
int nblocks;
|
||||
int nthreads;
|
||||
int I;
|
||||
|
||||
T *maxs = NULL;
|
||||
T *dev_maxs = NULL;
|
||||
T *mins = NULL;
|
||||
T *dev_mins = NULL;
|
||||
|
||||
T localmax = T(0);
|
||||
T localmin = T(0);
|
||||
|
||||
if(devbuffer==NULL || N<=0)
|
||||
{
|
||||
if(min!=NULL) *min = T(0);
|
||||
if(max!=NULL) *max = T(0);
|
||||
return;
|
||||
}
|
||||
|
||||
if(N>25)
|
||||
{
|
||||
nblocks = 25;
|
||||
nthreads = (int) sqrt((float)(N/nblocks));
|
||||
if(nthreads<1) nthreads = 1;
|
||||
if(nthreads>512) nthreads = 512;
|
||||
}
|
||||
else
|
||||
{
|
||||
nblocks = 1;
|
||||
nthreads = 1;
|
||||
}
|
||||
|
||||
maxs = new T[nblocks*nthreads];
|
||||
mins = new T[nblocks*nthreads];
|
||||
cudaMalloc(&dev_maxs,nblocks*nthreads);
|
||||
cudaMalloc(&dev_mins,nblocks*nthreads);
|
||||
|
||||
dbuff_minmax_kf<<<nblocks,nthreads>>>(devbuffer,N,dev_maxs,dev_mins);
|
||||
cudaDeviceSynchronize();
|
||||
err = cudaGetLastError();
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
printf("amscu::dbuff_minmax error: %s\n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
cudaMemcpy(maxs,dev_maxs,sizeof(T)*nblocks*nthreads,cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(mins,dev_mins,sizeof(T)*nblocks*nthreads,cudaMemcpyDeviceToHost);
|
||||
|
||||
|
||||
for(I=0;I<nblocks*nthreads;I++)
|
||||
{
|
||||
if(I==0)
|
||||
{
|
||||
localmax = maxs[0];
|
||||
localmin = mins[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
if(maxs[I]>localmax) localmax = maxs[I];
|
||||
if(mins[I]<localmin) localmin = mins[I];
|
||||
}
|
||||
}
|
||||
|
||||
if(max!=NULL) *max = localmax;
|
||||
if(min!=NULL) *min = localmin;
|
||||
|
||||
cudaFree(dev_maxs); dev_maxs = NULL;
|
||||
cudaFree(dev_mins); dev_mins = NULL;
|
||||
delete[] maxs; maxs = NULL;
|
||||
delete[] mins; mins = NULL;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Kernel: assigns setto to every element of devbuffer using a grid-stride loop.
template<typename T> __global__ void dbuff_setall_kf(T *devbuffer, int N, T setto)
{
    const int first  = threadIdx.x + blockIdx.x*blockDim.x;
    const int stride = blockDim.x*gridDim.x;

    for(int idx=first; idx<N; idx=idx+stride)
    {
        devbuffer[idx] = setto;
    }
}
|
||||
|
||||
template<typename T> void dbuff_setall(T *devbuffer, int N, T setto, int nblocks, int nthreads)
|
||||
{
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
if(devbuffer==NULL || N<=0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
dbuff_setall_kf<<<nblocks,nthreads>>>(devbuffer,N,setto);
|
||||
cudaDeviceSynchronize();
|
||||
err = cudaGetLastError();
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
printf("amscu::dbuff_setall error: %s\n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Kernel: dbuf_out[i] = fpnt(dbuf_a[i], dbuf_b[i]) for i in [0,N), using a
// grid-stride loop. fpnt is called from device code.
template<typename T1, typename T2, typename T3> __global__ void dbuff_vectorbinop_kf1(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2))
{
    const int first  = threadIdx.x + blockIdx.x*blockDim.x;
    const int stride = blockDim.x*gridDim.x;

    for(int idx=first; idx<N; idx=idx+stride)
    {
        dbuf_out[idx] = fpnt(dbuf_a[idx],dbuf_b[idx]);
    }
}
|
||||
|
||||
// Kernel: dbuf_out[i] = fpnt(dbuf_a[i], par_b) for i in [0,N), using a
// grid-stride loop. fpnt is called from device code.
template<typename T1, typename T2, typename T3> __global__ void dbuff_vectorbinop_kf2(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2))
{
    const int first  = threadIdx.x + blockIdx.x*blockDim.x;
    const int stride = blockDim.x*gridDim.x;

    for(int idx=first; idx<N; idx=idx+stride)
    {
        dbuf_out[idx] = fpnt(dbuf_a[idx],par_b);
    }
}
|
||||
|
||||
|
||||
//Elementwise device-buffer vector binary operation
|
||||
//takes two input arrays ( , ) --> one output array
|
||||
template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads)
|
||||
{
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
if(dbuf_a == NULL || dbuf_b == NULL || dbuf_out == NULL || N<=0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
dbuff_vectorbinop_kf1<<<nblocks,nthreads>>>(dbuf_a,dbuf_b,dbuf_out,N);
|
||||
cudaDeviceSynchronize();
|
||||
err = cudaGetLastError();
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
printf("amscu::devbuffer_vectorbinop error: %s\n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
//Elementwise device-buffer vector two-parameter operation
|
||||
//takes one input array, and a constant paramter ( ) ---> one output array
|
||||
template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads)
|
||||
{
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
if(dbuf_a == NULL || dbuf_out == NULL || N<=0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
dbuff_vectorbinop_kf2<<<nblocks,nthreads>>>(dbuf_a,par_b,dbuf_out,N);
|
||||
cudaDeviceSynchronize();
|
||||
err = cudaGetLastError();
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
printf("amscu::devbuffer_vectorbinop error: %s\n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Binary operation handed to dbuff_vectorbinop by dbuff_add: a + b.
template<typename T> T dbuff_add_fn(T a, T b)
{
    T sum = a+b;
    return sum;
}

// dbuff_out[i] = dbuff_a[i] + dbuff_b[i]
template<typename T> void dbuff_add(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,&dbuff_add_fn<T>,nblocks,nthreads);
}

// dbuff_out[i] = dbuff_a[i] + par_b
template<typename T> void dbuff_add(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,&dbuff_add_fn<T>,nblocks,nthreads);
}
|
||||
|
||||
// Binary operation handed to dbuff_vectorbinop by dbuff_sub: a - b.
template<typename T> T dbuff_sub_fn(T a, T b)
{
    T difference = a-b;
    return difference;
}

// dbuff_out[i] = dbuff_a[i] - dbuff_b[i]
template<typename T> void dbuff_sub(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,&dbuff_sub_fn<T>,nblocks,nthreads);
}

// dbuff_out[i] = dbuff_a[i] - par_b
template<typename T> void dbuff_sub(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,&dbuff_sub_fn<T>,nblocks,nthreads);
}
|
||||
|
||||
//Binary operation handed to dbuff_vectorbinop by dbuff_mult: returns a*b
template<typename T> T dbuff_mult_fn(T a, T b)
{
    T prod = a * b;
    return prod;
}
|
||||
|
||||
template<typename T> void dbuff_mult(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
|
||||
{
|
||||
dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,&dbuff_mult_fn,nblocks,nthreads);
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T> void dbuff_mult(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
|
||||
{
|
||||
dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,&dbuff_mult_fn,nblocks,nthreads);
|
||||
return;
|
||||
}
|
||||
|
||||
//Binary operation handed to dbuff_vectorbinop by dbuff_div: returns a/b
template<typename T> T dbuff_div_fn(T a, T b)
{
    T quot = a / b;
    return quot;
}
|
||||
|
||||
template<typename T> void dbuff_div(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
|
||||
{
|
||||
dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,&dbuff_div_fn,nblocks,nthreads);
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename T> void dbuff_div(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
|
||||
{
|
||||
dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,&dbuff_div_fn,nblocks,nthreads);
|
||||
return;
|
||||
}
|
||||
|
||||
//"Left" division with the operands swapped: returns b/a.
// Used by the scalar/array overload of dbuff_div, which passes the array first.
template<typename T> T dbuff_ldiv_fn(T a, T b)
{
    T quot = b / a;
    return quot;
}
|
||||
|
||||
|
||||
template<typename T> void dbuff_div(T par_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
|
||||
{
|
||||
dbuff_vectorbinop(dbuff_b,par_a,dbuff_out,N,&dbuff_ldiv_fn,nblocks,nthreads);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,323 @@
|
||||
#ifndef __CUARRAY_IMPL_HPP__
|
||||
#define __CUARRAY_IMPL_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
// New Version cuarray<T>
|
||||
// simpler, less crap going on
|
||||
|
||||
//Default constructor: an empty array (no storage, zero length)
template<typename T> __device__ __host__ cuarray<T>::cuarray()
{
    data = NULL;
    length = 0;
}
|
||||
|
||||
//Destructor: releases the element storage with delete[] (if any) and
// leaves the object with a NULL data pointer and zero length.
template<typename T> __device__ __host__ cuarray<T>::~cuarray()
{
    length = 0;
    if(data == NULL)
    {
        return;
    }
    delete[] data;
    data = NULL;
}
|
||||
|
||||
//Resize the array to _length elements.
// The leading min(old length, _length) elements are kept; any new trailing
// elements are set to a value-initialized T. A request with _length<=0
// releases the storage and leaves the array empty.
// returns: 1 on success (including "already that size"),
//         -1 if the new buffer could not be allocated
template<typename T> __device__ __host__ int cuarray<T>::resize(const int _length)
{
    if(length==_length)
    {
        return 1; //nothing to do
    }

    if(_length<=0)
    {
        //Empty request: free and stop here. Without this return the code
        //below would run new T[_length] with a non-positive count and then
        //record that non-positive value as the length.
        if(data!=NULL)
        {
            delete[] data;
            data = NULL;
        }
        length = 0;
        return 1;
    }

    T *newbuffer = new T[_length];
    if(newbuffer==NULL)
    {
        return -1; //failed to allocate memory
    }

    //number of existing elements carried over into the new buffer
    int ncopy = 0;
    if(data!=NULL && length>0)
    {
        ncopy = (length<_length) ? length : _length;
    }

    int I;
    for(I=0;I<ncopy;I++)
    {
        newbuffer[I] = data[I];
    }

    //value-initialized filler, so the padding is never read from an
    //uninitialized object
    const T def = T();
    for(I=ncopy;I<_length;I++)
    {
        newbuffer[I] = def;
    }

    if(data!=NULL)
    {
        delete[] data;
        data = NULL;
    }

    data = newbuffer;
    length = _length;

    return 1;
}
|
||||
|
||||
|
||||
//Send this host array to the device object referenced by *dptr.
// If *dptr is NULL, or the device array has a different length, the device
// object is (re)created with _device_send_overwrite; if the lengths match,
// only the element data is copied with _device_send_copy.
// returns: the return value of whichever helper was used (1 on success,
//          negative on failure), or -1 if dptr itself is NULL
template<typename T> __host__ int cuarray<T>::device_send(cuarray<T> **dptr)
{
    if(dptr==NULL)
    {
        return -1; //nowhere to read or store the device pointer
    }

    if(*dptr==NULL)
    {
        return _device_send_overwrite(dptr);
    }

    //Comparison, not assignment: copying into the existing device buffer is
    //only valid when it already holds exactly `length` elements.
    if(device_length(*dptr)==length)
    {
        return _device_send_copy(*dptr);
    }

    return _device_send_overwrite(dptr);
}
|
||||
|
||||
//Replace the device object referenced by *dptr with a fresh copy of this array.
// Any existing device object is released with device_free first. A device-side
// cuarray<T> struct is then allocated, plus (when this array has data) a device
// element buffer, and both are filled from the host.
// On any failure everything allocated here is freed again and *dptr is NULL.
// returns:  1 success
//          -1 device struct allocation failed (array with data)
//          -2 device element buffer allocation failed
//          -3 a host-to-device copy failed
//          -4 device struct allocation failed (empty array)
template<typename T> __host__ int cuarray<T>::_device_send_overwrite(cuarray<T> **dptr)
{
    int ret = 0;
    cuarray<T> dlocal; //host-side image of the device struct
    cudaError_t err = cudaSuccess;

    device_free(dptr);

    const bool hasdata = (length>=0 && data!=NULL);

    err = cudaMalloc(dptr,sizeof(cuarray<T>));
    if(err!=cudaSuccess)
    {
        *dptr = NULL;
        ret = hasdata ? -1 : -4;
    }
    else
    {
        dlocal.data = NULL;
        dlocal.length = 0;

        if(hasdata)
        {
            err = cudaMalloc(&(dlocal.data),sizeof(T)*length);
            if(err==cudaSuccess)
            {
                dlocal.length = length;
            }
            else
            {
                dlocal.data = NULL;
                ret = -2;
            }
        }

        if(ret==0)
        {
            //device struct first, then the element data it points to
            err = cudaMemcpy(*dptr,&dlocal,sizeof(cuarray<T>),cudaMemcpyHostToDevice);
            if(err==cudaSuccess && hasdata)
            {
                err = cudaMemcpy(dlocal.data,data,sizeof(T)*length,cudaMemcpyHostToDevice);
            }
            ret = (err==cudaSuccess) ? 1 : -3;
        }

        if(ret!=1)
        {
            //undo the partial construction so nothing is leaked and the
            //caller is not left holding a half-initialized device object
            if(dlocal.data!=NULL)
            {
                cudaFree(dlocal.data);
            }
            cudaFree(*dptr);
            *dptr = NULL;
        }
    }

    //dlocal.data is a device pointer: clear it so the destructor of dlocal
    //does not pass it to delete[]
    dlocal.data = NULL;
    dlocal.length = -1;

    return ret;
}
|
||||
|
||||
//Copy this array's elements into the element buffer of an existing device object.
// The device buffer address is obtained with device_data_ptr(dptr) and
// `length` elements are copied host-to-device.
// returns: 1 if cudaMemcpy reported success, -1 otherwise
template<typename T> __host__ int cuarray<T>::_device_send_copy(cuarray<T> *dptr)
{
    T* devbuf = device_data_ptr(dptr);

    const cudaError_t status = cudaMemcpy(devbuf,data,sizeof(T)*length,cudaMemcpyHostToDevice);

    return (status==cudaSuccess) ? 1 : -1;
}
|
||||
|
||||
//Copy the contents of a device object into this host array.
// This array is resized to the device array's length when they differ, then
// the elements are copied device-to-host from the device element buffer.
// returns:  1 elements copied
//           0 nothing copied (empty array, or no host/device buffer)
//          -1 dptr is NULL
//          -2 cudaMemcpy failed
template<typename T> __host__ int cuarray<T>::device_pull(cuarray<T> *dptr)
{
    int ret = 0;

    if(dptr==NULL)
    {
        return -1; // null d pointer
    }

    const int dlength = device_length(dptr);
    if(dlength!=length)
    {
        this->resize(dlength);
    }

    T* ddata = device_data_ptr(dptr);

    if(length>0 && data!=NULL && ddata!=NULL)
    {
        //The source is the device element buffer (ddata), not dptr: dptr is
        //the address of the device-side cuarray struct itself.
        const cudaError_t err = cudaMemcpy(data,ddata,length*sizeof(T),cudaMemcpyDeviceToHost);
        ret = (err==cudaSuccess) ? 1 : -2;
    }

    return ret;
}
|
||||
|
||||
//Release a device object created by device_send and set *dptr to NULL.
// The device struct is read back to learn the address of its element buffer,
// that buffer is freed, and then the struct itself is freed.
// returns: 1 if a device object was released, 0 if dptr or *dptr was NULL
template<typename T> __host__ int cuarray<T>::device_free(cuarray<T> **dptr)
{
    int ret = 0;
    cuarray<T> dlocal; //host-side image of the device struct

    if(dptr!=NULL && *dptr!=NULL)
    {
        //Read from *dptr (the device address). dptr itself is the host
        //address of the pointer variable and is not a valid copy source.
        const cudaError_t err = cudaMemcpy(&dlocal,*dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);

        //only trust the read-back buffer address if the copy succeeded
        if(err==cudaSuccess && dlocal.data!=NULL)
        {
            cudaFree(dlocal.data);
        }
        dlocal.data = NULL;

        cudaFree(*dptr);
        *dptr = NULL;
        ret = 1;
    }

    //dlocal.data may have held a device pointer: make sure the destructor
    //of dlocal never passes it to delete[]
    dlocal.data = NULL;
    dlocal.length = -1;

    return ret;
}
|
||||
|
||||
//Read the length field of a device object.
// The device struct is copied to a host-side image and its length returned.
// returns: the length read from the device, or -1 if dptr is NULL
template<typename T> __host__ int cuarray<T>::device_length(cuarray<T> *dptr)
{
    if(dptr==NULL)
    {
        return -1;
    }

    cuarray<T> mirror;
    cudaMemcpy(&mirror,dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
    const int devlen = mirror.length;

    //mirror.data now holds whatever the device struct held; clear it so the
    //destructor of mirror does not pass it to delete[]
    mirror.data = NULL;
    mirror.length = -1;

    return devlen;
}
|
||||
|
||||
//Read the data pointer field of a device object.
// The device struct is copied to a host-side image and its data pointer returned.
// returns: the pointer read from the device, or NULL if dptr is NULL
template<typename T> __host__ T* cuarray<T>::device_data_ptr(cuarray<T> *dptr)
{
    if(dptr==NULL)
    {
        return NULL;
    }

    cuarray<T> mirror;
    cudaMemcpy(&mirror,dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
    T* devbuf = mirror.data;

    //clear the image so the destructor of mirror does not pass the
    //read-back pointer to delete[]
    mirror.data = NULL;
    mirror.length = -1;

    return devbuf;
}
|
||||
|
||||
//Number of elements currently held (the length field)
template<typename T> __device__ __host__ int cuarray<T>::size() const
{
    const int n = length;
    return n;
}
|
||||
|
||||
//Mutable reference to element I; the index is not range-checked
template<typename T> __device__ __host__ T& cuarray<T>::at(const int I)
{
    T *base = data;
    return base[I];
}
|
||||
|
||||
//Read-only reference to element I; the index is not range-checked
template<typename T> __device__ __host__ const T& cuarray<T>::at(const int I) const
{
    const T *base = data;
    return base[I];
}
|
||||
|
||||
//Subscript access, mutable; the index is not range-checked
template<typename T> __device__ __host__ T& cuarray<T>::operator[](const int I)
{
    T *base = data;
    return base[I];
}
|
||||
|
||||
//Subscript access, read-only; the index is not range-checked
template<typename T> __device__ __host__ const T& cuarray<T>::operator[](const int I) const
{
    const T *base = data;
    return base[I];
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif
|
@ -0,0 +1,19 @@
|
||||
#ifndef __AMSCUDA_BINARRRW_HPP__
|
||||
#define __AMSCUDA_BINARRRW_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
//Reads one array record from fp: an int32 dimension count, that many int32
//dimensions, then the elements of type T. shape receives the dimensions and
//buffer receives the elements.
template<typename T> int fread_ndarray(FILE *fp, cuarray<int32_t> *shape, cuarray<T> *buffer);
|
||||
//Writes shape and buffer to fp as one record: the dimension count, the
//dimensions, then the elements.
template<typename T> int fwrite_ndarray(FILE *fp, const cuarray<int32_t> *shape, const cuarray<T> *buffer);
|
||||
|
||||
//Writes a plain buffer of N elements as a one-dimensional record
//(dimension count 1, then N, then the elements).
template<typename T> int fwrite_buffer(FILE *fp, const int N, const T *buffer);
|
||||
//Reads a record from fp into a plain buffer, limited to Nmax elements.
//NOTE(review): buffer is the read destination but is declared const T* --
//confirm the intended signature.
template<typename T> int fread_buffer(FILE *fp, const int Nmax, const T *buffer);
|
||||
|
||||
|
||||
}; //end namespace amscuda
|
||||
|
||||
#include <amsculib2/amscuda_binarrrw_impl.hpp>
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,194 @@
|
||||
#ifndef __AMSCUDA_BINARRRW_IMPL_HPP__
|
||||
#define __AMSCUDA_BINARRRW_IMPL_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
template<typename T> int fread_ndarray(FILE *fp, cuarray<int32_t> *shape, cuarray<T> *buffer)
|
||||
{
|
||||
int ret = 1;
|
||||
int I;
|
||||
long piprod;
|
||||
int32_t q;
|
||||
int cnt;
|
||||
|
||||
int32_t Nd;
|
||||
|
||||
if(fp!=NULL)
|
||||
{
|
||||
if(!feof(fp))
|
||||
{
|
||||
cnt = fread(&Nd,sizeof(int32_t),1,fp);
|
||||
if(Nd>0 && cnt>0)
|
||||
{
|
||||
shape->resize(Nd);
|
||||
piprod = 1;
|
||||
for(I=0;I<Nd;I++)
|
||||
{
|
||||
cnt = fread(&q,sizeof(int32_t),1,fp);
|
||||
shape->at(I) = q;
|
||||
if(q>0)
|
||||
{
|
||||
piprod = piprod*q;
|
||||
}
|
||||
else
|
||||
{
|
||||
piprod = 0;
|
||||
}
|
||||
}
|
||||
|
||||
buffer->resize(piprod);
|
||||
if(piprod>0)
|
||||
{
|
||||
cnt = fread((buffer->data),sizeof(T),piprod,fp);
|
||||
if(piprod==cnt)
|
||||
{
|
||||
ret = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("fread_ndarray, read %d values, expecting %ld\n",cnt,piprod);
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("fread_ndarray: Read a number of dimensions<=0.\n");
|
||||
Nd = 0;
|
||||
shape->resize(0);
|
||||
buffer->resize(0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("fread_ndarray: fp=NULL.\n");
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T> int fwrite_ndarray(FILE *fp, const cuarray<int32_t> *shape, const cuarray<T> *buffer)
|
||||
{
|
||||
int ret = 1;
|
||||
long piprod;
|
||||
int I;
|
||||
int32_t Nd;
|
||||
|
||||
if(fp==NULL)
|
||||
{
|
||||
ret = 0;
|
||||
printf("fwrite_ndarray: fp=NULL\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
piprod = 1;
|
||||
for(I=0;I<shape->size();I++)
|
||||
{
|
||||
if(shape->at(I)>0)
|
||||
{
|
||||
piprod = piprod*shape->at(I);
|
||||
}
|
||||
else
|
||||
{
|
||||
piprod = 0;
|
||||
}
|
||||
}
|
||||
|
||||
Nd = (int32_t) shape->size();
|
||||
|
||||
if(piprod!=buffer->size())
|
||||
{
|
||||
ret = 0;
|
||||
printf("fwrite_ndarray: buffer is size %ld, while shape is size %ld\n",(long)buffer->size(),(long)piprod);
|
||||
return ret;
|
||||
}
|
||||
|
||||
fwrite(&Nd,sizeof(int32_t),1,fp);
|
||||
if(Nd>0)
|
||||
{
|
||||
fwrite(shape->data,sizeof(int32_t),Nd,fp);
|
||||
if(piprod>0)
|
||||
{
|
||||
fwrite(buffer->data,sizeof(T),buffer->size(),fp);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
//Write a plain buffer of N elements to fp as a one-dimensional array record.
// Record layout: int32 dimension count (always 1), int32 N, then N elements of T.
// returns: 1 on success, 0 if fp is NULL, N is negative, buffer is NULL while
//          N>0, or a write came up short
template<typename T> int fwrite_buffer(FILE *fp, const int N, const T *buffer)
{
    //header fields are stored as exactly 32-bit values, matching the
    //sizeof(int32_t) used when writing them
    const int32_t Nd = 1;
    const int32_t len = (int32_t) N;

    if(fp==NULL)
    {
        printf("fwrite_buffer: fp=NULL\n");
        return 0;
    }
    if(N<0 || (N>0 && buffer==NULL))
    {
        return 0;
    }

    if(fwrite(&Nd,sizeof(int32_t),1,fp)!=1)
    {
        return 0;
    }
    if(fwrite(&len,sizeof(int32_t),1,fp)!=1)
    {
        return 0;
    }
    if(N>0 && fwrite(buffer,sizeof(T),(size_t)N,fp)!=(size_t)N)
    {
        return 0;
    }

    return 1;
}
|
||||
|
||||
//Read one array record from fp into a plain buffer of at most Nmax elements.
// Record layout: int32 Nd, then Nd int32 dimensions, then the elements of T.
// min(Nmax, product of dimensions) elements are read into buffer; if the
// record holds more than Nmax elements the remainder is left unread in fp.
// returns:  1 the expected number of elements was read
//           0 bad argument, Nd<=0, or a short read
//          -1 fp is NULL
//          -2 fp is at end of file
// NOTE(review): buffer is declared const T* but is the destination of the
// read, so the const is cast away below; callers must pass writable storage.
template<typename T> int fread_buffer(FILE *fp, const int Nmax, const T *buffer)
{
    const long maxelems = 2147483647L; //saturation bound for the element count
    int32_t Nd = 0;
    int32_t q = 0;
    long piprod = 1;
    int I;
    int Nr;
    size_t cnt;

    if(fp==NULL) {return -1;}
    if(feof(fp)) {return -2;}
    if(buffer==NULL || Nmax<0) {return 0;}

    //Nd is only meaningful if the read itself succeeded
    cnt = fread(&Nd,sizeof(int32_t),1,fp);
    if(cnt!=1 || Nd<=0)
    {
        return 0;
    }

    for(I=0;I<Nd;I++)
    {
        cnt = fread(&q,sizeof(int32_t),1,fp);
        if(cnt!=1)
        {
            return 0;
        }

        if(q<=0)
        {
            piprod = 0; //a non-positive dimension means no elements
        }
        else if(piprod>maxelems/q)
        {
            piprod = maxelems; //saturate: only min(Nmax,piprod) is used below
        }
        else
        {
            piprod = piprod*q;
        }
    }

    Nr = (piprod<(long)Nmax) ? (int)piprod : Nmax;

    cnt = 0;
    if(Nr>0)
    {
        cnt = fread(const_cast<T*>(buffer),sizeof(T),(size_t)Nr,fp);
    }

    if(cnt!=(size_t)Nr)
    {
        printf("fread_buffer, read %d values, expecting %d\n",(int)cnt,Nr);
        return 0;
    }

    return 1;
}
|
||||
|
||||
}; //end namespace amscuda
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,11 @@
|
||||
#ifndef __AMSCUGEOM_HPP__
|
||||
#define __AMSCUGEOM_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
|
||||
}; //end namespace amscuda
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,70 @@
|
||||
#ifndef __AMSCULIB2_HPP__
|
||||
#define __AMSCULIB2_HPP__
|
||||
|
||||
//Std Lib Includes
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <time.h>
|
||||
#include <new>
|
||||
|
||||
#include <cuda_runtime_api.h> //where all the cuda functions live
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda.h>
|
||||
|
||||
//Dependencies
|
||||
|
||||
//Predeclarations
|
||||
class cuvect2;
|
||||
class cuvect3;
|
||||
class cuvect4;
|
||||
class cuvect2f;
|
||||
class cuvect3f;
|
||||
class cuvect4f;
|
||||
|
||||
//Need a way to define the same symbols using both host and device code
|
||||
//A solution was found here: https://stackoverflow.com/questions/9457572/cuda-host-and-device-using-same-constant-memory
|
||||
#ifdef __CUDA_ARCH__
|
||||
#define AMSCU_CONST __constant__
|
||||
#else
|
||||
#define AMSCU_CONST
|
||||
#endif
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
//default thread and block execution
|
||||
AMSCU_CONST static const int amscu_defnblocks = 256;
|
||||
AMSCU_CONST static const int amscu_defnthreads = 512;
|
||||
|
||||
//default numthreads to execute on cpu
|
||||
AMSCU_CONST static const int amscu_defcputhreads = 8;
|
||||
|
||||
}; //end namespace amscuda
|
||||
|
||||
//Components
|
||||
#include <amsculib2/amscu_cudafunctions.hpp>
|
||||
#include <amsculib2/amscumath.hpp>
|
||||
#include <amsculib2/amscu_comp64.hpp>
|
||||
#include <amsculib2/amscu_comp128.hpp>
|
||||
#include <amsculib2/cuvect2.hpp>
|
||||
#include <amsculib2/cuvect3.hpp>
|
||||
#include <amsculib2/cuvect4.hpp>
|
||||
#include <amsculib2/cuvect2f.hpp>
|
||||
#include <amsculib2/cuvect3f.hpp>
|
||||
#include <amsculib2/cuvect4f.hpp>
|
||||
#include <amsculib2/amscugeom.hpp>
|
||||
#include <amsculib2/amscuarray.hpp>
|
||||
#include <amsculib2/amscuda_binarrrw.hpp>
|
||||
#include <amsculib2/amscu_random.hpp>
|
||||
|
||||
#include <amsculib2/amscuarray_dops.hpp>
|
||||
|
||||
#include <amsculib2/amscurarray.cuh>
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,56 @@
|
||||
#ifndef __AMSCUMATH_HPP__
|
||||
#define __AMSCUMATH_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
|
||||
|
||||
//Problem: These are not in the namespace
|
||||
//#define nan NAN
|
||||
//#define fnan (float) NAN
|
||||
//#define inf INFINITY
|
||||
//#define finf (float) INFINITY
|
||||
//#define pi 3.1415926535897936
|
||||
|
||||
//These need to be the same symbol for both host and device code
|
||||
AMSCU_CONST static const double nan = NAN;
|
||||
AMSCU_CONST static const float fnan = (float) NAN;
|
||||
AMSCU_CONST static const double inf = INFINITY;
|
||||
AMSCU_CONST static const float finf = (float) INFINITY;
|
||||
AMSCU_CONST static const double pi = 3.1415926535897936;
|
||||
AMSCU_CONST static const float pif = 3.1415926535897936;
|
||||
|
||||
__host__ __device__ double dabs(double x);
|
||||
__host__ __device__ float fabs(float x);
|
||||
|
||||
//Absolute value for any T that supports comparison with 0 and unary minus:
// returns -in when in<0, otherwise in
template<typename T> __host__ __device__ T abs(const T in)
{
    if(in<0)
    {
        return -in;
    }
    return in;
}
|
||||
|
||||
__host__ __device__ double mod(double a, double md);
|
||||
__host__ __device__ float mod(float a, float md);
|
||||
__host__ __device__ int mod(int x, int n);
|
||||
__host__ __device__ long mod(long x, long n);
|
||||
|
||||
__host__ __device__ int truediv(int x, int y);
|
||||
__host__ __device__ long truediv(long x, long y);
|
||||
|
||||
template<typename T> __host__ __device__ T min(T a, T b);
|
||||
template<typename T> __host__ __device__ T max(T a, T b);
|
||||
|
||||
__device__ __host__ double arg(double x, double y);
|
||||
__device__ __host__ void get_azel(double x, double y, double z, double *az, double *el);
|
||||
|
||||
void test_amscumath1();
|
||||
|
||||
|
||||
}; //end namespace amscuda
|
||||
|
||||
#include <amsculib2/amscumath_impl.hpp>
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,42 @@
|
||||
#ifndef __AMSCUMATH_IMPL_HPP__
|
||||
#define __AMSCUMATH_IMPL_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
//Smaller of two values: returns b when a>b, otherwise a
template<typename T> __host__ __device__ T min(T a, T b)
{
    return (a>b) ? b : a;
}
|
||||
|
||||
//Larger of two values: returns a when a>b, otherwise b
template<typename T> __host__ __device__ T max(T a, T b)
{
    return (a>b) ? a : b;
}
|
||||
|
||||
template<> __host__ __device__ double min(double a, double b);
|
||||
template<> __host__ __device__ float min(float a, float b);
|
||||
template<> __host__ __device__ double max(double a, double b);
|
||||
template<> __host__ __device__ float max(float a, float b);
|
||||
|
||||
|
||||
}; //end namespace amscuda
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,66 @@
|
||||
#ifndef __AMSCURARRAY_HPP__
|
||||
#define __AMSCURARRAY_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
//Cuda ragged array class
|
||||
template<typename T> class curarray
|
||||
{
|
||||
public:
|
||||
int device;
|
||||
curarray* devptr; //pointer to mirror class on the device
|
||||
|
||||
int Narrays; //number of arrays
|
||||
|
||||
int *N; //dimension of each array
|
||||
T** hostarrayptrs; //pointers to each array on the host - null on the device
|
||||
T** devarrayptrs; //pointers to each array on the device
|
||||
//the double pointer is a host pointer to device pointers on the host class
|
||||
//for the device class, only the second set of arrays is in use
|
||||
|
||||
//the constructor and destructor set all pointers to NULL, they
|
||||
// do *not* manage memory. This is done with curarray_new and curarray_delete
|
||||
__device__ __host__ curarray();
|
||||
__device__ __host__ ~curarray();
|
||||
|
||||
__host__ int push();
|
||||
__host__ int pull();
|
||||
//__device__ int dev_resizearray(int arraynum, int arraysize);
|
||||
__host__ int resizearray(int arraynum, int arraysize);
|
||||
// I may want a way to resize arrays on the device without pushing/pulling all the array contents
|
||||
|
||||
|
||||
};
|
||||
|
||||
template<typename T> int curarray_new(curarray<T>** ptr, int Narrays);
|
||||
|
||||
template<typename T> int curarray_delete(curarray<T>** ptr);
|
||||
|
||||
template<typename T> int curarray_device_new(curarray<T> *hostptr);
|
||||
|
||||
template<typename T> int curarray_device_delete(curarray<T> *hostptr);
|
||||
|
||||
template<typename T> int curarray_push(curarray<T> *hostptr);
|
||||
|
||||
template<typename T> int curarray_pull(curarray<T> *hostptr);
|
||||
|
||||
|
||||
//template<typename T> int curarray_host_fillall(curarray<T> *hostptr, const T &val);
|
||||
//template<typename T> int curarray_device_fillall(curarray<T> *hostptr, const T &val);
|
||||
|
||||
|
||||
//template<typename T> __host__ int curarray_deletearray(curarray<T> *hostptr, int arrayindex);
|
||||
//template<typename T> __device__ int curarray_dev_deletearray(curarray<T> *devptr, int arrayindex);
|
||||
|
||||
//template<typename T> __host__ int curarray_allocarray(curarray<T> *hostptr, int arrayindex, int size);
|
||||
//template<typename T> __device__ int curarray_dev_allocarray(curarray<T> *devptr, int arrayindex, int size);
|
||||
|
||||
|
||||
void test_amscurarray1();
|
||||
|
||||
};
|
||||
|
||||
#include <amsculib2/amscurarray_impl.cuh>
|
||||
|
||||
#endif
|
@ -0,0 +1,529 @@
|
||||
#ifndef __AMSCURARRAY_IMPL_HPP__
|
||||
#define __AMSCURARRAY_IMPL_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
template<typename T> curarray<T>::curarray()
|
||||
{
|
||||
device = -1;
|
||||
devptr = NULL;
|
||||
Narrays = 0;
|
||||
N = NULL;
|
||||
hostarrayptrs = NULL;
|
||||
devarrayptrs = NULL;
|
||||
|
||||
}
|
||||
|
||||
template<typename T> curarray<T>::~curarray()
|
||||
{
|
||||
device = -1;
|
||||
devptr = NULL;
|
||||
Narrays = 0;
|
||||
N = NULL;
|
||||
hostarrayptrs = NULL;
|
||||
devarrayptrs = NULL;
|
||||
|
||||
}
|
||||
|
||||
template<typename T> int curarray_new(curarray<T>** ptr, int Narrays)
|
||||
{
|
||||
int ret = 0;
|
||||
int device;
|
||||
curarray<T> *lhptr = *ptr;
|
||||
|
||||
cudaGetDevice(&device);
|
||||
|
||||
if(lhptr!=NULL)
|
||||
{
|
||||
curarray_delete(ptr);
|
||||
}
|
||||
|
||||
*ptr = new(std::nothrow) curarray<T>();
|
||||
lhptr = *ptr;
|
||||
|
||||
int I;
|
||||
|
||||
if(Narrays<0) Narrays=0;
|
||||
|
||||
lhptr->Narrays = Narrays;
|
||||
lhptr->device = device;
|
||||
lhptr->N = new(std::nothrow) int[Narrays];
|
||||
lhptr->hostarrayptrs = new(std::nothrow) T*[Narrays];
|
||||
lhptr->devarrayptrs = new(std::nothrow) T*[Narrays];
|
||||
|
||||
for(I=0;I<Narrays;I++)
|
||||
{
|
||||
lhptr->N[I] = 0;
|
||||
lhptr->hostarrayptrs[I] = NULL;
|
||||
lhptr->devarrayptrs[I] = NULL;
|
||||
}
|
||||
|
||||
curarray_device_new(lhptr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T> int curarray_delete(curarray<T>** ptr)
|
||||
{
|
||||
int ret = 0;
|
||||
curarray<T> *lptr = NULL;
|
||||
int olddev;
|
||||
|
||||
int I;
|
||||
|
||||
if(*ptr==NULL)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
lptr = *ptr;
|
||||
|
||||
cudaGetDevice(&olddev);
|
||||
cudaSetDevice(lptr->device);
|
||||
|
||||
|
||||
if(lptr->devptr!=NULL)
|
||||
{
|
||||
curarray_device_delete(lptr);
|
||||
}
|
||||
|
||||
lptr->device = -1;
|
||||
|
||||
for(I=0;I<lptr->Narrays;I++)
|
||||
{
|
||||
if(lptr->hostarrayptrs!=NULL)
|
||||
{
|
||||
if(lptr->hostarrayptrs[I]!=NULL)
|
||||
{
|
||||
delete[] lptr->hostarrayptrs[I];
|
||||
lptr->hostarrayptrs[I] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if(lptr->devarrayptrs!=NULL)
|
||||
{
|
||||
if(lptr->devarrayptrs[I]!=NULL)
|
||||
{
|
||||
//erasing device memory should have been handled in curarray_device_delete
|
||||
lptr->devarrayptrs[I] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
lptr->N[I] = 0;
|
||||
}
|
||||
|
||||
if(lptr->N != NULL) {delete[] lptr->N; lptr->N = NULL;}
|
||||
if(lptr->hostarrayptrs!=NULL) {delete[] lptr->hostarrayptrs; lptr->hostarrayptrs=NULL;}
|
||||
if(lptr->devarrayptrs!=NULL) {delete[] lptr->devarrayptrs; lptr->devarrayptrs=NULL;}
|
||||
|
||||
if(*ptr!=NULL) {delete *ptr; *ptr = NULL;}
|
||||
|
||||
cudaSetDevice(olddev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T> int curarray_device_new(curarray<T> *hostptr)
|
||||
{
|
||||
int ret = 0;
|
||||
curarray<T> ldevdata;
|
||||
|
||||
if(hostptr==NULL) return -1;
|
||||
if(hostptr->devptr!=NULL)
|
||||
{
|
||||
curarray_device_delete(hostptr);
|
||||
}
|
||||
|
||||
int I;
|
||||
|
||||
cudaGetDevice(&(hostptr->device));
|
||||
|
||||
ldevdata.device = hostptr->device;
|
||||
ldevdata.Narrays = hostptr->Narrays;
|
||||
int Narrays = hostptr->Narrays;
|
||||
|
||||
cudaMalloc(&(ldevdata.N),sizeof(int)*Narrays);
|
||||
cudaMemcpy(ldevdata.N,hostptr->N,sizeof(int)*Narrays,cudaMemcpyHostToDevice);
|
||||
|
||||
ldevdata.hostarrayptrs = NULL;
|
||||
|
||||
for(I=0;I<Narrays;I++)
|
||||
{
|
||||
if(hostptr->N[I]>0)
|
||||
{
|
||||
if(hostptr->devarrayptrs[I]!=NULL)
|
||||
{
|
||||
cudaFree(hostptr->devarrayptrs[I]);
|
||||
hostptr->devarrayptrs[I] = NULL;
|
||||
}
|
||||
|
||||
cudaMalloc(&(hostptr->devarrayptrs[I]),sizeof(T)*hostptr->N[I]);
|
||||
cudaMemcpy(hostptr->devarrayptrs[I],hostptr->hostarrayptrs[I],sizeof(T)*hostptr->N[I],cudaMemcpyHostToDevice);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(hostptr->devarrayptrs[I]!=NULL)
|
||||
{
|
||||
cudaFree(hostptr->devarrayptrs[I]);
|
||||
hostptr->devarrayptrs[I] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cudaMalloc(&(ldevdata.devarrayptrs),sizeof(T*)*Narrays);
|
||||
cudaMemcpy(ldevdata.devarrayptrs,hostptr->devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyHostToDevice);
|
||||
|
||||
cudaMalloc(&(hostptr->devptr),sizeof(curarray<T>));
|
||||
cudaMemcpy(hostptr->devptr,&ldevdata,sizeof(curarray<T>),cudaMemcpyHostToDevice);
|
||||
|
||||
ret = 1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T> int curarray_device_delete(curarray<T> *hostptr)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
curarray<T> ldevdata;
|
||||
int olddev;
|
||||
|
||||
if(hostptr->devptr==NULL)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
cudaGetDevice(&olddev);
|
||||
cudaSetDevice(hostptr->device);
|
||||
|
||||
cudaMemcpy(&ldevdata,hostptr->devptr,sizeof(curarray<T>),cudaMemcpyDeviceToHost);
|
||||
|
||||
int I;
|
||||
for(I=0;I<hostptr->Narrays;I++)
|
||||
{
|
||||
if(hostptr->devarrayptrs[I]!=NULL)
|
||||
{
|
||||
cudaFree(hostptr->devarrayptrs[I]);
|
||||
hostptr->devarrayptrs[I] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
cudaFree(ldevdata.devarrayptrs);
|
||||
cudaFree(ldevdata.N);
|
||||
|
||||
cudaFree(hostptr->devptr);
|
||||
hostptr->devptr = NULL;
|
||||
hostptr->device = -1;
|
||||
|
||||
cudaSetDevice(olddev);
|
||||
|
||||
ret = 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T> int curarray_push(curarray<T> *hostptr)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
int olddev;
|
||||
|
||||
curarray<T> ldevdata;
|
||||
T** ldevarrayptrs = NULL;
|
||||
int *devN = NULL;
|
||||
|
||||
if(hostptr==NULL) return -1;
|
||||
|
||||
cudaGetDevice(&olddev);
|
||||
cudaSetDevice(hostptr->device);
|
||||
|
||||
int Narrays = hostptr->Narrays;
|
||||
|
||||
cudaMemcpy(&ldevdata,hostptr->devptr,sizeof(curarray<T>),cudaMemcpyDeviceToHost);
|
||||
ldevarrayptrs = new(std::nothrow) T*[Narrays];
|
||||
devN = new(std::nothrow) int[Narrays];
|
||||
|
||||
cudaMemcpy(ldevarrayptrs,ldevdata.devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(devN,ldevdata.N,sizeof(int)*Narrays,cudaMemcpyDeviceToHost);
|
||||
|
||||
|
||||
int I;
|
||||
|
||||
for(I=0;I<Narrays;I++)
|
||||
{
|
||||
//check to see that host size is the same as device size, and that
|
||||
//the host device pointer is the same as the device device pointer
|
||||
if( (hostptr->N[I]!=devN[I]) ||
|
||||
(hostptr->devarrayptrs[I] != ldevarrayptrs[I])
|
||||
)
|
||||
{
|
||||
cudaFree(ldevarrayptrs[I]);
|
||||
ldevarrayptrs[I] = NULL;
|
||||
hostptr->devarrayptrs[I] = NULL;
|
||||
|
||||
if(hostptr->N[I]>0)
|
||||
{
|
||||
cudaMalloc(&(hostptr->devarrayptrs[I]),sizeof(T)*hostptr->N[I]);
|
||||
ldevarrayptrs[I] = hostptr->devarrayptrs[I];
|
||||
devN[I] = hostptr->N[I];
|
||||
}
|
||||
else
|
||||
{
|
||||
devN[I] = 0;
|
||||
}
|
||||
}
|
||||
if(hostptr->N[I]>0)
|
||||
{
|
||||
//copy host data to device
|
||||
cudaMemcpy(hostptr->devarrayptrs[I],hostptr->hostarrayptrs[I],sizeof(T)*hostptr->N[I],cudaMemcpyHostToDevice);
|
||||
}
|
||||
} //for each array
|
||||
|
||||
//rectify and copy device data structure to device
|
||||
ldevdata.device = hostptr->device;
|
||||
ldevdata.devptr = NULL;
|
||||
ldevdata.Narrays = hostptr->Narrays; //later - logic for dealing with when this is not true
|
||||
ldevdata.hostarrayptrs = NULL;
|
||||
|
||||
|
||||
cudaMemcpy(ldevdata.N,hostptr->N,sizeof(int)*Narrays,cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(ldevdata.devarrayptrs,hostptr->devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyHostToDevice);
|
||||
|
||||
cudaMemcpy(hostptr->devptr,&ldevdata,sizeof(curarray<T>),cudaMemcpyHostToDevice);
|
||||
|
||||
cuda_errortrap("curarray_push cuda error:");
|
||||
|
||||
cudaSetDevice(olddev);
|
||||
|
||||
delete[] ldevarrayptrs;
|
||||
delete[] devN;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename T> int curarray_pull(curarray<T> *hostptr)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
int olddev;
|
||||
|
||||
curarray<T> ldevdata;
|
||||
T** ldevarrayptrs = NULL;
|
||||
int *devN = NULL;
|
||||
|
||||
if(hostptr==NULL) return -1;
|
||||
|
||||
cudaGetDevice(&olddev);
|
||||
cudaSetDevice(hostptr->device);
|
||||
|
||||
cuda_errortrap("dbg1");
|
||||
|
||||
int Narrays = hostptr->Narrays;
|
||||
|
||||
cudaMemcpy(&ldevdata,hostptr->devptr,sizeof(curarray<T>),cudaMemcpyDeviceToHost);
|
||||
ldevarrayptrs = new(std::nothrow) T*[Narrays];
|
||||
devN = new(std::nothrow) int[Narrays];
|
||||
|
||||
cuda_errortrap("dbg2");
|
||||
|
||||
cudaMemcpy(ldevarrayptrs,ldevdata.devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(devN,ldevdata.N,sizeof(int)*Narrays,cudaMemcpyDeviceToHost);
|
||||
|
||||
cuda_errortrap("dbg3");
|
||||
char dbgjnk[50];
|
||||
|
||||
int I;
|
||||
for(I=0;I<Narrays;I++)
|
||||
{
|
||||
//check to see that host size is the same as device size, and that
|
||||
//the host device pointer is the same as the device device pointer
|
||||
if(hostptr->devarrayptrs[I] != ldevarrayptrs[I])
|
||||
{
|
||||
hostptr->devarrayptrs[I] = ldevarrayptrs[I];
|
||||
}
|
||||
|
||||
if(hostptr->N[I]!=devN[I])
|
||||
{
|
||||
if(hostptr->hostarrayptrs[I]!=NULL)
|
||||
{
|
||||
delete[] hostptr->hostarrayptrs[I];
|
||||
hostptr->hostarrayptrs[I] = NULL;
|
||||
}
|
||||
|
||||
if(devN[I]>0)
|
||||
{
|
||||
hostptr->hostarrayptrs[I] = new(std::nothrow) T[devN[I]];
|
||||
hostptr->N[I] = devN[I];
|
||||
}
|
||||
else
|
||||
{
|
||||
hostptr->N[I] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if(hostptr->hostarrayptrs[I]!=NULL && hostptr->devarrayptrs[I]!=NULL)
|
||||
{
|
||||
cudaMemcpy(hostptr->hostarrayptrs[I],hostptr->devarrayptrs[I],sizeof(T)*hostptr->N[I],cudaMemcpyDeviceToHost);
|
||||
sprintf(dbgjnk,"%d dbg %d",I,hostptr->N[I]);
|
||||
cuda_errortrap(dbgjnk);
|
||||
}
|
||||
} //for each array
|
||||
|
||||
//for the pull operation, I don't think any update of the device data structure is necessary
|
||||
|
||||
cudaSetDevice(olddev);
|
||||
|
||||
delete[] ldevarrayptrs;
|
||||
delete[] devN;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
//Member form of curarray_push: pushes this object and returns its result
template<typename T> __host__ int curarray<T>::push()
{
    const int status = curarray_push(this);
    return status;
}
|
||||
//Member form of curarray_pull: pulls into this object and returns its result
template<typename T> __host__ int curarray<T>::pull()
{
    const int status = curarray_pull(this);
    return status;
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#memory-allocation-and-lifetime
|
||||
|
||||
cudaMalloc() and cudaFree() have distinct semantics between the host and
|
||||
device environments. When invoked from the host, cudaMalloc() allocates a
|
||||
new region from unused device memory. When invoked from the device runtime
|
||||
these functions map to device-side malloc() and free(). This implies that
|
||||
within the device environment the total allocatable memory is limited to the
|
||||
device malloc() heap size, which may be smaller than the available unused
|
||||
device memory. Also, it is an error to invoke cudaFree() from the host
|
||||
program on a pointer which was allocated by cudaMalloc() on the device
|
||||
or vice-versa.
|
||||
|
||||
So, basically this entire function is not going to work. I'll be unable to resize within
|
||||
a kernel.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
||||
template<typename T> __device__ int curarray<T>::dev_resizearray(int arraynum, int arraysize)
|
||||
{
|
||||
int ret = 0;
|
||||
T* newptr = NULL;
|
||||
int I;
|
||||
T def;
|
||||
|
||||
if(arraynum>=0 && arraynum<Narrays)
|
||||
{
|
||||
if(N[arraynum]!=arraysize)
|
||||
{
|
||||
if(arraysize<=0)
|
||||
{
|
||||
if(devarrayptrs[arraynum]!=NULL) cudaFree(devarrayptrs[arraynum]);
|
||||
devarrayptrs[arraynum] = NULL;
|
||||
N[arraynum] = 0;
|
||||
ret = 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
cudaMalloc(&newptr,arraysize*sizeof(T));
|
||||
if(newptr!=NULL)
|
||||
{
|
||||
//do I want to assume there is a copy operator? (operator=)
|
||||
//for now, yes - write a more restrictive class later if I don't want it
|
||||
if(devarrayptrs[arraynum]!=NULL)
|
||||
{
|
||||
for(I=0;I<N[arraynum]&&I<arraysize;I++)
|
||||
{
|
||||
newptr[I] = devarrayptrs[arraynum][I];
|
||||
}
|
||||
}
|
||||
for(I=N[arraynum];I<arraysize;I++)
|
||||
{
|
||||
newptr[I] = def;
|
||||
}
|
||||
|
||||
if(devarrayptrs[arraynum]!=NULL) cudaFree(devarrayptrs[arraynum]);
|
||||
devarrayptrs[arraynum] = newptr;
|
||||
N[arraynum] = arraysize;
|
||||
ret = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
// Resizes the host-side storage of array number `arraynum` to `arraysize`
// elements.
//
// Only hostarrayptrs[arraynum] and N[arraynum] are modified here; the device
// pointers are not touched by this function.
//
// Existing elements are preserved up to min(old size, new size). Any
// additional elements are set to a value-initialized T (zero for arithmetic
// types), so new slots never contain indeterminate data.
//
// Parameters:
//   arraynum  - index of the array to resize, valid range [0, Narrays)
//   arraysize - requested element count; a value <= 0 frees the host array
//
// Returns:
//    1 on success (including when the size already matches),
//    0 when arraynum is out of range,
//   -1 when the allocation fails (the old array is left untouched).
template<typename T> __host__ int curarray<T>::resizearray(int arraynum, int arraysize)
{
    // Reject indices outside [0, Narrays).
    if(arraynum<0 || arraynum>=Narrays)
    {
        return 0;
    }

    // Nothing to do when the recorded size already matches the request.
    if(N[arraynum]==arraysize)
    {
        return 1;
    }

    // A non-positive size releases the host storage entirely.
    if(arraysize<=0)
    {
        delete[] hostarrayptrs[arraynum];
        hostarrayptrs[arraynum] = NULL;
        N[arraynum] = 0;
        return 1;
    }

    T* newptr = new(std::nothrow) T[arraysize];
    if(newptr==NULL)
    {
        return -1;
    }

    // Value-initialized fill element. A plain "T def;" would leave trivial
    // types (int, float, double, ...) indeterminate.
    T def = T();

    // Number of old elements carried over. It stays 0 when there is no old
    // host array, so the fill loop below then covers the whole new array,
    // and it is clamped so that an inconsistent negative N cannot produce
    // out-of-bounds accesses.
    int ncopy = 0;
    if(hostarrayptrs[arraynum]!=NULL)
    {
        ncopy = (N[arraynum]<arraysize) ? N[arraynum] : arraysize;
        if(ncopy<0)
        {
            ncopy = 0;
        }
        //assumes T provides a copy assignment operator (operator=)
        for(int I=0;I<ncopy;I++)
        {
            newptr[I] = hostarrayptrs[arraynum][I];
        }
    }
    for(int I=ncopy;I<arraysize;I++)
    {
        newptr[I] = def;
    }

    // Swap in the new storage and record its size.
    delete[] hostarrayptrs[arraynum];
    hostarrayptrs[arraynum] = newptr;
    N[arraynum] = arraysize;

    return 1;
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,84 @@
|
||||
#ifndef __CUVECT2_HPP__
|
||||
#define __CUVECT2_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
class cuvect2
|
||||
{
|
||||
public:
|
||||
double x;
|
||||
double y;
|
||||
|
||||
|
||||
__host__ __device__ cuvect2();
|
||||
__host__ __device__ ~cuvect2();
|
||||
__host__ __device__ cuvect2(double _x, double _y);
|
||||
|
||||
__host__ __device__ double& operator[](const int I);
|
||||
__host__ __device__ const double& operator[](const int I) const;
|
||||
|
||||
__host__ __device__ cuvect2 operator+(cuvect2 lhs);
|
||||
__host__ __device__ cuvect2 operator-(cuvect2 lhs);
|
||||
__host__ __device__ cuvect2 operator*(double lhs);
|
||||
__host__ __device__ cuvect2 operator/(double lhs);
|
||||
};
|
||||
|
||||
class cumat2
|
||||
{
|
||||
public:
|
||||
double dat[4];
|
||||
|
||||
__host__ __device__ cumat2();
|
||||
__host__ __device__ ~cumat2();
|
||||
__host__ __device__ double& operator[](const int I);
|
||||
__host__ __device__ double& operator()(const int I, const int J);
|
||||
__host__ __device__ double& at(const int I, const int J);
|
||||
|
||||
__host__ __device__ cumat2 operator+(cumat2 lhs);
|
||||
__host__ __device__ cumat2 operator-(cumat2 lhs);
|
||||
__host__ __device__ cumat2 operator*(double lhs);
|
||||
__host__ __device__ cumat2 operator/(double lhs);
|
||||
__host__ __device__ cuvect2 operator*(cuvect2 lhs);
|
||||
__host__ __device__ cumat2 operator*(cumat2 lhs);
|
||||
|
||||
__host__ __device__ double det();
|
||||
__host__ __device__ cumat2 transpose();
|
||||
__host__ __device__ cumat2 inverse();
|
||||
};
|
||||
|
||||
__host__ __device__ double cuvect2_dot(cuvect2 a, cuvect2 b);
|
||||
__host__ __device__ double cuvect2_cross(cuvect2 a, cuvect2 b);
|
||||
__host__ __device__ double cuvect2_norm(cuvect2 a);
|
||||
__host__ __device__ cuvect2 cuvect2_normalize(cuvect2 a);
|
||||
__host__ __device__ cuvect2 cuvect2_proj(cuvect2 a, cuvect2 b);
|
||||
|
||||
//2x2 matrix operations
|
||||
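//matrix order is assumed to be mat[I,J] = mat[I+2*J] (NOTE(review): the original text said I+3*J, which overruns a 4-element array - confirm against the implementation)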
|
||||
|
||||
//transpose a 2x2 matrix in place
|
||||
__host__ __device__ void mat2_transpose(double *mat2inout);
|
||||
|
||||
//copies src to dest
|
||||
__host__ __device__ void mat2_copy(double *mat2_dest, const double *mat2_src);
|
||||
|
||||
//inverts mat2inout[4] in place
|
||||
__host__ __device__ void mat2_inverse(double *mat2inout);
|
||||
|
||||
//rotation matrix from angle
|
||||
__host__ __device__ void mat2_rot_from_angle(double angle, double *mat2);
|
||||
|
||||
//multiplies c = a*b
|
||||
__host__ __device__ void mat2_mult(double *mat2a, double *mat2b, double *mat2c);
|
||||
|
||||
// ret = a*b
|
||||
__host__ __device__ cuvect2 mat2_mult(double *mat2a, cuvect2 b);
|
||||
|
||||
|
||||
void test_cuvect2_1();
|
||||
|
||||
|
||||
}; //end namespace amscuda
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,84 @@
|
||||
#ifndef __CUVECT2F_HPP__
|
||||
#define __CUVECT2F_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
class cuvect2f
|
||||
{
|
||||
public:
|
||||
float x;
|
||||
float y;
|
||||
|
||||
|
||||
__host__ __device__ cuvect2f();
|
||||
__host__ __device__ ~cuvect2f();
|
||||
__host__ __device__ cuvect2f(float _x, float _y);
|
||||
|
||||
__host__ __device__ float& operator[](const int I);
|
||||
__host__ __device__ const float& operator[](const int I) const;
|
||||
|
||||
__host__ __device__ cuvect2f operator+(cuvect2f lhs);
|
||||
__host__ __device__ cuvect2f operator-(cuvect2f lhs);
|
||||
__host__ __device__ cuvect2f operator*(float lhs);
|
||||
__host__ __device__ cuvect2f operator/(float lhs);
|
||||
};
|
||||
|
||||
class cumat2f
|
||||
{
|
||||
public:
|
||||
float dat[4];
|
||||
|
||||
__host__ __device__ cumat2f();
|
||||
__host__ __device__ ~cumat2f();
|
||||
__host__ __device__ float& operator[](const int I);
|
||||
__host__ __device__ float& operator()(const int I, const int J);
|
||||
__host__ __device__ float& at(const int I, const int J);
|
||||
|
||||
__host__ __device__ cumat2f operator+(cumat2f lhs);
|
||||
__host__ __device__ cumat2f operator-(cumat2f lhs);
|
||||
__host__ __device__ cumat2f operator*(float lhs);
|
||||
__host__ __device__ cumat2f operator/(float lhs);
|
||||
__host__ __device__ cuvect2f operator*(cuvect2f lhs);
|
||||
__host__ __device__ cumat2f operator*(cumat2f lhs);
|
||||
|
||||
__host__ __device__ float det();
|
||||
__host__ __device__ cumat2f transpose();
|
||||
__host__ __device__ cumat2f inverse();
|
||||
};
|
||||
|
||||
__host__ __device__ float cuvect2f_dot(cuvect2f a, cuvect2f b);
|
||||
__host__ __device__ float cuvect2f_cross(cuvect2f a, cuvect2f b);
|
||||
__host__ __device__ float cuvect2f_norm(cuvect2f a);
|
||||
__host__ __device__ cuvect2f cuvect2f_normalize(cuvect2f a);
|
||||
__host__ __device__ cuvect2f cuvect2f_proj(cuvect2f a, cuvect2f b);
|
||||
|
||||
//2x2 matrix operations
|
||||
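//matrix order is assumed to be mat[I,J] = mat[I+2*J] (NOTE(review): the original text said I+3*J, which overruns a 4-element array - confirm against the implementation)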
|
||||
|
||||
//transpose a 2x2 matrix in place
|
||||
__host__ __device__ void mat2f_transpose(float *mat2inout);
|
||||
|
||||
//copies src to dest
|
||||
__host__ __device__ void mat2f_copy(float *mat2f_dest, const float *mat2f_src);
|
||||
|
||||
//inverts mat2inout[4] in place
|
||||
__host__ __device__ void mat2f_inverse(float *mat2inout);
|
||||
|
||||
//rotation matrix from angle
|
||||
__host__ __device__ void mat2f_rot_from_angle(float angle, float *mat2);
|
||||
|
||||
//multiplies c = a*b
|
||||
__host__ __device__ void mat2f_mult(float *mat2a, float *mat2b, float *mat2c);
|
||||
|
||||
// ret = a*b
|
||||
__host__ __device__ cuvect2f mat2f_mult(float *mat2a, cuvect2f b);
|
||||
|
||||
|
||||
void test_cuvect2f_1();
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,86 @@
|
||||
#ifndef __CUVECT3_HPP__
|
||||
#define __CUVECT3_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
class cuvect3
|
||||
{
|
||||
public:
|
||||
double x;
|
||||
double y;
|
||||
double z;
|
||||
|
||||
__host__ __device__ cuvect3();
|
||||
__host__ __device__ ~cuvect3();
|
||||
__host__ __device__ cuvect3(double _x, double _y, double _z);
|
||||
|
||||
|
||||
__host__ __device__ double& operator[](const int I);
|
||||
__host__ __device__ const double& operator[](const int I) const;
|
||||
|
||||
__host__ __device__ cuvect3 operator+(cuvect3 lhs);
|
||||
__host__ __device__ cuvect3 operator-(cuvect3 lhs);
|
||||
__host__ __device__ cuvect3 operator*(double lhs);
|
||||
__host__ __device__ cuvect3 operator/(double lhs);
|
||||
};
|
||||
|
||||
class cumat3
|
||||
{
|
||||
public:
|
||||
double dat[9];
|
||||
|
||||
__host__ __device__ cumat3();
|
||||
__host__ __device__ ~cumat3();
|
||||
__host__ __device__ double& operator[](const int I);
|
||||
__host__ __device__ double& operator()(const int I, const int J);
|
||||
__host__ __device__ double& at(const int I, const int J);
|
||||
|
||||
__host__ __device__ cumat3 operator+(cumat3 lhs);
|
||||
__host__ __device__ cumat3 operator-(cumat3 lhs);
|
||||
__host__ __device__ cumat3 operator*(double lhs);
|
||||
__host__ __device__ cumat3 operator/(double lhs);
|
||||
__host__ __device__ cuvect3 operator*(cuvect3 lhs);
|
||||
__host__ __device__ cumat3 operator*(cumat3 lhs);
|
||||
|
||||
__host__ __device__ double det();
|
||||
__host__ __device__ cumat3 transpose();
|
||||
__host__ __device__ cumat3 inverse();
|
||||
};
|
||||
|
||||
__host__ __device__ double cuvect3_dot(cuvect3 a, cuvect3 b);
|
||||
__host__ __device__ cuvect3 cuvect3_cross(cuvect3 a, cuvect3 b);
|
||||
__host__ __device__ double cuvect3_norm(cuvect3 a);
|
||||
__host__ __device__ cuvect3 cuvect3_normalize(cuvect3 a);
|
||||
__host__ __device__ cuvect3 cuvect3_proj(cuvect3 a, cuvect3 b);
|
||||
|
||||
//3x3 matrix operations
|
||||
//matrix order is assumed to be mat[I,J] = mat[I+3*J]
|
||||
|
||||
//transposes a 3x3 (9 element) matrix
|
||||
__host__ __device__ void mat3_transpose(double *mat3inout);
|
||||
|
||||
//copies src to dest
|
||||
__host__ __device__ void mat3_copy(double *mat3_dest, const double *mat3_src);
|
||||
|
||||
//returns determinant of 3x3 matrix
|
||||
__host__ __device__ double mat3_det(double *mat3in);
|
||||
|
||||
//inverts a 3x3 (9 element) matrix
|
||||
__host__ __device__ void mat3_inverse(double *mat3inout);
|
||||
|
||||
__host__ __device__ cuvect3 mat3_mult(double *mat3in, cuvect3 cvin);
|
||||
__host__ __device__ void mat3_mult(double *matina, double *matinb, double *matout);
|
||||
|
||||
__host__ __device__ void mat3_hodgedual(cuvect3 vecin, double *matout);
|
||||
// NOTE(review): vecout is taken by value and the function returns void, so
// nothing written to vecout inside the function can reach the caller.
// Presumably it was meant to be an output parameter (reference or pointer) -
// confirm against the definition before relying on this overload.
__host__ __device__ void mat3_hodgedual(double *matin, cuvect3 vecout);
|
||||
|
||||
//returns direction cosine rotation matrix from axis and angle
|
||||
__host__ __device__ void mat3_rot_from_axisangle(cuvect3 axis, double angle, double *matout);
|
||||
|
||||
__host__ void test_cudavect_logic1();
|
||||
|
||||
}; //end namespace amscuda
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,86 @@
|
||||
#ifndef __CUVECT3F_HPP__
|
||||
#define __CUVECT3F_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
class cuvect3f
|
||||
{
|
||||
public:
|
||||
float x;
|
||||
float y;
|
||||
float z;
|
||||
|
||||
__host__ __device__ cuvect3f();
|
||||
__host__ __device__ ~cuvect3f();
|
||||
__host__ __device__ cuvect3f(float _x, float _y, float _z);
|
||||
|
||||
|
||||
__host__ __device__ float& operator[](const int I);
|
||||
__host__ __device__ const float& operator[](const int I) const;
|
||||
|
||||
__host__ __device__ cuvect3f operator+(cuvect3f lhs);
|
||||
__host__ __device__ cuvect3f operator-(cuvect3f lhs);
|
||||
__host__ __device__ cuvect3f operator*(float lhs);
|
||||
__host__ __device__ cuvect3f operator/(float lhs);
|
||||
};
|
||||
|
||||
class cumat3f
|
||||
{
|
||||
public:
|
||||
float dat[9];
|
||||
|
||||
__host__ __device__ cumat3f();
|
||||
__host__ __device__ ~cumat3f();
|
||||
__host__ __device__ float& operator[](const int I);
|
||||
__host__ __device__ float& operator()(const int I, const int J);
|
||||
__host__ __device__ float& at(const int I, const int J);
|
||||
|
||||
__host__ __device__ cumat3f operator+(cumat3f lhs);
|
||||
__host__ __device__ cumat3f operator-(cumat3f lhs);
|
||||
__host__ __device__ cumat3f operator*(float lhs);
|
||||
__host__ __device__ cumat3f operator/(float lhs);
|
||||
__host__ __device__ cuvect3f operator*(cuvect3f lhs);
|
||||
__host__ __device__ cumat3f operator*(cumat3f lhs);
|
||||
|
||||
__host__ __device__ float det();
|
||||
__host__ __device__ cumat3f transpose();
|
||||
__host__ __device__ cumat3f inverse();
|
||||
};
|
||||
|
||||
__host__ __device__ float cuvect3f_dot(cuvect3f a, cuvect3f b);
|
||||
__host__ __device__ cuvect3f cuvect3f_cross(cuvect3f a, cuvect3f b);
|
||||
__host__ __device__ float cuvect3f_norm(cuvect3f a);
|
||||
__host__ __device__ cuvect3f cuvect3f_normalize(cuvect3f a);
|
||||
__host__ __device__ cuvect3f cuvect3f_proj(cuvect3f a, cuvect3f b);
|
||||
|
||||
//3x3 matrix operations
|
||||
//matrix order is assumed to be mat[I,J] = mat[I+3*J]
|
||||
|
||||
//transposes a 3x3 (9 element) matrix
|
||||
__host__ __device__ void mat3f_transpose(float *mat3inout);
|
||||
|
||||
//copies src to dest
|
||||
__host__ __device__ void mat3f_copy(float *mat3f_dest, const float *mat3f_src);
|
||||
|
||||
//returns determinant of 3x3 matrix
|
||||
__host__ __device__ float mat3f_det(float *mat3in);
|
||||
|
||||
//inverts a 3x3 (9 element) matrix
|
||||
__host__ __device__ void mat3f_inverse(float *mat3inout);
|
||||
|
||||
__host__ __device__ cuvect3f mat3f_mult(float *mat3in, cuvect3f cvin);
|
||||
__host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout);
|
||||
|
||||
__host__ __device__ void mat3f_hodgedual(cuvect3f vecin, float *matout);
|
||||
// NOTE(review): vecout is taken by value and the function returns void, so
// nothing written to vecout inside the function can reach the caller.
// Presumably it was meant to be an output parameter (reference or pointer) -
// confirm against the definition before relying on this overload.
__host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f vecout);
|
||||
|
||||
//returns direction cosine rotation matrix from axis and angle
|
||||
__host__ __device__ void mat3f_rot_from_axisangle(cuvect3f axis, float angle, float *matout);
|
||||
|
||||
__host__ void test_cudavectf_logic1();
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,59 @@
|
||||
#ifndef __CUVECT4_HPP__
|
||||
#define __CUVECT4_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
class cuvect4
|
||||
{
|
||||
public:
|
||||
double x;
|
||||
double y;
|
||||
double z;
|
||||
double w;
|
||||
|
||||
__host__ __device__ cuvect4();
|
||||
__host__ __device__ ~cuvect4();
|
||||
__host__ __device__ cuvect4(double _x, double _y, double _z, double _w);
|
||||
|
||||
__host__ __device__ double& operator[](const int I);
|
||||
__host__ __device__ const double& operator[](const int I) const;
|
||||
|
||||
__host__ __device__ cuvect4 operator+(cuvect4 lhs);
|
||||
__host__ __device__ cuvect4 operator-(cuvect4 lhs);
|
||||
__host__ __device__ cuvect4 operator*(double lhs);
|
||||
__host__ __device__ cuvect4 operator/(double lhs);
|
||||
};
|
||||
|
||||
class cumat4
|
||||
{
|
||||
public:
|
||||
double dat[16];
|
||||
|
||||
__host__ __device__ cumat4();
|
||||
__host__ __device__ ~cumat4();
|
||||
__host__ __device__ double& operator[](const int I);
|
||||
__host__ __device__ double& operator()(const int I, const int J);
|
||||
__host__ __device__ double& at(const int I, const int J);
|
||||
|
||||
__host__ __device__ cumat4 operator+(cumat4 lhs);
|
||||
__host__ __device__ cumat4 operator-(cumat4 lhs);
|
||||
__host__ __device__ cumat4 operator*(double lhs);
|
||||
__host__ __device__ cumat4 operator/(double lhs);
|
||||
__host__ __device__ cuvect4 operator*(cuvect4 lhs);
|
||||
__host__ __device__ cumat4 operator*(cumat4 lhs);
|
||||
|
||||
__host__ __device__ double det();
|
||||
__host__ __device__ cumat4 transpose();
|
||||
__host__ __device__ cumat4 inverse();
|
||||
};
|
||||
|
||||
__host__ __device__ double cuvect4_dot(cuvect4 a, cuvect4 b);
|
||||
__host__ __device__ double cuvect4_norm(cuvect4 a);
|
||||
__host__ __device__ cuvect4 cuvect4_normalize(cuvect4 a);
|
||||
__host__ __device__ cuvect4 cuvect4_proj(cuvect4 a, cuvect4 b);
|
||||
|
||||
}; //end namespace amscuda
|
||||
|
||||
#endif
|
||||
|
@ -0,0 +1,60 @@
|
||||
#ifndef __CUVECT4F_HPP__
|
||||
#define __CUVECT4F_HPP__
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
class cuvect4f
|
||||
{
|
||||
public:
|
||||
float x;
|
||||
float y;
|
||||
float z;
|
||||
float w;
|
||||
|
||||
__host__ __device__ cuvect4f();
|
||||
__host__ __device__ ~cuvect4f();
|
||||
__host__ __device__ cuvect4f(float _x, float _y, float _z, float _w);
|
||||
|
||||
__host__ __device__ float& operator[](const int I);
|
||||
__host__ __device__ const float& operator[](const int I) const;
|
||||
|
||||
__host__ __device__ cuvect4f operator+(cuvect4f lhs);
|
||||
__host__ __device__ cuvect4f operator-(cuvect4f lhs);
|
||||
__host__ __device__ cuvect4f operator*(float lhs);
|
||||
__host__ __device__ cuvect4f operator/(float lhs);
|
||||
};
|
||||
|
||||
class cumat4f
|
||||
{
|
||||
public:
|
||||
float dat[16];
|
||||
|
||||
__host__ __device__ cumat4f();
|
||||
__host__ __device__ ~cumat4f();
|
||||
__host__ __device__ float& operator[](const int I);
|
||||
__host__ __device__ float& operator()(const int I, const int J);
|
||||
__host__ __device__ float& at(const int I, const int J);
|
||||
|
||||
__host__ __device__ cumat4f operator+(cumat4f lhs);
|
||||
__host__ __device__ cumat4f operator-(cumat4f lhs);
|
||||
__host__ __device__ cumat4f operator*(float lhs);
|
||||
__host__ __device__ cumat4f operator/(float lhs);
|
||||
__host__ __device__ cuvect4f operator*(cuvect4f lhs);
|
||||
__host__ __device__ cumat4f operator*(cumat4f lhs);
|
||||
|
||||
__host__ __device__ float det();
|
||||
__host__ __device__ cumat4f transpose();
|
||||
__host__ __device__ cumat4f inverse();
|
||||
};
|
||||
|
||||
__host__ __device__ float cuvect4f_dot(cuvect4f a, cuvect4f b);
|
||||
__host__ __device__ float cuvect4f_norm(cuvect4f a);
|
||||
__host__ __device__ cuvect4f cuvect4f_normalize(cuvect4f a);
|
||||
__host__ __device__ cuvect4f cuvect4f_proj(cuvect4f a, cuvect4f b);
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,23 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import os,sys,math;
|
||||
from compscripts.complib2 import *;
|
||||
|
||||
# Usage: with no arguments, build the library and the test program, then run
# the test executable. With "clean" as the first argument, only delete the
# object files stored under ./objstore and stop.
args = sys.argv
if(len(args)>=2 and args[1]=="clean"):
    for obj_path in flist('./objstore',recurse=True,exts=['.o']):
        os.remove('{}'.format(obj_path))
    # sys.exit() instead of exit(): the bare exit() helper is injected by the
    # site module and is not guaranteed to exist in every interpreter setup.
    sys.exit(0)

# Build the library first, then the test program. Stop at the first failing
# step instead of going on to run a stale or missing test binary.
for build_cmd in ('python3 ./compscripts/linux64.makelib.py',
                  'python3 ./compscripts/linux64.maketest.py'):
    status = os.system(build_cmd)
    if(status!=0):
        sys.stderr.write('build step failed: {}\n'.format(build_cmd))
        sys.exit(1)

# obj_list = flist('./src',recurse=True,exts=['.o'])
# for o in obj_list:
#     os.remove('{}'.format(o))

#os.chdir('./bin_linux64')
# Run the freshly built test executable.
callproc('./bin_linux64/test')
#os.chdir('..')
|
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import os,sys,math;
|
||||
from compscripts.complib2 import *;
|
||||
|
||||
|
||||
# Build the library, then the test program, via the Windows nvcc scripts.
for build_cmd in ('python ./compscripts/winnvcc.makelib.py',
                  'python ./compscripts/winnvcc.maketest.py'):
    os.system(build_cmd)

# Delete every object file found anywhere below the current directory.
for obj_path in flist('./',recurse=True,exts=['.o']):
    os.remove('{}'.format(obj_path))

#os.chdir('./bin_winx64')
# Run the freshly built test executable.
callproc('.\\bin_winx64\\test.exe')
#os.chdir('..')
|
||||
|
@ -0,0 +1,476 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
namespace cmp
|
||||
{
|
||||
|
||||
// Default constructor: produces the complex value 0 + 0i.
__host__ __device__ cucomp128::cucomp128() : real(0.0), imag(0.0)
{
}
|
||||
|
||||
// Destructor: resets both components to zero; it does nothing else.
__host__ __device__ cucomp128::~cucomp128()
{
    this->real = 0.0;
    this->imag = 0.0;
}
|
||||
|
||||
// Copy constructor: duplicates the real and imaginary parts of `other`.
__host__ __device__ cucomp128::cucomp128(const cucomp128 &other) : real(other.real), imag(other.imag)
{
}
|
||||
|
||||
// Converting constructor: builds the complex value other + 0i from a real.
__host__ __device__ cucomp128::cucomp128(const double &other) : real(other), imag(0.0)
{
}
|
||||
|
||||
// Assignment from a mutable complex value; returns a mutable reference to
// this object so assignments can be chained.
__host__ __device__ cucomp128& cucomp128::operator=(cucomp128& other)
{
    this->real = other.real;
    this->imag = other.imag;
    return *this;
}
|
||||
|
||||
// Assignment from a const complex value; returns a const reference to this
// object.
__host__ __device__ const cucomp128& cucomp128::operator=(const cucomp128& other)
{
    real = other.real;
    imag = other.imag;
    return *this;
}
|
||||
|
||||
// Assignment from a mutable real value: the result is other + 0i. Returns a
// mutable reference to this object.
__host__ __device__ cucomp128& cucomp128::operator=(double& other)
{
    this->real = other;
    this->imag = 0.0;
    return *this;
}
|
||||
|
||||
// Assignment from a const real value: the result is other + 0i. Returns a
// const reference to this object.
__host__ __device__ const cucomp128& cucomp128::operator=(const double& other)
{
    real = other;
    imag = 0.0;
    return *this;
}
|
||||
|
||||
// Mutable component access: index 0 selects the real part, every other
// index selects the imaginary part.
__host__ __device__ double& cucomp128::operator[](int& ind)
{
    return (ind==0) ? this->real : this->imag;
}
|
||||
|
||||
// Read-only component access: index 0 selects the real part, every other
// index selects the imaginary part.
__host__ __device__ const double& cucomp128::operator[](const int& ind) const
{
    return (ind==0) ? this->real : this->imag;
}
|
||||
|
||||
// Complex addition: returns (*this) + z, component by component.
__host__ __device__ cucomp128 cucomp128::operator+(const cucomp128& z)
{
    cucomp128 sum(*this);
    sum.real += z.real;
    sum.imag += z.imag;
    return sum;
}
|
||||
|
||||
// Complex subtraction: returns (*this) - z, component by component.
__host__ __device__ cucomp128 cucomp128::operator-(const cucomp128& z)
{
    cucomp128 diff(*this);
    diff.real -= z.real;
    diff.imag -= z.imag;
    return diff;
}
|
||||
|
||||
// Complex multiplication: (a+bi)(c+di) = (ac - bd) + (bc + ad)i.
__host__ __device__ cucomp128 cucomp128::operator*(const cucomp128& z)
{
    cucomp128 prod;
    prod.real = (this->real*z.real - this->imag*z.imag);
    prod.imag = (this->imag*z.real + this->real*z.imag);
    return prod;
}
|
||||
|
||||
// Complex division: (*this) / z, computed as (*this) * conj(z) / |z|^2.
// When |z|^2 is not strictly positive, both components of the result are
// set to finf.
__host__ __device__ cucomp128 cucomp128::operator/(const cucomp128& z)
{
    cucomp128 quot;
    double zm2 = z.real*z.real+z.imag*z.imag;

    // Guard: no usable divisor magnitude.
    if(!(zm2>0.0))
    {
        quot.real = (double) finf;
        quot.imag = (double) finf;
        return quot;
    }

    quot.real = (this->real*z.real+this->imag*z.imag)/zm2;
    quot.imag = (this->imag*z.real-this->real*z.imag)/zm2;
    return quot;
}
|
||||
|
||||
// Adds a real scalar: only the real part changes.
__host__ __device__ cucomp128 cucomp128::operator+(const double& z)
{
    cucomp128 sum;
    sum.imag = this->imag;
    sum.real = this->real + z;
    return sum;
}
|
||||
// Subtracts a real scalar: only the real part changes.
__host__ __device__ cucomp128 cucomp128::operator-(const double& z)
{
    cucomp128 diff;
    diff.imag = this->imag;
    diff.real = this->real-z;
    return diff;
}
|
||||
// Scales both components by a real scalar.
__host__ __device__ cucomp128 cucomp128::operator*(const double& z)
{
    cucomp128 scaled;
    scaled.real = this->real*z;
    scaled.imag = this->imag*z;
    return scaled;
}
|
||||
// Divides both components by a real scalar. A zero divisor yields finf in
// both components.
__host__ __device__ cucomp128 cucomp128::operator/(const double& z)
{
    cucomp128 quot;
    if(z==0.0f)
    {
        quot.real = finf;
        quot.imag = finf;
    }
    else
    {
        quot.real = real/z;
        quot.imag = imag/z;
    }
    return quot;
}
|
||||
|
||||
// Exact equality: true only when both components compare equal.
__host__ __device__ bool cucomp128::operator==(const cucomp128& z) const
{
    return (z.real == real) && (z.imag == imag);
}
|
||||
|
||||
// Inequality: the logical negation of operator==.
__host__ __device__ bool cucomp128::operator!=(const cucomp128& z) const
{
    const bool same = (*this==z);
    return !same;
}
|
||||
|
||||
//sort first by real value, then by imaginary value
|
||||
//this is done so that an ordering exists, as long as two values aren't equal
|
||||
// Lexicographic "greater than": compares real parts first and falls back to
// the imaginary parts only when the real parts are equal.
__host__ __device__ bool cucomp128::operator>(const cucomp128& z) const
{
    if(this->real!=z.real)
    {
        // Real parts differ (or are unordered): they alone decide.
        return this->real>z.real;
    }
    return this->imag>z.imag;
}
|
||||
|
||||
// Lexicographic "less than": compares real parts first and falls back to
// the imaginary parts only when the real parts are equal.
__host__ __device__ bool cucomp128::operator<(const cucomp128& z) const
{
    if(this->real!=z.real)
    {
        // Real parts differ (or are unordered): they alone decide.
        return this->real<z.real;
    }
    return this->imag<z.imag;
}
|
||||
|
||||
// Unary minus: negates both components of z.
__host__ __device__ cucomp128 operator-(const cucomp128 &z)
{
    cucomp128 neg(z);
    neg.real = -neg.real;
    neg.imag = -neg.imag;
    return neg;
}
|
||||
|
||||
// "Greater than or equal" under the lexicographic ordering of operator>.
__host__ __device__ bool cucomp128::operator>=(const cucomp128& z) const
{
    if(*this==z)
    {
        return true;
    }
    return (*this>z);
}
|
||||
|
||||
// "Less than or equal" under the lexicographic ordering of operator<.
__host__ __device__ bool cucomp128::operator<=(const cucomp128& z) const
{
    if(*this==z)
    {
        return true;
    }
    return (*this<z);
}
|
||||
|
||||
// True when either component is NaN (uses the global-namespace isnan()).
__host__ __device__ bool cucomp128::isnan() const
{
    if(::isnan(this->real))
    {
        return true;
    }
    if(::isnan(this->imag))
    {
        return true;
    }
    return false;
}
|
||||
|
||||
// True when either component is infinite (uses the global-namespace
// isinf() from math.h, not this member).
__host__ __device__ bool cucomp128::isinf() const
{
    if(::isinf(this->real))
    {
        return true;
    }
    if(::isinf(this->imag))
    {
        return true;
    }
    return false;
}
|
||||
|
||||
// True when the imaginary part is exactly zero.
__host__ __device__ bool cucomp128::isreal() const
{
    return (imag==0.0f);
}
|
||||
|
||||
// True when the real part is exactly zero.
__host__ __device__ bool cucomp128::isimag() const
{
    return (real==0.0f);
}
|
||||
|
||||
// True when both components are exactly zero.
__host__ __device__ bool cucomp128::iszero() const
{
    return (real==0.0f) && (imag==0.0f);
}
|
||||
|
||||
// Returns the argument of this value, as computed by
// amscuda::arg(real, imag).
__host__ __device__ double cucomp128::arg() const
{
    const double angle = (double) amscuda::arg((double)real,(double)imag);
    return angle;
}
|
||||
|
||||
// Returns the magnitude |z| = sqrt(real^2 + imag^2).
//
// hypot() is used instead of squaring the components by hand so that the
// intermediate squares cannot overflow to infinity or underflow to zero
// when the magnitude itself is representable.
__host__ __device__ double cucomp128::mag() const
{
    return ::hypot(real,imag);
}
|
||||
|
||||
// Returns the complex conjugate: same real part, negated imaginary part.
__host__ __device__ cucomp128 cucomp128::conj() const
{
    cucomp128 mirrored(*this);
    mirrored.imag = -imag;
    return mirrored;
}
|
||||
|
||||
// Free-function form of cucomp128::arg().
__host__ __device__ double arg(cucomp128 z)
{
    const double angle = z.arg();
    return angle;
}
|
||||
|
||||
// Free-function magnitude: forwards to cucomp128::mag().
__host__ __device__ double abs(cucomp128 z)
{
    const double modulus = z.mag();
    return modulus;
}
|
||||
|
||||
// Builds the complex value _r + _i*i from two doubles.
__host__ __device__ cucomp128 dtocomp(double _r, double _i)
{
    cucomp128 z;
    z.imag = _i;
    z.real = _r;
    return z;
}
|
||||
|
||||
// Returns the real part of z.
__host__ __device__ double real(cucomp128 z)
{
    const double re = z.real;
    return re;
}
|
||||
|
||||
// Returns the imaginary part of z.
__host__ __device__ double imag(cucomp128 z)
{
    const double im = z.imag;
    return im;
}
|
||||
|
||||
// Complex sine via Euler's formula: sin(z) = (e^(iz) - e^(-iz)) / (2i).
__host__ __device__ cucomp128 sin(cucomp128 z)
{
    cucomp128 im1 = dtocomp(0.0f,1.0f);   // the imaginary unit i
    cucomp128 twoi = dtocomp(0.0f,2.0f);  // the divisor 2i

    cucomp128 fwd = exp(im1*z);
    cucomp128 bwd = exp(-im1*z);

    return (fwd-bwd)/twoi;
}
|
||||
|
||||
// Complex cosine via Euler's formula: cos(z) = (e^(iz) + e^(-iz)) / 2.
__host__ __device__ cucomp128 cos(cucomp128 z)
{
    cucomp128 im1 = dtocomp(0.0f,1.0f);   // the imaginary unit i
    cucomp128 two = dtocomp(2.0f,0.0f);   // the divisor 2

    cucomp128 fwd = exp(im1*z);
    cucomp128 bwd = exp(-im1*z);

    return (fwd+bwd)/two;
}
|
||||
|
||||
// Complex tangent: sin(z) / cos(z).
__host__ __device__ cucomp128 tan(cucomp128 z)
{
    cucomp128 numer = sin(z);
    cucomp128 denom = cos(z);
    return numer/denom;
}
|
||||
|
||||
// Complex exponential: e^(a+bi) = e^a * (cos b + i sin b).
__host__ __device__ cucomp128 exp(cucomp128 z)
{
    cucomp128 result;
    result.imag = ::exp(z.real)*::sin(z.imag);
    result.real = ::exp(z.real)*::cos(z.imag);
    return result;
}
|
||||
|
||||
// Complex logarithm: log(z) = log|z| + i*arg(z), with the angle taken from
// amscuda::arg(z.real, z.imag).
//
// |z| is computed with hypot() rather than sqrt(re*re + im*im) so that the
// intermediate squares cannot overflow or underflow for components whose
// magnitude is still representable.
__host__ __device__ cucomp128 log(cucomp128 z)
{
    cucomp128 ret;
    ret.real = ::log(::hypot(z.real,z.imag));
    ret.imag = amscuda::arg(z.real,z.imag);
    return ret;
}
|
||||
|
||||
// Free-function form of cucomp128::conj().
__host__ __device__ cucomp128 conj(cucomp128 z)
{
    cucomp128 mirrored = z.conj();
    return mirrored;
}
|
||||
|
||||
// Complex hyperbolic cosine: cosh(z) = (e^z + e^(-z)) / 2.
__host__ __device__ cucomp128 cosh(cucomp128 z)
{
    cucomp128 two = dtocomp(2.0f,0.0f);
    cucomp128 grow = exp(z);
    cucomp128 decay = exp(-z);
    return (grow+decay)/two;
}
|
||||
|
||||
// Complex hyperbolic sine: sinh(z) = (e^z - e^(-z)) / 2.
__host__ __device__ cucomp128 sinh(cucomp128 z)
{
    cucomp128 two = dtocomp(2.0f,0.0f);
    cucomp128 grow = exp(z);
    cucomp128 decay = exp(-z);
    return (grow-decay)/two;
}
|
||||
|
||||
// Complex hyperbolic tangent: sinh(z) / cosh(z).
__host__ __device__ cucomp128 tanh(cucomp128 z)
{
    cucomp128 numer = sinh(z);
    cucomp128 denom = cosh(z);
    return numer/denom;
}
|
||||
|
||||
// Complex power z1^z2, computed as exp(z2 * log(z1)). When the magnitude of
// z1 is not strictly positive the result is 0 + 0i.
__host__ __device__ cucomp128 pow(cucomp128 z1, cucomp128 z2)
{
    if(!(z1.mag()>0.0))
    {
        return dtocomp(0.0f,0.0f);
    }
    return exp(z2*log(z1));
}
|
||||
|
||||
|
||||
void test_cucomp128_1()
|
||||
{
|
||||
cucomp128 z1;
|
||||
cucomp128 a,b,c;
|
||||
double d1;
|
||||
double f1;
|
||||
|
||||
printf("sizeof double=%ld\n",(long)(8*sizeof(f1)));
|
||||
printf("sizeof double=%ld\n",(long)(8*sizeof(d1)));
|
||||
printf("sizeof complex=%ld\n",(long)(8*sizeof(z1)));
|
||||
printf("sizeof cucomp128=%ld\n",(long)(8*sizeof(a)));
|
||||
|
||||
a = dtocomp(1.0,1.0);
|
||||
b = dtocomp(1.0,-1.0);
|
||||
|
||||
printf("a=%1.4f + %1.4fi\n",a[0],a[1]);
|
||||
printf("b=%1.4f + %1.4fi\n",b[0],b[1]);
|
||||
c = a+b;
|
||||
printf("c=a+b: c=%1.4f + %1.4fi\n",c[0],c[1]);
|
||||
c = a-b;
|
||||
printf("c=a-b: c=%1.4f + %1.4fi\n",c[0],c[1]);
|
||||
c = a*b;
|
||||
printf("c=a*b: c=%1.4f + %1.4fi\n",c[0],c[1]);
|
||||
c = a/b;
|
||||
printf("c=a/b: c=%1.4f + %1.4fi\n",c[0],c[1]);
|
||||
f1 = abs(a);
|
||||
printf("abs(a)=%1.4f\n",f1);
|
||||
f1 = arg(a);
|
||||
printf("abs(a)=%1.4f pi\n",f1/pi);
|
||||
|
||||
|
||||
}
|
||||
|
||||
}; //end namespace cmp
|
||||
}; //end namespace amscuda
|
@ -0,0 +1,476 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
namespace cmp
|
||||
{
|
||||
|
||||
//Default constructor: initializes the value to 0+0i.
__host__ __device__ cucomp64::cucomp64()
{
    this->real = 0.0f;
    this->imag = 0.0f;
}
|
||||
|
||||
//Destructor: owns no resources; it only zeroes both components.
__host__ __device__ cucomp64::~cucomp64()
{
    this->real = 0.0f;
    this->imag = 0.0f;
}
|
||||
|
||||
//Copy constructor: copies the real and imaginary parts of other.
__host__ __device__ cucomp64::cucomp64(const cucomp64 &other)
{
    this->real = other.real;
    this->imag = other.imag;
}
|
||||
|
||||
//Construct from a real number: the result is other+0i.
__host__ __device__ cucomp64::cucomp64(const float &other)
{
    this->real = other;
    this->imag = 0.0f;
}
|
||||
|
||||
//Assignment from a non-const complex value; returns *this.
__host__ __device__ cucomp64& cucomp64::operator=(cucomp64& other)
{
    this->real = other.real;
    this->imag = other.imag;
    return *this;
}
|
||||
|
||||
//Assignment from a const complex value; returns *this as a const reference.
__host__ __device__ const cucomp64& cucomp64::operator=(const cucomp64& other)
{
    real = other.real;
    imag = other.imag;
    return *this;
}
|
||||
|
||||
//Assignment from a real number: sets the value to other+0i and returns *this.
__host__ __device__ cucomp64& cucomp64::operator=(float& other)
{
    this->real = other;
    this->imag = 0.0f;
    return *this;
}
|
||||
|
||||
//Assignment from a const real number: sets the value to other+0i and
//returns *this as a const reference.
__host__ __device__ const cucomp64& cucomp64::operator=(const float& other)
{
    real = other;
    imag = 0.0f;
    return *this;
}
|
||||
|
||||
//Mutable component access: index 0 selects the real part, any other
//index selects the imaginary part.
__host__ __device__ float& cucomp64::operator[](int& ind)
{
    return (ind==0) ? this->real : this->imag;
}
|
||||
|
||||
//Read-only component access: index 0 selects the real part, any other
//index selects the imaginary part.
__host__ __device__ const float& cucomp64::operator[](const int& ind) const
{
    return (ind==0) ? this->real : this->imag;
}
|
||||
|
||||
//Complex addition: returns (*this)+z, component by component.
__host__ __device__ cucomp64 cucomp64::operator+(const cucomp64& z)
{
    cucomp64 sum;
    sum.real = this->real + z.real;
    sum.imag = this->imag + z.imag;
    return sum;
}
|
||||
|
||||
//Complex subtraction: returns (*this)-z, component by component.
__host__ __device__ cucomp64 cucomp64::operator-(const cucomp64& z)
{
    cucomp64 diff;
    diff.real = this->real - z.real;
    diff.imag = this->imag - z.imag;
    return diff;
}
|
||||
|
||||
//Complex multiplication: (a+bi)(c+di) = (ac-bd) + (bc+ad)i.
__host__ __device__ cucomp64 cucomp64::operator*(const cucomp64& z)
{
    cucomp64 prod;
    prod.real = (this->real*z.real - this->imag*z.imag);
    prod.imag = (this->imag*z.real + this->real*z.imag);
    return prod;
}
|
||||
|
||||
//Complex division: multiplies by conj(z) and divides by |z|^2.
//When |z|^2 is not greater than zero both components are set to finf.
__host__ __device__ cucomp64 cucomp64::operator/(const cucomp64& z)
{
    cucomp64 quot;
    float denom = z.real*z.real+z.imag*z.imag;

    if(!(denom>0.0f))
    {
        quot.real = (float) finf;
        quot.imag = (float) finf;
        return quot;
    }

    quot.real = (this->real*z.real+this->imag*z.imag)/denom;
    quot.imag = (this->imag*z.real-this->real*z.imag)/denom;
    return quot;
}
|
||||
|
||||
//Add a real number: only the real part changes.
__host__ __device__ cucomp64 cucomp64::operator+(const float& z)
{
    cucomp64 sum;
    sum.imag = imag;
    sum.real = real + z;
    return sum;
}
|
||||
//Subtract a real number: only the real part changes.
__host__ __device__ cucomp64 cucomp64::operator-(const float& z)
{
    cucomp64 diff;
    diff.imag = this->imag;
    diff.real = this->real-z;
    return diff;
}
|
||||
//Scale by a real number: both components are multiplied by z.
__host__ __device__ cucomp64 cucomp64::operator*(const float& z)
{
    cucomp64 scaled;
    scaled.real = this->real*z;
    scaled.imag = this->imag*z;
    return scaled;
}
|
||||
//Divide by a real number. Division by exactly zero sets both
//components to finf.
__host__ __device__ cucomp64 cucomp64::operator/(const float& z)
{
    cucomp64 quot;

    if(z==0.0f)
    {
        quot.real = finf;
        quot.imag = finf;
        return quot;
    }

    quot.real = this->real/z;
    quot.imag = this->imag/z;
    return quot;
}
|
||||
|
||||
//Exact equality: true when both the real and imaginary parts compare equal.
__host__ __device__ bool cucomp64::operator==(const cucomp64& z) const
{
    return (z.real == real) && (z.imag == imag);
}
|
||||
|
||||
//Inequality: the logical negation of operator==.
__host__ __device__ bool cucomp64::operator!=(const cucomp64& z) const
{
    bool same = (*this==z);
    return !same;
}
|
||||
|
||||
//sort first by real value, then by imaginary value
|
||||
//this is done so that an ordering exists, as long as two values aren't equal
|
||||
__host__ __device__ bool cucomp64::operator>(const cucomp64& z) const
|
||||
{
|
||||
bool ret = 0;
|
||||
if(this->real>z.real)
|
||||
{
|
||||
ret = 1;
|
||||
}
|
||||
else if(this->real==z.real)
|
||||
{
|
||||
if(this->imag>z.imag)
|
||||
{
|
||||
ret = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
//Ordering for sorting purposes: compare the real parts first and, when they
//are equal, the imaginary parts.
__host__ __device__ bool cucomp64::operator<(const cucomp64& z) const
{
    if(this->real<z.real)
    {
        return 1;
    }
    return (this->real==z.real) && (this->imag<z.imag);
}
|
||||
|
||||
//Unary minus: negates both the real and imaginary parts.
__host__ __device__ cucomp64 operator-(const cucomp64 &z)
{
    cucomp64 neg;
    neg.imag = -z.imag;
    neg.real = -z.real;
    return neg;
}
|
||||
|
||||
//True when *this equals z or orders after it (see operator>).
__host__ __device__ bool cucomp64::operator>=(const cucomp64& z) const
{
    if(*this==z)
    {
        return 1;
    }
    return (*this>z);
}
|
||||
|
||||
//True when *this equals z or orders before it (see operator<).
__host__ __device__ bool cucomp64::operator<=(const cucomp64& z) const
{
    if(*this==z)
    {
        return 1;
    }
    return (*this<z);
}
|
||||
|
||||
//True when either component is NaN (uses the global ::isnan).
__host__ __device__ bool cucomp64::isnan() const
{
    return (::isnan(this->real) || ::isnan(this->imag));
}
|
||||
|
||||
//True when either component is infinite (uses the global math.h ::isinf).
__host__ __device__ bool cucomp64::isinf() const
{
    return (::isinf(this->real) || ::isinf(this->imag));
}
|
||||
|
||||
//True when the imaginary part is exactly zero.
__host__ __device__ bool cucomp64::isreal() const
{
    return (imag==0.0f);
}
|
||||
|
||||
//True when the real part is exactly zero.
__host__ __device__ bool cucomp64::isimag() const
{
    return (real==0.0f);
}
|
||||
|
||||
//True when both components are exactly zero.
__host__ __device__ bool cucomp64::iszero() const
{
    return (real==0.0f) && (imag==0.0f);
}
|
||||
|
||||
//Argument (phase angle) of the value: both components are widened to
//double, passed to amscuda::arg, and the result is narrowed to float.
__host__ __device__ float cucomp64::arg() const
{
    return (float) amscuda::arg((double)real,(double)imag);
}
|
||||
|
||||
//Magnitude of the value: sqrt(real^2 + imag^2).
__host__ __device__ float cucomp64::mag() const
{
    float m = ::sqrt(real*real+imag*imag);
    return m;
}
|
||||
|
||||
//Complex conjugate: same real part, negated imaginary part.
__host__ __device__ cucomp64 cucomp64::conj() const
{
    cucomp64 zbar;
    zbar.imag = -(this->imag);
    zbar.real = this->real;
    return zbar;
}
|
||||
|
||||
//Free-function form of cucomp64::arg().
__host__ __device__ float arg(cucomp64 z)
{
    float theta = z.arg();
    return theta;
}
|
||||
|
||||
//Absolute value of a complex number: returns z.mag().
__host__ __device__ float abs(cucomp64 z)
{
    float m = z.mag();
    return m;
}
|
||||
|
||||
//Build a cucomp64 from a real part _r and an imaginary part _i.
__host__ __device__ cucomp64 dtocomp64(float _r, float _i)
{
    cucomp64 z;
    z.imag = _i;
    z.real = _r;
    return z;
}
|
||||
|
||||
//Returns the real part of z.
__host__ __device__ float real(cucomp64 z)
{
    float re = z.real;
    return re;
}
|
||||
|
||||
//Returns the imaginary part of z.
__host__ __device__ float imag(cucomp64 z)
{
    float im = z.imag;
    return im;
}
|
||||
|
||||
//Complex sine, evaluated as (exp(iz)-exp(-iz))/(2i).
__host__ __device__ cucomp64 sin(cucomp64 z)
{
    cucomp64 iunit = dtocomp64(0.0f,1.0f);
    cucomp64 twoi = dtocomp64(0.0f,2.0f);
    cucomp64 ediff = exp(iunit*z)-exp(-iunit*z);
    return ediff/twoi;
}
|
||||
|
||||
//Complex cosine, evaluated as (exp(iz)+exp(-iz))/2.
__host__ __device__ cucomp64 cos(cucomp64 z)
{
    cucomp64 iunit = dtocomp64(0.0f,1.0f);
    cucomp64 two = dtocomp64(2.0f,0.0f);
    cucomp64 esum = exp(iunit*z)+exp(-iunit*z);
    return esum/two;
}
|
||||
|
||||
//Complex tangent, evaluated as sin(z)/cos(z).
__host__ __device__ cucomp64 tan(cucomp64 z)
{
    cucomp64 num = sin(z);
    cucomp64 den = cos(z);
    return num/den;
}
|
||||
|
||||
//Complex exponential: exp(a+bi) = exp(a)*(cos(b) + i*sin(b)).
__host__ __device__ cucomp64 exp(cucomp64 z)
{
    cucomp64 ez;
    ez.imag = ::exp(z.real)*::sin(z.imag);
    ez.real = ::exp(z.real)*::cos(z.imag);
    return ez;
}
|
||||
|
||||
//Complex logarithm: the real part is log|z|, the imaginary part is the
//argument of z as returned by amscuda::arg.
__host__ __device__ cucomp64 log(cucomp64 z)
{
    cucomp64 lz;
    lz.imag = amscuda::arg(z.real,z.imag);
    lz.real = ::log(::sqrt(z.real*z.real+z.imag*z.imag));
    return lz;
}
|
||||
|
||||
//Free-function form of cucomp64::conj(): returns z.conj().
__host__ __device__ cucomp64 conj(cucomp64 z)
{
    cucomp64 zbar = z.conj();
    return zbar;
}
|
||||
|
||||
//Complex hyperbolic cosine, evaluated as (exp(z)+exp(-z))/2.
__host__ __device__ cucomp64 cosh(cucomp64 z)
{
    cucomp64 two = dtocomp64(2.0f,0.0f);
    cucomp64 esum = exp(z)+exp(-z);
    return esum/two;
}
|
||||
|
||||
//Complex hyperbolic sine, evaluated as (exp(z)-exp(-z))/2.
__host__ __device__ cucomp64 sinh(cucomp64 z)
{
    cucomp64 two = dtocomp64(2.0f,0.0f);
    cucomp64 ediff = exp(z)-exp(-z);
    return ediff/two;
}
|
||||
|
||||
//Complex hyperbolic tangent, evaluated as sinh(z)/cosh(z).
__host__ __device__ cucomp64 tanh(cucomp64 z)
{
    cucomp64 num = sinh(z);
    cucomp64 den = cosh(z);
    return num/den;
}
|
||||
|
||||
//Complex power z1^z2, evaluated as exp(z2*log(z1)).
//When z1 has zero magnitude the result is 0+0i for every exponent.
__host__ __device__ cucomp64 pow(cucomp64 z1, cucomp64 z2)
{
    if(z1.mag()>0.0)
    {
        return exp(z2*log(z1));
    }
    return dtocomp64(0.0f,0.0f);
}
|
||||
|
||||
|
||||
void test_cucomp64_1()
|
||||
{
|
||||
cucomp64 z1;
|
||||
cucomp64 a,b,c;
|
||||
double d1;
|
||||
float f1;
|
||||
|
||||
printf("sizeof double=%ld\n",(long)(8*sizeof(f1)));
|
||||
printf("sizeof double=%ld\n",(long)(8*sizeof(d1)));
|
||||
printf("sizeof complex=%ld\n",(long)(8*sizeof(z1)));
|
||||
printf("sizeof cucomp128=%ld\n",(long)(8*sizeof(a)));
|
||||
|
||||
a = dtocomp64(1.0,1.0);
|
||||
b = dtocomp64(1.0,-1.0);
|
||||
|
||||
printf("a=%1.4f + %1.4fi\n",a[0],a[1]);
|
||||
printf("b=%1.4f + %1.4fi\n",b[0],b[1]);
|
||||
c = a+b;
|
||||
printf("c=a+b: c=%1.4f + %1.4fi\n",c[0],c[1]);
|
||||
c = a-b;
|
||||
printf("c=a-b: c=%1.4f + %1.4fi\n",c[0],c[1]);
|
||||
c = a*b;
|
||||
printf("c=a*b: c=%1.4f + %1.4fi\n",c[0],c[1]);
|
||||
c = a/b;
|
||||
printf("c=a/b: c=%1.4f + %1.4fi\n",c[0],c[1]);
|
||||
f1 = abs(a);
|
||||
printf("abs(a)=%1.4f\n",f1);
|
||||
f1 = arg(a);
|
||||
printf("abs(a)=%1.4f pi\n",f1/pi);
|
||||
|
||||
|
||||
}
|
||||
|
||||
}; //end namespace cmp
|
||||
}; //end namespace amscuda
|
@ -0,0 +1,21 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
int cuda_errortrap(const char *msgheader)
|
||||
{
|
||||
int ret = 0;
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
err = cudaGetLastError();
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
printf("%s :%s\n",msgheader,cudaGetErrorString(err));
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
}; //end namespace amscuda
|
@ -0,0 +1,222 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
//Deterministic float hash of one value: folds |x| into [0,10000), runs
//three rounds of v*(v+c) mod 10000, and scales the result by 1/10000.
__device__ __host__ float fhash1d_su(float x)
{
    float h;

    h = x*(x>0.0f) - x*(x<0.0f); //|x| formed without a branch
    h = fmodf(h,10000.0f); //restrain domain
    h = fmodf(h*(h+3678.453f)+7890.453f,10000.0f);
    h = fmodf(h*(h+8927.2134f),10000.0f);
    h = fmodf(h*(h+3656.234f),10000.0f);

    return h/10000.0f;
}
|
||||
|
||||
|
||||
//Deterministic float hash of up to three values: chains fhash1d_su from z
//through y to x, feeding 1000*h*h of each stage into the next.
__device__ __host__ float fhash3d_su(float x, float y=0.0f, float z=0.0f)
{
    float h = fhash1d_su(z);
    h = fhash1d_su(1000.0f*h*h + y);
    h = fhash1d_su(1000.0f*h*h + x);
    return h;
}
|
||||
|
||||
//Deterministic float hash of up to four values: chains fhash1d_su from w
//through z and y to x, feeding 1000*h*h of each stage into the next.
__device__ __host__ float fhash4d_su(float x, float y=0.0f, float z=0.0f, float w=0.0f)
{
    float h = fhash1d_su(w);
    h = fhash1d_su(1000.0f*h*h + z);
    h = fhash1d_su(1000.0f*h*h + y);
    h = fhash1d_su(1000.0f*h*h + x);
    return h;
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// Deterministic Pseudorandom int32_t Generator //
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
//Simple 32 bit integer deterministic pseudo-random generator
|
||||
// *not* for cryptography
|
||||
// Frequency of generated floats should be uniform [0,1)
|
||||
|
||||
//Modulus of the 32 bit generator.
//NOTE(review): this was written "1<<30-1". Subtraction binds tighter than
//shift, so it has always evaluated to 1<<(30-1) = 2^29. The parentheses
//below keep that value; switching to (1<<30)-1 would change every
//generated sequence.
AMSCU_CONST static const int32_t dpr32_mod = 1<<(30-1);
//Multiplier of the 32 bit generator.
AMSCU_CONST static const int32_t dpr32_mult = 25137;
|
||||
|
||||
//Next seed in simple 32 bit integer deterministic pseudo-rand generator
|
||||
//Advance the seed of the 32 bit deterministic generator by one step:
//  seed <- (seed*dpr32_mult + 1) mod dpr32_mod, mapped to a non-negative value.
//rseed_inout: seed to update in place. A NULL pointer makes the call a no-op.
__host__ __device__ void dpr32_nextseed(int32_t *rseed_inout)
{
    int64_t wide;

    //with a NULL pointer there is nothing to read or write (an
    //uninitialized local used to be read on this path)
    if(rseed_inout==NULL) return;

    //form the product in 64 bits: seed*dpr32_mult does not fit in int32_t,
    //and signed overflow is undefined behavior
    wide = ((int64_t)(*rseed_inout))*((int64_t)dpr32_mult) + 1;
    wide = wide%((int64_t)dpr32_mod);
    if(wide<0) wide = wide + (int64_t)dpr32_mod; //ensure mod is positive

    *rseed_inout = (int32_t)wide;

    return;
}
|
||||
|
||||
//Simple 32 bit integer deterministic pseudo-random generator
|
||||
// *not* for cryptography
|
||||
// Frequency of generated floats should be uniform [0,1)
|
||||
//Draw one float from the 32 bit deterministic generator: advances the seed
//with dpr32_nextseed and returns seed/dpr32_mod.
//rseed_inout: seed, updated in place. If NULL a local seed of 1 is used and
//             nothing is written back.
__host__ __device__ float dpr32_randf(int32_t *rseed_inout)
{
    int32_t state = 1;
    float sample;

    if(rseed_inout!=NULL)
    {
        state = *rseed_inout;
    }

    dpr32_nextseed(&state);
    sample = ((float)(state))/((float)dpr32_mod);

    if(rseed_inout!=NULL)
    {
        *rseed_inout = state;
    }

    return sample;
}
|
||||
|
||||
//box muller standard normal variable
|
||||
//Box-Muller standard normal variable built from two dpr32_randf draws.
//rseed_inout: seed, advanced twice and updated in place. If NULL a local
//             seed of 1 is used and nothing is written back.
__host__ __device__ float dpr32_randnf(int32_t *rseed_inout)
{
    int32_t lseed = 1;
    float ret = 0.0f;
    float u1,u2;

    if(rseed_inout!=NULL) lseed = *rseed_inout;

    u1 = dpr32_randf(&lseed);
    u2 = dpr32_randf(&lseed);

    //dpr32_randf returns 0 when the seed lands on 0; logf(0) is -inf and
    //would make the result inf or NaN. Clamp to the smallest nonzero step.
    if(u1<=0.0f) u1 = 1.0f/((float)dpr32_mod);

    ret = ::sqrtf(-2.0f*::logf(u1))*::cosf(2.0f*pif*u2);

    if(rseed_inout!=NULL) *rseed_inout = lseed;

    return ret;
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// Deterministic Pseudorandom int64_t Generator //
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
//"goodenough" deterministic pseudo-random number generator
|
||||
//random enough for procedural applications, deterministic,
|
||||
//operates without side-effects for thread safety
|
||||
|
||||
//modulus of the 64 bit generator: (2<<31)-1 = 2^32-1
AMSCU_CONST const int64_t random_dpr64_mod = (2LL<<31LL)-1LL;
|
||||
//multiplier applied to the seed in dpr64_nextseed
AMSCU_CONST const int64_t random_dpr64_mult = 1201633LL;
|
||||
|
||||
//Advance the seed of the 64 bit deterministic generator by one step:
//  seed <- (random_dpr64_mult*seed + 1) mod random_dpr64_mod, made non-negative.
//seedinout: seed, updated in place. If NULL a local seed of 0 is advanced and
//           the result is discarded.
__host__ __device__ void dpr64_nextseed(int64_t *seedinout)
{
    int64_t state = 0LL;

    if(seedinout!=NULL)
    {
        state = *seedinout;
    }

    state = (random_dpr64_mult*state+1LL)%random_dpr64_mod;
    state = (state>=0)*(state)+(state<0)*(state+random_dpr64_mod);

    if(seedinout!=NULL)
    {
        *seedinout = state;
    }

    return;
}
|
||||
|
||||
//Draw one double from the 64 bit deterministic generator: advances the seed
//with dpr64_nextseed and returns seed/(random_dpr64_mod-1).
//seedinout: seed, updated in place. If NULL a local seed of 0 is used and
//           nothing is written back.
__host__ __device__ double dpr64_randd(int64_t *seedinout)
{
    int64_t state = 0LL;
    double sample;

    if(seedinout!=NULL)
    {
        state = *seedinout;
    }

    dpr64_nextseed(&state);
    sample = ((double)state)/((double)(random_dpr64_mod-1LL));

    if(seedinout!=NULL)
    {
        *seedinout = state;
    }

    return sample;
}
|
||||
|
||||
//Draw one float from the 64 bit deterministic generator: advances the seed
//with dpr64_nextseed and returns seed/(random_dpr64_mod-1) in float precision.
//seedinout: seed, updated in place. If NULL a local seed of 0 is used and
//           nothing is written back.
__host__ __device__ float dpr64_randf(int64_t *seedinout)
{
    int64_t state = 0LL;
    float sample;

    if(seedinout!=NULL)
    {
        state = *seedinout;
    }

    dpr64_nextseed(&state);
    sample = ((float)state)/((float)(random_dpr64_mod-1LL));

    if(seedinout!=NULL)
    {
        *seedinout = state;
    }

    return sample;
}
|
||||
|
||||
///////////
|
||||
// Tests //
|
||||
///////////
|
||||
|
||||
void test_dprg64()
|
||||
{
|
||||
printf("Tests for dprg:\n");
|
||||
long I;
|
||||
int64_t seed = 133LL;
|
||||
double d;
|
||||
float f;
|
||||
cuvect3 qv;
|
||||
|
||||
printf("dpr64_randd test\n");
|
||||
seed = 133LL;
|
||||
for(I=0;I<10;I++)
|
||||
{
|
||||
d = dpr64_randd(&seed);
|
||||
printf("seed: %lld rand: %1.4f\n",(long long)seed,d);
|
||||
}
|
||||
|
||||
printf("\n\n");
|
||||
printf("dpr64_randf test\n");
|
||||
seed = 133LL;
|
||||
for(I=0;I<10;I++)
|
||||
{
|
||||
f = dpr64_randf(&seed);
|
||||
printf("seed: %lld rand: %1.4f\n",(long long)seed,f);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void test_dprg32()
|
||||
{
|
||||
printf("Tests for dprg:\n");
|
||||
long I;
|
||||
int32_t seed = 133;
|
||||
double d;
|
||||
float f;
|
||||
cuvect3 qv;
|
||||
|
||||
printf("dpr32_randf test\n");
|
||||
seed = 133;
|
||||
for(I=0;I<10;I++)
|
||||
{
|
||||
f = dpr32_randf(&seed);
|
||||
printf("seed: %lld rand: %1.4f\n",(long long)seed,f);
|
||||
}
|
||||
|
||||
printf("\n\ndpr32_randnf test\n");
|
||||
seed = 133;
|
||||
for(I=0;I<10;I++)
|
||||
{
|
||||
f = dpr32_randnf(&seed);
|
||||
printf("seed: %lld rand: %1.4f\n",(long long)seed,f);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
}; //namespace amscuda
|
@ -0,0 +1,63 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
//Test kernel: writes the sum of dq1->data[0..length-1] to *sum.
//The loop is serial; every thread that runs it performs the whole sum.
__global__ void test_cuarray_sum_kf(cuarray<float> *dq1, float *sum)
{
    int idx = 0;

    *sum = 0.0f;
    while(idx<dq1->length)
    {
        *sum += dq1->data[idx];
        idx++;
    }
    return;
}
|
||||
|
||||
//Host driver for test_cuarray_sum_kf: sends q1 to the device, runs the
//kernel with one block of one thread, copies the sum back and frees the
//device copies. Prints the device_send return code and any CUDA error.
float test_cuarray_sum(cuarray<float> *q1)
{
    float total = 0.0f;
    float *dev_total;
    cuarray<float> *dev_q1 = NULL;
    int sendres;
    cudaError_t status = cudaSuccess;

    cudaMalloc(&dev_total,sizeof(float));
    sendres = q1->device_send(&dev_q1);
    printf("error: res=%d\n",sendres);
    test_cuarray_sum_kf<<<1,1>>>(dev_q1,dev_total);
    cudaDeviceSynchronize();

    status = cudaGetLastError();
    if(status!=cudaSuccess)
    {
        printf("test_cuarray_sum Error: %s\n",cudaGetErrorString(status));
    }

    cudaMemcpy(&total,dev_total,sizeof(float),cudaMemcpyDeviceToHost);

    q1->device_free(&dev_q1);
    cudaFree(dev_total);
    dev_total = NULL;

    return total;
}
|
||||
|
||||
void test_cuarray()
|
||||
{
|
||||
cuarray<float> q1;
|
||||
|
||||
int I;
|
||||
|
||||
q1.resize(100);
|
||||
for(I=0;I<q1.length;I++)
|
||||
{
|
||||
q1.data[I] = I;
|
||||
}
|
||||
printf("q1.length=%d\n",q1.length);
|
||||
|
||||
printf("sum of array: %1.6g\n",test_cuarray_sum(&q1));
|
||||
|
||||
}
|
||||
|
||||
};
|
@ -0,0 +1,213 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
//template instantiations
|
||||
template float dbuff_sum(float *devbuffer, int N);
|
||||
template void dbuff_minmax(float *devbuffer, int N, float *min, float *max);
|
||||
|
||||
|
||||
template void dbuff_setall(float *devbuffer, int N, float setto, int nblocks, int nthreads);
|
||||
|
||||
|
||||
|
||||
|
||||
//fill devbuffer with random uniform numbers between 0 and 1 using int32_t based generator
|
||||
//fill devbuffer with random uniform numbers using the int32_t based generator.
//Each thread starts from seeds[global thread index] and writes every
//(blockDim.x*gridDim.x)-th element of devbuffer, beginning at its own index.
__global__ void dbuff_rand_dpr32_kf(float *devbuffer, int N, int32_t *seeds)
{
    int tid = threadIdx.x + blockIdx.x*blockDim.x;
    int stride = blockDim.x*gridDim.x;
    int32_t state = seeds[tid];
    int idx = tid;

    while(idx<N)
    {
        devbuffer[idx] = dpr32_randf(&state);
        idx = idx + stride;
    }

    return;
}
|
||||
|
||||
void dbuff_rand_dpr32(float *devbuffer, int N, int32_t *rseedinout, int nblocks, int nthreads)
|
||||
{
|
||||
cudaError_t err = cudaSuccess;
|
||||
int I;
|
||||
int32_t *seeds = NULL;
|
||||
int32_t *devseeds = NULL;
|
||||
int32_t lseed;
|
||||
|
||||
if(devbuffer==NULL || N<=0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
seeds = new(std::nothrow) int32_t[nblocks*nthreads];
|
||||
cudaMalloc(&devseeds,sizeof(int32_t)*nblocks*nthreads);
|
||||
|
||||
if(rseedinout!=NULL) lseed = *rseedinout; else lseed = 1;
|
||||
for(I=0;I<nblocks*nthreads;I++)
|
||||
{
|
||||
lseed = lseed + I + 1;
|
||||
dpr32_nextseed(&lseed);
|
||||
seeds[I] = lseed;
|
||||
}
|
||||
|
||||
cudaMemcpy(devseeds,seeds,sizeof(int32_t)*nblocks*nthreads,cudaMemcpyHostToDevice);
|
||||
if(rseedinout!=NULL) *rseedinout = lseed;
|
||||
|
||||
dbuff_rand_dpr32_kf<<<nblocks,nthreads>>>(devbuffer,N,devseeds);
|
||||
cudaDeviceSynchronize();
|
||||
err = cudaGetLastError();
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
printf("amscu::dbuff_rand_dpr32 error: %s\n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
cudaFree(devseeds); devseeds = NULL;
|
||||
delete[] seeds; seeds = NULL;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
//fill devbuffer with dpr32_randnf (Box-Muller normal) draws.
//Each thread starts from seeds[global thread index] and writes every
//(blockDim.x*gridDim.x)-th element of devbuffer, beginning at its own index.
__global__ void dbuff_rand_dpr32n_kf(float *devbuffer, int N, int32_t *seeds)
{
    int tid = threadIdx.x + blockIdx.x*blockDim.x;
    int stride = blockDim.x*gridDim.x;
    int32_t state = seeds[tid];
    int idx = tid;

    while(idx<N)
    {
        devbuffer[idx] = dpr32_randnf(&state);
        idx = idx + stride;
    }

    return;
}
|
||||
|
||||
void dbuff_rand_dpr32n(float *devbuffer, int N, int32_t *rseedinout, int nblocks, int nthreads)
|
||||
{
|
||||
cudaError_t err = cudaSuccess;
|
||||
int I;
|
||||
int32_t *seeds = NULL;
|
||||
int32_t *devseeds = NULL;
|
||||
int32_t lseed;
|
||||
|
||||
if(devbuffer==NULL || N<=0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
seeds = new(std::nothrow) int32_t[nblocks*nthreads];
|
||||
cudaMalloc(&devseeds,sizeof(int32_t)*nblocks*nthreads);
|
||||
|
||||
if(rseedinout!=NULL) lseed = *rseedinout; else lseed = 1;
|
||||
for(I=0;I<nblocks*nthreads;I++)
|
||||
{
|
||||
lseed = lseed + I + 1;
|
||||
dpr32_nextseed(&lseed);
|
||||
seeds[I] = lseed;
|
||||
}
|
||||
|
||||
cudaMemcpy(devseeds,seeds,sizeof(int32_t)*nblocks*nthreads,cudaMemcpyHostToDevice);
|
||||
if(rseedinout!=NULL) *rseedinout = lseed;
|
||||
|
||||
dbuff_rand_dpr32n_kf<<<nblocks,nthreads>>>(devbuffer,N,devseeds);
|
||||
cudaDeviceSynchronize();
|
||||
err = cudaGetLastError();
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
printf("amscu::dbuff_rand_dpr32 error: %s\n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
cudaFree(devseeds); devseeds = NULL;
|
||||
delete[] seeds; seeds = NULL;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
//Placeholder for an int64_t based buffer fill.
//NOTE: not implemented - the body is empty, so devbuffer and *rseedinout
//are left untouched.
void dbuff_rand_dpr64(float *devbuffer, int N, int64_t *rseedinout, int nblocks, int nthreads)
{
    return;
}
|
||||
|
||||
|
||||
|
||||
///////////
|
||||
// Tests //
|
||||
///////////
|
||||
|
||||
void test_dbuff_rand_dpr32()
|
||||
{
|
||||
cuarray<float> data;
|
||||
float *dev_data = NULL;
|
||||
int Nx = 5000;
|
||||
int Ny = 5000;
|
||||
cuarray<int> dims;
|
||||
int32_t rseed = 15;
|
||||
FILE *fp = NULL;
|
||||
const char *fname = "./test_scripts/test_dbuff_rand_dpr32.bin";
|
||||
|
||||
clock_t t0,t1,t2;
|
||||
double dt0,dt1;
|
||||
|
||||
printf("Tests of dbuff_rand_dpr32...\n");
|
||||
|
||||
fp = fopen(fname,"w+");
|
||||
if(fp==NULL)
|
||||
{
|
||||
printf("Error: Could not open %s for writing.\n",fname);
|
||||
return;
|
||||
}
|
||||
|
||||
data.resize(Nx*Ny);
|
||||
dims.resize(2);
|
||||
dims[0] = Nx; dims[1] = Ny;
|
||||
cudaMalloc(&dev_data,Nx*Ny*sizeof(float));
|
||||
|
||||
t0 = clock();
|
||||
dbuff_rand_dpr32(dev_data,Nx*Ny,&rseed,256,512);
|
||||
t1 = clock();
|
||||
cudaMemcpy(data.data,dev_data,Nx*Ny*sizeof(float),cudaMemcpyDeviceToHost);
|
||||
t2 = clock();
|
||||
|
||||
dt0 = (double)(t1-t0)/(double)CLOCKS_PER_SEC*1000.0;
|
||||
dt1 = (double)(t2-t1)/(double)CLOCKS_PER_SEC*1000.0;
|
||||
printf("dbuff_rand_dpr32 exec time: %1.3f msec\n",dt0);
|
||||
printf("copy (%d,%d) to host time: %1.3f msec\n",Nx,Ny,dt1);
|
||||
|
||||
fwrite_ndarray(fp,&dims,&data);
|
||||
|
||||
t0 = clock();
|
||||
dbuff_rand_dpr32n(dev_data,Nx*Ny,&rseed,256,512);
|
||||
t1 = clock();
|
||||
cudaMemcpy(data.data,dev_data,Nx*Ny*sizeof(float),cudaMemcpyDeviceToHost);
|
||||
t2 = clock();
|
||||
|
||||
dt0 = (double)(t1-t0)/(double)CLOCKS_PER_SEC*1000.0;
|
||||
dt1 = (double)(t2-t1)/(double)CLOCKS_PER_SEC*1000.0;
|
||||
printf("dbuff_rand_dpr32n exec time: %1.3f msec\n",dt0);
|
||||
printf("copy (%d,%d) to host time: %1.3f msec\n",Nx,Ny,dt1);
|
||||
|
||||
fwrite_ndarray(fp,&dims,&data);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
cudaFree(dev_data); dev_data = NULL;
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -0,0 +1,6 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
};
|
@ -0,0 +1,6 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
};
|
@ -0,0 +1,269 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
//Absolute value of a double: negates x when x<0.
__host__ __device__ double dabs(double x)
{
    return (x<0.0) ? -x : x;
}
|
||||
|
||||
//Absolute value of a float: negates x when x<0.
__host__ __device__ float fabs(float x)
{
    return (x<0.0f) ? -x : x;
}
|
||||
|
||||
//Remainder of x/md via fmod; a negative remainder is shifted up by md.
__host__ __device__ double mod(double x, double md)
{
    double rem = fmod(x,md);
    return (rem<0.0) ? (rem + md) : rem;
}
|
||||
|
||||
//Remainder of x/md via fmodf; a negative remainder is shifted up by md.
__host__ __device__ float mod(float x, float md)
{
    float rem = fmodf(x,md);
    return (rem<0.0f) ? (rem + md) : rem;
}
|
||||
|
||||
//Integer remainder x%n; a negative remainder is shifted up by n.
__host__ __device__ int mod(int x, int n)
{
    int rem = x % n;
    return (rem<0) ? (rem + n) : rem;
}
|
||||
|
||||
//Integer remainder x%n for longs; a negative remainder is shifted up by n.
__host__ __device__ long mod(long x, long n)
{
    long rem = x % n;
    return (rem<0) ? (rem + n) : rem;
}
|
||||
|
||||
//Floor division: the largest integer q with q*y <= x for y>0 (mirrored for
//y<0), i.e. the quotient rounded toward negative infinity.
//Returns 0 when y==0, as before.
//The earlier sign-by-sign formula subtracted 1 even for exact divisions, so
//truediv(-4,2) gave -3 and truediv(0,-2) gave -1.
__host__ __device__ int truediv(int x, int y)
{
    int q;
    int r;

    if(y==0)
    {
        return 0;
    }

    q = x/y; //truncates toward zero
    r = x%y;
    //step down one only when there is a remainder and the true quotient is negative
    if(r!=0 && ((r<0)!=(y<0)))
    {
        q = q - 1;
    }

    return q;
}
|
||||
|
||||
//Floor division for longs: the quotient rounded toward negative infinity.
//Returns 0 when y==0, as before.
//The earlier version subtracted 1 even for exact divisions and held the
//result in an int, truncating quotients wider than int.
__host__ __device__ long truediv(long x, long y)
{
    long q;
    long r;

    if(y==0)
    {
        return 0;
    }

    q = x/y; //truncates toward zero
    r = x%y;
    //step down one only when there is a remainder and the true quotient is negative
    if(r!=0 && ((r<0)!=(y<0)))
    {
        q = q - 1;
    }

    return q;
}
|
||||
|
||||
|
||||
//Minimum of two doubles that ignores NaN: if one argument is NaN the other
//is returned (a NaN a is checked first).
template<> __host__ __device__ double min(double a, double b)
{
    if(isnan(a)) return b;
    if(isnan(b)) return a;
    return (a>b) ? b : a;
}
|
||||
|
||||
//Minimum of two floats that ignores NaN: if one argument is NaN the other
//is returned (a NaN a is checked first).
template<> __host__ __device__ float min(float a, float b)
{
    if(isnan(a)) return b;
    if(isnan(b)) return a;
    return (a>b) ? b : a;
}
|
||||
|
||||
//Maximum of two doubles that ignores NaN: if one argument is NaN the other
//is returned (a NaN a is checked first).
template<> __host__ __device__ double max(double a, double b)
{
    if(isnan(a)) return b;
    if(isnan(b)) return a;
    return (a>b) ? a : b;
}
|
||||
|
||||
//Maximum of two floats that ignores NaN: if one argument is NaN the other
//is returned (a NaN a is checked first).
template<> __host__ __device__ float max(float a, float b)
{
    if(isnan(a)) return b;
    if(isnan(b)) return a;
    return (a>b) ? a : b;
}
|
||||
|
||||
//Polar angle of the point (x,y), folded to a non-negative value.
//The plane is split into four wedges by the lines y=x and y=-x, and asin or
//acos is used according to the wedge. Returns 0 for the zero vector.
__device__ __host__ double arg(double x, double y)
{
    double r = ::sqrt(x*x+y*y);
    double ang;

    if(!(r>0.0))
    {
        return 0.0;
    }

    if(y<=x && y>=-x)
    {
        ang = asin(y/r); //right wedge
    }
    else if(y>=x && y>=-x)
    {
        ang = acos(x/r); //top wedge
    }
    else if(y>=x && y<=-x)
    {
        ang = pi-asin(y/r); //left wedge
    }
    else
    {
        ang = 2.0*pi-acos(x/r); //bottom wedge
    }

    //the right wedge yields negative angles below the x axis
    if(ang<0.0)
    {
        ang = 2.0*pi+ang;
    }

    return ang;
}
|
||||
|
||||
//Azimuth and elevation of the vector (x,y,z), written to *az and *el.
//  general case        : az = arg(x,y), el = atan(z/sqrt(x^2+y^2))
//  x=y=0, straight up  : az = 0, el = +pi/2
//  x=y=0, straight down: az = 0, el = -pi/2
//  zero vector         : az = 0, el = 0
//az and el are dereferenced without a NULL check.
__device__ __host__ void get_azel(double x, double y, double z, double *az, double *el)
{
    double norm = ::sqrt(x*x+y*y+z*z);
    double rxy;

    if(!(norm>0.0))
    {
        //zero vector - no real azimuth/elevation
        *az = 0.0;
        *el = 0.0;
        return;
    }

    rxy = ::sqrt(x*x+y*y);
    if(rxy>0.0)
    {
        *az = arg(x,y);
        *el = ::atan(z/rxy);
        return;
    }

    //straight up or straight down
    *az = 0.0;
    if(z>0.0)
    {
        *el = pi/2.0;
    }
    else
    {
        *el = -pi/2.0;
    }

    return;
}
|
||||
|
||||
|
||||
void test_amscumath1()
|
||||
{
|
||||
printf("pi = %1.16f\n",amscuda::pi);
|
||||
}
|
||||
|
||||
};
|
@ -0,0 +1,85 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
//Test kernel: for every sub-array I of q, writes J into element J.
//Threads split the sub-arrays between them, stepping by blockDim.x*gridDim.x
//from their own global index.
__global__ void test_amscurarray1_kf1(curarray<int> *q)
{
    int tid = threadIdx.x + blockIdx.x*blockDim.x;
    int stride = blockDim.x*gridDim.x;
    int narrays = q->Narrays;
    int arr;
    int elem;
    int len;

    arr = tid;
    while(arr<narrays)
    {
        len = q->N[arr];
        for(elem=0;elem<len;elem++)
        {
            q->devarrayptrs[arr][elem] = elem;
        }
        arr = arr + stride;
    }
}
|
||||
|
||||
void test_amscurarray1()
|
||||
{
|
||||
int I;
|
||||
cudaError_t err = cudaSuccess;
|
||||
|
||||
printf("test_amscurarray1:\n");
|
||||
curarray<int> *qarray = NULL;
|
||||
|
||||
curarray_new(&qarray,100);
|
||||
err = cudaGetLastError();
|
||||
if(err!=cudaSuccess)
|
||||
{
|
||||
printf("debug error trap 1: %s\n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
for(I=0;I<100;I++)
|
||||
{
|
||||
qarray->resizearray(I,5);
|
||||
}
|
||||
|
||||
qarray->push();
|
||||
qarray->pull();
|
||||
|
||||
cuda_errortrap("debug: error trap 2");
|
||||
|
||||
for(I=0;I<5;I++)
|
||||
{
|
||||
printf("array[%d], size %d\n",I,qarray->N[I]);
|
||||
}
|
||||
|
||||
//
|
||||
for(I=0;I<100;I++)
|
||||
{
|
||||
qarray->resizearray(I,I%5);
|
||||
cuda_errortrap("debug: error trap resize2");
|
||||
}
|
||||
|
||||
qarray->push();
|
||||
qarray->pull();
|
||||
test_amscurarray1_kf1<<<128,1>>>(qarray->devptr);
|
||||
|
||||
cuda_errortrap("debug: error trap kf1");
|
||||
|
||||
qarray->pull();
|
||||
cuda_errortrap("debug: error trap pull2");
|
||||
|
||||
|
||||
for(I=0;I<5;I++)
|
||||
{
|
||||
printf("array[%d], size %d\n",I,qarray->N[I]);
|
||||
}
|
||||
|
||||
curarray_delete(&qarray);
|
||||
|
||||
cuda_errortrap("debug: error trap 3");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
};
|
@ -0,0 +1,361 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
__host__ __device__ cuvect2::cuvect2()
|
||||
{
|
||||
x = 0.0; y = 0.0;
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect2::~cuvect2()
|
||||
{
|
||||
x = 0.0; y = 0.0;
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect2::cuvect2(double _x, double _y)
|
||||
{
|
||||
x = _x; y = _y;
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ double& cuvect2::operator[](const int I)
|
||||
{
|
||||
if(I==0) return x;
|
||||
if(I==1) return y;
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ const double& cuvect2::operator[](const int I) const
|
||||
{
|
||||
if(I==0) return x;
|
||||
if(I==1) return y;
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect2 cuvect2::operator+(cuvect2 lhs)
|
||||
{
|
||||
cuvect2 ret;
|
||||
ret.x = x + lhs.x;
|
||||
ret.y = y + lhs.y;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect2 cuvect2::operator-(cuvect2 lhs)
|
||||
{
|
||||
cuvect2 ret;
|
||||
ret.x = x - lhs.x;
|
||||
ret.y = y - lhs.y;
|
||||
return ret;
|
||||
}
|
||||
__host__ __device__ cuvect2 cuvect2::operator*(double lhs)
|
||||
{
|
||||
cuvect2 ret;
|
||||
ret.x = x*lhs;
|
||||
ret.y = y*lhs;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect2 cuvect2::operator/(double lhs)
|
||||
{
|
||||
cuvect2 ret;
|
||||
ret.x = x/lhs;
|
||||
ret.y = y/lhs;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ double cuvect2_dot(cuvect2 a, cuvect2 b)
|
||||
{
|
||||
double ret = a.x*b.x+a.y*b.y;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ double cuvect2_cross(cuvect2 a, cuvect2 b)
|
||||
{
|
||||
double ret = a.x*b.y-a.y*b.x;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ double cuvect2_norm(cuvect2 a)
|
||||
{
|
||||
double ret = ::sqrt(a.x*a.x+a.y*a.y);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Returns a scaled by 1/norm(a). When the norm is not greater than zero
// the zero vector is returned instead of dividing.
__host__ __device__ cuvect2 cuvect2_normalize(cuvect2 a)
{
	const double len = cuvect2_norm(a);
	if(!(len>0.0))
	{
		return cuvect2(0.0,0.0);
	}
	return cuvect2(a.x/len,a.y/len);
}
|
||||
|
||||
// Projects a onto the direction of b: normalize(b) * dot(a, normalize(b)).
__host__ __device__ cuvect2 cuvect2_proj(cuvect2 a, cuvect2 b)
{
	cuvect2 unit = cuvect2_normalize(b);
	const double along = cuvect2_dot(a,unit);
	return unit*along;
}
|
||||
|
||||
|
||||
__host__ __device__ cumat2::cumat2()
|
||||
{
|
||||
int I;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
dat[I] = 0.0;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat2::~cumat2()
|
||||
{
|
||||
int I;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
dat[I] = 0.0;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ double& cumat2::operator[](const int I)
|
||||
{
|
||||
return dat[I];
|
||||
}
|
||||
|
||||
__host__ __device__ double& cumat2::operator()(const int I, const int J)
|
||||
{
|
||||
return dat[I+2*J];
|
||||
}
|
||||
|
||||
__host__ __device__ cumat2 cumat2::operator+(cumat2 lhs)
|
||||
{
|
||||
int I;
|
||||
cumat2 ret;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I] + lhs.dat[I];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat2 cumat2::operator-(cumat2 lhs)
|
||||
{
|
||||
int I;
|
||||
cumat2 ret;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I] - lhs.dat[I];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat2 cumat2::operator*(double lhs)
|
||||
{
|
||||
cumat2 ret;
|
||||
int I;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I]*lhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat2 cumat2::operator/(double lhs)
|
||||
{
|
||||
cumat2 ret;
|
||||
int I;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I]/lhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ double& cumat2::at(const int I, const int J)
|
||||
{
|
||||
return dat[I+2*J];
|
||||
}
|
||||
|
||||
|
||||
// Matrix-vector product: ret[I] = sum over J of dat[I+2*J]*lhs[J].
__host__ __device__ cuvect2 cumat2::operator*(cuvect2 lhs)
{
	cuvect2 out = cuvect2(0.0,0.0);
	for(int row=0; row<2; row++)
	{
		// accumulate from the zero the vector was constructed with
		double acc = out[row];
		for(int col=0; col<2; col++)
		{
			acc = acc + this->dat[row+2*col]*lhs[col];
		}
		out[row] = acc;
	}
	return out;
}
|
||||
|
||||
// Matrix-matrix product: ret(I,J) = sum over K of this(I,K)*lhs(K,J),
// with element (I,J) stored at dat[I+2*J].
__host__ __device__ cumat2 cumat2::operator*(cumat2 lhs)
{
	cumat2 prod;
	for(int row=0; row<2; row++)
	{
		for(int col=0; col<2; col++)
		{
			double acc = 0.0;
			for(int inner=0; inner<2; inner++)
			{
				acc = acc + this->dat[row+2*inner]*lhs.dat[inner+2*col];
			}
			prod.dat[row+2*col] = acc;
		}
	}
	return prod;
}
|
||||
|
||||
__host__ __device__ double cumat2::det()
|
||||
{
|
||||
double ret = 0.0;
|
||||
ret = ret + this->at(0,0)*this->at(1,1);
|
||||
ret = ret - this->at(1,0)*this->at(0,1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Returns a new matrix with rows and columns exchanged; *this is not modified.
__host__ __device__ cumat2 cumat2::transpose()
{
	cumat2 t;
	// diagonal entries keep their place
	t.dat[0+2*0] = this->dat[0+2*0];
	t.dat[1+2*1] = this->dat[1+2*1];
	// off-diagonal entries trade places
	t.dat[0+2*1] = this->dat[1+2*0];
	t.dat[1+2*0] = this->dat[0+2*1];
	return t;
}
|
||||
|
||||
// Returns the inverse of this 2x2 matrix (adjugate divided by determinant).
// If the determinant is exactly zero, every element of the result is set to inf.
// *this is not modified.
__host__ __device__ cumat2 cumat2::inverse()
{
	cumat2 q;
	// The determinant must come from *this; q has just been zero-initialised,
	// so q.det() would always be 0 and the singular branch would always run.
	const double dt = this->det();
	if(dt!=0.0)
	{
		q(0,0) = this->at(1,1)/dt;
		q(0,1) = -this->at(0,1)/dt;
		q(1,0) = -this->at(1,0)/dt;
		q(1,1) = this->at(0,0)/dt;
	}
	else
	{
		q(0,0) = inf;
		q(0,1) = inf;
		q(1,0) = inf;
		q(1,1) = inf;
	}

	return q;
}
|
||||
|
||||
//2x2 matrix operations
|
||||
//matrix order is assumed to be mat[I,J] = mat[I+2*J]
|
||||
|
||||
//transpose a 2x2 matrix in place
|
||||
// Transposes the 4-element matrix mat2inout in place by exchanging the two
// off-diagonal entries; the diagonal entries are left as they are.
__host__ __device__ void mat2_transpose(double *mat2inout)
{
	const double upper = mat2inout[0+1*2];
	const double lower = mat2inout[1+0*2];

	mat2inout[1+0*2] = upper;
	mat2inout[0+1*2] = lower;

	return;
}
|
||||
|
||||
//copies src to dest
|
||||
// Copies the four elements of mat2_src into mat2_dest, in index order.
__host__ __device__ void mat2_copy(double *mat2_dest, const double *mat2_src)
{
	int k;
	for(k=0;k<4;k++)
	{
		mat2_dest[k] = mat2_src[k];
	}
	return;
}
|
||||
|
||||
//inverts mat2inout[4] in place
|
||||
// Inverts the 4-element matrix mat2inout in place, element (I,J) at index I+2*J.
// The result is the adjugate divided by the determinant. No check is made for
// a zero determinant; in that case the divisions produce inf/NaN values.
__host__ __device__ void mat2_inverse(double *mat2inout)
{
	// Read every input element before writing anything: the previous code
	// wrote [0,0] first and then used that overwritten value to compute [1,1].
	const double m00 = mat2inout[0+0*2];
	const double m10 = mat2inout[1+0*2];
	const double m01 = mat2inout[0+1*2];
	const double m11 = mat2inout[1+1*2];
	const double det = m00*m11-m01*m10;

	mat2inout[0+0*2] = m11/det;
	mat2inout[1+0*2] = -m10/det;
	mat2inout[0+1*2] = -m01/det;
	mat2inout[1+1*2] = m00/det;

	return;
}
|
||||
|
||||
//rotation matrix from angle
|
||||
// Fills the 4-element matrix mat2 from angle:
// indices 0 and 3 receive cos(angle), index 1 receives sin(angle) and
// index 2 receives -sin(angle).
__host__ __device__ void mat2_rot_from_angle(double angle, double *mat2)
{
	const double c = ::cos(angle);
	const double s = ::sin(angle);

	mat2[0+0*2] = c;
	mat2[1+1*2] = c;
	mat2[1+0*2] = s;
	mat2[0+1*2] = -s;
	return;
}
|
||||
|
||||
//multiplies c = a*b
|
||||
// Computes mat2c = mat2a * mat2b for 4-element matrices whose element (I,J)
// is stored at index I+2*J. Does nothing if any pointer is NULL.
// The product is built in a local buffer first, so mat2c may alias either input.
//
// The previous index pattern paired a[1+0*2] with b[0+1*2], which under the
// I+2*J layout evaluates b*a rather than a*b; the sums below use the same
// pattern as mat3_mult and cumat2::operator*.
__host__ __device__ void mat2_mult(double *mat2a, double *mat2b, double *mat2c)
{
	double prod[4];
	int I,J,K;

	if(mat2a==NULL || mat2b==NULL || mat2c==NULL)
	{
		return;
	}

	for(I=0;I<2;I++)
	{
		for(J=0;J<2;J++)
		{
			double acc = 0.0;
			for(K=0;K<2;K++)
			{
				acc = acc + mat2a[I+2*K]*mat2b[K+2*J];
			}
			prod[I+2*J] = acc;
		}
	}

	// write the result only after both inputs have been fully read
	for(I=0;I<4;I++)
	{
		mat2c[I] = prod[I];
	}

	return;
}
|
||||
|
||||
// ret = a*b
|
||||
// Returns the matrix-vector product mat2a * b, where mat2a is a 4-element
// matrix with element (I,J) stored at index I+2*J.
//
// The previous version read mat2a[1+0*2] for the (0,1) term and mat2a[0+1*2]
// for the (1,0) term, i.e. it multiplied by the transpose; the indices below
// match cumat2::operator*(cuvect2) and mat3_mult.
__host__ __device__ cuvect2 mat2_mult(double *mat2a, cuvect2 b)
{
	cuvect2 ret;
	ret.x = mat2a[0+0*2]*b.x + mat2a[0+1*2]*b.y;
	ret.y = mat2a[1+0*2]*b.x + mat2a[1+1*2]*b.y;
	return ret;
}
|
||||
|
||||
void test_cuvect2_1()
|
||||
{
|
||||
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
};
|
@ -0,0 +1,361 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
__host__ __device__ cuvect2f::cuvect2f()
|
||||
{
|
||||
x = 0.0; y = 0.0;
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect2f::~cuvect2f()
|
||||
{
|
||||
x = 0.0; y = 0.0;
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect2f::cuvect2f(float _x, float _y)
|
||||
{
|
||||
x = _x; y = _y;
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ float& cuvect2f::operator[](const int I)
|
||||
{
|
||||
if(I==0) return x;
|
||||
if(I==1) return y;
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ const float& cuvect2f::operator[](const int I) const
|
||||
{
|
||||
if(I==0) return x;
|
||||
if(I==1) return y;
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect2f cuvect2f::operator+(cuvect2f lhs)
|
||||
{
|
||||
cuvect2f ret;
|
||||
ret.x = x + lhs.x;
|
||||
ret.y = y + lhs.y;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect2f cuvect2f::operator-(cuvect2f lhs)
|
||||
{
|
||||
cuvect2f ret;
|
||||
ret.x = x - lhs.x;
|
||||
ret.y = y - lhs.y;
|
||||
return ret;
|
||||
}
|
||||
__host__ __device__ cuvect2f cuvect2f::operator*(float lhs)
|
||||
{
|
||||
cuvect2f ret;
|
||||
ret.x = x*lhs;
|
||||
ret.y = y*lhs;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect2f cuvect2f::operator/(float lhs)
|
||||
{
|
||||
cuvect2f ret;
|
||||
ret.x = x/lhs;
|
||||
ret.y = y/lhs;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ float cuvect2f_dot(cuvect2f a, cuvect2f b)
|
||||
{
|
||||
float ret = a.x*b.x+a.y*b.y;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ float cuvect2f_cross(cuvect2f a, cuvect2f b)
|
||||
{
|
||||
float ret = a.x*b.y-a.y*b.x;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ float cuvect2f_norm(cuvect2f a)
|
||||
{
|
||||
float ret = ::sqrtf(a.x*a.x+a.y*a.y);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect2f cuvect2f_normalize(cuvect2f a)
|
||||
{
|
||||
cuvect2f ret;
|
||||
float m = cuvect2f_norm(a);
|
||||
if(m>0.0)
|
||||
{
|
||||
ret.x = a.x/m; ret.y = a.y/m;
|
||||
}
|
||||
else
|
||||
{
|
||||
ret.x = 0.0; ret.y = 0.0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect2f cuvect2f_proj(cuvect2f a, cuvect2f b)
|
||||
{
|
||||
cuvect2f ret;
|
||||
cuvect2f bn = cuvect2f_normalize(b);
|
||||
float m = cuvect2f_dot(a,bn);
|
||||
ret = bn*m;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
__host__ __device__ cumat2f::cumat2f()
|
||||
{
|
||||
int I;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
dat[I] = 0.0;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat2f::~cumat2f()
|
||||
{
|
||||
int I;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
dat[I] = 0.0;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ float& cumat2f::operator[](const int I)
|
||||
{
|
||||
return dat[I];
|
||||
}
|
||||
|
||||
__host__ __device__ float& cumat2f::operator()(const int I, const int J)
|
||||
{
|
||||
return dat[I+2*J];
|
||||
}
|
||||
|
||||
__host__ __device__ cumat2f cumat2f::operator+(cumat2f lhs)
|
||||
{
|
||||
int I;
|
||||
cumat2f ret;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I] + lhs.dat[I];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat2f cumat2f::operator-(cumat2f lhs)
|
||||
{
|
||||
int I;
|
||||
cumat2f ret;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I] - lhs.dat[I];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat2f cumat2f::operator*(float lhs)
|
||||
{
|
||||
cumat2f ret;
|
||||
int I;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I]*lhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat2f cumat2f::operator/(float lhs)
|
||||
{
|
||||
cumat2f ret;
|
||||
int I;
|
||||
for(I=0;I<4;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I]/lhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ float& cumat2f::at(const int I, const int J)
|
||||
{
|
||||
return dat[I+2*J];
|
||||
}
|
||||
|
||||
|
||||
__host__ __device__ cuvect2f cumat2f::operator*(cuvect2f lhs)
|
||||
{
|
||||
cuvect2f ret = cuvect2f(0.0,0.0);
|
||||
int I,J;
|
||||
for(I=0;I<2;I++)
|
||||
{
|
||||
for(J=0;J<2;J++)
|
||||
{
|
||||
ret[I] = ret[I] + this->at(I,J)*lhs[J];
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat2f cumat2f::operator*(cumat2f lhs)
|
||||
{
|
||||
cumat2f ret;
|
||||
int I,J,K;
|
||||
|
||||
for(I=0;I<2;I++)
|
||||
{
|
||||
for(J=0;J<2;J++)
|
||||
{
|
||||
ret(I,J) = 0.0;
|
||||
for(K=0;K<2;K++)
|
||||
{
|
||||
ret(I,J) = ret(I,J) + this->at(I,K)*lhs(K,J);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ float cumat2f::det()
|
||||
{
|
||||
float ret = 0.0;
|
||||
ret = ret + this->at(0,0)*this->at(1,1);
|
||||
ret = ret - this->at(1,0)*this->at(0,1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat2f cumat2f::transpose()
|
||||
{
|
||||
cumat2f q;
|
||||
int I,J;
|
||||
for(I=0;I<2;I++)
|
||||
{
|
||||
for(J=0;J<2;J++)
|
||||
{
|
||||
q.at(I,J) = this->at(J,I);
|
||||
}
|
||||
}
|
||||
return q;
|
||||
}
|
||||
|
||||
// Returns the inverse of this 2x2 matrix (adjugate divided by determinant).
// If the determinant is exactly zero, every element of the result is set to inf.
// *this is not modified.
__host__ __device__ cumat2f cumat2f::inverse()
{
	cumat2f q;
	// The determinant must come from *this; q has just been zero-initialised,
	// so q.det() would always be 0 and the singular branch would always run.
	const float dt = this->det();
	if(dt!=0.0)
	{
		q(0,0) = this->at(1,1)/dt;
		q(0,1) = -this->at(0,1)/dt;
		q(1,0) = -this->at(1,0)/dt;
		q(1,1) = this->at(0,0)/dt;
	}
	else
	{
		q(0,0) = inf;
		q(0,1) = inf;
		q(1,0) = inf;
		q(1,1) = inf;
	}

	return q;
}
|
||||
|
||||
//2x2 matrix operations
|
||||
//matrix order is assumed to be mat[I,J] = mat[I+2*J]
|
||||
|
||||
//transpose a 2x2 matrix in place
|
||||
__host__ __device__ void mat2f_transpose(float *mat2inout)
|
||||
{
|
||||
float mat2f_in[4];
|
||||
|
||||
mat2f_copy(mat2f_in,mat2inout);
|
||||
mat2inout[0+0*2] = mat2f_in[0+0*2];
|
||||
mat2inout[1+0*2] = mat2f_in[0+1*2];
|
||||
mat2inout[0+1*2] = mat2f_in[1+0*2];
|
||||
mat2inout[1+1*2] = mat2f_in[1+1*2];
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
//copies src to dest
|
||||
__host__ __device__ void mat2f_copy(float *mat2f_dest, const float *mat2f_src)
|
||||
{
|
||||
mat2f_dest[0+0*2] = mat2f_src[0+0*2];
|
||||
mat2f_dest[1+0*2] = mat2f_src[1+0*2];
|
||||
mat2f_dest[0+1*2] = mat2f_src[0+1*2];
|
||||
mat2f_dest[1+1*2] = mat2f_src[1+1*2];
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
//inverts mat2inout[4] in place
|
||||
// Inverts the 4-element matrix mat2inout in place, element (I,J) at index I+2*J.
// The result is the adjugate divided by the determinant. No check is made for
// a zero determinant; in that case the divisions produce inf/NaN values.
__host__ __device__ void mat2f_inverse(float *mat2inout)
{
	// Read every input element before writing anything: the previous code
	// wrote [0,0] first and then used that overwritten value to compute [1,1].
	const float m00 = mat2inout[0+0*2];
	const float m10 = mat2inout[1+0*2];
	const float m01 = mat2inout[0+1*2];
	const float m11 = mat2inout[1+1*2];
	const float det = m00*m11-m01*m10;

	mat2inout[0+0*2] = m11/det;
	mat2inout[1+0*2] = -m10/det;
	mat2inout[0+1*2] = -m01/det;
	mat2inout[1+1*2] = m00/det;

	return;
}
|
||||
|
||||
//rotation matrix from angle
|
||||
__host__ __device__ void mat2f_rot_from_angle(float angle, float *mat2)
|
||||
{
|
||||
mat2[0+0*2] = ::cosf(angle);
|
||||
mat2[1+0*2] = ::sinf(angle);
|
||||
mat2[0+1*2] = -::sinf(angle);
|
||||
mat2[1+1*2] = ::cosf(angle);
|
||||
return;
|
||||
}
|
||||
|
||||
//multiplies c = a*b
|
||||
// Computes mat2c = mat2a * mat2b for 4-element matrices whose element (I,J)
// is stored at index I+2*J. Does nothing if any pointer is NULL.
// The product is built in a local buffer first, so mat2c may alias either input.
//
// The previous index pattern paired a[1+0*2] with b[0+1*2], which under the
// I+2*J layout evaluates b*a rather than a*b; the sums below use the same
// pattern as cumat2f::operator*.
__host__ __device__ void mat2f_mult(float *mat2a, float *mat2b, float *mat2c)
{
	float prod[4];
	int I,J,K;

	if(mat2a==NULL || mat2b==NULL || mat2c==NULL)
	{
		return;
	}

	for(I=0;I<2;I++)
	{
		for(J=0;J<2;J++)
		{
			float acc = 0.0f;
			for(K=0;K<2;K++)
			{
				acc = acc + mat2a[I+2*K]*mat2b[K+2*J];
			}
			prod[I+2*J] = acc;
		}
	}

	// write the result only after both inputs have been fully read
	for(I=0;I<4;I++)
	{
		mat2c[I] = prod[I];
	}

	return;
}
|
||||
|
||||
// ret = a*b
|
||||
// Returns the matrix-vector product mat2a * b, where mat2a is a 4-element
// matrix with element (I,J) stored at index I+2*J.
//
// The previous version read mat2a[1+0*2] for the (0,1) term and mat2a[0+1*2]
// for the (1,0) term, i.e. it multiplied by the transpose; the indices below
// match cumat2f::operator*(cuvect2f).
__host__ __device__ cuvect2f mat2f_mult(float *mat2a, cuvect2f b)
{
	cuvect2f ret;
	ret.x = mat2a[0+0*2]*b.x + mat2a[0+1*2]*b.y;
	ret.y = mat2a[1+0*2]*b.x + mat2a[1+1*2]*b.y;
	return ret;
}
|
||||
|
||||
void test_cuvect2f_1()
|
||||
{
|
||||
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
};
|
@ -0,0 +1,581 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
__host__ __device__ cuvect3::cuvect3()
|
||||
{
|
||||
x = 0.0; y = 0.0; z = 0.0;
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect3::~cuvect3()
|
||||
{
|
||||
x = 0.0; y = 0.0; z = 0.0;
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ double& cuvect3::operator[](const int I)
|
||||
{
|
||||
if(I==0) return x;
|
||||
if(I==1) return y;
|
||||
if(I==2) return z;
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ const double& cuvect3::operator[](const int I) const
|
||||
{
|
||||
if(I==0) return x;
|
||||
if(I==1) return y;
|
||||
if(I==2) return z;
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect3 cuvect3::operator+(cuvect3 lhs)
|
||||
{
|
||||
cuvect3 ret;
|
||||
ret.x = x+lhs.x;
|
||||
ret.y = y+lhs.y;
|
||||
ret.z = z+lhs.z;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect3 cuvect3::operator-(cuvect3 lhs)
|
||||
{
|
||||
cuvect3 ret;
|
||||
ret.x = x-lhs.x;
|
||||
ret.y = y-lhs.y;
|
||||
ret.z = z-lhs.z;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect3 cuvect3::operator*(double lhs)
|
||||
{
|
||||
cuvect3 ret;
|
||||
ret.x = x*lhs;
|
||||
ret.y = y*lhs;
|
||||
ret.z = z*lhs;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect3 cuvect3::operator/(double lhs)
|
||||
{
|
||||
cuvect3 ret;
|
||||
ret.x = x/lhs;
|
||||
ret.y = y/lhs;
|
||||
ret.z = z/lhs;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect3::cuvect3(double _x, double _y, double _z)
|
||||
{
|
||||
x = _x; y = _y; z = _z;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
__host__ __device__ double cuvect3_dot(cuvect3 a, cuvect3 b)
|
||||
{
|
||||
double ret = a.x*b.x+a.y*b.y+a.z*b.z;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Returns the cross product a x b.
__host__ __device__ cuvect3 cuvect3_cross(cuvect3 a, cuvect3 b)
{
	const double cx = a.y*b.z-a.z*b.y;
	const double cy = a.z*b.x-a.x*b.z;
	const double cz = a.x*b.y-a.y*b.x;
	return cuvect3(cx,cy,cz);
}
|
||||
|
||||
__host__ __device__ double cuvect3_norm(cuvect3 a)
|
||||
{
|
||||
double ret;
|
||||
ret = ::sqrt(a.x*a.x+a.y*a.y+a.z*a.z);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Returns a divided by its Euclidean length. When the length is not greater
// than zero the zero vector is returned instead of dividing.
__host__ __device__ cuvect3 cuvect3_normalize(cuvect3 a)
{
	const double len = ::sqrt(a.x*a.x+a.y*a.y+a.z*a.z);
	if(!(len>0.0))
	{
		return cuvect3(0.0,0.0,0.0);
	}
	return cuvect3(a.x/len,a.y/len,a.z/len);
}
|
||||
|
||||
__host__ __device__ cuvect3 cuvect3_proj(cuvect3 a, cuvect3 b)
|
||||
{
|
||||
cuvect3 ret;
|
||||
cuvect3 bn = cuvect3_normalize(b);
|
||||
double m = cuvect3_dot(a,bn);
|
||||
ret = bn*m;
|
||||
return ret;
|
||||
}
|
||||
|
||||
//transposes a 3x3 (9 element) matrix
|
||||
// Transposes the 9-element matrix mat3inout in place by exchanging each
// off-diagonal pair (index r+3*c with index c+3*r).
__host__ __device__ void mat3_transpose(double *mat3inout)
{
	int r,c;
	double held;

	for(r=0;r<3;r++)
	{
		for(c=r+1;c<3;c++)
		{
			held = mat3inout[r+c*3];
			mat3inout[r+c*3] = mat3inout[c+r*3];
			mat3inout[c+r*3] = held;
		}
	}

	return;
}
|
||||
|
||||
//copies src to dest
|
||||
// Copies the nine elements of mat3_src into mat3_dest, in index order.
// Does nothing when either pointer is NULL.
__host__ __device__ void mat3_copy(double *mat3_dest, const double *mat3_src)
{
	if(mat3_dest!=NULL && mat3_src!=NULL)
	{
		int k = 0;
		while(k<9)
		{
			mat3_dest[k] = mat3_src[k];
			k++;
		}
	}

	return;
}
|
||||
|
||||
|
||||
// Returns the determinant of the 9-element matrix mat3in (element (I,J) read
// from index I+3*J) as the sum of three triple products minus three others.
__host__ __device__ double mat3_det(double *mat3in)
{
	// the three terms that enter with a plus sign
	const double p0 = mat3in[0+0*3]*mat3in[1+1*3]*mat3in[2+2*3];
	const double p1 = mat3in[0+1*3]*mat3in[1+2*3]*mat3in[2+0*3];
	const double p2 = mat3in[0+2*3]*mat3in[1+0*3]*mat3in[2+1*3];
	// the three terms that enter with a minus sign
	const double n0 = mat3in[0+0*3]*mat3in[1+2*3]*mat3in[2+1*3];
	const double n1 = mat3in[0+1*3]*mat3in[1+0*3]*mat3in[2+2*3];
	const double n2 = mat3in[0+2*3]*mat3in[1+1*3]*mat3in[2+0*3];

	double total = 0.0;
	total += p0;
	total += p1;
	total += p2;
	total -= n0;
	total -= n1;
	total -= n2;

	return total;
}
|
||||
|
||||
//inverts a 3x3 (9 element) matrix
|
||||
__host__ __device__ void mat3_inverse(double *mat3inout)
|
||||
{
|
||||
int I;
|
||||
double matint[9];
|
||||
double det = mat3_det(mat3inout);
|
||||
|
||||
for(I=0;I<9;I++)
|
||||
{
|
||||
matint[I] = mat3inout[I];
|
||||
}
|
||||
|
||||
mat3inout[0+0*3] = (matint[1+1*3]*matint[2+2*3]-matint[1+2*3]*matint[2+1*3])/det;
|
||||
mat3inout[0+1*3] = -(matint[1+0*3]*matint[2+2*3]-matint[1+2*3]*matint[2+0*3])/det;
|
||||
mat3inout[0+2*3] = (matint[1+0*3]*matint[2+1*3]-matint[1+1*3]*matint[2+0*3])/det;
|
||||
mat3inout[1+0*3] = -(matint[0+1*3]*matint[2+2*3]-matint[0+2*3]*matint[2+1*3])/det;
|
||||
mat3inout[1+1*3] = (matint[0+0*3]*matint[2+2*3]-matint[0+2*3]*matint[2+0*3])/det;
|
||||
mat3inout[1+2*3] = -(matint[0+0*3]*matint[2+1*3]-matint[0+1*3]*matint[2+0*3])/det;
|
||||
mat3inout[2+0*3] = (matint[0+1*3]*matint[1+2*3]-matint[0+2*3]*matint[1+1*3])/det;
|
||||
mat3inout[2+1*3] = -(matint[0+0*3]*matint[1+2*3]-matint[0+2*3]*matint[1+0*3])/det;
|
||||
mat3inout[2+2*3] = (matint[0+0*3]*matint[1+1*3]-matint[0+1*3]*matint[1+0*3])/det;
|
||||
|
||||
mat3_transpose(mat3inout);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Returns the matrix-vector product of the 9-element matrix mat3in (element
// (I,J) at index I+3*J) with cvin: ret[I] = sum over J of mat3in[I+3*J]*cvin[J].
__host__ __device__ cuvect3 mat3_mult(double *mat3in, cuvect3 cvin)
{
	cuvect3 out;
	for(int row=0; row<3; row++)
	{
		double acc = 0.0;
		for(int col=0; col<3; col++)
		{
			acc = acc + mat3in[row+3*col]*cvin[col];
		}
		out[row] = acc;
	}

	return out;
}
|
||||
|
||||
// Computes matout = matina * matinb for 9-element matrices with element (I,J)
// at index I+3*J. The product is accumulated in a local buffer and copied to
// matout afterwards, so matout may be the same array as either input.
__host__ __device__ void mat3_mult(double *matina, double *matinb, double *matout)
{
	double prod[9];
	int row,col,inner,k;

	for(row=0;row<3;row++)
	{
		for(col=0;col<3;col++)
		{
			double acc = 0.0;
			for(inner=0;inner<3;inner++)
			{
				acc = acc + matina[row+3*inner]*matinb[inner+3*col];
			}
			prod[row+3*col] = acc;
		}
	}

	for(k=0;k<9;k++)
	{
		matout[k] = prod[k];
	}

	return;
}
|
||||
|
||||
// Fills the 9-element matrix matout with an antisymmetric matrix built from
// vecin: the diagonal is zero and the off-diagonal entries are plus or minus
// the components of vecin, written one column (fixed J in I+3*J) at a time.
__host__ __device__ void mat3_hodgedual(cuvect3 vecin, double *matout)
{
	const double vx = vecin.x;
	const double vy = vecin.y;
	const double vz = vecin.z;

	// indices 0..2
	matout[0 + 0*3] = 0.0f;
	matout[1 + 0*3] = -vz;
	matout[2 + 0*3] = vy;

	// indices 3..5
	matout[0 + 1*3] = vz;
	matout[1 + 1*3] = 0.0f;
	matout[2 + 1*3] = -vx;

	// indices 6..8
	matout[0 + 2*3] = -vy;
	matout[1 + 2*3] = vx;
	matout[2 + 2*3] = 0.0f;
	return;
}
|
||||
|
||||
// Computes half the difference of each off-diagonal pair of the 9-element
// matrix matin and stores the three values in vecout.
// NOTE(review): vecout is received by value, so the components written here
// live in a local copy and are not visible to the caller; returning them
// would need a reference or pointer parameter and a matching declaration change.
__host__ __device__ void mat3_hodgedual(double *matin, cuvect3 vecout)
{
	const double d0 = matin[1 + 2*3] - matin[2 + 1*3];
	const double d1 = matin[2 + 0*3] - matin[0 + 2*3];
	const double d2 = matin[0 + 1*3] - matin[1 + 0*3];

	vecout.x = 0.5*d0;
	vecout.y = 0.5*d1;
	vecout.z = 0.5*d2;

	return;
}
|
||||
|
||||
//returns direction cosine rotation matrix from axis and angle
|
||||
// Builds a 9-element rotation matrix from an axis and an angle.
// With H = mat3_hodgedual(normalized axis) and Hsq = H*H, every element is
//   matout = (Identity + Hsq) + H*sin(angle) - Hsq*cos(angle).
__host__ __device__ void mat3_rot_from_axisangle(cuvect3 axis, double angle, double *matout)
{
	double skew[9];
	double skewsq[9];
	int n;

	axis = cuvect3_normalize(axis);
	mat3_hodgedual(axis,skew);
	mat3_mult(skew,skew,skewsq);

	const double s = sin(angle);
	const double c = cos(angle);

	for(n=0;n<9;n++)
	{
		// indices 0, 4 and 8 are the diagonal of the identity matrix
		const double ident = (n%4==0) ? 1.0 : 0.0;
		matout[n] = (ident + skewsq[n]) + skew[n]*s - skewsq[n]*c;
	}

	return;
}
|
||||
|
||||
|
||||
|
||||
__host__ void test_cudavect_logic1()
|
||||
{
|
||||
//3 dim vector and matrix functional tests on host side
|
||||
|
||||
printf("3 dim vector and matrix functional tests on host side\n");
|
||||
|
||||
cuvect3 a,b,c;
|
||||
double ma[9],mb[9],mc[9];
|
||||
|
||||
int I,J;
|
||||
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
for(J=0;J<3;J++)
|
||||
{
|
||||
ma[I+3*J] = ((double) rand())/((double) RAND_MAX);
|
||||
mb[I+3*J] = ma[I+3*J];
|
||||
}
|
||||
}
|
||||
|
||||
mat3_inverse(mb);
|
||||
mat3_mult(ma,mb,mc);
|
||||
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
for(J=0;J<3;J++)
|
||||
{
|
||||
printf("ma[%d,%d] = %1.3f\n",I,J,ma[I+3*J]);
|
||||
}
|
||||
}
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
for(J=0;J<3;J++)
|
||||
{
|
||||
printf("mb[%d,%d] = %1.3f\n",I,J,mb[I+3*J]);
|
||||
}
|
||||
}
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
for(J=0;J<3;J++)
|
||||
{
|
||||
printf("mc[%d,%d] = %1.3f\n",I,J,mc[I+3*J]);
|
||||
}
|
||||
}
|
||||
|
||||
a = cuvect3(1,1,1);
|
||||
b = mat3_mult(ma,a);
|
||||
b = mat3_mult(mb,b);
|
||||
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
printf("a[%d] = %1.3f, b[%d] = %1.3f\n",I,a[I],I,b[I]);
|
||||
}
|
||||
|
||||
a = cuvect3(1,0,1);
|
||||
b = cuvect3(0,1,-1);
|
||||
c = a+b;
|
||||
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]);
|
||||
}
|
||||
|
||||
c = c/2.0;
|
||||
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]);
|
||||
}
|
||||
|
||||
c = cuvect3_cross(a,b);
|
||||
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]);
|
||||
}
|
||||
|
||||
printf("c dot a = %1.3f, c dot b = %1.3f\n",cuvect3_dot(c,a),cuvect3_dot(c,b));
|
||||
|
||||
printf("norm(a)=%1.3f, norm(b)=%1.3f, norm(c)=%1.3f\n",cuvect3_norm(a),cuvect3_norm(b),cuvect3_norm(c));
|
||||
a = cuvect3_normalize(a);
|
||||
b = cuvect3_normalize(b);
|
||||
c = cuvect3_normalize(c);
|
||||
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]);
|
||||
}
|
||||
printf("c dot a = %1.3f, c dot b = %1.3f\n",cuvect3_dot(c,a),cuvect3_dot(c,b));
|
||||
printf("norm(a)=%1.3f, norm(b)=%1.3f, norm(c)=%1.3f\n",cuvect3_norm(a),cuvect3_norm(b),cuvect3_norm(c));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
__host__ __device__ cumat3::cumat3()
|
||||
{
|
||||
int I;
|
||||
for(I=0;I<9;I++)
|
||||
{
|
||||
dat[I] = 0.0;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat3::~cumat3()
|
||||
{
|
||||
int I;
|
||||
for(I=0;I<9;I++)
|
||||
{
|
||||
dat[I] = 0.0;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ double& cumat3::operator[](const int I)
|
||||
{
|
||||
return dat[I];
|
||||
}
|
||||
|
||||
__host__ __device__ double& cumat3::operator()(const int I, const int J)
|
||||
{
|
||||
return dat[I+3*J];
|
||||
}
|
||||
|
||||
__host__ __device__ cumat3 cumat3::operator+(cumat3 lhs)
|
||||
{
|
||||
int I;
|
||||
cumat3 ret;
|
||||
for(I=0;I<9;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I] + lhs.dat[I];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat3 cumat3::operator-(cumat3 lhs)
|
||||
{
|
||||
int I;
|
||||
cumat3 ret;
|
||||
for(I=0;I<9;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I] - lhs.dat[I];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat3 cumat3::operator*(double lhs)
|
||||
{
|
||||
cumat3 ret;
|
||||
int I;
|
||||
for(I=0;I<9;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I]*lhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat3 cumat3::operator/(double lhs)
|
||||
{
|
||||
cumat3 ret;
|
||||
int I;
|
||||
for(I=0;I<9;I++)
|
||||
{
|
||||
ret.dat[I] = this->dat[I]/lhs;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ double& cumat3::at(const int I, const int J)
|
||||
{
|
||||
return dat[I+3*J];
|
||||
}
|
||||
|
||||
|
||||
__host__ __device__ cuvect3 cumat3::operator*(cuvect3 lhs)
|
||||
{
|
||||
cuvect3 ret = cuvect3(0.0,0.0,0.0);
|
||||
int I,J;
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
for(J=0;J<3;J++)
|
||||
{
|
||||
ret[I] = ret[I] + this->at(I,J)*lhs[J];
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat3 cumat3::operator*(cumat3 lhs)
|
||||
{
|
||||
cumat3 ret;
|
||||
int I,J,K;
|
||||
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
for(J=0;J<3;J++)
|
||||
{
|
||||
ret(I,J) = 0.0;
|
||||
for(K=0;K<3;K++)
|
||||
{
|
||||
ret(I,J) = ret(I,J) + this->at(I,K)*lhs(K,J);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ double cumat3::det()
|
||||
{
|
||||
double ret = 0.0;
|
||||
|
||||
ret = ret + this->at(0,0)*this->at(1,1)*this->at(2,2);
|
||||
ret = ret + this->at(0,1)*this->at(1,2)*this->at(2,0);
|
||||
ret = ret + this->at(0,2)*this->at(1,0)*this->at(2,1);
|
||||
ret = ret - this->at(0,0)*this->at(1,2)*this->at(2,1);
|
||||
ret = ret - this->at(0,1)*this->at(1,0)*this->at(2,2);
|
||||
ret = ret - this->at(0,2)*this->at(1,1)*this->at(2,0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cumat3 cumat3::transpose()
|
||||
{
|
||||
cumat3 q;
|
||||
int I,J;
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
for(J=0;J<3;J++)
|
||||
{
|
||||
q.at(I,J) = this->at(J,I);
|
||||
}
|
||||
}
|
||||
return q;
|
||||
}
|
||||
|
||||
// Returns the inverse of this 3x3 matrix. The signed 2x2 minors divided by
// the determinant are written into q and q is then transposed.
// If the determinant is exactly zero, every element of the result is set to inf.
// *this is not modified.
__host__ __device__ cumat3 cumat3::inverse()
{
	cumat3 q;
	// The determinant must come from *this; q has just been zero-initialised,
	// so q.det() would always be 0 and the singular branch would always run.
	const double dt = this->det();
	if(dt!=0.0)
	{
		q(0,0) = (this->at(1,1)*this->at(2,2)-this->at(1,2)*this->at(2,1))/dt;
		q(0,1) = -(this->at(1,0)*this->at(2,2)-this->at(1,2)*this->at(2,0))/dt;
		q(0,2) = (this->at(1,0)*this->at(2,1)-this->at(1,1)*this->at(2,0))/dt;
		q(1,0) = -(this->at(0,1)*this->at(2,2)-this->at(0,2)*this->at(2,1))/dt;
		q(1,1) = (this->at(0,0)*this->at(2,2)-this->at(0,2)*this->at(2,0))/dt;
		q(1,2) = -(this->at(0,0)*this->at(2,1)-this->at(0,1)*this->at(2,0))/dt;
		q(2,0) = (this->at(0,1)*this->at(1,2)-this->at(0,2)*this->at(1,1))/dt;
		q(2,1) = -(this->at(0,0)*this->at(1,2)-this->at(0,2)*this->at(1,0))/dt;
		q(2,2) = (this->at(0,0)*this->at(1,1)-this->at(0,1)*this->at(1,0))/dt;

		// the entries above are laid out transposed relative to the inverse
		q = q.transpose();
	}
	else
	{
		for(int I=0;I<3;I++)
		{
			for(int J=0;J<3;J++)
			{
				q(I,J) = inf;
			}
		}
	}

	return q;
}
|
||||
|
||||
};
|
@ -0,0 +1,581 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
__host__ __device__ cuvect3f::cuvect3f()
|
||||
{
|
||||
x = 0.0; y = 0.0; z = 0.0;
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect3f::~cuvect3f()
|
||||
{
|
||||
x = 0.0; y = 0.0; z = 0.0;
|
||||
return;
|
||||
}
|
||||
|
||||
__host__ __device__ float& cuvect3f::operator[](const int I)
|
||||
{
|
||||
if(I==0) return x;
|
||||
if(I==1) return y;
|
||||
if(I==2) return z;
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ const float& cuvect3f::operator[](const int I) const
|
||||
{
|
||||
if(I==0) return x;
|
||||
if(I==1) return y;
|
||||
if(I==2) return z;
|
||||
return x;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect3f cuvect3f::operator+(cuvect3f lhs)
|
||||
{
|
||||
cuvect3f ret;
|
||||
ret.x = x+lhs.x;
|
||||
ret.y = y+lhs.y;
|
||||
ret.z = z+lhs.z;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect3f cuvect3f::operator-(cuvect3f lhs)
|
||||
{
|
||||
cuvect3f ret;
|
||||
ret.x = x-lhs.x;
|
||||
ret.y = y-lhs.y;
|
||||
ret.z = z-lhs.z;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect3f cuvect3f::operator*(float lhs)
|
||||
{
|
||||
cuvect3f ret;
|
||||
ret.x = x*lhs;
|
||||
ret.y = y*lhs;
|
||||
ret.z = z*lhs;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect3f cuvect3f::operator/(float lhs)
|
||||
{
|
||||
cuvect3f ret;
|
||||
ret.x = x/lhs;
|
||||
ret.y = y/lhs;
|
||||
ret.z = z/lhs;
|
||||
return ret;
|
||||
}
|
||||
|
||||
__host__ __device__ cuvect3f::cuvect3f(float _x, float _y, float _z)
|
||||
{
|
||||
x = _x; y = _y; z = _z;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Dot product of a and b.
__host__ __device__ float cuvect3f_dot(cuvect3f a, cuvect3f b)
{
    return a.x*b.x+a.y*b.y+a.z*b.z;
}
|
||||
|
||||
// Cross product a x b.
__host__ __device__ cuvect3f cuvect3f_cross(cuvect3f a, cuvect3f b)
{
    return cuvect3f(a.y*b.z-a.z*b.y,
                    a.z*b.x-a.x*b.z,
                    a.x*b.y-a.y*b.x);
}
|
||||
|
||||
// Euclidean length of a.
__host__ __device__ float cuvect3f_norm(cuvect3f a)
{
    return ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z);
}
|
||||
|
||||
// Returns a scaled to unit length.
// If the length is not greater than zero, the zero vector is returned instead.
__host__ __device__ cuvect3f cuvect3f_normalize(cuvect3f a)
{
    const float len = ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z);

    if(!(len>0.0))
    {
        // nothing to scale: avoid dividing by a zero length
        return cuvect3f(0.0,0.0,0.0);
    }

    return cuvect3f(a.x/len, a.y/len, a.z/len);
}
|
||||
|
||||
// Projection of a onto the direction of b: (a . bn) * bn, with bn = normalize(b).
__host__ __device__ cuvect3f cuvect3f_proj(cuvect3f a, cuvect3f b)
{
    cuvect3f bn = cuvect3f_normalize(b);
    return bn*cuvect3f_dot(a,bn);
}
|
||||
|
||||
//transposes a 3x3 (9 element) matrix
|
||||
__host__ __device__ void mat3f_transpose(float *mat3inout)
|
||||
{
|
||||
int I,J;
|
||||
float matint[9];
|
||||
for(I=0;I<9;I++)
|
||||
{
|
||||
matint[I] = mat3inout[I];
|
||||
}
|
||||
|
||||
for(I=0;I<3;I++)
|
||||
{
|
||||
for(J=0;J<3;J++)
|
||||
{
|
||||
mat3inout[I+J*3] = matint[J+I*3];
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
//copies src to dest
|
||||
__host__ __device__ void mat3f_copy(float *mat3f_dest, const float *mat3f_src)
|
||||
{
|
||||
int I;
|
||||
if(mat3f_dest==NULL || mat3f_src==NULL)
|
||||
return;
|
||||
|
||||
for(I=0;I<9;I++)
|
||||
mat3f_dest[I] = mat3f_src[I];
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Determinant of a 3x3 matrix stored as 9 floats, element (I,J) at index I+3*J.
// The six triple products are accumulated one at a time: three added,
// then three subtracted.
__host__ __device__ float mat3f_det(float *mat3in)
{
    const float m00 = mat3in[0], m10 = mat3in[1], m20 = mat3in[2];
    const float m01 = mat3in[3], m11 = mat3in[4], m21 = mat3in[5];
    const float m02 = mat3in[6], m12 = mat3in[7], m22 = mat3in[8];

    float acc = 0.0;
    acc += m00*m11*m22;
    acc += m01*m12*m20;
    acc += m02*m10*m21;
    acc -= m00*m12*m21;
    acc -= m01*m10*m22;
    acc -= m02*m11*m20;

    return acc;
}
|
||||
|
||||
//inverts a 3x3 (9 element) matrix
|
||||
__host__ __device__ void mat3f_inverse(float *mat3inout)
|
||||
{
|
||||
int I;
|
||||
float matint[9];
|
||||
float det = mat3f_det(mat3inout);
|
||||
|
||||
for(I=0;I<9;I++)
|
||||
{
|
||||
matint[I] = mat3inout[I];
|
||||
}
|
||||
|
||||
mat3inout[0+0*3] = (matint[1+1*3]*matint[2+2*3]-matint[1+2*3]*matint[2+1*3])/det;
|
||||
mat3inout[0+1*3] = -(matint[1+0*3]*matint[2+2*3]-matint[1+2*3]*matint[2+0*3])/det;
|
||||
mat3inout[0+2*3] = (matint[1+0*3]*matint[2+1*3]-matint[1+1*3]*matint[2+0*3])/det;
|
||||
mat3inout[1+0*3] = -(matint[0+1*3]*matint[2+2*3]-matint[0+2*3]*matint[2+1*3])/det;
|
||||
mat3inout[1+1*3] = (matint[0+0*3]*matint[2+2*3]-matint[0+2*3]*matint[2+0*3])/det;
|
||||
mat3inout[1+2*3] = -(matint[0+0*3]*matint[2+1*3]-matint[0+1*3]*matint[2+0*3])/det;
|
||||
mat3inout[2+0*3] = (matint[0+1*3]*matint[1+2*3]-matint[0+2*3]*matint[1+1*3])/det;
|
||||
mat3inout[2+1*3] = -(matint[0+0*3]*matint[1+2*3]-matint[0+2*3]*matint[1+0*3])/det;
|
||||
mat3inout[2+2*3] = (matint[0+0*3]*matint[1+1*3]-matint[0+1*3]*matint[1+0*3])/det;
|
||||
|
||||
mat3f_transpose(mat3inout);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Matrix-vector product: returns mat3in * cvin, with matrix element (I,J)
// read from mat3in[I+3*J].
__host__ __device__ cuvect3f mat3f_mult(float *mat3in, cuvect3f cvin)
{
    cuvect3f out;

    for(int r=0;r<3;r++)
    {
        float acc = 0.0;
        for(int c=0;c<3;c++)
        {
            acc = acc + mat3in[r+3*c]*cvin[c];
        }
        out[r] = acc;
    }

    return out;
}
|
||||
|
||||
// Matrix-matrix product: matout = matina * matinb, all 3x3 with element
// (I,J) at index I+3*J.
// The product is built in a scratch buffer first, so matout may be the same
// array as either input.
__host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout)
{
    float prod[9];

    for(int r=0;r<3;r++)
    {
        for(int c=0;c<3;c++)
        {
            float acc = 0.0;
            for(int k=0;k<3;k++)
            {
                acc = acc + matina[r+3*k]*matinb[k+3*c];
            }
            prod[r+3*c] = acc;
        }
    }

    for(int n=0;n<9;n++)
    {
        matout[n] = prod[n];
    }
}
|
||||
|
||||
// Writes to matout (9 floats, element (I,J) at I+3*J) the antisymmetric
// matrix built from the components of vecin:
//   (1,0) = -v2   (2,0) =  v1
//   (0,1) =  v2   (2,1) = -v0
//   (0,2) = -v1   (1,2) =  v0
// with a zero diagonal.
__host__ __device__ void mat3f_hodgedual(cuvect3f vecin, float *matout)
{
    const float v0 = vecin[0];
    const float v1 = vecin[1];
    const float v2 = vecin[2];

    const float dual[9] =
    {
        0.0f, -v2,   v1,
        v2,   0.0f, -v0,
        -v1,  v0,   0.0f
    };

    for(int k=0;k<9;k++)
    {
        matout[k] = dual[k];
    }
}
|
||||
|
||||
// Computes a vector from the antisymmetric part of matin (9 floats, element
// (I,J) at I+3*J): half the difference of each mirrored off-diagonal pair.
//
// NOTE(review): vecout is taken by value, so the components written here go
// into a local copy and never reach the caller. Making this function useful
// requires a reference or pointer output parameter, changed together with
// the declaration in the header.
__host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f vecout)
{
    const float d0 = matin[1 + 2*3] - matin[2 + 1*3];
    const float d1 = matin[2 + 0*3] - matin[0 + 2*3];
    const float d2 = matin[0 + 1*3] - matin[1 + 0*3];

    vecout[0] = 0.5*d0;
    vecout[1] = 0.5*d1;
    vecout[2] = 0.5*d2;
}
|
||||
|
||||
//returns direction cosine rotation matrix from axis and angle
|
||||
__host__ __device__ void mat3f_rot_from_axisangle(cuvect3f axis, float angle, float *matout)
|
||||
{
|
||||
int I;
|
||||
float H[9];
|
||||
float Hsq[9];
|
||||
float II[9];
|
||||
|
||||
for(I=0;I<9;I++) II[I] = 0.0;
|
||||
II[0+0*3] = 1.0;
|
||||
II[1+1*3] = 1.0;
|
||||
II[2+2*3] = 1.0;
|
||||
|
||||
axis = cuvect3f_normalize(axis);
|
||||
|
||||
mat3f_hodgedual(axis,H);
|
||||
mat3f_mult(H,H,Hsq);
|
||||
|
||||
for(I=0;I<9;I++)
|
||||
{
|
||||
matout[I] = (II[I] + Hsq[I]) + H[I]*sinf(angle) - Hsq[I]*cosf(angle);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Host-side smoke test for the 3-vector / 3x3-matrix helpers.
// Prints intermediate values for visual inspection; nothing is asserted.
__host__ void test_cudavectf_logic1()
{
    printf("3 dim vector and matrix functional tests on host side\n");

    cuvect3f a,b,c;
    float ma[9],mb[9],mc[9];

    // ma gets random values in [0,1]; mb starts as a copy of ma
    for(int r=0;r<3;r++)
    {
        for(int k=0;k<3;k++)
        {
            const float v = ((float) rand())/((float) RAND_MAX);
            ma[r+3*k] = v;
            mb[r+3*k] = v;
        }
    }

    // mb <- inverse(ma); mc <- ma*mb
    mat3f_inverse(mb);
    mat3f_mult(ma,mb,mc);

    for(int r=0;r<3;r++)
    {
        for(int k=0;k<3;k++)
        {
            printf("ma[%d,%d] = %1.3f\n",r,k,ma[r+3*k]);
        }
    }
    for(int r=0;r<3;r++)
    {
        for(int k=0;k<3;k++)
        {
            printf("mb[%d,%d] = %1.3f\n",r,k,mb[r+3*k]);
        }
    }
    for(int r=0;r<3;r++)
    {
        for(int k=0;k<3;k++)
        {
            printf("mc[%d,%d] = %1.3f\n",r,k,mc[r+3*k]);
        }
    }

    // apply ma and then its inverse to a
    a = cuvect3f(1,1,1);
    b = mat3f_mult(ma,a);
    b = mat3f_mult(mb,b);

    for(int n=0;n<3;n++)
    {
        printf("a[%d] = %1.3f, b[%d] = %1.3f\n",n,a[n],n,b[n]);
    }

    // vector addition
    a = cuvect3f(1,0,1);
    b = cuvect3f(0,1,-1);
    c = a+b;

    for(int n=0;n<3;n++)
    {
        printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",n,a[n],n,b[n],n,c[n]);
    }

    // scalar division
    c = c/2.0;

    for(int n=0;n<3;n++)
    {
        printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",n,a[n],n,b[n],n,c[n]);
    }

    // cross product, followed by dot products and norms
    c = cuvect3f_cross(a,b);

    for(int n=0;n<3;n++)
    {
        printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",n,a[n],n,b[n],n,c[n]);
    }

    printf("c dot a = %1.3f, c dot b = %1.3f\n",cuvect3f_dot(c,a),cuvect3f_dot(c,b));

    printf("norm(a)=%1.3f, norm(b)=%1.3f, norm(c)=%1.3f\n",cuvect3f_norm(a),cuvect3f_norm(b),cuvect3f_norm(c));

    // same checks after normalizing all three vectors
    a = cuvect3f_normalize(a);
    b = cuvect3f_normalize(b);
    c = cuvect3f_normalize(c);

    for(int n=0;n<3;n++)
    {
        printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",n,a[n],n,b[n],n,c[n]);
    }
    printf("c dot a = %1.3f, c dot b = %1.3f\n",cuvect3f_dot(c,a),cuvect3f_dot(c,b));
    printf("norm(a)=%1.3f, norm(b)=%1.3f, norm(c)=%1.3f\n",cuvect3f_norm(a),cuvect3f_norm(b),cuvect3f_norm(c));
}
|
||||
|
||||
|
||||
|
||||
// Default constructor: all nine elements start at zero.
__host__ __device__ cumat3f::cumat3f()
{
    for(int k=0;k<9;k++)
    {
        dat[k] = 0.0;
    }
}
|
||||
|
||||
// Destructor: resets all nine elements to zero as the object is destroyed.
__host__ __device__ cumat3f::~cumat3f()
{
    for(int k=0;k<9;k++)
    {
        dat[k] = 0.0;
    }
}
|
||||
|
||||
// Flat element access into the 9-element storage (no bounds check).
__host__ __device__ float& cumat3f::operator[](const int I)
{
    float &elem = dat[I];
    return elem;
}
|
||||
|
||||
// Two-index element access: (I,J) is stored at flat index I+3*J (no bounds check).
__host__ __device__ float& cumat3f::operator()(const int I, const int J)
{
    const int flat = I+3*J;
    return dat[flat];
}
|
||||
|
||||
// Element-wise sum of this matrix and lhs (the right-hand operand).
__host__ __device__ cumat3f cumat3f::operator+(cumat3f lhs)
{
    cumat3f sum;
    for(int k=0;k<9;k++)
    {
        sum.dat[k] = dat[k] + lhs.dat[k];
    }
    return sum;
}
|
||||
|
||||
// Element-wise difference: this matrix minus lhs (the right-hand operand).
__host__ __device__ cumat3f cumat3f::operator-(cumat3f lhs)
{
    cumat3f diff;
    for(int k=0;k<9;k++)
    {
        diff.dat[k] = dat[k] - lhs.dat[k];
    }
    return diff;
}
|
||||
|
||||
// Scales every element by the scalar lhs.
__host__ __device__ cumat3f cumat3f::operator*(float lhs)
{
    cumat3f scaled;
    for(int k=0;k<9;k++)
    {
        scaled.dat[k] = dat[k]*lhs;
    }
    return scaled;
}
|
||||
|
||||
// Divides every element by the scalar lhs (no check for a zero divisor).
__host__ __device__ cumat3f cumat3f::operator/(float lhs)
{
    cumat3f scaled;
    for(int k=0;k<9;k++)
    {
        scaled.dat[k] = dat[k]/lhs;
    }
    return scaled;
}
|
||||
|
||||
// Two-index element access: (I,J) is stored at flat index I+3*J (no bounds check).
__host__ __device__ float& cumat3f::at(const int I, const int J)
{
    const int flat = I+3*J;
    return dat[flat];
}
|
||||
|
||||
|
||||
// Matrix-vector product: returns (*this) * lhs.
__host__ __device__ cuvect3f cumat3f::operator*(cuvect3f lhs)
{
    cuvect3f out;

    for(int r=0;r<3;r++)
    {
        float acc = 0.0;
        for(int c=0;c<3;c++)
        {
            acc = acc + dat[r+3*c]*lhs[c];
        }
        out[r] = acc;
    }

    return out;
}
|
||||
|
||||
// Matrix-matrix product: returns (*this) * lhs.
__host__ __device__ cumat3f cumat3f::operator*(cumat3f lhs)
{
    cumat3f prod;

    for(int r=0;r<3;r++)
    {
        for(int c=0;c<3;c++)
        {
            float acc = 0.0;
            for(int k=0;k<3;k++)
            {
                acc = acc + dat[r+3*k]*lhs.dat[k+3*c];
            }
            prod.dat[r+3*c] = acc;
        }
    }

    return prod;
}
|
||||
|
||||
// Determinant of this matrix. The six triple products are accumulated one
// at a time: three added, then three subtracted.
__host__ __device__ float cumat3f::det()
{
    // mRC is element (R,C), stored at dat[R+3*C]
    const float m00 = dat[0], m10 = dat[1], m20 = dat[2];
    const float m01 = dat[3], m11 = dat[4], m21 = dat[5];
    const float m02 = dat[6], m12 = dat[7], m22 = dat[8];

    float acc = 0.0;
    acc += m00*m11*m22;
    acc += m01*m12*m20;
    acc += m02*m10*m21;
    acc -= m00*m12*m21;
    acc -= m01*m10*m22;
    acc -= m02*m11*m20;

    return acc;
}
|
||||
|
||||
// Returns the transpose of this matrix; *this is left unchanged.
__host__ __device__ cumat3f cumat3f::transpose()
{
    cumat3f t;
    for(int r=0;r<3;r++)
    {
        for(int c=0;c<3;c++)
        {
            t.dat[r+3*c] = dat[c+3*r];
        }
    }
    return t;
}
|
||||
|
||||
// Returns the inverse of this matrix: the cofactor matrix divided by the
// determinant, then transposed.
// If the determinant is exactly zero, every element of the result is inf.
//
// Fix: the determinant is taken from *this. It used to be taken from the
// freshly constructed (all-zero) result matrix, so it was always zero and
// the inf-filled matrix came back for every input.
__host__ __device__ cumat3f cumat3f::inverse()
{
    cumat3f q;
    const float dt = this->det();

    if(dt!=0.0)
    {
        // q(i,j) = cofactor(i,j)/det; the transpose below turns this into
        // adjugate/det
        q(0,0) = (this->at(1,1)*this->at(2,2)-this->at(1,2)*this->at(2,1))/dt;
        q(0,1) = -(this->at(1,0)*this->at(2,2)-this->at(1,2)*this->at(2,0))/dt;
        q(0,2) = (this->at(1,0)*this->at(2,1)-this->at(1,1)*this->at(2,0))/dt;
        q(1,0) = -(this->at(0,1)*this->at(2,2)-this->at(0,2)*this->at(2,1))/dt;
        q(1,1) = (this->at(0,0)*this->at(2,2)-this->at(0,2)*this->at(2,0))/dt;
        q(1,2) = -(this->at(0,0)*this->at(2,1)-this->at(0,1)*this->at(2,0))/dt;
        q(2,0) = (this->at(0,1)*this->at(1,2)-this->at(0,2)*this->at(1,1))/dt;
        q(2,1) = -(this->at(0,0)*this->at(1,2)-this->at(0,2)*this->at(1,0))/dt;
        q(2,2) = (this->at(0,0)*this->at(1,1)-this->at(0,1)*this->at(1,0))/dt;

        q = q.transpose();
    }
    else
    {
        // singular matrix: signal it with an all-inf result
        for(int k=0;k<9;k++)
        {
            q.dat[k] = inf;
        }
    }

    return q;
}
|
||||
|
||||
};
|
@ -0,0 +1,414 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
// Default constructor: every component starts at zero.
__host__ __device__ cuvect4::cuvect4()
{
    x = y = z = w = 0.0;
}
|
||||
|
||||
// Destructor: resets the components to zero as the object is destroyed.
__host__ __device__ cuvect4::~cuvect4()
{
    x = y = z = w = 0.0;
}
|
||||
|
||||
// Constructs a vector from its four components.
__host__ __device__ cuvect4::cuvect4(double _x, double _y, double _z, double _w)
{
    this->x = _x;
    this->y = _y;
    this->z = _z;
    this->w = _w;
}
|
||||
|
||||
// Mutable component access by index: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
// Any other index falls back to x.
__host__ __device__ double& cuvect4::operator[](const int I)
{
    switch(I)
    {
        case 1:  return y;
        case 2:  return z;
        case 3:  return w;
        default: return x; // index 0 and every out-of-range index
    }
}
|
||||
|
||||
// Read-only component access by index: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
// Any other index falls back to x.
__host__ __device__ const double& cuvect4::operator[](const int I) const
{
    switch(I)
    {
        case 1:  return y;
        case 2:  return z;
        case 3:  return w;
        default: return x; // index 0 and every out-of-range index
    }
}
|
||||
|
||||
// Component-wise sum of this vector and lhs (the right-hand operand).
__host__ __device__ cuvect4 cuvect4::operator+(cuvect4 lhs)
{
    return cuvect4(x + lhs.x, y + lhs.y, z + lhs.z, w + lhs.w);
}
|
||||
|
||||
// Component-wise difference: this vector minus lhs (the right-hand operand).
__host__ __device__ cuvect4 cuvect4::operator-(cuvect4 lhs)
{
    return cuvect4(x - lhs.x, y - lhs.y, z - lhs.z, w - lhs.w);
}
|
||||
|
||||
// Scales every component by the scalar lhs.
__host__ __device__ cuvect4 cuvect4::operator*(double lhs)
{
    return cuvect4(x*lhs, y*lhs, z*lhs, w*lhs);
}
|
||||
|
||||
// Divides every component by the scalar lhs (no check for a zero divisor).
__host__ __device__ cuvect4 cuvect4::operator/(double lhs)
{
    return cuvect4(x/lhs, y/lhs, z/lhs, w/lhs);
}
|
||||
|
||||
// Default constructor: all sixteen elements start at zero.
__host__ __device__ cumat4::cumat4()
{
    for(int k=0;k<16;k++)
    {
        dat[k] = 0.0;
    }
}
|
||||
|
||||
// Destructor: resets all sixteen elements to zero as the object is destroyed.
__host__ __device__ cumat4::~cumat4()
{
    for(int k=0;k<16;k++)
    {
        dat[k] = 0.0;
    }
}
|
||||
|
||||
// Flat element access into the 16-element storage (no bounds check).
__host__ __device__ double& cumat4::operator[](const int I)
{
    double &elem = dat[I];
    return elem;
}
|
||||
|
||||
// Two-index element access: (I,J) is stored at flat index I+4*J (no bounds check).
__host__ __device__ double& cumat4::operator()(const int I, const int J)
{
    const int flat = I+4*J;
    return dat[flat];
}
|
||||
|
||||
// Two-index element access: (I,J) is stored at flat index I+4*J (no bounds check).
__host__ __device__ double& cumat4::at(const int I, const int J)
{
    const int flat = I+4*J;
    return dat[flat];
}
|
||||
|
||||
// Element-wise sum of this matrix and lhs (the right-hand operand).
__host__ __device__ cumat4 cumat4::operator+(cumat4 lhs)
{
    cumat4 sum;
    for(int k=0;k<16;k++)
    {
        sum.dat[k] = dat[k] + lhs.dat[k];
    }
    return sum;
}
|
||||
|
||||
// Element-wise difference: this matrix minus lhs (the right-hand operand).
__host__ __device__ cumat4 cumat4::operator-(cumat4 lhs)
{
    cumat4 diff;
    for(int k=0;k<16;k++)
    {
        diff.dat[k] = dat[k] - lhs.dat[k];
    }
    return diff;
}
|
||||
|
||||
// Scales every element by the scalar lhs.
__host__ __device__ cumat4 cumat4::operator*(double lhs)
{
    cumat4 scaled;
    for(int k=0;k<16;k++)
    {
        scaled.dat[k] = dat[k]*lhs;
    }
    return scaled;
}
|
||||
|
||||
// Divides every element by the scalar lhs (no check for a zero divisor).
__host__ __device__ cumat4 cumat4::operator/(double lhs)
{
    cumat4 scaled;
    for(int k=0;k<16;k++)
    {
        scaled.dat[k] = dat[k]/lhs;
    }
    return scaled;
}
|
||||
|
||||
// Matrix-vector product: returns (*this) * lhs.
__host__ __device__ cuvect4 cumat4::operator*(cuvect4 lhs)
{
    cuvect4 out;

    for(int r=0;r<4;r++)
    {
        double acc = 0.0;
        for(int c=0;c<4;c++)
        {
            acc = acc + dat[r+4*c]*lhs[c];
        }
        out[r] = acc;
    }

    return out;
}
|
||||
|
||||
// Matrix-matrix product: returns (*this) * lhs.
__host__ __device__ cumat4 cumat4::operator*(cumat4 lhs)
{
    cumat4 prod;

    for(int r=0;r<4;r++)
    {
        for(int c=0;c<4;c++)
        {
            double acc = 0;
            for(int k=0;k<4;k++)
            {
                acc = acc + dat[r+4*k] * lhs.dat[k+4*c];
            }
            prod.dat[r+4*c] = acc;
        }
    }

    return prod;
}
|
||||
|
||||
// Returns the transpose of this matrix; *this is left unchanged.
__host__ __device__ cumat4 cumat4::transpose()
{
    cumat4 t;
    for(int r=0;r<4;r++)
    {
        for(int c=0;c<4;c++)
        {
            t.dat[r+4*c] = dat[c+4*r];
        }
    }
    return t;
}
|
||||
|
||||
// Determinant of this 4x4 matrix, as the fully expanded sum of 24 signed
// four-element products (aRC is element (R,C)).
__host__ __device__ double cumat4::det()
{
    const double a00 = this->at(0,0), a01 = this->at(0,1), a02 = this->at(0,2), a03 = this->at(0,3);
    const double a10 = this->at(1,0), a11 = this->at(1,1), a12 = this->at(1,2), a13 = this->at(1,3);
    const double a20 = this->at(2,0), a21 = this->at(2,1), a22 = this->at(2,2), a23 = this->at(2,3);
    const double a30 = this->at(3,0), a31 = this->at(3,1), a32 = this->at(3,2), a33 = this->at(3,3);

    // terms are grouped by their last-row factor (a30, a31, a32, a33)
    return a03*a12*a21*a30
         - a02*a13*a21*a30
         - a03*a11*a22*a30
         + a01*a13*a22*a30
         + a02*a11*a23*a30
         - a01*a12*a23*a30
         - a03*a12*a20*a31
         + a02*a13*a20*a31
         + a03*a10*a22*a31
         - a00*a13*a22*a31
         - a02*a10*a23*a31
         + a00*a12*a23*a31
         + a03*a11*a20*a32
         - a01*a13*a20*a32
         - a03*a10*a21*a32
         + a00*a13*a21*a32
         + a01*a10*a23*a32
         - a00*a11*a23*a32
         - a02*a11*a20*a33
         + a01*a12*a20*a33
         + a02*a10*a21*a33
         - a00*a12*a21*a33
         - a01*a10*a22*a33
         + a00*a11*a22*a33;
}
|
||||
|
||||
// Returns the inverse of the 4x4 matrix ma, computed as adjugate/determinant
// from closed-form cofactor expansions.
// When det*det is not greater than 1.0E-30 the matrix is treated as
// singular and the default-constructed (all-zero) matrix is returned.
//
// Changes: the determinant now comes from cumat4::det() instead of a second
// copy of the same 24-term expansion, and each entry is divided and stored
// in one statement instead of going through b## temporaries.
//
// NOTE: the closed-form expressions came from a computer algebra system;
// this is unlikely to be faster than Gaussian elimination.
__host__ __device__ cumat4 minverse(cumat4 ma)
{
    cumat4 mb;

    const double det = ma.det();
    if(!(det*det>1.0E-30))
    {
        return mb;
    }

    // aRC is element (R,C) of the input
    const double a00 = ma.at(0,0), a01 = ma.at(0,1), a02 = ma.at(0,2), a03 = ma.at(0,3);
    const double a10 = ma.at(1,0), a11 = ma.at(1,1), a12 = ma.at(1,2), a13 = ma.at(1,3);
    const double a20 = ma.at(2,0), a21 = ma.at(2,1), a22 = ma.at(2,2), a23 = ma.at(2,3);
    const double a30 = ma.at(3,0), a31 = ma.at(3,1), a32 = ma.at(3,2), a33 = ma.at(3,3);

    mb.at(0,0) = (-a13*a22*a31 + a12*a23*a31 + a13*a21*a32 - a11*a23*a32 - a12*a21*a33 + a11*a22*a33)/det;
    mb.at(0,1) = (a03*a22*a31 - a02*a23*a31 - a03*a21*a32 + a01*a23*a32 + a02*a21*a33 - a01*a22*a33)/det;
    mb.at(0,2) = (-a03*a12*a31 + a02*a13*a31 + a03*a11*a32 - a01*a13*a32 - a02*a11*a33 + a01*a12*a33)/det;
    mb.at(0,3) = (a03*a12*a21 - a02*a13*a21 - a03*a11*a22 + a01*a13*a22 + a02*a11*a23 - a01*a12*a23)/det;
    mb.at(1,0) = (a13*a22*a30 - a12*a23*a30 - a13*a20*a32 + a10*a23*a32 + a12*a20*a33 - a10*a22*a33)/det;
    mb.at(1,1) = (-a03*a22*a30 + a02*a23*a30 + a03*a20*a32 - a00*a23*a32 - a02*a20*a33 + a00*a22*a33)/det;
    mb.at(1,2) = (a03*a12*a30 - a02*a13*a30 - a03*a10*a32 + a00*a13*a32 + a02*a10*a33 - a00*a12*a33)/det;
    mb.at(1,3) = (-a03*a12*a20 + a02*a13*a20 + a03*a10*a22 - a00*a13*a22 - a02*a10*a23 + a00*a12*a23)/det;
    mb.at(2,0) = (-a13*a21*a30 + a11*a23*a30 + a13*a20*a31 - a10*a23*a31 - a11*a20*a33 + a10*a21*a33)/det;
    mb.at(2,1) = (a03*a21*a30 - a01*a23*a30 - a03*a20*a31 + a00*a23*a31 + a01*a20*a33 - a00*a21*a33)/det;
    mb.at(2,2) = (-a03*a11*a30 + a01*a13*a30 + a03*a10*a31 - a00*a13*a31 - a01*a10*a33 + a00*a11*a33)/det;
    mb.at(2,3) = (a03*a11*a20 - a01*a13*a20 - a03*a10*a21 + a00*a13*a21 + a01*a10*a23 - a00*a11*a23)/det;
    mb.at(3,0) = (a12*a21*a30 - a11*a22*a30 - a12*a20*a31 + a10*a22*a31 + a11*a20*a32 - a10*a21*a32)/det;
    mb.at(3,1) = (-a02*a21*a30 + a01*a22*a30 + a02*a20*a31 - a00*a22*a31 - a01*a20*a32 + a00*a21*a32)/det;
    mb.at(3,2) = (a02*a11*a30 - a01*a12*a30 - a02*a10*a31 + a00*a12*a31 + a01*a10*a32 - a00*a11*a32)/det;
    mb.at(3,3) = (-a02*a11*a20 + a01*a12*a20 + a02*a10*a21 - a00*a12*a21 - a01*a10*a22 + a00*a11*a22)/det;

    return mb;
}
|
||||
|
||||
// Member-function form of minverse(): returns the inverse of this matrix.
__host__ __device__ cumat4 cumat4::inverse()
{
    cumat4 &self = *this;
    return minverse(self);
}
|
||||
|
||||
|
||||
// Dot product of a and b over all four components.
__host__ __device__ double cuvect4_dot(cuvect4 a, cuvect4 b)
{
    return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
}
|
||||
|
||||
// Euclidean length of a: sqrt(a . a).
__host__ __device__ double cuvect4_norm(cuvect4 a)
{
    return ::sqrt(cuvect4_dot(a,a));
}
|
||||
|
||||
// Returns a scaled to unit length.
// If the length is not greater than zero, the zero vector is returned instead.
__host__ __device__ cuvect4 cuvect4_normalize(cuvect4 a)
{
    const double len = cuvect4_norm(a);
    if(len>0.0)
    {
        return a/len;
    }
    return cuvect4(0.0f,0.0f,0.0f,0.0f);
}
|
||||
|
||||
// Projection of a onto the direction of b: (a . bn) * bn, with bn = normalize(b).
__host__ __device__ cuvect4 cuvect4_proj(cuvect4 a, cuvect4 b)
{
    cuvect4 bn = cuvect4_normalize(b);
    return bn*cuvect4_dot(a,bn);
}
|
||||
|
||||
|
||||
};
|
@ -0,0 +1,417 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
namespace amscuda
|
||||
{
|
||||
|
||||
////////////
|
||||
//cuvect4f//
|
||||
////////////
|
||||
|
||||
// Default constructor: every component starts at zero.
__host__ __device__ cuvect4f::cuvect4f()
{
    x = y = z = w = 0.0;
}
|
||||
|
||||
// Destructor: resets the components to zero as the object is destroyed.
__host__ __device__ cuvect4f::~cuvect4f()
{
    x = y = z = w = 0.0;
}
|
||||
|
||||
// Constructs a vector from its four components.
__host__ __device__ cuvect4f::cuvect4f(float _x, float _y, float _z, float _w)
{
    this->x = _x;
    this->y = _y;
    this->z = _z;
    this->w = _w;
}
|
||||
|
||||
// Mutable component access by index: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
// Any other index falls back to x.
__host__ __device__ float& cuvect4f::operator[](const int I)
{
    switch(I)
    {
        case 1:  return y;
        case 2:  return z;
        case 3:  return w;
        default: return x; // index 0 and every out-of-range index
    }
}
|
||||
|
||||
// Read-only component access by index: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
// Any other index falls back to x.
__host__ __device__ const float& cuvect4f::operator[](const int I) const
{
    switch(I)
    {
        case 1:  return y;
        case 2:  return z;
        case 3:  return w;
        default: return x; // index 0 and every out-of-range index
    }
}
|
||||
|
||||
// Component-wise sum of this vector and lhs (the right-hand operand).
__host__ __device__ cuvect4f cuvect4f::operator+(cuvect4f lhs)
{
    return cuvect4f(x + lhs.x, y + lhs.y, z + lhs.z, w + lhs.w);
}
|
||||
|
||||
// Component-wise difference: this vector minus lhs (the right-hand operand).
__host__ __device__ cuvect4f cuvect4f::operator-(cuvect4f lhs)
{
    return cuvect4f(x - lhs.x, y - lhs.y, z - lhs.z, w - lhs.w);
}
|
||||
|
||||
// Scales every component by the scalar lhs.
__host__ __device__ cuvect4f cuvect4f::operator*(float lhs)
{
    return cuvect4f(x*lhs, y*lhs, z*lhs, w*lhs);
}
|
||||
|
||||
// Divides every component by the scalar lhs (no check for a zero divisor).
__host__ __device__ cuvect4f cuvect4f::operator/(float lhs)
{
    return cuvect4f(x/lhs, y/lhs, z/lhs, w/lhs);
}
|
||||
|
||||
// Default constructor: all sixteen elements start at zero.
__host__ __device__ cumat4f::cumat4f()
{
    for(int k=0;k<16;k++)
    {
        dat[k] = 0.0;
    }
}
|
||||
|
||||
// Destructor: resets all sixteen elements to zero as the object is destroyed.
__host__ __device__ cumat4f::~cumat4f()
{
    for(int k=0;k<16;k++)
    {
        dat[k] = 0.0;
    }
}
|
||||
|
||||
// Flat element access into the 16-element storage (no bounds check).
__host__ __device__ float& cumat4f::operator[](const int I)
{
    float &elem = dat[I];
    return elem;
}
|
||||
|
||||
// Two-index element access: (I,J) is stored at flat index I+4*J (no bounds check).
__host__ __device__ float& cumat4f::operator()(const int I, const int J)
{
    const int flat = I+4*J;
    return dat[flat];
}
|
||||
|
||||
// Two-index element access: (I,J) is stored at flat index I+4*J (no bounds check).
__host__ __device__ float& cumat4f::at(const int I, const int J)
{
    const int flat = I+4*J;
    return dat[flat];
}
|
||||
|
||||
// Element-wise sum of this matrix and lhs (the right-hand operand).
__host__ __device__ cumat4f cumat4f::operator+(cumat4f lhs)
{
    cumat4f sum;
    for(int k=0;k<16;k++)
    {
        sum.dat[k] = dat[k] + lhs.dat[k];
    }
    return sum;
}
|
||||
|
||||
// Element-wise difference: this matrix minus lhs (the right-hand operand).
__host__ __device__ cumat4f cumat4f::operator-(cumat4f lhs)
{
    cumat4f diff;
    for(int k=0;k<16;k++)
    {
        diff.dat[k] = dat[k] - lhs.dat[k];
    }
    return diff;
}
|
||||
|
||||
// Scales every element by the scalar lhs.
__host__ __device__ cumat4f cumat4f::operator*(float lhs)
{
    cumat4f scaled;
    for(int k=0;k<16;k++)
    {
        scaled.dat[k] = dat[k]*lhs;
    }
    return scaled;
}
|
||||
|
||||
// Divides every element by the scalar lhs (no check for a zero divisor).
__host__ __device__ cumat4f cumat4f::operator/(float lhs)
{
    cumat4f scaled;
    for(int k=0;k<16;k++)
    {
        scaled.dat[k] = dat[k]/lhs;
    }
    return scaled;
}
|
||||
|
||||
// Matrix-vector product: returns (*this) * lhs.
__host__ __device__ cuvect4f cumat4f::operator*(cuvect4f lhs)
{
    cuvect4f out;

    for(int r=0;r<4;r++)
    {
        float acc = 0.0;
        for(int c=0;c<4;c++)
        {
            acc = acc + dat[r+4*c]*lhs[c];
        }
        out[r] = acc;
    }

    return out;
}
|
||||
|
||||
// Matrix-matrix product: returns (*this) * lhs.
__host__ __device__ cumat4f cumat4f::operator*(cumat4f lhs)
{
    cumat4f prod;

    for(int r=0;r<4;r++)
    {
        for(int c=0;c<4;c++)
        {
            float acc = 0;
            for(int k=0;k<4;k++)
            {
                acc = acc + dat[r+4*k] * lhs.dat[k+4*c];
            }
            prod.dat[r+4*c] = acc;
        }
    }

    return prod;
}
|
||||
|
||||
// Returns the transpose of this matrix; *this is left unchanged.
__host__ __device__ cumat4f cumat4f::transpose()
{
    cumat4f t;
    for(int r=0;r<4;r++)
    {
        for(int c=0;c<4;c++)
        {
            t.dat[r+4*c] = dat[c+4*r];
        }
    }
    return t;
}
|
||||
|
||||
// Determinant of this 4x4 matrix, as the fully expanded sum of 24 signed
// four-element products (aRC is element (R,C)).
__host__ __device__ float cumat4f::det()
{
    const float a00 = this->at(0,0), a01 = this->at(0,1), a02 = this->at(0,2), a03 = this->at(0,3);
    const float a10 = this->at(1,0), a11 = this->at(1,1), a12 = this->at(1,2), a13 = this->at(1,3);
    const float a20 = this->at(2,0), a21 = this->at(2,1), a22 = this->at(2,2), a23 = this->at(2,3);
    const float a30 = this->at(3,0), a31 = this->at(3,1), a32 = this->at(3,2), a33 = this->at(3,3);

    // terms are grouped by their last-row factor (a30, a31, a32, a33)
    return a03*a12*a21*a30
         - a02*a13*a21*a30
         - a03*a11*a22*a30
         + a01*a13*a22*a30
         + a02*a11*a23*a30
         - a01*a12*a23*a30
         - a03*a12*a20*a31
         + a02*a13*a20*a31
         + a03*a10*a22*a31
         - a00*a13*a22*a31
         - a02*a10*a23*a31
         + a00*a12*a23*a31
         + a03*a11*a20*a32
         - a01*a13*a20*a32
         - a03*a10*a21*a32
         + a00*a13*a21*a32
         + a01*a10*a23*a32
         - a00*a11*a23*a32
         - a02*a11*a20*a33
         + a01*a12*a20*a33
         + a02*a10*a21*a33
         - a00*a12*a21*a33
         - a01*a10*a22*a33
         + a00*a11*a22*a33;
}
|
||||
|
||||
// Returns the inverse of the 4x4 matrix ma, computed as adjugate/determinant
// from closed-form cofactor expansions.
// When det*det is not greater than 1.0E-30 the matrix is treated as
// singular and the default-constructed (all-zero) matrix is returned.
//
// Changes: the determinant now comes from cumat4f::det() instead of a second
// copy of the same 24-term expansion, and each entry is divided and stored
// in one statement instead of going through b## temporaries.
//
// NOTE: the closed-form expressions came from a computer algebra system;
// this is unlikely to be faster than Gaussian elimination.
__host__ __device__ cumat4f minverse(cumat4f ma)
{
    cumat4f mb;

    const float det = ma.det();
    if(!(det*det>1.0E-30))
    {
        return mb;
    }

    // aRC is element (R,C) of the input
    const float a00 = ma.at(0,0), a01 = ma.at(0,1), a02 = ma.at(0,2), a03 = ma.at(0,3);
    const float a10 = ma.at(1,0), a11 = ma.at(1,1), a12 = ma.at(1,2), a13 = ma.at(1,3);
    const float a20 = ma.at(2,0), a21 = ma.at(2,1), a22 = ma.at(2,2), a23 = ma.at(2,3);
    const float a30 = ma.at(3,0), a31 = ma.at(3,1), a32 = ma.at(3,2), a33 = ma.at(3,3);

    mb.at(0,0) = (-a13*a22*a31 + a12*a23*a31 + a13*a21*a32 - a11*a23*a32 - a12*a21*a33 + a11*a22*a33)/det;
    mb.at(0,1) = (a03*a22*a31 - a02*a23*a31 - a03*a21*a32 + a01*a23*a32 + a02*a21*a33 - a01*a22*a33)/det;
    mb.at(0,2) = (-a03*a12*a31 + a02*a13*a31 + a03*a11*a32 - a01*a13*a32 - a02*a11*a33 + a01*a12*a33)/det;
    mb.at(0,3) = (a03*a12*a21 - a02*a13*a21 - a03*a11*a22 + a01*a13*a22 + a02*a11*a23 - a01*a12*a23)/det;
    mb.at(1,0) = (a13*a22*a30 - a12*a23*a30 - a13*a20*a32 + a10*a23*a32 + a12*a20*a33 - a10*a22*a33)/det;
    mb.at(1,1) = (-a03*a22*a30 + a02*a23*a30 + a03*a20*a32 - a00*a23*a32 - a02*a20*a33 + a00*a22*a33)/det;
    mb.at(1,2) = (a03*a12*a30 - a02*a13*a30 - a03*a10*a32 + a00*a13*a32 + a02*a10*a33 - a00*a12*a33)/det;
    mb.at(1,3) = (-a03*a12*a20 + a02*a13*a20 + a03*a10*a22 - a00*a13*a22 - a02*a10*a23 + a00*a12*a23)/det;
    mb.at(2,0) = (-a13*a21*a30 + a11*a23*a30 + a13*a20*a31 - a10*a23*a31 - a11*a20*a33 + a10*a21*a33)/det;
    mb.at(2,1) = (a03*a21*a30 - a01*a23*a30 - a03*a20*a31 + a00*a23*a31 + a01*a20*a33 - a00*a21*a33)/det;
    mb.at(2,2) = (-a03*a11*a30 + a01*a13*a30 + a03*a10*a31 - a00*a13*a31 - a01*a10*a33 + a00*a11*a33)/det;
    mb.at(2,3) = (a03*a11*a20 - a01*a13*a20 - a03*a10*a21 + a00*a13*a21 + a01*a10*a23 - a00*a11*a23)/det;
    mb.at(3,0) = (a12*a21*a30 - a11*a22*a30 - a12*a20*a31 + a10*a22*a31 + a11*a20*a32 - a10*a21*a32)/det;
    mb.at(3,1) = (-a02*a21*a30 + a01*a22*a30 + a02*a20*a31 - a00*a22*a31 - a01*a20*a32 + a00*a21*a32)/det;
    mb.at(3,2) = (a02*a11*a30 - a01*a12*a30 - a02*a10*a31 + a00*a12*a31 + a01*a10*a32 - a00*a11*a32)/det;
    mb.at(3,3) = (-a02*a11*a20 + a01*a12*a20 + a02*a10*a21 - a00*a12*a21 - a01*a10*a22 + a00*a11*a22)/det;

    return mb;
}
|
||||
|
||||
// Member-function form of the matrix inverse: hands a reference to this
// matrix to the free function minverse() and returns whatever it produces.
// NOTE(review): behaviour for singular matrices is decided inside minverse();
// confirm there before relying on the result.
__host__ __device__ cumat4f cumat4f::inverse()
{
    cumat4f inv = minverse(*this);
    return inv;
}
|
||||
|
||||
|
||||
// Four-component dot product: sum of the products of the x, y, z and w
// members of a and b, accumulated in that order.
__host__ __device__ float cuvect4f_dot(cuvect4f a, cuvect4f b)
{
    return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
}
|
||||
|
||||
// Euclidean length of a: square root of the vector's dot product with itself.
__host__ __device__ float cuvect4f_norm(cuvect4f a)
{
    return ::sqrtf(cuvect4f_dot(a,a));
}
|
||||
|
||||
// Scale a to unit length.
// Returns a/|a| when the norm is strictly positive; otherwise (zero-length
// input, or a norm that compares false against 0 such as NaN) returns the
// all-zero vector instead of dividing.
__host__ __device__ cuvect4f cuvect4f_normalize(cuvect4f a)
{
    float len = cuvect4f_norm(a);

    // Written as a negated '>' so a NaN norm takes the zero-vector path.
    if(!(len>0.0))
        return cuvect4f(0.0f,0.0f,0.0f,0.0f);

    return a/len;
}
|
||||
|
||||
// Projection of a onto the direction of b.
// b is normalized first (cuvect4f_normalize yields the zero vector for a
// zero-length b, so the projection is then zero as well); the result is the
// unit direction scaled by the component of a along it.
__host__ __device__ cuvect4f cuvect4f_proj(cuvect4f a, cuvect4f b)
{
    cuvect4f unit_b = cuvect4f_normalize(b);
    float along = cuvect4f_dot(a,unit_b);
    return unit_b*along;
}
|
||||
|
||||
}; //namespace amscuda
|
@ -0,0 +1,27 @@
|
||||
#include <amsculib2/amsculib2.hpp>
|
||||
|
||||
//using namespace ams;
|
||||
using namespace amscuda;
|
||||
|
||||
// Test driver for the amsculib2 CUDA library.
// Prints a banner, runs the currently enabled test routine, and exits with
// status 0. The other test entry points are kept below, commented out, so
// they can be switched on by hand.
int main(int argc, char* argv[])
{
    // Command-line arguments are not used by the driver.
    (void)argc;
    (void)argv;

    printf("AMSCULIB2: Cuda Library Tests.\n");

    // --- disabled tests ---
    //test_amscuarray_1();
    //test_amscumath1();

    //cmp::test_cucomp64_1();
    //cmp::test_cucomp128_1();

    //test_amscuarray_2();

    //test_dprg64();
    //printf("\n");
    //test_dprg32();

    //test_dbuff_rand_dpr32();

    // --- enabled test ---
    test_amscurarray1();

    return 0;
}
|
Binary file not shown.
@ -0,0 +1,76 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import os,sys,math
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
#################
|
||||
## Subroutines ##
|
||||
#################
|
||||
|
||||
def binload_float_ndarray(fp):
    """Read one float32 N-dimensional array record from a binary stream.

    Record layout, as consumed below (all values in native byte order):

        int32             number of dimensions Nd
        int32   * Nd      extent of each dimension
        float32 * prod    the elements, reshaped in NumPy's default (C) order

    Several records may be stored back to back; each call consumes exactly
    one record and leaves the stream positioned at the next.

    Args:
        fp: binary file-like object positioned at the start of a record.
            Only ``fp.read(nbytes)`` is used.

    Returns:
        A float32 ndarray with the stored shape. The array is built with
        ``np.frombuffer`` over the bytes that were read, so it is a view of
        that buffer rather than a copy (read-only when ``read`` returns
        ``bytes``).

    Raises:
        ValueError: if the stream ends before the record is complete, or
            the header contains a negative dimension count or extent.
    """

    def read_exact(nbytes):
        # A short read means a truncated record; fail here with a clear
        # message instead of deep inside np.frombuffer.
        raw = fp.read(nbytes)
        if len(raw) != nbytes:
            raise ValueError(
                "unexpected end of data: wanted {} bytes, got {}".format(
                    nbytes, len(raw)))
        return raw

    def read_int32():
        # Convert to a Python int so later size arithmetic cannot wrap
        # around in fixed-width int32 math.
        return int(np.frombuffer(read_exact(4), dtype=np.int32, count=1)[0])

    ndim = read_int32()
    if ndim < 0:
        raise ValueError("negative dimension count in header: {}".format(ndim))

    shape = []
    nelem = 1
    for _ in range(ndim):
        extent = read_int32()
        if extent < 0:
            # A negative extent would otherwise turn into fp.read(<0),
            # which reads the whole remainder of the stream.
            raise ValueError("negative extent in header: {}".format(extent))
        shape.append(extent)
        nelem *= extent

    data = np.frombuffer(read_exact(4 * nelem), dtype=np.float32, count=nelem)
    return data.reshape(tuple(shape))
|
||||
|
||||
def binsave_float_ndarray(fp,arr):
    """Write one float32 N-dimensional array record to a binary stream.

    Produces the record layout that binload_float_ndarray reads (all values
    in native byte order):

        int32             number of dimensions Nd
        int32   * Nd      extent of each dimension
        float32 * prod    the elements in C (row-major) order

    Args:
        fp: binary file-like object opened for writing; only
            ``fp.write(bytes)`` is used.
        arr: array-like; converted to float32 before writing.

    Returns:
        None.
    """
    data = np.asarray(arr, dtype=np.float32)

    # Header: dimension count followed by each extent, all as int32.
    header = np.asarray((data.ndim,) + data.shape, dtype=np.int32)
    fp.write(header.tobytes())

    # tobytes(order="C") serialises in row-major order even when the input
    # is not C-contiguous, matching the loader's default reshape order.
    fp.write(data.tobytes(order="C"))

    return
|
||||
|
||||
|
||||
#################
|
||||
## Main Script ##
|
||||
#################
|
||||
|
||||
def test_1():
    """Load two arrays from the dpr32 random-buffer dump and plot them.

    Reads two consecutive records from
    ./test_scripts/test_dbuff_rand_dpr32.bin with binload_float_ndarray,
    shows each as an image with a 100-bin histogram of its values beneath,
    and prints the mean and standard deviation of the second array.

    If the file cannot be opened, a message is printed and the function
    returns without plotting.
    """
    fname = "./test_scripts/test_dbuff_rand_dpr32.bin"
    try:
        fp = open(fname,"rb")
    except OSError:
        # Only file-open failures are treated as "missing input"; anything
        # else (e.g. KeyboardInterrupt) propagates.
        print("Could not open {} for reading".format(fname))
        return

    # The with-block closes the file even if a record fails to load.
    with fp:
        arr1 = binload_float_ndarray(fp)
        arr2 = binload_float_ndarray(fp)

    # Top row: the arrays as images (presumably 2-D; verify against the
    # writer). Bottom row: value histograms.
    plt.subplot(2,2,1)
    plt.imshow(arr1)
    plt.subplot(2,2,2)
    plt.imshow(arr2)
    plt.subplot(2,2,3)
    plt.hist(arr1.flatten(),bins=100)
    plt.subplot(2,2,4)
    plt.hist(arr2.flatten(),bins=100)
    plt.show()

    print("{} {}".format(np.mean(arr2),np.std(arr2)))

    return
|
||||
|
||||
# Script entry point: run the plotting test, then exit with status 0.
if(__name__=="__main__"):
    test_1()
    # sys.exit rather than the bare exit(): the latter is a helper injected
    # by the site module for interactive sessions and is not guaranteed to
    # exist (e.g. under "python -S").
    sys.exit(0)
|
||||
|
Loading…
Reference in New Issue