commit 64a7bfc8510ac71bba8b5bc0cf2f04b3074d00d8 Author: Aaron Date: Tue Feb 4 21:59:29 2025 -0500 Making some of my library code public. diff --git a/bin_linux64/libamsculib2.linux64.a b/bin_linux64/libamsculib2.linux64.a new file mode 100644 index 0000000..b20888a Binary files /dev/null and b/bin_linux64/libamsculib2.linux64.a differ diff --git a/bin_linux64/test b/bin_linux64/test new file mode 100644 index 0000000..44ba152 Binary files /dev/null and b/bin_linux64/test differ diff --git a/bin_winx64/libamsculib2.msvc64.lib b/bin_winx64/libamsculib2.msvc64.lib new file mode 100644 index 0000000..86d4904 Binary files /dev/null and b/bin_winx64/libamsculib2.msvc64.lib differ diff --git a/bin_winx64/test.exe b/bin_winx64/test.exe new file mode 100644 index 0000000..ba98a33 Binary files /dev/null and b/bin_winx64/test.exe differ diff --git a/bin_winx64/test.exp b/bin_winx64/test.exp new file mode 100644 index 0000000..98ef086 Binary files /dev/null and b/bin_winx64/test.exp differ diff --git a/bin_winx64/test.lib b/bin_winx64/test.lib new file mode 100644 index 0000000..ba12197 Binary files /dev/null and b/bin_winx64/test.lib differ diff --git a/compscripts/__pycache__/complib2.cpython-310.pyc b/compscripts/__pycache__/complib2.cpython-310.pyc new file mode 100644 index 0000000..0c4f420 Binary files /dev/null and b/compscripts/__pycache__/complib2.cpython-310.pyc differ diff --git a/compscripts/__pycache__/complib2.cpython-36.pyc b/compscripts/__pycache__/complib2.cpython-36.pyc new file mode 100644 index 0000000..8ecee7c Binary files /dev/null and b/compscripts/__pycache__/complib2.cpython-36.pyc differ diff --git a/compscripts/__pycache__/complib2.cpython-38.pyc b/compscripts/__pycache__/complib2.cpython-38.pyc new file mode 100644 index 0000000..ec396bb Binary files /dev/null and b/compscripts/__pycache__/complib2.cpython-38.pyc differ diff --git a/compscripts/__pycache__/complib2.cpython-39.pyc b/compscripts/__pycache__/complib2.cpython-39.pyc new file mode 100644 index 0000000..3028214 Binary files /dev/null and b/compscripts/__pycache__/complib2.cpython-39.pyc differ diff --git a/compscripts/__pycache__/complib3.cpython-310.pyc b/compscripts/__pycache__/complib3.cpython-310.pyc new file mode 100644 index 0000000..11ba64e Binary files /dev/null and b/compscripts/__pycache__/complib3.cpython-310.pyc differ diff --git a/compscripts/__pycache__/complib3.cpython-39.pyc b/compscripts/__pycache__/complib3.cpython-39.pyc new file mode 100644 index 0000000..7a86934 Binary files /dev/null and b/compscripts/__pycache__/complib3.cpython-39.pyc differ diff --git a/compscripts/complib2.py b/compscripts/complib2.py new file mode 100644 index 0000000..ca0ba30 --- /dev/null +++ b/compscripts/complib2.py @@ -0,0 +1,639 @@ +#!/usr/bin/python3 + +#Python3 compilation library +#Aaron M. Schinder +#29 Dec 2020 +# +#Cleanup and refactor from 2017 python2 version compilation libraries + +import os,sys,math,subprocess + +##################### +#Directory Functions# +##################### + +##flist - list all files in a given directory pth +##optional arguments: +# recurse - (T/F): Whether to recursively search for files in directory tree +# exts - (list): A list of file extensions to filter on +# normpath (T/F): whether to normalize path variables after +#filelist = flist(pth,**kwargs): +def flist(pth,**kwargs): + flst = [] + if(not('recurse' in kwargs)): + recurse_ = False + else: + recurse_ = kwargs['recurse'] + if(not('exts' in kwargs)): + filterexts_ = False + else: + filterexts_ = True + exts = kwargs['exts'] + if(not('normpath' in kwargs)): + normpath_ = True + else: + normpath_ = kwargs['normpath'] + if(not('linuxpath' in kwargs)): + linuxpath_ = False + else: + linuxpath_ = kwargs['linuxpath'] + if(not('followlinks' in kwargs)): + followlinks_ = False + else: + followlinks_ = kwargs['followlinks'] + + dirlist = [] + rawlist = os.listdir(pth) + + for F in rawlist: + F2 = os.path.join(pth,F) + if(os.path.isdir(F2)): + b = (followlinks_) or ((not followlinks_) and not(os.path.islink(F2))) + if(b): + if((F2!=".")&(F2!="..")): + dirlist.append(F2) + elif(os.path.isfile(F2)): + flst.append(F2) + + #Recurse through directories + if(recurse_): + for D in dirlist: + lst = flist(D,**kwargs) + for L in lst: + flst.append(L) + + #Postprocess: + #Filter out all extensions except the selected ext list + if(filterexts_): + flst = filterexts(flst,exts) + + #Normalize filename path according to os + if(normpath_): + flst2 = list(flst) + for I in range(0,len(flst2)): + flst[I] = os.path.normpath(flst2[I]) + + #If linuxpath, convert all \\ to / + #if(linuxpath_): + # flst2 = list(flst) + # for I in range(0,len(flst2)): + # flst[I] = linuxpath(flst2[I]) + + return flst + +#Filters by extensions in a list of files +#flst = def filterexts(flst,exts): +def filterexts(flst,exts): + flst2 = [] + if(isinstance(exts,str)): + exts = list([exts]) + for F in flst: + b = False + for ext in exts: + if(ext[0]!='.'): + ext = '.'+ext + F2 = os.path.splitext(F) + if(len(F2)>=2): + ex = F2[1] + if(len(ex)>0): + if(ex[0]!='.'): + ex = '.'+ex + if(ex==ext): + b = True + if(b): + flst2.append(F) + + return flst2 + +#Find a file fname, starting in pth and recursing +#Used for finding library files to link +def findfile(fname,pth,**kwargs): + fullfname = "" + flst = flist(pth,recurse=True) + for F in flst: + F2 = os.path.split(F)[1] + if(F2 == fname): + fullfname = F + + return fullfname + +#List to space-seperated-string +def list_to_sss(lst): + lout = "" + for I in range(0,len(lst)-1): + lout = lout + lst[I] + " " + if(len(lst)>0): + lout = lout + lst[len(lst)-1] + return lout + +def strip_whitespace(strin): + strout = "" + I1 = -1 + I2 = -1 + for I in range(0,len(strin)): + if(strin[I]!=' ' and strin[I]!='\t' and strin[I]!='\r'and strin[I]!='\n'): + I1 = I + break + q = list(range(0,len(strin))) + q.reverse() + for I in q: + if(strin[I]!=' ' and strin[I]!='\t' and strin[I]!='\r'and strin[I]!='\n'): + I2 = I+1 + break + if(I1>=0 and I2>=0): + strout = strin[I1:I2] + return strout + +def sss_to_list(sss): + lout = [] + l1 = sss.split(' ') + for l in l1: + l2 = strip_whitespace(l) + lout.append(l2) + return lout + + +def replaceext(fname,ext): + fname2 = "" + if(len(ext)>0): + if(ext[0]!='.'): + ext = '.'+ext + fname2 = os.path.splitext(fname)[0]+ext + else: + fname2 = os.path.splitext(fname)[0] + return fname2 + +def replaceexts(fnamelist,ext): + fname2list = [] + for F in fnamelist: + F2 = replaceext(F,ext) + fname2list.append(F2) + return fname2list + +# def except_contains_oldv(lst1,exc): +# lst2 = [] +# for item in lst1: +# b = 1 +# for item2 in exc: +# if(item.find(item2)>=0): +# b = 0 +# break +# if(b==1): +# lst2.append(item) +# return lst2 + +#filenames must match +def except_contains(lst1,exc): + lst2 = [] + for item in lst1: + b = 1 + for item2 in exc: + fsplit = os.path.split(item) + fn = fsplit[len(fsplit)-1] + if(fn==item2): + b = 0 + break + if(b==1): + lst2.append(item) + return lst2 + +########################## +##System Call Procedures## +########################## + +def callproc(cmd, **kwargs): + if(not('logfile' in kwargs)): + use_lf = False + else: + logfile = kwargs['logfile'] + if(logfile!=""): + fp = open(kwargs['logfile'],'a+') + use_lf = True + else: + use_lf = False + + if(not('echo' in kwargs)): + echo = True + else: + echo = kwargs['echo'] + + if(echo): + print(cmd) + + #encoding/deconding to/from bytes is necessary to use the subprocess command + #in python3.7 + #However, only do this in linux + if(sys.platform!='win32'): + cmd2 = cmd.encode(encoding='utf-8') + else: + cmd2 = cmd + proc = subprocess.Popen(cmd2,stderr = subprocess.STDOUT, stdout=subprocess.PIPE, shell=True) + (out, err) = proc.communicate() + + out = out.decode(encoding='utf-8') + + if(echo): + print(out) + #print(err); + if(use_lf): + fp.writelines(cmd+'\n') + fp.writelines(out+'\n') + + if(use_lf): + fp.close() + +####################################### +##Compiler, Archive, and Linker Calls## +####################################### + +def smartcompile(srcfile,objext='.o'): + mtsrc = os.path.getmtime(srcfile) + objfile = replaceext(srcfile,objext) + objexists = os.path.exists(objfile) + ret = True + if(objexists): + mtobj = os.path.getmtime(objfile) + if(mtobj>=mtsrc): + ret = False + + return ret + +#MSVC compiler wrapper +def msvc_compile(compilername, srcfile, **kwargs): + + if(not('include' in kwargs)): + include = '' + else: + include = kwargs['include'] + if(isinstance(include,list)): + include = list_to_sss(include) + + if(not('flags' in kwargs)): + flags = '' + else: + flags = kwargs['flags'] + if(isinstance(flags,list)): + flags = list_to_sss(flags) + + if(not('objext' in kwargs)): + objext = '.obj' + else: + objext = kwargs['objext'] + + if(not('srcfileflag' in kwargs)): + srcfileflag = '/c' + else: + srcfileflag = kwargs['srcfileflag'] + + if(not('outfileflag' in kwargs)): + outfileflag = '/Fo:' + else: + outfileflag = kwargs['outfileflag'] + if(not('logfile' in kwargs)): + logfile = "" + else: + logfile = kwargs['logfile'] + + outfile = replaceext(srcfile,objext) + ln = compilername+" "+flags+" "+" "+srcfileflag+" "+srcfile+" "+outfileflag+outfile + ln = ln + " " + include + + callproc(ln,echo=True,logfile=logfile) + + return + +#MSVC compiler wrapper +def msvc_compile_list(compiler,srclist,**kwargs): + for S in srclist: + msvc_compile(compiler,S,**kwargs) + return + +#gnu-style compiler compile: Should work with gcc, g++, gfortran +def gs_compile(compiler,srcfile,**kwargs): + if(not('include' in kwargs)): + include = '' + else: + include = kwargs['include'] + if(isinstance(include,list)): + include = list_to_sss(include) + + if(not('flags' in kwargs)): + flags = '' + else: + flags = kwargs['flags'] + if(isinstance(flags,list)): + flags = list_to_sss(flags) + + if(not('objext' in kwargs)): + objext = '.o' + else: + objext = kwargs['objext'] + + if(not('srcfileflag' in kwargs)): + srcfileflag = '-c' + else: + srcfileflag = kwargs['srcfileflag'] + + if(not('outfileflag' in kwargs)): + outfileflag = '-o' + else: + outfileflag = kwargs['outfileflag'] + + if(not('logfile' in kwargs)): + logfile = "" + else: + logfile = kwargs['logfile'] + + if(not('smartcompile' in kwargs)): + _smartcompile = True + else: + _smartcompile = kwargs['smartcompile'] + + #Do I want to make this thing this general? + + if(not(_smartcompile) or smartcompile(srcfile,objext)): + outfile = replaceext(srcfile,objext) + ln = compiler+" "+flags+" " + outfileflag+" "+outfile+" "+srcfileflag+" "+srcfile + ln = ln + " " + include + + callproc(ln,echo=True,logfile=logfile) + + return + +def gs_compile_list(compiler,srclist,**kwargs): + for S in srclist: + gs_compile(compiler,S,**kwargs) + return + +def gs_compile_all(compiler,srcdir,srcexts,**kwargs): + if(not('recurse' in kwargs)): + recurse = True + else: + recurse = kwargs['recurse'] + + srcfils = flist(srcdir,exts=srcexts,recurse=recurse) + + for S in srcfils: + gs_compile(compiler,S,**kwargs) + + return + +def gs_link_all(linker,srcpath,target,**kwargs): + + if(not('objext' in kwargs)): + objext = '.o' + else: + objext = kwargs['objext'] + + if(not('recurse' in kwargs)): + recurse = True + else: + recurse = kwargs['recurse'] + + + objfils = flist(srcpath,exts=objext,recurse=recurse) + oflst = list_to_sss(objfils) + + gs_link_list(linker,oflst,target,**kwargs) + + return + +def gs_link_list(linker,objlist,target,**kwargs): + + if(not('objext' in kwargs)): + objext = '.o' + else: + objext = kwargs['objext'] + + if(not('libdir' in kwargs)): + libdir = '' + else: + libdir = kwargs['libdir'] + + if(not('staticlibs' in kwargs)): + staticlibs = '' + else: + staticlibs = kwargs['staticlibs'] + + if(not('libflags' in kwargs)): + libflags = '' + else: + libflags = kwargs['libflags'] + + if(not('linkerflags' in kwargs)): + linkerflags = '' + else: + linkerflags = kwargs['linkerflags'] + + if(not('recurse' in kwargs)): + recurse = True + else: + recurse = kwargs['recurse'] + + if(not('logfile' in kwargs)): + logfile = '' + else: + logfile = kwargs['logfile'] + + ln = linker+" -o "+target+" "+libdir + ln = ln+" "+objlist+" "+staticlibs+" "+libflags+" "+linkerflags + + callproc(ln,logfile=logfile) + return + +def msvc_link_list(objlist,target,**kwargs): + + linker = 'link' + + if(not('objext' in kwargs)): + objext = '.obj' + else: + objext = kwargs['objext'] + + if(not('libdir' in kwargs)): + libdir = '' + else: + libdir = kwargs['libdir'] + + if(not('staticlibs' in kwargs)): + staticlibs = '' + else: + staticlibs = kwargs['staticlibs'] + + if(not('libflags' in kwargs)): + libflags = '' + else: + libflags = kwargs['libflags'] + + if(not('linkerflags' in kwargs)): + linkerflags = '' + else: + linkerflags = kwargs['linkerflags'] + + if(not('recurse' in kwargs)): + recurse = True + else: + recurse = kwargs['recurse'] + + if(not('logfile' in kwargs)): + logfile = '' + else: + logfile = kwargs['logfile'] + + ln = linker+" "+libdir + ln = ln+" "+objlist+" "+staticlibs+" "+linkerflags + ln = ln+" /out:"+target+" "+libflags + + callproc(ln,logfile=logfile) + + return + +def ar_all(srcpath,arname,**kwargs): + if(not('recurse' in kwargs)): + recurse = True + else: + recurse = kwargs['recurse'] + if(not('objext' in kwargs)): + objext = '.o' + else: + objext = kwargs['objext'] + + objlist = flist(srcpath,exts=objext,recurse=recurse) + ar_list(objlist,arname,**kwargs) + + return + +def msvc_lib_list(objlist,arname,**kwargs): + objlist2 = list_to_sss(objlist) + + ln = "lib "+objlist2+" /out:"+arname + callproc(ln) + + return + +def ar_list(objlist,arname,**kwargs): + objlist2 = list_to_sss(objlist) + + ln = "ar cr "+ arname+" "+objlist2 + callproc(ln) + + return + +def ar_add_list(objlist,arname,**kwargs): + objlist2 = list_to_sss(objlist) + + ln = "ar t "+arname+" "+objlist2 + callproc(ln) + return + +############################## +##Derived Compiler Functions## +############################## + +def gcc_compile(srcfile,**kwargs): + compiler = 'gcc' + kwargs['objext'] = '.o' + #srcexts = ['.c'] + + gs_compile(compiler,srcfile,**kwargs) + + return + +def gcc_compile_all(srcdir,**kwargs): + compiler = 'gcc' + kwargs['objext'] = '.o' + srcexts = ['.c'] + + gs_compile_all(compiler,srcdir,srcexts,**kwargs) + + return + +def gcc_compile_list(srclist,**kwargs): + compiler = 'gcc' + kwargs['objext'] = '.o' + #srcexts = ['.c'] + + gs_compile_list(compiler,srclist,**kwargs) + + return + +def gpp_compile(srcfile,**kwargs): + compiler = 'g++' + kwargs['objext'] = '.o' + #srcexts = ['.c','.cpp'] + + gs_compile(compiler,srcfile,**kwargs) + + return + +def gpp_compile_all(srcdir,**kwargs): + compiler = 'g++' + kwargs['objext'] = '.o' + srcexts = ['.c','.cpp'] + + gs_compile_all(compiler,srcdir,srcexts,**kwargs) + + return + +def gpp_compile_list(srclist,**kwargs): + compiler = 'g++' + kwargs['objext'] = '.o' + #srcexts = ['.c','.cpp'] + + gs_compile_list(compiler,srclist,**kwargs) + + return + +def gfortran_compile(srcfile,**kwargs): + compiler = 'gfortran' + kwargs['objext'] = '.o' + #srcexts = ['.f','.f90','.f77'] + + gs_compile(compiler,srcfile,**kwargs) + + return + +def gfortran_compile_all(srcdir,**kwargs): + compiler = 'gfortran' + kwargs['objext'] = '.o' + srcexts = ['.f','.f90','.f77'] + + gs_compile_all(compiler,srcdir,srcexts,**kwargs) + + return + +def gfortran_compile_list(srclist,**kwargs): + compiler = 'gfortran' + kwargs['objext'] = '.o' + #srcexts = ['.f','.f90','.f77'] + + gs_compile_list(compiler,srclist,**kwargs) + + return + +def clang_compile(srcfile,**kwargs): + compiler = 'clang++' + kwargs['objext'] = '.o' + #srcexts = ['.c','.cpp'] + + gs_compile(compiler,srcfile,**kwargs) + + return + +def clang_compile_all(srcdir,**kwargs): + compiler = 'clang++' + kwargs['objext'] = '.o' + srcexts = ['.c','.cpp'] + + gs_compile_all(compiler,srcdir,srcexts,**kwargs) + + return + +def clang_compile_list(srclist,**kwargs): + compiler = 'clang++' + kwargs['objext'] = '.o' + #srcexts = ['.c','.cpp'] + + gs_compile_list(compiler,srclist,**kwargs) + + return \ No newline at end of file diff --git a/compscripts/complib3.py b/compscripts/complib3.py new file mode 100644 index 0000000..8695738 --- /dev/null +++ b/compscripts/complib3.py @@ -0,0 +1,524 @@ +#!/usr/bin/python3 + +import os,sys,math +import subprocess + +##flist - list all files in a given directory pth +##optional arguments: +# recurse - (T/F): Whether to recursively search for files in directory tree +# exts - (list): A list of file extensions to filter on +# normpath (T/F): whether to normalize path variables after +#filelist = flist(pth,**kwargs): +def flist(pth,**kwargs): + flst = [] + if(not('recurse' in kwargs)): + recurse_ = False + else: + recurse_ = kwargs['recurse'] + if(not('exts' in kwargs)): + filterexts_ = False + else: + filterexts_ = True + exts = kwargs['exts'] + if(not('normpath' in kwargs)): + normpath_ = True + else: + normpath_ = kwargs['normpath'] + if(not('linuxpath' in kwargs)): + linuxpath_ = False + else: + linuxpath_ = kwargs['linuxpath'] + if(not('followlinks' in kwargs)): + followlinks_ = False + else: + followlinks_ = kwargs['followlinks'] + + dirlist = [] + rawlist = os.listdir(pth) + + for F in rawlist: + F2 = os.path.join(pth,F) + if(os.path.isdir(F2)): + b = (followlinks_) or ((not followlinks_) and not(os.path.islink(F2))) + if(b): + if((F2!=".")&(F2!="..")): + dirlist.append(F2) + elif(os.path.isfile(F2)): + flst.append(F2) + + #Recurse through directories + if(recurse_): + for D in dirlist: + lst = flist(D,**kwargs) + for L in lst: + flst.append(L) + + #Postprocess: + #Filter out all extensions except the selected ext list + if(filterexts_): + flst = filterexts(flst,exts) + + #Normalize filename path according to os + if(normpath_): + flst2 = list(flst) + for I in range(0,len(flst2)): + flst[I] = os.path.normpath(flst2[I]) + + #If linuxpath, convert all \\ to / + #if(linuxpath_): + # flst2 = list(flst) + # for I in range(0,len(flst2)): + # flst[I] = linuxpath(flst2[I]) + + return flst + +#Filters by extensions in a list of files +#flst = def filterexts(flst,exts): +def filterexts(flst,exts): + flst2 = [] + if(isinstance(exts,str)): + exts = list([exts]) + for F in flst: + b = False + for ext in exts: + if(ext[0]!='.'): + ext = '.'+ext + F2 = os.path.splitext(F) + if(len(F2)>=2): + ex = F2[1] + if(len(ex)>0): + if(ex[0]!='.'): + ex = '.'+ex + if(ex==ext): + b = True + if(b): + flst2.append(F) + + return flst2 + +#Find a file fname, starting in pth and recursing +#Used for finding library files to link +def findfile(fname,pth,**kwargs): + fullfname = "" + flst = flist(pth,recurse=True) + for F in flst: + F2 = os.path.split(F)[1] + if(F2 == fname): + fullfname = F + + return fullfname + +def replaceext(fname,ext): + fname2 = "" + if(len(ext)>0): + if(ext[0]!='.'): + ext = '.'+ext + fname2 = os.path.splitext(fname)[0]+ext + else: + fname2 = os.path.splitext(fname)[0] + return fname2 + +def replaceexts(fnamelist,ext): + fname2list = [] + for F in fnamelist: + F2 = replaceext(F,ext) + fname2list.append(F2) + return fname2list + +#filenames must match +def except_contains(lst1,exc): + lst2 = [] + for item in lst1: + b = 1 + for item2 in exc: + fsplit = os.path.split(item) + fn = fsplit[len(fsplit)-1] + if(fn==item2): + b = 0 + break + if(b==1): + lst2.append(item) + return lst2 + +########################## +##System Call Procedures## +########################## + +def callproc(cmd, **kwargs): + if(not('logfile' in kwargs)): + use_lf = False + else: + logfile = kwargs['logfile'] + if(logfile!=""): + fp = open(kwargs['logfile'],'a+') + use_lf = True + else: + use_lf = False + + if(not('echo' in kwargs)): + echo = True + else: + echo = kwargs['echo'] + + if(echo): + print(cmd) + + #encoding/deconding to/from bytes is necessary to use the subprocess command + #in python3.7 + #However, only do this in linux + if(sys.platform!='win32'): + cmd2 = cmd.encode(encoding='utf-8') + else: + cmd2 = cmd + proc = subprocess.Popen(cmd2,stderr = subprocess.STDOUT, stdout=subprocess.PIPE, shell=True) + (out, err) = proc.communicate() + + out = out.decode(encoding='utf-8') + + if(echo): + print(out) + #print(err); + if(use_lf): + fp.writelines(cmd+'\n') + fp.writelines(out+'\n') + + if(use_lf): + fp.close() + +#List to space-seperated-string +def list_to_sss(lst): + lout = "" + for I in range(0,len(lst)-1): + lout = lout + lst[I] + " " + if(len(lst)>0): + lout = lout + lst[len(lst)-1] + return lout + +##################################### +## Incremental Compilation Library ## +##################################### + +#silently read lines from a text file if exists +def readtextlines(fname): + txtlns = [] + + if(not os.path.isfile(fname)): + return txtlns + + try: + fp = open(fname,"r") + except: + return txtlns + + ln = " " + while(ln!=""): + ln = fp.readline() + txtlns.append(ln) + + fp.close() + + return txtlns + +def getincludefnfrage(includeline): + + fnfrag = "" + I1 = -1 + I2 = -1 + + for I in range(0,len(includeline)): + if(I1<0 and (includeline[I]=='<' or includeline[I]=='"')): + I1 = I + if(I1>=0 and (includeline[I]=='>' or includeline[I]=='"')): + I2 = I + break + if(I1>=0 and I2>=0): + fnfrag = includeline[I1+1:I2] + + return fnfrag + +#Returns the name of the source file fname (if it exists) +#and all included filenames +def getsrcandincludes(fname, incdirs): + + flist = [] + if(os.path.isfile(fname)): + flist.append(fname) + + Ilist = 0 + while(Ilist=0): + fnfrag = getincludefnfrage(lns[J]) + for K in range(0,len(incdirs)): + tfn = os.path.join(incdirs[K],fnfrag) + if(os.path.isfile(tfn)): + flist.append(tfn) + break + + Ilist = Ilist + 1 + + return flist + +#Returns the name of the object file associated with the source file +#within the object store folder (if it exists) +def getobjfile(fname,objstore,objext = ".o"): + + fret = "" + f1 = os.path.split(fname)[1] + f2 = f1 + while(os.path.splitext(f2)[1]!=""): + f2 = os.path.splitext(f2)[0] + objext = objext.strip('.') + f3 = os.path.join(objstore,"{}.{}".format(f2,objext)) + if(os.path.exists(f3)): + fret = f3 + + return fret + +def getsrctimes(fname, incdirs): + + ftimes = [] + flst = getsrcandincludes(fname, incdirs) + for I in range(0,len(flst)): + f = flst[I] + mt = os.path.getmtime(f) + ftimes.append(mt) + + return ftimes + +def getobjtime(fname,objstore,objext=".o"): + ret = -1 + fret = getobjfile(fname,objstore,objext) + if(fret!=""): + ret = os.path.getmtime(fret) + + return ret + +#Decide whether or not to compile source file +def decidecompile(fname,**kwargs): + ret = True + + if(not os.path.isfile(fname)): + ret = False + return ret + + ##unpack kwargs + if("searchincdirs" in kwargs): + incdirs = kwargs["searchincdirs"] + else: + incdirs = ["./include"] + + if("objext" in kwargs): + objext = kwargs["objext"] + else: + objext = ".o" + if("objstore" in kwargs): + objstore = kwargs["objstore"] + else: + objstore = "./objstore" + + + srclist = getsrcandincludes(fname,incdirs) + srctlist = getsrctimes(fname,incdirs) + obj = getobjfile(fname,objstore,objext) + objt = getobjtime(fname,objstore,objext) + + if(obj!=""): + ret = False + for I in range(0,len(srctlist)): + if(srctlist[I]>objt): + ret = True + break + + return ret + +def gs_incremental_compile(compiler,srcfile,**kwargs): + + if(not('include' in kwargs)): + include = '' + else: + include = kwargs['include'] + if(isinstance(include,list)): + include = list_to_sss(include) + if(not('flags' in kwargs)): + flags = '' + else: + flags = kwargs['flags'] + if(isinstance(flags,list)): + flags = list_to_sss(flags) + if(not('objext' in kwargs)): + objext = '.o' + else: + objext = kwargs['objext'] + if(not('srcfileflag' in kwargs)): + srcfileflag = '-c' + else: + srcfileflag = kwargs['srcfileflag'] + if(not('outfileflag' in kwargs)): + outfileflag = '-o' + else: + outfileflag = kwargs['outfileflag'] + + if(not('logfile' in kwargs)): + logfile = "" + else: + logfile = kwargs['logfile'] + if(not('smartcompile' in kwargs)): + _smartcompile = True + else: + _smartcompile = kwargs['smartcompile'] + + #incrementalcompile + if("searchincdirs" in kwargs): + incdirs = kwargs["searchincdirs"] + else: + incdirs = ["./include"] + + if("objext" in kwargs): + objext = kwargs["objext"] + else: + objext = ".o" + if("objstore" in kwargs): + objstore = kwargs["objstore"] + else: + objstore = "./objstore" + + #Do I want to make this thing this general? + + docompile = decidecompile(srcfile,**kwargs) + + if(docompile): + f1 = os.path.split(srcfile)[1] + f2 = f1 + while(os.path.splitext(f2)[1]!=""): + f2 = os.path.splitext(f2)[0] + outfile = os.path.join(objstore,"{}{}".format(f2,objext)) + + ln = compiler+" "+flags+" " + outfileflag+" "+outfile+" "+srcfileflag+" "+srcfile + ln = ln + " " + include + + callproc(ln,echo=True,logfile=logfile) + + return + +def gs_incremental_compile_list(compiler,srclist,**kwargs): + + for s in srclist: + gs_incremental_compile(compiler,s,**kwargs) + + return + +#MSVC compiler wrapper + +def msvc_incremental_compile(compilername, srcfile, **kwargs): + + if(not('include' in kwargs)): + include = '' + else: + include = kwargs['include'] + if(isinstance(include,list)): + include = list_to_sss(include) + + if(not('flags' in kwargs)): + flags = '' + else: + flags = kwargs['flags'] + if(isinstance(flags,list)): + flags = list_to_sss(flags) + + if(not('objext' in kwargs)): + objext = '.obj' + else: + objext = kwargs['objext'] + + if(not('srcfileflag' in kwargs)): + srcfileflag = '/c' + else: + srcfileflag = kwargs['srcfileflag'] + + if(not('outfileflag' in kwargs)): + outfileflag = '/Fo:' + else: + outfileflag = kwargs['outfileflag'] + if(not('logfile' in kwargs)): + logfile = "" + else: + logfile = kwargs['logfile'] + + #incrementalcompile + if("searchincdirs" in kwargs): + incdirs = kwargs["searchincdirs"] + else: + incdirs = ["./include"] + # if("objext" in kwargs): + # objext = kwargs["objext"] + # else: + # objext = ".o" + if("objstore" in kwargs): + objstore = kwargs["objstore"] + else: + objstore = "./objstore" + + docompile = decidecompile(srcfile,**kwargs) + + if(docompile): + f1 = os.path.split(srcfile)[1] + f2 = f1 + while(os.path.splitext(f2)[1]!=""): + f2 = os.path.splitext(f2)[0] + outfile = os.path.join(objstore,"{}{}".format(f2,objext)) + + ln = compilername+" "+flags+" "+srcfileflag+" "+srcfile+" "+outfileflag+" "+outfile + ln = ln + " " + include + + callproc(ln,echo=True,logfile=logfile) + + # outfile = replaceext(srcfile,objext) + # ln = compilername+" "+flags+" "+" "+srcfileflag+" "+srcfile+" "+outfileflag+outfile + # ln = ln + " " + include + + callproc(ln,echo=True,logfile=logfile) + + return + +def msvc_incremental_compile_list(compiler,srclist,**kwargs): + for S in srclist: + msvc_incremental_compile(compiler,S,**kwargs) + return + +####################### +## Main Script Tests ## +####################### + +def testtimes(args): + if(len(args)>=2): + flist = getsrcandincludes(args[1],["./include"]) + ftlist = getsrctimes(args[1],["./include"]) + for I in range(0,len(flist)): + print("{}\t\t{}".format(flist[I],ftlist[I])) + + print("associated obj file:") + fobj = getobjfile(args[1],"./objstore") + ftobj = getobjtime(args[1],"./objstore") + if(fobj!=""): + print("{}\t\t{}".format(fobj,ftobj)) + else: + print("none found") + + cflag = decidecompile(args[1]) + print("compile? : {}".format(cflag)) + + + return + +# if(__name__ == "__main__"): + +# args = sys.argv +# testtimes(args) + + + + \ No newline at end of file diff --git a/compscripts/linux64.makelib.py b/compscripts/linux64.makelib.py new file mode 100644 index 0000000..2c3765f --- /dev/null +++ b/compscripts/linux64.makelib.py @@ -0,0 +1,52 @@ +#!/usr/bin/python3 + +import os,sys,subprocess,math +from complib2 import * +from complib3 import gs_incremental_compile, gs_incremental_compile_list + +import shutil +#from distutils.dir_util import copy_tree as copy_tree #this version does overwrites +from shutil import copytree + +libname = 'amsculib2.linux64' #prefix static library name to generate +targetname = 'test' #create this executable when compiling tests +commonincdir = "../../linux64/include" +commonlibdir = "../../linux64/lib" +localbindir = "./bin_linux64" +cc = 'nvcc' #compiler +srcexts = ['.c','.cpp','.cu'] +mainsrc = ['main.cu'] #ignore these files when compiling the static library + +kwargs = dict() +include = "-I./include -I{}".format(commonincdir) +kwargs['include'] = include +#-dc flag: relocatable device code - needed for device functions to link in different "execution units" +#--ptxas-options=-v +kwargs['flags'] = "-dc --compiler-options '-fPIC -O3'" +kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir) +kwargs['libflags'] = "-l{}".format(libname) +kwargs['linkerflags'] = "" +kwargs['recurse'] = True +kwargs['objstore'] = "./objstore" +kwargs['searchincdirs'] = ['./include'] + +#find all source files, except the main project files +files = flist('./src',exts = srcexts, recurse=True) +files = except_contains(files,mainsrc) +objfiles = replaceexts(files,'.o') +objfiles_sss = list_to_sss(objfiles) + +#compile all the source files in the list +#gs_compile_list(cc,files,**kwargs) +gs_incremental_compile_list(cc,files,**kwargs) + +#archive all the source files into a static library +#ar_list(objfiles,'{}/lib{}.a'.format(localbindir,libname)) +objlist = flist(kwargs['objstore'],exts='.o',recurse=True) +ar_list(objlist,'{}/lib{}.a'.format(localbindir,libname)) + +# #Push any libraries to the common lib folder +shutil.copy('{}/lib{}.a'.format(localbindir,libname),commonlibdir) + +# #Copy include files to the common include folder +copytree('./include/',commonincdir+'/',dirs_exist_ok=True) diff --git a/compscripts/linux64.maketest.py b/compscripts/linux64.maketest.py new file mode 100644 index 0000000..88bb698 --- /dev/null +++ b/compscripts/linux64.maketest.py @@ -0,0 +1,43 @@ +#!/usr/bin/python3 + +import os,sys,subprocess,math +from complib2 import * +from complib3 import gs_incremental_compile, gs_incremental_compile_list + +import shutil + +libname = 'amsculib2.linux64' #prefix static library name to generate +targetname = 'test' #create this executable when compiling tests +commonincdir = "../../linux64/include" +commonlibdir = "../../linux64/lib" +localbindir = "./bin_linux64" +cc = 'nvcc' #compiler +srcexts = ['.c','.cpp','.cu'] +mainsrc = ['main.cu'] #ignore these files when compiling the static library + +kwargs = dict() +include = "-I./include -I{}".format(commonincdir) +kwargs['include'] = include +#-dc flag: relocatable device code - needed for device functions to link in different "execution units" +kwargs['flags'] = "-dc --compiler-options '-fPIC'" +kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir) +kwargs['libflags'] = "-l{} -lamsculib2.linux64".format(libname) +kwargs['linkerflags'] = "" +kwargs['recurse'] = True +kwargs['objstore'] = "./objstore" +kwargs['searchincdirs'] = ['./include'] + +#-lamsmathlib3.linux64 -lamsstring3.linux64 -lamsmatrix_cpp.linux64 -llapack -lblas -lgfortran -lamsmathutilthread.linux64 -lamsmathutil2.linux64 + +#Pull required binary dynamic libraries to the bin folder +#shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir); +#shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir); +#shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64'); + +#Designate source files for main test program +fsrc = ['./src/main.cu'] +fobj = replaceexts(fsrc,'.o') + +#Compile test programs +gs_compile_list(cc,fsrc,**kwargs) +gs_link_list(cc,list_to_sss(fobj),'{}/{}'.format(localbindir,targetname),**kwargs) diff --git a/compscripts/old/complib2.py b/compscripts/old/complib2.py new file mode 100644 index 0000000..ca0ba30 --- /dev/null +++ b/compscripts/old/complib2.py @@ -0,0 +1,639 @@ +#!/usr/bin/python3 + +#Python3 compilation library +#Aaron M. Schinder +#29 Dec 2020 +# +#Cleanup and refactor from 2017 python2 version compilation libraries + +import os,sys,math,subprocess + +##################### +#Directory Functions# +##################### + +##flist - list all files in a given directory pth +##optional arguments: +# recurse - (T/F): Whether to recursively search for files in directory tree +# exts - (list): A list of file extensions to filter on +# normpath (T/F): whether to normalize path variables after +#filelist = flist(pth,**kwargs): +def flist(pth,**kwargs): + flst = [] + if(not('recurse' in kwargs)): + recurse_ = False + else: + recurse_ = kwargs['recurse'] + if(not('exts' in kwargs)): + filterexts_ = False + else: + filterexts_ = True + exts = kwargs['exts'] + if(not('normpath' in kwargs)): + normpath_ = True + else: + normpath_ = kwargs['normpath'] + if(not('linuxpath' in kwargs)): + linuxpath_ = False + else: + linuxpath_ = kwargs['linuxpath'] + if(not('followlinks' in kwargs)): + followlinks_ = False + else: + followlinks_ = kwargs['followlinks'] + + dirlist = [] + rawlist = os.listdir(pth) + + for F in rawlist: + F2 = os.path.join(pth,F) + if(os.path.isdir(F2)): + b = (followlinks_) or ((not followlinks_) and not(os.path.islink(F2))) + if(b): + if((F2!=".")&(F2!="..")): + dirlist.append(F2) + elif(os.path.isfile(F2)): + flst.append(F2) + + #Recurse through directories + if(recurse_): + for D in dirlist: + lst = flist(D,**kwargs) + for L in lst: + flst.append(L) + + #Postprocess: + #Filter out all extensions except the selected ext list + if(filterexts_): + flst = filterexts(flst,exts) + + #Normalize filename path according to os + if(normpath_): + flst2 = list(flst) + for I in range(0,len(flst2)): + flst[I] = os.path.normpath(flst2[I]) + + #If linuxpath, convert all \\ to / + #if(linuxpath_): + # flst2 = list(flst) + # for I in range(0,len(flst2)): + # flst[I] = linuxpath(flst2[I]) + + return flst + +#Filters by extensions in a list of files +#flst = def filterexts(flst,exts): +def filterexts(flst,exts): + flst2 = [] + if(isinstance(exts,str)): + exts = list([exts]) + for F in flst: + b = False + for ext in exts: + if(ext[0]!='.'): + ext = '.'+ext + F2 = os.path.splitext(F) + if(len(F2)>=2): + ex = F2[1] + if(len(ex)>0): + if(ex[0]!='.'): + ex = '.'+ex + if(ex==ext): + b = True + if(b): + flst2.append(F) + + return flst2 + +#Find a file fname, starting in pth and recursing +#Used for finding library files to link +def findfile(fname,pth,**kwargs): + fullfname = "" + flst = flist(pth,recurse=True) + for F in flst: + F2 = os.path.split(F)[1] + if(F2 == fname): + fullfname = F + + return fullfname + +#List to space-seperated-string +def list_to_sss(lst): + lout = "" + for I in range(0,len(lst)-1): + lout = lout + lst[I] + " " + if(len(lst)>0): + lout = lout + lst[len(lst)-1] + return lout + +def strip_whitespace(strin): + strout = "" + I1 = -1 + I2 = -1 + for I in range(0,len(strin)): + if(strin[I]!=' ' and strin[I]!='\t' and strin[I]!='\r'and strin[I]!='\n'): + I1 = I + break + q = list(range(0,len(strin))) + q.reverse() + for I in q: + if(strin[I]!=' ' and strin[I]!='\t' and strin[I]!='\r'and strin[I]!='\n'): + I2 = I+1 + break + if(I1>=0 and I2>=0): + strout = strin[I1:I2] + return strout + +def sss_to_list(sss): + lout = [] + l1 = sss.split(' ') + for l in l1: + l2 = strip_whitespace(l) + lout.append(l2) + return lout + + +def replaceext(fname,ext): + fname2 = "" + if(len(ext)>0): + if(ext[0]!='.'): + ext = '.'+ext + fname2 = os.path.splitext(fname)[0]+ext + else: + fname2 = os.path.splitext(fname)[0] + return fname2 + +def replaceexts(fnamelist,ext): + fname2list = [] + for F in fnamelist: + F2 = replaceext(F,ext) + fname2list.append(F2) + return fname2list + +# def except_contains_oldv(lst1,exc): +# lst2 = [] +# for item in lst1: +# b = 1 +# for item2 in exc: +# if(item.find(item2)>=0): +# b = 0 +# break +# if(b==1): +# lst2.append(item) +# return lst2 + +#filenames must match +def except_contains(lst1,exc): + lst2 = [] + for item in lst1: + b = 1 + for item2 in exc: + fsplit = os.path.split(item) + fn = fsplit[len(fsplit)-1] + if(fn==item2): + b = 0 + break + if(b==1): + lst2.append(item) + return lst2 + +########################## +##System Call Procedures## +########################## + +def callproc(cmd, **kwargs): + if(not('logfile' in kwargs)): + use_lf = False + else: + logfile = kwargs['logfile'] + if(logfile!=""): + fp = open(kwargs['logfile'],'a+') + use_lf = True + else: + use_lf = False + + if(not('echo' in kwargs)): + echo = True + else: + echo = kwargs['echo'] + + if(echo): + print(cmd) + + #encoding/deconding to/from bytes is necessary to use the subprocess command + #in python3.7 + #However, only do this in linux + if(sys.platform!='win32'): + cmd2 = cmd.encode(encoding='utf-8') + else: + cmd2 = cmd + proc = subprocess.Popen(cmd2,stderr = subprocess.STDOUT, stdout=subprocess.PIPE, shell=True) + (out, err) = proc.communicate() + + out = out.decode(encoding='utf-8') + + if(echo): + print(out) + #print(err); + if(use_lf): + fp.writelines(cmd+'\n') + fp.writelines(out+'\n') + + if(use_lf): + fp.close() + +####################################### +##Compiler, Archive, and Linker Calls## +####################################### + +def smartcompile(srcfile,objext='.o'): + mtsrc = os.path.getmtime(srcfile) + objfile = replaceext(srcfile,objext) + objexists = os.path.exists(objfile) + ret = True + if(objexists): + mtobj = os.path.getmtime(objfile) + if(mtobj>=mtsrc): + ret = False + + return ret + +#MSVC compiler wrapper +def msvc_compile(compilername, srcfile, **kwargs): + + if(not('include' in kwargs)): + include = '' + else: + include = kwargs['include'] + if(isinstance(include,list)): + include = list_to_sss(include) + + if(not('flags' in kwargs)): + flags = '' + else: + flags = kwargs['flags'] + if(isinstance(flags,list)): + flags = list_to_sss(flags) + + if(not('objext' in kwargs)): + objext = '.obj' + else: + objext = kwargs['objext'] + + if(not('srcfileflag' in kwargs)): + srcfileflag = '/c' + else: + srcfileflag = kwargs['srcfileflag'] + + if(not('outfileflag' in kwargs)): + outfileflag = '/Fo:' + else: + outfileflag = kwargs['outfileflag'] + if(not('logfile' in kwargs)): + logfile = "" + else: + logfile = kwargs['logfile'] + + outfile = replaceext(srcfile,objext) + ln = compilername+" "+flags+" "+" "+srcfileflag+" "+srcfile+" "+outfileflag+outfile + ln = ln + " " + include + + callproc(ln,echo=True,logfile=logfile) + + return + +#MSVC compiler wrapper +def msvc_compile_list(compiler,srclist,**kwargs): + for S in srclist: + msvc_compile(compiler,S,**kwargs) + return + +#gnu-style compiler compile: Should work with gcc, g++, gfortran +def gs_compile(compiler,srcfile,**kwargs): + if(not('include' in kwargs)): + include = '' + else: + include = kwargs['include'] + if(isinstance(include,list)): + include = list_to_sss(include) + + if(not('flags' in kwargs)): + flags = '' + else: + flags = kwargs['flags'] + if(isinstance(flags,list)): + flags = list_to_sss(flags) + + if(not('objext' in kwargs)): + objext = '.o' + else: + objext = kwargs['objext'] + + if(not('srcfileflag' in kwargs)): + srcfileflag = '-c' + else: + srcfileflag = kwargs['srcfileflag'] + + if(not('outfileflag' in kwargs)): + outfileflag = '-o' + else: + outfileflag = kwargs['outfileflag'] + + if(not('logfile' in kwargs)): + logfile = "" + else: + logfile = kwargs['logfile'] + + if(not('smartcompile' in kwargs)): + _smartcompile = True + else: + _smartcompile = kwargs['smartcompile'] + + #Do I want to make this thing this general? + + if(not(_smartcompile) or smartcompile(srcfile,objext)): + outfile = replaceext(srcfile,objext) + ln = compiler+" "+flags+" " + outfileflag+" "+outfile+" "+srcfileflag+" "+srcfile + ln = ln + " " + include + + callproc(ln,echo=True,logfile=logfile) + + return + +def gs_compile_list(compiler,srclist,**kwargs): + for S in srclist: + gs_compile(compiler,S,**kwargs) + return + +def gs_compile_all(compiler,srcdir,srcexts,**kwargs): + if(not('recurse' in kwargs)): + recurse = True + else: + recurse = kwargs['recurse'] + + srcfils = flist(srcdir,exts=srcexts,recurse=recurse) + + for S in srcfils: + gs_compile(compiler,S,**kwargs) + + return + +def gs_link_all(linker,srcpath,target,**kwargs): + + if(not('objext' in kwargs)): + objext = '.o' + else: + objext = kwargs['objext'] + + if(not('recurse' in kwargs)): + recurse = True + else: + recurse = kwargs['recurse'] + + + objfils = flist(srcpath,exts=objext,recurse=recurse) + oflst = list_to_sss(objfils) + + gs_link_list(linker,oflst,target,**kwargs) + + return + +def gs_link_list(linker,objlist,target,**kwargs): + + if(not('objext' in kwargs)): + objext = '.o' + else: + objext = kwargs['objext'] + + if(not('libdir' in kwargs)): + libdir = '' + else: + libdir = kwargs['libdir'] + + if(not('staticlibs' in kwargs)): + staticlibs = '' + else: + staticlibs = kwargs['staticlibs'] + + if(not('libflags' in kwargs)): + libflags = '' + else: + libflags = kwargs['libflags'] + + if(not('linkerflags' in kwargs)): + linkerflags = '' + else: + linkerflags = kwargs['linkerflags'] + + if(not('recurse' in kwargs)): + recurse = True + else: + recurse = kwargs['recurse'] + + if(not('logfile' in kwargs)): + logfile = '' + else: + logfile = kwargs['logfile'] + + ln = linker+" -o "+target+" "+libdir + ln = ln+" "+objlist+" "+staticlibs+" "+libflags+" "+linkerflags + + callproc(ln,logfile=logfile) + return + +def msvc_link_list(objlist,target,**kwargs): + + linker = 'link' + + if(not('objext' in kwargs)): + objext = '.obj' + else: + objext = kwargs['objext'] + + if(not('libdir' in kwargs)): + libdir = '' + else: + libdir = kwargs['libdir'] + + if(not('staticlibs' in kwargs)): + staticlibs = '' + else: + staticlibs = kwargs['staticlibs'] + + if(not('libflags' in kwargs)): + libflags = '' + else: + libflags = kwargs['libflags'] + + if(not('linkerflags' in kwargs)): + linkerflags = '' + else: + linkerflags = kwargs['linkerflags'] + + if(not('recurse' in kwargs)): + recurse = True + else: + recurse = kwargs['recurse'] + + if(not('logfile' in kwargs)): + logfile = '' + else: + logfile = kwargs['logfile'] + + ln = linker+" "+libdir + ln = ln+" "+objlist+" "+staticlibs+" "+linkerflags + ln = ln+" /out:"+target+" "+libflags + + callproc(ln,logfile=logfile) + + return + +def ar_all(srcpath,arname,**kwargs): + if(not('recurse' in kwargs)): + recurse = True + else: + recurse = kwargs['recurse'] + if(not('objext' in kwargs)): + objext = '.o' + else: + objext = kwargs['objext'] + + objlist = flist(srcpath,exts=objext,recurse=recurse) + ar_list(objlist,arname,**kwargs) + + return + +def msvc_lib_list(objlist,arname,**kwargs): + objlist2 = list_to_sss(objlist) + + ln = "lib "+objlist2+" /out:"+arname + callproc(ln) + + return + +def ar_list(objlist,arname,**kwargs): + objlist2 = list_to_sss(objlist) + + ln = "ar cr "+ arname+" "+objlist2 + callproc(ln) + + return + +def ar_add_list(objlist,arname,**kwargs): + objlist2 = list_to_sss(objlist) + + ln = "ar t "+arname+" "+objlist2 + callproc(ln) + return + +############################## +##Derived Compiler Functions## +############################## + +def gcc_compile(srcfile,**kwargs): + compiler = 'gcc' + kwargs['objext'] = '.o' + #srcexts = ['.c'] + + gs_compile(compiler,srcfile,**kwargs) + + return + +def gcc_compile_all(srcdir,**kwargs): + compiler = 'gcc' + kwargs['objext'] = '.o' + srcexts = ['.c'] + + gs_compile_all(compiler,srcdir,srcexts,**kwargs) + + return + +def gcc_compile_list(srclist,**kwargs): + compiler = 'gcc' + kwargs['objext'] = '.o' + #srcexts = ['.c'] + + gs_compile_list(compiler,srclist,**kwargs) + + return + +def gpp_compile(srcfile,**kwargs): + compiler = 'g++' + kwargs['objext'] = '.o' + #srcexts = ['.c','.cpp'] + + gs_compile(compiler,srcfile,**kwargs) + + return + +def gpp_compile_all(srcdir,**kwargs): + compiler = 'g++' + kwargs['objext'] = '.o' + srcexts = ['.c','.cpp'] + + gs_compile_all(compiler,srcdir,srcexts,**kwargs) + + return + +def gpp_compile_list(srclist,**kwargs): + compiler = 'g++' + kwargs['objext'] = '.o' + #srcexts = ['.c','.cpp'] + + gs_compile_list(compiler,srclist,**kwargs) + + return + +def gfortran_compile(srcfile,**kwargs): + compiler = 'gfortran' + kwargs['objext'] = '.o' + #srcexts = ['.f','.f90','.f77'] + + gs_compile(compiler,srcfile,**kwargs) + + return + +def gfortran_compile_all(srcdir,**kwargs): + compiler = 'gfortran' + kwargs['objext'] = '.o' + srcexts = ['.f','.f90','.f77'] + + gs_compile_all(compiler,srcdir,srcexts,**kwargs) + + return + +def gfortran_compile_list(srclist,**kwargs): + compiler = 'gfortran' + kwargs['objext'] = '.o' + #srcexts = ['.f','.f90','.f77'] + + gs_compile_list(compiler,srclist,**kwargs) + + return + +def clang_compile(srcfile,**kwargs): + compiler = 'clang++' + kwargs['objext'] = '.o' + #srcexts = ['.c','.cpp'] + + gs_compile(compiler,srcfile,**kwargs) + + return + +def clang_compile_all(srcdir,**kwargs): + compiler = 'clang++' + kwargs['objext'] = '.o' + srcexts = ['.c','.cpp'] + + gs_compile_all(compiler,srcdir,srcexts,**kwargs) + + return + +def clang_compile_list(srclist,**kwargs): + compiler = 'clang++' + kwargs['objext'] = '.o' + #srcexts = ['.c','.cpp'] + + gs_compile_list(compiler,srclist,**kwargs) + + return \ No newline at end of file diff --git a/compscripts/old/linux64.makelib.py b/compscripts/old/linux64.makelib.py new file mode 100644 index 0000000..73dc20d --- /dev/null +++ b/compscripts/old/linux64.makelib.py @@ -0,0 +1,45 @@ +#!/usr/bin/python3 + +import os,sys,subprocess,math +from complib2 import * + +import shutil +#from distutils.dir_util import copy_tree as copy_tree #this version does overwrites +from shutil import copytree as copytree + +libname = 'amsculib2.linux64' #prefix static library name to generate +targetname = 'test' #create this executable when compiling tests +commonincdir = "../../linux64/include" +commonlibdir = "../../linux64/lib" +localbindir = "./bin_linux64" +cc = 'nvcc' #compiler +srcexts = ['.c','.cpp','.cu'] +mainsrc = ['main.c','main.cpp','main.cu'] #ignore these files when compiling the static library + +kwargs = dict() +include = "-I./include -I{}".format(commonincdir) +kwargs['include'] = include +#-dc flag: relocatable device code - needed for device functions to link in different "execution units" +kwargs['flags'] = "-dc" +kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir) +kwargs['libflags'] = "-l{}".format(libname) +kwargs['linkerflags'] = "" +kwargs['recurse'] = True + +#find all source files, except the main project files +files = flist('./src',exts = srcexts, recurse=True) +files = except_contains(files,mainsrc) +objfiles = replaceexts(files,'.o') +objfiles_sss = list_to_sss(objfiles) + +#compile all the source files in the list +gs_compile_list(cc,files,**kwargs) + +#archive all the source files into a static library +ar_list(objfiles,'{}/lib{}.a'.format(localbindir,libname)) + +#Push any libraries to the common lib folder +shutil.copy('{}/lib{}.a'.format(localbindir,libname),commonlibdir) + +#Copy include files to the common include folder +copytree('./include/',commonincdir+'/',dirs_exist_ok=True) diff --git a/compscripts/old/linux64.maketest.py b/compscripts/old/linux64.maketest.py new file mode 100644 index 0000000..8364642 --- /dev/null +++ b/compscripts/old/linux64.maketest.py @@ -0,0 +1,38 @@ +#!/usr/bin/python3 + +import os,sys,subprocess,math +from complib2 import * + +import shutil + +libname = 'amsculib2.linux64' #prefix static library name to generate +targetname = 'test' #create this executable when compiling tests +commonincdir = "../../linux64/include" +commonlibdir = "../../linux64/lib" +localbindir = "./bin_linux64" +cc = 'nvcc' #compiler +srcexts = ['.c','.cpp','.cu'] +mainsrc = ['main.c','main.cpp','main.cu'] #ignore these files when compiling the static library + +kwargs = dict() +include = "-I./include -I{}".format(commonincdir) +kwargs['include'] = include +#-dc flag: relocatable device code - needed for device functions to link in different "execution units" +kwargs['flags'] = "-dc" +kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir) +kwargs['libflags'] = "-l{}".format(libname) +kwargs['linkerflags'] = "" +kwargs['recurse'] = True + +#Pull required binary dynamic libraries to the bin folder +#shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir); +#shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir); +#shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64'); + +#Designate source files for main test program +fsrc = ['./src/main.cu'] +fobj = replaceexts(fsrc,'.o') + +#Compile test programs +gs_compile_list(cc,fsrc,**kwargs) +gs_link_list(cc,list_to_sss(fobj),'{}/{}'.format(localbindir,targetname),**kwargs) diff --git a/compscripts/old/msvc.makelib.py b/compscripts/old/msvc.makelib.py new file mode 100644 index 0000000..3567025 --- /dev/null +++ b/compscripts/old/msvc.makelib.py @@ -0,0 +1,45 @@ +#!/usr/bin/python3 + +import os,sys,subprocess,math +from complib2 import * + +import shutil +from shutil import copytree as copytree + +libname = 'assetcuda.msvc64' #prefix static library name to generate +targetname = 'main' #create this executable when compiling tests +commonincdir = "../../winx64/include" +commonlibdir = "../../winx64/lib" +localbindir = "./bin_winx64" +cc = 'nvcc' #compiler +srcexts = ['.c','.cpp'] +mainsrc = ['main.c','main.cpp','main.cu'] #ignore these files when compiling the static library + +kwargs = dict() +include = "-I./include -I{}".format(commonincdir) +kwargs['include'] = include +kwargs['flags'] = "/O2" +kwargs['libdir'] = "/LIBPATH:{} /LIBPATH:{}".format(localbindir,commonlibdir) +kwargs['libflags'] = "-l{}".format(libname) +kwargs['linkerflags'] = "" +kwargs['recurse'] = True + +#find all source files, except the main project files +files = flist('./src',exts = srcexts, recurse=True) +files = except_contains(files,mainsrc) +objfiles = replaceexts(files,'.obj') +objfiles_sss = list_to_sss(objfiles) + +#compile all the source files in the list +msvc_compile_list(cc,files,**kwargs) +#gs_compile_list(cc,files,**kwargs) + +#archive all the source files into a static library +#ar_list(objfiles,'{}/lib{}.a'.format(localbindir,libname)) +msvc_lib_list(objfiles,'{}/lib{}.lib'.format(localbindir,libname)) + +#Push any libraries to the common lib folder +shutil.copy('{}/lib{}.lib'.format(localbindir,libname),commonlibdir) + +#Copy include files to the common include folder +copytree('./include/',commonincdir+'/',dirs_exist_ok=True) diff --git a/compscripts/old/msvc.maketest.py b/compscripts/old/msvc.maketest.py new file mode 100644 index 0000000..048e038 --- /dev/null +++ b/compscripts/old/msvc.maketest.py @@ -0,0 +1,39 @@ +#!/usr/bin/python3 + +import os,sys,subprocess,math +from complib2 import * + +import shutil +from distutils.dir_util import copy_tree as copy_tree #this version does overwrites + +libname = 'assetcuda.msvc64' #prefix static library name to generate +targetname = 'tests.exe' #create this executable when compiling tests +commonincdir = "../../winx64/include" +commonlibdir = "../../winx64/lib" +localbindir = "./bin_winx64" +cc = 'nvcc' #compiler +srcexts = ['.c','.cpp'] +mainsrc = ['main.c','main.cpp','main.cu'] #ignore these files when compiling the static library + +kwargs = dict() +include = "-I./include -I{}".format(commonincdir) +kwargs['include'] = include +kwargs['flags'] = "/O2" +kwargs['libdir'] = "/LIBPATH:{} /LIBPATH:{}".format(localbindir,commonlibdir) +#kwargs['libflags'] = "lib{}.lib libamsearthtools.msvc64.lib libamsmeshtools.msvc64.lib libamsmathlib3.msvc64.lib libamsmatrix_cpp.msvc64.lib liblapack.a libblas.a libamsstring3.msvc64.lib libamsmathutil2.msvc64.lib".format(libname) +kwargs['libflags'] = "lib{}.lib".format(libname) +kwargs['linkerflags'] = "" +kwargs['recurse'] = True + +#Pull required binary dynamic libraries to the bin folder +#shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir); +#shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir); +#shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64'); + +#Designate source files for main test program +fsrc = ['./src/main.cu'] +fobj = replaceexts(fsrc,'.obj') + +#Compile test programs +msvc_compile_list(cc,fsrc,**kwargs) +msvc_link_list(list_to_sss(fobj),'{}/{}'.format(localbindir,targetname),**kwargs) diff --git a/compscripts/old/winnvcc.makelib.py b/compscripts/old/winnvcc.makelib.py new file mode 100644 index 0000000..062a347 --- /dev/null +++ b/compscripts/old/winnvcc.makelib.py @@ -0,0 +1,44 @@ +#!/usr/bin/python3 + +import os,sys,subprocess,math +from complib2 import * + +import shutil +from distutils.dir_util import copy_tree as copy_tree #this version does overwrites + +libname = 'amsculib2.msvc64' #prefix static library name to generate +targetname = 'test' #create this executable when compiling tests +commonincdir = "../../winx64/include" +commonlibdir = "../../winx64/lib" +localbindir = "./bin_winx64" +cc = 'nvcc' #compiler +srcexts = ['.c','.cpp','.cu'] +mainsrc = ['main.c','main.cpp'] #ignore these files when compiling the static library + +kwargs = dict() +include = "-I./include -I{}".format(commonincdir) +kwargs['include'] = include +kwargs['flags'] = "-dc" +kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir) +kwargs['libflags'] = "-l{}".format(libname) +kwargs['linkerflags'] = "" +kwargs['recurse'] = True + +#find all source files, except the main project files +files = flist('./src',exts = srcexts, recurse=True) +files = except_contains(files,mainsrc) +objfiles = replaceexts(files,'.o') +objfiles_sss = list_to_sss(objfiles) + +#compile all the source files in the list +gs_compile_list(cc,files,**kwargs) + +#archive all the source files into a static library +#ar_list(objfiles,'{}/lib{}.a'.format(localbindir,libname)) +msvc_lib_list(objfiles,'{}/lib{}.lib'.format(localbindir,libname)) + +#Push any libraries to the common lib folder +shutil.copy('{}/lib{}.lib'.format(localbindir,libname),commonlibdir) + +#Copy include files to the common include folder +copy_tree('./include/',commonincdir+'/') diff --git a/compscripts/old/winnvcc.maketest.py b/compscripts/old/winnvcc.maketest.py new file mode 100644 index 0000000..8f1f2f7 --- /dev/null +++ b/compscripts/old/winnvcc.maketest.py @@ -0,0 +1,38 @@ +#!/usr/bin/python3 + +import os,sys,subprocess,math +from complib2 import * + +import shutil +from distutils.dir_util import copy_tree as copy_tree #this version does overwrites + +libname = 'amsculib2.msvc64' #prefix static library name to generate +targetname = 'test' #create this executable when compiling tests +commonincdir = "../../winx64/include" +commonlibdir = "../../winx64/lib" +localbindir = "./bin_winx64" +cc = 'nvcc' #compiler +srcexts = ['.c','.cpp','.cu'] +mainsrc = ['main.c','main.cpp'] #ignore these files when compiling the static library + +kwargs = dict() +include = "-I./include -I{}".format(commonincdir) +kwargs['include'] = include +kwargs['flags'] = "-dc" +kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir) +kwargs['libflags'] = "-llib{}".format(libname) +kwargs['linkerflags'] = "" +kwargs['recurse'] = True + +#Pull required binary dynamic libraries to the bin folder +#shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir); +#shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir); +#shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64'); + +#Designate source files for main test program +fsrc = ['./src/main.cpp'] +fobj = replaceexts(fsrc,'.o') + +#Compile test programs +gs_compile_list(cc,fsrc,**kwargs) +gs_link_list(cc,list_to_sss(fobj),'{}/{}'.format(localbindir,targetname),**kwargs) diff --git a/compscripts/winnvcc.makelib.py b/compscripts/winnvcc.makelib.py new file mode 100644 index 0000000..3a50b2d --- /dev/null +++ b/compscripts/winnvcc.makelib.py @@ -0,0 +1,49 @@ +#!/usr/bin/python3 + +import os,sys,subprocess,math +from complib2 import * +from complib3 import gs_incremental_compile, gs_incremental_compile_list + +import shutil +from shutil import copytree + +libname = 'amsculib2.msvc64' #prefix static library name to generate +targetname = 'test' #create this executable when compiling tests +commonincdir = "../../winx64/include" +commonlibdir = "../../winx64/lib" +localbindir = "./bin_winx64" +cc = 'nvcc' #compiler +srcexts = ['.c','.cpp','.cu'] +mainsrc = ['main.cu'] #ignore these files when compiling the static library + +kwargs = dict() +include = "-I./include -I{}".format(commonincdir) +kwargs['include'] = include +kwargs['flags'] = "-dc" +kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir) +kwargs['libflags'] = "-l{}".format(libname) +kwargs['linkerflags'] = "" +kwargs['recurse'] = True +kwargs['objstore'] = "./objstore" +kwargs['searchincdirs'] = ['./include'] + +#find all source files, except the main project files +files = flist('./src',exts = srcexts, recurse=True) +files = except_contains(files,mainsrc) +objfiles = replaceexts(files,'.o') +objfiles_sss = list_to_sss(objfiles) + +#compile all the source files in the list +#gs_compile_list(cc,files,**kwargs) +gs_incremental_compile_list(cc,files,**kwargs) + +#archive all the source files into a static library +#ar_list(objfiles,'{}/lib{}.a'.format(localbindir,libname)) +objlist = flist(kwargs['objstore'],exts='.o',recurse=True) +msvc_lib_list(objlist,'{}/lib{}.lib'.format(localbindir,libname)) + +# #Push any libraries to the common lib folder +shutil.copy('{}/lib{}.lib'.format(localbindir,libname),commonlibdir) + +# #Copy include files to the common include folder +copytree('./include/',commonincdir+'/',dirs_exist_ok=True) diff --git a/compscripts/winnvcc.maketest.py b/compscripts/winnvcc.maketest.py new file mode 100644 index 0000000..438b365 --- /dev/null +++ b/compscripts/winnvcc.maketest.py @@ -0,0 +1,43 @@ +#!/usr/bin/python3 + +import os,sys,subprocess,math +from complib2 import * +from complib3 import gs_incremental_compile, gs_incremental_compile_list + +import shutil +from shutil import copytree + +libname = 'amsculib2.msvc64' #prefix static library name to generate +targetname = 'test' #create this executable when compiling tests +commonincdir = "../../winx64/include" +commonlibdir = "../../winx64/lib" +localbindir = "./bin_winx64" +cc = 'nvcc' #compiler +srcexts = ['.c','.cpp','.cu'] +mainsrc = ['main.cu'] #ignore these files when compiling the static library + +kwargs = dict() +include = "-I./include -I{}".format(commonincdir) +kwargs['include'] = include +kwargs['flags'] = "-dc" +kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir) +kwargs['libflags'] = "-llib{} -llibamsculib2.msvc64".format(libname) +kwargs['linkerflags'] = "" +kwargs['recurse'] = True +kwargs['objstore'] = "./objstore" +kwargs['searchincdirs'] = ['./include'] + +#-lamsmathlib3.linux64 -lamsstring3.linux64 -lamsmatrix_cpp.linux64 -llapack -lblas -lgfortran -lamsmathutilthread.linux64 -lamsmathutil2.linux64 + +#Pull required binary dynamic libraries to the bin folder +#shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir); +#shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir); +#shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64'); + +#Designate source files for main test program +fsrc = ['./src/main.cu'] +fobj = replaceexts(fsrc,'.o') + +#Compile test programs +gs_compile_list(cc,fsrc,**kwargs) +gs_link_list(cc,list_to_sss(fobj),'{}/{}'.format(localbindir,targetname),**kwargs) diff --git a/include/amsculib2/amscu_comp128.hpp b/include/amsculib2/amscu_comp128.hpp new file mode 100644 index 0000000..076c0e4 --- /dev/null +++ b/include/amsculib2/amscu_comp128.hpp @@ -0,0 +1,89 @@ +#ifndef __AMSCU_COMP128_HPP__ +#define __AMSCU_COMP128_HPP__ + +namespace amscuda +{ +namespace cmp +{ + + class cucomp128 + { + public: + double real; + double imag; + + __host__ __device__ cucomp128(); + __host__ __device__ ~cucomp128(); + __host__ __device__ cucomp128(const cucomp128 &other); + __host__ __device__ cucomp128(const double &other); + + __host__ __device__ cucomp128& operator=(cucomp128& other); + __host__ __device__ const cucomp128& operator=(const cucomp128& other); + __host__ __device__ cucomp128& operator=(double& other); + __host__ __device__ const cucomp128& operator=(const double& other); + + __host__ __device__ double& operator[](int& ind); + __host__ __device__ const double& operator[](const int& ind) const; + + __host__ __device__ cucomp128 operator+(const cucomp128& z); + __host__ __device__ cucomp128 operator-(const cucomp128& z); + __host__ __device__ cucomp128 operator*(const cucomp128& z); + __host__ __device__ cucomp128 operator/(const cucomp128& z); + + __host__ __device__ cucomp128 operator+(const double& z); + __host__ __device__ cucomp128 operator-(const double& z); + __host__ __device__ cucomp128 operator*(const double& z); + __host__ __device__ cucomp128 operator/(const double& z); + + __host__ __device__ friend cucomp128 operator-(const cucomp128& z); //negation sign + + //comparison operators + __host__ __device__ bool operator==(const cucomp128& z) const; + __host__ __device__ bool operator!=(const cucomp128& z) const; + __host__ __device__ bool operator>(const cucomp128& z) const; + __host__ __device__ bool operator<(const cucomp128& z) const; + __host__ __device__ bool operator>=(const cucomp128& z) const; + __host__ __device__ bool operator<=(const cucomp128& z) const; + + __host__ __device__ bool isnan() const; + __host__ __device__ bool isinf() const; + + __host__ __device__ bool isreal() const; + __host__ __device__ bool isimag() const; + __host__ __device__ bool iszero() const; + __host__ __device__ double arg() const; + __host__ __device__ double mag() const; + __host__ __device__ cucomp128 conj() const; + }; + + __host__ __device__ double arg(cucomp128 z); + + __host__ __device__ cucomp128 dtocomp(double _r, double _i); + __host__ __device__ double real(cucomp128 z); + __host__ __device__ double imag(cucomp128 z); + __host__ __device__ cucomp128 sin(cucomp128 z); + __host__ __device__ cucomp128 cos(cucomp128 z); + __host__ __device__ cucomp128 tan(cucomp128 z); + __host__ __device__ cucomp128 exp(cucomp128 z); + __host__ __device__ cucomp128 log(cucomp128 z); + __host__ __device__ double abs(cucomp128 z); + __host__ __device__ cucomp128 conj(cucomp128 z); + + // //need hyperbolic trig Functions + __host__ __device__ cucomp128 cosh(cucomp128 z); + __host__ __device__ cucomp128 sinh(cucomp128 z); + __host__ __device__ cucomp128 tanh(cucomp128 z); + + __host__ __device__ cucomp128 pow(cucomp128 z1, cucomp128 z2); + + // //returns "complex sign" of complex number - 0, or a unit number with same argument + __host__ __device__ cucomp128 csgn(cucomp128 z); + +void test_cucomp128_1(); + + +}; //end namespace cmp +}; //end namespace amscuda + +#endif + diff --git a/include/amsculib2/amscu_comp64.hpp b/include/amsculib2/amscu_comp64.hpp new file mode 100644 index 0000000..bc4430b --- /dev/null +++ b/include/amsculib2/amscu_comp64.hpp @@ -0,0 +1,88 @@ +#ifndef __AMSCU_COMP64_HPP__ +#define __AMSCU_COMP64_HPP__ + +namespace amscuda +{ +namespace cmp +{ + + class cucomp64 + { + public: + float real; + float imag; + + __host__ __device__ cucomp64(); + __host__ __device__ ~cucomp64(); + __host__ __device__ cucomp64(const cucomp64 &other); + __host__ __device__ cucomp64(const float &other); + + __host__ __device__ cucomp64& operator=(cucomp64& other); + __host__ __device__ const cucomp64& operator=(const cucomp64& other); + __host__ __device__ cucomp64& operator=(float& other); + __host__ __device__ const cucomp64& operator=(const float& other); + + __host__ __device__ float& operator[](int& ind); + __host__ __device__ const float& operator[](const int& ind) const; + + __host__ __device__ cucomp64 operator+(const cucomp64& z); + __host__ __device__ cucomp64 operator-(const cucomp64& z); + __host__ __device__ cucomp64 operator*(const cucomp64& z); + __host__ __device__ cucomp64 operator/(const cucomp64& z); + + __host__ __device__ cucomp64 operator+(const float& z); + __host__ __device__ cucomp64 operator-(const float& z); + __host__ __device__ cucomp64 operator*(const float& z); + __host__ __device__ cucomp64 operator/(const float& z); + + __host__ __device__ friend cucomp64 operator-(const cucomp64& z); //negation sign + + //comparison operators + __host__ __device__ bool operator==(const cucomp64& z) const; + __host__ __device__ bool operator!=(const cucomp64& z) const; + __host__ __device__ bool operator>(const cucomp64& z) const; + __host__ __device__ bool operator<(const cucomp64& z) const; + __host__ __device__ bool operator>=(const cucomp64& z) const; + __host__ __device__ bool operator<=(const cucomp64& z) const; + + __host__ __device__ bool isnan() const; + __host__ __device__ bool isinf() const; + + __host__ __device__ bool isreal() const; + __host__ __device__ bool isimag() const; + __host__ __device__ bool iszero() const; + __host__ __device__ float arg() const; + __host__ __device__ float mag() const; + __host__ __device__ cucomp64 conj() const; + }; + + __host__ __device__ float arg(cucomp64 z); + + __host__ __device__ cucomp64 dtocomp64(float _r, float _i); + __host__ __device__ float real(cucomp64 z); + __host__ __device__ float imag(cucomp64 z); + __host__ __device__ cucomp64 sin(cucomp64 z); + __host__ __device__ cucomp64 cos(cucomp64 z); + __host__ __device__ cucomp64 tan(cucomp64 z); + __host__ __device__ cucomp64 exp(cucomp64 z); + __host__ __device__ cucomp64 log(cucomp64 z); + __host__ __device__ float abs(cucomp64 z); + __host__ __device__ cucomp64 conj(cucomp64 z); + + // //need hyperbolic trig Functions + __host__ __device__ cucomp64 cosh(cucomp64 z); + __host__ __device__ cucomp64 sinh(cucomp64 z); + __host__ __device__ cucomp64 tanh(cucomp64 z); + + __host__ __device__ cucomp64 pow(cucomp64 z1, cucomp64 z2); + + // //returns "complex sign" of complex number - 0, or a unit number with same argument + __host__ __device__ cucomp64 csgn(cucomp64 z); + +void test_cucomp64_1(); + +}; //end namespace cmp +}; //end namespace amscuda + +#endif + diff --git a/include/amsculib2/amscu_cudafunctions.hpp b/include/amsculib2/amscu_cudafunctions.hpp new file mode 100644 index 0000000..f3aa0da --- /dev/null +++ b/include/amsculib2/amscu_cudafunctions.hpp @@ -0,0 +1,40 @@ +#ifndef __AMSCU_CUDAFUNCTIONS_HPP__ +#define __AMSCU_CUDAFUNCTIONS_HPP__ + + +namespace amscuda +{ + // device memory operations + // I'm trying to avoid some of the boilerplate mental overhead involved + // in calling cuda functions and handling errors + + //frees devbuffer if it is not already NULL, and sets devbuffer to NULL + //wrapper to cudaFree + template int cuda_free(T **devptr); + + //copies hostbuffer to devbuffer + //initializes devbuffer from NULL if devbuffer is NULL + //if overwrite is true, deletes and reallocates devbuffer on device (for resizing) + template int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite); + + //copies info from devbuffer to hostbuffer + //initialzies hostbuffer from NULL if NULL + //if overwrite is true, deletes and reallocates hostbuffer on host with new[] (for resizing) + template int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite); + + //wrapper for cudaMemcpy - copies an item or struct (count 1) to the device + //initializes devptr from NULL if not already initialized + template int cuda_copytodevice(T *hostptr, T **devptr); + + //wrapper for cudaMemcpy - copies an item or struct (count 1) from device + //initializes hostptr from NULL with new if not already initialized + template int cuda_copyfromdevice(T *devptr, T **hostptr); + + int cuda_errortrap(const char *msgheader); + +}; + +#include + +#endif + diff --git a/include/amsculib2/amscu_cudafunctions_impl.hpp b/include/amsculib2/amscu_cudafunctions_impl.hpp new file mode 100644 index 0000000..34b6382 --- /dev/null +++ b/include/amsculib2/amscu_cudafunctions_impl.hpp @@ -0,0 +1,228 @@ +#ifndef __AMSCU_CUDAFUNCTIONS_IMPL_HPP__ +#define __AMSCU_CUDAFUNCTIONS_IMPL_HPP__ + +namespace amscuda +{ + +//frees devbuffer if it is not already NULL, and sets devbuffer to NULL +//wrapper to cudaFree +template int cuda_free(T **devptr) +{ + int ret = 0; + cudaError_t err = cudaSuccess; + + if(*devptr==NULL) + { + return ret; //devbuffer is already NULL/freed + } + + err = cudaFree(*devptr); + if(err!=cudaSuccess) + { + ret = -1; //failed to free device pointer + *devptr = NULL; // - ? should only happen if I'm trying to double-free something + } + else + { + ret = 1; + *devptr = NULL; + } + + return ret; +} + +//copies hostbuffer to devbuffer +//initializes devbuffer from NULL if devbuffer is NULL +//if overwrite is true, deletes and reallocates devbuffer on device (for resizing) +template int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite) +{ + int ret = 0; + cudaError_t err = cudaSuccess; + + if(N<=0) + { + ret = 0; + return ret; + } + + if(hostbuffer==NULL) + { + ret = -2; //host buffer is NULL + return ret; + } + + if(overwrite==1) + { + if(*devbuffer !=NULL) + { + cuda_free(devbuffer); + } + } + + if(*devbuffer==NULL) + { + err = cudaMalloc(devbuffer,sizeof(T)*N); + if(err!=cudaSuccess) + { + ret = -3; //failed to allocate + *devbuffer = NULL; + return ret; + } + } + + err = cudaMemcpy(*devbuffer,hostbuffer,sizeof(T)*N,cudaMemcpyHostToDevice); + if(err!=cudaSuccess) + { + ret = -4; //failed to copy + } + else + { + ret = 1; + } + + + return ret; +} + +//copies info from devbuffer to hostbuffer +//initialzies hostbuffer from NULL if NULL +//if overwrite is true, deletes and reallocates hostbuffer on host (for resizing) +template int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite) +{ + int ret = 0; + cudaError_t err = cudaSuccess; + + if(N<=0) + { + ret = 0; + return ret; + } + + if(devbuffer==NULL) + { + ret = -5; //null dev buffer + return ret; + } + + if(overwrite==1 && *hostbuffer!=NULL) + { + delete[] (*hostbuffer); hostbuffer = NULL; + } + + if(*hostbuffer==NULL) + { + *hostbuffer = new(std::nothrow) T[N]; + if(*hostbuffer==NULL) + { + ret = -6; //failed to allocate host buffer + return ret; + } + } + + err = cudaMemcpy(*hostbuffer, devbuffer, sizeof(T)*N, cudaMemcpyDeviceToHost); + if(err!=cudaSuccess) + { + ret = -7; //failed to copy + } + else + { + ret = 1; + } + + return ret; +} + +//wrapper for cudaMemcpy - copies an item or struct (count 1) to the device +//initializes devptr from NULL if not already initialized +template int cuda_copytodevice(T *hostptr, T **devptr) +{ + int ret = 0; + cudaError_t err = cudaSuccess; + bool overwrite = 1; + + if(hostptr==NULL) + { + ret = -2; //host buffer is NULL + return ret; + } + + if(overwrite==1) + { + if(*devptr !=NULL) + { + cuda_free(devptr); + } + } + + if(*devptr==NULL) + { + err = cudaMalloc(devptr,sizeof(T)); + if(err!=cudaSuccess) + { + ret = -3; //failed to allocate + *devptr = NULL; + return ret; + } + } + + err = cudaMemcpy(*devptr,hostptr,sizeof(T),cudaMemcpyHostToDevice); + if(err!=cudaSuccess) + { + ret = -4; //failed to copy + } + else + { + ret = 1; + } + + + return ret; +} + +//wrapper for cudaMemcpy - copies an item or struct (count 1) from device +//initializes hostptr from NULL with new if not already initialized +template int cuda_copyfromdevice(T *devptr, T **hostptr) +{ + int ret = 0; + cudaError_t err = cudaSuccess; + bool overwrite = 1; + + if(devptr==NULL) + { + ret = -5; //null dev buffer + return ret; + } + + if(overwrite==1 && *hostptr!=NULL) + { + delete (*hostptr); hostptr = NULL; + } + + if(*hostptr==NULL) + { + *hostptr = new(std::nothrow) T; + if(*hostptr==NULL) + { + ret = -6; //failed to allocate host buffer + return ret; + } + } + + err = cudaMemcpy(*hostptr, devptr, sizeof(T), cudaMemcpyDeviceToHost); + if(err!=cudaSuccess) + { + ret = -7; //failed to copy + } + else + { + ret = 1; + } + + return ret; +} + + +}; + +#endif + diff --git a/include/amsculib2/amscu_random.hpp b/include/amsculib2/amscu_random.hpp new file mode 100644 index 0000000..5ec2fba --- /dev/null +++ b/include/amsculib2/amscu_random.hpp @@ -0,0 +1,55 @@ +#ifndef __AMSCU_RANDOM_HPP__ +#define __AMSCU_RANDOM_HPP__ + +namespace amscuda +{ + +// Random Number Gerneators + + +// faster floating point hash function used in fractal generators +__device__ __host__ float fhash1d_su(float x); + +__device__ __host__ float fhash3d_su(float x, float y, float z); + +__device__ __host__ float fhash4d_su(float x, float y, float z, float w); + + +////////////////////////////////////////////////// +// Deterministic Pseudorandom int32_t Generator // +////////////////////////////////////////////////// + +//Next seed in simple 32 bit integer deterministic psuedo-rand generator +__host__ __device__ void dpr32_nextseed(int32_t *rseed_inout); + +//Simple 32 bit integer deterministic pseudo-random generator +// *not* for cryptography +// Frequency of generated floats should be uniform [0,1) +__host__ __device__ float dpr32_randf(int32_t *rseed_inout); + +//box muller standard normal pseudorandom variable +__host__ __device__ float dpr32_randnf(int32_t *rseed_inout); + +////////////////////////////////////////////////// +// Deterministic Pseudorandom int64_t Generator // +////////////////////////////////////////////////// + +//operates without side-effects on explicit seed for threaded use +//deterministic pseudorandom number generator - takes seed and returns next seed +__host__ __device__ void dpr64_nextseed(int64_t *seedinout); + +//deterministic pseudorandom number generator - takes seed and returns next seed +//returns uniformly distributed double +__host__ __device__ double dpr64_randd(int64_t *seedinout); + +__host__ __device__ float dpr64_randf(int64_t *seedinout); + + +void test_dprg64(); +void test_dprg32(); + + +}; //end namespace amscuda + +#endif + diff --git a/include/amsculib2/amscuarray.hpp b/include/amsculib2/amscuarray.hpp new file mode 100644 index 0000000..9049ad2 --- /dev/null +++ b/include/amsculib2/amscuarray.hpp @@ -0,0 +1,47 @@ +#ifndef __CUARRAY_HPP__ +#define __CUARRAY_HPP__ + +namespace amscuda +{ + +template class cuarray +{ +public: + int length; + T* data; + + __device__ __host__ cuarray(); + __device__ __host__ ~cuarray(); + + //Only call this on the device for thread/block local + // dynamic arrays + __device__ __host__ int resize(const int _length); + + __device__ __host__ int size() const; + __device__ __host__ T& at(const int I); + __device__ __host__ const T& at(const int I) const; + + __device__ __host__ T& operator[](const int I); + __device__ __host__ const T& operator[](const int I) const; + + + + __host__ int device_send(cuarray **dptr); + __host__ int _device_send_overwrite(cuarray **dptr); + __host__ int _device_send_copy(cuarray *dptr); + + __host__ int device_pull(cuarray *dptr); + __host__ int device_free(cuarray **dptr); + + __host__ int device_length(cuarray *dptr); + __host__ T* device_data_ptr(cuarray *dptr); + +}; + +void test_cuarray(); + +}; + +#include + +#endif \ No newline at end of file diff --git a/include/amsculib2/amscuarray_dops.hpp b/include/amsculib2/amscuarray_dops.hpp new file mode 100644 index 0000000..2d3f1fe --- /dev/null +++ b/include/amsculib2/amscuarray_dops.hpp @@ -0,0 +1,76 @@ +#ifndef __AMSCUARRAY_DOPS_HPP__ +#define __AMSCUARRAY_DOPS_HPP__ + +//Device Operations on Arrays +// + +//Device Operations on Device Buffers +// dodb + +namespace amscuda +{ + + + //sum + template T devcuarray_sum(cuarray *devptr); + + template T dbuff_sum(T *devbuffer, int N); + + + struct dbuff_statstruct + { + public: + float min; + float max; + float mean; + float stdev; + float sum; + }; + + //stats (min,max,mean,stdev) + + template void dbuff_minmax(T *devbuffer, int N, T *min, T *max); + + template dbuff_statstruct dbuff_stats(T *devbuffer, int N); // + + //sets all elements to setto + template void dbuff_setall(T *devbuffer, int N, T setto, int nblocks, int nthreads); + + //random device buffer functions + void dbuff_rand_dpr32(float *devbuffer, int N, int32_t *rseedinout, int nblocks, int nthreads); // + void dbuff_rand_dpr32n(float *devbuffer, int N, int32_t *rseedinout, int nblocks, int nthreads); // + + + void dbuff_rand_dpr64(float *devbuffer, int N, int64_t *rseedinout, int nblocks, int nthreads); // + + //Elementwise device-buffer vector binary operation + //takes two input arrays ( , ) --> one output array + template void dbuff_vectorbinop(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads); + + //Elementwise device-buffer vector two-parameter operation + //takes one input array, and a constant paramter ( ) ---> one output array + template void dbuff_vectorbinop(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads); + + + //vector_add + template void dbuff_add(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); + template void dbuff_add(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); + template void dbuff_sub(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); + template void dbuff_sub(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); + template void dbuff_mult(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); + template void dbuff_mult(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); + template void dbuff_div(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); + template void dbuff_div(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); + template void dbuff_div(T par_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); + + + // Tests // + + void test_dbuff_rand_dpr32(); + +}; + +#include + +#endif + diff --git a/include/amsculib2/amscuarray_dops_impl.hpp b/include/amsculib2/amscuarray_dops_impl.hpp new file mode 100644 index 0000000..2cd7e88 --- /dev/null +++ b/include/amsculib2/amscuarray_dops_impl.hpp @@ -0,0 +1,404 @@ +#ifndef __AMSCUARRAY_DOPS_IMPL_HPP__ +#define __AMSCUARRAY_DOPS_IMPL_HPP__ + +namespace amscuda +{ + +template __global__ void dbuff_sum_kf(T *devbuffer, int N, T *rets) +{ + int I0 = threadIdx.x + blockIdx.x*blockDim.x; + int Is = blockDim.x*gridDim.x; + int I; + + T ret = (T) 0; + for(I=I0;I T devcuarray_sum(cuarray *devptr) +{ + T ret = T(); + cudaError_t err = cudaSuccess; + + cuarray ldptr; + + cudaMemcpy(&ldptr,devptr,sizeof(cuarray),cudaMemcpyDeviceToHost); + + ret = devbuffer_sum(ldptr.data,ldptr.length); + + ldptr.data = NULL; + ldptr.length=0; + + return ret; +} + +template T dbuff_sum(T *dbuff, int N) +{ + int I; + T ret = T(); + cudaError_t err = cudaSuccess; + + int nblocks; + int nthreads; + + if(dbuff==NULL || N<=0) + { + return ret; + } + + if(N>100) + { + nblocks = 10; + nthreads = (int)sqrt((float) (N/nblocks)); + if(nthreads<=0) nthreads=1; + if(nthreads>512) nthreads=512; + } + else + { + nblocks = 1; + nthreads = 1; + } + + T *rets = NULL; + T *devrets = NULL; + + rets = new T[nblocks*nthreads]; + cudaMalloc(&devrets,sizeof(T)*nblocks*nthreads); + + dbuff_sum_kf<<>>(dbuff,N,devrets); + cudaDeviceSynchronize(); + err = cudaGetLastError(); + if(err!=cudaSuccess) + { + printf("amscu::dbuff_sum error: %s\n",cudaGetErrorString(err)); + } + + cudaMemcpy(rets,devrets,sizeof(T)*nblocks*nthreads,cudaMemcpyDeviceToHost); + + ret = (T)0; + for(I=0;I __global__ void dbuff_minmax_kf(T *devbuffer, int N, T *maxs, T *mins) +{ + int I0 = threadIdx.x + blockIdx.x*blockDim.x; + int Is = blockDim.x*gridDim.x; + int I; + + for(I=I0;Imaxs[I0]) + { + maxs[I0] = devbuffer[I]; + } + if(devbuffer[I] void dbuff_minmax(T *devbuffer, int N, T *min, T *max) +{ + cudaError_t err = cudaSuccess; + int nblocks; + int nthreads; + int I; + + T *maxs = NULL; + T *dev_maxs = NULL; + T *mins = NULL; + T *dev_mins = NULL; + + T localmax = T(0); + T localmin = T(0); + + if(devbuffer==NULL || N<=0) + { + if(min!=NULL) *min = T(0); + if(max!=NULL) *max = T(0); + return; + } + + if(N>25) + { + nblocks = 25; + nthreads = (int) sqrt((float)(N/nblocks)); + if(nthreads<1) nthreads = 1; + if(nthreads>512) nthreads = 512; + } + else + { + nblocks = 1; + nthreads = 1; + } + + maxs = new T[nblocks*nthreads]; + mins = new T[nblocks*nthreads]; + cudaMalloc(&dev_maxs,nblocks*nthreads); + cudaMalloc(&dev_mins,nblocks*nthreads); + + dbuff_minmax_kf<<>>(devbuffer,N,dev_maxs,dev_mins); + cudaDeviceSynchronize(); + err = cudaGetLastError(); + if(err!=cudaSuccess) + { + printf("amscu::dbuff_minmax error: %s\n",cudaGetErrorString(err)); + } + + cudaMemcpy(maxs,dev_maxs,sizeof(T)*nblocks*nthreads,cudaMemcpyDeviceToHost); + cudaMemcpy(mins,dev_mins,sizeof(T)*nblocks*nthreads,cudaMemcpyDeviceToHost); + + + for(I=0;Ilocalmax) localmax = maxs[I]; + if(mins[I] __global__ void dbuff_setall_kf(T *devbuffer, int N, T setto) +{ + int I0 = threadIdx.x + blockIdx.x*blockDim.x; + int Is = blockDim.x*gridDim.x; + int I; + + for(I=I0;I void dbuff_setall(T *devbuffer, int N, T setto, int nblocks, int nthreads) +{ + cudaError_t err = cudaSuccess; + + if(devbuffer==NULL || N<=0) + { + return; + } + + dbuff_setall_kf<<>>(devbuffer,N,setto); + cudaDeviceSynchronize(); + err = cudaGetLastError(); + if(err!=cudaSuccess) + { + printf("amscu::dbuff_setall error: %s\n",cudaGetErrorString(err)); + } + + return; +} + +template __global__ void dbuff_vectorbinop_kf1(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2)) +{ + int I0 = threadIdx.x + blockIdx.x*blockDim.x; + int Is = blockDim.x*gridDim.x; + int I; + + T1 a; + T2 b; + T3 c; + + for(I=I0;I __global__ void dbuff_vectorbinop_kf2(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2)) +{ + int I0 = threadIdx.x + blockIdx.x*blockDim.x; + int Is = blockDim.x*gridDim.x; + int I; + + T1 a; + T2 b; + T3 c; + + for(I=I0;I one output array +template void dbuff_vectorbinop(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads) +{ + cudaError_t err = cudaSuccess; + + if(dbuf_a == NULL || dbuf_b == NULL || dbuf_out == NULL || N<=0) + { + return; + } + + dbuff_vectorbinop_kf1<<>>(dbuf_a,dbuf_b,dbuf_out,N); + cudaDeviceSynchronize(); + err = cudaGetLastError(); + if(err!=cudaSuccess) + { + printf("amscu::devbuffer_vectorbinop error: %s\n",cudaGetErrorString(err)); + } + + return; +} + +//Elementwise device-buffer vector two-parameter operation +//takes one input array, and a constant paramter ( ) ---> one output array +template void dbuff_vectorbinop(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads) +{ + cudaError_t err = cudaSuccess; + + if(dbuf_a == NULL || dbuf_out == NULL || N<=0) + { + return; + } + + dbuff_vectorbinop_kf2<<>>(dbuf_a,par_b,dbuf_out,N); + cudaDeviceSynchronize(); + err = cudaGetLastError(); + if(err!=cudaSuccess) + { + printf("amscu::devbuffer_vectorbinop error: %s\n",cudaGetErrorString(err)); + } + + return; +} + +template T dbuff_add_fn(T a, T b) +{ + return a+b; +} + +template void dbuff_add(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads) +{ + dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,&dbuff_add_fn,nblocks,nthreads); + return; +} + +template void dbuff_add(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads) +{ + dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,&dbuff_add_fn,nblocks,nthreads); + return; +} + +template T dbuff_sub_fn(T a, T b) +{ + return a-b; +} + +template void dbuff_sub(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads) +{ + dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,&dbuff_sub_fn,nblocks,nthreads); + return; +} + +template void dbuff_sub(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads) +{ + dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,&dbuff_sub_fn,nblocks,nthreads); + return; +} + +template T dbuff_mult_fn(T a, T b) +{ + return a*b; +} + +template void dbuff_mult(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads) +{ + dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,&dbuff_mult_fn,nblocks,nthreads); + return; +} + +template void dbuff_mult(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads) +{ + dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,&dbuff_mult_fn,nblocks,nthreads); + return; +} + +template T dbuff_div_fn(T a, T b) +{ + return a/b; +} + +template void dbuff_div(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads) +{ + dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,&dbuff_div_fn,nblocks,nthreads); + return; +} + +template void dbuff_div(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads) +{ + dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,&dbuff_div_fn,nblocks,nthreads); + return; +} + +template T dbuff_ldiv_fn(T a, T b) +{ + return b/a; +} + + +template void dbuff_div(T par_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads) +{ + dbuff_vectorbinop(dbuff_b,par_a,dbuff_out,N,&dbuff_ldiv_fn,nblocks,nthreads); + return; +} + + +}; + +#endif + diff --git a/include/amsculib2/amscuarray_impl.hpp b/include/amsculib2/amscuarray_impl.hpp new file mode 100644 index 0000000..813d90b --- /dev/null +++ b/include/amsculib2/amscuarray_impl.hpp @@ -0,0 +1,323 @@ +#ifndef __CUARRAY_IMPL_HPP__ +#define __CUARRAY_IMPL_HPP__ + +namespace amscuda +{ + +// New Version cuarray +// simpler, less crap going on + +template __device__ __host__ cuarray::cuarray() +{ + length = 0; + data = NULL; +} + +template __device__ __host__ cuarray::~cuarray() +{ + if(data!=NULL) + { + delete[] data; data = NULL; + } + length = 0; +} + +template __device__ __host__ int cuarray::resize(const int _length) +{ + int ret = 0; + + T *newbuffer = NULL; + + if(length==_length) + { + //do nothing + ret = 1; + return ret; + } + if(_length<=0) + { + if(data!=NULL) + { + delete[] data; + data = NULL; + } + length = 0; + ret = 1; + } + + newbuffer = new T[_length]; + if(newbuffer==NULL) + { + ret = -1; //failed to allocate memory + return ret; + } + + int I; + T def; + + if(data!=NULL) + { + for(I=0;I __host__ int cuarray::device_send(cuarray **dptr) +{ + int ret = 0; + int dlength; + + if(*dptr==NULL) + { + ret = _device_send_overwrite(dptr); + } + else + { + dlength = device_length(*dptr); + if(dlength=length) + { + ret = _device_send_copy(*dptr); + } + else + { + ret = _device_send_overwrite(dptr); + } + } + + return ret; +} + +template __host__ int cuarray::_device_send_overwrite(cuarray **dptr) +{ + int ret = 0; + cuarray dlocal; + cudaError_t err = cudaSuccess; + device_free(dptr); + + if(length>=0 && data!=NULL) + { + err = cudaMalloc(dptr,sizeof(cuarray)); + if(err==cudaSuccess) + { + err = cudaMalloc(&(dlocal.data),sizeof(T)*length); + dlocal.length = length; + + if(err==cudaSuccess) + { + cudaMemcpy(*dptr,&dlocal,sizeof(cuarray),cudaMemcpyHostToDevice); + if(data!=NULL) + err = cudaMemcpy(dlocal.data,data,sizeof(T)*length,cudaMemcpyHostToDevice); + else + err = cudaSuccess; + if(err==cudaSuccess) + { + ret = 1; + } + else + { + ret = -3; + } + } + else + { + ret = -2; + } + } + else + { + ret = -1; + } + } + else + { + dlocal.data = NULL; + dlocal.length = 0; + err = cudaMalloc(dptr,sizeof(cuarray)); + if(err==cudaSuccess) + { + cudaMemcpy(*dptr,&dlocal,sizeof(cuarray),cudaMemcpyHostToDevice); + ret = 1; + } + else + { + ret = -4; + } + } + + + dlocal.data = NULL; + dlocal.length = -1; + + return ret; +} + +template __host__ int cuarray::_device_send_copy(cuarray *dptr) +{ + int ret = 0; + cudaError_t err = cudaSuccess; + T* ddata = NULL; + ddata = device_data_ptr(dptr); + + err = cudaMemcpy(ddata,data,sizeof(T)*length,cudaMemcpyHostToDevice); + if(err==cudaSuccess) + { + ret = 1; + } + else + { + ret = -1; + } + + return ret; +} + +template __host__ int cuarray::device_pull(cuarray *dptr) +{ + int ret = 0; + int dlength; + T* ddata; + cudaError_t err; + + if(dptr==NULL) + { + ret = -1; // null d pointer + return ret; + } + + dlength = device_length(dptr); + if(dlength!=length) + { + this->resize(dlength); + } + + ddata = device_data_ptr(dptr); + + if(length>0 && data!=NULL && ddata!=NULL) + { + err = cudaMemcpy(data,dptr,length*sizeof(T),cudaMemcpyDeviceToHost); + if(err==cudaSuccess) + { + ret = 1; + } + else + { + ret = -2; + } + } + + return ret; +} + +template __host__ int cuarray::device_free(cuarray **dptr) +{ + int ret = 0; + cuarray dlocal; + + if(*dptr!=NULL) + { + cudaMemcpy(&dlocal,dptr,sizeof(cuarray),cudaMemcpyDeviceToHost); + if(dlocal.data!=NULL) + { + cudaFree(dlocal.data); + dlocal.data = NULL; + } + + cudaFree(*dptr); + *dptr = NULL; + ret = 1; + } + + dlocal.data = NULL; + dlocal.length = -1; + + return ret; +} + +template __host__ int cuarray::device_length(cuarray *dptr) +{ + int ret = -1; + cuarray dlocal; + + if(dptr==NULL) + { + return ret; + } + + cudaMemcpy(&dlocal,dptr,sizeof(cuarray),cudaMemcpyDeviceToHost); + ret = dlocal.length; + + dlocal.data = NULL; + dlocal.length = -1; + + return ret; +} + +template __host__ T* cuarray::device_data_ptr(cuarray *dptr) +{ + T* ret = NULL; + cuarray dlocal; + + if(dptr==NULL) + { + return ret; + } + + cudaMemcpy(&dlocal,dptr,sizeof(cuarray),cudaMemcpyDeviceToHost); + ret = dlocal.data; + + dlocal.data = NULL; + dlocal.length = -1; + + return ret; +} + +template __device__ __host__ int cuarray::size() const +{ + return this->length; +} + +template __device__ __host__ T& cuarray::at(const int I) +{ + return this->data[I]; +} + +template __device__ __host__ const T& cuarray::at(const int I) const +{ + return this->data[I]; +} + +template __device__ __host__ T& cuarray::operator[](const int I) +{ + return this->data[I]; +} + +template __device__ __host__ const T& cuarray::operator[](const int I) const +{ + return this->data[I]; +} + +}; + + +#endif \ No newline at end of file diff --git a/include/amsculib2/amscuda_binarrrw.hpp b/include/amsculib2/amscuda_binarrrw.hpp new file mode 100644 index 0000000..14f3dde --- /dev/null +++ b/include/amsculib2/amscuda_binarrrw.hpp @@ -0,0 +1,19 @@ +#ifndef __AMSCUDA_BINARRRW_HPP__ +#define __AMSCUDA_BINARRRW_HPP__ + +namespace amscuda +{ + +template int fread_ndarray(FILE *fp, cuarray *shape, cuarray *buffer); +template int fwrite_ndarray(FILE *fp, const cuarray *shape, const cuarray *buffer); + +template int fwrite_buffer(FILE *fp, const int N, const T *buffer); +template int fread_buffer(FILE *fp, const int Nmax, const T *buffer); + + +}; //end namespace amscuda + +#include + +#endif + diff --git a/include/amsculib2/amscuda_binarrrw_impl.hpp b/include/amsculib2/amscuda_binarrrw_impl.hpp new file mode 100644 index 0000000..84674d1 --- /dev/null +++ b/include/amsculib2/amscuda_binarrrw_impl.hpp @@ -0,0 +1,194 @@ +#ifndef __AMSCUDA_BINARRRW_IMPL_HPP__ +#define __AMSCUDA_BINARRRW_IMPL_HPP__ + +namespace amscuda +{ + +template int fread_ndarray(FILE *fp, cuarray *shape, cuarray *buffer) +{ + int ret = 1; + int I; + long piprod; + int32_t q; + int cnt; + + int32_t Nd; + + if(fp!=NULL) + { + if(!feof(fp)) + { + cnt = fread(&Nd,sizeof(int32_t),1,fp); + if(Nd>0 && cnt>0) + { + shape->resize(Nd); + piprod = 1; + for(I=0;Iat(I) = q; + if(q>0) + { + piprod = piprod*q; + } + else + { + piprod = 0; + } + } + + buffer->resize(piprod); + if(piprod>0) + { + cnt = fread((buffer->data),sizeof(T),piprod,fp); + if(piprod==cnt) + { + ret = 1; + } + else + { + printf("fread_ndarray, read %d values, expecting %ld\n",cnt,piprod); + ret = 0; + } + } + } + else + { + printf("fread_ndarray: Read a number of dimensions<=0.\n"); + Nd = 0; + shape->resize(0); + buffer->resize(0); + } + } + else + { + printf("fread_ndarray: fp=NULL.\n"); + ret = 0; + } + } + else + { + ret = 0; + } + + return ret; +} + +template int fwrite_ndarray(FILE *fp, const cuarray *shape, const cuarray *buffer) +{ + int ret = 1; + long piprod; + int I; + int32_t Nd; + + if(fp==NULL) + { + ret = 0; + printf("fwrite_ndarray: fp=NULL\n"); + return ret; + } + + piprod = 1; + for(I=0;Isize();I++) + { + if(shape->at(I)>0) + { + piprod = piprod*shape->at(I); + } + else + { + piprod = 0; + } + } + + Nd = (int32_t) shape->size(); + + if(piprod!=buffer->size()) + { + ret = 0; + printf("fwrite_ndarray: buffer is size %ld, while shape is size %ld\n",(long)buffer->size(),(long)piprod); + return ret; + } + + fwrite(&Nd,sizeof(int32_t),1,fp); + if(Nd>0) + { + fwrite(shape->data,sizeof(int32_t),Nd,fp); + if(piprod>0) + { + fwrite(buffer->data,sizeof(T),buffer->size(),fp); + } + } + + return ret; +} + +template int fwrite_buffer(FILE *fp, const int N, const T *buffer) +{ + int ret = 0; + int Nd = 1; + + if(fp==NULL) + { + ret = 0; + printf("fwrite_buffer: fp=NULL\n"); + return ret; + } + + fwrite(&Nd,sizeof(int32_t),1,fp); + fwrite(&N,sizeof(int32_t),1,fp); + fwrite(buffer,sizeof(T),N,fp); + + return ret; +} + +template int fread_buffer(FILE *fp, const int Nmax, const T *buffer) +{ + int ret = 0; + + int cnt; + int32_t Nd; + int32_t *dims = NULL; + int piprod; + int32_t q; + int I; + + int Nr; + + + if(fp==NULL) {ret = -1; return ret;} + if(feof(fp)) {ret = -2; return ret;} + + cnt = fread(&Nd,sizeof(int32_t),1,fp); + if(Nd>0 && cnt>0) + { + piprod = 1; + dims = new(std::nothrow) int32_t[Nd]; + for(I=0;I(Nmax,piprod); + cnt = fread(buffer,sizeof(T),Nr,fp); + } + + if(dims!=NULL) {delete[] dims; dims=NULL;} + + return ret; +} + +}; //end namespace amscuda + +#endif + diff --git a/include/amsculib2/amscugeom.hpp b/include/amsculib2/amscugeom.hpp new file mode 100644 index 0000000..60ec3fb --- /dev/null +++ b/include/amsculib2/amscugeom.hpp @@ -0,0 +1,11 @@ +#ifndef __AMSCUGEOM_HPP__ +#define __AMSCUGEOM_HPP__ + +namespace amscuda +{ + + +}; //end namespace amscuda + +#endif + diff --git a/include/amsculib2/amsculib2.hpp b/include/amsculib2/amsculib2.hpp new file mode 100644 index 0000000..4f5e412 --- /dev/null +++ b/include/amsculib2/amsculib2.hpp @@ -0,0 +1,70 @@ +#ifndef __AMSCULIB2_HPP__ +#define __AMSCULIB2_HPP__ + +//Std Lib Includes +#include +#include +#include +#include +#include +#include + +#include //where all the cuda functions live +#include +#include + +//Dependencies + +//Predeclarations +class cuvect2; +class cuvect3; +class cuvect4; +class cuvect2f; +class cuvect3f; +class cuvect4f; + +//Need a way to define the same symbols using both host and device code +//A solution was found here: https://stackoverflow.com/questions/9457572/cuda-host-and-device-using-same-constant-memory +#ifdef __CUDA_ARCH__ +#define AMSCU_CONST __constant__ +#else +#define AMSCU_CONST +#endif + +namespace amscuda +{ + + //default thread and block execution + AMSCU_CONST static const int amscu_defnblocks = 256; + AMSCU_CONST static const int amscu_defnthreads = 512; + + //default numthreads to execute on cpu + AMSCU_CONST static const int amscu_defcputhreads = 8; + +}; //end namespace amscuda + +//Components +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + + + + +#endif + diff --git a/include/amsculib2/amscumath.hpp b/include/amsculib2/amscumath.hpp new file mode 100644 index 0000000..574a1c2 --- /dev/null +++ b/include/amsculib2/amscumath.hpp @@ -0,0 +1,56 @@ +#ifndef __AMSCUMATH_HPP__ +#define __AMSCUMATH_HPP__ + +namespace amscuda +{ + + + + //Problem: These are not in the namespace + //#define nan NAN + //#define fnan (float) NAN + //#define inf INFINITY + //#define finf (float) INFINITY + //#define pi 3.1415926535897936 + + //These need to be the same symbol for both host and device code + AMSCU_CONST static const double nan = NAN; + AMSCU_CONST static const float fnan = (float) NAN; + AMSCU_CONST static const double inf = INFINITY; + AMSCU_CONST static const float finf = (float) INFINITY; + AMSCU_CONST static const double pi = 3.1415926535897936; + AMSCU_CONST static const float pif = 3.1415926535897936; + + __host__ __device__ double dabs(double x); + __host__ __device__ float fabs(float x); + + template __host__ __device__ T abs(const T in) + { + T ret = in; + if(in<0) ret = -in; + return ret; + } + + __host__ __device__ double mod(double a, double md); + __host__ __device__ float mod(float a, float md); + __host__ __device__ int mod(int x, int n); + __host__ __device__ long mod(long x, long n); + + __host__ __device__ int truediv(int x, int y); + __host__ __device__ long truediv(long x, long y); + + template __host__ __device__ T min(T a, T b); + template __host__ __device__ T max(T a, T b); + + __device__ __host__ double arg(double x, double y); + __device__ __host__ void get_azel(double x, double y, double z, double *az, double *el); + + void test_amscumath1(); + + +}; //end namespace amscuda + +#include + +#endif + diff --git a/include/amsculib2/amscumath_impl.hpp b/include/amsculib2/amscumath_impl.hpp new file mode 100644 index 0000000..4a43e06 --- /dev/null +++ b/include/amsculib2/amscumath_impl.hpp @@ -0,0 +1,42 @@ +#ifndef __AMSCUMATH_IMPL_HPP__ +#define __AMSCUMATH_IMPL_HPP__ + +namespace amscuda +{ + +template __host__ __device__ T min(T a, T b) +{ + if(a>b) + { + return b; + } + else + { + return a; + } + return a; +} + +template __host__ __device__ T max(T a, T b) +{ + if(a>b) + { + return a; + } + else + { + return b; + } + return a; +} + +template<> __host__ __device__ double min(double a, double b); +template<> __host__ __device__ float min(float a, float b); +template<> __host__ __device__ double max(double a, double b); +template<> __host__ __device__ float max(float a, float b); + + +}; //end namespace amscuda + +#endif + diff --git a/include/amsculib2/amscurarray.cuh b/include/amsculib2/amscurarray.cuh new file mode 100644 index 0000000..cd9ff0b --- /dev/null +++ b/include/amsculib2/amscurarray.cuh @@ -0,0 +1,66 @@ +#ifndef __AMSCURARRAY_HPP__ +#define __AMSCURARRAY_HPP__ + +namespace amscuda +{ + +//Cuda ragged array class +template class curarray +{ +public: + int device; + curarray* devptr; //pointer to mirror class on the device + + int Narrays; //number of arrays + + int *N; //dimension of each array + T** hostarrayptrs; //pointers to each array on the host - null on the device + T** devarrayptrs; //pointers to each array on the device + //the double pointer is a host pointer to device pointers on the host class + //for the device class, only the second set of arrays is in use + + //the constructor and destructor set all pointers to NULL, they + // do *not* manage memory. This is done with curarray_new and curarray_delete + __device__ __host__ curarray(); + __device__ __host__ ~curarray(); + + __host__ int push(); + __host__ int pull(); + //__device__ int dev_resizearray(int arraynum, int arraysize); + __host__ int resizearray(int arraynum, int arraysize); + // I may want a way to resize arrays on the device without pushing/pulling all the array contents + + +}; + +template int curarray_new(curarray** ptr, int Narrays); + +template int curarray_delete(curarray** ptr); + +template int curarray_device_new(curarray *hostptr); + +template int curarray_device_delete(curarray *hostptr); + +template int curarray_push(curarray *hostptr); + +template int curarray_pull(curarray *hostptr); + + +//template int curarray_host_fillall(curarray *hostptr, const T &val); +//template int curarray_device_fillall(curarray *hostptr, const T &val); + + +//template __host__ int curarray_deletearray(curarray *hostptr, int arrayindex); +//template __device__ int curarray_dev_deletearray(curarray *devptr, int arrayindex); + +//template __host__ int curarray_allocarray(curarray *hostptr, int arrayindex, int size); +//template __device__ int curarray_dev_allocarray(curarray *devptr, int arrayindex, int size); + + +void test_amscurarray1(); + +}; + +#include + +#endif \ No newline at end of file diff --git a/include/amsculib2/amscurarray_impl.cuh b/include/amsculib2/amscurarray_impl.cuh new file mode 100644 index 0000000..d3841f7 --- /dev/null +++ b/include/amsculib2/amscurarray_impl.cuh @@ -0,0 +1,529 @@ +#ifndef __AMSCURARRAY_IMPL_HPP__ +#define __AMSCURARRAY_IMPL_HPP__ + +namespace amscuda +{ + +template curarray::curarray() +{ + device = -1; + devptr = NULL; + Narrays = 0; + N = NULL; + hostarrayptrs = NULL; + devarrayptrs = NULL; + +} + +template curarray::~curarray() +{ + device = -1; + devptr = NULL; + Narrays = 0; + N = NULL; + hostarrayptrs = NULL; + devarrayptrs = NULL; + +} + +template int curarray_new(curarray** ptr, int Narrays) +{ + int ret = 0; + int device; + curarray *lhptr = *ptr; + + cudaGetDevice(&device); + + if(lhptr!=NULL) + { + curarray_delete(ptr); + } + + *ptr = new(std::nothrow) curarray(); + lhptr = *ptr; + + int I; + + if(Narrays<0) Narrays=0; + + lhptr->Narrays = Narrays; + lhptr->device = device; + lhptr->N = new(std::nothrow) int[Narrays]; + lhptr->hostarrayptrs = new(std::nothrow) T*[Narrays]; + lhptr->devarrayptrs = new(std::nothrow) T*[Narrays]; + + for(I=0;IN[I] = 0; + lhptr->hostarrayptrs[I] = NULL; + lhptr->devarrayptrs[I] = NULL; + } + + curarray_device_new(lhptr); + + return ret; +} + +template int curarray_delete(curarray** ptr) +{ + int ret = 0; + curarray *lptr = NULL; + int olddev; + + int I; + + if(*ptr==NULL) + { + return 0; + } + lptr = *ptr; + + cudaGetDevice(&olddev); + cudaSetDevice(lptr->device); + + + if(lptr->devptr!=NULL) + { + curarray_device_delete(lptr); + } + + lptr->device = -1; + + for(I=0;INarrays;I++) + { + if(lptr->hostarrayptrs!=NULL) + { + if(lptr->hostarrayptrs[I]!=NULL) + { + delete[] lptr->hostarrayptrs[I]; + lptr->hostarrayptrs[I] = NULL; + } + } + + if(lptr->devarrayptrs!=NULL) + { + if(lptr->devarrayptrs[I]!=NULL) + { + //erasing device memory should have been handled in curarray_device_delete + lptr->devarrayptrs[I] = NULL; + } + } + + lptr->N[I] = 0; + } + + if(lptr->N != NULL) {delete[] lptr->N; lptr->N = NULL;} + if(lptr->hostarrayptrs!=NULL) {delete[] lptr->hostarrayptrs; lptr->hostarrayptrs=NULL;} + if(lptr->devarrayptrs!=NULL) {delete[] lptr->devarrayptrs; lptr->devarrayptrs=NULL;} + + if(*ptr!=NULL) {delete *ptr; *ptr = NULL;} + + cudaSetDevice(olddev); + + return ret; +} + +template int curarray_device_new(curarray *hostptr) +{ + int ret = 0; + curarray ldevdata; + + if(hostptr==NULL) return -1; + if(hostptr->devptr!=NULL) + { + curarray_device_delete(hostptr); + } + + int I; + + cudaGetDevice(&(hostptr->device)); + + ldevdata.device = hostptr->device; + ldevdata.Narrays = hostptr->Narrays; + int Narrays = hostptr->Narrays; + + cudaMalloc(&(ldevdata.N),sizeof(int)*Narrays); + cudaMemcpy(ldevdata.N,hostptr->N,sizeof(int)*Narrays,cudaMemcpyHostToDevice); + + ldevdata.hostarrayptrs = NULL; + + for(I=0;IN[I]>0) + { + if(hostptr->devarrayptrs[I]!=NULL) + { + cudaFree(hostptr->devarrayptrs[I]); + hostptr->devarrayptrs[I] = NULL; + } + + cudaMalloc(&(hostptr->devarrayptrs[I]),sizeof(T)*hostptr->N[I]); + cudaMemcpy(hostptr->devarrayptrs[I],hostptr->hostarrayptrs[I],sizeof(T)*hostptr->N[I],cudaMemcpyHostToDevice); + } + else + { + if(hostptr->devarrayptrs[I]!=NULL) + { + cudaFree(hostptr->devarrayptrs[I]); + hostptr->devarrayptrs[I] = NULL; + } + } + } + + cudaMalloc(&(ldevdata.devarrayptrs),sizeof(T*)*Narrays); + cudaMemcpy(ldevdata.devarrayptrs,hostptr->devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyHostToDevice); + + cudaMalloc(&(hostptr->devptr),sizeof(curarray)); + cudaMemcpy(hostptr->devptr,&ldevdata,sizeof(curarray),cudaMemcpyHostToDevice); + + ret = 1; + + return ret; +} + +template int curarray_device_delete(curarray *hostptr) +{ + int ret = 0; + + curarray ldevdata; + int olddev; + + if(hostptr->devptr==NULL) + { + return 0; + } + + cudaGetDevice(&olddev); + cudaSetDevice(hostptr->device); + + cudaMemcpy(&ldevdata,hostptr->devptr,sizeof(curarray),cudaMemcpyDeviceToHost); + + int I; + for(I=0;INarrays;I++) + { + if(hostptr->devarrayptrs[I]!=NULL) + { + cudaFree(hostptr->devarrayptrs[I]); + hostptr->devarrayptrs[I] = NULL; + } + } + + cudaFree(ldevdata.devarrayptrs); + cudaFree(ldevdata.N); + + cudaFree(hostptr->devptr); + hostptr->devptr = NULL; + hostptr->device = -1; + + cudaSetDevice(olddev); + + ret = 1; + return ret; +} + +template int curarray_push(curarray *hostptr) +{ + int ret = 0; + + int olddev; + + curarray ldevdata; + T** ldevarrayptrs = NULL; + int *devN = NULL; + + if(hostptr==NULL) return -1; + + cudaGetDevice(&olddev); + cudaSetDevice(hostptr->device); + + int Narrays = hostptr->Narrays; + + cudaMemcpy(&ldevdata,hostptr->devptr,sizeof(curarray),cudaMemcpyDeviceToHost); + ldevarrayptrs = new(std::nothrow) T*[Narrays]; + devN = new(std::nothrow) int[Narrays]; + + cudaMemcpy(ldevarrayptrs,ldevdata.devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyDeviceToHost); + cudaMemcpy(devN,ldevdata.N,sizeof(int)*Narrays,cudaMemcpyDeviceToHost); + + + int I; + + for(I=0;IN[I]!=devN[I]) || + (hostptr->devarrayptrs[I] != ldevarrayptrs[I]) + ) + { + cudaFree(ldevarrayptrs[I]); + ldevarrayptrs[I] = NULL; + hostptr->devarrayptrs[I] = NULL; + + if(hostptr->N[I]>0) + { + cudaMalloc(&(hostptr->devarrayptrs[I]),sizeof(T)*hostptr->N[I]); + ldevarrayptrs[I] = hostptr->devarrayptrs[I]; + devN[I] = hostptr->N[I]; + } + else + { + devN[I] = 0; + } + } + if(hostptr->N[I]>0) + { + //copy host data to device + cudaMemcpy(hostptr->devarrayptrs[I],hostptr->hostarrayptrs[I],sizeof(T)*hostptr->N[I],cudaMemcpyHostToDevice); + } + } //for each array + + //rectify and copy device data structure to device + ldevdata.device = hostptr->device; + ldevdata.devptr = NULL; + ldevdata.Narrays = hostptr->Narrays; //later - logic for dealing with when this is not true + ldevdata.hostarrayptrs = NULL; + + + cudaMemcpy(ldevdata.N,hostptr->N,sizeof(int)*Narrays,cudaMemcpyHostToDevice); + cudaMemcpy(ldevdata.devarrayptrs,hostptr->devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyHostToDevice); + + cudaMemcpy(hostptr->devptr,&ldevdata,sizeof(curarray),cudaMemcpyHostToDevice); + + cuda_errortrap("curarray_push cuda error:"); + + cudaSetDevice(olddev); + + delete[] ldevarrayptrs; + delete[] devN; + + return ret; +} + +template int curarray_pull(curarray *hostptr) +{ + int ret = 0; + + int olddev; + + curarray ldevdata; + T** ldevarrayptrs = NULL; + int *devN = NULL; + + if(hostptr==NULL) return -1; + + cudaGetDevice(&olddev); + cudaSetDevice(hostptr->device); + + cuda_errortrap("dbg1"); + + int Narrays = hostptr->Narrays; + + cudaMemcpy(&ldevdata,hostptr->devptr,sizeof(curarray),cudaMemcpyDeviceToHost); + ldevarrayptrs = new(std::nothrow) T*[Narrays]; + devN = new(std::nothrow) int[Narrays]; + + cuda_errortrap("dbg2"); + + cudaMemcpy(ldevarrayptrs,ldevdata.devarrayptrs,sizeof(T*)*Narrays,cudaMemcpyDeviceToHost); + cudaMemcpy(devN,ldevdata.N,sizeof(int)*Narrays,cudaMemcpyDeviceToHost); + + cuda_errortrap("dbg3"); + char dbgjnk[50]; + + int I; + for(I=0;Idevarrayptrs[I] != ldevarrayptrs[I]) + { + hostptr->devarrayptrs[I] = ldevarrayptrs[I]; + } + + if(hostptr->N[I]!=devN[I]) + { + if(hostptr->hostarrayptrs[I]!=NULL) + { + delete[] hostptr->hostarrayptrs[I]; + hostptr->hostarrayptrs[I] = NULL; + } + + if(devN[I]>0) + { + hostptr->hostarrayptrs[I] = new(std::nothrow) T[devN[I]]; + hostptr->N[I] = devN[I]; + } + else + { + hostptr->N[I] = 0; + } + } + + if(hostptr->hostarrayptrs[I]!=NULL && hostptr->devarrayptrs[I]!=NULL) + { + cudaMemcpy(hostptr->hostarrayptrs[I],hostptr->devarrayptrs[I],sizeof(T)*hostptr->N[I],cudaMemcpyDeviceToHost); + sprintf(dbgjnk,"%d dbg %d",I,hostptr->N[I]); + cuda_errortrap(dbgjnk); + } + } //for each array + + //for the pull operation, I don't think any update of the device data structure is necessary + + cudaSetDevice(olddev); + + delete[] ldevarrayptrs; + delete[] devN; + + return ret; +} + +template __host__ int curarray::push() +{ + return curarray_push(this); +} +template __host__ int curarray::pull() +{ + return curarray_pull(this); +} + + + +/* +https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#memory-allocation-and-lifetime%5B/url%5D + +cudaMalloc() and cudaFree() have distinct semantics between the host and +device environments. When invoked from the host, cudaMalloc() allocates a +new region from unused device memory. When invoked from the device runtime +these functions map to device-side malloc() and free(). This implies that +within the device environment the total allocatable memory is limited to the +device malloc() heap size, which may be smaller than the available unused +device memory. Also, it is an error to invoke cudaFree() from the host +program on a pointer which was allocated by cudaMalloc() on the device +or vice-versa. + +So, basically this entire function is not going to work. I'll be unable to resize within +a kernel. +*/ + +/* + +template __device__ int curarray::dev_resizearray(int arraynum, int arraysize) +{ + int ret = 0; + T* newptr = NULL; + int I; + T def; + + if(arraynum>=0 && arraynum __host__ int curarray::resizearray(int arraynum, int arraysize) +{ + int ret = 0; + T* newptr = NULL; + int I; + T def; + + if(arraynum>=0 && arraynum=2): + if(args[1]=="clean"): + obj_list = flist('./objstore',recurse=True,exts=['.o']) + for o in obj_list: + os.remove('{}'.format(o)) + exit(0) + +os.system('python3 ./compscripts/linux64.makelib.py') +os.system('python3 ./compscripts/linux64.maketest.py') + +# obj_list = flist('./src',recurse=True,exts=['.o']) +# for o in obj_list: +# os.remove('{}'.format(o)) + +#os.chdir('./bin_linux64') +callproc('./bin_linux64/test') +#os.chdir('..') \ No newline at end of file diff --git a/run3.py b/run3.py new file mode 100644 index 0000000..75698fc --- /dev/null +++ b/run3.py @@ -0,0 +1,17 @@ +#!/usr/bin/python3 + +import os,sys,math; +from compscripts.complib2 import *; + + +os.system('python ./compscripts/winnvcc.makelib.py') +os.system('python ./compscripts/winnvcc.maketest.py') + +obj_list = flist('./',recurse=True,exts=['.o']) +for o in obj_list: + os.remove('{}'.format(o)) + +#os.chdir('./bin_winx64') +callproc('.\\bin_winx64\\test.exe') +#os.chdir('..') + diff --git a/src/amsculib2/amscu_comp128.cu b/src/amsculib2/amscu_comp128.cu new file mode 100644 index 0000000..859fc4d --- /dev/null +++ b/src/amsculib2/amscu_comp128.cu @@ -0,0 +1,476 @@ +#include + +namespace amscuda +{ +namespace cmp +{ + +__host__ __device__ cucomp128::cucomp128() +{ + real = 0.0; + imag = 0.0; + return; +} + +__host__ __device__ cucomp128::~cucomp128() +{ + real = 0.0; + imag = 0.0; + return; +} + +__host__ __device__ cucomp128::cucomp128(const cucomp128 &other) +{ + real = other.real; + imag = other.imag; + + return; +} + +__host__ __device__ cucomp128::cucomp128(const double &other) +{ + real = other; + imag = 0.0; + return; +} + +__host__ __device__ cucomp128& cucomp128::operator=(cucomp128& other) +{ + real = other.real; + imag = other.imag; + return *this; +} + +__host__ __device__ const cucomp128& cucomp128::operator=(const cucomp128& other) +{ + this->real = other.real; + this->imag = other.imag; + return *this; +} + +__host__ __device__ cucomp128& cucomp128::operator=(double& other) +{ + real = other; + imag = 0.0; + return *this; +} + +__host__ __device__ const cucomp128& cucomp128::operator=(const double& other) +{ + this->real = other; + this->imag = 0.0; + return *this; +} + +__host__ __device__ double& cucomp128::operator[](int& ind) +{ + if(ind==0) + { + return this->real; + } + else + { + return this->imag; + } +} + +__host__ __device__ const double& cucomp128::operator[](const int& ind) const +{ + if(ind==0) + { + return this->real; + } + else + { + return this->imag; + } +} + +__host__ __device__ cucomp128 cucomp128::operator+(const cucomp128& z) +{ + cucomp128 ret; + ret.real = real + z.real; + ret.imag = imag + z.imag; + return ret; +} + +__host__ __device__ cucomp128 cucomp128::operator-(const cucomp128& z) +{ + cucomp128 ret; + ret.real = real - z.real; + ret.imag = imag - z.imag; + return ret; +} + +__host__ __device__ cucomp128 cucomp128::operator*(const cucomp128& z) +{ + cucomp128 ret; + ret.real = (real*z.real - imag*z.imag); + ret.imag = (imag*z.real + real*z.imag); + return ret; +} + +__host__ __device__ cucomp128 cucomp128::operator/(const cucomp128& z) +{ + cucomp128 ret; + double zm2 = z.real*z.real+z.imag*z.imag; + + if(zm2>0.0) + { + ret.real = (this->real*z.real+this->imag*z.imag)/zm2; + ret.imag = (this->imag*z.real-this->real*z.imag)/zm2; + } + else + { + ret.real = (double) finf; + ret.imag = (double) finf; + } + + return ret; +} + +__host__ __device__ cucomp128 cucomp128::operator+(const double& z) +{ + cucomp128 ret; + ret.real = this->real + z; + ret.imag = this->imag; + return ret; +} +__host__ __device__ cucomp128 cucomp128::operator-(const double& z) +{ + cucomp128 ret; + ret.real = real-z; + ret.imag = imag; + return ret; +} +__host__ __device__ cucomp128 cucomp128::operator*(const double& z) +{ + cucomp128 ret; + ret.real = real*z; + ret.imag = imag*z; + return ret; +} +__host__ __device__ cucomp128 cucomp128::operator/(const double& z) +{ + cucomp128 ret; + if(z!=0.0f) + { + ret.real = real/z; + ret.imag = imag/z; + } + else + { + ret.real = finf; + ret.imag = finf; + } + + return ret; +} + +__host__ __device__ bool cucomp128::operator==(const cucomp128& z) const +{ + bool ret = 0; + if(z.real == real && z.imag == imag) + { + ret = 1; + } + return ret; +} + +__host__ __device__ bool cucomp128::operator!=(const cucomp128& z) const +{ + return !(*this==z); +} + +//sort first by real value, then by imaginary value +//this is done so that an ordering exists, as long as two values aren't equal +__host__ __device__ bool cucomp128::operator>(const cucomp128& z) const +{ + bool ret = 0; + if(this->real>z.real) + { + ret = 1; + } + else if(this->real==z.real) + { + if(this->imag>z.imag) + { + ret = 1; + } + else + { + ret = 0; + } + } + else + { + ret = 0; + } + return ret; +} + +__host__ __device__ bool cucomp128::operator<(const cucomp128& z) const +{ + bool ret = 0; + if(this->realreal==z.real) + { + if(this->imag=(const cucomp128& z) const +{ + bool ret = (*this==z || *this>z); + return ret; +} + +__host__ __device__ bool cucomp128::operator<=(const cucomp128& z) const +{ + bool ret = (*this==z || *thisreal) || ::isnan(this->imag)) + { + ret = 1; + } + return ret; +} + +__host__ __device__ bool cucomp128::isinf() const +{ + bool ret = 0; + //calls math.h isinf() + if(::isinf(this->real) || ::isinf(this->imag)) + { + ret = 1; + } + return ret; +} + +__host__ __device__ bool cucomp128::isreal() const +{ + bool ret = 1; + if(imag!=0.0f) + { + ret = 0; + } + return ret; +} + +__host__ __device__ bool cucomp128::isimag() const +{ + bool ret = 1; + if(real!=0.0f) + { + ret = 0; + } + return ret; +} + +__host__ __device__ bool cucomp128::iszero() const +{ + bool ret = 1; + if(real!=0.0f || imag!=0.0f) + { + ret = 0; + } + return ret; +} + +__host__ __device__ double cucomp128::arg() const +{ + double ret = 0.0; + ret = (double) amscuda::arg((double)real,(double)imag); + return ret; +} + +__host__ __device__ double cucomp128::mag() const +{ + double ret = 0.0; + ret = ::sqrt(real*real+imag*imag); + return ret; +} + +__host__ __device__ cucomp128 cucomp128::conj() const +{ + cucomp128 ret; + ret.real = real; + ret.imag = -imag; + return ret; +} + +__host__ __device__ double arg(cucomp128 z) +{ + return z.arg(); +} + +__host__ __device__ double abs(cucomp128 z) +{ + return z.mag(); +} + +__host__ __device__ cucomp128 dtocomp(double _r, double _i) +{ + cucomp128 ret; + ret.real = _r; + ret.imag = _i; + return ret; +} + +__host__ __device__ double real(cucomp128 z) +{ + return z.real; +} + +__host__ __device__ double imag(cucomp128 z) +{ + return z.imag; +} + +__host__ __device__ cucomp128 sin(cucomp128 z) +{ + cucomp128 ret; + cucomp128 im1 = dtocomp(0.0f,1.0f); + cucomp128 div = dtocomp(0.0f,2.0f); + + ret = (exp(im1*z)-exp(-im1*z))/div; + + return ret; +} + +__host__ __device__ cucomp128 cos(cucomp128 z) +{ + cucomp128 ret; + cucomp128 im1 = dtocomp(0.0f,1.0f); + cucomp128 div = dtocomp(2.0f,0.0f); + + ret = (exp(im1*z)+exp(-im1*z))/div; + + return ret; +} + +__host__ __device__ cucomp128 tan(cucomp128 z) +{ + return sin(z)/cos(z); +} + +__host__ __device__ cucomp128 exp(cucomp128 z) +{ + cucomp128 ret; + ret.real = ::exp(z.real)*::cos(z.imag); + ret.imag = ::exp(z.real)*::sin(z.imag); + return ret; +} + +__host__ __device__ cucomp128 log(cucomp128 z) +{ + cucomp128 ret; + ret.real = ::log(::sqrt(z.real*z.real+z.imag*z.imag)); + ret.imag = amscuda::arg(z.real,z.imag); + return ret; +} + +__host__ __device__ cucomp128 conj(cucomp128 z) +{ + return z.conj(); +} + +__host__ __device__ cucomp128 cosh(cucomp128 z) +{ + cucomp128 ret; + cucomp128 div = dtocomp(2.0f,0.0f); + + ret = (exp(z)+exp(-z))/div; + + return ret; +} + +__host__ __device__ cucomp128 sinh(cucomp128 z) +{ + cucomp128 ret; + cucomp128 div = dtocomp(2.0f,0.0f); + + ret = (exp(z)-exp(-z))/div; + + return ret; +} + +__host__ __device__ cucomp128 tanh(cucomp128 z) +{ + return sinh(z)/cosh(z); +} + +__host__ __device__ cucomp128 pow(cucomp128 z1, cucomp128 z2) +{ + cucomp128 ret; + if(z1.mag()>0.0) + ret = exp(z2*log(z1)); + else + ret = dtocomp(0.0f,0.0f); + return ret; +} + + +void test_cucomp128_1() +{ + cucomp128 z1; + cucomp128 a,b,c; + double d1; + double f1; + + printf("sizeof double=%ld\n",(long)(8*sizeof(f1))); + printf("sizeof double=%ld\n",(long)(8*sizeof(d1))); + printf("sizeof complex=%ld\n",(long)(8*sizeof(z1))); + printf("sizeof cucomp128=%ld\n",(long)(8*sizeof(a))); + + a = dtocomp(1.0,1.0); + b = dtocomp(1.0,-1.0); + + printf("a=%1.4f + %1.4fi\n",a[0],a[1]); + printf("b=%1.4f + %1.4fi\n",b[0],b[1]); + c = a+b; + printf("c=a+b: c=%1.4f + %1.4fi\n",c[0],c[1]); + c = a-b; + printf("c=a-b: c=%1.4f + %1.4fi\n",c[0],c[1]); + c = a*b; + printf("c=a*b: c=%1.4f + %1.4fi\n",c[0],c[1]); + c = a/b; + printf("c=a/b: c=%1.4f + %1.4fi\n",c[0],c[1]); + f1 = abs(a); + printf("abs(a)=%1.4f\n",f1); + f1 = arg(a); + printf("abs(a)=%1.4f pi\n",f1/pi); + + +} + +}; //end namespace cmp +}; //end namespace amscuda \ No newline at end of file diff --git a/src/amsculib2/amscu_comp64.cu b/src/amsculib2/amscu_comp64.cu new file mode 100644 index 0000000..6599074 --- /dev/null +++ b/src/amsculib2/amscu_comp64.cu @@ -0,0 +1,476 @@ +#include + +namespace amscuda +{ +namespace cmp +{ + +__host__ __device__ cucomp64::cucomp64() +{ + real = 0.0; + imag = 0.0; + return; +} + +__host__ __device__ cucomp64::~cucomp64() +{ + real = 0.0; + imag = 0.0; + return; +} + +__host__ __device__ cucomp64::cucomp64(const cucomp64 &other) +{ + real = other.real; + imag = other.imag; + + return; +} + +__host__ __device__ cucomp64::cucomp64(const float &other) +{ + real = other; + imag = 0.0; + return; +} + +__host__ __device__ cucomp64& cucomp64::operator=(cucomp64& other) +{ + real = other.real; + imag = other.imag; + return *this; +} + +__host__ __device__ const cucomp64& cucomp64::operator=(const cucomp64& other) +{ + this->real = other.real; + this->imag = other.imag; + return *this; +} + +__host__ __device__ cucomp64& cucomp64::operator=(float& other) +{ + real = other; + imag = 0.0; + return *this; +} + +__host__ __device__ const cucomp64& cucomp64::operator=(const float& other) +{ + this->real = other; + this->imag = 0.0; + return *this; +} + +__host__ __device__ float& cucomp64::operator[](int& ind) +{ + if(ind==0) + { + return this->real; + } + else + { + return this->imag; + } +} + +__host__ __device__ const float& cucomp64::operator[](const int& ind) const +{ + if(ind==0) + { + return this->real; + } + else + { + return this->imag; + } +} + +__host__ __device__ cucomp64 cucomp64::operator+(const cucomp64& z) +{ + cucomp64 ret; + ret.real = real + z.real; + ret.imag = imag + z.imag; + return ret; +} + +__host__ __device__ cucomp64 cucomp64::operator-(const cucomp64& z) +{ + cucomp64 ret; + ret.real = real - z.real; + ret.imag = imag - z.imag; + return ret; +} + +__host__ __device__ cucomp64 cucomp64::operator*(const cucomp64& z) +{ + cucomp64 ret; + ret.real = (real*z.real - imag*z.imag); + ret.imag = (imag*z.real + real*z.imag); + return ret; +} + +__host__ __device__ cucomp64 cucomp64::operator/(const cucomp64& z) +{ + cucomp64 ret; + float zm2 = z.real*z.real+z.imag*z.imag; + + if(zm2>0.0) + { + ret.real = (this->real*z.real+this->imag*z.imag)/zm2; + ret.imag = (this->imag*z.real-this->real*z.imag)/zm2; + } + else + { + ret.real = (float) finf; + ret.imag = (float) finf; + } + + return ret; +} + +__host__ __device__ cucomp64 cucomp64::operator+(const float& z) +{ + cucomp64 ret; + ret.real = this->real + z; + ret.imag = this->imag; + return ret; +} +__host__ __device__ cucomp64 cucomp64::operator-(const float& z) +{ + cucomp64 ret; + ret.real = real-z; + ret.imag = imag; + return ret; +} +__host__ __device__ cucomp64 cucomp64::operator*(const float& z) +{ + cucomp64 ret; + ret.real = real*z; + ret.imag = imag*z; + return ret; +} +__host__ __device__ cucomp64 cucomp64::operator/(const float& z) +{ + cucomp64 ret; + if(z!=0.0f) + { + ret.real = real/z; + ret.imag = imag/z; + } + else + { + ret.real = finf; + ret.imag = finf; + } + + return ret; +} + +__host__ __device__ bool cucomp64::operator==(const cucomp64& z) const +{ + bool ret = 0; + if(z.real == real && z.imag == imag) + { + ret = 1; + } + return ret; +} + +__host__ __device__ bool cucomp64::operator!=(const cucomp64& z) const +{ + return !(*this==z); +} + +//sort first by real value, then by imaginary value +//this is done so that an ordering exists, as long as two values aren't equal +__host__ __device__ bool cucomp64::operator>(const cucomp64& z) const +{ + bool ret = 0; + if(this->real>z.real) + { + ret = 1; + } + else if(this->real==z.real) + { + if(this->imag>z.imag) + { + ret = 1; + } + else + { + ret = 0; + } + } + else + { + ret = 0; + } + return ret; +} + +__host__ __device__ bool cucomp64::operator<(const cucomp64& z) const +{ + bool ret = 0; + if(this->realreal==z.real) + { + if(this->imag=(const cucomp64& z) const +{ + bool ret = (*this==z || *this>z); + return ret; +} + +__host__ __device__ bool cucomp64::operator<=(const cucomp64& z) const +{ + bool ret = (*this==z || *thisreal) || ::isnan(this->imag)) + { + ret = 1; + } + return ret; +} + +__host__ __device__ bool cucomp64::isinf() const +{ + bool ret = 0; + //calls math.h isinf() + if(::isinf(this->real) || ::isinf(this->imag)) + { + ret = 1; + } + return ret; +} + +__host__ __device__ bool cucomp64::isreal() const +{ + bool ret = 1; + if(imag!=0.0f) + { + ret = 0; + } + return ret; +} + +__host__ __device__ bool cucomp64::isimag() const +{ + bool ret = 1; + if(real!=0.0f) + { + ret = 0; + } + return ret; +} + +__host__ __device__ bool cucomp64::iszero() const +{ + bool ret = 1; + if(real!=0.0f || imag!=0.0f) + { + ret = 0; + } + return ret; +} + +__host__ __device__ float cucomp64::arg() const +{ + float ret = 0.0; + ret = (float) amscuda::arg((double)real,(double)imag); + return ret; +} + +__host__ __device__ float cucomp64::mag() const +{ + float ret = 0.0; + ret = ::sqrt(real*real+imag*imag); + return ret; +} + +__host__ __device__ cucomp64 cucomp64::conj() const +{ + cucomp64 ret; + ret.real = real; + ret.imag = -imag; + return ret; +} + +__host__ __device__ float arg(cucomp64 z) +{ + return z.arg(); +} + +__host__ __device__ float abs(cucomp64 z) +{ + return z.mag(); +} + +__host__ __device__ cucomp64 dtocomp64(float _r, float _i) +{ + cucomp64 ret; + ret.real = _r; + ret.imag = _i; + return ret; +} + +__host__ __device__ float real(cucomp64 z) +{ + return z.real; +} + +__host__ __device__ float imag(cucomp64 z) +{ + return z.imag; +} + +__host__ __device__ cucomp64 sin(cucomp64 z) +{ + cucomp64 ret; + cucomp64 im1 = dtocomp64(0.0f,1.0f); + cucomp64 div = dtocomp64(0.0f,2.0f); + + ret = (exp(im1*z)-exp(-im1*z))/div; + + return ret; +} + +__host__ __device__ cucomp64 cos(cucomp64 z) +{ + cucomp64 ret; + cucomp64 im1 = dtocomp64(0.0f,1.0f); + cucomp64 div = dtocomp64(2.0f,0.0f); + + ret = (exp(im1*z)+exp(-im1*z))/div; + + return ret; +} + +__host__ __device__ cucomp64 tan(cucomp64 z) +{ + return sin(z)/cos(z); +} + +__host__ __device__ cucomp64 exp(cucomp64 z) +{ + cucomp64 ret; + ret.real = ::exp(z.real)*::cos(z.imag); + ret.imag = ::exp(z.real)*::sin(z.imag); + return ret; +} + +__host__ __device__ cucomp64 log(cucomp64 z) +{ + cucomp64 ret; + ret.real = ::log(::sqrt(z.real*z.real+z.imag*z.imag)); + ret.imag = amscuda::arg(z.real,z.imag); + return ret; +} + +__host__ __device__ cucomp64 conj(cucomp64 z) +{ + return z.conj(); +} + +__host__ __device__ cucomp64 cosh(cucomp64 z) +{ + cucomp64 ret; + cucomp64 div = dtocomp64(2.0f,0.0f); + + ret = (exp(z)+exp(-z))/div; + + return ret; +} + +__host__ __device__ cucomp64 sinh(cucomp64 z) +{ + cucomp64 ret; + cucomp64 div = dtocomp64(2.0f,0.0f); + + ret = (exp(z)-exp(-z))/div; + + return ret; +} + +__host__ __device__ cucomp64 tanh(cucomp64 z) +{ + return sinh(z)/cosh(z); +} + +__host__ __device__ cucomp64 pow(cucomp64 z1, cucomp64 z2) +{ + cucomp64 ret; + if(z1.mag()>0.0) + ret = exp(z2*log(z1)); + else + ret = dtocomp64(0.0f,0.0f); + return ret; +} + + +void test_cucomp64_1() +{ + cucomp64 z1; + cucomp64 a,b,c; + double d1; + float f1; + + printf("sizeof double=%ld\n",(long)(8*sizeof(f1))); + printf("sizeof double=%ld\n",(long)(8*sizeof(d1))); + printf("sizeof complex=%ld\n",(long)(8*sizeof(z1))); + printf("sizeof cucomp128=%ld\n",(long)(8*sizeof(a))); + + a = dtocomp64(1.0,1.0); + b = dtocomp64(1.0,-1.0); + + printf("a=%1.4f + %1.4fi\n",a[0],a[1]); + printf("b=%1.4f + %1.4fi\n",b[0],b[1]); + c = a+b; + printf("c=a+b: c=%1.4f + %1.4fi\n",c[0],c[1]); + c = a-b; + printf("c=a-b: c=%1.4f + %1.4fi\n",c[0],c[1]); + c = a*b; + printf("c=a*b: c=%1.4f + %1.4fi\n",c[0],c[1]); + c = a/b; + printf("c=a/b: c=%1.4f + %1.4fi\n",c[0],c[1]); + f1 = abs(a); + printf("abs(a)=%1.4f\n",f1); + f1 = arg(a); + printf("abs(a)=%1.4f pi\n",f1/pi); + + +} + +}; //end namespace cmp +}; //end namespace amscuda \ No newline at end of file diff --git a/src/amsculib2/amscu_cudafunctions.cu b/src/amsculib2/amscu_cudafunctions.cu new file mode 100644 index 0000000..7573ae0 --- /dev/null +++ b/src/amsculib2/amscu_cudafunctions.cu @@ -0,0 +1,21 @@ +#include + +namespace amscuda +{ + +int cuda_errortrap(const char *msgheader) +{ + int ret = 0; + cudaError_t err = cudaSuccess; + + err = cudaGetLastError(); + if(err!=cudaSuccess) + { + printf("%s :%s\n",msgheader,cudaGetErrorString(err)); + ret = 1; + } + + return ret; +} + +}; //end namespace amscuda \ No newline at end of file diff --git a/src/amsculib2/amscu_random.cu b/src/amsculib2/amscu_random.cu new file mode 100644 index 0000000..9e36e80 --- /dev/null +++ b/src/amsculib2/amscu_random.cu @@ -0,0 +1,222 @@ +#include + +namespace amscuda +{ + +__device__ __host__ float fhash1d_su(float x) +{ + float ret; + ret = x*(x>0.0f) + -x*(x<0.0f); //sign without conditionals? + ret = fmodf(ret,10000.0f); //restrain domain + ret = fmodf(ret*(ret+3678.453f)+7890.453f,10000.0f); + ret = fmodf(ret*(ret+8927.2134f),10000.0f); + ret = fmodf(ret*(ret+3656.234f),10000.0f); + //ret = fmodf(ret*(ret+892.2134f),1000.0f); + //ret = fmodf(ret,1000.0f); + ret = ret/10000.0f; + return ret; +} + + +__device__ __host__ float fhash3d_su(float x, float y=0.0f, float z=0.0f) +{ + float ret = 0.0f; + + ret = fhash1d_su(z); + ret = fhash1d_su(1000.0f*ret*ret + y); + ret = fhash1d_su(1000.0f*ret*ret + x); + + return ret; +} + +__device__ __host__ float fhash4d_su(float x, float y=0.0f, float z=0.0f, float w=0.0f) +{ + float ret = 0.0f; + + ret = fhash1d_su(w); + ret = fhash1d_su(1000.0f*ret*ret + z); + ret = fhash1d_su(1000.0f*ret*ret + y); + ret = fhash1d_su(1000.0f*ret*ret + x); + + return ret; +} + +////////////////////////////////////////////////// +// Deterministic Pseudorandom int32_t Generator // +////////////////////////////////////////////////// + +//Simple 32 bit integer deterministic pseudo-random generator +// *not* for cryptography +// Frequency of generated floats should be uniform [0,1) + +AMSCU_CONST static const int32_t dpr32_mod = 1<<30-1; +AMSCU_CONST static const int32_t dpr32_mult = 25137; + +//Next seed in simple 32 bit integer deterministic psuedo-rand generator +__host__ __device__ void dpr32_nextseed(int32_t *rseed_inout) +{ + int32_t lseed; + if(rseed_inout!=NULL) lseed = *rseed_inout; + + lseed = (lseed*dpr32_mult + 1)%dpr32_mod; + lseed = (lseed>=0)*(lseed)+(lseed<0)*(lseed+dpr32_mod); //ensure mod is positive + + if(rseed_inout!=NULL) *rseed_inout = lseed; + + return; +} + +//Simple 32 bit integer deterministic pseudo-random generator +// *not* for cryptography +// Frequency of generated floats should be uniform [0,1) +__host__ __device__ float dpr32_randf(int32_t *rseed_inout) +{ + int32_t lseed = 1; + float ret = 0.0f; + + if(rseed_inout!=NULL) lseed = *rseed_inout; + + dpr32_nextseed(&lseed); + ret = ((float)(lseed))/((float)dpr32_mod); + + if(rseed_inout!=NULL) *rseed_inout = lseed; + + return ret; +} + +//box muller standard normal variable +__host__ __device__ float dpr32_randnf(int32_t *rseed_inout) +{ + int32_t lseed = 1; + float ret = 0.0f; + float u1,u2; + + if(rseed_inout!=NULL) lseed = *rseed_inout; + + u1 = dpr32_randf(&lseed); + u2 = dpr32_randf(&lseed); + + ret = ::sqrtf(-2.0f*::logf(u1))*::cosf(2.0f*pif*u2); + + if(rseed_inout!=NULL) *rseed_inout = lseed; + + return ret; +} + +////////////////////////////////////////////////// +// Deterministic Pseudorandom int64_t Generator // +////////////////////////////////////////////////// + +//"goodenough" deterministic pseudo-random number generator +//random enough for procedural applications, deterministic, +//operates without side-effects for thread safety + +AMSCU_CONST const int64_t random_dpr64_mod = (2LL<<31LL)-1LL; +AMSCU_CONST const int64_t random_dpr64_mult = 1201633LL; + +__host__ __device__ void dpr64_nextseed(int64_t *seedinout) +{ + int64_t lseed = 0LL; + if(seedinout!=NULL) lseed = *seedinout; + + lseed = (random_dpr64_mult*lseed+1LL)%random_dpr64_mod; + lseed = (lseed>=0)*(lseed)+(lseed<0)*(lseed+random_dpr64_mod); + + if(seedinout!=NULL) *seedinout = lseed; + + return; +} + +__host__ __device__ double dpr64_randd(int64_t *seedinout) +{ + double ret = 0.0; + int64_t lseed = 0LL; + + if(seedinout!=NULL) lseed = *seedinout; + + dpr64_nextseed(&lseed); + ret = ((double)lseed)/((double)(random_dpr64_mod-1LL)); + + if(seedinout!=NULL) *seedinout = lseed; + + return ret; +} + +__host__ __device__ float dpr64_randf(int64_t *seedinout) +{ + float ret = 0.0f; + int64_t lseed = 0LL; + + if(seedinout!=NULL) lseed = *seedinout; + + dpr64_nextseed(&lseed); + ret = ((float)lseed)/((float)(random_dpr64_mod-1LL)); + + if(seedinout!=NULL) *seedinout = lseed; + + return ret; +} + +/////////// +// Tests // +/////////// + +void test_dprg64() +{ + printf("Tests for dprg:\n"); + long I; + int64_t seed = 133LL; + double d; + float f; + cuvect3 qv; + + printf("dpr64_randd test\n"); + seed = 133LL; + for(I=0;I<10;I++) + { + d = dpr64_randd(&seed); + printf("seed: %lld rand: %1.4f\n",(long long)seed,d); + } + + printf("\n\n"); + printf("dpr64_randf test\n"); + seed = 133LL; + for(I=0;I<10;I++) + { + f = dpr64_randf(&seed); + printf("seed: %lld rand: %1.4f\n",(long long)seed,f); + } + + return; +} + +void test_dprg32() +{ + printf("Tests for dprg:\n"); + long I; + int32_t seed = 133; + double d; + float f; + cuvect3 qv; + + printf("dpr32_randf test\n"); + seed = 133; + for(I=0;I<10;I++) + { + f = dpr32_randf(&seed); + printf("seed: %lld rand: %1.4f\n",(long long)seed,f); + } + + printf("\n\ndpr32_randnf test\n"); + seed = 133; + for(I=0;I<10;I++) + { + f = dpr32_randnf(&seed); + printf("seed: %lld rand: %1.4f\n",(long long)seed,f); + } + + return; +} + + +}; //namespace amscuda \ No newline at end of file diff --git a/src/amsculib2/amscuarray.cu b/src/amsculib2/amscuarray.cu new file mode 100644 index 0000000..81b746e --- /dev/null +++ b/src/amsculib2/amscuarray.cu @@ -0,0 +1,63 @@ +#include + +namespace amscuda +{ + +__global__ void test_cuarray_sum_kf(cuarray *dq1, float *sum) +{ + int I; + *sum = 0.0f; + for(I=0;Ilength;I++) + { + *sum = *sum + dq1->data[I]; + } + //*sum = (float)dq1->length; + return; +} + +float test_cuarray_sum(cuarray *q1) +{ + float ret = 0.0f; + int res; + cuarray *dq1 = NULL; + float *dsum; + cudaError_t err = cudaSuccess; + + cudaMalloc(&dsum,sizeof(float)); + res = q1->device_send(&dq1); + printf("error: res=%d\n",res); + test_cuarray_sum_kf<<<1,1>>>(dq1,dsum); + cudaDeviceSynchronize(); + + err = cudaGetLastError(); + if(err!=cudaSuccess) + { + printf("test_cuarray_sum Error: %s\n",cudaGetErrorString(err)); + } + + cudaMemcpy(&ret,dsum,sizeof(float),cudaMemcpyDeviceToHost); + + q1->device_free(&dq1); + cudaFree(dsum); dsum = NULL; + + return ret; +} + +void test_cuarray() +{ + cuarray q1; + + int I; + + q1.resize(100); + for(I=0;I + +namespace amscuda +{ + +//template instantiations +template float dbuff_sum(float *devbuffer, int N); +template void dbuff_minmax(float *devbuffer, int N, float *min, float *max); + + +template void dbuff_setall(float *devbuffer, int N, float setto, int nblocks, int nthreads); + + + + +//fill devbuffer with random uniform numbers between 0 and 1 using int32_t based generator +__global__ void dbuff_rand_dpr32_kf(float *devbuffer, int N, int32_t *seeds) +{ + int I0 = threadIdx.x + blockIdx.x*blockDim.x; + int Is = blockDim.x*gridDim.x; + int I; + + int32_t lseed; + float f; + + lseed = seeds[I0]; + for(I=I0;I>>(devbuffer,N,devseeds); + cudaDeviceSynchronize(); + err = cudaGetLastError(); + if(err!=cudaSuccess) + { + printf("amscu::dbuff_rand_dpr32 error: %s\n",cudaGetErrorString(err)); + } + + cudaFree(devseeds); devseeds = NULL; + delete[] seeds; seeds = NULL; + + return; +} + +__global__ void dbuff_rand_dpr32n_kf(float *devbuffer, int N, int32_t *seeds) +{ + int I0 = threadIdx.x + blockIdx.x*blockDim.x; + int Is = blockDim.x*gridDim.x; + int I; + + int32_t lseed; + float f; + + lseed = seeds[I0]; + for(I=I0;I>>(devbuffer,N,devseeds); + cudaDeviceSynchronize(); + err = cudaGetLastError(); + if(err!=cudaSuccess) + { + printf("amscu::dbuff_rand_dpr32 error: %s\n",cudaGetErrorString(err)); + } + + cudaFree(devseeds); devseeds = NULL; + delete[] seeds; seeds = NULL; + + return; +} + + + +void dbuff_rand_dpr64(float *devbuffer, int N, int64_t *rseedinout, int nblocks, int nthreads) +{ + int I; + + return; +} + + + +/////////// +// Tests // +/////////// + +void test_dbuff_rand_dpr32() +{ + cuarray data; + float *dev_data = NULL; + int Nx = 5000; + int Ny = 5000; + cuarray dims; + int32_t rseed = 15; + FILE *fp = NULL; + const char *fname = "./test_scripts/test_dbuff_rand_dpr32.bin"; + + clock_t t0,t1,t2; + double dt0,dt1; + + printf("Tests of dbuff_rand_dpr32...\n"); + + fp = fopen(fname,"w+"); + if(fp==NULL) + { + printf("Error: Could not open %s for writing.\n",fname); + return; + } + + data.resize(Nx*Ny); + dims.resize(2); + dims[0] = Nx; dims[1] = Ny; + cudaMalloc(&dev_data,Nx*Ny*sizeof(float)); + + t0 = clock(); + dbuff_rand_dpr32(dev_data,Nx*Ny,&rseed,256,512); + t1 = clock(); + cudaMemcpy(data.data,dev_data,Nx*Ny*sizeof(float),cudaMemcpyDeviceToHost); + t2 = clock(); + + dt0 = (double)(t1-t0)/(double)CLOCKS_PER_SEC*1000.0; + dt1 = (double)(t2-t1)/(double)CLOCKS_PER_SEC*1000.0; + printf("dbuff_rand_dpr32 exec time: %1.3f msec\n",dt0); + printf("copy (%d,%d) to host time: %1.3f msec\n",Nx,Ny,dt1); + + fwrite_ndarray(fp,&dims,&data); + + t0 = clock(); + dbuff_rand_dpr32n(dev_data,Nx*Ny,&rseed,256,512); + t1 = clock(); + cudaMemcpy(data.data,dev_data,Nx*Ny*sizeof(float),cudaMemcpyDeviceToHost); + t2 = clock(); + + dt0 = (double)(t1-t0)/(double)CLOCKS_PER_SEC*1000.0; + dt1 = (double)(t2-t1)/(double)CLOCKS_PER_SEC*1000.0; + printf("dbuff_rand_dpr32n exec time: %1.3f msec\n",dt0); + printf("copy (%d,%d) to host time: %1.3f msec\n",Nx,Ny,dt1); + + fwrite_ndarray(fp,&dims,&data); + + fclose(fp); + + cudaFree(dev_data); dev_data = NULL; + +} + +}; + diff --git a/src/amsculib2/amscugeom.cu b/src/amsculib2/amscugeom.cu new file mode 100644 index 0000000..f2d5b97 --- /dev/null +++ b/src/amsculib2/amscugeom.cu @@ -0,0 +1,6 @@ +#include + +namespace amscuda +{ + +}; \ No newline at end of file diff --git a/src/amsculib2/amsculib2.cu b/src/amsculib2/amsculib2.cu new file mode 100644 index 0000000..f2d5b97 --- /dev/null +++ b/src/amsculib2/amsculib2.cu @@ -0,0 +1,6 @@ +#include + +namespace amscuda +{ + +}; \ No newline at end of file diff --git a/src/amsculib2/amscumath.cu b/src/amsculib2/amscumath.cu new file mode 100644 index 0000000..154bbca --- /dev/null +++ b/src/amsculib2/amscumath.cu @@ -0,0 +1,269 @@ +#include + +namespace amscuda +{ + + __host__ __device__ double dabs(double x) + { + if(x<0.0) + { + x = -x; + } + return x; + } + + __host__ __device__ float fabs(float x) + { + if(x<0.0f) + { + x = -x; + } + return x; + } + + __host__ __device__ double mod(double x, double md) + { + x = fmod(x,md); + if(x<0.0) + { + x = x + md; + } + return x; + } + + __host__ __device__ float mod(float x, float md) + { + x = fmodf(x,md); + if(x<0.0f) + { + x = x + md; + } + return x; + } + + __host__ __device__ int mod(int x, int n) + { + x = x % n; + if(x<0) + { + x = x + n; + } + return x; + } + + __host__ __device__ long mod(long x, long n) + { + x = x % n; + if(x<0) + { + x = x + n; + } + return x; + } + + __host__ __device__ int truediv(int x, int y) + { + int z = 0; + if(x>=0 && y>0) + { + z = x/y; + } + else if(x<0 && y>0) + { + z = -((-x)/y) - 1; + } + else if(x>=0 && y<0) + { + z = -(x/(-y)) - 1; + } + else if(x<0 && y<0) + { + z = ((-x)/(-y)); + } + + return z; + } + + __host__ __device__ long truediv(long x, long y) + { + int z = 0; + if(x>=0 && y>0) + { + z = x/y; + } + else if(x<0 && y>0) + { + z = -((-x)/y) - 1; + } + else if(x>=0 && y<0) + { + z = -(x/(-y)) - 1; + } + else if(x<0 && y<0) + { + z = ((-x)/(-y)); + } + + return z; + } + + + template<> __host__ __device__ double min(double a, double b) + { + if(isnan(a)) + { + return b; + } + else if(isnan(b)) + { + return a; + } + else if(a>b) + { + return b; + } + else + { + return a; + } + } + + template<> __host__ __device__ float min(float a, float b) + { + if(isnan(a)) + { + return b; + } + else if(isnan(b)) + { + return a; + } + else if(a>b) + { + return b; + } + else + { + return a; + } + } + + template<> __host__ __device__ double max(double a, double b) + { + if(isnan(a)) + { + return b; + } + else if(isnan(b)) + { + return a; + } + else if(a>b) + { + return a; + } + else + { + return b; + } + } + + template<> __host__ __device__ float max(float a, float b) + { + if(isnan(a)) + { + return b; + } + else if(isnan(b)) + { + return a; + } + else if(a>b) + { + return a; + } + else + { + return b; + } + } + + __device__ __host__ double arg(double x, double y) + { + double ret = 0.0; + double z = ::sqrt(x*x+y*y); + + if(z>0.0) + { + if(y<=x && y>=-x) + { + ret = asin(y/z); + } + else if(y>=x && y>=-x) + { + ret = acos(x/z); + } + else if(y>=x && y<=-x) + { + ret = pi-asin(y/z); + } + else + { + ret = 2.0*pi-acos(x/z); + } + } + + if(ret<0.0) + { + ret = 2.0*pi+ret; + } + + return ret; + } + + __device__ __host__ void get_azel(double x, double y, double z, double *az, double *el) + { + //int ret = -2; //should never see this return + double n, rp; + n = ::sqrt(x*x+y*y+z*z); + if(n>0.0) + { + rp = ::sqrt(x*x+y*y); + if(rp>0.0) + { + //ret = 1; //nonzero vector - should work + *az = arg(x,y); + *el = ::atan(z/rp); + } + else + { + //ret = 0; //straight up or straight down + if(z>0.0) + { + *az = 0.0; + *el = pi/2.0; + } + else + { + *az = 0.0; + *el = -pi/2.0; + } + } + } + else + { + *az = 0.0; + *el = 0.0; + //ret = -1; //zero vector - no real azimuth/elevation + } + + return; + } + + + void test_amscumath1() + { + printf("pi = %1.16f\n",amscuda::pi); + } + +}; \ No newline at end of file diff --git a/src/amsculib2/amscurarray.cu b/src/amsculib2/amscurarray.cu new file mode 100644 index 0000000..8b764ff --- /dev/null +++ b/src/amsculib2/amscurarray.cu @@ -0,0 +1,85 @@ +#include + +namespace amscuda +{ + +__global__ void test_amscurarray1_kf1(curarray *q) +{ + int I,J,Na; + int I0 = threadIdx.x + blockIdx.x*blockDim.x; + int Is = blockDim.x*gridDim.x; + int N = q->Narrays; + + for(I=I0; IN[I]; + + //printf("I:%d Na: %d\n",I,Na); + //q->dev_resizearray(I, Na); + for(J=0;Jdevarrayptrs[I][J] = J; + } + } +} + +void test_amscurarray1() +{ + int I; + cudaError_t err = cudaSuccess; + + printf("test_amscurarray1:\n"); + curarray *qarray = NULL; + + curarray_new(&qarray,100); + err = cudaGetLastError(); + if(err!=cudaSuccess) + { + printf("debug error trap 1: %s\n",cudaGetErrorString(err)); + } + + for(I=0;I<100;I++) + { + qarray->resizearray(I,5); + } + + qarray->push(); + qarray->pull(); + + cuda_errortrap("debug: error trap 2"); + + for(I=0;I<5;I++) + { + printf("array[%d], size %d\n",I,qarray->N[I]); + } + + // + for(I=0;I<100;I++) + { + qarray->resizearray(I,I%5); + cuda_errortrap("debug: error trap resize2"); + } + + qarray->push(); + qarray->pull(); + test_amscurarray1_kf1<<<128,1>>>(qarray->devptr); + + cuda_errortrap("debug: error trap kf1"); + + qarray->pull(); + cuda_errortrap("debug: error trap pull2"); + + + for(I=0;I<5;I++) + { + printf("array[%d], size %d\n",I,qarray->N[I]); + } + + curarray_delete(&qarray); + + cuda_errortrap("debug: error trap 3"); + + return; +} + +}; \ No newline at end of file diff --git a/src/amsculib2/cuvect2.cu b/src/amsculib2/cuvect2.cu new file mode 100644 index 0000000..246eff8 --- /dev/null +++ b/src/amsculib2/cuvect2.cu @@ -0,0 +1,361 @@ +#include + +namespace amscuda +{ + + __host__ __device__ cuvect2::cuvect2() + { + x = 0.0; y = 0.0; + return; + } + + __host__ __device__ cuvect2::~cuvect2() + { + x = 0.0; y = 0.0; + return; + } + + __host__ __device__ cuvect2::cuvect2(double _x, double _y) + { + x = _x; y = _y; + return; + } + + __host__ __device__ double& cuvect2::operator[](const int I) + { + if(I==0) return x; + if(I==1) return y; + return x; + } + + __host__ __device__ const double& cuvect2::operator[](const int I) const + { + if(I==0) return x; + if(I==1) return y; + return x; + } + + __host__ __device__ cuvect2 cuvect2::operator+(cuvect2 lhs) + { + cuvect2 ret; + ret.x = x + lhs.x; + ret.y = y + lhs.y; + return ret; + } + + __host__ __device__ cuvect2 cuvect2::operator-(cuvect2 lhs) + { + cuvect2 ret; + ret.x = x - lhs.x; + ret.y = y - lhs.y; + return ret; + } + __host__ __device__ cuvect2 cuvect2::operator*(double lhs) + { + cuvect2 ret; + ret.x = x*lhs; + ret.y = y*lhs; + return ret; + } + + __host__ __device__ cuvect2 cuvect2::operator/(double lhs) + { + cuvect2 ret; + ret.x = x/lhs; + ret.y = y/lhs; + return ret; + } + + __host__ __device__ double cuvect2_dot(cuvect2 a, cuvect2 b) + { + double ret = a.x*b.x+a.y*b.y; + return ret; + } + + __host__ __device__ double cuvect2_cross(cuvect2 a, cuvect2 b) + { + double ret = a.x*b.y-a.y*b.x; + return ret; + } + + __host__ __device__ double cuvect2_norm(cuvect2 a) + { + double ret = ::sqrt(a.x*a.x+a.y*a.y); + return ret; + } + + __host__ __device__ cuvect2 cuvect2_normalize(cuvect2 a) + { + cuvect2 ret; + double m = cuvect2_norm(a); + if(m>0.0) + { + ret.x = a.x/m; ret.y = a.y/m; + } + else + { + ret.x = 0.0; ret.y = 0.0; + } + return ret; + } + + __host__ __device__ cuvect2 cuvect2_proj(cuvect2 a, cuvect2 b) + { + cuvect2 ret; + cuvect2 bn = cuvect2_normalize(b); + double m = cuvect2_dot(a,bn); + ret = bn*m; + return ret; + } + + +__host__ __device__ cumat2::cumat2() +{ + int I; + for(I=0;I<4;I++) + { + dat[I] = 0.0; + } + return; +} + +__host__ __device__ cumat2::~cumat2() +{ + int I; + for(I=0;I<4;I++) + { + dat[I] = 0.0; + } + return; +} + +__host__ __device__ double& cumat2::operator[](const int I) +{ + return dat[I]; +} + +__host__ __device__ double& cumat2::operator()(const int I, const int J) +{ + return dat[I+2*J]; +} + +__host__ __device__ cumat2 cumat2::operator+(cumat2 lhs) +{ + int I; + cumat2 ret; + for(I=0;I<4;I++) + { + ret.dat[I] = this->dat[I] + lhs.dat[I]; + } + return ret; +} + +__host__ __device__ cumat2 cumat2::operator-(cumat2 lhs) +{ + int I; + cumat2 ret; + for(I=0;I<4;I++) + { + ret.dat[I] = this->dat[I] - lhs.dat[I]; + } + return ret; +} + +__host__ __device__ cumat2 cumat2::operator*(double lhs) +{ + cumat2 ret; + int I; + for(I=0;I<4;I++) + { + ret.dat[I] = this->dat[I]*lhs; + } + return ret; +} + +__host__ __device__ cumat2 cumat2::operator/(double lhs) +{ + cumat2 ret; + int I; + for(I=0;I<4;I++) + { + ret.dat[I] = this->dat[I]/lhs; + } + return ret; +} + +__host__ __device__ double& cumat2::at(const int I, const int J) +{ + return dat[I+2*J]; +} + + +__host__ __device__ cuvect2 cumat2::operator*(cuvect2 lhs) +{ + cuvect2 ret = cuvect2(0.0,0.0); + int I,J; + for(I=0;I<2;I++) + { + for(J=0;J<2;J++) + { + ret[I] = ret[I] + this->at(I,J)*lhs[J]; + } + } + return ret; +} + +__host__ __device__ cumat2 cumat2::operator*(cumat2 lhs) +{ + cumat2 ret; + int I,J,K; + + for(I=0;I<2;I++) + { + for(J=0;J<2;J++) + { + ret(I,J) = 0.0; + for(K=0;K<2;K++) + { + ret(I,J) = ret(I,J) + this->at(I,K)*lhs(K,J); + } + } + } + + return ret; +} + +__host__ __device__ double cumat2::det() +{ + double ret = 0.0; + ret = ret + this->at(0,0)*this->at(1,1); + ret = ret - this->at(1,0)*this->at(0,1); + return ret; +} + +__host__ __device__ cumat2 cumat2::transpose() +{ + cumat2 q; + int I,J; + for(I=0;I<2;I++) + { + for(J=0;J<2;J++) + { + q.at(I,J) = this->at(J,I); + } + } + return q; +} + +__host__ __device__ cumat2 cumat2::inverse() +{ + cumat2 q; + double dt = q.det(); + if(dt!=0.0) + { + q(0,0) = this->at(1,1)/dt; + q(0,1) = -this->at(0,1)/dt; + q(1,0) = -this->at(1,0)/dt; + q(1,1) = this->at(0,0)/dt; + } + else + { + q(0,0) = inf; + q(0,1) = inf; + q(1,0) = inf; + q(1,1) = inf; + } + + return q; +} + +//2x2 matrix operations +//matrix order is assumed to be mat[I,J] = mat[I+3*J] + +//transpose a 2x2 matrix in place +__host__ __device__ void mat2_transpose(double *mat2inout) +{ + double mat2_in[4]; + + mat2_copy(mat2_in,mat2inout); + mat2inout[0+0*2] = mat2_in[0+0*2]; + mat2inout[1+0*2] = mat2_in[0+1*2]; + mat2inout[0+1*2] = mat2_in[1+0*2]; + mat2inout[1+1*2] = mat2_in[1+1*2]; + + return; +} + +//copies src to dest +__host__ __device__ void mat2_copy(double *mat2_dest, const double *mat2_src) +{ + mat2_dest[0+0*2] = mat2_src[0+0*2]; + mat2_dest[1+0*2] = mat2_src[1+0*2]; + mat2_dest[0+1*2] = mat2_src[0+1*2]; + mat2_dest[1+1*2] = mat2_src[1+1*2]; + + return; +} + +//inverts mat?inout[4] +__host__ __device__ void mat2_inverse(double *mat2inout) +{ + double det = mat2inout[0+0*2]*mat2inout[1+1*2]-mat2inout[0+1*2]*mat2inout[1+0*2]; + double mat2in[4]; + + mat2_copy(mat2in,mat2inout); + mat2inout[0+0*2] = mat2inout[1+1*2]/det; + mat2inout[1+0*2] = -mat2inout[1+0*2]/det; + mat2inout[0+1*2] = -mat2inout[0+1*2]/det; + mat2inout[1+1*2] = mat2inout[0+0*2]/det; + + return; +} + +//rotatin matrix from angle +__host__ __device__ void mat2_rot_from_angle(double angle, double *mat2) +{ + mat2[0+0*2] = ::cos(angle); + mat2[1+0*2] = ::sin(angle); + mat2[0+1*2] = -::sin(angle); + mat2[1+1*2] = ::cos(angle); + return; +} + +//multiplies c = a*b +__host__ __device__ void mat2_mult(double *mat2a, double *mat2b, double *mat2c) +{ + double mat2a_in[4]; + double mat2b_in[4]; + + if(mat2a==NULL || mat2b==NULL || mat2c==NULL) + { + return; + } + + mat2_copy(mat2a_in,mat2a); + mat2_copy(mat2b_in,mat2b); + + mat2c[0+0*2] = mat2a_in[0+0*2]*mat2b_in[0+0*2] + mat2a_in[1+0*2]*mat2b_in[0+1*2]; + mat2c[1+0*2] = mat2a_in[0+0*2]*mat2b_in[1+0*2] + mat2a_in[1+0*2]*mat2b_in[1+1*2]; + mat2c[0+1*2] = mat2a_in[0+1*2]*mat2b_in[0+0*2] + mat2a_in[1+1*2]*mat2b_in[0+1*2]; + mat2c[1+1*2] = mat2a_in[0+1*2]*mat2b_in[1+0*2] + mat2a_in[1+1*2]*mat2b_in[1+1*2]; + + return; +} + +// ret = a*b +__host__ __device__ cuvect2 mat2_mult(double *mat2a, cuvect2 b) +{ + cuvect2 ret; + ret.x = b.x*mat2a[0+0*2] + b.y*mat2a[1+0*2]; + ret.y = b.x*mat2a[0+1*2] + b.y*mat2a[1+1*2]; + return ret; +} + +void test_cuvect2_1() +{ + + + return; +} + +}; \ No newline at end of file diff --git a/src/amsculib2/cuvect2f.cu b/src/amsculib2/cuvect2f.cu new file mode 100644 index 0000000..891d39c --- /dev/null +++ b/src/amsculib2/cuvect2f.cu @@ -0,0 +1,361 @@ +#include + +namespace amscuda +{ + + __host__ __device__ cuvect2f::cuvect2f() + { + x = 0.0; y = 0.0; + return; + } + + __host__ __device__ cuvect2f::~cuvect2f() + { + x = 0.0; y = 0.0; + return; + } + + __host__ __device__ cuvect2f::cuvect2f(float _x, float _y) + { + x = _x; y = _y; + return; + } + + __host__ __device__ float& cuvect2f::operator[](const int I) + { + if(I==0) return x; + if(I==1) return y; + return x; + } + + __host__ __device__ const float& cuvect2f::operator[](const int I) const + { + if(I==0) return x; + if(I==1) return y; + return x; + } + + __host__ __device__ cuvect2f cuvect2f::operator+(cuvect2f lhs) + { + cuvect2f ret; + ret.x = x + lhs.x; + ret.y = y + lhs.y; + return ret; + } + + __host__ __device__ cuvect2f cuvect2f::operator-(cuvect2f lhs) + { + cuvect2f ret; + ret.x = x - lhs.x; + ret.y = y - lhs.y; + return ret; + } + __host__ __device__ cuvect2f cuvect2f::operator*(float lhs) + { + cuvect2f ret; + ret.x = x*lhs; + ret.y = y*lhs; + return ret; + } + + __host__ __device__ cuvect2f cuvect2f::operator/(float lhs) + { + cuvect2f ret; + ret.x = x/lhs; + ret.y = y/lhs; + return ret; + } + + __host__ __device__ float cuvect2f_dot(cuvect2f a, cuvect2f b) + { + float ret = a.x*b.x+a.y*b.y; + return ret; + } + + __host__ __device__ float cuvect2f_cross(cuvect2f a, cuvect2f b) + { + float ret = a.x*b.y-a.y*b.x; + return ret; + } + + __host__ __device__ float cuvect2f_norm(cuvect2f a) + { + float ret = ::sqrtf(a.x*a.x+a.y*a.y); + return ret; + } + + __host__ __device__ cuvect2f cuvect2f_normalize(cuvect2f a) + { + cuvect2f ret; + float m = cuvect2f_norm(a); + if(m>0.0) + { + ret.x = a.x/m; ret.y = a.y/m; + } + else + { + ret.x = 0.0; ret.y = 0.0; + } + return ret; + } + + __host__ __device__ cuvect2f cuvect2f_proj(cuvect2f a, cuvect2f b) + { + cuvect2f ret; + cuvect2f bn = cuvect2f_normalize(b); + float m = cuvect2f_dot(a,bn); + ret = bn*m; + return ret; + } + + +__host__ __device__ cumat2f::cumat2f() +{ + int I; + for(I=0;I<4;I++) + { + dat[I] = 0.0; + } + return; +} + +__host__ __device__ cumat2f::~cumat2f() +{ + int I; + for(I=0;I<4;I++) + { + dat[I] = 0.0; + } + return; +} + +__host__ __device__ float& cumat2f::operator[](const int I) +{ + return dat[I]; +} + +__host__ __device__ float& cumat2f::operator()(const int I, const int J) +{ + return dat[I+2*J]; +} + +__host__ __device__ cumat2f cumat2f::operator+(cumat2f lhs) +{ + int I; + cumat2f ret; + for(I=0;I<4;I++) + { + ret.dat[I] = this->dat[I] + lhs.dat[I]; + } + return ret; +} + +__host__ __device__ cumat2f cumat2f::operator-(cumat2f lhs) +{ + int I; + cumat2f ret; + for(I=0;I<4;I++) + { + ret.dat[I] = this->dat[I] - lhs.dat[I]; + } + return ret; +} + +__host__ __device__ cumat2f cumat2f::operator*(float lhs) +{ + cumat2f ret; + int I; + for(I=0;I<4;I++) + { + ret.dat[I] = this->dat[I]*lhs; + } + return ret; +} + +__host__ __device__ cumat2f cumat2f::operator/(float lhs) +{ + cumat2f ret; + int I; + for(I=0;I<4;I++) + { + ret.dat[I] = this->dat[I]/lhs; + } + return ret; +} + +__host__ __device__ float& cumat2f::at(const int I, const int J) +{ + return dat[I+2*J]; +} + + +__host__ __device__ cuvect2f cumat2f::operator*(cuvect2f lhs) +{ + cuvect2f ret = cuvect2f(0.0,0.0); + int I,J; + for(I=0;I<2;I++) + { + for(J=0;J<2;J++) + { + ret[I] = ret[I] + this->at(I,J)*lhs[J]; + } + } + return ret; +} + +__host__ __device__ cumat2f cumat2f::operator*(cumat2f lhs) +{ + cumat2f ret; + int I,J,K; + + for(I=0;I<2;I++) + { + for(J=0;J<2;J++) + { + ret(I,J) = 0.0; + for(K=0;K<2;K++) + { + ret(I,J) = ret(I,J) + this->at(I,K)*lhs(K,J); + } + } + } + + return ret; +} + +__host__ __device__ float cumat2f::det() +{ + float ret = 0.0; + ret = ret + this->at(0,0)*this->at(1,1); + ret = ret - this->at(1,0)*this->at(0,1); + return ret; +} + +__host__ __device__ cumat2f cumat2f::transpose() +{ + cumat2f q; + int I,J; + for(I=0;I<2;I++) + { + for(J=0;J<2;J++) + { + q.at(I,J) = this->at(J,I); + } + } + return q; +} + +__host__ __device__ cumat2f cumat2f::inverse() +{ + cumat2f q; + float dt = q.det(); + if(dt!=0.0) + { + q(0,0) = this->at(1,1)/dt; + q(0,1) = -this->at(0,1)/dt; + q(1,0) = -this->at(1,0)/dt; + q(1,1) = this->at(0,0)/dt; + } + else + { + q(0,0) = inf; + q(0,1) = inf; + q(1,0) = inf; + q(1,1) = inf; + } + + return q; +} + +//2x2 matrix operations +//matrix order is assumed to be mat[I,J] = mat[I+3*J] + +//transpose a 2x2 matrix in place +__host__ __device__ void mat2f_transpose(float *mat2inout) +{ + float mat2f_in[4]; + + mat2f_copy(mat2f_in,mat2inout); + mat2inout[0+0*2] = mat2f_in[0+0*2]; + mat2inout[1+0*2] = mat2f_in[0+1*2]; + mat2inout[0+1*2] = mat2f_in[1+0*2]; + mat2inout[1+1*2] = mat2f_in[1+1*2]; + + return; +} + +//copies src to dest +__host__ __device__ void mat2f_copy(float *mat2f_dest, const float *mat2f_src) +{ + mat2f_dest[0+0*2] = mat2f_src[0+0*2]; + mat2f_dest[1+0*2] = mat2f_src[1+0*2]; + mat2f_dest[0+1*2] = mat2f_src[0+1*2]; + mat2f_dest[1+1*2] = mat2f_src[1+1*2]; + + return; +} + +//inverts mat?inout[4] +__host__ __device__ void mat2f_inverse(float *mat2inout) +{ + float det = mat2inout[0+0*2]*mat2inout[1+1*2]-mat2inout[0+1*2]*mat2inout[1+0*2]; + float mat2in[4]; + + mat2f_copy(mat2in,mat2inout); + mat2inout[0+0*2] = mat2inout[1+1*2]/det; + mat2inout[1+0*2] = -mat2inout[1+0*2]/det; + mat2inout[0+1*2] = -mat2inout[0+1*2]/det; + mat2inout[1+1*2] = mat2inout[0+0*2]/det; + + return; +} + +//rotatin matrix from angle +__host__ __device__ void mat2f_rot_from_angle(float angle, float *mat2) +{ + mat2[0+0*2] = ::cosf(angle); + mat2[1+0*2] = ::sinf(angle); + mat2[0+1*2] = -::sinf(angle); + mat2[1+1*2] = ::cosf(angle); + return; +} + +//multiplies c = a*b +__host__ __device__ void mat2f_mult(float *mat2a, float *mat2b, float *mat2c) +{ + float mat2a_in[4]; + float mat2b_in[4]; + + if(mat2a==NULL || mat2b==NULL || mat2c==NULL) + { + return; + } + + mat2f_copy(mat2a_in,mat2a); + mat2f_copy(mat2b_in,mat2b); + + mat2c[0+0*2] = mat2a_in[0+0*2]*mat2b_in[0+0*2] + mat2a_in[1+0*2]*mat2b_in[0+1*2]; + mat2c[1+0*2] = mat2a_in[0+0*2]*mat2b_in[1+0*2] + mat2a_in[1+0*2]*mat2b_in[1+1*2]; + mat2c[0+1*2] = mat2a_in[0+1*2]*mat2b_in[0+0*2] + mat2a_in[1+1*2]*mat2b_in[0+1*2]; + mat2c[1+1*2] = mat2a_in[0+1*2]*mat2b_in[1+0*2] + mat2a_in[1+1*2]*mat2b_in[1+1*2]; + + return; +} + +// ret = a*b +__host__ __device__ cuvect2f mat2f_mult(float *mat2a, cuvect2f b) +{ + cuvect2f ret; + ret.x = b.x*mat2a[0+0*2] + b.y*mat2a[1+0*2]; + ret.y = b.x*mat2a[0+1*2] + b.y*mat2a[1+1*2]; + return ret; +} + +void test_cuvect2f_1() +{ + + + return; +} + +}; \ No newline at end of file diff --git a/src/amsculib2/cuvect3.cu b/src/amsculib2/cuvect3.cu new file mode 100644 index 0000000..a4858c7 --- /dev/null +++ b/src/amsculib2/cuvect3.cu @@ -0,0 +1,581 @@ +#include + +namespace amscuda +{ + + __host__ __device__ cuvect3::cuvect3() + { + x = 0.0; y = 0.0; z = 0.0; + return; + } + + __host__ __device__ cuvect3::~cuvect3() + { + x = 0.0; y = 0.0; z = 0.0; + return; + } + + __host__ __device__ double& cuvect3::operator[](const int I) + { + if(I==0) return x; + if(I==1) return y; + if(I==2) return z; + return x; + } + + __host__ __device__ const double& cuvect3::operator[](const int I) const + { + if(I==0) return x; + if(I==1) return y; + if(I==2) return z; + return x; + } + + __host__ __device__ cuvect3 cuvect3::operator+(cuvect3 lhs) + { + cuvect3 ret; + ret.x = x+lhs.x; + ret.y = y+lhs.y; + ret.z = z+lhs.z; + + return ret; + } + + __host__ __device__ cuvect3 cuvect3::operator-(cuvect3 lhs) + { + cuvect3 ret; + ret.x = x-lhs.x; + ret.y = y-lhs.y; + ret.z = z-lhs.z; + + return ret; + } + + __host__ __device__ cuvect3 cuvect3::operator*(double lhs) + { + cuvect3 ret; + ret.x = x*lhs; + ret.y = y*lhs; + ret.z = z*lhs; + return ret; + } + + __host__ __device__ cuvect3 cuvect3::operator/(double lhs) + { + cuvect3 ret; + ret.x = x/lhs; + ret.y = y/lhs; + ret.z = z/lhs; + return ret; + } + + __host__ __device__ cuvect3::cuvect3(double _x, double _y, double _z) + { + x = _x; y = _y; z = _z; + return; + } + + + __host__ __device__ double cuvect3_dot(cuvect3 a, cuvect3 b) + { + double ret = a.x*b.x+a.y*b.y+a.z*b.z; + + return ret; + } + + __host__ __device__ cuvect3 cuvect3_cross(cuvect3 a, cuvect3 b) + { + cuvect3 ret; + ret[0] = a[1]*b[2]-a[2]*b[1]; + ret[1] = a[2]*b[0]-a[0]*b[2]; + ret[2] = a[0]*b[1]-a[1]*b[0]; + + return ret; + } + + __host__ __device__ double cuvect3_norm(cuvect3 a) + { + double ret; + ret = ::sqrt(a.x*a.x+a.y*a.y+a.z*a.z); + return ret; + } + + __host__ __device__ cuvect3 cuvect3_normalize(cuvect3 a) + { + cuvect3 ret; + double m; + m = ::sqrt(a.x*a.x+a.y*a.y+a.z*a.z); + if(m>0.0) + { + ret.x = a.x/m; ret.y = a.y/m; ret.z = a.z/m; + } + else + { + ret.x = 0.0; ret.y = 0.0; ret.z = 0.0; + } + + return ret; + } + + __host__ __device__ cuvect3 cuvect3_proj(cuvect3 a, cuvect3 b) + { + cuvect3 ret; + cuvect3 bn = cuvect3_normalize(b); + double m = cuvect3_dot(a,bn); + ret = bn*m; + return ret; + } + +//transposes a 3x3 (9 element) matrix +__host__ __device__ void mat3_transpose(double *mat3inout) +{ + int I,J; + double matint[9]; + for(I=0;I<9;I++) + { + matint[I] = mat3inout[I]; + } + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + mat3inout[I+J*3] = matint[J+I*3]; + } + } + + return; +} + +//copies src to dest +__host__ __device__ void mat3_copy(double *mat3_dest, const double *mat3_src) +{ + int I; + if(mat3_dest==NULL || mat3_src==NULL) + return; + + for(I=0;I<9;I++) + mat3_dest[I] = mat3_src[I]; + + return; +} + + +__host__ __device__ double mat3_det(double *mat3in) +{ + double ret = 0.0; + + ret = ret + mat3in[0+0*3]*mat3in[1+1*3]*mat3in[2+2*3]; + ret = ret + mat3in[0+1*3]*mat3in[1+2*3]*mat3in[2+0*3]; + ret = ret + mat3in[0+2*3]*mat3in[1+0*3]*mat3in[2+1*3]; + ret = ret - mat3in[0+0*3]*mat3in[1+2*3]*mat3in[2+1*3]; + ret = ret - mat3in[0+1*3]*mat3in[1+0*3]*mat3in[2+2*3]; + ret = ret - mat3in[0+2*3]*mat3in[1+1*3]*mat3in[2+0*3]; + + return ret; +} + +//inverts a 3x3 (9 element) matrix +__host__ __device__ void mat3_inverse(double *mat3inout) +{ + int I; + double matint[9]; + double det = mat3_det(mat3inout); + + for(I=0;I<9;I++) + { + matint[I] = mat3inout[I]; + } + + mat3inout[0+0*3] = (matint[1+1*3]*matint[2+2*3]-matint[1+2*3]*matint[2+1*3])/det; + mat3inout[0+1*3] = -(matint[1+0*3]*matint[2+2*3]-matint[1+2*3]*matint[2+0*3])/det; + mat3inout[0+2*3] = (matint[1+0*3]*matint[2+1*3]-matint[1+1*3]*matint[2+0*3])/det; + mat3inout[1+0*3] = -(matint[0+1*3]*matint[2+2*3]-matint[0+2*3]*matint[2+1*3])/det; + mat3inout[1+1*3] = (matint[0+0*3]*matint[2+2*3]-matint[0+2*3]*matint[2+0*3])/det; + mat3inout[1+2*3] = -(matint[0+0*3]*matint[2+1*3]-matint[0+1*3]*matint[2+0*3])/det; + mat3inout[2+0*3] = (matint[0+1*3]*matint[1+2*3]-matint[0+2*3]*matint[1+1*3])/det; + mat3inout[2+1*3] = -(matint[0+0*3]*matint[1+2*3]-matint[0+2*3]*matint[1+0*3])/det; + mat3inout[2+2*3] = (matint[0+0*3]*matint[1+1*3]-matint[0+1*3]*matint[1+0*3])/det; + + mat3_transpose(mat3inout); + + return; +} + +__host__ __device__ cuvect3 mat3_mult(double *mat3in, cuvect3 cvin) +{ + int I,J; + cuvect3 ret; + for(I=0;I<3;I++) + { + ret[I] = 0.0; + for(J=0;J<3;J++) + { + ret[I] = ret[I] + mat3in[I+3*J]*cvin[J]; + } + } + + return ret; +} + +__host__ __device__ void mat3_mult(double *matina, double *matinb, double *matout) +{ + double wrk[9]; + int I,J,K; + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + wrk[I+3*J] = 0.0; + } + } + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + for(K=0;K<3;K++) + { + wrk[I+3*K] = wrk[I+3*K] + matina[I+3*J]*matinb[J+3*K]; + } + } + } + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + matout[I+3*J] = wrk[I+3*J]; + } + } + + return; +} + +__host__ __device__ void mat3_hodgedual(cuvect3 vecin, double *matout) +{ + matout[0 + 0*3] = 0.0f; + matout[1 + 0*3] = -vecin[2]; + matout[2 + 0*3] = vecin[1]; + + matout[0 + 1*3] = vecin[2]; + matout[1 + 1*3] = 0.0f; + matout[2 + 1*3] = -vecin[0]; + + matout[0 + 2*3] = -vecin[1]; + matout[1 + 2*3] = vecin[0]; + matout[2 + 2*3] = 0.0f; + return; +} + +__host__ __device__ void mat3_hodgedual(double *matin, cuvect3 vecout) +{ + vecout[0] = 0.5*(matin[1 + 2*3] - matin[2 + 1*3]); + vecout[1] = 0.5*(matin[2 + 0*3] - matin[0 + 2*3]); + vecout[2] = 0.5*(matin[0 + 1*3] - matin[1 + 0*3]); + + return; +} + +//returns direction cosine rotation matrix from axis and angle +__host__ __device__ void mat3_rot_from_axisangle(cuvect3 axis, double angle, double *matout) +{ + int I; + double H[9]; + double Hsq[9]; + double II[9]; + + for(I=0;I<9;I++) II[I] = 0.0; + II[0+0*3] = 1.0; + II[1+1*3] = 1.0; + II[2+2*3] = 1.0; + + axis = cuvect3_normalize(axis); + + mat3_hodgedual(axis,H); + mat3_mult(H,H,Hsq); + + for(I=0;I<9;I++) + { + matout[I] = (II[I] + Hsq[I]) + H[I]*sin(angle) - Hsq[I]*cos(angle); + } + + return; +} + + + +__host__ void test_cudavect_logic1() +{ + //3 dim vector and matrix functional tests on host side + + printf("3 dim vector and matrix functional tests on host side\n"); + + cuvect3 a,b,c; + double ma[9],mb[9],mc[9]; + + int I,J; + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + ma[I+3*J] = ((double) rand())/((double) RAND_MAX); + mb[I+3*J] = ma[I+3*J]; + } + } + + mat3_inverse(mb); + mat3_mult(ma,mb,mc); + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + printf("ma[%d,%d] = %1.3f\n",I,J,ma[I+3*J]); + } + } + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + printf("mb[%d,%d] = %1.3f\n",I,J,mb[I+3*J]); + } + } + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + printf("mc[%d,%d] = %1.3f\n",I,J,mc[I+3*J]); + } + } + + a = cuvect3(1,1,1); + b = mat3_mult(ma,a); + b = mat3_mult(mb,b); + + for(I=0;I<3;I++) + { + printf("a[%d] = %1.3f, b[%d] = %1.3f\n",I,a[I],I,b[I]); + } + + a = cuvect3(1,0,1); + b = cuvect3(0,1,-1); + c = a+b; + + for(I=0;I<3;I++) + { + printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]); + } + + c = c/2.0; + + for(I=0;I<3;I++) + { + printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]); + } + + c = cuvect3_cross(a,b); + + for(I=0;I<3;I++) + { + printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]); + } + + printf("c dot a = %1.3f, c dot b = %1.3f\n",cuvect3_dot(c,a),cuvect3_dot(c,b)); + + printf("norm(a)=%1.3f, norm(b)=%1.3f, norm(c)=%1.3f\n",cuvect3_norm(a),cuvect3_norm(b),cuvect3_norm(c)); + a = cuvect3_normalize(a); + b = cuvect3_normalize(b); + c = cuvect3_normalize(c); + + for(I=0;I<3;I++) + { + printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]); + } + printf("c dot a = %1.3f, c dot b = %1.3f\n",cuvect3_dot(c,a),cuvect3_dot(c,b)); + printf("norm(a)=%1.3f, norm(b)=%1.3f, norm(c)=%1.3f\n",cuvect3_norm(a),cuvect3_norm(b),cuvect3_norm(c)); + + return; +} + + + +__host__ __device__ cumat3::cumat3() +{ + int I; + for(I=0;I<9;I++) + { + dat[I] = 0.0; + } + return; +} + +__host__ __device__ cumat3::~cumat3() +{ + int I; + for(I=0;I<9;I++) + { + dat[I] = 0.0; + } + return; +} + +__host__ __device__ double& cumat3::operator[](const int I) +{ + return dat[I]; +} + +__host__ __device__ double& cumat3::operator()(const int I, const int J) +{ + return dat[I+3*J]; +} + +__host__ __device__ cumat3 cumat3::operator+(cumat3 lhs) +{ + int I; + cumat3 ret; + for(I=0;I<9;I++) + { + ret.dat[I] = this->dat[I] + lhs.dat[I]; + } + return ret; +} + +__host__ __device__ cumat3 cumat3::operator-(cumat3 lhs) +{ + int I; + cumat3 ret; + for(I=0;I<9;I++) + { + ret.dat[I] = this->dat[I] - lhs.dat[I]; + } + return ret; +} + +__host__ __device__ cumat3 cumat3::operator*(double lhs) +{ + cumat3 ret; + int I; + for(I=0;I<9;I++) + { + ret.dat[I] = this->dat[I]*lhs; + } + return ret; +} + +__host__ __device__ cumat3 cumat3::operator/(double lhs) +{ + cumat3 ret; + int I; + for(I=0;I<9;I++) + { + ret.dat[I] = this->dat[I]/lhs; + } + return ret; +} + +__host__ __device__ double& cumat3::at(const int I, const int J) +{ + return dat[I+3*J]; +} + + +__host__ __device__ cuvect3 cumat3::operator*(cuvect3 lhs) +{ + cuvect3 ret = cuvect3(0.0,0.0,0.0); + int I,J; + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + ret[I] = ret[I] + this->at(I,J)*lhs[J]; + } + } + return ret; +} + +__host__ __device__ cumat3 cumat3::operator*(cumat3 lhs) +{ + cumat3 ret; + int I,J,K; + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + ret(I,J) = 0.0; + for(K=0;K<3;K++) + { + ret(I,J) = ret(I,J) + this->at(I,K)*lhs(K,J); + } + } + } + + return ret; +} + +__host__ __device__ double cumat3::det() +{ + double ret = 0.0; + + ret = ret + this->at(0,0)*this->at(1,1)*this->at(2,2); + ret = ret + this->at(0,1)*this->at(1,2)*this->at(2,0); + ret = ret + this->at(0,2)*this->at(1,0)*this->at(2,1); + ret = ret - this->at(0,0)*this->at(1,2)*this->at(2,1); + ret = ret - this->at(0,1)*this->at(1,0)*this->at(2,2); + ret = ret - this->at(0,2)*this->at(1,1)*this->at(2,0); + + return ret; +} + +__host__ __device__ cumat3 cumat3::transpose() +{ + cumat3 q; + int I,J; + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + q.at(I,J) = this->at(J,I); + } + } + return q; +} + +__host__ __device__ cumat3 cumat3::inverse() +{ + cumat3 q; + double dt = q.det(); + if(dt!=0.0) + { + q(0,0) = (this->at(1,1)*this->at(2,2)-this->at(1,2)*this->at(2,1))/dt; + q(0,1) = -(this->at(1,0)*this->at(2,2)-this->at(1,2)*this->at(2,0))/dt; + q(0,2) = (this->at(1,0)*this->at(2,1)-this->at(1,1)*this->at(2,0))/dt; + q(1,0) = -(this->at(0,1)*this->at(2,2)-this->at(0,2)*this->at(2,1))/dt; + q(1,1) = (this->at(0,0)*this->at(2,2)-this->at(0,2)*this->at(2,0))/dt; + q(1,2) = -(this->at(0,0)*this->at(2,1)-this->at(0,1)*this->at(2,0))/dt; + q(2,0) = (this->at(0,1)*this->at(1,2)-this->at(0,2)*this->at(1,1))/dt; + q(2,1) = -(this->at(0,0)*this->at(1,2)-this->at(0,2)*this->at(1,0))/dt; + q(2,2) = (this->at(0,0)*this->at(1,1)-this->at(0,1)*this->at(1,0))/dt; + + q = q.transpose(); + } + else + { + q(0,0) = inf; + q(0,1) = inf; + q(0,2) = inf; + q(1,0) = inf; + q(1,1) = inf; + q(1,2) = inf; + q(2,0) = inf; + q(2,1) = inf; + q(2,2) = inf; + } + + return q; +} + +}; \ No newline at end of file diff --git a/src/amsculib2/cuvect3f.cu b/src/amsculib2/cuvect3f.cu new file mode 100644 index 0000000..55bf631 --- /dev/null +++ b/src/amsculib2/cuvect3f.cu @@ -0,0 +1,581 @@ +#include + +namespace amscuda +{ + + __host__ __device__ cuvect3f::cuvect3f() + { + x = 0.0; y = 0.0; z = 0.0; + return; + } + + __host__ __device__ cuvect3f::~cuvect3f() + { + x = 0.0; y = 0.0; z = 0.0; + return; + } + + __host__ __device__ float& cuvect3f::operator[](const int I) + { + if(I==0) return x; + if(I==1) return y; + if(I==2) return z; + return x; + } + + __host__ __device__ const float& cuvect3f::operator[](const int I) const + { + if(I==0) return x; + if(I==1) return y; + if(I==2) return z; + return x; + } + + __host__ __device__ cuvect3f cuvect3f::operator+(cuvect3f lhs) + { + cuvect3f ret; + ret.x = x+lhs.x; + ret.y = y+lhs.y; + ret.z = z+lhs.z; + + return ret; + } + + __host__ __device__ cuvect3f cuvect3f::operator-(cuvect3f lhs) + { + cuvect3f ret; + ret.x = x-lhs.x; + ret.y = y-lhs.y; + ret.z = z-lhs.z; + + return ret; + } + + __host__ __device__ cuvect3f cuvect3f::operator*(float lhs) + { + cuvect3f ret; + ret.x = x*lhs; + ret.y = y*lhs; + ret.z = z*lhs; + return ret; + } + + __host__ __device__ cuvect3f cuvect3f::operator/(float lhs) + { + cuvect3f ret; + ret.x = x/lhs; + ret.y = y/lhs; + ret.z = z/lhs; + return ret; + } + + __host__ __device__ cuvect3f::cuvect3f(float _x, float _y, float _z) + { + x = _x; y = _y; z = _z; + return; + } + + + __host__ __device__ float cuvect3f_dot(cuvect3f a, cuvect3f b) + { + float ret = a.x*b.x+a.y*b.y+a.z*b.z; + + return ret; + } + + __host__ __device__ cuvect3f cuvect3f_cross(cuvect3f a, cuvect3f b) + { + cuvect3f ret; + ret[0] = a[1]*b[2]-a[2]*b[1]; + ret[1] = a[2]*b[0]-a[0]*b[2]; + ret[2] = a[0]*b[1]-a[1]*b[0]; + + return ret; + } + + __host__ __device__ float cuvect3f_norm(cuvect3f a) + { + float ret; + ret = ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); + return ret; + } + + __host__ __device__ cuvect3f cuvect3f_normalize(cuvect3f a) + { + cuvect3f ret; + float m; + m = ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); + if(m>0.0) + { + ret.x = a.x/m; ret.y = a.y/m; ret.z = a.z/m; + } + else + { + ret.x = 0.0; ret.y = 0.0; ret.z = 0.0; + } + + return ret; + } + + __host__ __device__ cuvect3f cuvect3f_proj(cuvect3f a, cuvect3f b) + { + cuvect3f ret; + cuvect3f bn = cuvect3f_normalize(b); + float m = cuvect3f_dot(a,bn); + ret = bn*m; + return ret; + } + +//transposes a 3x3 (9 element) matrix +__host__ __device__ void mat3f_transpose(float *mat3inout) +{ + int I,J; + float matint[9]; + for(I=0;I<9;I++) + { + matint[I] = mat3inout[I]; + } + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + mat3inout[I+J*3] = matint[J+I*3]; + } + } + + return; +} + +//copies src to dest +__host__ __device__ void mat3f_copy(float *mat3f_dest, const float *mat3f_src) +{ + int I; + if(mat3f_dest==NULL || mat3f_src==NULL) + return; + + for(I=0;I<9;I++) + mat3f_dest[I] = mat3f_src[I]; + + return; +} + + +__host__ __device__ float mat3f_det(float *mat3in) +{ + float ret = 0.0; + + ret = ret + mat3in[0+0*3]*mat3in[1+1*3]*mat3in[2+2*3]; + ret = ret + mat3in[0+1*3]*mat3in[1+2*3]*mat3in[2+0*3]; + ret = ret + mat3in[0+2*3]*mat3in[1+0*3]*mat3in[2+1*3]; + ret = ret - mat3in[0+0*3]*mat3in[1+2*3]*mat3in[2+1*3]; + ret = ret - mat3in[0+1*3]*mat3in[1+0*3]*mat3in[2+2*3]; + ret = ret - mat3in[0+2*3]*mat3in[1+1*3]*mat3in[2+0*3]; + + return ret; +} + +//inverts a 3x3 (9 element) matrix +__host__ __device__ void mat3f_inverse(float *mat3inout) +{ + int I; + float matint[9]; + float det = mat3f_det(mat3inout); + + for(I=0;I<9;I++) + { + matint[I] = mat3inout[I]; + } + + mat3inout[0+0*3] = (matint[1+1*3]*matint[2+2*3]-matint[1+2*3]*matint[2+1*3])/det; + mat3inout[0+1*3] = -(matint[1+0*3]*matint[2+2*3]-matint[1+2*3]*matint[2+0*3])/det; + mat3inout[0+2*3] = (matint[1+0*3]*matint[2+1*3]-matint[1+1*3]*matint[2+0*3])/det; + mat3inout[1+0*3] = -(matint[0+1*3]*matint[2+2*3]-matint[0+2*3]*matint[2+1*3])/det; + mat3inout[1+1*3] = (matint[0+0*3]*matint[2+2*3]-matint[0+2*3]*matint[2+0*3])/det; + mat3inout[1+2*3] = -(matint[0+0*3]*matint[2+1*3]-matint[0+1*3]*matint[2+0*3])/det; + mat3inout[2+0*3] = (matint[0+1*3]*matint[1+2*3]-matint[0+2*3]*matint[1+1*3])/det; + mat3inout[2+1*3] = -(matint[0+0*3]*matint[1+2*3]-matint[0+2*3]*matint[1+0*3])/det; + mat3inout[2+2*3] = (matint[0+0*3]*matint[1+1*3]-matint[0+1*3]*matint[1+0*3])/det; + + mat3f_transpose(mat3inout); + + return; +} + +__host__ __device__ cuvect3f mat3f_mult(float *mat3in, cuvect3f cvin) +{ + int I,J; + cuvect3f ret; + for(I=0;I<3;I++) + { + ret[I] = 0.0; + for(J=0;J<3;J++) + { + ret[I] = ret[I] + mat3in[I+3*J]*cvin[J]; + } + } + + return ret; +} + +__host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout) +{ + float wrk[9]; + int I,J,K; + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + wrk[I+3*J] = 0.0; + } + } + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + for(K=0;K<3;K++) + { + wrk[I+3*K] = wrk[I+3*K] + matina[I+3*J]*matinb[J+3*K]; + } + } + } + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + matout[I+3*J] = wrk[I+3*J]; + } + } + + return; +} + +__host__ __device__ void mat3f_hodgedual(cuvect3f vecin, float *matout) +{ + matout[0 + 0*3] = 0.0f; + matout[1 + 0*3] = -vecin[2]; + matout[2 + 0*3] = vecin[1]; + + matout[0 + 1*3] = vecin[2]; + matout[1 + 1*3] = 0.0f; + matout[2 + 1*3] = -vecin[0]; + + matout[0 + 2*3] = -vecin[1]; + matout[1 + 2*3] = vecin[0]; + matout[2 + 2*3] = 0.0f; + return; +} + +__host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f vecout) +{ + vecout[0] = 0.5*(matin[1 + 2*3] - matin[2 + 1*3]); + vecout[1] = 0.5*(matin[2 + 0*3] - matin[0 + 2*3]); + vecout[2] = 0.5*(matin[0 + 1*3] - matin[1 + 0*3]); + + return; +} + +//returns direction cosine rotation matrix from axis and angle +__host__ __device__ void mat3f_rot_from_axisangle(cuvect3f axis, float angle, float *matout) +{ + int I; + float H[9]; + float Hsq[9]; + float II[9]; + + for(I=0;I<9;I++) II[I] = 0.0; + II[0+0*3] = 1.0; + II[1+1*3] = 1.0; + II[2+2*3] = 1.0; + + axis = cuvect3f_normalize(axis); + + mat3f_hodgedual(axis,H); + mat3f_mult(H,H,Hsq); + + for(I=0;I<9;I++) + { + matout[I] = (II[I] + Hsq[I]) + H[I]*sinf(angle) - Hsq[I]*cosf(angle); + } + + return; +} + + + +__host__ void test_cudavectf_logic1() +{ + //3 dim vector and matrix functional tests on host side + + printf("3 dim vector and matrix functional tests on host side\n"); + + cuvect3f a,b,c; + float ma[9],mb[9],mc[9]; + + int I,J; + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + ma[I+3*J] = ((float) rand())/((float) RAND_MAX); + mb[I+3*J] = ma[I+3*J]; + } + } + + mat3f_inverse(mb); + mat3f_mult(ma,mb,mc); + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + printf("ma[%d,%d] = %1.3f\n",I,J,ma[I+3*J]); + } + } + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + printf("mb[%d,%d] = %1.3f\n",I,J,mb[I+3*J]); + } + } + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + printf("mc[%d,%d] = %1.3f\n",I,J,mc[I+3*J]); + } + } + + a = cuvect3f(1,1,1); + b = mat3f_mult(ma,a); + b = mat3f_mult(mb,b); + + for(I=0;I<3;I++) + { + printf("a[%d] = %1.3f, b[%d] = %1.3f\n",I,a[I],I,b[I]); + } + + a = cuvect3f(1,0,1); + b = cuvect3f(0,1,-1); + c = a+b; + + for(I=0;I<3;I++) + { + printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]); + } + + c = c/2.0; + + for(I=0;I<3;I++) + { + printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]); + } + + c = cuvect3f_cross(a,b); + + for(I=0;I<3;I++) + { + printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]); + } + + printf("c dot a = %1.3f, c dot b = %1.3f\n",cuvect3f_dot(c,a),cuvect3f_dot(c,b)); + + printf("norm(a)=%1.3f, norm(b)=%1.3f, norm(c)=%1.3f\n",cuvect3f_norm(a),cuvect3f_norm(b),cuvect3f_norm(c)); + a = cuvect3f_normalize(a); + b = cuvect3f_normalize(b); + c = cuvect3f_normalize(c); + + for(I=0;I<3;I++) + { + printf("a[%d] = %1.3f, b[%d] = %1.3f, c[%d] = %1.3f\n",I,a[I],I,b[I],I,c[I]); + } + printf("c dot a = %1.3f, c dot b = %1.3f\n",cuvect3f_dot(c,a),cuvect3f_dot(c,b)); + printf("norm(a)=%1.3f, norm(b)=%1.3f, norm(c)=%1.3f\n",cuvect3f_norm(a),cuvect3f_norm(b),cuvect3f_norm(c)); + + return; +} + + + +__host__ __device__ cumat3f::cumat3f() +{ + int I; + for(I=0;I<9;I++) + { + dat[I] = 0.0; + } + return; +} + +__host__ __device__ cumat3f::~cumat3f() +{ + int I; + for(I=0;I<9;I++) + { + dat[I] = 0.0; + } + return; +} + +__host__ __device__ float& cumat3f::operator[](const int I) +{ + return dat[I]; +} + +__host__ __device__ float& cumat3f::operator()(const int I, const int J) +{ + return dat[I+3*J]; +} + +__host__ __device__ cumat3f cumat3f::operator+(cumat3f lhs) +{ + int I; + cumat3f ret; + for(I=0;I<9;I++) + { + ret.dat[I] = this->dat[I] + lhs.dat[I]; + } + return ret; +} + +__host__ __device__ cumat3f cumat3f::operator-(cumat3f lhs) +{ + int I; + cumat3f ret; + for(I=0;I<9;I++) + { + ret.dat[I] = this->dat[I] - lhs.dat[I]; + } + return ret; +} + +__host__ __device__ cumat3f cumat3f::operator*(float lhs) +{ + cumat3f ret; + int I; + for(I=0;I<9;I++) + { + ret.dat[I] = this->dat[I]*lhs; + } + return ret; +} + +__host__ __device__ cumat3f cumat3f::operator/(float lhs) +{ + cumat3f ret; + int I; + for(I=0;I<9;I++) + { + ret.dat[I] = this->dat[I]/lhs; + } + return ret; +} + +__host__ __device__ float& cumat3f::at(const int I, const int J) +{ + return dat[I+3*J]; +} + + +__host__ __device__ cuvect3f cumat3f::operator*(cuvect3f lhs) +{ + cuvect3f ret = cuvect3f(0.0,0.0,0.0); + int I,J; + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + ret[I] = ret[I] + this->at(I,J)*lhs[J]; + } + } + return ret; +} + +__host__ __device__ cumat3f cumat3f::operator*(cumat3f lhs) +{ + cumat3f ret; + int I,J,K; + + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + ret(I,J) = 0.0; + for(K=0;K<3;K++) + { + ret(I,J) = ret(I,J) + this->at(I,K)*lhs(K,J); + } + } + } + + return ret; +} + +__host__ __device__ float cumat3f::det() +{ + float ret = 0.0; + + ret = ret + this->at(0,0)*this->at(1,1)*this->at(2,2); + ret = ret + this->at(0,1)*this->at(1,2)*this->at(2,0); + ret = ret + this->at(0,2)*this->at(1,0)*this->at(2,1); + ret = ret - this->at(0,0)*this->at(1,2)*this->at(2,1); + ret = ret - this->at(0,1)*this->at(1,0)*this->at(2,2); + ret = ret - this->at(0,2)*this->at(1,1)*this->at(2,0); + + return ret; +} + +__host__ __device__ cumat3f cumat3f::transpose() +{ + cumat3f q; + int I,J; + for(I=0;I<3;I++) + { + for(J=0;J<3;J++) + { + q.at(I,J) = this->at(J,I); + } + } + return q; +} + +__host__ __device__ cumat3f cumat3f::inverse() +{ + cumat3f q; + float dt = q.det(); + if(dt!=0.0) + { + q(0,0) = (this->at(1,1)*this->at(2,2)-this->at(1,2)*this->at(2,1))/dt; + q(0,1) = -(this->at(1,0)*this->at(2,2)-this->at(1,2)*this->at(2,0))/dt; + q(0,2) = (this->at(1,0)*this->at(2,1)-this->at(1,1)*this->at(2,0))/dt; + q(1,0) = -(this->at(0,1)*this->at(2,2)-this->at(0,2)*this->at(2,1))/dt; + q(1,1) = (this->at(0,0)*this->at(2,2)-this->at(0,2)*this->at(2,0))/dt; + q(1,2) = -(this->at(0,0)*this->at(2,1)-this->at(0,1)*this->at(2,0))/dt; + q(2,0) = (this->at(0,1)*this->at(1,2)-this->at(0,2)*this->at(1,1))/dt; + q(2,1) = -(this->at(0,0)*this->at(1,2)-this->at(0,2)*this->at(1,0))/dt; + q(2,2) = (this->at(0,0)*this->at(1,1)-this->at(0,1)*this->at(1,0))/dt; + + q = q.transpose(); + } + else + { + q(0,0) = inf; + q(0,1) = inf; + q(0,2) = inf; + q(1,0) = inf; + q(1,1) = inf; + q(1,2) = inf; + q(2,0) = inf; + q(2,1) = inf; + q(2,2) = inf; + } + + return q; +} + +}; \ No newline at end of file diff --git a/src/amsculib2/cuvect4.cu b/src/amsculib2/cuvect4.cu new file mode 100644 index 0000000..f6bced8 --- /dev/null +++ b/src/amsculib2/cuvect4.cu @@ -0,0 +1,414 @@ +#include + +namespace amscuda +{ + +__host__ __device__ cuvect4::cuvect4() +{ + x = 0.0; y = 0.0; z = 0.0; w = 0.0; + return; +} + +__host__ __device__ cuvect4::~cuvect4() +{ + x = 0.0; y = 0.0; z = 0.0; w = 0.0; + return; +} + +__host__ __device__ cuvect4::cuvect4(double _x, double _y, double _z, double _w) +{ + x = _x; y = _y; z = _z; w = _w; + return; +} + +__host__ __device__ double& cuvect4::operator[](const int I) +{ + if(I==0) return x; + else if(I==1) return y; + else if(I==2) return z; + else if(I==3) return w; + return x; +} + +__host__ __device__ const double& cuvect4::operator[](const int I) const +{ + if(I==0) return x; + else if(I==1) return y; + else if(I==2) return z; + else if(I==3) return w; + return x; +} + +__host__ __device__ cuvect4 cuvect4::operator+(cuvect4 lhs) +{ + cuvect4 ret; + ret.x = this->x + lhs.x; + ret.y = this->y + lhs.y; + ret.z = this->z + lhs.z; + ret.w = this->w + lhs.w; + return ret; +} + +__host__ __device__ cuvect4 cuvect4::operator-(cuvect4 lhs) +{ + cuvect4 ret; + ret.x = this->x - lhs.x; + ret.y = this->y - lhs.y; + ret.z = this->z - lhs.z; + ret.w = this->w - lhs.w; + return ret; +} + +__host__ __device__ cuvect4 cuvect4::operator*(double lhs) +{ + cuvect4 ret; + ret.x = this->x*lhs; + ret.y = this->y*lhs; + ret.z = this->z*lhs; + ret.w = this->w*lhs; + return ret; +} + +__host__ __device__ cuvect4 cuvect4::operator/(double lhs) +{ + cuvect4 ret; + ret.x = this->x/lhs; + ret.y = this->y/lhs; + ret.z = this->z/lhs; + ret.w = this->w/lhs; + return ret; +} + +__host__ __device__ cumat4::cumat4() +{ + int I; + for(I=0;I<16;I++) + { + dat[I] = 0.0; + } + return; +} + +__host__ __device__ cumat4::~cumat4() +{ + int I; + for(I=0;I<16;I++) + { + dat[I] = 0.0; + } + return; +} + +__host__ __device__ double& cumat4::operator[](const int I) +{ + return dat[I]; +} + +__host__ __device__ double& cumat4::operator()(const int I, const int J) +{ + return dat[I+4*J]; +} + +__host__ __device__ double& cumat4::at(const int I, const int J) +{ + return dat[I+4*J]; +} + +__host__ __device__ cumat4 cumat4::operator+(cumat4 lhs) +{ + cumat4 ret; + int I; + for(I=0;I<16;I++) + { + ret.dat[I] = this->dat[I] + lhs.dat[I]; + } + return ret; +} + +__host__ __device__ cumat4 cumat4::operator-(cumat4 lhs) +{ + cumat4 ret; + int I; + for(I=0;I<16;I++) + { + ret.dat[I] = this->dat[I] - lhs.dat[I]; + } + return ret; +} + +__host__ __device__ cumat4 cumat4::operator*(double lhs) +{ + cumat4 ret; + int I; + for(I=0;I<16;I++) + { + ret.dat[I] = this->dat[I]*lhs; + } + return ret; +} + +__host__ __device__ cumat4 cumat4::operator/(double lhs) +{ + cumat4 ret; + int I; + for(I=0;I<16;I++) + { + ret.dat[I] = this->dat[I]/lhs; + } + return ret; +} + +__host__ __device__ cuvect4 cumat4::operator*(cuvect4 lhs) +{ + cuvect4 ret = cuvect4(0.0,0.0,0.0,0.0); + int I,J; + + for(I=0;I<4;I++) + { + for(J=0;J<4;J++) + { + ret[I] = ret[I] + this->at(I,J)*lhs[J]; + } + } + + return ret; +} + +__host__ __device__ cumat4 cumat4::operator*(cumat4 lhs) +{ + cumat4 ret; + int I,J,K; + for(I=0;I<4;I++) + { + for(J=0;J<4;J++) + { + ret(I,J) = 0; + for(K=0;K<4;K++) + { + ret(I,J) = ret(I,J) + this->at(I,K) * lhs(K,J); + } + } + } + return ret; +} + +__host__ __device__ cumat4 cumat4::transpose() +{ + cumat4 q; + int I,J; + for(I=0;I<4;I++) + { + for(J=0;J<4;J++) + { + q(I,J) = this->at(J,I); + } + } + return q; +} + +__host__ __device__ double cumat4::det() +{ + double a00,a01,a02,a03; + double a10,a11,a12,a13; + double a20,a21,a22,a23; + double a30,a31,a32,a33; + double det; + + a00 = this->at(0,0); + a01 = this->at(0,1); + a02 = this->at(0,2); + a03 = this->at(0,3); + a10 = this->at(1,0); + a11 = this->at(1,1); + a12 = this->at(1,2); + a13 = this->at(1,3); + a20 = this->at(2,0); + a21 = this->at(2,1); + a22 = this->at(2,2); + a23 = this->at(2,3); + a30 = this->at(3,0); + a31 = this->at(3,1); + a32 = this->at(3,2); + a33 = this->at(3,3); + + det = a03*a12*a21*a30 - + a02*a13*a21*a30 - + a03*a11*a22*a30 + + a01*a13*a22*a30 + + a02*a11*a23*a30 - + a01*a12*a23*a30 - + a03*a12*a20*a31 + + a02*a13*a20*a31 + + a03*a10*a22*a31 - + a00*a13*a22*a31 - + a02*a10*a23*a31 + + a00*a12*a23*a31 + + a03*a11*a20*a32 - + a01*a13*a20*a32 - + a03*a10*a21*a32 + + a00*a13*a21*a32 + + a01*a10*a23*a32 - + a00*a11*a23*a32 - + a02*a11*a20*a33 + + a01*a12*a20*a33 + + a02*a10*a21*a33 - + a00*a12*a21*a33 - + a01*a10*a22*a33 + + a00*a11*a22*a33; + + return det; +} + +__host__ __device__ cumat4 minverse(cumat4 ma) +{ + cumat4 mb; + + double a00,a01,a02,a03; + double a10,a11,a12,a13; + double a20,a21,a22,a23; + double a30,a31,a32,a33; + + double b00,b01,b02,b03; + double b10,b11,b12,b13; + double b20,b21,b22,b23; + double b30,b31,b32,b33; + + double det = 0.0; + + a00 = ma.at(0,0); + a01 = ma.at(0,1); + a02 = ma.at(0,2); + a03 = ma.at(0,3); + a10 = ma.at(1,0); + a11 = ma.at(1,1); + a12 = ma.at(1,2); + a13 = ma.at(1,3); + a20 = ma.at(2,0); + a21 = ma.at(2,1); + a22 = ma.at(2,2); + a23 = ma.at(2,3); + a30 = ma.at(3,0); + a31 = ma.at(3,1); + a32 = ma.at(3,2); + a33 = ma.at(3,3); + + det = a03*a12*a21*a30 - + a02*a13*a21*a30 - + a03*a11*a22*a30 + + a01*a13*a22*a30 + + a02*a11*a23*a30 - + a01*a12*a23*a30 - + a03*a12*a20*a31 + + a02*a13*a20*a31 + + a03*a10*a22*a31 - + a00*a13*a22*a31 - + a02*a10*a23*a31 + + a00*a12*a23*a31 + + a03*a11*a20*a32 - + a01*a13*a20*a32 - + a03*a10*a21*a32 + + a00*a13*a21*a32 + + a01*a10*a23*a32 - + a00*a11*a23*a32 - + a02*a11*a20*a33 + + a01*a12*a20*a33 + + a02*a10*a21*a33 - + a00*a12*a21*a33 - + a01*a10*a22*a33 + + a00*a11*a22*a33; + + if(det*det>1.0E-30) + { + b00 = -a13*a22*a31 + a12*a23*a31 + a13*a21*a32 - a11*a23*a32 - a12*a21*a33 + a11*a22*a33; + b01 = a03*a22*a31 - a02*a23*a31 - a03*a21*a32 + a01*a23*a32 + a02*a21*a33 - a01*a22*a33; + b02 = -a03*a12*a31 + a02*a13*a31 + a03*a11*a32 - a01*a13*a32 - a02*a11*a33 + a01*a12*a33; + b03 = a03*a12*a21 - a02*a13*a21 - a03*a11*a22 + a01*a13*a22 + a02*a11*a23 - a01*a12*a23; + b10 = a13*a22*a30 - a12*a23*a30 - a13*a20*a32 + a10*a23*a32 + a12*a20*a33 - a10*a22*a33; + b11 = -a03*a22*a30 + a02*a23*a30 + a03*a20*a32 - a00*a23*a32 - a02*a20*a33 + a00*a22*a33; + b12 = a03*a12*a30 - a02*a13*a30 - a03*a10*a32 + a00*a13*a32 + a02*a10*a33 - a00*a12*a33; + b13 = -a03*a12*a20 + a02*a13*a20 + a03*a10*a22 - a00*a13*a22 - a02*a10*a23 + a00*a12*a23; + b20 = -a13*a21*a30 + a11*a23*a30 + a13*a20*a31 - a10*a23*a31 - a11*a20*a33 + a10*a21*a33; + b21 = a03*a21*a30 - a01*a23*a30 - a03*a20*a31 + a00*a23*a31 + a01*a20*a33 - a00*a21*a33; + b22 = -a03*a11*a30 + a01*a13*a30 + a03*a10*a31 - a00*a13*a31 - a01*a10*a33 + a00*a11*a33; + b23 = a03*a11*a20 - a01*a13*a20 - a03*a10*a21 + a00*a13*a21 + a01*a10*a23 - a00*a11*a23; + b30 = a12*a21*a30 - a11*a22*a30 - a12*a20*a31 + a10*a22*a31 + a11*a20*a32 - a10*a21*a32; + b31 = -a02*a21*a30 + a01*a22*a30 + a02*a20*a31 - a00*a22*a31 - a01*a20*a32 + a00*a21*a32; + b32 = a02*a11*a30 - a01*a12*a30 - a02*a10*a31 + a00*a12*a31 + a01*a10*a32 - a00*a11*a32; + b33 = -a02*a11*a20 + a01*a12*a20 + a02*a10*a21 - a00*a12*a21 - a01*a10*a22 + a00*a11*a22; + b00 = b00/det; + b01 = b01/det; + b02 = b02/det; + b03 = b03/det; + b10 = b10/det; + b11 = b11/det; + b12 = b12/det; + b13 = b13/det; + b20 = b20/det; + b21 = b21/det; + b22 = b22/det; + b23 = b23/det; + b30 = b30/det; + b31 = b31/det; + b32 = b32/det; + b33 = b33/det; + mb.at(0,0) = b00; + mb.at(0,1) = b01; + mb.at(0,2) = b02; + mb.at(0,3) = b03; + mb.at(1,0) = b10; + mb.at(1,1) = b11; + mb.at(1,2) = b12; + mb.at(1,3) = b13; + mb.at(2,0) = b20; + mb.at(2,1) = b21; + mb.at(2,2) = b22; + mb.at(2,3) = b23; + mb.at(3,0) = b30; + mb.at(3,1) = b31; + mb.at(3,2) = b32; + mb.at(3,3) = b33; + } + //this was STUPID. Gaah. Computer algebra system saves the day? I'd be surprised if this didn't end up *slower* than gaussian elimination. Don't do this again! + return mb; +} + +__host__ __device__ cumat4 cumat4::inverse() +{ + return minverse(*this); +} + + +__host__ __device__ double cuvect4_dot(cuvect4 a, cuvect4 b) +{ + double ret = 0.0; + ret = a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; + return ret; +} + +__host__ __device__ double cuvect4_norm(cuvect4 a) +{ + double ret = 0.0; + ret = ::sqrt(cuvect4_dot(a,a)); + return ret; +} + +__host__ __device__ cuvect4 cuvect4_normalize(cuvect4 a) +{ + cuvect4 ret = cuvect4(0.0f,0.0f,0.0f,0.0f); + double nrm = cuvect4_norm(a); + if(nrm>0.0) + ret = a/nrm; + return ret; +} + +__host__ __device__ cuvect4 cuvect4_proj(cuvect4 a, cuvect4 b) +{ + cuvect4 ret; + cuvect4 bn = cuvect4_normalize(b); + double d = cuvect4_dot(a,bn); + ret = bn*d; + return ret; +} + + +}; \ No newline at end of file diff --git a/src/amsculib2/cuvect4f.cu b/src/amsculib2/cuvect4f.cu new file mode 100644 index 0000000..ba170a2 --- /dev/null +++ b/src/amsculib2/cuvect4f.cu @@ -0,0 +1,417 @@ +#include + +namespace amscuda +{ + +//////////// +//cuvect4ff// +//////////// + +__host__ __device__ cuvect4f::cuvect4f() +{ + x = 0.0; y = 0.0; z = 0.0; w = 0.0; + return; +} + +__host__ __device__ cuvect4f::~cuvect4f() +{ + x = 0.0; y = 0.0; z = 0.0; w = 0.0; + return; +} + +__host__ __device__ cuvect4f::cuvect4f(float _x, float _y, float _z, float _w) +{ + x = _x; y = _y; z = _z; w = _w; + return; +} + +__host__ __device__ float& cuvect4f::operator[](const int I) +{ + if(I==0) return x; + else if(I==1) return y; + else if(I==2) return z; + else if(I==3) return w; + return x; +} + +__host__ __device__ const float& cuvect4f::operator[](const int I) const +{ + if(I==0) return x; + else if(I==1) return y; + else if(I==2) return z; + else if(I==3) return w; + return x; +} + +__host__ __device__ cuvect4f cuvect4f::operator+(cuvect4f lhs) +{ + cuvect4f ret; + ret.x = this->x + lhs.x; + ret.y = this->y + lhs.y; + ret.z = this->z + lhs.z; + ret.w = this->w + lhs.w; + return ret; +} + +__host__ __device__ cuvect4f cuvect4f::operator-(cuvect4f lhs) +{ + cuvect4f ret; + ret.x = this->x - lhs.x; + ret.y = this->y - lhs.y; + ret.z = this->z - lhs.z; + ret.w = this->w - lhs.w; + return ret; +} + +__host__ __device__ cuvect4f cuvect4f::operator*(float lhs) +{ + cuvect4f ret; + ret.x = this->x*lhs; + ret.y = this->y*lhs; + ret.z = this->z*lhs; + ret.w = this->w*lhs; + return ret; +} + +__host__ __device__ cuvect4f cuvect4f::operator/(float lhs) +{ + cuvect4f ret; + ret.x = this->x/lhs; + ret.y = this->y/lhs; + ret.z = this->z/lhs; + ret.w = this->w/lhs; + return ret; +} + +__host__ __device__ cumat4f::cumat4f() +{ + int I; + for(I=0;I<16;I++) + { + dat[I] = 0.0; + } + return; +} + +__host__ __device__ cumat4f::~cumat4f() +{ + int I; + for(I=0;I<16;I++) + { + dat[I] = 0.0; + } + return; +} + +__host__ __device__ float& cumat4f::operator[](const int I) +{ + return dat[I]; +} + +__host__ __device__ float& cumat4f::operator()(const int I, const int J) +{ + return dat[I+4*J]; +} + +__host__ __device__ float& cumat4f::at(const int I, const int J) +{ + return dat[I+4*J]; +} + +__host__ __device__ cumat4f cumat4f::operator+(cumat4f lhs) +{ + cumat4f ret; + int I; + for(I=0;I<16;I++) + { + ret.dat[I] = this->dat[I] + lhs.dat[I]; + } + return ret; +} + +__host__ __device__ cumat4f cumat4f::operator-(cumat4f lhs) +{ + cumat4f ret; + int I; + for(I=0;I<16;I++) + { + ret.dat[I] = this->dat[I] - lhs.dat[I]; + } + return ret; +} + +__host__ __device__ cumat4f cumat4f::operator*(float lhs) +{ + cumat4f ret; + int I; + for(I=0;I<16;I++) + { + ret.dat[I] = this->dat[I]*lhs; + } + return ret; +} + +__host__ __device__ cumat4f cumat4f::operator/(float lhs) +{ + cumat4f ret; + int I; + for(I=0;I<16;I++) + { + ret.dat[I] = this->dat[I]/lhs; + } + return ret; +} + +__host__ __device__ cuvect4f cumat4f::operator*(cuvect4f lhs) +{ + cuvect4f ret = cuvect4f(0.0,0.0,0.0,0.0); + int I,J; + + for(I=0;I<4;I++) + { + for(J=0;J<4;J++) + { + ret[I] = ret[I] + this->at(I,J)*lhs[J]; + } + } + + return ret; +} + +__host__ __device__ cumat4f cumat4f::operator*(cumat4f lhs) +{ + cumat4f ret; + int I,J,K; + for(I=0;I<4;I++) + { + for(J=0;J<4;J++) + { + ret(I,J) = 0; + for(K=0;K<4;K++) + { + ret(I,J) = ret(I,J) + this->at(I,K) * lhs(K,J); + } + } + } + return ret; +} + +__host__ __device__ cumat4f cumat4f::transpose() +{ + cumat4f q; + int I,J; + for(I=0;I<4;I++) + { + for(J=0;J<4;J++) + { + q(I,J) = this->at(J,I); + } + } + return q; +} + +__host__ __device__ float cumat4f::det() +{ + float a00,a01,a02,a03; + float a10,a11,a12,a13; + float a20,a21,a22,a23; + float a30,a31,a32,a33; + float det; + + a00 = this->at(0,0); + a01 = this->at(0,1); + a02 = this->at(0,2); + a03 = this->at(0,3); + a10 = this->at(1,0); + a11 = this->at(1,1); + a12 = this->at(1,2); + a13 = this->at(1,3); + a20 = this->at(2,0); + a21 = this->at(2,1); + a22 = this->at(2,2); + a23 = this->at(2,3); + a30 = this->at(3,0); + a31 = this->at(3,1); + a32 = this->at(3,2); + a33 = this->at(3,3); + + det = a03*a12*a21*a30 - + a02*a13*a21*a30 - + a03*a11*a22*a30 + + a01*a13*a22*a30 + + a02*a11*a23*a30 - + a01*a12*a23*a30 - + a03*a12*a20*a31 + + a02*a13*a20*a31 + + a03*a10*a22*a31 - + a00*a13*a22*a31 - + a02*a10*a23*a31 + + a00*a12*a23*a31 + + a03*a11*a20*a32 - + a01*a13*a20*a32 - + a03*a10*a21*a32 + + a00*a13*a21*a32 + + a01*a10*a23*a32 - + a00*a11*a23*a32 - + a02*a11*a20*a33 + + a01*a12*a20*a33 + + a02*a10*a21*a33 - + a00*a12*a21*a33 - + a01*a10*a22*a33 + + a00*a11*a22*a33; + + return det; +} + +__host__ __device__ cumat4f minverse(cumat4f ma) +{ + cumat4f mb; + + float a00,a01,a02,a03; + float a10,a11,a12,a13; + float a20,a21,a22,a23; + float a30,a31,a32,a33; + + float b00,b01,b02,b03; + float b10,b11,b12,b13; + float b20,b21,b22,b23; + float b30,b31,b32,b33; + + float det = 0.0; + + a00 = ma.at(0,0); + a01 = ma.at(0,1); + a02 = ma.at(0,2); + a03 = ma.at(0,3); + a10 = ma.at(1,0); + a11 = ma.at(1,1); + a12 = ma.at(1,2); + a13 = ma.at(1,3); + a20 = ma.at(2,0); + a21 = ma.at(2,1); + a22 = ma.at(2,2); + a23 = ma.at(2,3); + a30 = ma.at(3,0); + a31 = ma.at(3,1); + a32 = ma.at(3,2); + a33 = ma.at(3,3); + + det = a03*a12*a21*a30 - + a02*a13*a21*a30 - + a03*a11*a22*a30 + + a01*a13*a22*a30 + + a02*a11*a23*a30 - + a01*a12*a23*a30 - + a03*a12*a20*a31 + + a02*a13*a20*a31 + + a03*a10*a22*a31 - + a00*a13*a22*a31 - + a02*a10*a23*a31 + + a00*a12*a23*a31 + + a03*a11*a20*a32 - + a01*a13*a20*a32 - + a03*a10*a21*a32 + + a00*a13*a21*a32 + + a01*a10*a23*a32 - + a00*a11*a23*a32 - + a02*a11*a20*a33 + + a01*a12*a20*a33 + + a02*a10*a21*a33 - + a00*a12*a21*a33 - + a01*a10*a22*a33 + + a00*a11*a22*a33; + + if(det*det>1.0E-30) + { + b00 = -a13*a22*a31 + a12*a23*a31 + a13*a21*a32 - a11*a23*a32 - a12*a21*a33 + a11*a22*a33; + b01 = a03*a22*a31 - a02*a23*a31 - a03*a21*a32 + a01*a23*a32 + a02*a21*a33 - a01*a22*a33; + b02 = -a03*a12*a31 + a02*a13*a31 + a03*a11*a32 - a01*a13*a32 - a02*a11*a33 + a01*a12*a33; + b03 = a03*a12*a21 - a02*a13*a21 - a03*a11*a22 + a01*a13*a22 + a02*a11*a23 - a01*a12*a23; + b10 = a13*a22*a30 - a12*a23*a30 - a13*a20*a32 + a10*a23*a32 + a12*a20*a33 - a10*a22*a33; + b11 = -a03*a22*a30 + a02*a23*a30 + a03*a20*a32 - a00*a23*a32 - a02*a20*a33 + a00*a22*a33; + b12 = a03*a12*a30 - a02*a13*a30 - a03*a10*a32 + a00*a13*a32 + a02*a10*a33 - a00*a12*a33; + b13 = -a03*a12*a20 + a02*a13*a20 + a03*a10*a22 - a00*a13*a22 - a02*a10*a23 + a00*a12*a23; + b20 = -a13*a21*a30 + a11*a23*a30 + a13*a20*a31 - a10*a23*a31 - a11*a20*a33 + a10*a21*a33; + b21 = a03*a21*a30 - a01*a23*a30 - a03*a20*a31 + a00*a23*a31 + a01*a20*a33 - a00*a21*a33; + b22 = -a03*a11*a30 + a01*a13*a30 + a03*a10*a31 - a00*a13*a31 - a01*a10*a33 + a00*a11*a33; + b23 = a03*a11*a20 - a01*a13*a20 - a03*a10*a21 + a00*a13*a21 + a01*a10*a23 - a00*a11*a23; + b30 = a12*a21*a30 - a11*a22*a30 - a12*a20*a31 + a10*a22*a31 + a11*a20*a32 - a10*a21*a32; + b31 = -a02*a21*a30 + a01*a22*a30 + a02*a20*a31 - a00*a22*a31 - a01*a20*a32 + a00*a21*a32; + b32 = a02*a11*a30 - a01*a12*a30 - a02*a10*a31 + a00*a12*a31 + a01*a10*a32 - a00*a11*a32; + b33 = -a02*a11*a20 + a01*a12*a20 + a02*a10*a21 - a00*a12*a21 - a01*a10*a22 + a00*a11*a22; + b00 = b00/det; + b01 = b01/det; + b02 = b02/det; + b03 = b03/det; + b10 = b10/det; + b11 = b11/det; + b12 = b12/det; + b13 = b13/det; + b20 = b20/det; + b21 = b21/det; + b22 = b22/det; + b23 = b23/det; + b30 = b30/det; + b31 = b31/det; + b32 = b32/det; + b33 = b33/det; + mb.at(0,0) = b00; + mb.at(0,1) = b01; + mb.at(0,2) = b02; + mb.at(0,3) = b03; + mb.at(1,0) = b10; + mb.at(1,1) = b11; + mb.at(1,2) = b12; + mb.at(1,3) = b13; + mb.at(2,0) = b20; + mb.at(2,1) = b21; + mb.at(2,2) = b22; + mb.at(2,3) = b23; + mb.at(3,0) = b30; + mb.at(3,1) = b31; + mb.at(3,2) = b32; + mb.at(3,3) = b33; + } + //this was STUPID. Gaah. Computer algebra system saves the day? I'd be surprised if this didn't end up *slower* than gaussian elimination. Don't do this again! + return mb; +} + +__host__ __device__ cumat4f cumat4f::inverse() +{ + return minverse(*this); +} + + +__host__ __device__ float cuvect4f_dot(cuvect4f a, cuvect4f b) +{ + float ret = 0.0; + ret = a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; + return ret; +} + +__host__ __device__ float cuvect4f_norm(cuvect4f a) +{ + float ret = 0.0; + ret = ::sqrtf(cuvect4f_dot(a,a)); + return ret; +} + +__host__ __device__ cuvect4f cuvect4f_normalize(cuvect4f a) +{ + cuvect4f ret = cuvect4f(0.0f,0.0f,0.0f,0.0f); + float nrm = cuvect4f_norm(a); + if(nrm>0.0) + ret = a/nrm; + return ret; +} + +__host__ __device__ cuvect4f cuvect4f_proj(cuvect4f a, cuvect4f b) +{ + cuvect4f ret; + cuvect4f bn = cuvect4f_normalize(b); + float d = cuvect4f_dot(a,bn); + ret = bn*d; + return ret; +} + +}; //namespace amscuda diff --git a/src/main.cu b/src/main.cu new file mode 100644 index 0000000..5cb987d --- /dev/null +++ b/src/main.cu @@ -0,0 +1,27 @@ +#include + +//using namespace ams; +using namespace amscuda; + +int main(int argc, char* argv[]) +{ + printf("AMSCULIB2: Cuda Library Tests.\n"); + + //test_amscuarray_1(); + //test_amscumath1(); + + //cmp::test_cucomp64_1(); + //cmp::test_cucomp128_1(); + + //test_amscuarray_2(); + + //test_dprg64(); + //printf("\n"); + //test_dprg32(); + + //test_dbuff_rand_dpr32(); + + test_amscurarray1(); + + return 0; +} \ No newline at end of file diff --git a/src/main.o b/src/main.o new file mode 100644 index 0000000..878d8a2 Binary files /dev/null and b/src/main.o differ diff --git a/test_scripts/test_dbuff_dpr32.py b/test_scripts/test_dbuff_dpr32.py new file mode 100644 index 0000000..c9bad9c --- /dev/null +++ b/test_scripts/test_dbuff_dpr32.py @@ -0,0 +1,76 @@ +#!/usr/bin/python3 + +import os,sys,math +import numpy as np +import matplotlib.pyplot as plt + +################# +## Subroutines ## +################# + +def binload_float_ndarray(fp): + arr = np.zeros((0),dtype=np.float32,order='F') + + qb = fp.read(4) + Nd = np.frombuffer(qb,dtype=np.int32,count=1)[0] + shp = np.zeros((Nd),dtype=np.int32) + + piprod = 1 + for I in range(0,Nd): + qb = fp.read(4) + shp[I] = np.frombuffer(qb,dtype=np.int32,count=1)[0] + piprod = piprod*shp[I] + + qb = fp.read(4*piprod) + arr = np.frombuffer(qb,dtype=np.float32,count=piprod) + + arr = arr.reshape(shp) + + return arr; + +def binsave_float_ndarray(fp,arr): + + + + + return + + +################# +## Main Script ## +################# + +def test_1(): + + fname = "./test_scripts/test_dbuff_rand_dpr32.bin" + try: + fp = open(fname,"rb") + except: + print("Could not open {} for reading".format(fname)) + return + + arr1 = binload_float_ndarray(fp) + arr2 = binload_float_ndarray(fp) + + fp.close() + + plt.subplot(2,2,1) + plt.imshow(arr1) + plt.subplot(2,2,2) + plt.imshow(arr2) + plt.subplot(2,2,3) + plt.hist(arr1.flatten(),bins=100) + plt.subplot(2,2,4) + plt.hist(arr2.flatten(),bins=100) + plt.show() + + print("{} {}".format(np.mean(arr2),np.std(arr2))) + + + return + +if(__name__=="__main__"): + test_1() + + exit(0) +