cuda library updates

This commit is contained in:
2026-02-20 11:46:15 -05:00
parent c9f6307fc2
commit 3128d5dd19
122 changed files with 10842 additions and 7434 deletions

View File

@ -1 +1 @@
Copyright Aaron M. Schinder, 2023 Copyright Aaron M. Schinder, 2023

7
amsculib2.code-workspace Normal file
View File

@ -0,0 +1,7 @@
{
"folders": [
{
"path": "."
}
]
}

8
backup.sh Normal file
View File

@ -0,0 +1,8 @@
#!/bin/bash
# Archives the current directory (excluding ./data) into ../amsculib2.tar.gz
# and copies the archive to the remote workspace with scp.
# All paths are relative to the directory the script is started from.

# Abort on the first failing step so a failed tar never uploads a stale archive.
set -e

# Remove *.bin files from ./test_scripts before archiving; -f keeps rm from
# failing when there is nothing to delete.
rm -f ./test_scripts/*.bin
tar --exclude='./data' -czvf ../amsculib2.tar.gz ./*
scp ../amsculib2.tar.gz aschinder@amssolarempire.com:~/workspace/projects

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

813
build/amsbuildlib4.py Normal file
View File

@ -0,0 +1,813 @@
#!/usr/bin/python3
import os,sys,math
import subprocess
"""
Copyright Aaron M. Schinder, 2011 - MIT/BSD License
This script contains a bunch of helper functions for generating simple, imperative, hopefully
transparent build scripts using the python language (and nothing else).
I just want the script to do the compiling and linking operations I want it to do in the order
I want it to do it in, finding every relevant source file.
That's it. That's what I want in a build system.
"""
def flist(pth,**kwargs):
    """
    flist - collect the files found in directory pth

    optional keyword arguments:
    recurse (T/F): also descend into subdirectories (default False)
    exts (str or list): keep only files with these extensions (default: keep all)
    normpath (T/F): run os.path.normpath over every returned path (default True)
    linuxpath (T/F): accepted, but currently has no effect
    followlinks (T/F): also descend into symlinked directories (default False)

    filelist = flist(pth,**kwargs)
    """
    recursive = kwargs.get('recurse',False)
    normalize = kwargs.get('normpath',True)
    follow = kwargs.get('followlinks',False)

    found = []
    subdirs = []
    for entry in os.listdir(pth):
        full = os.path.join(pth,entry)
        if os.path.isdir(full):
            #symlinked directories only count when followlinks is set
            if (follow or not os.path.islink(full)) and full not in (".",".."):
                subdirs.append(full)
        elif os.path.isfile(full):
            found.append(full)

    #Descend into the collected directories, forwarding the same options
    if recursive:
        for sub in subdirs:
            found.extend(flist(sub,**kwargs))

    #Postprocess: extension filter first, then path normalization
    if 'exts' in kwargs:
        found = filterexts(found,kwargs['exts'])
    if normalize:
        found = [os.path.normpath(p) for p in found]
    return found
def filterexts(flst,exts):
    """
    Filters a list of file names by extension.

    flst - list of file names / paths
    exts - one extension (str) or a list of extensions. The leading '.' is
           optional ("cu" and ".cu" are equivalent). An empty string selects
           the files that have no extension at all.

    Returns a new list holding the entries of flst, in their original order,
    whose extension (as reported by os.path.splitext) equals one of exts.
    The comparison is case-sensitive.

    flst2 = filterexts(flst,exts)
    """
    if(isinstance(exts,str)):
        exts = [exts]
    #Normalize the wanted extensions once, not once per file.
    #'' is kept as-is: indexing ext[0] on it used to raise IndexError.
    wanted = set()
    for ext in exts:
        if(ext and not ext.startswith('.')):
            ext = '.'+ext
        wanted.add(ext)
    return [F for F in flst if os.path.splitext(F)[1] in wanted]
#Find a file fname, starting in pth and recursing
#Used for finding library files to link
def findfile(fname,pth,**kwargs):
    """
    Searches the directory tree below pth for a file whose name is fname.

    Returns the path of the match, or "" when nothing matches. The whole tree
    is always scanned, so with several matches the last one listed is returned.
    The extra keyword arguments are accepted but not used.
    """
    match = ""
    for candidate in flist(pth,recurse=True):
        if os.path.basename(candidate) == fname:
            match = candidate
    return match
def replaceext(fname,ext):
    """
    Returns fname with its last extension replaced by ext.

    ext may be given with or without the leading '.'; an empty ext simply
    removes the current extension.
    """
    if len(ext) == 0:
        return os.path.splitext(fname)[0]
    suffix = ext if ext[0] == '.' else '.'+ext
    return os.path.splitext(fname)[0]+suffix
def replaceexts(fnamelist,ext):
    """Applies replaceext(name,ext) to every name in fnamelist and returns the results as a new list."""
    return [replaceext(name,ext) for name in fnamelist]
def except_contains(lst1,exc):
    """
    Drops from lst1 every path whose file name (last path component) equals
    one of the names in exc. Returns the remaining paths, order preserved,
    as a new list.
    """
    kept = []
    for path in lst1:
        excluded = any(os.path.split(path)[-1] == banned for banned in exc)
        if not excluded:
            kept.append(path)
    return kept
def list_to_sss(lst):
    """Joins a list of strings into one space-separated string ("" for an empty list)."""
    return " ".join(lst)
##########################
##System Call Procedures##
##########################
def callproc(cmd, **kwargs):
    """
    Runs cmd through the system shell and captures its output, with stderr
    merged into stdout.

    cmd - the complete command line as one string
    optional keyword arguments:
    logfile (str): file the command and its output are appended to;
                   "" (the default) disables logging
    echo (T/F): print the command and its captured output (default True)

    Returns the exit status of the command, so callers can detect a failed
    compile/link step. Callers that ignore the return value are unaffected.
    """
    logfile = kwargs.get('logfile',"")
    echo = kwargs.get('echo',True)
    if(echo):
        print(cmd)
    #encoding/decoding to/from bytes is necessary to use the subprocess command
    #in python3.7
    #However, only do this in linux
    if(sys.platform!='win32'):
        cmd2 = cmd.encode(encoding='utf-8')
    else:
        cmd2 = cmd
    #NOTE(review): shell=True executes a string assembled from file names and
    #flags; only feed it trusted input.
    proc = subprocess.Popen(cmd2,stderr = subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
    (out, _) = proc.communicate()
    #tool output is not guaranteed to be valid utf-8; never crash the build on it
    out = out.decode(encoding='utf-8',errors='replace')
    if(echo):
        print(out)
    if(logfile!=""):
        #opened only for the write itself, and closed even if the write fails
        with open(logfile,'a+') as fp:
            fp.write(cmd+'\n')
            fp.write(out+'\n')
    return proc.returncode
#############################################
## Compiler, Archive, and Linker Functions ##
#############################################
#MSVC compiler wrapper
def msvc_compile(compilername, srcfile, **kwargs):
    """
    Compiles one source file with an MSVC-style command line.

    The object file is written next to the source file, with the source
    extension replaced by objext.

    optional keyword arguments:
    include (str or list): include options, appended to the end of the command
    flags (str or list): compiler flags
    objext (str): object file extension (default '.obj')
    srcfileflag (str): option placed before the source file (default '/c')
    outfileflag (str): option glued in front of the quoted output file (default '/Fo:')
    logfile (str): log file handed to callproc (default "")
    """
    def as_text(value):
        #option lists are flattened into one space-separated string
        return list_to_sss(value) if isinstance(value,list) else value

    include = as_text(kwargs.get('include',''))
    flags = as_text(kwargs.get('flags',''))
    objext = kwargs.get('objext','.obj')
    srcfileflag = kwargs.get('srcfileflag','/c')
    outfileflag = kwargs.get('outfileflag','/Fo:')
    logfile = kwargs.get('logfile',"")

    outfile = replaceext(srcfile,objext)
    #the empty entry reproduces the double space after the flags
    pieces = [compilername,flags,"",srcfileflag,srcfile,outfileflag+'"'+outfile+'"',include]
    callproc(" ".join(pieces),echo=True,logfile=logfile)
    return
#MSVC compiler wrapper
def msvc_compile_list(compiler,srclist,**kwargs):
    """Runs msvc_compile on every source file in srclist, forwarding the keyword arguments."""
    for src in srclist:
        msvc_compile(compiler,src,**kwargs)
    return
#gnu-style compiler compile: Should work with gcc, g++, gfortran
def gs_compile(compiler,srcfile,**kwargs):
    """
    Compiles one source file with a gnu-style command line.

    The object file is written next to the source file, with the source
    extension replaced by objext.

    optional keyword arguments:
    include (str or list): include options, appended to the end of the command
    flags (str or list): compiler flags
    objext (str): object file extension (default '.o')
    srcfileflag (str): option placed before the source file (default '-c')
    outfileflag (str): option placed before the output file (default '-o')
    logfile (str): log file handed to callproc (default "")
    smartcompile: accepted, but currently has no effect
    """
    def as_text(value):
        #option lists are flattened into one space-separated string
        return list_to_sss(value) if isinstance(value,list) else value

    include = as_text(kwargs.get('include',''))
    flags = as_text(kwargs.get('flags',''))
    objext = kwargs.get('objext','.o')
    srcfileflag = kwargs.get('srcfileflag','-c')
    outfileflag = kwargs.get('outfileflag','-o')
    logfile = kwargs.get('logfile',"")

    outfile = replaceext(srcfile,objext)
    pieces = [compiler,flags,outfileflag,outfile,srcfileflag,srcfile,include]
    callproc(" ".join(pieces),echo=True,logfile=logfile)
    return
def gs_compile_list(compiler,srclist,**kwargs):
    """Runs gs_compile on every source file in srclist, forwarding the keyword arguments."""
    for src in srclist:
        gs_compile(compiler,src,**kwargs)
    return
def gs_compile_all(compiler,srcdir,srcexts,**kwargs):
    """
    Compiles every file below srcdir whose extension is in srcexts, using gs_compile.

    optional keyword arguments:
    recurse (T/F): search subdirectories as well (default True)
    All keyword arguments are also forwarded to gs_compile.
    """
    recurse = kwargs.get('recurse',True)
    for src in flist(srcdir,exts=srcexts,recurse=recurse):
        gs_compile(compiler,src,**kwargs)
    return
def gs_link_all(linker,srcpath,target,**kwargs):
    """
    Links every object file found below srcpath into target, using gs_link_list.

    optional keyword arguments:
    objext (str): extension of the object files to collect (default '.o')
    recurse (T/F): search subdirectories as well (default True)
    All keyword arguments are also forwarded to gs_link_list.
    """
    objext = kwargs.get('objext','.o')
    recurse = kwargs.get('recurse',True)
    objects = list_to_sss(flist(srcpath,exts=objext,recurse=recurse))
    gs_link_list(linker,objects,target,**kwargs)
    return
def gs_link_list(linker,objlist,target,**kwargs):
    """
    Runs a gnu-style link command producing target.

    objlist - the object files as ONE space-separated string (see list_to_sss)
    optional keyword arguments (all strings, default ''):
    libdir - library search path options, placed right after the target
    staticlibs - static libraries, placed after the object files
    libflags - library options
    linkerflags - further linker options, placed last
    logfile - log file handed to callproc
    objext and recurse are accepted but not used here.
    """
    libdir = kwargs.get('libdir','')
    staticlibs = kwargs.get('staticlibs','')
    libflags = kwargs.get('libflags','')
    linkerflags = kwargs.get('linkerflags','')
    logfile = kwargs.get('logfile','')

    pieces = [linker,"-o",target,libdir,objlist,staticlibs,libflags,linkerflags]
    callproc(" ".join(pieces),logfile=logfile)
    return
def msvc_link_list(objlist,target,**kwargs):
    """
    Runs the 'link' command producing target (passed as /out:target).

    objlist - the object files as ONE space-separated string (see list_to_sss)
    optional keyword arguments (all strings, default ''):
    libdir - library search path options, placed right after 'link'
    staticlibs - static libraries, placed after the object files
    linkerflags - further linker options
    libflags - library options, placed last
    logfile - log file handed to callproc
    objext and recurse are accepted but not used here.
    """
    libdir = kwargs.get('libdir','')
    staticlibs = kwargs.get('staticlibs','')
    libflags = kwargs.get('libflags','')
    linkerflags = kwargs.get('linkerflags','')
    logfile = kwargs.get('logfile','')

    pieces = ['link',libdir,objlist,staticlibs,linkerflags,"/out:"+target,libflags]
    callproc(" ".join(pieces),logfile=logfile)
    return
def ar_all(srcpath,arname,**kwargs):
    """
    Archives every object file found below srcpath into arname, using ar_list.

    optional keyword arguments:
    recurse (T/F): search subdirectories as well (default True)
    objext (str): extension of the object files to collect (default '.o')
    """
    recurse = kwargs.get('recurse',True)
    objext = kwargs.get('objext','.o')
    objects = flist(srcpath,exts=objext,recurse=recurse)
    ar_list(objects,arname,**kwargs)
    return
def msvc_lib_list(objlist,arname,**kwargs):
    """Runs 'lib <objects> /out:<arname>' for the list of object files objlist. Keyword arguments are ignored."""
    callproc("lib "+list_to_sss(objlist)+" /out:"+arname)
    return
def ar_list(objlist,arname,**kwargs):
    """Runs 'ar cr <arname> <objects>' for the list of object files objlist. Keyword arguments are ignored."""
    callproc("ar cr "+arname+" "+list_to_sss(objlist))
    return
def ar_add_list(objlist,arname,**kwargs):
    """
    Adds the object files in objlist to the existing archive arname,
    replacing members that are already present.

    The command used to be 'ar t', which only prints the archive's table of
    contents and never modifies the archive; 'ar r' inserts/replaces members.
    Keyword arguments are accepted but ignored.
    """
    objlist2 = list_to_sss(objlist)
    ln = "ar r "+arname+" "+objlist2
    callproc(ln)
    return
#####################################
## Incremental Compilation Library ##
#####################################
#silently read lines from a text file if exists
def readtextlines(fname):
    """
    Silently reads the lines of a text file.

    Returns the list of lines (line endings kept). Returns [] when fname is
    not an existing regular file or cannot be opened/read.

    The result no longer ends with the empty string that the EOF sentinel of
    the old read loop appended. Undecodable bytes are replaced instead of
    raising, so scanning a source file with stray non-text bytes cannot abort
    a build.
    """
    if(not os.path.isfile(fname)):
        return []
    try:
        #the with-block closes the file even when reading fails part-way
        with open(fname,"r",errors="replace") as fp:
            return fp.readlines()
    except OSError:
        return []
def getincludefnfrage(includeline):
    """
    Extracts the file name from an #include line.

    The name is the text between the first '<' or '"' on the line and the
    matching closing delimiter ('>' for '<', '"' for '"').
    Returns "" when the line has no opening delimiter or it is never closed.

    Previously the opening '"' was tested as a closing delimiter in the same
    loop iteration, so every quoted include (#include "x.h") produced "".
    """
    closers = {'<':'>','"':'"'}
    for I1 in range(0,len(includeline)):
        opener = includeline[I1]
        if(opener in closers):
            #search for the closing delimiter strictly after the opening one
            I2 = includeline.find(closers[opener],I1+1)
            if(I2>=0):
                return includeline[I1+1:I2]
            return ""
    return ""
#Returns the name of the source file fname (if it exists)
#and all included filenames
def getsrcandincludes(fname, incdirs):
    """
    Returns fname (if it exists) followed by every file it includes, directly
    or through other included files, that can be found in incdirs.

    fname - source file to start from
    incdirs - list of directories searched, in order, for each included name;
              a single directory may also be passed as a plain string

    Every line containing "#include" is examined. The first directory in
    incdirs that holds the named file wins. Each file is listed once, so
    headers that include each other cannot make the scan loop forever.
    """
    #a bare string would otherwise be walked one character at a time
    if(isinstance(incdirs,str)):
        incdirs = [incdirs]
    found = []
    if(os.path.isfile(fname)):
        found.append(fname)
    seen = set(os.path.normpath(f) for f in found)
    Ilist = 0
    #found grows while it is being walked: newly discovered headers are scanned too
    while(Ilist<len(found)):
        for ln in readtextlines(found[Ilist]):
            if(ln.find("#include")<0):
                continue
            fnfrag = getincludefnfrage(ln)
            if(fnfrag==""):
                continue
            for D in incdirs:
                tfn = os.path.join(D,fnfrag)
                if(os.path.isfile(tfn)):
                    key = os.path.normpath(tfn)
                    if(key not in seen):
                        seen.add(key)
                        found.append(tfn)
                    break
        Ilist = Ilist + 1
    return found
#Returns the name of the object file associated with the source file
#within the object store folder (if it exists)
def getobjfile(fname,objstore,objext = ".o"):
    """
    Returns the path of the object file belonging to source file fname inside
    the objstore directory, or "" when that object file does not exist.

    The object name is the source file name with ALL of its extensions removed
    ("a.test.cu" -> "a") plus objext; objext may be given with or without dots.
    """
    stem = os.path.basename(fname)
    root, ext = os.path.splitext(stem)
    while ext != "":
        stem = root
        root, ext = os.path.splitext(stem)
    candidate = os.path.join(objstore,stem+"."+objext.strip('.'))
    return candidate if os.path.exists(candidate) else ""
def getsrctimes(fname, incdirs):
    """
    Returns the modification times (os.path.getmtime) of fname and of all the
    files getsrcandincludes finds for it, in the same order.
    """
    return [os.path.getmtime(f) for f in getsrcandincludes(fname, incdirs)]
def getobjtime(fname,objstore,objext=".o"):
    """
    Returns the modification time of the object file that getobjfile finds
    for fname, or -1 when there is no such object file.
    """
    objfile = getobjfile(fname,objstore,objext)
    if objfile == "":
        return -1
    return os.path.getmtime(objfile)
#Decide whether or not to compile source file
def decidecompile(fname,**kwargs):
    """
    Decides whether source file fname has to be (re)compiled.

    optional keyword arguments:
    searchincdirs (list): directories searched for included files (default ["./include"])
    objext (str): object file extension (default ".o")
    objstore (str): directory holding the object files (default "./objstore")

    Returns False when fname does not exist. Returns True when no object file
    exists yet, or when fname or any include file found for it is strictly
    newer than the object file. Returns False otherwise.
    """
    if(not os.path.isfile(fname)):
        return False
    ##unpack kwargs
    incdirs = kwargs.get("searchincdirs",["./include"])
    objext = kwargs.get("objext",".o")
    objstore = kwargs.get("objstore","./objstore")
    #no object yet: compile, and skip scanning the include graph altogether
    if(getobjfile(fname,objstore,objext)==""):
        return True
    objt = getobjtime(fname,objstore,objext)
    #getsrctimes walks the include graph once; the old extra walk was unused
    for srct in getsrctimes(fname,incdirs):
        if(srct>objt):
            return True
    return False
def gs_incremental_compile(compiler,srcfile,**kwargs):
    """
    Compiles srcfile with a gnu-style command line, but only when
    decidecompile reports that its object file is missing or out of date.

    The object file is written to objstore as <name><objext>, where <name> is
    the source file name with all of its extensions removed.

    optional keyword arguments:
    include (str or list): include options, appended to the end of the command
    flags (str or list): compiler flags
    objext (str): object file extension, with or without the dot (default '.o')
    srcfileflag (str): option placed before the source file (default '-c')
    outfileflag (str): option placed before the output file (default '-o')
    logfile (str): log file handed to callproc (default "")
    searchincdirs (list): directories searched for included files (default ["./include"])
    objstore (str): directory receiving the object files (default "./objstore");
                    created when it does not exist yet
    smartcompile: accepted, but currently has no effect
    """
    def as_text(value):
        #option lists are flattened into one space-separated string
        return list_to_sss(value) if isinstance(value,list) else value

    include = as_text(kwargs.get('include',''))
    flags = as_text(kwargs.get('flags',''))
    objext = kwargs.get('objext','.o')
    srcfileflag = kwargs.get('srcfileflag','-c')
    outfileflag = kwargs.get('outfileflag','-o')
    logfile = kwargs.get('logfile',"")
    #incrementalcompile
    incdirs = kwargs.get("searchincdirs",["./include"])
    objstore = kwargs.get("objstore","./objstore")

    #hand over the resolved values so the up-to-date check looks for exactly
    #the object file that is written below
    if(not decidecompile(srcfile,searchincdirs=incdirs,objext=objext,objstore=objstore)):
        return
    stem = os.path.split(srcfile)[1]
    while(os.path.splitext(stem)[1]!=""):
        stem = os.path.splitext(stem)[0]
    #same naming rule as getobjfile: exactly one dot, whether or not objext has one
    outfile = os.path.join(objstore,"{}.{}".format(stem,objext.strip('.')))
    if(objstore!=""):
        os.makedirs(objstore,exist_ok=True)
    ln = " ".join([compiler,flags,outfileflag,outfile,srcfileflag,srcfile,include])
    callproc(ln,echo=True,logfile=logfile)
    return
def msvc_incremental_compile(compiler,srcfile,**kwargs):
    """
    Compiles srcfile with an MSVC-style command line, but only when
    decidecompile reports that its object file is missing or out of date.

    The object file is written to objstore as <name><objext>, where <name> is
    the source file name with all of its extensions removed.

    optional keyword arguments:
    include (str or list): include options, appended to the end of the command
    flags (str or list): compiler flags
    objext (str): object file extension, with or without the dot (default '.obj')
    srcfileflag (str): option placed before the source file (default '/c')
    outfileflag (str): option glued in front of the quoted output file (default '/Fo')
    logfile (str): log file handed to callproc (default "")
    searchincdirs (list): directories searched for included files (default ["./include"])
    objstore (str): directory receiving the object files (default "./objstore");
                    created when it does not exist yet
    """
    def as_text(value):
        #option lists are flattened into one space-separated string
        return list_to_sss(value) if isinstance(value,list) else value

    include = as_text(kwargs.get('include',''))
    flags = as_text(kwargs.get('flags',''))
    objext = kwargs.get('objext','.obj')
    srcfileflag = kwargs.get('srcfileflag','/c')
    outfileflag = kwargs.get('outfileflag','/Fo')
    logfile = kwargs.get('logfile',"")
    #incrementalcompile
    incdirs = kwargs.get("searchincdirs",["./include"])
    objstore = kwargs.get("objstore","./objstore")

    #decidecompile falls back to ".o" on its own; pass the resolved '.obj'
    #default explicitly or the check never finds the object written below
    if(not decidecompile(srcfile,searchincdirs=incdirs,objext=objext,objstore=objstore)):
        return
    stem = os.path.split(srcfile)[1]
    while(os.path.splitext(stem)[1]!=""):
        stem = os.path.splitext(stem)[0]
    #same naming rule as getobjfile: exactly one dot, whether or not objext has one
    outfile = os.path.join(objstore,"{}.{}".format(stem,objext.strip('.')))
    outfile = os.path.normpath(outfile)
    if(objstore!=""):
        os.makedirs(objstore,exist_ok=True)
    ln = " ".join([compiler,flags,srcfileflag,srcfile,outfileflag+'"'+outfile+'"',include])
    callproc(ln,echo=True,logfile=logfile)
    return
def gs_incremental_compile_list(compiler,srclist,**kwargs):
    """Runs gs_incremental_compile on every source file in srclist, forwarding the keyword arguments."""
    for src in srclist:
        gs_incremental_compile(compiler,src,**kwargs)
    return
def msvc_incremental_compile_list(compiler,srclist,**kwargs):
    """Runs msvc_incremental_compile on every source file in srclist, forwarding the keyword arguments."""
    for src in srclist:
        msvc_incremental_compile(compiler,src,**kwargs)
    return
#######################
## Main Script Tests ##
#######################
def testtimes(args):
    """
    Manual check of the incremental-compile helpers for the source file named
    in args[1] (args is an argv-style list). Does nothing with fewer than two
    entries.

    Prints the source file and its includes with their modification times,
    the associated object file in ./objstore (if any), and the decidecompile
    verdict.
    """
    if(len(args)<2):
        return
    src = args[1]
    paths = getsrcandincludes(src,["./include"])
    times = getsrctimes(src,["./include"])
    for path, mtime in zip(paths,times):
        print("{}\t\t{}".format(path,mtime))
    print("associated obj file:")
    objpath = getobjfile(src,"./objstore")
    objtime = getobjtime(src,"./objstore")
    if(objpath==""):
        print("none found")
    else:
        print("{}\t\t{}".format(objpath,objtime))
    print("compile? : {}".format(decidecompile(src)))
    return
# if(__name__ == "__main__"):
# args = sys.argv
# testtimes(args)

58
build/make.linux64.lib.py Normal file
View File

@ -0,0 +1,58 @@
#!/usr/bin/python3
import os,sys,math
import subprocess
import shutil
from shutil import copytree
from amsbuildlib4 import *
#Builds the static library lib<libname>.a from the sources under ./src with nvcc
#and optionally installs it. All paths are relative to the working directory.
#NOTE(review): presumably meant to be started from the project root - confirm.
libname = "amsculib2.linux64" #static library name to generate
binname = "test" #create this executable when compiling main.c or main.cpp
commondir = "../../linux64" #common directory to pull libraries and includes from
depdir = "./dependencies/linux64" #local pre-compiled dependency libraries and their includes
installdir = "../../linux64" #directory to install to when finished
builddir = "./build_linux64"

doinstall = True #copies the build_output to the install dir when finished
cc = "nvcc" #compiler
cflags = "-dc --compiler-options '-fPIC -O3'"
libraries = "-l{}".format(libname)
libdirs = "-L{} -L{}/lib -L{}/lib".format(builddir,commondir,depdir)
linkerflags = " -Xlinker=-rpath,."
srcexts = [".c",".cpp",".cu"]
binsrc = ["main.c","main.cpp", "main.cu"] #ignore these files when compiling the static library

#keyword list to control the compilers/linkers
kwargs = dict()
include = "-I./include -I{}/include -I{}/include".format(commondir, depdir)
kwargs["include"] = include
kwargs["flags"] = cflags
kwargs["libdir"] = libdirs
kwargs["libflags"] = libraries
kwargs["linkerflags"] = linkerflags
kwargs["recurse"] = True
kwargs["objstore"] = "{}/objstore".format(builddir)
#a list of directories: the include scan indexes this value entry by entry
kwargs["searchincdirs"] = ["./include"]

#The object store must exist before objects are written to it and before it is listed
os.makedirs(kwargs["objstore"],exist_ok=True)

#Find all source files, except the main project files
srcfiles = flist('./src',exts = srcexts, recurse=True)
srcfiles = except_contains(srcfiles,binsrc)

#compile all the source files in the list
#gs_compile_list(cc,files,**kwargs)
gs_incremental_compile_list(cc,srcfiles,**kwargs)

#archive all the source files into a static library
objlist = flist(kwargs['objstore'],exts='.o',recurse=True)
ar_list(objlist,'{}/lib{}.a'.format(builddir,libname))

if(doinstall):
    #Push any libraries to the common lib folder
    #(without the directory, shutil.copy would create a FILE named "lib")
    os.makedirs("{}/lib".format(installdir),exist_ok=True)
    shutil.copy(
        '{}/lib{}.a'.format(builddir,libname),
        "{}/lib".format(installdir)
    )
    #Copy include files to the common include folder
    copytree('./include/',installdir+'/include/',dirs_exist_ok=True)

View File

@ -0,0 +1,49 @@
#!/usr/bin/python3
import os,sys,math
import subprocess
import shutil
from shutil import copytree
from amsbuildlib4 import *
#Compiles ./src/main.cu with nvcc and links it against lib<libname>.a into
#<builddir>/<binname>. All paths are relative to the working directory.
#NOTE(review): presumably meant to be started from the project root - confirm.
libname = "amsculib2.linux64" #static library name to generate
binname = "test" #create this executable when compiling main.c or main.cpp
commondir = "../../linux64" #common directory to pull libraries and includes from
depdir = "./dependencies/linux64" #local pre-compiled dependency libraries and their includes
installdir = "../../linux64" #directory to install to when finished
builddir = "./build_linux64"

doinstall = True #not used by this script
cc = "nvcc" #compiler
cflags = "-dc --compiler-options '-fPIC -O3'"
libraries = "-l{}".format(libname)
libdirs = "-L{} -L{}/lib -L{}/lib".format(builddir,commondir,depdir)
linkerflags = " -Xlinker=-rpath,."
srcexts = [".c",".cpp",".cu"]
binsrc = ["main.c","main.cpp", "main.cu"] #ignore these files when compiling the static library

#keyword list to control the compilers/linkers
kwargs = dict()
include = "-I./include -I{}/include -I{}/include".format(commondir, depdir)
kwargs["include"] = include
kwargs["flags"] = cflags
kwargs["libdir"] = libdirs
kwargs["libflags"] = libraries
kwargs["linkerflags"] = linkerflags
kwargs["recurse"] = True
kwargs["objstore"] = "{}/objstore".format(builddir)
#a list of directories, as the incremental-compile helpers expect
kwargs["searchincdirs"] = ["./include"]

#Pull required binary dynamic libraries to the bin folder
#shutil.copy('{}/lib/libcamsimg3.linux64.so'.format(commondir),builddir);
#shutil.copy('{}/lib/libamsimg.dll'.format(commondir),builddir);
#shutil.copy('{}/lib/glew32.dll','./bin_winx64');

#Designate source files for main test program
fsrc = ['./src/main.cu']
fobj = replaceexts(fsrc,'.o')

#The linker writes the executable into builddir, so make sure it exists
os.makedirs(builddir,exist_ok=True)

#Compile test programs
gs_compile_list(cc,fsrc,**kwargs)
gs_link_list(cc,list_to_sss(fobj),'{}/{}'.format(builddir,binname),**kwargs)

61
build/make.msvc64.lib.py Normal file
View File

@ -0,0 +1,61 @@
#!/usr/bin/python3
import os,sys,math
import subprocess
import shutil
from shutil import copytree
from amsbuildlib4 import *
#Builds the static library lib<libname>.lib from the sources under ./src and
#optionally installs it. All paths are relative to the working directory.
#NOTE(review): presumably meant to be started from the project root - confirm.
libname = "amsculib2.msvc64" #static library name to generate
binname = "test" #create this executable when compiling main.c or main.cpp
commondir = "../../winx64" #common directory to pull libraries and includes from
depdir = "./dependencies/winx64" #local pre-compiled dependency libraries and their includes
installdir = "../../winx64" #directory to install to when finished
builddir = "./build_msvc64"

doinstall = True #copies the build_output to the install dir when finished
cc = "nvcc" #compiler
#NOTE(review): '-fPIC -O3' and '-rpath' are copied from the linux64 script and
#look like gcc/ELF options; confirm they are intended for the MSVC host toolchain.
cflags = "-dc --compiler-options '-fPIC -O3'"
libraries = "-l{}".format(libname)
libdirs = "-L{} -L{}/lib -L{}/lib".format(builddir,commondir,depdir)
linkerflags = " -Xlinker=-rpath,."
srcexts = [".c",".cpp",".cu"]
binsrc = ["main.c","main.cpp","main.cu"] #ignore these files when compiling the static library

#keyword list to control the compilers/linkers
kwargs = dict()
include = "-I./include -I{}/include -I{}/include".format(commondir, depdir)
kwargs["include"] = include
kwargs["flags"] = cflags
kwargs["libdir"] = libdirs
kwargs["libflags"] = libraries
kwargs["linkerflags"] = linkerflags
kwargs["recurse"] = True
kwargs["objstore"] = "{}/objstore".format(builddir)
#a list of directories: the include scan indexes this value entry by entry
kwargs["searchincdirs"] = ["./include"]
kwargs["objext"] = ".obj"

#The object store must exist before objects are written to it and before it is listed
os.makedirs(kwargs["objstore"],exist_ok=True)

#Find all source files, except the main project files
srcfiles = flist('./src',exts = srcexts, recurse=True)
srcfiles = except_contains(srcfiles,binsrc)

#compile all the source files in the list
#gs_compile_list(cc,files,**kwargs)
msvc_incremental_compile_list(cc,srcfiles,**kwargs)

#archive all the source files into a static library
objlist = flist(kwargs['objstore'],exts='.obj',recurse=True)
msvc_lib_list(objlist,'{}/lib{}.lib'.format(builddir,libname))

if(doinstall):
    #Push any libraries to the common lib folder
    #(without the directory, shutil.copy would create a FILE named "lib")
    os.makedirs("{}/lib".format(installdir),exist_ok=True)
    shutil.copy(
        '{}/lib{}.lib'.format(builddir,libname),
        "{}/lib".format(installdir)
    )
    #Copy include files to the common include folder
    copytree('./include/',installdir+'/include/',dirs_exist_ok=True)

49
build/make.msvc64.test.py Normal file
View File

@ -0,0 +1,49 @@
#!/usr/bin/python3
import os,sys,math
import subprocess
import shutil
from shutil import copytree
from amsbuildlib4 import *
#Compiles ./src/main.cpp and links it against lib<libname>.lib into
#<builddir>/<binname>. All paths are relative to the working directory.
#NOTE(review): presumably meant to be started from the project root - confirm.
libname = "amsculib2.msvc64" #static library name to generate
binname = "test.exe" #create this executable when compiling main.c or main.cpp
commondir = "../../winx64" #common directory to pull libraries and includes from
depdir = "./dependencies/winx64" #local pre-compiled dependency libraries and their includes
installdir = "../../winx64" #directory to install to when finished
builddir = "./build_msvc64"

doinstall = False #not used by this script
cc = "nvcc" #compiler
#NOTE(review): '-fPIC -O3', '-L...', '-l...' and '-rpath' are copied from the
#linux64 script; msvc_link_list hands libdir/libflags/linkerflags to 'link' -
#confirm the MSVC linker accepts them in this form.
cflags = "-dc --compiler-options '-fPIC -O3'"
libraries = "-l{}".format(libname)
libdirs = "-L{} -L{}/lib -L{}/lib".format(builddir,commondir,depdir)
linkerflags = " -Xlinker=-rpath,."
srcexts = [".c",".cpp",".cu"]
binsrc = ["main.c","main.cpp","main.cu"] #ignore these files when compiling the static library

#keyword list to control the compilers/linkers
kwargs = dict()
include = "-I./include -I{}/include -I{}/include".format(commondir, depdir)
kwargs["include"] = include
kwargs["flags"] = cflags
kwargs["libdir"] = libdirs
kwargs["libflags"] = libraries
kwargs["linkerflags"] = linkerflags
kwargs["recurse"] = True
kwargs["objstore"] = "{}/objstore".format(builddir)
#a list of directories, as the incremental-compile helpers expect
kwargs["searchincdirs"] = ["./include"]

#Pull required binary dynamic libraries to the bin folder
#shutil.copy('{}/lib/libcamsimg3.linux64.so'.format(commondir),builddir);
#shutil.copy('{}/lib/libamsimg.dll'.format(commondir),builddir);
#shutil.copy('{}/lib/glew32.dll','./bin_winx64');

#Designate source files for main test program
#NOTE(review): the linux64 test script builds ./src/main.cu here - confirm main.cpp is intended
fsrc = ['./src/main.cpp']
fobj = replaceexts(fsrc,'.obj')

#The linker writes the executable into builddir, so make sure it exists
os.makedirs(builddir,exist_ok=True)

#Compile test programs
msvc_compile_list(cc,fsrc,**kwargs)
msvc_link_list(list_to_sss(fobj),'{}/{}'.format(builddir,binname),**kwargs)

Binary file not shown.

View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
build_linux64/test Normal file

Binary file not shown.

View File

View File

@ -1,89 +1,89 @@
#ifndef __AMSCU_COMP128_HPP__ #ifndef __AMSCU_COMP128_HPP__
#define __AMSCU_COMP128_HPP__ #define __AMSCU_COMP128_HPP__
namespace amscuda namespace amscuda
{ {
namespace cmp namespace cmp
{ {
class cucomp128 class cucomp128
{ {
public: public:
double real; double real;
double imag; double imag;
__host__ __device__ cucomp128(); __host__ __device__ cucomp128();
__host__ __device__ ~cucomp128(); __host__ __device__ ~cucomp128();
__host__ __device__ cucomp128(const cucomp128 &other); __host__ __device__ cucomp128(const cucomp128 &other);
__host__ __device__ cucomp128(const double &other); __host__ __device__ cucomp128(const double &other);
__host__ __device__ cucomp128& operator=(cucomp128& other); __host__ __device__ cucomp128& operator=(cucomp128& other);
__host__ __device__ const cucomp128& operator=(const cucomp128& other); __host__ __device__ const cucomp128& operator=(const cucomp128& other);
__host__ __device__ cucomp128& operator=(double& other); __host__ __device__ cucomp128& operator=(double& other);
__host__ __device__ const cucomp128& operator=(const double& other); __host__ __device__ const cucomp128& operator=(const double& other);
__host__ __device__ double& operator[](int& ind); __host__ __device__ double& operator[](int& ind);
__host__ __device__ const double& operator[](const int& ind) const; __host__ __device__ const double& operator[](const int& ind) const;
__host__ __device__ cucomp128 operator+(const cucomp128& z); __host__ __device__ cucomp128 operator+(const cucomp128& z);
__host__ __device__ cucomp128 operator-(const cucomp128& z); __host__ __device__ cucomp128 operator-(const cucomp128& z);
__host__ __device__ cucomp128 operator*(const cucomp128& z); __host__ __device__ cucomp128 operator*(const cucomp128& z);
__host__ __device__ cucomp128 operator/(const cucomp128& z); __host__ __device__ cucomp128 operator/(const cucomp128& z);
__host__ __device__ cucomp128 operator+(const double& z); __host__ __device__ cucomp128 operator+(const double& z);
__host__ __device__ cucomp128 operator-(const double& z); __host__ __device__ cucomp128 operator-(const double& z);
__host__ __device__ cucomp128 operator*(const double& z); __host__ __device__ cucomp128 operator*(const double& z);
__host__ __device__ cucomp128 operator/(const double& z); __host__ __device__ cucomp128 operator/(const double& z);
__host__ __device__ friend cucomp128 operator-(const cucomp128& z); //negation sign __host__ __device__ friend cucomp128 operator-(const cucomp128& z); //negation sign
//comparison operators //comparison operators
__host__ __device__ bool operator==(const cucomp128& z) const; __host__ __device__ bool operator==(const cucomp128& z) const;
__host__ __device__ bool operator!=(const cucomp128& z) const; __host__ __device__ bool operator!=(const cucomp128& z) const;
__host__ __device__ bool operator>(const cucomp128& z) const; __host__ __device__ bool operator>(const cucomp128& z) const;
__host__ __device__ bool operator<(const cucomp128& z) const; __host__ __device__ bool operator<(const cucomp128& z) const;
__host__ __device__ bool operator>=(const cucomp128& z) const; __host__ __device__ bool operator>=(const cucomp128& z) const;
__host__ __device__ bool operator<=(const cucomp128& z) const; __host__ __device__ bool operator<=(const cucomp128& z) const;
__host__ __device__ bool isnan() const; __host__ __device__ bool isnan() const;
__host__ __device__ bool isinf() const; __host__ __device__ bool isinf() const;
__host__ __device__ bool isreal() const; __host__ __device__ bool isreal() const;
__host__ __device__ bool isimag() const; __host__ __device__ bool isimag() const;
__host__ __device__ bool iszero() const; __host__ __device__ bool iszero() const;
__host__ __device__ double arg() const; __host__ __device__ double arg() const;
__host__ __device__ double mag() const; __host__ __device__ double mag() const;
__host__ __device__ cucomp128 conj() const; __host__ __device__ cucomp128 conj() const;
}; };
__host__ __device__ double arg(cucomp128 z); __host__ __device__ double arg(cucomp128 z);
__host__ __device__ cucomp128 dtocomp(double _r, double _i); __host__ __device__ cucomp128 dtocomp(double _r, double _i);
__host__ __device__ double real(cucomp128 z); __host__ __device__ double real(cucomp128 z);
__host__ __device__ double imag(cucomp128 z); __host__ __device__ double imag(cucomp128 z);
__host__ __device__ cucomp128 sin(cucomp128 z); __host__ __device__ cucomp128 sin(cucomp128 z);
__host__ __device__ cucomp128 cos(cucomp128 z); __host__ __device__ cucomp128 cos(cucomp128 z);
__host__ __device__ cucomp128 tan(cucomp128 z); __host__ __device__ cucomp128 tan(cucomp128 z);
__host__ __device__ cucomp128 exp(cucomp128 z); __host__ __device__ cucomp128 exp(cucomp128 z);
__host__ __device__ cucomp128 log(cucomp128 z); __host__ __device__ cucomp128 log(cucomp128 z);
__host__ __device__ double abs(cucomp128 z); __host__ __device__ double abs(cucomp128 z);
__host__ __device__ cucomp128 conj(cucomp128 z); __host__ __device__ cucomp128 conj(cucomp128 z);
// //need hyperbolic trig Functions // //need hyperbolic trig Functions
__host__ __device__ cucomp128 cosh(cucomp128 z); __host__ __device__ cucomp128 cosh(cucomp128 z);
__host__ __device__ cucomp128 sinh(cucomp128 z); __host__ __device__ cucomp128 sinh(cucomp128 z);
__host__ __device__ cucomp128 tanh(cucomp128 z); __host__ __device__ cucomp128 tanh(cucomp128 z);
__host__ __device__ cucomp128 pow(cucomp128 z1, cucomp128 z2); __host__ __device__ cucomp128 pow(cucomp128 z1, cucomp128 z2);
// //returns "complex sign" of complex number - 0, or a unit number with same argument // //returns "complex sign" of complex number - 0, or a unit number with same argument
__host__ __device__ cucomp128 csgn(cucomp128 z); __host__ __device__ cucomp128 csgn(cucomp128 z);
void test_cucomp128_1(); void test_cucomp128_1();
}; //end namespace cmp }; //end namespace cmp
}; //end namespace amscuda }; //end namespace amscuda
#endif #endif

View File

@ -1,88 +1,88 @@
#ifndef __AMSCU_COMP64_HPP__
#define __AMSCU_COMP64_HPP__

namespace amscuda
{
namespace cmp
{

    // Complex number stored as two floats (real part, imaginary part).
    // Every member is declared for both host and device code.
    class cucomp64
    {
        public:
        float real;
        float imag;

        // construction / destruction
        __host__ __device__ cucomp64();
        __host__ __device__ ~cucomp64();
        __host__ __device__ cucomp64(const cucomp64 &other);
        __host__ __device__ cucomp64(const float &other);

        // assignment from another complex value or from a float
        __host__ __device__ cucomp64& operator=(cucomp64& other);
        __host__ __device__ const cucomp64& operator=(const cucomp64& other);
        __host__ __device__ cucomp64& operator=(float& other);
        __host__ __device__ const cucomp64& operator=(const float& other);

        // indexed component access
        // NOTE(review): presumably index 0 -> real, 1 -> imag; confirm in the implementation
        __host__ __device__ float& operator[](int& ind);
        __host__ __device__ const float& operator[](const int& ind) const;

        // complex (op) complex arithmetic
        __host__ __device__ cucomp64 operator+(const cucomp64& z);
        __host__ __device__ cucomp64 operator-(const cucomp64& z);
        __host__ __device__ cucomp64 operator*(const cucomp64& z);
        __host__ __device__ cucomp64 operator/(const cucomp64& z);

        // complex (op) float arithmetic
        __host__ __device__ cucomp64 operator+(const float& z);
        __host__ __device__ cucomp64 operator-(const float& z);
        __host__ __device__ cucomp64 operator*(const float& z);
        __host__ __device__ cucomp64 operator/(const float& z);

        __host__ __device__ friend cucomp64 operator-(const cucomp64& z); //negation sign

        //comparison operators
        // NOTE(review): the ordering used by >, <, >=, <= is defined in the
        // implementation file, which is not visible from this header
        __host__ __device__ bool operator==(const cucomp64& z) const;
        __host__ __device__ bool operator!=(const cucomp64& z) const;
        __host__ __device__ bool operator>(const cucomp64& z) const;
        __host__ __device__ bool operator<(const cucomp64& z) const;
        __host__ __device__ bool operator>=(const cucomp64& z) const;
        __host__ __device__ bool operator<=(const cucomp64& z) const;

        // classification predicates
        __host__ __device__ bool isnan() const;
        __host__ __device__ bool isinf() const;
        __host__ __device__ bool isreal() const;
        __host__ __device__ bool isimag() const;
        __host__ __device__ bool iszero() const;

        // presumably argument (phase angle), magnitude and complex conjugate
        __host__ __device__ float arg() const;
        __host__ __device__ float mag() const;
        __host__ __device__ cucomp64 conj() const;
    };

    // free-function counterparts operating on cucomp64 values
    __host__ __device__ float arg(cucomp64 z);

    // presumably builds a cucomp64 from a real part _r and an imaginary part _i
    __host__ __device__ cucomp64 dtocomp64(float _r, float _i);

    __host__ __device__ float real(cucomp64 z);
    __host__ __device__ float imag(cucomp64 z);
    __host__ __device__ cucomp64 sin(cucomp64 z);
    __host__ __device__ cucomp64 cos(cucomp64 z);
    __host__ __device__ cucomp64 tan(cucomp64 z);
    __host__ __device__ cucomp64 exp(cucomp64 z);
    __host__ __device__ cucomp64 log(cucomp64 z);
    __host__ __device__ float abs(cucomp64 z);
    __host__ __device__ cucomp64 conj(cucomp64 z);

    // //need hyperbolic trig Functions
    __host__ __device__ cucomp64 cosh(cucomp64 z);
    __host__ __device__ cucomp64 sinh(cucomp64 z);
    __host__ __device__ cucomp64 tanh(cucomp64 z);

    __host__ __device__ cucomp64 pow(cucomp64 z1, cucomp64 z2);

    // //returns "complex sign" of complex number - 0, or a unit number with same argument
    __host__ __device__ cucomp64 csgn(cucomp64 z);

    // host-side self test
    void test_cucomp64_1();

}; //end namespace cmp
}; //end namespace amscuda

#endif

View File

@ -1,40 +1,40 @@
#ifndef __AMSCU_CUDAFUNCTIONS_HPP__
#define __AMSCU_CUDAFUNCTIONS_HPP__

namespace amscuda
{

    // device memory operations
    // I'm trying to avoid some of the boilerplate mental overhead involved
    // in calling cuda functions and handling errors
    //
    // Return convention shared by the templates below (see the _impl header):
    //   1  - operation performed successfully
    //   0  - nothing to do (pointer already NULL / N<=0)
    //   <0 - failure (distinct negative code per failure cause)

    //frees *devptr if it is not already NULL, and sets *devptr to NULL
    //wrapper to cudaFree
    template<typename T> int cuda_free(T **devptr);

    //copies hostbuffer (N elements) to *devbuffer
    //allocates *devbuffer if it is NULL
    //if overwrite is true, frees and reallocates *devbuffer on device (for resizing)
    template<typename T> int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite);

    //copies N elements from devbuffer to *hostbuffer
    //allocates *hostbuffer with new[] if it is NULL
    //if overwrite is true, deletes and reallocates *hostbuffer on host with new[] (for resizing)
    template<typename T> int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite);

    //wrapper for cudaMemcpy - copies an item or struct (count 1) to the device
    //allocates *devptr if not already initialized
    template<typename T> int cuda_copytodevice(T *hostptr, T **devptr);

    //wrapper for cudaMemcpy - copies an item or struct (count 1) from device
    //allocates *hostptr with new if not already initialized
    template<typename T> int cuda_copyfromdevice(T *devptr, T **hostptr);

    // NOTE(review): defined outside this header; presumably reports the pending
    // CUDA error prefixed with msgheader - confirm against the implementation
    int cuda_errortrap(const char *msgheader);

};

#include <amsculib2/amscu_cudafunctions_impl.hpp>

#endif

View File

@ -1,228 +1,228 @@
#ifndef __AMSCU_CUDAFUNCTIONS_IMPL_HPP__ #ifndef __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
#define __AMSCU_CUDAFUNCTIONS_IMPL_HPP__ #define __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
namespace amscuda namespace amscuda
{ {
//Wrapper to cudaFree: releases the device allocation held in *devptr
//and leaves *devptr set to NULL.
//
// devptr  - address of the device pointer to release (must itself be non-NULL)
// returns -  0 : *devptr was already NULL, nothing was freed
//            1 : cudaFree succeeded
//           -1 : cudaFree reported an error (*devptr is still reset to NULL)
template<typename T> int cuda_free(T **devptr)
{
    if(*devptr==NULL)
    {
        return 0; //already NULL/freed
    }

    const cudaError_t status = cudaFree(*devptr);

    //the pointer is cleared on both outcomes so a failed free is never retried
    *devptr = NULL;

    return (status==cudaSuccess) ? 1 : -1;
}
//Uploads N elements from hostbuffer into the device buffer *devbuffer.
// - when *devbuffer is NULL it is allocated with cudaMalloc (N elements)
// - when overwrite is true an existing *devbuffer is freed and reallocated
//   first (for resizing)
// - when overwrite is false and *devbuffer is non-NULL it is reused as-is,
//   so it must already hold at least N elements
//
// returns -  1 : copied
//            0 : N<=0, nothing done
//           -2 : hostbuffer is NULL
//           -3 : device allocation failed (*devbuffer set to NULL)
//           -4 : cudaMemcpy failed
template<typename T> int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite)
{
    if(N<=0)
    {
        return 0;
    }
    if(hostbuffer==NULL)
    {
        return -2; //host buffer is NULL
    }

    if(overwrite && *devbuffer!=NULL)
    {
        cuda_free(devbuffer); //also resets *devbuffer to NULL
    }

    if(*devbuffer==NULL)
    {
        if(cudaMalloc(devbuffer,sizeof(T)*N)!=cudaSuccess)
        {
            *devbuffer = NULL;
            return -3; //failed to allocate
        }
    }

    const cudaError_t copystatus =
        cudaMemcpy(*devbuffer,hostbuffer,sizeof(T)*N,cudaMemcpyHostToDevice);

    return (copystatus==cudaSuccess) ? 1 : -4;
}
//Downloads N elements from the device buffer devbuffer into *hostbuffer.
// - when *hostbuffer is NULL it is allocated with new[] (N elements)
// - when overwrite is true an existing *hostbuffer is delete[]d and
//   reallocated first (for resizing)
// - when overwrite is false and *hostbuffer is non-NULL it is reused as-is,
//   so it must already hold at least N elements
//
// returns -  1 : copied
//            0 : N<=0, nothing done
//           -5 : devbuffer is NULL
//           -6 : host allocation failed
//           -7 : cudaMemcpy failed (*hostbuffer stays allocated)
template<typename T> int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite)
{
    int ret = 0;
    cudaError_t err = cudaSuccess;

    if(N<=0)
    {
        ret = 0;
        return ret;
    }

    if(devbuffer==NULL)
    {
        ret = -5; //null dev buffer
        return ret;
    }

    if(overwrite && *hostbuffer!=NULL)
    {
        delete[] (*hostbuffer);
        //reset the caller's pointer, not the local T** - nulling the local
        //made the *hostbuffer checks below dereference NULL
        *hostbuffer = NULL;
    }

    if(*hostbuffer==NULL)
    {
        *hostbuffer = new(std::nothrow) T[N];
        if(*hostbuffer==NULL)
        {
            ret = -6; //failed to allocate host buffer
            return ret;
        }
    }

    err = cudaMemcpy(*hostbuffer, devbuffer, sizeof(T)*N, cudaMemcpyDeviceToHost);
    if(err!=cudaSuccess)
    {
        ret = -7; //failed to copy
    }
    else
    {
        ret = 1;
    }

    return ret;
}
//wrapper for cudaMemcpy - uploads a single item or struct (count 1) to the device
//Any existing allocation in *devptr is always freed and a fresh one of
//sizeof(T) bytes is made before the copy.
//
// returns -  1 : copied
//           -2 : hostptr is NULL
//           -3 : device allocation failed (*devptr set to NULL)
//           -4 : cudaMemcpy failed
template<typename T> int cuda_copytodevice(T *hostptr, T **devptr)
{
    if(hostptr==NULL)
    {
        return -2; //host buffer is NULL
    }

    //always start from a fresh device allocation
    if(*devptr!=NULL)
    {
        cuda_free(devptr); //also resets *devptr to NULL
    }

    if(cudaMalloc(devptr,sizeof(T))!=cudaSuccess)
    {
        *devptr = NULL;
        return -3; //failed to allocate
    }

    const cudaError_t copystatus =
        cudaMemcpy(*devptr,hostptr,sizeof(T),cudaMemcpyHostToDevice);

    return (copystatus==cudaSuccess) ? 1 : -4;
}
//wrapper for cudaMemcpy - downloads a single item or struct (count 1) from device
//Any existing object in *hostptr is always deleted and a fresh one is
//allocated with new before the copy.
//
// returns -  1 : copied
//           -5 : devptr is NULL
//           -6 : host allocation failed
//           -7 : cudaMemcpy failed (*hostptr stays allocated)
template<typename T> int cuda_copyfromdevice(T *devptr, T **hostptr)
{
    int ret = 0;
    cudaError_t err = cudaSuccess;

    if(devptr==NULL)
    {
        ret = -5; //null dev buffer
        return ret;
    }

    if(*hostptr!=NULL)
    {
        delete (*hostptr);
        //reset the caller's pointer, not the local T** - nulling the local
        //made the *hostptr checks below dereference NULL
        *hostptr = NULL;
    }

    if(*hostptr==NULL)
    {
        *hostptr = new(std::nothrow) T;
        if(*hostptr==NULL)
        {
            ret = -6; //failed to allocate host buffer
            return ret;
        }
    }

    err = cudaMemcpy(*hostptr, devptr, sizeof(T), cudaMemcpyDeviceToHost);
    if(err!=cudaSuccess)
    {
        ret = -7; //failed to copy
    }
    else
    {
        ret = 1;
    }

    return ret;
}
}; };
#endif #endif

View File

@ -1,55 +1,55 @@
#ifndef __AMSCU_RANDOM_HPP__
#define __AMSCU_RANDOM_HPP__

namespace amscuda
{

    // Random Number Generators

    // faster floating point hash function used in fractal generators
    __device__ __host__ float fhash1d_su(float x);
    __device__ __host__ float fhash3d_su(float x, float y, float z);
    __device__ __host__ float fhash4d_su(float x, float y, float z, float w);

    //////////////////////////////////////////////////
    // Deterministic Pseudorandom int32_t Generator //
    //////////////////////////////////////////////////

    //Next seed in simple 32 bit integer deterministic pseudo-rand generator
    //(the seed is updated in place through rseed_inout)
    __host__ __device__ void dpr32_nextseed(int32_t *rseed_inout);

    //Simple 32 bit integer deterministic pseudo-random generator
    // *not* for cryptography
    // Frequency of generated floats should be uniform [0,1)
    __host__ __device__ float dpr32_randf(int32_t *rseed_inout);

    //box muller standard normal pseudorandom variable
    __host__ __device__ float dpr32_randnf(int32_t *rseed_inout);

    //////////////////////////////////////////////////
    // Deterministic Pseudorandom int64_t Generator //
    //////////////////////////////////////////////////

    //operates without side-effects on explicit seed for threaded use
    //deterministic pseudorandom number generator - takes seed and returns next seed
    __host__ __device__ void dpr64_nextseed(int64_t *seedinout);

    //deterministic pseudorandom number generator - takes seed and returns next seed
    //returns uniformly distributed double
    __host__ __device__ double dpr64_randd(int64_t *seedinout);

    //float-returning counterpart of dpr64_randd
    __host__ __device__ float dpr64_randf(int64_t *seedinout);

    // host-side self tests
    void test_dprg64();
    void test_dprg32();

}; //end namespace amscuda

#endif

View File

@ -1,47 +1,47 @@
#ifndef __CUARRAY_HPP__
#define __CUARRAY_HPP__

namespace amscuda
{

    // Simple array container: an element count plus a raw data pointer,
    // with members declared for both host and device code.
    template<typename T> class cuarray
    {
        public:
        int length;
        T* data;

        __device__ __host__ cuarray();
        __device__ __host__ ~cuarray();

        //Only call this on the device for thread/block local
        // dynamic arrays
        __device__ __host__ int resize(const int _length);

        __device__ __host__ int size() const;

        // element access
        __device__ __host__ T& at(const int I);
        __device__ __host__ const T& at(const int I) const;
        __device__ __host__ T& operator[](const int I);
        __device__ __host__ const T& operator[](const int I) const;

        // host-only helpers for moving this array to / from a device-resident
        // cuarray<T> referenced by dptr
        // NOTE(review): exact ownership and return-code semantics live in
        // amscuarray_impl.hpp, which is not visible here - confirm there
        __host__ int device_send(cuarray<T> **dptr);
        __host__ int _device_send_overwrite(cuarray<T> **dptr);
        __host__ int _device_send_copy(cuarray<T> *dptr);

        __host__ int device_pull(cuarray<T> *dptr);
        __host__ int device_free(cuarray<T> **dptr);

        __host__ int device_length(cuarray<T> *dptr);
        __host__ T* device_data_ptr(cuarray<T> *dptr);
    };

    // host-side self test
    void test_cuarray();

};

#include <amsculib2/amscuarray_impl.hpp>

#endif

View File

@ -1,76 +1,76 @@
#ifndef __AMSCUARRAY_DOPS_HPP__
#define __AMSCUARRAY_DOPS_HPP__

//Device Operations on Arrays
//
//Device Operations on Device Buffers
// dodb
//
// All functions here are host entry points taking pointers to device memory.
// Where present, nblocks / nthreads are the launch configuration passed to
// the underlying kernel.

namespace amscuda
{

    //sum
    template<typename T> T devcuarray_sum(cuarray<T> *devptr);
    template<typename T> T dbuff_sum(T *devbuffer, int N);

    // result record for dbuff_stats
    struct dbuff_statstruct
    {
        public:
        float min;
        float max;
        float mean;
        float stdev;
        float sum;
    };

    //stats (min,max,mean,stdev)
    template<typename T> void dbuff_minmax(T *devbuffer, int N, T *min, T *max);
    template<typename T> dbuff_statstruct dbuff_stats(T *devbuffer, int N);

    //sets all elements to setto
    template<typename T> void dbuff_setall(T *devbuffer, int N, T setto, int nblocks, int nthreads);

    //random device buffer functions
    void dbuff_rand_dpr32(float *devbuffer, int N, int32_t *rseedinout, int nblocks, int nthreads);
    void dbuff_rand_dpr32n(float *devbuffer, int N, int32_t *rseedinout, int nblocks, int nthreads);
    void dbuff_rand_dpr64(float *devbuffer, int N, int64_t *rseedinout, int nblocks, int nthreads);

    //Elementwise device-buffer vector binary operation
    //takes two input arrays ( , ) --> one output array
    template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads);

    //Elementwise device-buffer vector two-parameter operation
    //takes one input array, and a constant parameter ( ) ---> one output array
    template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads);

    //vector_add
    template<typename T> void dbuff_add(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
    template<typename T> void dbuff_add(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads);

    //vector_sub
    template<typename T> void dbuff_sub(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
    template<typename T> void dbuff_sub(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads);

    //vector_mult
    template<typename T> void dbuff_mult(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
    template<typename T> void dbuff_mult(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads);

    //vector_div (buffer/buffer, buffer/constant, constant/buffer)
    template<typename T> void dbuff_div(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
    template<typename T> void dbuff_div(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads);
    template<typename T> void dbuff_div(T par_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);

    // Tests //
    void test_dbuff_rand_dpr32();

};

#include <amsculib2/amscuarray_dops_impl.hpp>

#endif

View File

@ -1,404 +1,404 @@
#ifndef __AMSCUARRAY_DOPS_IMPL_HPP__ #ifndef __AMSCUARRAY_DOPS_IMPL_HPP__
#define __AMSCUARRAY_DOPS_IMPL_HPP__ #define __AMSCUARRAY_DOPS_IMPL_HPP__
namespace amscuda namespace amscuda
{ {
//Kernel: each thread accumulates a partial sum of devbuffer[0..N) and stores
//it in rets[global thread index].
// - a thread starts at its global index and advances by the total number of
//   launched threads (blockDim.x*gridDim.x)
// - every launched thread writes its slot (0 when it visited no element), so
//   rets must hold at least gridDim.x*blockDim.x elements
template<typename T> __global__ void dbuff_sum_kf(T *devbuffer, int N, T *rets)
{
    const int tid = threadIdx.x + blockIdx.x*blockDim.x;
    const int nthreads_total = blockDim.x*gridDim.x;

    T partial = (T) 0;
    for(int k=tid; k<N; k+=nthreads_total)
    {
        partial = partial + devbuffer[k];
    }

    rets[tid] = partial;
}
//Sums the elements of a cuarray<T> that resides in device memory.
//
// devptr  - device pointer to a cuarray<T> whose data pointer is itself a
//           device buffer
// returns - the sum computed by dbuff_sum, or T() when devptr is NULL or the
//           array header could not be read back from the device
template<typename T> T devcuarray_sum(cuarray<T> *devptr)
{
    T ret = T();
    cudaError_t err = cudaSuccess;
    cuarray<T> ldptr;

    if(devptr==NULL)
    {
        return ret;
    }

    //bring the array header (length + device data pointer) over to the host
    err = cudaMemcpy(&ldptr,devptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
    if(err!=cudaSuccess)
    {
        printf("amscu::devcuarray_sum error: %s\n",cudaGetErrorString(err));
    }
    else
    {
        //dbuff_sum is the buffer-sum routine declared by this library
        ret = dbuff_sum(ldptr.data,ldptr.length);
    }

    //ldptr.data may now hold a device address: clear the local copy before it
    //goes out of scope (presumably so ~cuarray does not try to release it -
    //confirm against the cuarray implementation)
    ldptr.data = NULL;
    ldptr.length=0;

    return ret;
}
//Sums the N elements of the device buffer dbuff.
//
//Launches dbuff_sum_kf to produce one partial sum per thread, copies the
//partials back and adds them up on the host.
//
// returns - the sum; T() when dbuff is NULL, N<=0, or any CUDA call fails
//           (failures are reported with printf)
template<typename T> T dbuff_sum(T *dbuff, int N)
{
    int I;
    T ret = T();
    cudaError_t err = cudaSuccess;
    int nblocks;
    int nthreads;
    int npartial;
    T *rets = NULL;
    T *devrets = NULL;

    if(dbuff==NULL || N<=0)
    {
        return ret;
    }

    //launch configuration: small inputs run on a single thread
    if(N>100)
    {
        nblocks = 10;
        nthreads = (int)sqrt((float) (N/nblocks));
        if(nthreads<=0) nthreads=1;
        if(nthreads>512) nthreads=512;
    }
    else
    {
        nblocks = 1;
        nthreads = 1;
    }
    npartial = nblocks*nthreads;

    err = cudaMalloc(&devrets,sizeof(T)*npartial);
    if(err!=cudaSuccess)
    {
        //without the device scratch buffer the kernel must not be launched
        printf("amscu::dbuff_sum error: %s\n",cudaGetErrorString(err));
        return ret;
    }
    rets = new T[npartial];

    dbuff_sum_kf<<<nblocks,nthreads>>>(dbuff,N,devrets);
    cudaDeviceSynchronize();
    err = cudaGetLastError();
    if(err==cudaSuccess)
    {
        err = cudaMemcpy(rets,devrets,sizeof(T)*npartial,cudaMemcpyDeviceToHost);
    }

    if(err!=cudaSuccess)
    {
        //rets is not valid here, so it is not summed
        printf("amscu::dbuff_sum error: %s\n",cudaGetErrorString(err));
    }
    else
    {
        ret = (T)0;
        for(I=0;I<npartial;I++)
        {
            ret = ret + rets[I];
        }
    }

    cudaFree(devrets); devrets = NULL;
    delete[] rets;

    return ret;
}
//Kernel: each thread finds the max and min over the elements of
//devbuffer[0..N) it visits and stores them in maxs[global thread index] and
//mins[global thread index].
// - a thread starts at its global index and advances by the total number of
//   launched threads (blockDim.x*gridDim.x)
// - a thread whose global index is >= N visits nothing and leaves its
//   maxs/mins slots untouched
template<typename T> __global__ void dbuff_minmax_kf(T *devbuffer, int N, T *maxs, T *mins)
{
    const int tid = threadIdx.x + blockIdx.x*blockDim.x;
    const int nthreads_total = blockDim.x*gridDim.x;

    if(tid>=N)
    {
        return; //no element for this thread
    }

    //seed both extremes with this thread's first element
    T hi = devbuffer[tid];
    T lo = devbuffer[tid];

    for(int k=tid+nthreads_total; k<N; k+=nthreads_total)
    {
        if(devbuffer[k]>hi)
        {
            hi = devbuffer[k];
        }
        if(devbuffer[k]<lo)
        {
            lo = devbuffer[k];
        }
    }

    maxs[tid] = hi;
    mins[tid] = lo;

    return;
}
//Finds the minimum and maximum of the N elements of the device buffer
//devbuffer.
//
//Launches dbuff_minmax_kf to produce per-thread extremes, copies them back
//and reduces them on the host.
//
// min, max - optional outputs (either may be NULL). Both receive T(0) when
//            devbuffer is NULL, N<=0, or any CUDA call fails (failures are
//            reported with printf)
template<typename T> void dbuff_minmax(T *devbuffer, int N, T *min, T *max)
{
    cudaError_t err = cudaSuccess;
    int nblocks;
    int nthreads;
    int npartial;
    int I;

    T *maxs = NULL;
    T *dev_maxs = NULL;
    T *mins = NULL;
    T *dev_mins = NULL;

    T localmax = T(0);
    T localmin = T(0);

    if(devbuffer==NULL || N<=0)
    {
        if(min!=NULL) *min = T(0);
        if(max!=NULL) *max = T(0);
        return;
    }

    //launch configuration: nblocks*nthreads never exceeds N, so every
    //launched thread visits at least one element and writes its slot
    if(N>25)
    {
        nblocks = 25;
        nthreads = (int) sqrt((float)(N/nblocks));
        if(nthreads<1) nthreads = 1;
        if(nthreads>512) nthreads = 512;
    }
    else
    {
        nblocks = 1;
        nthreads = 1;
    }
    npartial = nblocks*nthreads;

    //device scratch buffers hold npartial ELEMENTS, so the byte count must
    //include sizeof(T)
    err = cudaMalloc(&dev_maxs,sizeof(T)*npartial);
    if(err!=cudaSuccess)
    {
        dev_maxs = NULL;
    }
    else
    {
        err = cudaMalloc(&dev_mins,sizeof(T)*npartial);
        if(err!=cudaSuccess) dev_mins = NULL;
    }
    if(err!=cudaSuccess)
    {
        printf("amscu::dbuff_minmax error: %s\n",cudaGetErrorString(err));
        cudaFree(dev_maxs); dev_maxs = NULL; //cudaFree(NULL) is a no-op
        if(min!=NULL) *min = T(0);
        if(max!=NULL) *max = T(0);
        return;
    }

    maxs = new T[npartial];
    mins = new T[npartial];

    dbuff_minmax_kf<<<nblocks,nthreads>>>(devbuffer,N,dev_maxs,dev_mins);
    cudaDeviceSynchronize();
    err = cudaGetLastError();
    if(err==cudaSuccess)
    {
        err = cudaMemcpy(maxs,dev_maxs,sizeof(T)*npartial,cudaMemcpyDeviceToHost);
    }
    if(err==cudaSuccess)
    {
        err = cudaMemcpy(mins,dev_mins,sizeof(T)*npartial,cudaMemcpyDeviceToHost);
    }

    if(err!=cudaSuccess)
    {
        //host copies are not valid here; localmax/localmin stay T(0)
        printf("amscu::dbuff_minmax error: %s\n",cudaGetErrorString(err));
    }
    else
    {
        localmax = maxs[0];
        localmin = mins[0];
        for(I=1;I<npartial;I++)
        {
            if(maxs[I]>localmax) localmax = maxs[I];
            if(mins[I]<localmin) localmin = mins[I];
        }
    }

    if(max!=NULL) *max = localmax;
    if(min!=NULL) *min = localmin;

    cudaFree(dev_maxs); dev_maxs = NULL;
    cudaFree(dev_mins); dev_mins = NULL;
    delete[] maxs; maxs = NULL;
    delete[] mins; mins = NULL;

    return;
}
// Kernel: writes `setto` into each of the first N elements of devbuffer.
// Each thread starts at its flattened thread index and advances by the total
// number of launched threads until it runs past N.
template<typename T> __global__ void dbuff_setall_kf(T *devbuffer, int N, T setto)
{
    const int first = threadIdx.x + blockIdx.x*blockDim.x;
    const int step = blockDim.x*gridDim.x;
    int idx = first;
    while(idx<N)
    {
        devbuffer[idx] = setto;
        idx = idx + step;
    }
}
// Host wrapper around dbuff_setall_kf.
//   devbuffer        : buffer handed to the kernel (nothing is done if NULL)
//   N                : number of elements to set (nothing is done if N<=0)
//   setto            : value stored into every element
//   nblocks,nthreads : launch configuration passed to <<< >>>
// Waits for the kernel to finish and prints the CUDA error string, if any.
template<typename T> void dbuff_setall(T *devbuffer, int N, T setto, int nblocks, int nthreads)
{
    const bool nothing_to_do = (devbuffer==NULL) || (N<=0);
    if(!nothing_to_do)
    {
        dbuff_setall_kf<<<nblocks,nthreads>>>(devbuffer,N,setto);
        cudaDeviceSynchronize();
        const cudaError_t status = cudaGetLastError();
        if(status!=cudaSuccess)
        {
            printf("amscu::dbuff_setall error: %s\n",cudaGetErrorString(status));
        }
    }
}
// ---------------------------------------------------------------------------
// Element-wise binary operations on device buffers
// ---------------------------------------------------------------------------

// Kernel: dbuf_out[I] = fpnt(dbuf_a[I],dbuf_b[I]) for every I in [0,N).
// fpnt is called from device code, so it must hold the address of a device
// function (see the note on dbuff_vectorbinop below).
template<typename T1, typename T2, typename T3> __global__ void dbuff_vectorbinop_kf1(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2))
{
    const int Is = blockDim.x*gridDim.x;
    for(int I = threadIdx.x + blockIdx.x*blockDim.x; I<N; I = I+Is)
    {
        dbuf_out[I] = fpnt(dbuf_a[I],dbuf_b[I]);
    }
    return;
}

// Kernel: dbuf_out[I] = fpnt(dbuf_a[I],par_b) for every I in [0,N).
// Same requirement on fpnt as dbuff_vectorbinop_kf1.
template<typename T1, typename T2, typename T3> __global__ void dbuff_vectorbinop_kf2(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2))
{
    const int Is = blockDim.x*gridDim.x;
    for(int I = threadIdx.x + blockIdx.x*blockDim.x; I<N; I = I+Is)
    {
        dbuf_out[I] = fpnt(dbuf_a[I],par_b);
    }
    return;
}

// Functor kernels: the operation is passed by value as a callable object whose
// operator() is compiled for the device, so no function address has to cross
// the host/device boundary.
template<typename T1, typename T2, typename T3, typename Op> __global__ void dbuff_vectorbinop_opkf1(const T1 *dbuf_a, const T2 *dbuf_b, T3 *dbuf_out, int N, Op op)
{
    const int Is = blockDim.x*gridDim.x;
    for(int I = threadIdx.x + blockIdx.x*blockDim.x; I<N; I = I+Is)
    {
        dbuf_out[I] = op(dbuf_a[I],dbuf_b[I]);
    }
    return;
}

template<typename T1, typename T2, typename T3, typename Op> __global__ void dbuff_vectorbinop_opkf2(const T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, Op op)
{
    const int Is = blockDim.x*gridDim.x;
    for(int I = threadIdx.x + blockIdx.x*blockDim.x; I<N; I = I+Is)
    {
        dbuf_out[I] = op(dbuf_a[I],par_b);
    }
    return;
}

// Waits for the previously launched kernel and prints any CUDA error.
inline void dbuff_vectorbinop_report()
{
    cudaDeviceSynchronize();
    const cudaError_t err = cudaGetLastError();
    if(err!=cudaSuccess)
    {
        printf("amscu::devbuffer_vectorbinop error: %s\n",cudaGetErrorString(err));
    }
    return;
}

// Internal launcher, (array, array) --> array, functor version.
template<typename T1, typename T2, typename T3, typename Op> void dbuff_binop_launch_vv(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, Op op, int nblocks, int nthreads)
{
    if(dbuf_a == NULL || dbuf_b == NULL || dbuf_out == NULL || N<=0)
    {
        return;
    }
    dbuff_vectorbinop_opkf1<<<nblocks,nthreads>>>(dbuf_a,dbuf_b,dbuf_out,N,op);
    dbuff_vectorbinop_report();
    return;
}

// Internal launcher, (array, constant) --> array, functor version.
template<typename T1, typename T2, typename T3, typename Op> void dbuff_binop_launch_vs(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, Op op, int nblocks, int nthreads)
{
    if(dbuf_a == NULL || dbuf_out == NULL || N<=0)
    {
        return;
    }
    dbuff_vectorbinop_opkf2<<<nblocks,nthreads>>>(dbuf_a,par_b,dbuf_out,N,op);
    dbuff_vectorbinop_report();
    return;
}

//Elementwise device-buffer vector binary operation
//takes two input arrays ( , ) --> one output array
//
// fpnt is invoked inside the kernel. The address of a function taken in host
// code is not callable from device code, so fpnt must be a device function
// address (for example one read back from a __device__ function-pointer
// variable with cudaMemcpyFromSymbol). Does nothing if any pointer is NULL or
// N<=0. Waits for completion and prints any CUDA error.
template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads)
{
    if(dbuf_a == NULL || dbuf_b == NULL || dbuf_out == NULL || fpnt == NULL || N<=0)
    {
        return;
    }
    // the operation pointer was previously missing from the argument list
    dbuff_vectorbinop_kf1<<<nblocks,nthreads>>>(dbuf_a,dbuf_b,dbuf_out,N,fpnt);
    dbuff_vectorbinop_report();
    return;
}

//Elementwise device-buffer vector two-parameter operation
//takes one input array, and a constant parameter ( ) ---> one output array
//
// Same requirements on fpnt as the two-array overload above.
template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads)
{
    if(dbuf_a == NULL || dbuf_out == NULL || fpnt == NULL || N<=0)
    {
        return;
    }
    // the operation pointer was previously missing from the argument list
    dbuff_vectorbinop_kf2<<<nblocks,nthreads>>>(dbuf_a,par_b,dbuf_out,N,fpnt);
    dbuff_vectorbinop_report();
    return;
}

// ---- addition --------------------------------------------------------------

// a+b; compiled for host and device so the functor below can call it in a kernel
template<typename T> __host__ __device__ T dbuff_add_fn(T a, T b)
{
    return a+b;
}

template<typename T> struct dbuff_add_op
{
    __host__ __device__ T operator()(T a, T b) const { return dbuff_add_fn<T>(a,b); }
};

// dbuff_out[I] = dbuff_a[I] + dbuff_b[I]
template<typename T> void dbuff_add(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    dbuff_binop_launch_vv(dbuff_a,dbuff_b,dbuff_out,N,dbuff_add_op<T>(),nblocks,nthreads);
    return;
}

// dbuff_out[I] = dbuff_a[I] + par_b
template<typename T> void dbuff_add(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    dbuff_binop_launch_vs(dbuff_a,par_b,dbuff_out,N,dbuff_add_op<T>(),nblocks,nthreads);
    return;
}

// ---- subtraction -----------------------------------------------------------

// a-b
template<typename T> __host__ __device__ T dbuff_sub_fn(T a, T b)
{
    return a-b;
}

template<typename T> struct dbuff_sub_op
{
    __host__ __device__ T operator()(T a, T b) const { return dbuff_sub_fn<T>(a,b); }
};

// dbuff_out[I] = dbuff_a[I] - dbuff_b[I]
template<typename T> void dbuff_sub(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    dbuff_binop_launch_vv(dbuff_a,dbuff_b,dbuff_out,N,dbuff_sub_op<T>(),nblocks,nthreads);
    return;
}

// dbuff_out[I] = dbuff_a[I] - par_b
template<typename T> void dbuff_sub(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    dbuff_binop_launch_vs(dbuff_a,par_b,dbuff_out,N,dbuff_sub_op<T>(),nblocks,nthreads);
    return;
}

// ---- multiplication --------------------------------------------------------

// a*b
template<typename T> __host__ __device__ T dbuff_mult_fn(T a, T b)
{
    return a*b;
}

template<typename T> struct dbuff_mult_op
{
    __host__ __device__ T operator()(T a, T b) const { return dbuff_mult_fn<T>(a,b); }
};

// dbuff_out[I] = dbuff_a[I] * dbuff_b[I]
template<typename T> void dbuff_mult(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    dbuff_binop_launch_vv(dbuff_a,dbuff_b,dbuff_out,N,dbuff_mult_op<T>(),nblocks,nthreads);
    return;
}

// dbuff_out[I] = dbuff_a[I] * par_b
template<typename T> void dbuff_mult(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    dbuff_binop_launch_vs(dbuff_a,par_b,dbuff_out,N,dbuff_mult_op<T>(),nblocks,nthreads);
    return;
}

// ---- division --------------------------------------------------------------

// a/b
template<typename T> __host__ __device__ T dbuff_div_fn(T a, T b)
{
    return a/b;
}

template<typename T> struct dbuff_div_op
{
    __host__ __device__ T operator()(T a, T b) const { return dbuff_div_fn<T>(a,b); }
};

// dbuff_out[I] = dbuff_a[I] / dbuff_b[I]
template<typename T> void dbuff_div(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    dbuff_binop_launch_vv(dbuff_a,dbuff_b,dbuff_out,N,dbuff_div_op<T>(),nblocks,nthreads);
    return;
}

// dbuff_out[I] = dbuff_a[I] / par_b
template<typename T> void dbuff_div(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    dbuff_binop_launch_vs(dbuff_a,par_b,dbuff_out,N,dbuff_div_op<T>(),nblocks,nthreads);
    return;
}

// b/a ("left division": the second argument is the numerator)
template<typename T> __host__ __device__ T dbuff_ldiv_fn(T a, T b)
{
    return b/a;
}

template<typename T> struct dbuff_ldiv_op
{
    __host__ __device__ T operator()(T a, T b) const { return dbuff_ldiv_fn<T>(a,b); }
};

// dbuff_out[I] = par_a / dbuff_b[I]
template<typename T> void dbuff_div(T par_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
{
    // the array is passed as the first operand, hence the swapped functor
    dbuff_binop_launch_vs(dbuff_b,par_a,dbuff_out,N,dbuff_ldiv_op<T>(),nblocks,nthreads);
    return;
}
}; };
#endif #endif

View File

@ -1,323 +1,323 @@
#ifndef __CUARRAY_IMPL_HPP__ #ifndef __CUARRAY_IMPL_HPP__
#define __CUARRAY_IMPL_HPP__ #define __CUARRAY_IMPL_HPP__
namespace amscuda namespace amscuda
{ {
// cuarray<T>, new and simplified version
// Default constructor: starts with no storage and a length of zero.
template<typename T> __device__ __host__ cuarray<T>::cuarray()
{
    data = NULL;
    length = 0;
}
// Destructor: releases the element storage with delete[] (when present) and
// resets the object to the empty state.
template<typename T> __device__ __host__ cuarray<T>::~cuarray()
{
    if(data!=NULL)
    {
        delete[] data;
    }
    data = NULL;
    length = 0;
}
// Resizes the array to _length elements.
//   - existing elements up to min(old length, _length) are preserved
//   - new trailing elements are set to a value-initialised T
//   - _length<=0 releases the storage and leaves an empty array
// Returns 1 on success (including "nothing to do") and -1 when the new buffer
// could not be obtained.
template<typename T> __device__ __host__ int cuarray<T>::resize(const int _length)
{
    int I;
    int ncopy = 0;
    T *newbuffer = NULL;

    if(length==_length)
    {
        //do nothing
        return 1;
    }

    if(_length<=0)
    {
        if(data!=NULL)
        {
            delete[] data;
            data = NULL;
        }
        length = 0;
        // previously fell through and tried to allocate new T[_length]
        return 1;
    }

    newbuffer = new T[_length];
    if(newbuffer==NULL)
    {
        return -1; //failed to allocate memory
    }

    if(data!=NULL)
    {
        ncopy = (length<_length) ? length : _length;
        if(ncopy<0) ncopy = 0;
    }
    for(I=0;I<ncopy;I++)
    {
        newbuffer[I] = data[I];
    }

    // deterministic fill value for the newly exposed elements
    const T def = T();
    for(I=ncopy;I<_length;I++)
    {
        newbuffer[I] = def;
    }

    if(data!=NULL)
    {
        delete[] data;
    }
    data = newbuffer;
    length = _length;
    return 1;
}
// Sends this array to the device mirror referenced by *dptr.
//   - *dptr==NULL, or the mirror has a different length: the mirror is
//     (re)allocated via _device_send_overwrite
//   - the mirror already has the same length: only the elements are copied
//     via _device_send_copy
// Returns the result of the helper that was used (1 on success, negative on
// failure), or 0 if dptr itself is NULL.
template<typename T> __host__ int cuarray<T>::device_send(cuarray<T> **dptr)
{
    int ret = 0;
    int dlength;
    if(dptr==NULL)
    {
        return ret;
    }
    if(*dptr==NULL)
    {
        ret = _device_send_overwrite(dptr);
    }
    else
    {
        dlength = device_length(*dptr);
        // comparison, not assignment: copying into a mirror of a different
        // size would overrun (or under-fill) the device buffer
        if(dlength==length)
        {
            ret = _device_send_copy(*dptr);
        }
        else
        {
            ret = _device_send_overwrite(dptr);
        }
    }
    return ret;
}
// Frees whatever *dptr currently refers to, then allocates a fresh device
// mirror (struct + element buffer) and copies this array into it.
// Return codes:
//    1 : success
//   -1 : could not allocate the device struct (array has data)
//   -2 : could not allocate the device element buffer
//   -3 : a host-to-device copy failed
//   -4 : could not allocate the device struct (array is empty)
// On any failure every device allocation made here is released and *dptr is
// left NULL, so the caller never sees a half-initialised mirror.
template<typename T> __host__ int cuarray<T>::_device_send_overwrite(cuarray<T> **dptr)
{
    int ret = 0;
    cuarray<T> dlocal; // host staging copy of the device struct
    cudaError_t err = cudaSuccess;
    T *ddata = NULL;
    const bool hasdata = (length>=0 && data!=NULL);

    device_free(dptr);

    err = cudaMalloc(dptr,sizeof(cuarray<T>));
    if(err!=cudaSuccess)
    {
        *dptr = NULL;
        ret = hasdata ? -1 : -4;
    }
    else if(hasdata)
    {
        err = cudaMalloc(&ddata,sizeof(T)*length);
        if(err!=cudaSuccess)
        {
            ddata = NULL;
            ret = -2;
        }
        else
        {
            dlocal.data = ddata;
            dlocal.length = length;
            err = cudaMemcpy(*dptr,&dlocal,sizeof(cuarray<T>),cudaMemcpyHostToDevice);
            if(err==cudaSuccess)
            {
                err = cudaMemcpy(ddata,data,sizeof(T)*length,cudaMemcpyHostToDevice);
            }
            ret = (err==cudaSuccess) ? 1 : -3;
        }
    }
    else
    {
        dlocal.data = NULL;
        dlocal.length = 0;
        err = cudaMemcpy(*dptr,&dlocal,sizeof(cuarray<T>),cudaMemcpyHostToDevice);
        ret = (err==cudaSuccess) ? 1 : -3;
    }

    if(ret!=1)
    {
        // roll back partial allocations
        if(ddata!=NULL)
        {
            cudaFree(ddata);
        }
        if(*dptr!=NULL)
        {
            cudaFree(*dptr);
            *dptr = NULL;
        }
    }

    // dlocal.data may hold a device address: clear it so the destructor of
    // the staging object does not delete[] it
    dlocal.data = NULL;
    dlocal.length = -1;
    return ret;
}
// Copies the host elements into the element buffer of an existing device
// mirror. The buffer address is looked up with device_data_ptr(dptr).
// Returns 1 if cudaMemcpy succeeded, -1 otherwise.
template<typename T> __host__ int cuarray<T>::_device_send_copy(cuarray<T> *dptr)
{
    T *devdata = device_data_ptr(dptr);
    const cudaError_t status = cudaMemcpy(devdata,data,sizeof(T)*length,cudaMemcpyHostToDevice);
    return (status==cudaSuccess) ? 1 : -1;
}
// Copies the contents of the device mirror dptr into this host array,
// resizing the host array to the mirror's length first.
// Returns:
//    1 : elements copied
//    0 : nothing was copied (mirror is empty or has no element buffer)
//   -1 : dptr is NULL
//   -2 : the device-to-host copy failed
template<typename T> __host__ int cuarray<T>::device_pull(cuarray<T> *dptr)
{
    int ret = 0;
    int dlength;
    T* ddata;
    cudaError_t err;
    if(dptr==NULL)
    {
        ret = -1; // null d pointer
        return ret;
    }
    dlength = device_length(dptr);
    if(dlength!=length)
    {
        this->resize(dlength);
    }
    ddata = device_data_ptr(dptr);
    if(length>0 && data!=NULL && ddata!=NULL)
    {
        // source is the device element buffer, not the device struct itself
        err = cudaMemcpy(data,ddata,length*sizeof(T),cudaMemcpyDeviceToHost);
        if(err==cudaSuccess)
        {
            ret = 1;
        }
        else
        {
            ret = -2;
        }
    }
    return ret;
}
// Releases a device mirror: frees its element buffer, frees the device struct
// and sets *dptr to NULL.
// Returns 1 if a mirror was freed, 0 if there was nothing to free
// (dptr==NULL or *dptr==NULL).
template<typename T> __host__ int cuarray<T>::device_free(cuarray<T> **dptr)
{
    int ret = 0;
    cuarray<T> dlocal; // host staging copy used to read the device struct
    cudaError_t err = cudaSuccess;
    if(dptr!=NULL && *dptr!=NULL)
    {
        // read the struct from the device address (*dptr), not from the host
        // address of the pointer variable
        err = cudaMemcpy(&dlocal,*dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
        if(err==cudaSuccess && dlocal.data!=NULL)
        {
            cudaFree(dlocal.data);
        }
        cudaFree(*dptr);
        *dptr = NULL;
        ret = 1;
    }
    // dlocal.data may hold a device address: never let the destructor see it
    dlocal.data = NULL;
    dlocal.length = -1;
    return ret;
}
// Reads the length field of a device mirror by copying the device struct into
// a temporary host object. Returns -1 when dptr is NULL.
template<typename T> __host__ int cuarray<T>::device_length(cuarray<T> *dptr)
{
    cuarray<T> mirror;
    if(dptr==NULL)
    {
        return -1;
    }
    cudaMemcpy(&mirror,dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
    const int devlength = mirror.length;
    // detach the copied pointer before the temporary is destroyed
    mirror.data = NULL;
    mirror.length = -1;
    return devlength;
}
// Reads the data pointer stored in a device mirror by copying the device
// struct into a temporary host object. Returns NULL when dptr is NULL.
template<typename T> __host__ T* cuarray<T>::device_data_ptr(cuarray<T> *dptr)
{
    cuarray<T> mirror;
    if(dptr==NULL)
    {
        return NULL;
    }
    cudaMemcpy(&mirror,dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
    T *devdata = mirror.data;
    // detach the copied pointer before the temporary is destroyed
    mirror.data = NULL;
    mirror.length = -1;
    return devdata;
}
// Number of elements currently held.
template<typename T> __device__ __host__ int cuarray<T>::size() const
{
    return length;
}

// Element access; the index is not range-checked.
template<typename T> __device__ __host__ T& cuarray<T>::at(const int I)
{
    return data[I];
}

template<typename T> __device__ __host__ const T& cuarray<T>::at(const int I) const
{
    return data[I];
}

// Same as at(): unchecked element access.
template<typename T> __device__ __host__ T& cuarray<T>::operator[](const int I)
{
    return data[I];
}

template<typename T> __device__ __host__ const T& cuarray<T>::operator[](const int I) const
{
    return data[I];
}
}; };
#endif #endif

View File

@ -1,19 +1,19 @@
#ifndef __AMSCUDA_BINARRRW_HPP__ #ifndef __AMSCUDA_BINARRRW_HPP__
#define __AMSCUDA_BINARRRW_HPP__ #define __AMSCUDA_BINARRRW_HPP__
namespace amscuda
{
    // Binary n-dimensional array I/O. The implementations live in
    // amscuda_binarrrw_impl.hpp, included at the end of this header.

    // Reads a shape header followed by the flattened elements.
    template<typename T>
    int fread_ndarray(FILE *fp, cuarray<int32_t> *shape, cuarray<T> *buffer);

    // Writes a shape header followed by the flattened elements.
    template<typename T>
    int fwrite_ndarray(FILE *fp, const cuarray<int32_t> *shape, const cuarray<T> *buffer);

    // Writes a raw buffer of N elements as a one-dimensional array.
    template<typename T>
    int fwrite_buffer(FILE *fp, const int N, const T *buffer);

    // Reads at most Nmax elements into buffer.
    // NOTE(review): buffer is the destination of the read yet is declared
    // const here - confirm whether the qualifier is intended.
    template<typename T>
    int fread_buffer(FILE *fp, const int Nmax, const T *buffer);
}; //end namespace amscuda
#include <amsculib2/amscuda_binarrrw_impl.hpp> #include <amsculib2/amscuda_binarrrw_impl.hpp>
#endif #endif

View File

@ -1,194 +1,194 @@
#ifndef __AMSCUDA_BINARRRW_IMPL_HPP__ #ifndef __AMSCUDA_BINARRRW_IMPL_HPP__
#define __AMSCUDA_BINARRRW_IMPL_HPP__ #define __AMSCUDA_BINARRRW_IMPL_HPP__
namespace amscuda namespace amscuda
{ {
// Reads one n-dimensional array record from fp:
//   int32 Nd, then Nd int32 dimensions, then prod(dimensions) elements of T.
// shape receives the dimensions and buffer the flattened elements.
// Returns 1 on success and 0 on any failure (NULL arguments, end of file,
// truncated header or data, element count that does not fit in an int).
// A record with Nd<=0 is treated as an empty array: both outputs are resized
// to 0, a message is printed and 1 is returned.
template<typename T> int fread_ndarray(FILE *fp, cuarray<int32_t> *shape, cuarray<T> *buffer)
{
    const long maxelems = 2147483647L; // cuarray lengths are stored in an int
    int I;
    long piprod = 1;
    int32_t q = 0;
    int32_t Nd = 0;
    size_t cnt;

    if(fp==NULL)
    {
        printf("fread_ndarray: fp=NULL.\n");
        return 0;
    }
    if(shape==NULL || buffer==NULL)
    {
        printf("fread_ndarray: shape or buffer is NULL.\n");
        return 0;
    }
    if(feof(fp))
    {
        printf("fread_ndarray: end of file.\n");
        return 0;
    }

    cnt = fread(&Nd,sizeof(int32_t),1,fp);
    if(cnt!=1)
    {
        printf("fread_ndarray: failed to read the number of dimensions.\n");
        shape->resize(0);
        buffer->resize(0);
        return 0;
    }
    if(Nd<=0)
    {
        printf("fread_ndarray: Read a number of dimensions<=0.\n");
        shape->resize(0);
        buffer->resize(0);
        return 1;
    }

    if(shape->resize(Nd)!=1)
    {
        printf("fread_ndarray: failed to allocate shape.\n");
        buffer->resize(0);
        return 0;
    }
    for(I=0;I<Nd;I++)
    {
        cnt = fread(&q,sizeof(int32_t),1,fp);
        if(cnt!=1)
        {
            printf("fread_ndarray: truncated shape header.\n");
            shape->resize(0);
            buffer->resize(0);
            return 0;
        }
        shape->at(I) = q;
        if(q<=0)
        {
            // any non-positive dimension makes the array empty
            piprod = 0;
        }
        else if(piprod>maxelems/q)
        {
            printf("fread_ndarray: array is too large.\n");
            buffer->resize(0);
            return 0;
        }
        else
        {
            piprod = piprod*q;
        }
    }

    if(buffer->resize((int)piprod)!=1)
    {
        printf("fread_ndarray: failed to allocate buffer.\n");
        return 0;
    }
    if(piprod>0)
    {
        cnt = fread((buffer->data),sizeof(T),(size_t)piprod,fp);
        if((long)cnt!=piprod)
        {
            printf("fread_ndarray, read %d values, expecting %ld\n",(int)cnt,piprod);
            return 0;
        }
    }
    return 1;
}
// Writes one n-dimensional array record to fp:
//   int32 Nd, then Nd int32 dimensions, then the elements of buffer.
// The element count implied by shape must equal buffer->size(); an empty
// shape stands for an empty array (0 elements), matching what fread_ndarray
// produces for Nd<=0.
// Returns 1 on success, 0 on failure (NULL argument, size mismatch or a short
// write).
template<typename T> int fwrite_ndarray(FILE *fp, const cuarray<int32_t> *shape, const cuarray<T> *buffer)
{
    long piprod;
    int I;
    int32_t Nd;

    if(fp==NULL)
    {
        printf("fwrite_ndarray: fp=NULL\n");
        return 0;
    }
    if(shape==NULL || buffer==NULL)
    {
        printf("fwrite_ndarray: shape or buffer is NULL\n");
        return 0;
    }

    Nd = (int32_t) shape->size();
    piprod = (Nd>0) ? 1 : 0;
    for(I=0;I<shape->size();I++)
    {
        if(shape->at(I)>0)
        {
            piprod = piprod*shape->at(I);
        }
        else
        {
            piprod = 0;
        }
    }

    if(piprod!=buffer->size())
    {
        printf("fwrite_ndarray: buffer is size %ld, while shape is size %ld\n",(long)buffer->size(),(long)piprod);
        return 0;
    }

    if(fwrite(&Nd,sizeof(int32_t),1,fp)!=1)
    {
        printf("fwrite_ndarray: write failed\n");
        return 0;
    }
    if(Nd>0)
    {
        if(fwrite(shape->data,sizeof(int32_t),(size_t)Nd,fp)!=(size_t)Nd)
        {
            printf("fwrite_ndarray: write failed\n");
            return 0;
        }
        if(piprod>0)
        {
            if(fwrite(buffer->data,sizeof(T),(size_t)buffer->size(),fp)!=(size_t)buffer->size())
            {
                printf("fwrite_ndarray: write failed\n");
                return 0;
            }
        }
    }
    return 1;
}
// Writes a raw buffer as a one-dimensional array record:
//   int32 1, int32 N, then N elements of T.
// Returns 1 on success and 0 on failure (NULL file, invalid buffer/N, or a
// short write), consistent with fwrite_ndarray.
template<typename T> int fwrite_buffer(FILE *fp, const int N, const T *buffer)
{
    // the header fields are written as exactly 32-bit values
    const int32_t Nd = 1;
    const int32_t len = (int32_t) N;

    if(fp==NULL)
    {
        printf("fwrite_buffer: fp=NULL\n");
        return 0;
    }
    if(N<0 || (N>0 && buffer==NULL))
    {
        printf("fwrite_buffer: invalid buffer\n");
        return 0;
    }
    if(fwrite(&Nd,sizeof(int32_t),1,fp)!=1 || fwrite(&len,sizeof(int32_t),1,fp)!=1)
    {
        printf("fwrite_buffer: write failed\n");
        return 0;
    }
    if(N>0 && fwrite(buffer,sizeof(T),(size_t)N,fp)!=(size_t)N)
    {
        printf("fwrite_buffer: write failed\n");
        return 0;
    }
    return 1;
}
// Reads one array record (int32 Nd, Nd int32 dimensions, elements of T) and
// stores at most Nmax elements into buffer. When the record holds more than
// Nmax elements the remainder is left unread in the stream.
// Returns:
//    1 : the expected number of elements was read
//    0 : malformed/truncated record, or invalid buffer
//   -1 : fp is NULL
//   -2 : fp is already at end of file
template<typename T> int fread_buffer(FILE *fp, const int Nmax, T *buffer)
{
    const long maxelems = 2147483647L;
    int32_t Nd = 0;
    int32_t q = 0;
    long piprod = 1;
    long Nr;
    size_t cnt;
    int I;

    if(fp==NULL) {return -1;}
    if(feof(fp)) {return -2;}

    cnt = fread(&Nd,sizeof(int32_t),1,fp);
    if(cnt!=1 || Nd<=0)
    {
        return 0;
    }
    for(I=0;I<Nd;I++)
    {
        cnt = fread(&q,sizeof(int32_t),1,fp);
        if(cnt!=1)
        {
            return 0;
        }
        if(q<=0)
        {
            // a non-positive dimension means there is no element data
            piprod = 0;
        }
        else if(piprod>maxelems/q)
        {
            return 0;
        }
        else
        {
            piprod = piprod*q;
        }
    }

    Nr = (piprod<(long)Nmax) ? piprod : (long)Nmax;
    if(Nr>0)
    {
        if(buffer==NULL)
        {
            return 0;
        }
        cnt = fread(buffer,sizeof(T),(size_t)Nr,fp);
        if((long)cnt!=Nr)
        {
            printf("fread_buffer, read %d values, expecting %d\n",(int)cnt,(int)Nr);
            return 0;
        }
    }
    return 1;
}

// Overload kept for source compatibility with the existing declaration, which
// takes the destination as const T*. The pointed-to storage is written to, so
// it must not refer to an object that is actually const.
template<typename T> int fread_buffer(FILE *fp, const int Nmax, const T *buffer)
{
    return fread_buffer<T>(fp,Nmax,const_cast<T*>(buffer));
}
}; //end namespace amscuda }; //end namespace amscuda
#endif #endif

View File

@ -1,11 +1,11 @@
#ifndef __AMSCUGEOM_HPP__ #ifndef __AMSCUGEOM_HPP__
#define __AMSCUGEOM_HPP__ #define __AMSCUGEOM_HPP__
namespace amscuda namespace amscuda
{ {
}; //end namespace amscuda }; //end namespace amscuda
#endif #endif

View File

@ -1,70 +1,70 @@
#ifndef __AMSCULIB2_HPP__ #ifndef __AMSCULIB2_HPP__
#define __AMSCULIB2_HPP__ #define __AMSCULIB2_HPP__
//Std Lib Includes //Std Lib Includes
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <math.h> #include <math.h>
#include <stdint.h> #include <stdint.h>
#include <time.h> #include <time.h>
#include <new> #include <new>
#include <cuda_runtime_api.h> //where all the cuda functions live #include <cuda_runtime_api.h> //where all the cuda functions live
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <cuda.h> #include <cuda.h>
//Dependencies //Dependencies
//Predeclarations //Predeclarations
class cuvect2; class cuvect2;
class cuvect3; class cuvect3;
class cuvect4; class cuvect4;
class cuvect2f; class cuvect2f;
class cuvect3f; class cuvect3f;
class cuvect4f; class cuvect4f;
//The same constant symbols have to be usable from host and from device code.
//Approach taken from: https://stackoverflow.com/questions/9457572/cuda-host-and-device-using-same-constant-memory
//AMSCU_CONST expands to __constant__ when __CUDA_ARCH__ is defined and to
//nothing otherwise.
#if defined(__CUDA_ARCH__)
    #define AMSCU_CONST __constant__
#else
    #define AMSCU_CONST
#endif

namespace amscuda
{
    //default block and thread counts for kernel execution
    AMSCU_CONST static const int amscu_defnblocks = 256;
    AMSCU_CONST static const int amscu_defnthreads = 512;

    //default number of threads to execute on the cpu
    AMSCU_CONST static const int amscu_defcputhreads = 8;
}; //end namespace amscuda
//Components //Components
#include <amsculib2/amscu_cudafunctions.hpp> #include <amsculib2/amscu_cudafunctions.hpp>
#include <amsculib2/amscumath.hpp> #include <amsculib2/amscumath.hpp>
#include <amsculib2/amscu_comp64.hpp> #include <amsculib2/amscu_comp64.hpp>
#include <amsculib2/amscu_comp128.hpp> #include <amsculib2/amscu_comp128.hpp>
#include <amsculib2/cuvect2.hpp> #include <amsculib2/cuvect2.hpp>
#include <amsculib2/cuvect3.hpp> #include <amsculib2/cuvect3.hpp>
#include <amsculib2/cuvect4.hpp> #include <amsculib2/cuvect4.hpp>
#include <amsculib2/cuvect2f.hpp> #include <amsculib2/cuvect2f.hpp>
#include <amsculib2/cuvect3f.hpp> #include <amsculib2/cuvect3f.hpp>
#include <amsculib2/cuvect4f.hpp> #include <amsculib2/cuvect4f.hpp>
#include <amsculib2/amscugeom.hpp> #include <amsculib2/amscugeom.hpp>
#include <amsculib2/amscuarray.hpp> #include <amsculib2/amscuarray.hpp>
#include <amsculib2/amscuda_binarrrw.hpp> #include <amsculib2/amscuda_binarrrw.hpp>
#include <amsculib2/amscu_random.hpp> #include <amsculib2/amscu_random.hpp>
#include <amsculib2/amscuarray_dops.hpp> #include <amsculib2/amscuarray_dops.hpp>
#include <amsculib2/amscurarray.cuh> #include <amsculib2/amscurarray.cuh>
#endif #endif

View File

@ -1,56 +1,56 @@
#ifndef __AMSCUMATH_HPP__ #ifndef __AMSCUMATH_HPP__
#define __AMSCUMATH_HPP__ #define __AMSCUMATH_HPP__
namespace amscuda
{
    //Problem: These are not in the namespace
    //#define nan NAN
    //#define fnan (float) NAN
    //#define inf INFINITY
    //#define finf (float) INFINITY
    //#define pi 3.14159265358979323846

    //These need to be the same symbol for both host and device code
    AMSCU_CONST static const double nan = NAN;
    AMSCU_CONST static const float fnan = (float) NAN;
    AMSCU_CONST static const double inf = INFINITY;
    AMSCU_CONST static const float finf = (float) INFINITY;
    //pi to 21 significant digits; the previous literal (...7936) rounded to
    //the double one ULP above the correctly rounded value
    AMSCU_CONST static const double pi = 3.14159265358979323846;
    AMSCU_CONST static const float pif = 3.14159265358979323846f;

    //declared here, not defined in this header
    __host__ __device__ double dabs(double x);
    __host__ __device__ float fabs(float x);

    //Generic absolute value: returns -in when in<0, otherwise in.
    //Requires T to support comparison with 0 and unary minus.
    template<typename T> __host__ __device__ T abs(const T in)
    {
        T ret = in;
        if(in<0) ret = -in;
        return ret;
    }

    //declared here, not defined in this header
    __host__ __device__ double mod(double a, double md);
    __host__ __device__ float mod(float a, float md);
    __host__ __device__ int mod(int x, int n);
    __host__ __device__ long mod(long x, long n);

    __host__ __device__ int truediv(int x, int y);
    __host__ __device__ long truediv(long x, long y);

    //defined in amscumath_impl.hpp
    template<typename T> __host__ __device__ T min(T a, T b);
    template<typename T> __host__ __device__ T max(T a, T b);

    __device__ __host__ double arg(double x, double y);
    __device__ __host__ void get_azel(double x, double y, double z, double *az, double *el);

    void test_amscumath1();
}; //end namespace amscuda
#include <amsculib2/amscumath_impl.hpp> #include <amsculib2/amscumath_impl.hpp>
#endif #endif

View File

@ -1,42 +1,42 @@
#ifndef __AMSCUMATH_IMPL_HPP__ #ifndef __AMSCUMATH_IMPL_HPP__
#define __AMSCUMATH_IMPL_HPP__ #define __AMSCUMATH_IMPL_HPP__
namespace amscuda namespace amscuda
{ {
// Generic minimum: yields b when a>b, otherwise a (so a is returned when the
// two compare equal or the comparison is false).
template<typename T> __host__ __device__ T min(T a, T b)
{
    return (a>b) ? b : a;
}
// Generic maximum: yields a when a>b, otherwise b (so b is returned when the
// two compare equal or the comparison is false).
template<typename T> __host__ __device__ T max(T a, T b)
{
    return (a>b) ? a : b;
}
template<> __host__ __device__ double min(double a, double b); template<> __host__ __device__ double min(double a, double b);
template<> __host__ __device__ float min(float a, float b); template<> __host__ __device__ float min(float a, float b);
template<> __host__ __device__ double max(double a, double b); template<> __host__ __device__ double max(double a, double b);
template<> __host__ __device__ float max(float a, float b); template<> __host__ __device__ float max(float a, float b);
}; //end namespace amscuda }; //end namespace amscuda
#endif #endif

View File

@ -1,66 +1,66 @@
#ifndef __AMSCURARRAY_HPP__ #ifndef __AMSCURARRAY_HPP__
#define __AMSCURARRAY_HPP__ #define __AMSCURARRAY_HPP__
namespace amscuda namespace amscuda
{ {
//Cuda ragged array class //Cuda ragged array class
template<typename T> class curarray template<typename T> class curarray
{ {
public: public:
int device; int device;
curarray* devptr; //pointer to mirror class on the device curarray* devptr; //pointer to mirror class on the device
int Narrays; //number of arrays int Narrays; //number of arrays
int *N; //dimension of each array int *N; //dimension of each array
T** hostarrayptrs; //pointers to each array on the host - null on the device T** hostarrayptrs; //pointers to each array on the host - null on the device
T** devarrayptrs; //pointers to each array on the device T** devarrayptrs; //pointers to each array on the device
//the double pointer is a host pointer to device pointers on the host class //the double pointer is a host pointer to device pointers on the host class
//for the device class, only the second set of arrays is in use //for the device class, only the second set of arrays is in use
//the constructor and destructor set all pointers to NULL, they //the constructor and destructor set all pointers to NULL, they
// do *not* manage memory. This is done with curarray_new and curarray_delete // do *not* manage memory. This is done with curarray_new and curarray_delete
__device__ __host__ curarray(); __device__ __host__ curarray();
__device__ __host__ ~curarray(); __device__ __host__ ~curarray();
__host__ int push(); __host__ int push();
__host__ int pull(); __host__ int pull();
//__device__ int dev_resizearray(int arraynum, int arraysize); //__device__ int dev_resizearray(int arraynum, int arraysize);
__host__ int resizearray(int arraynum, int arraysize); __host__ int resizearray(int arraynum, int arraysize);
// I may want a way to resize arrays on the device without pushing/pulling all the array contents // I may want a way to resize arrays on the device without pushing/pulling all the array contents
}; };
template<typename T> int curarray_new(curarray<T>** ptr, int Narrays); template<typename T> int curarray_new(curarray<T>** ptr, int Narrays);
template<typename T> int curarray_delete(curarray<T>** ptr); template<typename T> int curarray_delete(curarray<T>** ptr);
template<typename T> int curarray_device_new(curarray<T> *hostptr); template<typename T> int curarray_device_new(curarray<T> *hostptr);
template<typename T> int curarray_device_delete(curarray<T> *hostptr); template<typename T> int curarray_device_delete(curarray<T> *hostptr);
template<typename T> int curarray_push(curarray<T> *hostptr); template<typename T> int curarray_push(curarray<T> *hostptr);
template<typename T> int curarray_pull(curarray<T> *hostptr); template<typename T> int curarray_pull(curarray<T> *hostptr);
//template<typename T> int curarray_host_fillall(curarray<T> *hostptr, const T &val); //template<typename T> int curarray_host_fillall(curarray<T> *hostptr, const T &val);
//template<typename T> int curarray_device_fillall(curarray<T> *hostptr, const T &val); //template<typename T> int curarray_device_fillall(curarray<T> *hostptr, const T &val);
//template<typename T> __host__ int curarray_deletearray(curarray<T> *hostptr, int arrayindex); //template<typename T> __host__ int curarray_deletearray(curarray<T> *hostptr, int arrayindex);
//template<typename T> __device__ int curarray_dev_deletearray(curarray<T> *devptr, int arrayindex); //template<typename T> __device__ int curarray_dev_deletearray(curarray<T> *devptr, int arrayindex);
//template<typename T> __host__ int curarray_allocarray(curarray<T> *hostptr, int arrayindex, int size); //template<typename T> __host__ int curarray_allocarray(curarray<T> *hostptr, int arrayindex, int size);
//template<typename T> __device__ int curarray_dev_allocarray(curarray<T> *devptr, int arrayindex, int size); //template<typename T> __device__ int curarray_dev_allocarray(curarray<T> *devptr, int arrayindex, int size);
void test_amscurarray1(); void test_amscurarray1();
}; };
#include <amsculib2/amscurarray_impl.cuh> #include <amsculib2/amscurarray_impl.cuh>
#endif #endif

File diff suppressed because it is too large Load Diff

View File

@ -1,84 +1,85 @@
#ifndef __CUVECT2_HPP__ #ifndef __CUVECT2_HPP__
#define __CUVECT2_HPP__ #define __CUVECT2_HPP__
namespace amscuda namespace amscuda
{ {
class cuvect2 class cuvect2
{ {
public: public:
double x; double x;
double y; double y;
__host__ __device__ cuvect2(); __host__ __device__ cuvect2();
__host__ __device__ ~cuvect2(); __host__ __device__ ~cuvect2();
__host__ __device__ cuvect2(double _x, double _y); __host__ __device__ cuvect2(double _x, double _y);
__host__ __device__ double& operator[](const int I); __host__ __device__ double& operator[](const int I);
__host__ __device__ const double& operator[](const int I) const; __host__ __device__ const double& operator[](const int I) const;
__host__ __device__ cuvect2 operator+(cuvect2 lhs); __host__ __device__ cuvect2 operator+(cuvect2 lhs);
__host__ __device__ cuvect2 operator-(cuvect2 lhs); __host__ __device__ cuvect2 operator-(cuvect2 lhs);
__host__ __device__ cuvect2 operator*(double lhs); __host__ __device__ cuvect2 operator*(double lhs);
__host__ __device__ cuvect2 operator/(double lhs); __host__ __device__ cuvect2 operator/(double lhs);
}; };
class cumat2 class cumat2
{ {
public: public:
double dat[4]; double dat[4];
__host__ __device__ cumat2(); __host__ __device__ cumat2();
__host__ __device__ ~cumat2(); __host__ __device__ ~cumat2();
__host__ __device__ double& operator[](const int I); __host__ __device__ double& operator[](const int I);
__host__ __device__ double& operator()(const int I, const int J); __host__ __device__ double& operator()(const int I, const int J);
__host__ __device__ double& at(const int I, const int J); __host__ __device__ double& at(const int I, const int J);
__host__ __device__ cumat2 operator+(cumat2 lhs); __host__ __device__ cumat2 operator+(cumat2 lhs);
__host__ __device__ cumat2 operator-(cumat2 lhs); __host__ __device__ cumat2 operator-(cumat2 lhs);
__host__ __device__ cumat2 operator*(double lhs); __host__ __device__ cumat2 operator*(double lhs);
__host__ __device__ cumat2 operator/(double lhs); __host__ __device__ cumat2 operator/(double lhs);
__host__ __device__ cuvect2 operator*(cuvect2 lhs); __host__ __device__ cuvect2 operator*(cuvect2 lhs);
__host__ __device__ cumat2 operator*(cumat2 lhs); __host__ __device__ cumat2 operator*(cumat2 lhs);
__host__ __device__ double det(); __host__ __device__ double det();
__host__ __device__ cumat2 transpose(); __host__ __device__ cumat2 transpose();
__host__ __device__ cumat2 inverse(); __host__ __device__ cumat2 inverse();
}; };
__host__ __device__ double cuvect2_dot(cuvect2 a, cuvect2 b); __host__ __device__ double cuvect2_dot(cuvect2 a, cuvect2 b);
__host__ __device__ double cuvect2_cross(cuvect2 a, cuvect2 b); __host__ __device__ double cuvect2_cross(cuvect2 a, cuvect2 b);
__host__ __device__ double cuvect2_norm(cuvect2 a); __host__ __device__ double cuvect2_norm(cuvect2 a);
__host__ __device__ cuvect2 cuvect2_normalize(cuvect2 a); __host__ __device__ cuvect2 cuvect2_normalize(cuvect2 a);
__host__ __device__ cuvect2 cuvect2_proj(cuvect2 a, cuvect2 b); __host__ __device__ cuvect2 cuvect2_proj(cuvect2 a, cuvect2 b);
//2x2 matrix operations //2x2 matrix operations
//matrix order is presumably mat[I,J] = mat[I+2*J]; the I+3*J written here before cannot index a 4 element array - TODO confirm against the implementation
//transpose a 2x2 matrix in place //transpose a 2x2 matrix in place
__host__ __device__ void mat2_transpose(double *mat2inout); __host__ __device__ void mat2_transpose(double *mat2inout);
//copies src to dest //copies src to dest
__host__ __device__ void mat2_copy(double *mat2_dest, const double *mat2_src); __host__ __device__ void mat2_copy(double *mat2_dest, const double *mat2_src);
//inverts mat2inout[4]
__host__ __device__ void mat2_inverse(double *mat2inout); __host__ __device__ void mat2_inverse(double *mat2inout);
//rotation matrix from angle
__host__ __device__ void mat2_rot_from_angle(double angle, double *mat2); __host__ __device__ void mat2_rot_from_angle(double angle, double *mat2);
//multiplies c = a*b //multiplies c = a*b
__host__ __device__ void mat2_mult(double *mat2a, double *mat2b, double *mat2c); __host__ __device__ void mat2_mult(double *mat2a, double *mat2b, double *mat2c);
// ret = a*b // ret = a*b
__host__ __device__ cuvect2 mat2_mult(double *mat2a, cuvect2 b); __host__ __device__ cuvect2 mat2_mult(double *mat2a, cuvect2 b);
void test_cuvect2_1();
void test_cuvect2_1();
}; //end namespace amscuda
}; //end namespace amscuda
#endif
#endif

View File

@ -1,84 +1,85 @@
#ifndef __CUVECT2F_HPP__ #ifndef __CUVECT2F_HPP__
#define __CUVECT2F_HPP__ #define __CUVECT2F_HPP__
namespace amscuda namespace amscuda
{ {
class cuvect2f class cuvect2f
{ {
public: public:
float x; float x;
float y; float y;
__host__ __device__ cuvect2f(); __host__ __device__ cuvect2f();
__host__ __device__ ~cuvect2f(); __host__ __device__ ~cuvect2f();
__host__ __device__ cuvect2f(float _x, float _y); __host__ __device__ cuvect2f(float _x, float _y);
__host__ __device__ float& operator[](const int I); __host__ __device__ float& operator[](const int I);
__host__ __device__ const float& operator[](const int I) const; __host__ __device__ const float& operator[](const int I) const;
__host__ __device__ cuvect2f operator+(cuvect2f lhs); __host__ __device__ cuvect2f operator+(cuvect2f lhs);
__host__ __device__ cuvect2f operator-(cuvect2f lhs); __host__ __device__ cuvect2f operator-(cuvect2f lhs);
__host__ __device__ cuvect2f operator*(float lhs); __host__ __device__ cuvect2f operator*(float lhs);
__host__ __device__ cuvect2f operator/(float lhs); __host__ __device__ cuvect2f operator/(float lhs);
}; __host__ __device__ friend cuvect2f operator-(cuvect2f rhs);
};
class cumat2f
{ class cumat2f
public: {
float dat[4]; public:
float dat[4];
__host__ __device__ cumat2f();
__host__ __device__ ~cumat2f(); __host__ __device__ cumat2f();
__host__ __device__ float& operator[](const int I); __host__ __device__ ~cumat2f();
__host__ __device__ float& operator()(const int I, const int J); __host__ __device__ float& operator[](const int I);
__host__ __device__ float& at(const int I, const int J); __host__ __device__ float& operator()(const int I, const int J);
__host__ __device__ float& at(const int I, const int J);
__host__ __device__ cumat2f operator+(cumat2f lhs);
__host__ __device__ cumat2f operator-(cumat2f lhs); __host__ __device__ cumat2f operator+(cumat2f lhs);
__host__ __device__ cumat2f operator*(float lhs); __host__ __device__ cumat2f operator-(cumat2f lhs);
__host__ __device__ cumat2f operator/(float lhs); __host__ __device__ cumat2f operator*(float lhs);
__host__ __device__ cuvect2f operator*(cuvect2f lhs); __host__ __device__ cumat2f operator/(float lhs);
__host__ __device__ cumat2f operator*(cumat2f lhs); __host__ __device__ cuvect2f operator*(cuvect2f lhs);
__host__ __device__ cumat2f operator*(cumat2f lhs);
__host__ __device__ float det();
__host__ __device__ cumat2f transpose(); __host__ __device__ float det();
__host__ __device__ cumat2f inverse(); __host__ __device__ cumat2f transpose();
}; __host__ __device__ cumat2f inverse();
};
__host__ __device__ float cuvect2f_dot(cuvect2f a, cuvect2f b);
__host__ __device__ float cuvect2f_cross(cuvect2f a, cuvect2f b); __host__ __device__ float cuvect2f_dot(cuvect2f a, cuvect2f b);
__host__ __device__ float cuvect2f_norm(cuvect2f a); __host__ __device__ float cuvect2f_cross(cuvect2f a, cuvect2f b);
__host__ __device__ cuvect2f cuvect2f_normalize(cuvect2f a); __host__ __device__ float cuvect2f_norm(cuvect2f a);
__host__ __device__ cuvect2f cuvect2f_proj(cuvect2f a, cuvect2f b); __host__ __device__ cuvect2f cuvect2f_normalize(cuvect2f a);
__host__ __device__ cuvect2f cuvect2f_proj(cuvect2f a, cuvect2f b);
//2x2 matrix operations
//matrix order is presumably mat[I,J] = mat[I+2*J]; the I+3*J written here before cannot index a 4 element array - TODO confirm against the implementation
//transpose a 2x2 matrix in place
__host__ __device__ void mat2f_transpose(float *mat2inout); //transpose a 2x2 matrix in place
__host__ __device__ void mat2f_transpose(float *mat2inout);
//copies src to dest
__host__ __device__ void mat2f_copy(float *mat2f_dest, const float *mat2f_src); //copies src to dest
__host__ __device__ void mat2f_copy(float *mat2f_dest, const float *mat2f_src);
//inverts mat?inout[4]
__host__ __device__ void mat2f_inverse(float *mat2inout); //inverts mat?inout[4]
__host__ __device__ void mat2f_inverse(float *mat2inout);
//rotation matrix from angle
__host__ __device__ void mat2f_rot_from_angle(float angle, float *mat2); //rotation matrix from angle
__host__ __device__ void mat2f_rot_from_angle(float angle, float *mat2);
//multiplies c = a*b
__host__ __device__ void mat2f_mult(float *mat2a, float *mat2b, float *mat2c); //multiplies c = a*b
__host__ __device__ void mat2f_mult(float *mat2a, float *mat2b, float *mat2c);
// ret = a*b
__host__ __device__ cuvect2f mat2f_mult(float *mat2a, cuvect2f b); // ret = a*b
__host__ __device__ cuvect2f mat2f_mult(float *mat2a, cuvect2f b);
void test_cuvect2f_1();
void test_cuvect2f_1();
};
};
#endif
#endif

View File

@ -1,86 +1,86 @@
#ifndef __CUVECT3_HPP__ #ifndef __CUVECT3_HPP__
#define __CUVECT3_HPP__ #define __CUVECT3_HPP__
namespace amscuda namespace amscuda
{ {
class cuvect3 class cuvect3
{ {
public: public:
double x; double x;
double y; double y;
double z; double z;
__host__ __device__ cuvect3(); __host__ __device__ cuvect3();
__host__ __device__ ~cuvect3(); __host__ __device__ ~cuvect3();
__host__ __device__ cuvect3(double _x, double _y, double _z); __host__ __device__ cuvect3(double _x, double _y, double _z);
__host__ __device__ double& operator[](const int I); __host__ __device__ double& operator[](const int I);
__host__ __device__ const double& operator[](const int I) const; __host__ __device__ const double& operator[](const int I) const;
__host__ __device__ cuvect3 operator+(cuvect3 lhs); __host__ __device__ cuvect3 operator+(cuvect3 lhs);
__host__ __device__ cuvect3 operator-(cuvect3 lhs); __host__ __device__ cuvect3 operator-(cuvect3 lhs);
__host__ __device__ cuvect3 operator*(double lhs); __host__ __device__ cuvect3 operator*(double lhs);
__host__ __device__ cuvect3 operator/(double lhs); __host__ __device__ cuvect3 operator/(double lhs);
}; };
class cumat3 class cumat3
{ {
public: public:
double dat[9]; double dat[9];
__host__ __device__ cumat3(); __host__ __device__ cumat3();
__host__ __device__ ~cumat3(); __host__ __device__ ~cumat3();
__host__ __device__ double& operator[](const int I); __host__ __device__ double& operator[](const int I);
__host__ __device__ double& operator()(const int I, const int J); __host__ __device__ double& operator()(const int I, const int J);
__host__ __device__ double& at(const int I, const int J); __host__ __device__ double& at(const int I, const int J);
__host__ __device__ cumat3 operator+(cumat3 lhs); __host__ __device__ cumat3 operator+(cumat3 lhs);
__host__ __device__ cumat3 operator-(cumat3 lhs); __host__ __device__ cumat3 operator-(cumat3 lhs);
__host__ __device__ cumat3 operator*(double lhs); __host__ __device__ cumat3 operator*(double lhs);
__host__ __device__ cumat3 operator/(double lhs); __host__ __device__ cumat3 operator/(double lhs);
__host__ __device__ cuvect3 operator*(cuvect3 lhs); __host__ __device__ cuvect3 operator*(cuvect3 lhs);
__host__ __device__ cumat3 operator*(cumat3 lhs); __host__ __device__ cumat3 operator*(cumat3 lhs);
__host__ __device__ double det(); __host__ __device__ double det();
__host__ __device__ cumat3 transpose(); __host__ __device__ cumat3 transpose();
__host__ __device__ cumat3 inverse(); __host__ __device__ cumat3 inverse();
}; };
__host__ __device__ double cuvect3_dot(cuvect3 a, cuvect3 b); __host__ __device__ double cuvect3_dot(cuvect3 a, cuvect3 b);
__host__ __device__ cuvect3 cuvect3_cross(cuvect3 a, cuvect3 b); __host__ __device__ cuvect3 cuvect3_cross(cuvect3 a, cuvect3 b);
__host__ __device__ double cuvect3_norm(cuvect3 a); __host__ __device__ double cuvect3_norm(cuvect3 a);
__host__ __device__ cuvect3 cuvect3_normalize(cuvect3 a); __host__ __device__ cuvect3 cuvect3_normalize(cuvect3 a);
__host__ __device__ cuvect3 cuvect3_proj(cuvect3 a, cuvect3 b); __host__ __device__ cuvect3 cuvect3_proj(cuvect3 a, cuvect3 b);
//3x3 matrix operations //3x3 matrix operations
//matrix order is assumed to be mat[I,J] = mat[I+3*J] //matrix order is assumed to be mat[I,J] = mat[I+3*J]
//transposes a 3x3 (9 element) matrix //transposes a 3x3 (9 element) matrix
__host__ __device__ void mat3_transpose(double *mat3inout); __host__ __device__ void mat3_transpose(double *mat3inout);
//copies src to dest //copies src to dest
__host__ __device__ void mat3_copy(double *mat3_dest, const double *mat3_src); __host__ __device__ void mat3_copy(double *mat3_dest, const double *mat3_src);
//returns determinant of 3x3 matrix //returns determinant of 3x3 matrix
__host__ __device__ double mat3_det(double *mat3in); __host__ __device__ double mat3_det(double *mat3in);
//inverts a 3x3 (9 element) matrix //inverts a 3x3 (9 element) matrix
__host__ __device__ void mat3_inverse(double *mat3inout); __host__ __device__ void mat3_inverse(double *mat3inout);
__host__ __device__ cuvect3 mat3_mult(double *mat3in, cuvect3 cvin); __host__ __device__ cuvect3 mat3_mult(double *mat3in, cuvect3 cvin);
__host__ __device__ void mat3_mult(double *matina, double *matinb, double *matout); __host__ __device__ void mat3_mult(double *matina, double *matinb, double *matout);
__host__ __device__ void mat3_hodgedual(cuvect3 vecin, double *matout); __host__ __device__ void mat3_hodgedual(cuvect3 vecin, double *matout);
__host__ __device__ void mat3_hodgedual(double *matin, cuvect3 vecout); __host__ __device__ void mat3_hodgedual(double *matin, cuvect3 vecout);
//returns direction cosine rotation matrix from axis and angle //returns direction cosine rotation matrix from axis and angle
__host__ __device__ void mat3_rot_from_axisangle(cuvect3 axis, double angle, double *matout); __host__ __device__ void mat3_rot_from_axisangle(cuvect3 axis, double angle, double *matout);
__host__ void test_cudavect_logic1(); __host__ void test_cudavect_logic1();
}; //end namespace amscuda }; //end namespace amscuda
#endif #endif

View File

@ -1,86 +1,87 @@
#ifndef __CUVECT3F_HPP__ #ifndef __CUVECT3F_HPP__
#define __CUVECT3F_HPP__ #define __CUVECT3F_HPP__
namespace amscuda namespace amscuda
{ {
class cuvect3f class cuvect3f
{ {
public: public:
float x; float x;
float y; float y;
float z; float z;
__host__ __device__ cuvect3f(); __host__ __device__ cuvect3f();
__host__ __device__ ~cuvect3f(); __host__ __device__ ~cuvect3f();
__host__ __device__ cuvect3f(float _x, float _y, float _z); __host__ __device__ cuvect3f(float _x, float _y, float _z);
__host__ __device__ float& operator[](const int I); __host__ __device__ float& operator[](const int I);
__host__ __device__ const float& operator[](const int I) const; __host__ __device__ const float& operator[](const int I) const;
__host__ __device__ cuvect3f operator+(cuvect3f lhs); __host__ __device__ cuvect3f operator+(cuvect3f lhs);
__host__ __device__ cuvect3f operator-(cuvect3f lhs); __host__ __device__ cuvect3f operator-(cuvect3f lhs);
__host__ __device__ cuvect3f operator*(float lhs); __host__ __device__ cuvect3f operator*(float lhs);
__host__ __device__ cuvect3f operator/(float lhs); __host__ __device__ cuvect3f operator/(float lhs);
}; __host__ __device__ friend cuvect3f operator-(cuvect3f rhs);
};
class cumat3f
{ class cumat3f
public: {
float dat[9]; public:
float dat[9];
__host__ __device__ cumat3f();
__host__ __device__ ~cumat3f(); __host__ __device__ cumat3f();
__host__ __device__ float& operator[](const int I); __host__ __device__ ~cumat3f();
__host__ __device__ float& operator()(const int I, const int J); __host__ __device__ float& operator[](const int I);
__host__ __device__ float& at(const int I, const int J); __host__ __device__ float& operator()(const int I, const int J);
__host__ __device__ float& at(const int I, const int J);
__host__ __device__ cumat3f operator+(cumat3f lhs);
__host__ __device__ cumat3f operator-(cumat3f lhs); __host__ __device__ cumat3f operator+(cumat3f lhs);
__host__ __device__ cumat3f operator*(float lhs); __host__ __device__ cumat3f operator-(cumat3f lhs);
__host__ __device__ cumat3f operator/(float lhs); __host__ __device__ cumat3f operator*(float lhs);
__host__ __device__ cuvect3f operator*(cuvect3f lhs); __host__ __device__ cumat3f operator/(float lhs);
__host__ __device__ cumat3f operator*(cumat3f lhs); __host__ __device__ cuvect3f operator*(cuvect3f lhs);
__host__ __device__ cumat3f operator*(cumat3f lhs);
__host__ __device__ float det();
__host__ __device__ cumat3f transpose(); __host__ __device__ float det();
__host__ __device__ cumat3f inverse(); __host__ __device__ cumat3f transpose();
}; __host__ __device__ cumat3f inverse();
};
__host__ __device__ float cuvect3f_dot(cuvect3f a, cuvect3f b);
__host__ __device__ cuvect3f cuvect3f_cross(cuvect3f a, cuvect3f b); __host__ __device__ float cuvect3f_dot(cuvect3f a, cuvect3f b);
__host__ __device__ float cuvect3f_norm(cuvect3f a); __host__ __device__ cuvect3f cuvect3f_cross(cuvect3f a, cuvect3f b);
__host__ __device__ cuvect3f cuvect3f_normalize(cuvect3f a); __host__ __device__ float cuvect3f_norm(cuvect3f a);
__host__ __device__ cuvect3f cuvect3f_proj(cuvect3f a, cuvect3f b); __host__ __device__ cuvect3f cuvect3f_normalize(cuvect3f a);
__host__ __device__ cuvect3f cuvect3f_proj(cuvect3f a, cuvect3f b);
//3x3 matrix operations
//matrix order is assumed to be mat[I,J] = mat[I+3*J] //3x3 matrix operations
//matrix order is assumed to be mat[I,J] = mat[I+3*J]
//transposes a 3x3 (9 element) matrix
__host__ __device__ void mat3f_transpose(float *mat3inout); //transposes a 3x3 (9 element) matrix
__host__ __device__ void mat3f_transpose(float *mat3inout);
//copies src to dest
__host__ __device__ void mat3f_copy(float *mat3f_dest, const float *mat3f_src); //copies src to dest
__host__ __device__ void mat3f_copy(float *mat3f_dest, const float *mat3f_src);
//returns determinant of 3x3 matrix
__host__ __device__ float mat3f_det(float *mat3in); //returns determinant of 3x3 matrix
__host__ __device__ float mat3f_det(float *mat3in);
//inverts a 3x3 (9 element) matrix
__host__ __device__ void mat3f_inverse(float *mat3inout); //inverts a 3x3 (9 element) matrix
__host__ __device__ void mat3f_inverse(float *mat3inout);
__host__ __device__ cuvect3f mat3f_mult(float *mat3in, cuvect3f cvin);
__host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout); __host__ __device__ cuvect3f mat3f_mult(float *mat3in, cuvect3f cvin);
__host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout);
__host__ __device__ void mat3f_hodgedual(cuvect3f vecin, float *matout);
__host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f vecout); __host__ __device__ void mat3f_hodgedual(cuvect3f vecin, float *matout);
__host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f vecout);
//returns direction cosine rotation matrix from axis and angle
__host__ __device__ void mat3f_rot_from_axisangle(cuvect3f axis, float angle, float *matout); //returns direction cosine rotation matrix from axis and angle
__host__ __device__ void mat3f_rot_from_axisangle(cuvect3f axis, float angle, float *matout);
__host__ void test_cudavectf_logic1();
__host__ void test_cudavectf_logic1();
};
};
#endif
#endif

View File

@ -1,59 +1,59 @@
#ifndef __CUVECT4_HPP__ #ifndef __CUVECT4_HPP__
#define __CUVECT4_HPP__ #define __CUVECT4_HPP__
namespace amscuda namespace amscuda
{ {
class cuvect4 class cuvect4
{ {
public: public:
double x; double x;
double y; double y;
double z; double z;
double w; double w;
__host__ __device__ cuvect4(); __host__ __device__ cuvect4();
__host__ __device__ ~cuvect4(); __host__ __device__ ~cuvect4();
__host__ __device__ cuvect4(double _x, double _y, double _z, double _w); __host__ __device__ cuvect4(double _x, double _y, double _z, double _w);
__host__ __device__ double& operator[](const int I); __host__ __device__ double& operator[](const int I);
__host__ __device__ const double& operator[](const int I) const; __host__ __device__ const double& operator[](const int I) const;
__host__ __device__ cuvect4 operator+(cuvect4 lhs); __host__ __device__ cuvect4 operator+(cuvect4 lhs);
__host__ __device__ cuvect4 operator-(cuvect4 lhs); __host__ __device__ cuvect4 operator-(cuvect4 lhs);
__host__ __device__ cuvect4 operator*(double lhs); __host__ __device__ cuvect4 operator*(double lhs);
__host__ __device__ cuvect4 operator/(double lhs); __host__ __device__ cuvect4 operator/(double lhs);
}; };
class cumat4 class cumat4
{ {
public: public:
double dat[16]; double dat[16];
__host__ __device__ cumat4(); __host__ __device__ cumat4();
__host__ __device__ ~cumat4(); __host__ __device__ ~cumat4();
__host__ __device__ double& operator[](const int I); __host__ __device__ double& operator[](const int I);
__host__ __device__ double& operator()(const int I, const int J); __host__ __device__ double& operator()(const int I, const int J);
__host__ __device__ double& at(const int I, const int J); __host__ __device__ double& at(const int I, const int J);
__host__ __device__ cumat4 operator+(cumat4 lhs); __host__ __device__ cumat4 operator+(cumat4 lhs);
__host__ __device__ cumat4 operator-(cumat4 lhs); __host__ __device__ cumat4 operator-(cumat4 lhs);
__host__ __device__ cumat4 operator*(double lhs); __host__ __device__ cumat4 operator*(double lhs);
__host__ __device__ cumat4 operator/(double lhs); __host__ __device__ cumat4 operator/(double lhs);
__host__ __device__ cuvect4 operator*(cuvect4 lhs); __host__ __device__ cuvect4 operator*(cuvect4 lhs);
__host__ __device__ cumat4 operator*(cumat4 lhs); __host__ __device__ cumat4 operator*(cumat4 lhs);
__host__ __device__ double det(); __host__ __device__ double det();
__host__ __device__ cumat4 transpose(); __host__ __device__ cumat4 transpose();
__host__ __device__ cumat4 inverse(); __host__ __device__ cumat4 inverse();
}; };
__host__ __device__ double cuvect4_dot(cuvect4 a, cuvect4 b); __host__ __device__ double cuvect4_dot(cuvect4 a, cuvect4 b);
__host__ __device__ double cuvect4_norm(cuvect4 a); __host__ __device__ double cuvect4_norm(cuvect4 a);
__host__ __device__ cuvect4 cuvect4_normalize(cuvect4 a); __host__ __device__ cuvect4 cuvect4_normalize(cuvect4 a);
__host__ __device__ cuvect4 cuvect4_proj(cuvect4 a, cuvect4 b); __host__ __device__ cuvect4 cuvect4_proj(cuvect4 a, cuvect4 b);
}; //end namespace amscuda }; //end namespace amscuda
#endif #endif

View File

@ -1,60 +1,61 @@
#ifndef __CUVECT4F_HPP__ #ifndef __CUVECT4F_HPP__
#define __CUVECT4F_HPP__ #define __CUVECT4F_HPP__
namespace amscuda namespace amscuda
{ {
class cuvect4f class cuvect4f
{ {
public: public:
float x; float x;
float y; float y;
float z; float z;
float w; float w;
__host__ __device__ cuvect4f(); __host__ __device__ cuvect4f();
__host__ __device__ ~cuvect4f(); __host__ __device__ ~cuvect4f();
__host__ __device__ cuvect4f(float _x, float _y, float _z, float _w); __host__ __device__ cuvect4f(float _x, float _y, float _z, float _w);
__host__ __device__ float& operator[](const int I); __host__ __device__ float& operator[](const int I);
__host__ __device__ const float& operator[](const int I) const; __host__ __device__ const float& operator[](const int I) const;
__host__ __device__ cuvect4f operator+(cuvect4f lhs); __host__ __device__ cuvect4f operator+(cuvect4f lhs);
__host__ __device__ cuvect4f operator-(cuvect4f lhs); __host__ __device__ cuvect4f operator-(cuvect4f lhs);
__host__ __device__ cuvect4f operator*(float lhs); __host__ __device__ cuvect4f operator*(float lhs);
__host__ __device__ cuvect4f operator/(float lhs); __host__ __device__ cuvect4f operator/(float lhs);
}; __host__ __device__ friend cuvect4f operator-(cuvect4f rhs);
};
class cumat4f
{ class cumat4f
public: {
float dat[16]; public:
float dat[16];
__host__ __device__ cumat4f();
__host__ __device__ ~cumat4f(); __host__ __device__ cumat4f();
__host__ __device__ float& operator[](const int I); __host__ __device__ ~cumat4f();
__host__ __device__ float& operator()(const int I, const int J); __host__ __device__ float& operator[](const int I);
__host__ __device__ float& at(const int I, const int J); __host__ __device__ float& operator()(const int I, const int J);
__host__ __device__ float& at(const int I, const int J);
__host__ __device__ cumat4f operator+(cumat4f lhs);
__host__ __device__ cumat4f operator-(cumat4f lhs); __host__ __device__ cumat4f operator+(cumat4f lhs);
__host__ __device__ cumat4f operator*(float lhs); __host__ __device__ cumat4f operator-(cumat4f lhs);
__host__ __device__ cumat4f operator/(float lhs); __host__ __device__ cumat4f operator*(float lhs);
__host__ __device__ cuvect4f operator*(cuvect4f lhs); __host__ __device__ cumat4f operator/(float lhs);
__host__ __device__ cumat4f operator*(cumat4f lhs); __host__ __device__ cuvect4f operator*(cuvect4f lhs);
__host__ __device__ cumat4f operator*(cumat4f lhs);
__host__ __device__ float det();
__host__ __device__ cumat4f transpose(); __host__ __device__ float det();
__host__ __device__ cumat4f inverse(); __host__ __device__ cumat4f transpose();
}; __host__ __device__ cumat4f inverse();
};
__host__ __device__ float cuvect4f_dot(cuvect4f a, cuvect4f b);
__host__ __device__ float cuvect4f_norm(cuvect4f a); __host__ __device__ float cuvect4f_dot(cuvect4f a, cuvect4f b);
__host__ __device__ cuvect4f cuvect4f_normalize(cuvect4f a); __host__ __device__ float cuvect4f_norm(cuvect4f a);
__host__ __device__ cuvect4f cuvect4f_proj(cuvect4f a, cuvect4f b); __host__ __device__ cuvect4f cuvect4f_normalize(cuvect4f a);
__host__ __device__ cuvect4f cuvect4f_proj(cuvect4f a, cuvect4f b);
};
};
#endif
#endif

22
make_linux.py Normal file
View File

@ -0,0 +1,22 @@
#!/usr/bin/python3
import os,sys,math
from build.amsbuildlib4 import *
# Build driver for the linux64 target.
#   ./make_linux.py         run the library and test build scripts, then run ./build_linux64/test
#   ./make_linux.py clean   delete the .o files under ./build_linux64 and stop


def _remove_files(root, exts):
    """Delete every file below root (searched recursively) whose extension is in exts."""
    for path in flist(root, recurse=True, exts=exts):
        os.remove(path)


if len(sys.argv) >= 2 and sys.argv[1] == "clean":
    _remove_files('./build_linux64', ['.o'])
    # sys.exit rather than the bare exit(): exit() is a helper injected by the
    # site module and is not guaranteed to exist in every interpreter setup.
    sys.exit(0)

# Run the build scripts in order and stop at the first one that reports a
# failure, so that a broken build is not followed by a run of a stale test binary.
build_status = 0
for script in ('./build/make.linux64.lib.py', './build/make.linux64.test.py'):
    build_status = os.system('python3 {}'.format(script))
    if build_status != 0:
        break

# Object files left under ./src are removed whether or not the build succeeded.
_remove_files('./src', ['.o'])

if build_status != 0:
    # A string argument is printed to stderr and the process exits with status 1.
    sys.exit('make_linux: {} failed (status {}), tests not run'.format(script, build_status))

# The test executable is started from inside the build directory.
os.chdir('./build_linux64')
callproc('./test')
os.chdir('..')

28
make_mingw.py Normal file
View File

@ -0,0 +1,28 @@
#!/usr/bin/python3
import os,sys,math

from build.amsbuildlib4 import *

# Build driver for the mingw64 target.
#   ./make_mingw.py         run the library and test build scripts, then run the test executable
#   ./make_mingw.py clean   delete the .o files under ./build_mingw64 and stop


def _remove_files(root, exts):
    """Delete every file below root (searched recursively) whose extension is in exts."""
    for path in flist(root, recurse=True, exts=exts):
        os.remove(path)


if len(sys.argv) >= 2 and sys.argv[1] == "clean":
    _remove_files('./build_mingw64', ['.o'])
    # sys.exit rather than the bare exit(): exit() is a helper injected by the
    # site module and is not guaranteed to exist in every interpreter setup.
    sys.exit(0)

# Run the build scripts in order and stop at the first one that reports a
# failure, so that a broken build is not followed by a run of a stale test binary.
build_status = 0
for script in ('./build/make.mingw64.lib.py', './build/make.mingw64.test.py'):
    build_status = os.system('python3 {}'.format(script))
    if build_status != 0:
        break

# Object files left under ./src are removed whether or not the build succeeded.
_remove_files('./src', ['.o', '.obj'])

if build_status != 0:
    # A string argument is printed to stderr and the process exits with status 1.
    sys.exit('make_mingw: {} failed (status {}), tests not run'.format(script, build_status))

# On win32 the executable is started directly; everywhere else it goes through wine.
if sys.platform != "win32":
    test_command = 'wine ./test.exe'
else:
    test_command = 'test.exe'

# The test executable is started from inside the build directory.
os.chdir('./build_mingw64')
callproc(test_command)
os.chdir('..')

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
old/bin_linux64/test Normal file

Binary file not shown.

Binary file not shown.

BIN
old/bin_winx64/test.exe Normal file

Binary file not shown.

BIN
old/bin_winx64/test.exp Normal file

Binary file not shown.

BIN
old/bin_winx64/test.lib Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

639
old/compscripts/complib2.py Normal file
View File

@ -0,0 +1,639 @@
#!/usr/bin/python3
#Python3 compilation library
#Aaron M. Schinder
#29 Dec 2020
#
#Cleanup and refactor from 2017 python2 version compilation libraries
import os,sys,math,subprocess
#####################
#Directory Functions#
#####################
##flist - list all files in a given directory pth
##optional arguments:
# recurse - (T/F): Whether to recursively search for files in directory tree
# exts - (list): A list of file extensions to filter on
# normpath (T/F): whether to normalize path variables after
#filelist = flist(pth,**kwargs):
def flist(pth,**kwargs):
    """
    Collect the files found in directory pth.

    Keyword arguments:
        recurse (bool): also descend into sub-directories (default False)
        exts (str or list): keep only files with these extensions (default: keep all)
        normpath (bool): pass every result through os.path.normpath (default True)
        followlinks (bool): descend into symlinked directories (default False)
        linuxpath: accepted, but currently has no effect

    Returns a list of paths, each one built by joining onto pth.
    """
    descend = kwargs.get('recurse', False)
    normalize = kwargs.get('normpath', True)
    follow = kwargs.get('followlinks', False)

    found = []
    subdirs = []
    for entry in os.listdir(pth):
        full = os.path.join(pth, entry)
        if os.path.isdir(full):
            # symlinked directories are only kept when followlinks is set
            if (follow or not os.path.islink(full)) and full not in (".", ".."):
                subdirs.append(full)
        elif os.path.isfile(full):
            found.append(full)

    if descend:
        # the same keyword arguments apply at every level of the tree
        for sub in subdirs:
            found.extend(flist(sub, **kwargs))

    if 'exts' in kwargs:
        found = filterexts(found, kwargs['exts'])

    if normalize:
        found = [os.path.normpath(p) for p in found]
    return found
#Filters by extensions in a list of files
#flst = def filterexts(flst,exts):
def filterexts(flst,exts):
    """
    Keep only the file names in flst whose extension is listed in exts.

    flst - iterable of file name strings
    exts - one extension string or a list of them; the leading '.' is
           optional ('o' and '.o' are equivalent). Empty strings are ignored.

    Returns a new list in the order of flst. The comparison is case-sensitive
    and only looks at the last extension (os.path.splitext).
    """
    if isinstance(exts, str):
        exts = [exts]
    # Normalise the wanted extensions once instead of once per file. Skipping
    # empty strings avoids the IndexError that ext[0] raised on ''.
    wanted = set()
    for ext in exts:
        if len(ext) == 0:
            continue
        wanted.add(ext if ext[0] == '.' else '.' + ext)
    kept = []
    for F in flst:
        ex = os.path.splitext(F)[1]
        # files without an extension never match
        if len(ex) > 0 and ex in wanted:
            kept.append(F)
    return kept
#Find a file fname, starting in pth and recursing
#Used for finding library files to link
def findfile(fname,pth,**kwargs):
    """
    Search pth recursively for a file whose base name equals fname.

    Returns the matching path, or "" when nothing matches. When several files
    share the name, the one listed last by flist is returned. Extra keyword
    arguments are accepted and ignored.
    """
    hits = [F for F in flist(pth,recurse=True) if os.path.split(F)[1] == fname]
    return hits[-1] if hits else ""
#List to space-separated-string
def list_to_sss(lst):
    """Join the strings in lst with single spaces; an empty list gives ""."""
    return " ".join(lst)
def strip_whitespace(strin):
    """
    Return strin without leading and trailing spaces, tabs, carriage returns
    and newlines. A string made only of those characters gives "".
    """
    return strin.strip(' \t\r\n')
def sss_to_list(sss):
    """
    Split a space-separated string into a list of tokens.

    Each token has surrounding spaces, tabs, CR and LF removed. Empty tokens
    (from repeated spaces or an empty input) are dropped, so the result of
    sss_to_list("") is [] and joining/splitting round-trips cleanly.
    """
    tokens = []
    for piece in sss.split(' '):
        piece = piece.strip(' \t\r\n')
        # "a  b".split(' ') yields an empty middle piece; it is not a token
        if piece != "":
            tokens.append(piece)
    return tokens
def replaceext(fname,ext):
    """
    Return fname with its last extension replaced by ext.

    ext may be given with or without the leading '.'; an empty ext simply
    removes the existing extension.
    """
    stem = os.path.splitext(fname)[0]
    if len(ext) == 0:
        return stem
    suffix = ext if ext[0] == '.' else '.' + ext
    return stem + suffix
def replaceexts(fnamelist,ext):
    """Apply replaceext(name, ext) to every name in fnamelist and return the new list."""
    return [replaceext(name,ext) for name in fnamelist]
# def except_contains_oldv(lst1,exc):
# lst2 = []
# for item in lst1:
# b = 1
# for item2 in exc:
# if(item.find(item2)>=0):
# b = 0
# break
# if(b==1):
# lst2.append(item)
# return lst2
#filenames must match
def except_contains(lst1,exc):
    """
    Return the paths of lst1 whose file name (last path component) is not
    equal to any entry of exc. Order is preserved.
    """
    kept = []
    for path in lst1:
        base = os.path.split(path)[-1]
        if not any(base == name for name in exc):
            kept.append(path)
    return kept
##########################
##System Call Procedures##
##########################
def callproc(cmd, **kwargs):
    """
    Run cmd through the system shell, capturing stdout and stderr together.

    Keyword arguments:
        echo (bool): print the command and its output (default True)
        logfile (str): when non-empty, append the command and its output to
                       this file

    Returns the exit status of the command. Earlier versions returned None,
    so callers that ignore the result are unaffected.
    """
    logfile = kwargs.get('logfile', "")
    echo = kwargs.get('echo', True)
    if(echo):
        print(cmd)
    #outside of Windows the command is handed to Popen as UTF-8 bytes
    if(sys.platform!='win32'):
        cmd2 = cmd.encode(encoding='utf-8')
    else:
        cmd2 = cmd
    proc = subprocess.Popen(cmd2,stderr = subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
    (out, err) = proc.communicate()
    #tool output is not guaranteed to be valid UTF-8: substitute bad bytes
    #instead of aborting the whole build with a UnicodeDecodeError
    out = out.decode(encoding='utf-8', errors='replace')
    if(echo):
        print(out)
    if(logfile!=""):
        #the log is opened only while writing, so the handle is always released
        with open(logfile,'a+') as fp:
            fp.write(cmd+'\n')
            fp.write(out+'\n')
    return proc.returncode
#######################################
##Compiler, Archive, and Linker Calls##
#######################################
def smartcompile(srcfile,objext='.o'):
    """
    Decide whether srcfile needs compiling.

    The object file is expected next to the source (same name, extension
    objext). Returns True when it is missing or older than the source, and
    False when its modification time is at least that of the source.
    """
    src_time = os.path.getmtime(srcfile)
    target = replaceext(srcfile,objext)
    if not os.path.exists(target):
        return True
    return os.path.getmtime(target) < src_time
#MSVC compiler wrapper
def msvc_compile(compilername, srcfile, **kwargs):
    """
    Compile srcfile with an MSVC-style command line.

    Keyword arguments (defaults in brackets): include [''], flags [''] (each a
    string or a list of strings), objext ['.obj'], srcfileflag ['/c'],
    outfileflag ['/Fo:'], logfile [""].
    The object file is written next to the source with extension objext.
    """
    include = kwargs.get('include', '')
    if isinstance(include, list):
        include = list_to_sss(include)
    flags = kwargs.get('flags', '')
    if isinstance(flags, list):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext', '.obj')
    srcfileflag = kwargs.get('srcfileflag', '/c')
    outfileflag = kwargs.get('outfileflag', '/Fo:')
    logfile = kwargs.get('logfile', "")

    outfile = replaceext(srcfile, objext)
    # the empty element reproduces the doubled space after the flags
    pieces = [compilername, flags, "", srcfileflag, srcfile, outfileflag + outfile, include]
    callproc(" ".join(pieces), echo=True, logfile=logfile)
    return
#MSVC compiler wrapper
def msvc_compile_list(compiler,srclist,**kwargs):
    """Run msvc_compile on each entry of srclist, in order, with the same keyword arguments."""
    for src in srclist:
        msvc_compile(compiler,src,**kwargs)
    return
#gnu-style compiler compile: Should work with gcc, g++, gfortran
def gs_compile(compiler,srcfile,**kwargs):
    """
    Compile srcfile with a gnu-style command line (gcc, g++, gfortran, ...).

    Keyword arguments (defaults in brackets): include [''], flags [''] (each a
    string or a list of strings), objext ['.o'], srcfileflag ['-c'],
    outfileflag ['-o'], logfile [""], smartcompile [True].
    With smartcompile enabled the compiler is only run when smartcompile()
    reports that the object file is missing or out of date.
    """
    include = kwargs.get('include', '')
    if isinstance(include, list):
        include = list_to_sss(include)
    flags = kwargs.get('flags', '')
    if isinstance(flags, list):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext', '.o')
    srcfileflag = kwargs.get('srcfileflag', '-c')
    outfileflag = kwargs.get('outfileflag', '-o')
    logfile = kwargs.get('logfile', "")
    check_times = kwargs.get('smartcompile', True)

    # nothing to do when the timestamp check says the object is current
    if check_times and not smartcompile(srcfile, objext):
        return
    outfile = replaceext(srcfile, objext)
    pieces = [compiler, flags, outfileflag, outfile, srcfileflag, srcfile, include]
    callproc(" ".join(pieces), echo=True, logfile=logfile)
    return
def gs_compile_list(compiler,srclist,**kwargs):
    """Run gs_compile on each entry of srclist, in order, with the same keyword arguments."""
    for src in srclist:
        gs_compile(compiler,src,**kwargs)
    return
def gs_compile_all(compiler,srcdir,srcexts,**kwargs):
    """
    Compile every file under srcdir whose extension is in srcexts.

    The 'recurse' keyword (default True) controls whether sub-directories are
    searched; all keyword arguments are forwarded to gs_compile.
    """
    descend = kwargs.get('recurse', True)
    for src in flist(srcdir,exts=srcexts,recurse=descend):
        gs_compile(compiler,src,**kwargs)
    return
def gs_link_all(linker,srcpath,target,**kwargs):
    """
    Link every object file found under srcpath into target.

    'objext' (default '.o') selects the object files and 'recurse' (default
    True) controls the directory search; all keyword arguments are forwarded
    to gs_link_list.
    """
    wanted = kwargs.get('objext', '.o')
    descend = kwargs.get('recurse', True)
    joined = list_to_sss(flist(srcpath,exts=wanted,recurse=descend))
    gs_link_list(linker,joined,target,**kwargs)
    return
def gs_link_list(linker,objlist,target,**kwargs):
    """
    Link object files into target with a gnu-style linker command.

    objlist - the object files, either as one space-separated string or as a
              list of file names (a list used to raise a TypeError)
    Keyword arguments (all default to ''): libdir, staticlibs, libflags,
    linkerflags, logfile. Other keyword arguments are accepted and ignored.
    """
    libdir = kwargs.get('libdir', '')
    staticlibs = kwargs.get('staticlibs', '')
    libflags = kwargs.get('libflags', '')
    linkerflags = kwargs.get('linkerflags', '')
    logfile = kwargs.get('logfile', '')
    #accept a real list as well as the pre-joined string
    if(not isinstance(objlist,str)):
        objlist = " ".join(objlist)
    ln = linker+" -o "+target+" "+libdir
    ln = ln+" "+objlist+" "+staticlibs+" "+libflags+" "+linkerflags
    callproc(ln,logfile=logfile)
    return
def msvc_link_list(objlist,target,**kwargs):
    """
    Link object files into target using the MSVC 'link' command.

    objlist - the object files, either as one space-separated string or as a
              list of file names (a list used to raise a TypeError)
    Keyword arguments (all default to ''): libdir, staticlibs, libflags,
    linkerflags, logfile. Other keyword arguments are accepted and ignored.
    """
    linker = 'link'
    libdir = kwargs.get('libdir', '')
    staticlibs = kwargs.get('staticlibs', '')
    libflags = kwargs.get('libflags', '')
    linkerflags = kwargs.get('linkerflags', '')
    logfile = kwargs.get('logfile', '')
    #accept a real list as well as the pre-joined string
    if(not isinstance(objlist,str)):
        objlist = " ".join(objlist)
    ln = linker+" "+libdir
    ln = ln+" "+objlist+" "+staticlibs+" "+linkerflags
    ln = ln+" /out:"+target+" "+libflags
    callproc(ln,logfile=logfile)
    return
def ar_all(srcpath,arname,**kwargs):
    """
    Archive every object file found under srcpath into arname.

    'objext' (default '.o') selects the object files and 'recurse' (default
    True) controls the directory search; keyword arguments are forwarded to
    ar_list.
    """
    descend = kwargs.get('recurse', True)
    wanted = kwargs.get('objext', '.o')
    ar_list(flist(srcpath,exts=wanted,recurse=descend),arname,**kwargs)
    return
def msvc_lib_list(objlist,arname,**kwargs):
    """Run the 'lib' tool on the listed object files with /out:arname. Keyword arguments are ignored."""
    callproc("lib "+list_to_sss(objlist)+" /out:"+arname)
    return
def ar_list(objlist,arname,**kwargs):
    """Run 'ar cr arname <objects>' for the listed object files. Keyword arguments are ignored."""
    callproc("ar cr "+ arname+" "+list_to_sss(objlist))
    return
def ar_add_list(objlist,arname,**kwargs):
    """
    Add the listed object files to the existing archive arname.

    Uses 'ar r' (insert members, replacing ones with the same name). The
    previous 'ar t' only printed the archive's table of contents, so nothing
    was ever added. Keyword arguments are ignored.
    """
    objlist2 = list_to_sss(objlist)
    ln = "ar r "+arname+" "+objlist2
    callproc(ln)
    return
##############################
##Derived Compiler Functions##
##############################
def gcc_compile(srcfile,**kwargs):
    """Compile one source file with gcc via gs_compile; objext is forced to '.o'."""
    gs_compile('gcc',srcfile,**dict(kwargs, objext='.o'))
    return
def gcc_compile_all(srcdir,**kwargs):
    """Compile every '.c' file under srcdir with gcc via gs_compile_all; objext is forced to '.o'."""
    gs_compile_all('gcc',srcdir,['.c'],**dict(kwargs, objext='.o'))
    return
def gcc_compile_list(srclist,**kwargs):
    """Compile each file in srclist with gcc via gs_compile_list; objext is forced to '.o'."""
    gs_compile_list('gcc',srclist,**dict(kwargs, objext='.o'))
    return
def gpp_compile(srcfile,**kwargs):
    """Compile one source file with g++ via gs_compile; objext is forced to '.o'."""
    gs_compile('g++',srcfile,**dict(kwargs, objext='.o'))
    return
def gpp_compile_all(srcdir,**kwargs):
    """Compile every '.c' and '.cpp' file under srcdir with g++ via gs_compile_all; objext is forced to '.o'."""
    gs_compile_all('g++',srcdir,['.c','.cpp'],**dict(kwargs, objext='.o'))
    return
def gpp_compile_list(srclist,**kwargs):
    """Compile each file in srclist with g++ via gs_compile_list; objext is forced to '.o'."""
    gs_compile_list('g++',srclist,**dict(kwargs, objext='.o'))
    return
def gfortran_compile(srcfile,**kwargs):
    """Compile one source file with gfortran via gs_compile; objext is forced to '.o'."""
    gs_compile('gfortran',srcfile,**dict(kwargs, objext='.o'))
    return
def gfortran_compile_all(srcdir,**kwargs):
    """Compile every '.f', '.f90' and '.f77' file under srcdir with gfortran via gs_compile_all; objext is forced to '.o'."""
    gs_compile_all('gfortran',srcdir,['.f','.f90','.f77'],**dict(kwargs, objext='.o'))
    return
def gfortran_compile_list(srclist,**kwargs):
    """Compile each file in srclist with gfortran via gs_compile_list; objext is forced to '.o'."""
    gs_compile_list('gfortran',srclist,**dict(kwargs, objext='.o'))
    return
def clang_compile(srcfile,**kwargs):
    """Compile one source file with clang++ via gs_compile; objext is forced to '.o'."""
    gs_compile('clang++',srcfile,**dict(kwargs, objext='.o'))
    return
def clang_compile_all(srcdir,**kwargs):
    """Compile every '.c' and '.cpp' file under srcdir with clang++ via gs_compile_all; objext is forced to '.o'."""
    gs_compile_all('clang++',srcdir,['.c','.cpp'],**dict(kwargs, objext='.o'))
    return
def clang_compile_list(srclist,**kwargs):
    """Compile each file in srclist with clang++ via gs_compile_list; objext is forced to '.o'."""
    gs_compile_list('clang++',srclist,**dict(kwargs, objext='.o'))
    return

524
old/compscripts/complib3.py Normal file
View File

@ -0,0 +1,524 @@
#!/usr/bin/python3
import os,sys,math
import subprocess
##flist - list all files in a given directory pth
##optional arguments:
# recurse - (T/F): Whether to recursively search for files in directory tree
# exts - (list): A list of file extensions to filter on
# normpath (T/F): whether to normalize path variables after
#filelist = flist(pth,**kwargs):
def flist(pth,**kwargs):
    """
    Collect the files found in directory pth.

    Keyword arguments:
        recurse (bool): also descend into sub-directories (default False)
        exts (str or list): keep only files with these extensions (default: keep all)
        normpath (bool): pass every result through os.path.normpath (default True)
        followlinks (bool): descend into symlinked directories (default False)
        linuxpath: accepted, but currently has no effect

    Returns a list of paths, each one built by joining onto pth.
    """
    descend = kwargs.get('recurse', False)
    normalize = kwargs.get('normpath', True)
    follow = kwargs.get('followlinks', False)

    found = []
    subdirs = []
    for entry in os.listdir(pth):
        full = os.path.join(pth, entry)
        if os.path.isdir(full):
            # symlinked directories are only kept when followlinks is set
            if (follow or not os.path.islink(full)) and full not in (".", ".."):
                subdirs.append(full)
        elif os.path.isfile(full):
            found.append(full)

    if descend:
        # the same keyword arguments apply at every level of the tree
        for sub in subdirs:
            found.extend(flist(sub, **kwargs))

    if 'exts' in kwargs:
        found = filterexts(found, kwargs['exts'])

    if normalize:
        found = [os.path.normpath(p) for p in found]
    return found
#Filters by extensions in a list of files
#flst = def filterexts(flst,exts):
def filterexts(flst,exts):
    """
    Keep only the file names in flst whose extension is listed in exts.

    flst - iterable of file name strings
    exts - one extension string or a list of them; the leading '.' is
           optional ('o' and '.o' are equivalent). Empty strings are ignored.

    Returns a new list in the order of flst. The comparison is case-sensitive
    and only looks at the last extension (os.path.splitext).
    """
    if isinstance(exts, str):
        exts = [exts]
    # Normalise the wanted extensions once instead of once per file. Skipping
    # empty strings avoids the IndexError that ext[0] raised on ''.
    wanted = set()
    for ext in exts:
        if len(ext) == 0:
            continue
        wanted.add(ext if ext[0] == '.' else '.' + ext)
    kept = []
    for F in flst:
        ex = os.path.splitext(F)[1]
        # files without an extension never match
        if len(ex) > 0 and ex in wanted:
            kept.append(F)
    return kept
#Find a file fname, starting in pth and recursing
#Used for finding library files to link
def findfile(fname,pth,**kwargs):
    """
    Search pth recursively for a file whose base name equals fname.

    Returns the matching path, or "" when nothing matches. When several files
    share the name, the one listed last by flist is returned. Extra keyword
    arguments are accepted and ignored.
    """
    hits = [F for F in flist(pth,recurse=True) if os.path.split(F)[1] == fname]
    return hits[-1] if hits else ""
def replaceext(fname,ext):
    """
    Return fname with its last extension replaced by ext.

    ext may be given with or without the leading '.'; an empty ext simply
    removes the existing extension.
    """
    stem = os.path.splitext(fname)[0]
    if len(ext) == 0:
        return stem
    suffix = ext if ext[0] == '.' else '.' + ext
    return stem + suffix
def replaceexts(fnamelist,ext):
    """Apply replaceext(name, ext) to every name in fnamelist and return the new list."""
    return [replaceext(name,ext) for name in fnamelist]
#filenames must match
def except_contains(lst1,exc):
    """
    Return the paths of lst1 whose file name (last path component) is not
    equal to any entry of exc. Order is preserved.
    """
    kept = []
    for path in lst1:
        base = os.path.split(path)[-1]
        if not any(base == name for name in exc):
            kept.append(path)
    return kept
##########################
##System Call Procedures##
##########################
def callproc(cmd, **kwargs):
    """
    Run cmd through the system shell, capturing stdout and stderr together.

    Keyword arguments:
        echo (bool): print the command and its output (default True)
        logfile (str): when non-empty, append the command and its output to
                       this file

    Returns the exit status of the command. Earlier versions returned None,
    so callers that ignore the result are unaffected.
    """
    logfile = kwargs.get('logfile', "")
    echo = kwargs.get('echo', True)
    if(echo):
        print(cmd)
    #outside of Windows the command is handed to Popen as UTF-8 bytes
    if(sys.platform!='win32'):
        cmd2 = cmd.encode(encoding='utf-8')
    else:
        cmd2 = cmd
    proc = subprocess.Popen(cmd2,stderr = subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
    (out, err) = proc.communicate()
    #tool output is not guaranteed to be valid UTF-8: substitute bad bytes
    #instead of aborting the whole build with a UnicodeDecodeError
    out = out.decode(encoding='utf-8', errors='replace')
    if(echo):
        print(out)
    if(logfile!=""):
        #the log is opened only while writing, so the handle is always released
        with open(logfile,'a+') as fp:
            fp.write(cmd+'\n')
            fp.write(out+'\n')
    return proc.returncode
#List to space-separated-string
def list_to_sss(lst):
    """Join the strings in lst with single spaces; an empty list gives ""."""
    return " ".join(lst)
#####################################
## Incremental Compilation Library ##
#####################################
#silently read lines from a text file if exists
def readtextlines(fname):
    """
    Read all lines of the text file fname, keeping line endings.

    Returns [] when fname is not a regular file or cannot be read. The list
    holds exactly the lines of the file; the spurious "" that used to be
    appended at end-of-file is gone.
    """
    if(not os.path.isfile(fname)):
        return []
    try:
        #errors="replace" keeps a stray non-UTF-8 byte from aborting the read;
        #the with-block closes the handle even if reading fails
        with open(fname,"r",errors="replace") as fp:
            return fp.readlines()
    except OSError:
        return []
def getincludefnfrage(includeline):
    """
    Extract the file name from an #include line.

    Handles both '#include <name>' and '#include "name"' and returns the text
    between the delimiters, or "" when no complete pair is found.

    The previous version tested the opening and the closing delimiter in the
    same loop iteration, so an opening '"' was immediately taken as the
    closing one and every quoted include produced "".
    """
    closers = {'<': '>', '"': '"'}
    for start, ch in enumerate(includeline):
        if ch in closers:
            #look for the matching closer strictly after the opener
            end = includeline.find(closers[ch], start+1)
            if end >= 0:
                return includeline[start+1:end]
            return ""
    return ""
#Returns the name of the source file fname (if it exists)
#and all included filenames
def getsrcandincludes(fname, incdirs):
    """
    Return fname (if it exists) followed by every file it includes, directly
    or indirectly, that can be found in one of the directories of incdirs.

    A line counts as an include when it contains the text "#include"; the
    first directory of incdirs holding the named file wins. Each file is
    listed once: without that, two headers including each other made the
    list grow forever.
    """
    files = []
    if(os.path.isfile(fname)):
        files.append(fname)
    seen = set(os.path.normpath(f) for f in files)
    idx = 0
    #files grows while it is being walked, hence the index loop
    while(idx<len(files)):
        for line in readtextlines(files[idx]):
            if(line.find("#include")>=0):
                fnfrag = getincludefnfrage(line)
                for incdir in incdirs:
                    tfn = os.path.join(incdir,fnfrag)
                    if(os.path.isfile(tfn)):
                        key = os.path.normpath(tfn)
                        if(key not in seen):
                            seen.add(key)
                            files.append(tfn)
                        break
        idx = idx + 1
    return files
#Returns the name of the object file associated with the source file
#within the object store folder (if it exists)
def getobjfile(fname,objstore,objext = ".o"):
    """
    Return the path of the object file belonging to source file fname inside
    the objstore directory, or "" when that file does not exist.

    The object name is the source's base name with every extension removed
    ("a.b.cu" -> "a") plus '.' and objext (leading/trailing dots of objext
    are ignored).
    """
    stem = os.path.split(fname)[1]
    root, suffix = os.path.splitext(stem)
    while suffix != "":
        stem = root
        root, suffix = os.path.splitext(stem)
    candidate = os.path.join(objstore, "{}.{}".format(stem, objext.strip('.')))
    return candidate if os.path.exists(candidate) else ""
def getsrctimes(fname, incdirs):
    """
    Return the modification times of fname and of every include file found
    by getsrcandincludes, in the same order as that function's result.
    """
    return [os.path.getmtime(f) for f in getsrcandincludes(fname, incdirs)]
def getobjtime(fname,objstore,objext=".o"):
    """
    Return the modification time of the object file that getobjfile finds for
    fname, or -1 when there is no such object file.
    """
    objpath = getobjfile(fname,objstore,objext)
    return os.path.getmtime(objpath) if objpath != "" else -1
#Decide whether or not to compile source file
def decidecompile(fname,**kwargs):
    """
    Decide whether source file fname has to be (re)compiled.

    Keyword arguments:
        searchincdirs (list): directories searched for included files
                              (default ["./include"])
        objext (str): object file extension (default ".o")
        objstore (str): directory holding the object files (default "./objstore")

    Returns False when fname does not exist, True when no object file exists
    yet, and otherwise True only if the source or one of the include files
    found for it is newer than the object file.
    """
    if(not os.path.isfile(fname)):
        return False
    incdirs = kwargs.get("searchincdirs", ["./include"])
    objext = kwargs.get("objext", ".o")
    objstore = kwargs.get("objstore", "./objstore")
    obj = getobjfile(fname,objstore,objext)
    if(obj==""):
        return True
    objt = getobjtime(fname,objstore,objext)
    #the include tree is scanned once here; the former extra
    #getsrcandincludes call produced a list that was never used
    return any(t>objt for t in getsrctimes(fname,incdirs))
def gs_incremental_compile(compiler,srcfile,**kwargs):
    """
    Compile srcfile with a gnu-style command line, but only when
    decidecompile() reports that it is out of date.

    Keyword arguments (defaults in brackets): include [''], flags [''] (each a
    string or a list of strings), objext ['.o'], srcfileflag ['-c'],
    outfileflag ['-o'], logfile [""], objstore ["./objstore"], plus whatever
    decidecompile reads (searchincdirs). All keyword arguments are passed on
    to decidecompile.

    The object file is written to objstore as <base name without any
    extension> + '.' + objext, the same name getobjfile() looks for; objstore
    is created when it does not exist yet.
    """
    include = kwargs.get('include', '')
    if(isinstance(include,list)):
        include = list_to_sss(include)
    flags = kwargs.get('flags', '')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext', '.o')
    srcfileflag = kwargs.get('srcfileflag', '-c')
    outfileflag = kwargs.get('outfileflag', '-o')
    logfile = kwargs.get('logfile', "")
    objstore = kwargs.get('objstore', "./objstore")

    if(not decidecompile(srcfile,**kwargs)):
        return
    #strip every extension from the base name, e.g. "a.b.cu" -> "a"
    stem = os.path.split(srcfile)[1]
    while(os.path.splitext(stem)[1]!=""):
        stem = os.path.splitext(stem)[0]
    #build the name exactly like getobjfile does, so objext='o' and
    #objext='.o' both give "<stem>.o" and the up-to-date check can find it
    outfile = os.path.join(objstore,"{}.{}".format(stem,objext.strip('.')))
    if(objstore!=""):
        #the compiler cannot create the output directory by itself
        os.makedirs(objstore,exist_ok=True)
    ln = compiler+" "+flags+" " + outfileflag+" "+outfile+" "+srcfileflag+" "+srcfile
    ln = ln + " " + include
    callproc(ln,echo=True,logfile=logfile)
    return
def gs_incremental_compile_list(compiler,srclist,**kwargs):
    """Run gs_incremental_compile on each entry of srclist, in order, with the same keyword arguments."""
    for src in srclist:
        gs_incremental_compile(compiler,src,**kwargs)
    return
#MSVC compiler wrapper
def msvc_incremental_compile(compilername, srcfile, **kwargs):
    """
    Compile srcfile with an MSVC-style command line, but only when
    decidecompile() reports that it is out of date.

    Keyword arguments (defaults in brackets): include [''], flags [''] (each a
    string or a list of strings), objext ['.obj'], srcfileflag ['/c'],
    outfileflag ['/Fo:'], logfile [""], objstore ["./objstore"], plus whatever
    decidecompile reads (searchincdirs).

    The object file is written to objstore as <base name without any
    extension> + '.' + objext; objstore is created when it does not exist.

    Fixes relative to the earlier version: the compiler is invoked exactly
    once (a second, stray callproc ran it again), and decidecompile is told
    the objext actually used, so the '.obj' default is no longer compared
    against a never-existing '.o' file.
    """
    include = kwargs.get('include', '')
    if(isinstance(include,list)):
        include = list_to_sss(include)
    flags = kwargs.get('flags', '')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext', '.obj')
    srcfileflag = kwargs.get('srcfileflag', '/c')
    outfileflag = kwargs.get('outfileflag', '/Fo:')
    logfile = kwargs.get('logfile', "")
    objstore = kwargs.get('objstore', "./objstore")

    #forward the effective objext, not decidecompile's own '.o' default
    if(not decidecompile(srcfile,**dict(kwargs, objext=objext))):
        return
    #strip every extension from the base name, e.g. "a.b.cu" -> "a"
    stem = os.path.split(srcfile)[1]
    while(os.path.splitext(stem)[1]!=""):
        stem = os.path.splitext(stem)[0]
    #same naming rule as getobjfile, so the up-to-date check can find the file
    outfile = os.path.join(objstore,"{}.{}".format(stem,objext.strip('.')))
    if(objstore!=""):
        os.makedirs(objstore,exist_ok=True)
    ln = compilername+" "+flags+" "+srcfileflag+" "+srcfile+" "+outfileflag+" "+outfile
    ln = ln + " " + include
    callproc(ln,echo=True,logfile=logfile)
    return
def msvc_incremental_compile_list(compiler,srclist,**kwargs):
    """Run msvc_incremental_compile on each entry of srclist, in order, with the same keyword arguments."""
    for src in srclist:
        msvc_incremental_compile(compiler,src,**kwargs)
    return
#######################
## Main Script Tests ##
#######################
def testtimes(args):
    """
    Manual check of the incremental-compile helpers.

    args is an argv-style list; args[1] names a source file. Prints the source
    and its include files with their modification times, the associated
    object file in ./objstore (if any), and the decidecompile() verdict.
    Does nothing when fewer than two arguments are given.
    """
    if(len(args)<2):
        return
    target = args[1]
    deps = getsrcandincludes(target,["./include"])
    deptimes = getsrctimes(target,["./include"])
    for pos, dep in enumerate(deps):
        print("{}\t\t{}".format(dep,deptimes[pos]))
    print("associated obj file:")
    objpath = getobjfile(target,"./objstore")
    objtime = getobjtime(target,"./objstore")
    if(objpath!=""):
        print("{}\t\t{}".format(objpath,objtime))
    else:
        print("none found")
    print("compile? : {}".format(decidecompile(target)))
    return
# if(__name__ == "__main__"):
# args = sys.argv
# testtimes(args)

View File

@ -0,0 +1,52 @@
#!/usr/bin/python3
import os,sys,subprocess,math
from complib2 import *
from complib3 import gs_incremental_compile, gs_incremental_compile_list
import shutil
#from distutils.dir_util import copy_tree as copy_tree #this version does overwrites
from shutil import copytree
#Build script: compile every library source with nvcc into the object store,
#archive the objects into a static library, then publish the library and the
#headers to the common folders.
libname = 'amsculib2.linux64' #prefix static library name to generate
targetname = 'test' #create this executable when compiling tests
commonincdir = "../../linux64/include"
commonlibdir = "../../linux64/lib"
localbindir = "./bin_linux64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.cu'] #ignore these files when compiling the static library
kwargs = dict()
include = "-I./include -I{}".format(commonincdir)
kwargs['include'] = include
#-dc flag: relocatable device code - needed for device functions to link in different "execution units"
#--ptxas-options=-v
kwargs['flags'] = "-dc --compiler-options '-fPIC -O3'"
kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir)
kwargs['libflags'] = "-l{}".format(libname)
kwargs['linkerflags'] = ""
kwargs['recurse'] = True
kwargs['objstore'] = "./objstore"
kwargs['searchincdirs'] = ['./include']
#Make sure every output directory exists before anything is written to it.
#Without this, listing a missing objstore raises, ar cannot create the
#archive, and shutil.copy to a missing commonlibdir writes a FILE named "lib".
for outdir in (kwargs['objstore'],localbindir,commonlibdir):
    os.makedirs(outdir,exist_ok=True)
#find all source files, except the main project files
files = flist('./src',exts = srcexts, recurse=True)
files = except_contains(files,mainsrc)
#object names next to the sources; kept for reference, the archive below is
#built from the object store instead
objfiles = replaceexts(files,'.o')
objfiles_sss = list_to_sss(objfiles)
#compile all the source files in the list
#gs_compile_list(cc,files,**kwargs)
gs_incremental_compile_list(cc,files,**kwargs)
#archive all the source files into a static library
#ar_list(objfiles,'{}/lib{}.a'.format(localbindir,libname))
objlist = flist(kwargs['objstore'],exts='.o',recurse=True)
ar_list(objlist,'{}/lib{}.a'.format(localbindir,libname))
# #Push any libraries to the common lib folder
shutil.copy('{}/lib{}.a'.format(localbindir,libname),commonlibdir)
# #Copy include files to the common include folder
copytree('./include/',commonincdir+'/',dirs_exist_ok=True)

View File

@ -0,0 +1,43 @@
#!/usr/bin/python3
import os,sys,subprocess,math
from complib2 import *
from complib3 import gs_incremental_compile, gs_incremental_compile_list
import shutil
#Build script: compile ./src/main.cu with nvcc and link it against the static
#library into the test executable.
libname = 'amsculib2.linux64' #prefix static library name to generate
targetname = 'test' #create this executable when compiling tests
commonincdir = "../../linux64/include"
commonlibdir = "../../linux64/lib"
localbindir = "./bin_linux64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.cu'] #ignore these files when compiling the static library
include = "-I./include -I{}".format(commonincdir)
kwargs = {
    'include': include,
    #-dc flag: relocatable device code - needed for device functions to link in different "execution units"
    'flags': "-dc --compiler-options '-fPIC'",
    'libdir': "-L{} -L{}".format(localbindir,commonlibdir),
    'libflags': "-l{} -lamsculib2.linux64".format(libname),
    'linkerflags': "",
    'recurse': True,
    'objstore': "./objstore",
    'searchincdirs': ['./include'],
}
#-lamsmathlib3.linux64 -lamsstring3.linux64 -lamsmatrix_cpp.linux64 -llapack -lblas -lgfortran -lamsmathutilthread.linux64 -lamsmathutil2.linux64
#Pull required binary dynamic libraries to the bin folder
#shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir);
#shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir);
#shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64');
#Designate source files for main test program
fsrc = ['./src/main.cu']
fobj = replaceexts(fsrc,'.o')
#Compile test programs
gs_compile_list(cc,fsrc,**kwargs)
#Link the object(s) into <localbindir>/<targetname>
target_path = '{}/{}'.format(localbindir,targetname)
gs_link_list(cc,list_to_sss(fobj),target_path,**kwargs)

View File

@ -0,0 +1,639 @@
#!/usr/bin/python3
#Python3 compilation library
#Aaron M. Schinder
#29 Dec 2020
#
#Cleanup and refactor from 2017 python2 version compilation libraries
import os,sys,math,subprocess
#####################
#Directory Functions#
#####################
##flist - list all files in a given directory pth
##optional arguments:
# recurse - (T/F): Whether to recursively search for files in directory tree
# exts - (list): A list of file extensions to filter on
# normpath (T/F): whether to normalize path variables after
#filelist = flist(pth,**kwargs):
def flist(pth,**kwargs):
    """
    Collect the files found in directory pth.

    Keyword arguments:
        recurse (bool): also descend into sub-directories (default False)
        exts (str or list): keep only files with these extensions (default: keep all)
        normpath (bool): pass every result through os.path.normpath (default True)
        followlinks (bool): descend into symlinked directories (default False)
        linuxpath: accepted, but currently has no effect

    Returns a list of paths, each one built by joining onto pth.
    """
    descend = kwargs.get('recurse', False)
    normalize = kwargs.get('normpath', True)
    follow = kwargs.get('followlinks', False)

    found = []
    subdirs = []
    for entry in os.listdir(pth):
        full = os.path.join(pth, entry)
        if os.path.isdir(full):
            # symlinked directories are only kept when followlinks is set
            if (follow or not os.path.islink(full)) and full not in (".", ".."):
                subdirs.append(full)
        elif os.path.isfile(full):
            found.append(full)

    if descend:
        # the same keyword arguments apply at every level of the tree
        for sub in subdirs:
            found.extend(flist(sub, **kwargs))

    if 'exts' in kwargs:
        found = filterexts(found, kwargs['exts'])

    if normalize:
        found = [os.path.normpath(p) for p in found]
    return found
#Filters by extensions in a list of files
#flst = def filterexts(flst,exts):
def filterexts(flst,exts):
    """
    Keep only the file names in flst whose extension is listed in exts.

    flst - iterable of file name strings
    exts - one extension string or a list of them; the leading '.' is
           optional ('o' and '.o' are equivalent). Empty strings are ignored.

    Returns a new list in the order of flst. The comparison is case-sensitive
    and only looks at the last extension (os.path.splitext).
    """
    if isinstance(exts, str):
        exts = [exts]
    # Normalise the wanted extensions once instead of once per file. Skipping
    # empty strings avoids the IndexError that ext[0] raised on ''.
    wanted = set()
    for ext in exts:
        if len(ext) == 0:
            continue
        wanted.add(ext if ext[0] == '.' else '.' + ext)
    kept = []
    for F in flst:
        ex = os.path.splitext(F)[1]
        # files without an extension never match
        if len(ex) > 0 and ex in wanted:
            kept.append(F)
    return kept
#Find a file fname, starting in pth and recursing
#Used for finding library files to link
def findfile(fname,pth,**kwargs):
    """
    Search pth recursively for a file whose base name equals fname.

    Returns the matching path, or "" when nothing matches. When several files
    share the name, the one listed last by flist is returned. Extra keyword
    arguments are accepted and ignored.
    """
    hits = [F for F in flist(pth,recurse=True) if os.path.split(F)[1] == fname]
    return hits[-1] if hits else ""
#List to space-separated-string
def list_to_sss(lst):
    """Join the strings in lst with single spaces; an empty list gives ""."""
    return " ".join(lst)
def strip_whitespace(strin):
    """
    Return strin without leading and trailing spaces, tabs, carriage returns
    and newlines. A string made only of those characters gives "".
    """
    return strin.strip(' \t\r\n')
def sss_to_list(sss):
    """
    Split a space-separated string into a list of tokens.

    Each token has surrounding spaces, tabs, CR and LF removed. Empty tokens
    (from repeated spaces or an empty input) are dropped, so the result of
    sss_to_list("") is [] and joining/splitting round-trips cleanly.
    """
    tokens = []
    for piece in sss.split(' '):
        piece = piece.strip(' \t\r\n')
        # "a  b".split(' ') yields an empty middle piece; it is not a token
        if piece != "":
            tokens.append(piece)
    return tokens
def replaceext(fname,ext):
    """
    Return fname with its last extension replaced by ext.

    ext may be given with or without the leading '.'; an empty ext simply
    removes the existing extension.
    """
    stem = os.path.splitext(fname)[0]
    if len(ext) == 0:
        return stem
    suffix = ext if ext[0] == '.' else '.' + ext
    return stem + suffix
def replaceexts(fnamelist,ext):
    """Apply replaceext(name, ext) to every name in fnamelist and return the new list."""
    return [replaceext(name,ext) for name in fnamelist]
# def except_contains_oldv(lst1,exc):
# lst2 = []
# for item in lst1:
# b = 1
# for item2 in exc:
# if(item.find(item2)>=0):
# b = 0
# break
# if(b==1):
# lst2.append(item)
# return lst2
#filenames must match
def except_contains(lst1,exc):
    """
    Return the paths of lst1 whose file name (last path component) is not
    equal to any entry of exc. Order is preserved.
    """
    kept = []
    for path in lst1:
        base = os.path.split(path)[-1]
        if not any(base == name for name in exc):
            kept.append(path)
    return kept
##########################
##System Call Procedures##
##########################
def callproc(cmd, **kwargs):
    """
    Run cmd through the system shell, capturing stdout and stderr together.

    Keyword arguments:
        echo (bool): print the command and its output (default True)
        logfile (str): when non-empty, append the command and its output to
                       this file

    Returns the exit status of the command. Earlier versions returned None,
    so callers that ignore the result are unaffected.
    """
    logfile = kwargs.get('logfile', "")
    echo = kwargs.get('echo', True)
    if(echo):
        print(cmd)
    #outside of Windows the command is handed to Popen as UTF-8 bytes
    if(sys.platform!='win32'):
        cmd2 = cmd.encode(encoding='utf-8')
    else:
        cmd2 = cmd
    proc = subprocess.Popen(cmd2,stderr = subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
    (out, err) = proc.communicate()
    #tool output is not guaranteed to be valid UTF-8: substitute bad bytes
    #instead of aborting the whole build with a UnicodeDecodeError
    out = out.decode(encoding='utf-8', errors='replace')
    if(echo):
        print(out)
    if(logfile!=""):
        #the log is opened only while writing, so the handle is always released
        with open(logfile,'a+') as fp:
            fp.write(cmd+'\n')
            fp.write(out+'\n')
    return proc.returncode
#######################################
##Compiler, Archive, and Linker Calls##
#######################################
def smartcompile(srcfile,objext='.o'):
    """
    Decide whether srcfile needs compiling.

    The object file is expected next to the source (same name, extension
    objext). Returns True when it is missing or older than the source, and
    False when its modification time is at least that of the source.
    """
    src_time = os.path.getmtime(srcfile)
    target = replaceext(srcfile,objext)
    if not os.path.exists(target):
        return True
    return os.path.getmtime(target) < src_time
#MSVC compiler wrapper
def msvc_compile(compilername, srcfile, **kwargs):
    """
    Compile srcfile with an MSVC-style command line.

    Keyword arguments (defaults in brackets): include [''], flags [''] (each a
    string or a list of strings), objext ['.obj'], srcfileflag ['/c'],
    outfileflag ['/Fo:'], logfile [""].
    The object file is written next to the source with extension objext.
    """
    include = kwargs.get('include', '')
    if isinstance(include, list):
        include = list_to_sss(include)
    flags = kwargs.get('flags', '')
    if isinstance(flags, list):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext', '.obj')
    srcfileflag = kwargs.get('srcfileflag', '/c')
    outfileflag = kwargs.get('outfileflag', '/Fo:')
    logfile = kwargs.get('logfile', "")

    outfile = replaceext(srcfile, objext)
    # the empty element reproduces the doubled space after the flags
    pieces = [compilername, flags, "", srcfileflag, srcfile, outfileflag + outfile, include]
    callproc(" ".join(pieces), echo=True, logfile=logfile)
    return
#MSVC compiler wrapper
def msvc_compile_list(compiler,srclist,**kwargs):
    """
    msvc_compile_list - run msvc_compile on every entry of srclist, in order

    All keyword arguments are forwarded unchanged to msvc_compile.
    """
    for srcfile in srclist:
        msvc_compile(compiler,srcfile,**kwargs)
#gnu-style compiler compile: Should work with gcc, g++, gfortran
def gs_compile(compiler,srcfile,**kwargs):
    """
    gs_compile - compile one source file with a gnu-style command line

    compiler - compiler executable
    srcfile - source file to compile

    optional arguments:
    include - (str or list): include flags, appended at the end of the command
    flags - (str or list): compiler flags
    objext - (str): object file extension (default '.o')
    srcfileflag - (str): flag placed before the source file (default '-c')
    outfileflag - (str): flag placed before the output file (default '-o')
    logfile - (str): log file handed to callproc (default "")
    smartcompile - (T/F): when true (default), the compile is skipped if the
        smartcompile() timestamp check says the object file is current
    """
    include = kwargs.get('include','')
    if(isinstance(include,list)):
        include = list_to_sss(include)
    flags = kwargs.get('flags','')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext','.o')
    srcfileflag = kwargs.get('srcfileflag','-c')
    outfileflag = kwargs.get('outfileflag','-o')
    logfile = kwargs.get('logfile',"")
    check_timestamps = kwargs.get('smartcompile',True)

    #nothing to do when timestamp checking is on and the object is current
    if(check_timestamps and not smartcompile(srcfile,objext)):
        return

    outfile = replaceext(srcfile,objext)
    pieces = [compiler,flags,outfileflag,outfile,srcfileflag,srcfile,include]
    callproc(" ".join(pieces),echo=True,logfile=logfile)
def gs_compile_list(compiler,srclist,**kwargs):
    """
    gs_compile_list - run gs_compile on every entry of srclist, in order

    All keyword arguments are forwarded unchanged to gs_compile.
    """
    for srcfile in srclist:
        gs_compile(compiler,srcfile,**kwargs)
def gs_compile_all(compiler,srcdir,srcexts,**kwargs):
    """
    gs_compile_all - compile every file that flist finds under srcdir

    srcexts - extensions handed to flist as its exts filter

    optional arguments:
    recurse - (T/F): passed to flist (default True)
    every keyword argument (recurse included) is also forwarded to gs_compile
    """
    recurse = kwargs.get('recurse',True)
    for srcfile in flist(srcdir,exts=srcexts,recurse=recurse):
        gs_compile(compiler,srcfile,**kwargs)
def gs_link_all(linker,srcpath,target,**kwargs):
    """
    gs_link_all - link every object file that flist finds under srcpath

    optional arguments:
    objext - passed to flist as its exts filter (default '.o')
    recurse - (T/F): passed to flist (default True)
    every keyword argument is also forwarded to gs_link_list
    """
    objext = kwargs.get('objext','.o')
    recurse = kwargs.get('recurse',True)
    #gs_link_list wants the objects already collapsed into one string
    objects_sss = list_to_sss(flist(srcpath,exts=objext,recurse=recurse))
    gs_link_list(linker,objects_sss,target,**kwargs)
def gs_link_list(linker,objlist,target,**kwargs):
    """
    gs_link_list - link objects into target with a gnu-style command line

    linker - linker (or compiler driver) executable
    objlist - (str): the object files, already joined into a single string
    target - output file, placed after -o

    optional arguments (all strings, default ''):
    libdir - library search path flags
    staticlibs - static libraries to add
    libflags - library flags
    linkerflags - additional flags, placed last
    logfile - log file handed to callproc

    'objext' and 'recurse' may be present in kwargs but do not affect the command.
    """
    libdir = kwargs.get('libdir','')
    staticlibs = kwargs.get('staticlibs','')
    libflags = kwargs.get('libflags','')
    linkerflags = kwargs.get('linkerflags','')
    logfile = kwargs.get('logfile','')

    pieces = [linker,"-o",target,libdir,objlist,staticlibs,libflags,linkerflags]
    callproc(" ".join(pieces),logfile=logfile)
def msvc_link_list(objlist,target,**kwargs):
    """
    msvc_link_list - link objects into target using the 'link' command

    objlist - (str): the object files, already joined into a single string
    target - output file, emitted as /out:target

    optional arguments (all strings, default ''):
    libdir - library search path flags
    staticlibs - static libraries to add
    linkerflags - additional linker flags
    libflags - library flags, placed after /out:
    logfile - log file handed to callproc

    'objext' and 'recurse' may be present in kwargs but do not affect the command.
    """
    libdir = kwargs.get('libdir','')
    staticlibs = kwargs.get('staticlibs','')
    libflags = kwargs.get('libflags','')
    linkerflags = kwargs.get('linkerflags','')
    logfile = kwargs.get('logfile','')

    pieces = ['link',libdir,objlist,staticlibs,linkerflags,"/out:"+target,libflags]
    callproc(" ".join(pieces),logfile=logfile)
def ar_all(srcpath,arname,**kwargs):
    """
    ar_all - archive every object file that flist finds under srcpath

    arname - archive file handed to ar_list

    optional arguments:
    recurse - (T/F): passed to flist (default True)
    objext - passed to flist as its exts filter (default '.o')
    every keyword argument is also forwarded to ar_list
    """
    recurse = kwargs.get('recurse',True)
    objext = kwargs.get('objext','.o')
    ar_list(flist(srcpath,exts=objext,recurse=recurse),arname,**kwargs)
def msvc_lib_list(objlist,arname,**kwargs):
    """
    msvc_lib_list - build a static library with the 'lib' command

    objlist - object files, converted to a single string by list_to_sss
    arname - library file, emitted as /out:arname

    Keyword arguments are accepted but not used.
    """
    pieces = ["lib",list_to_sss(objlist),"/out:"+arname]
    callproc(" ".join(pieces))
def ar_list(objlist,arname,**kwargs):
    """
    ar_list - build a static archive by running 'ar cr'

    objlist - object files, converted to a single string by list_to_sss
    arname - archive file to write

    Keyword arguments are accepted but not used.
    """
    members = list_to_sss(objlist)
    callproc(" ".join(["ar","cr",arname,members]))
def ar_add_list(objlist,arname,**kwargs):
    """
    ar_add_list - add object files to an archive

    objlist - object files, converted to a single string by list_to_sss
    arname - archive to add the objects to

    Keyword arguments are accepted but not used.
    """
    objlist2 = list_to_sss(objlist)
    #'r' inserts the members (replacing same-named ones); the previous 't'
    #operation only printed the archive's table of contents and added nothing
    ln = "ar r "+arname+" "+objlist2
    callproc(ln)
    return
##############################
##Derived Compiler Functions##
##############################
def gcc_compile(srcfile,**kwargs):
    """
    gcc_compile - compile one source file with gcc through gs_compile

    'objext' is forced to '.o'; every other keyword argument is forwarded.
    """
    gs_compile('gcc',srcfile,**dict(kwargs,objext='.o'))
def gcc_compile_all(srcdir,**kwargs):
    """
    gcc_compile_all - compile the '.c' files under srcdir with gcc through gs_compile_all

    'objext' is forced to '.o'; every other keyword argument is forwarded.
    """
    gs_compile_all('gcc',srcdir,['.c'],**dict(kwargs,objext='.o'))
def gcc_compile_list(srclist,**kwargs):
    """
    gcc_compile_list - compile each file of srclist with gcc through gs_compile_list

    'objext' is forced to '.o'; every other keyword argument is forwarded.
    """
    gs_compile_list('gcc',srclist,**dict(kwargs,objext='.o'))
def gpp_compile(srcfile,**kwargs):
    """
    gpp_compile - compile one source file with g++ through gs_compile

    'objext' is forced to '.o'; every other keyword argument is forwarded.
    """
    gs_compile('g++',srcfile,**dict(kwargs,objext='.o'))
def gpp_compile_all(srcdir,**kwargs):
    """
    gpp_compile_all - compile the '.c' and '.cpp' files under srcdir with g++
    through gs_compile_all

    'objext' is forced to '.o'; every other keyword argument is forwarded.
    """
    gs_compile_all('g++',srcdir,['.c','.cpp'],**dict(kwargs,objext='.o'))
def gpp_compile_list(srclist,**kwargs):
    """
    gpp_compile_list - compile each file of srclist with g++ through gs_compile_list

    'objext' is forced to '.o'; every other keyword argument is forwarded.
    """
    gs_compile_list('g++',srclist,**dict(kwargs,objext='.o'))
def gfortran_compile(srcfile,**kwargs):
    """
    gfortran_compile - compile one source file with gfortran through gs_compile

    'objext' is forced to '.o'; every other keyword argument is forwarded.
    """
    gs_compile('gfortran',srcfile,**dict(kwargs,objext='.o'))
def gfortran_compile_all(srcdir,**kwargs):
    """
    gfortran_compile_all - compile the '.f', '.f90' and '.f77' files under
    srcdir with gfortran through gs_compile_all

    'objext' is forced to '.o'; every other keyword argument is forwarded.
    """
    gs_compile_all('gfortran',srcdir,['.f','.f90','.f77'],**dict(kwargs,objext='.o'))
def gfortran_compile_list(srclist,**kwargs):
    """
    gfortran_compile_list - compile each file of srclist with gfortran through
    gs_compile_list

    'objext' is forced to '.o'; every other keyword argument is forwarded.
    """
    gs_compile_list('gfortran',srclist,**dict(kwargs,objext='.o'))
def clang_compile(srcfile,**kwargs):
    """
    clang_compile - compile one source file with clang++ through gs_compile

    'objext' is forced to '.o'; every other keyword argument is forwarded.
    """
    gs_compile('clang++',srcfile,**dict(kwargs,objext='.o'))
def clang_compile_all(srcdir,**kwargs):
    """
    clang_compile_all - compile the '.c' and '.cpp' files under srcdir with
    clang++ through gs_compile_all

    'objext' is forced to '.o'; every other keyword argument is forwarded.
    """
    gs_compile_all('clang++',srcdir,['.c','.cpp'],**dict(kwargs,objext='.o'))
def clang_compile_list(srclist,**kwargs):
    """
    clang_compile_list - compile each file of srclist with clang++ through
    gs_compile_list

    'objext' is forced to '.o'; every other keyword argument is forwarded.
    """
    gs_compile_list('clang++',srclist,**dict(kwargs,objext='.o'))

View File

@ -0,0 +1,45 @@
#!/usr/bin/python3
#Library build script (linux64): compiles everything under ./src except the
#main program sources with nvcc, archives the objects into a static library,
#then copies the library and ./include into the common linux64 tree.
import os,sys,subprocess,math
from complib2 import *
import shutil
#from distutils.dir_util import copy_tree as copy_tree #this version does overwrites
from shutil import copytree as copytree

#---- project settings ----
libname = 'amsculib2.linux64' #prefix static library name to generate
targetname = 'test' #create this executable when compiling tests
commonincdir = "../../linux64/include"
commonlibdir = "../../linux64/lib"
localbindir = "./bin_linux64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.c','main.cpp','main.cu'] #ignore these files when compiling the static library

#---- options handed to the complib2 helpers ----
include = "-I./include -I{}".format(commonincdir)
kwargs = {
    'include': include,
    #-dc flag: relocatable device code - needed for device functions to link in different "execution units"
    'flags': "-dc",
    'libdir': "-L{} -L{}".format(localbindir,commonlibdir),
    'libflags': "-l{}".format(libname),
    'linkerflags': "",
    'recurse': True,
}

#---- gather the library sources (main project files excluded) ----
files = except_contains(flist('./src',exts = srcexts, recurse=True),mainsrc)
objfiles = replaceexts(files,'.o')
objfiles_sss = list_to_sss(objfiles)

#---- compile, then archive the objects into the static library ----
gs_compile_list(cc,files,**kwargs)
libpath = '{}/lib{}.a'.format(localbindir,libname)
ar_list(objfiles,libpath)

#---- publish the library and the headers to the common folders ----
shutil.copy(libpath,commonlibdir)
copytree('./include/',commonincdir+'/',dirs_exist_ok=True)

View File

@ -0,0 +1,38 @@
#!/usr/bin/python3
#Test build script (linux64): compiles ./src/main.cu with nvcc and links it
#against the static library into the local bin folder.
import os,sys,subprocess,math
from complib2 import *
import shutil

#---- project settings ----
libname = 'amsculib2.linux64' #prefix static library name to generate
targetname = 'test' #create this executable when compiling tests
commonincdir = "../../linux64/include"
commonlibdir = "../../linux64/lib"
localbindir = "./bin_linux64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.c','main.cpp','main.cu'] #ignore these files when compiling the static library

#---- options handed to the complib2 helpers ----
include = "-I./include -I{}".format(commonincdir)
kwargs = {
    'include': include,
    #-dc flag: relocatable device code - needed for device functions to link in different "execution units"
    'flags': "-dc",
    'libdir': "-L{} -L{}".format(localbindir,commonlibdir),
    'libflags': "-l{}".format(libname),
    'linkerflags': "",
    'recurse': True,
}

#Pull required binary dynamic libraries to the bin folder
#shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir);
#shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir);
#shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64');

#---- sources of the main test program ----
fsrc = ['./src/main.cu']
fobj = replaceexts(fsrc,'.o')

#---- compile, then link the test executable ----
gs_compile_list(cc,fsrc,**kwargs)
exepath = '{}/{}'.format(localbindir,targetname)
gs_link_list(cc,list_to_sss(fobj),exepath,**kwargs)

View File

@ -0,0 +1,45 @@
#!/usr/bin/python3
#Library build script (winx64, MSVC-style command lines): compiles the '.c' and
#'.cpp' files under ./src except the main program sources, builds a .lib with
#the 'lib' tool, then copies the library and ./include into the common tree.
#NOTE(review): cc is 'nvcc' but the flags ('/O2') and helper (msvc_compile_list)
#are MSVC-style - confirm this combination is intended.
import os,sys,subprocess,math
from complib2 import *
import shutil
from shutil import copytree as copytree

#---- project settings ----
libname = 'assetcuda.msvc64' #prefix static library name to generate
targetname = 'main' #create this executable when compiling tests
commonincdir = "../../winx64/include"
commonlibdir = "../../winx64/lib"
localbindir = "./bin_winx64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp']
mainsrc = ['main.c','main.cpp','main.cu'] #ignore these files when compiling the static library

#---- options handed to the complib2 helpers ----
include = "-I./include -I{}".format(commonincdir)
kwargs = {
    'include': include,
    'flags': "/O2",
    'libdir': "/LIBPATH:{} /LIBPATH:{}".format(localbindir,commonlibdir),
    'libflags': "-l{}".format(libname),
    'linkerflags': "",
    'recurse': True,
}

#---- gather the library sources (main project files excluded) ----
files = except_contains(flist('./src',exts = srcexts, recurse=True),mainsrc)
objfiles = replaceexts(files,'.obj')
objfiles_sss = list_to_sss(objfiles)

#---- compile all the source files in the list ----
msvc_compile_list(cc,files,**kwargs)
#gs_compile_list(cc,files,**kwargs)

#---- archive the objects into a static library ----
#ar_list(objfiles,'{}/lib{}.a'.format(localbindir,libname))
libpath = '{}/lib{}.lib'.format(localbindir,libname)
msvc_lib_list(objfiles,libpath)

#---- publish the library and the headers to the common folders ----
shutil.copy(libpath,commonlibdir)
copytree('./include/',commonincdir+'/',dirs_exist_ok=True)

View File

@ -0,0 +1,39 @@
#!/usr/bin/python3
#Test build script (winx64, MSVC-style command lines): compiles ./src/main.cu
#and links it against the static library into the local bin folder.
import os,sys,subprocess,math
from complib2 import *
import shutil
try:
    from distutils.dir_util import copy_tree as copy_tree #this version does overwrites
except ImportError:
    #distutils was removed from the standard library in Python 3.12; nothing
    #below calls copy_tree, so its absence must not stop the build
    copy_tree = None
libname = 'assetcuda.msvc64' #prefix static library name to generate
targetname = 'tests.exe' #create this executable when compiling tests
commonincdir = "../../winx64/include"
commonlibdir = "../../winx64/lib"
localbindir = "./bin_winx64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp']
mainsrc = ['main.c','main.cpp','main.cu'] #ignore these files when compiling the static library
kwargs = dict()
include = "-I./include -I{}".format(commonincdir)
kwargs['include'] = include
kwargs['flags'] = "/O2"
kwargs['libdir'] = "/LIBPATH:{} /LIBPATH:{}".format(localbindir,commonlibdir)
#kwargs['libflags'] = "lib{}.lib libamsearthtools.msvc64.lib libamsmeshtools.msvc64.lib libamsmathlib3.msvc64.lib libamsmatrix_cpp.msvc64.lib liblapack.a libblas.a libamsstring3.msvc64.lib libamsmathutil2.msvc64.lib".format(libname)
kwargs['libflags'] = "lib{}.lib".format(libname)
kwargs['linkerflags'] = ""
kwargs['recurse'] = True
#Pull required binary dynamic libraries to the bin folder
#shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir);
#shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir);
#shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64');
#Designate source files for main test program
fsrc = ['./src/main.cu']
fobj = replaceexts(fsrc,'.obj')
#Compile test programs
msvc_compile_list(cc,fsrc,**kwargs)
#Link the objects into the test executable with the MSVC linker
msvc_link_list(list_to_sss(fobj),'{}/{}'.format(localbindir,targetname),**kwargs)

View File

@ -0,0 +1,44 @@
#!/usr/bin/python3
#Library build script (winx64): compiles everything under ./src except the
#listed main program sources with nvcc, builds a .lib with the 'lib' tool,
#then copies the library and ./include into the common winx64 tree.
import os,sys,subprocess,math
from complib2 import *
import shutil
try:
    from distutils.dir_util import copy_tree as copy_tree #this version does overwrites
except ImportError:
    #distutils was removed from the standard library in Python 3.12; fall back
    #to shutil.copytree, which also copies into an existing tree when
    #dirs_exist_ok=True
    def copy_tree(src,dst):
        return shutil.copytree(src,dst,dirs_exist_ok=True)
libname = 'amsculib2.msvc64' #prefix static library name to generate
targetname = 'test' #create this executable when compiling tests
commonincdir = "../../winx64/include"
commonlibdir = "../../winx64/lib"
localbindir = "./bin_winx64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp','.cu']
#NOTE(review): 'main.cu' is not excluded here although '.cu' files are
#collected - confirm no main.cu exists under ./src for this build
mainsrc = ['main.c','main.cpp'] #ignore these files when compiling the static library
kwargs = dict()
include = "-I./include -I{}".format(commonincdir)
kwargs['include'] = include
kwargs['flags'] = "-dc"
kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir)
kwargs['libflags'] = "-l{}".format(libname)
kwargs['linkerflags'] = ""
kwargs['recurse'] = True
#find all source files, except the main project files
files = flist('./src',exts = srcexts, recurse=True)
files = except_contains(files,mainsrc)
objfiles = replaceexts(files,'.o')
objfiles_sss = list_to_sss(objfiles)
#compile all the source files in the list
gs_compile_list(cc,files,**kwargs)
#archive all the source files into a static library
#ar_list(objfiles,'{}/lib{}.a'.format(localbindir,libname))
msvc_lib_list(objfiles,'{}/lib{}.lib'.format(localbindir,libname))
#Push any libraries to the common lib folder
shutil.copy('{}/lib{}.lib'.format(localbindir,libname),commonlibdir)
#Copy include files to the common include folder
copy_tree('./include/',commonincdir+'/')

View File

@ -0,0 +1,38 @@
#!/usr/bin/python3
#Test build script (winx64): compiles ./src/main.cpp with nvcc and links it
#against the static library into the local bin folder.
import os,sys,subprocess,math
from complib2 import *
import shutil
try:
    from distutils.dir_util import copy_tree as copy_tree #this version does overwrites
except ImportError:
    #distutils was removed from the standard library in Python 3.12; nothing
    #below calls copy_tree, so its absence must not stop the build
    copy_tree = None
libname = 'amsculib2.msvc64' #prefix static library name to generate
targetname = 'test' #create this executable when compiling tests
commonincdir = "../../winx64/include"
commonlibdir = "../../winx64/lib"
localbindir = "./bin_winx64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.c','main.cpp'] #ignore these files when compiling the static library
kwargs = dict()
include = "-I./include -I{}".format(commonincdir)
kwargs['include'] = include
kwargs['flags'] = "-dc"
kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir)
#the library file is named lib<libname>.lib, hence the extra 'lib' after -l
kwargs['libflags'] = "-llib{}".format(libname)
kwargs['linkerflags'] = ""
kwargs['recurse'] = True
#Pull required binary dynamic libraries to the bin folder
#shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir);
#shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir);
#shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64');
#Designate source files for main test program
fsrc = ['./src/main.cpp']
fobj = replaceexts(fsrc,'.o')
#Compile test programs
gs_compile_list(cc,fsrc,**kwargs)
#Link the objects into the test executable, using nvcc as the linker driver
gs_link_list(cc,list_to_sss(fobj),'{}/{}'.format(localbindir,targetname),**kwargs)

View File

@ -0,0 +1,49 @@
#!/usr/bin/python3
#Library build script (winx64, incremental): compiles everything under ./src
#except main.cu through gs_incremental_compile_list, builds a .lib from the
#objects found in the object store, then copies the library and ./include
#into the common winx64 tree.
import os,sys,subprocess,math
from complib2 import *
from complib3 import gs_incremental_compile, gs_incremental_compile_list
import shutil
from shutil import copytree

#---- project settings ----
libname = 'amsculib2.msvc64' #prefix static library name to generate
targetname = 'test' #create this executable when compiling tests
commonincdir = "../../winx64/include"
commonlibdir = "../../winx64/lib"
localbindir = "./bin_winx64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.cu'] #ignore these files when compiling the static library

#---- options handed to the build helpers ----
include = "-I./include -I{}".format(commonincdir)
kwargs = {
    'include': include,
    'flags': "-dc",
    'libdir': "-L{} -L{}".format(localbindir,commonlibdir),
    'libflags': "-l{}".format(libname),
    'linkerflags': "",
    'recurse': True,
    'objstore': "./objstore",
    'searchincdirs': ['./include'],
}

#---- gather the library sources (main project files excluded) ----
files = except_contains(flist('./src',exts = srcexts, recurse=True),mainsrc)
objfiles = replaceexts(files,'.o')
objfiles_sss = list_to_sss(objfiles)

#---- compile all the source files in the list ----
#gs_compile_list(cc,files,**kwargs)
gs_incremental_compile_list(cc,files,**kwargs)

#---- archive whatever objects are in the object store into a static library ----
#ar_list(objfiles,'{}/lib{}.a'.format(localbindir,libname))
objlist = flist(kwargs['objstore'],exts='.o',recurse=True)
libpath = '{}/lib{}.lib'.format(localbindir,libname)
msvc_lib_list(objlist,libpath)

#---- publish the library and the headers to the common folders ----
shutil.copy(libpath,commonlibdir)
copytree('./include/',commonincdir+'/',dirs_exist_ok=True)

View File

@ -0,0 +1,43 @@
#!/usr/bin/python3
#Test build script (winx64): compiles ./src/main.cu with nvcc and links it
#against the static library into the local bin folder.
import os,sys,subprocess,math
from complib2 import *
from complib3 import gs_incremental_compile, gs_incremental_compile_list
import shutil
from shutil import copytree

#---- project settings ----
libname = 'amsculib2.msvc64' #prefix static library name to generate
targetname = 'test' #create this executable when compiling tests
commonincdir = "../../winx64/include"
commonlibdir = "../../winx64/lib"
localbindir = "./bin_winx64"
cc = 'nvcc' #compiler
srcexts = ['.c','.cpp','.cu']
mainsrc = ['main.cu'] #ignore these files when compiling the static library

#---- options handed to the build helpers ----
include = "-I./include -I{}".format(commonincdir)
kwargs = {
    'include': include,
    'flags': "-dc",
    'libdir': "-L{} -L{}".format(localbindir,commonlibdir),
    #with the libname above, both -l entries name the same library
    'libflags': "-llib{} -llibamsculib2.msvc64".format(libname),
    'linkerflags': "",
    'recurse': True,
    'objstore': "./objstore",
    'searchincdirs': ['./include'],
}
#-lamsmathlib3.linux64 -lamsstring3.linux64 -lamsmatrix_cpp.linux64 -llapack -lblas -lgfortran -lamsmathutilthread.linux64 -lamsmathutil2.linux64

#Pull required binary dynamic libraries to the bin folder
#shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir);
#shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir);
#shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64');

#---- sources of the main test program ----
fsrc = ['./src/main.cu']
fobj = replaceexts(fsrc,'.o')

#---- compile, then link the test executable ----
gs_compile_list(cc,fsrc,**kwargs)
exepath = '{}/{}'.format(localbindir,targetname)
gs_link_list(cc,list_to_sss(fobj),exepath,**kwargs)

Some files were not shown because too many files have changed in this diff Show More