cuda library updates

2026-02-20 11:46:15 -05:00
parent c9f6307fc2
commit 3128d5dd19
122 changed files with 10842 additions and 7434 deletions
--- a/2
+++ b/2
@ -1 +1 @@
-Copyright Aaron M. Schinder, 2023
+Copyright Aaron M. Schinder, 2023
--- a/amsculib2.code-workspace
+++ b/amsculib2.code-workspace
@ -0,0 +1,7 @@
 {
 	"folders": [
 		{
 			"path": "."
 		}
 	]
 }
--- a/backup.sh
+++ b/backup.sh
@ -0,0 +1,8 @@
 #!/bin/bash
 # Package the project tree into a tarball and upload it to the remote workspace.
 # Delete the .bin files under ./test_scripts before archiving.
 rm ./test_scripts/*.bin
 # Archive everything in the current directory except ./data into ../amsculib2.tar.gz.
 tar --exclude='./data' -czvf ../amsculib2.tar.gz ./*
 # Copy the archive to ~/workspace/projects on the remote host with scp.
 scp ../amsculib2.tar.gz aschinder@amssolarempire.com:~/workspace/projects
--- a/build/pycache/amsbuildlib4.cpython-310.pyc
+++ b/build/pycache/amsbuildlib4.cpython-310.pyc
--- a/build/pycache/amsbuildlib4.cpython-311.pyc
+++ b/build/pycache/amsbuildlib4.cpython-311.pyc
--- a/build/pycache/amsbuildlib4.cpython-312.pyc
+++ b/build/pycache/amsbuildlib4.cpython-312.pyc
--- a/build/pycache/amsbuildlib4.cpython-39.pyc
+++ b/build/pycache/amsbuildlib4.cpython-39.pyc
--- a/build/amsbuildlib4.py
+++ b/build/amsbuildlib4.py
@ -0,0 +1,813 @@
 #!/usr/bin/python3
 import os,sys,math
 import subprocess
 """
 Copyright Aaron M. Schinder, 2011 - MIT/BSD License
 This script contains a bunch of helper functions for generating simple, imperative, hopefully
 transparent build scripts using the python language (and nothing else). 
 I just want the script to do the compiling and linking operations I want it to do in the order
 I want it to do it in, finding every relevant source file. 
 That's it. That's what I want in a build system.
 """
 def flist(pth,**kwargs):
    """
    flist - collect the paths of the files found in directory pth
    optional arguments:
       recurse - (T/F): also descend into subdirectories (default False)
       exts - (str/list): keep only files with these extensions, otherwise all files
       normpath - (T/F): run os.path.normpath over every returned path (default True)
       followlinks - (T/F): descend into directories that are symbolic links (default False)
       linuxpath - accepted, but currently has no effect
    filelist = flist(pth,**kwargs)
    """
    descend = kwargs.get('recurse',False)
    normalize = kwargs.get('normpath',True)
    follow = kwargs.get('followlinks',False)
    files = []
    subdirs = []
    for name in os.listdir(pth):
        full = os.path.join(pth,name)
        if(os.path.isdir(full)):
            #symlinked directories are only kept when followlinks is set
            if((follow or not os.path.islink(full)) and full not in (".","..")):
                subdirs.append(full)
        elif(os.path.isfile(full)):
            files.append(full)
    #Gather the contents of every subdirectory with the same options
    if(descend):
        for sub in subdirs:
            files.extend(flist(sub,**kwargs))
    #Postprocess: keep only the requested extensions
    if('exts' in kwargs):
        files = filterexts(files,kwargs['exts'])
    #Postprocess: normalize the paths for the current os
    if(normalize):
        files = [os.path.normpath(f) for f in files]
    return files
 def filterexts(flst,exts):
    """
    Keep only the file names in flst whose extension is one of exts.
    exts may be a single extension string or a list of them; the leading
    dot is optional ("c" and ".c" select the same files).
    flst = filterexts(flst,exts)
    """
    if(isinstance(exts,str)):
        exts = [exts]
    kept = []
    for path in flst:
        suffix = os.path.splitext(path)[1]
        matched = False
        for ext in exts:
            wanted = ext if ext[0]=='.' else '.'+ext
            #files without an extension never match
            if(len(suffix)>0 and suffix==wanted):
                matched = True
        if(matched):
            kept.append(path)
    return kept
 #Find a file fname, starting in pth and recursing
 #Used for finding library files to link
 def findfile(fname,pth,**kwargs):
    """
    Search the directory tree under pth for a file named fname.
    Returns the path of the last match in flist order, or "" when there is none.
    """
    located = ""
    for candidate in flist(pth,recurse=True):
        if(os.path.basename(candidate)==fname):
            located = candidate
    return located
 def replaceext(fname,ext):
    """
    Return fname with its extension swapped for ext.
    The leading dot of ext is optional; an empty ext just removes the extension.
    """
    if(len(ext)==0):
        return os.path.splitext(fname)[0]
    suffix = ext if ext[0]=='.' else '.'+ext
    return os.path.splitext(fname)[0]+suffix
 def replaceexts(fnamelist,ext):
    """Apply replaceext to every file name in fnamelist and return the results as a new list"""
    return [replaceext(name,ext) for name in fnamelist]
 def except_contains(lst1,exc):
    """
    Return the entries of lst1 whose file name (the last path component)
    does not equal any of the names listed in exc.
    """
    kept = []
    for path in lst1:
        basename = os.path.split(path)[-1]
        excluded = any(basename==name for name in exc)
        if(not excluded):
            kept.append(path)
    return kept
 def list_to_sss(lst):
    """
    List of strings to space-separated string.
    Returns "" for an empty list; a single entry is returned without padding.
    """
    #str.join avoids rebuilding the string once per element
    return " ".join(lst)
 ##########################
 ##System Call Procedures##
 ##########################
 def callproc(cmd, **kwargs):
    """
    Run cmd through the system shell and wait for it to finish.
    stderr is merged into stdout; the combined output is captured.
    optional arguments:
       logfile - (str): file the command and its output are appended to
                        (omitted or "" disables logging)
       echo - (T/F): print the command and its output (default True)
    returns the exit status of the command
    """
    logfile = kwargs.get('logfile',"")
    echo = kwargs.get('echo',True)
    if(echo):
        print(cmd)
    #The command is handed over as utf-8 bytes everywhere except on windows
    if(sys.platform!='win32'):
        cmd2 = cmd.encode(encoding='utf-8')
    else:
        cmd2 = cmd
    proc = subprocess.Popen(cmd2,stderr = subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
    (out, err) = proc.communicate()
    #Tool output is not guaranteed to be valid utf-8: substitute undecodable bytes instead of raising
    out = out.decode(encoding='utf-8',errors='replace')
    if(echo):
        print(out)
    #Open the log only for the duration of the write so the handle can never leak
    if(logfile!=""):
        with open(logfile,'a+') as fp:
            fp.write(cmd+'\n')
            fp.write(out+'\n')
    return proc.returncode
 #############################################
 ## Compiler, Archive, and Linker Functions ##
 #############################################
 #MSVC compiler wrapper
 def msvc_compile(compilername, srcfile, **kwargs):
    """
    Compile one source file with an msvc-style command line.
    The object file is written next to the source, with the extension swapped for objext.
    optional arguments:
       include - (str/list): include flags appended to the end of the command
       flags - (str/list): compiler flags
       objext - (str): object file extension (default '.obj')
       srcfileflag - (str): flag placed before the source file (default '/c')
       outfileflag - (str): flag glued in front of the quoted output file (default '/Fo:')
       logfile - (str): log file handed to callproc (default "")
    """
    include = kwargs.get('include','')
    if(isinstance(include,list)):
        include = list_to_sss(include)
    flags = kwargs.get('flags','')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext','.obj')
    srcfileflag = kwargs.get('srcfileflag','/c')
    outfileflag = kwargs.get('outfileflag','/Fo:')
    logfile = kwargs.get('logfile',"")
    outfile = replaceext(srcfile,objext)
    #the empty entry reproduces the doubled space between the flags and the source flag
    pieces = [compilername,flags,"",srcfileflag,srcfile,outfileflag+'"'+outfile+'"',include]
    callproc(" ".join(pieces),echo=True,logfile=logfile)
    return
 #MSVC compiler wrapper
 def msvc_compile_list(compiler,srclist,**kwargs):
    """Run msvc_compile on every source file in srclist, passing kwargs through unchanged"""
    for src in srclist:
        msvc_compile(compiler,src,**kwargs)
 #gnu-style compiler compile: Should work with gcc, g++, gfortran
 def gs_compile(compiler,srcfile,**kwargs):
    """
    Compile one source file with a gnu-style command line.
    The object file is written next to the source, with the extension swapped for objext.
    optional arguments:
       include - (str/list): include flags appended to the end of the command
       flags - (str/list): compiler flags
       objext - (str): object file extension (default '.o')
       srcfileflag - (str): flag placed before the source file (default '-c')
       outfileflag - (str): flag placed before the output file (default '-o')
       logfile - (str): log file handed to callproc (default "")
       smartcompile - accepted, but currently has no effect
    """
    include = kwargs.get('include','')
    if(isinstance(include,list)):
        include = list_to_sss(include)
    flags = kwargs.get('flags','')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext','.o')
    srcfileflag = kwargs.get('srcfileflag','-c')
    outfileflag = kwargs.get('outfileflag','-o')
    logfile = kwargs.get('logfile',"")
    outfile = replaceext(srcfile,objext)
    #command layout: compiler flags -o outfile -c srcfile include
    pieces = [compiler,flags,outfileflag,outfile,srcfileflag,srcfile,include]
    callproc(" ".join(pieces),echo=True,logfile=logfile)
    return
 def gs_compile_list(compiler,srclist,**kwargs):
    """Run gs_compile on every source file in srclist, passing kwargs through unchanged"""
    for src in srclist:
        gs_compile(compiler,src,**kwargs)
 def gs_compile_all(compiler,srcdir,srcexts,**kwargs):
    """
    Compile every file under srcdir whose extension is in srcexts with gs_compile.
    optional arguments:
       recurse - (T/F): also search subdirectories (default True)
       all remaining kwargs are passed on to gs_compile
    """
    recurse = kwargs.get('recurse',True)
    for src in flist(srcdir,exts=srcexts,recurse=recurse):
        gs_compile(compiler,src,**kwargs)
    return
 def gs_link_all(linker,srcpath,target,**kwargs):
    """
    Link every object file found under srcpath into target with gs_link_list.
    optional arguments:
       objext - (str): object file extension to collect (default '.o')
       recurse - (T/F): also search subdirectories (default True)
       all remaining kwargs are passed on to gs_link_list
    """
    objext = kwargs.get('objext','.o')
    recurse = kwargs.get('recurse',True)
    objects = list_to_sss(flist(srcpath,exts=objext,recurse=recurse))
    gs_link_list(linker,objects,target,**kwargs)
    return
 def gs_link_list(linker,objlist,target,**kwargs):
    """
    Link object files into target with a gnu-style command line.
    objlist is a space-separated string of object files (see list_to_sss).
    optional arguments (all strings, default ''):
       libdir - library search path flags
       staticlibs - static libraries to link
       libflags - library flags
       linkerflags - extra linker flags
       logfile - log file handed to callproc
    objext and recurse are accepted, but have no effect here
    """
    libdir = kwargs.get('libdir','')
    staticlibs = kwargs.get('staticlibs','')
    libflags = kwargs.get('libflags','')
    linkerflags = kwargs.get('linkerflags','')
    logfile = kwargs.get('logfile','')
    #command layout: linker -o target libdir objects staticlibs libflags linkerflags
    pieces = [linker,"-o",target,libdir,objlist,staticlibs,libflags,linkerflags]
    callproc(" ".join(pieces),logfile=logfile)
    return
 def msvc_link_list(objlist,target,**kwargs):
    """
    Link object files into target with the msvc 'link' command.
    objlist is a space-separated string of object files (see list_to_sss).
    optional arguments (all strings, default ''):
       libdir - library search path flags
       staticlibs - static libraries to link
       libflags - library flags, placed after /out:
       linkerflags - extra linker flags
       logfile - log file handed to callproc
    objext and recurse are accepted, but have no effect here
    """
    libdir = kwargs.get('libdir','')
    staticlibs = kwargs.get('staticlibs','')
    libflags = kwargs.get('libflags','')
    linkerflags = kwargs.get('linkerflags','')
    logfile = kwargs.get('logfile','')
    #command layout: link libdir objects staticlibs linkerflags /out:target libflags
    pieces = ['link',libdir,objlist,staticlibs,linkerflags,"/out:"+target,libflags]
    callproc(" ".join(pieces),logfile=logfile)
    return
 def ar_all(srcpath,arname,**kwargs):
    """
    Archive every object file found under srcpath into arname with ar_list.
    optional arguments:
       recurse - (T/F): also search subdirectories (default True)
       objext - (str): object file extension to collect (default '.o')
    """
    recurse = kwargs.get('recurse',True)
    objext = kwargs.get('objext','.o')
    ar_list(flist(srcpath,exts=objext,recurse=recurse),arname,**kwargs)
    return
 def msvc_lib_list(objlist,arname,**kwargs):
    """Run the msvc 'lib' command on the object files in the list objlist, with /out: set to arname"""
    objects = list_to_sss(objlist)
    callproc("lib "+objects+" /out:"+arname)
 def ar_list(objlist,arname,**kwargs):
    """Run 'ar cr' to put the object files in the list objlist into the archive arname"""
    objects = list_to_sss(objlist)
    callproc("ar cr "+arname+" "+objects)
 def ar_add_list(objlist,arname,**kwargs):
    """
    Add the object files in the list objlist to the existing archive arname.
    Members already present under the same name are replaced.
    """
    objlist2 = list_to_sss(objlist)
    #'ar r' inserts (or replaces) members; 'ar t' would only print the table of contents
    ln = "ar r "+arname+" "+objlist2
    callproc(ln)
    return
 #####################################
 ## Incremental Compilation Library ##
 #####################################
 #silently read lines from a text file if exists
 def readtextlines(fname):
    """
    Read a text file and return its lines (line endings kept) as a list.
    Returns an empty list when fname is not a file or cannot be opened/read.
    Bytes that cannot be decoded are substituted rather than raising.
    """
    if(not os.path.isfile(fname)):
        return []
    try:
        #the with block closes the file even when reading fails part way
        with open(fname,"r",errors="replace") as fp:
            return fp.readlines()
    except OSError:
        return []
 def getincludefnfrage(includeline):
    """
    Extract the file name from an include line: the text between <...> or "...".
    Returns "" when the line holds no opening delimiter or the matching
    closing delimiter is missing.
    """
    closers = {'<':'>','"':'"'}
    for start,ch in enumerate(includeline):
        if(ch in closers):
            #search for the closer strictly after the opener, so that an opening
            #quote is never taken for its own closing quote
            stop = includeline.find(closers[ch],start+1)
            if(stop>=0):
                return includeline[start+1:stop]
            return ""
    return ""
 #Returns the name of the source file fname (if it exists)
 #and all included filenames
 def getsrcandincludes(fname, incdirs):
    """
    Return fname followed by every file it includes, directly or indirectly.
    fname - source file to scan; when it is not a file the result is empty
    incdirs - (str/list): directories searched, in order, for each included name
    Each file is listed once, in the order it was first found. Includes that
    cannot be found in any of incdirs are ignored.
    """
    #A bare string is one directory, not a sequence of one-character directories
    if(isinstance(incdirs,str)):
        incdirs = [incdirs]
    found = []
    if(not os.path.isfile(fname)):
        return found
    found.append(fname)
    #normalized paths already queued: keeps mutually including headers from looping forever
    seen = {os.path.normpath(fname)}
    idx = 0
    while(idx<len(found)):
        #found grows while it is being walked: newly found files are scanned too
        for line in readtextlines(found[idx]):
            if(line.find("#include")<0):
                continue
            fnfrag = getincludefnfrage(line)
            if(fnfrag==""):
                continue
            for incdir in incdirs:
                tfn = os.path.join(incdir,fnfrag)
                if(os.path.isfile(tfn)):
                    key = os.path.normpath(tfn)
                    if(key not in seen):
                        seen.add(key)
                        found.append(tfn)
                    #the first directory holding the file wins
                    break
        idx = idx + 1
    return found
 #Returns the name of the object file associated with the source file
 #within the object store folder (if it exists)
 def getobjfile(fname,objstore,objext = ".o"):
    """
    Return the path of the object file belonging to source file fname inside
    the objstore folder, or "" when no such file exists.
    The object name is the source file name with all of its extensions removed,
    followed by objext (with or without the leading dot).
    """
    stem = os.path.basename(fname)
    #peel extensions off one at a time: foo.tar.cu -> foo.tar -> foo
    while(True):
        head,tail = os.path.splitext(stem)
        if(tail==""):
            break
        stem = head
    candidate = os.path.join(objstore,"{}.{}".format(stem,objext.strip('.')))
    return candidate if os.path.exists(candidate) else ""
 def getsrctimes(fname, incdirs):
    """
    Return the modification times of fname and of the files it includes,
    in the same order as getsrcandincludes(fname, incdirs) lists them.
    """
    return [os.path.getmtime(path) for path in getsrcandincludes(fname, incdirs)]
 def getobjtime(fname,objstore,objext=".o"):
    """
    Return the modification time of the object file that getobjfile finds
    for fname, or -1 when there is no such object file.
    """
    objpath = getobjfile(fname,objstore,objext)
    if(objpath==""):
        return -1
    return os.path.getmtime(objpath)
 #Decide whether or not to compile source file
 def decidecompile(fname,**kwargs):
    """
    Decide whether source file fname has to be compiled.
    optional arguments:
       searchincdirs - (str/list): directories searched for included files (default ["./include"])
       objext - (str): object file extension (default ".o")
       objstore - (str): folder holding the object files (default "./objstore")
    returns
       False when fname is not a file,
       True when no object file exists for it yet,
       True when fname or one of its included files is newer than the object file,
       False otherwise
    """
    if(not os.path.isfile(fname)):
        return False
    incdirs = kwargs.get("searchincdirs",["./include"])
    #A bare string is one directory, not a sequence of one-character directories
    if(isinstance(incdirs,str)):
        incdirs = [incdirs]
    objext = kwargs.get("objext",".o")
    objstore = kwargs.get("objstore","./objstore")
    #Nothing built yet: no need to look at any time stamp
    if(getobjfile(fname,objstore,objext)==""):
        return True
    objt = getobjtime(fname,objstore,objext)
    #getsrctimes scans the includes itself, so a single scan is enough
    return any(srct>objt for srct in getsrctimes(fname,incdirs))
 def gs_incremental_compile(compiler,srcfile,**kwargs):
    """
    Compile one source file with a gnu-style command line, but only when
    decidecompile reports that it needs compiling. The object file is written
    into the objstore folder, named after the source file without its extensions.
    optional arguments:
       include - (str/list): include flags appended to the end of the command
       flags - (str/list): compiler flags
       objext - (str): object file extension, with or without the leading dot (default '.o')
       srcfileflag - (str): flag placed before the source file (default '-c')
       outfileflag - (str): flag placed before the output file (default '-o')
       logfile - (str): log file handed to callproc (default "")
       objstore - (str): folder receiving the object files (default "./objstore")
       searchincdirs - read by decidecompile, which receives all kwargs
       smartcompile - accepted, but currently has no effect
    """
    include = kwargs.get('include','')
    if(isinstance(include,list)):
        include = list_to_sss(include)
    flags = kwargs.get('flags','')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext','.o')
    srcfileflag = kwargs.get('srcfileflag','-c')
    outfileflag = kwargs.get('outfileflag','-o')
    logfile = kwargs.get('logfile',"")
    objstore = kwargs.get("objstore","./objstore")
    if(not decidecompile(srcfile,**kwargs)):
        return
    #Strip every extension from the source file name: foo.tar.cu -> foo
    stem = os.path.split(srcfile)[1]
    while(os.path.splitext(stem)[1]!=""):
        stem = os.path.splitext(stem)[0]
    #getobjfile looks the object up as "<stem>.<objext without dots>", so write it
    #under exactly that name: otherwise objext="o" would produce "fooo" and never match
    outfile = os.path.join(objstore,"{}.{}".format(stem,objext.strip('.')))
    #Make sure the output folder exists before the compiler writes into it
    if(objstore!=""):
        os.makedirs(objstore,exist_ok=True)
    ln = compiler+" "+flags+" " + outfileflag+" "+outfile+" "+srcfileflag+" "+srcfile
    ln = ln + " " + include
    callproc(ln,echo=True,logfile=logfile)
    return
 def msvc_incremental_compile(compiler,srcfile,**kwargs):
    """
    Compile one source file with an msvc-style command line, but only when
    decidecompile reports that it needs compiling. The object file is written
    into the objstore folder, named after the source file without its extensions.
    optional arguments:
       include - (str/list): include flags appended to the end of the command
       flags - (str/list): compiler flags
       objext - (str): object file extension, with or without the leading dot (default '.obj')
       srcfileflag - (str): flag placed before the source file (default '/c')
       outfileflag - (str): flag glued in front of the quoted output file (default '/Fo')
       logfile - (str): log file handed to callproc (default "")
       objstore - (str): folder receiving the object files (default "./objstore")
       searchincdirs - read by decidecompile, which receives all kwargs
    """
    include = kwargs.get('include','')
    if(isinstance(include,list)):
        include = list_to_sss(include)
    flags = kwargs.get('flags','')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext','.obj')
    srcfileflag = kwargs.get('srcfileflag','/c')
    outfileflag = kwargs.get('outfileflag','/Fo')
    logfile = kwargs.get('logfile',"")
    objstore = kwargs.get("objstore","./objstore")
    if(not decidecompile(srcfile,**kwargs)):
        return
    #Strip every extension from the source file name: foo.tar.cu -> foo
    stem = os.path.split(srcfile)[1]
    while(os.path.splitext(stem)[1]!=""):
        stem = os.path.splitext(stem)[0]
    #getobjfile looks the object up as "<stem>.<objext without dots>", so write it
    #under exactly that name: otherwise objext="obj" would produce "fooobj" and never match
    outfile = os.path.join(objstore,"{}.{}".format(stem,objext.strip('.')))
    outfile = os.path.normpath(outfile)
    #Make sure the output folder exists before the compiler writes into it
    if(objstore!=""):
        os.makedirs(objstore,exist_ok=True)
    ln = compiler+" "+flags+" "+srcfileflag+" "+srcfile+" "+ outfileflag+'"'+outfile+'"'
    ln = ln + " " + include
    callproc(ln,echo=True,logfile=logfile)
    return
 def gs_incremental_compile_list(compiler,srclist,**kwargs):
    """Run gs_incremental_compile on every source file in srclist, passing kwargs through unchanged"""
    for src in srclist:
        gs_incremental_compile(compiler,src,**kwargs)
 def msvc_incremental_compile_list(compiler,srclist,**kwargs):
    """Run msvc_incremental_compile on every source file in srclist, passing kwargs through unchanged"""
    for src in srclist:
        msvc_incremental_compile(compiler,src,**kwargs)
 #######################
 ## Main Script Tests ##
 #######################
 def testtimes(args):
    """
    Manual check of the incremental compilation helpers.
    args is an argv-style list; args[1] names the source file to inspect.
    Prints the source file and its includes with their modification times,
    the associated object file in ./objstore (if any) and the decidecompile verdict.
    Does nothing when args has fewer than two entries.
    """
    if(len(args)<2):
        return
    target = args[1]
    srcs = getsrcandincludes(target,["./include"])
    stamps = getsrctimes(target,["./include"])
    for path,stamp in zip(srcs,stamps):
        print("{}\t\t{}".format(path,stamp))
    print("associated obj file:")
    objpath = getobjfile(target,"./objstore")
    objstamp = getobjtime(target,"./objstore")
    if(objpath==""):
        print("none found")
    else:
        print("{}\t\t{}".format(objpath,objstamp))
    print("compile? : {}".format(decidecompile(target)))
    return
 # if(__name__ == "__main__"):
 #     args = sys.argv
 #     testtimes(args)
--- a/build/make.linux64.lib.py
+++ b/build/make.linux64.lib.py
@ -0,0 +1,58 @@
 #!/usr/bin/python3
 #Build script: incrementally compiles the sources under ./src with nvcc, archives the
 #objects into a static library and, when doinstall is set, copies library and headers to installdir
 import os,sys,math
 import subprocess
 import shutil
 from shutil import copytree
 from amsbuildlib4 import *
 libname = "amsculib2.linux64" #static library name to generate
 binname = "test" #create this executable when compiling main.c or main.cpp
 commondir = "../../linux64" #common directory to pull libraries and includes from
 depdir = "./dependencies/linux64" #local pre-compiled dependency libraries and their includes
 installdir = "../../linux64" #directory to install to when finished
 builddir = "./build_linux64"
 doinstall = True   #copies the build_output to the install dir when finished
 cc = "nvcc" #compiler
 cflags = "-dc --compiler-options '-fPIC -O3'"
 libraries = "-l{}".format(libname)
 libdirs = "-L{} -L{}/lib -L{}/lib".format(builddir,commondir,depdir)
 linkerflags = " -Xlinker=-rpath,."
 srcexts = [".c",".cpp",".cu"]
 binsrc = ["main.c","main.cpp", "main.cu"] #ignore these files when compiling the static library
 #keyword list to control the compilers/linkers
 kwargs = dict()
 include = "-I./include -I{}/include -I{}/include".format(commondir, depdir)
 kwargs["include"] = include
 kwargs["flags"] = cflags
 kwargs["libdir"] = libdirs
 kwargs["libflags"] = libraries
 kwargs["linkerflags"] = linkerflags
 kwargs["recurse"] = True
 kwargs["objstore"] = "{}/objstore".format(builddir)
 #must be a list of directories: a bare string would be walked one character at a time
 kwargs["searchincdirs"] = ["./include"]
 #Find all source files, except the main project files
 srcfiles = flist('./src',exts = srcexts, recurse=True)
 srcfiles = except_contains(srcfiles,binsrc)
 #compile all the source files in the list
 #gs_compile_list(cc,files,**kwargs)
 gs_incremental_compile_list(cc,srcfiles,**kwargs)
 #archive all the object files into a static library
 objlist = flist(kwargs['objstore'],exts='.o',recurse=True)
 ar_list(objlist,'{}/lib{}.a'.format(builddir,libname))
 if(doinstall):
    #Push any libraries to the common lib folder
    #(without the folder, shutil.copy would create a plain file named lib instead)
    os.makedirs("{}/lib".format(installdir),exist_ok=True)
    shutil.copy(
        '{}/lib{}.a'.format(builddir,libname),
        "{}/lib".format(installdir)
    )
    #Copy include files to the common include folder
    copytree('./include/',installdir+'/include/',dirs_exist_ok=True)
--- a/build/make.linux64.test.py
+++ b/build/make.linux64.test.py
@ -0,0 +1,49 @@
 #!/usr/bin/python3
 #Build script: compiles ./src/main.cu with nvcc and links it against the static library into the test executable
 import os,sys,math
 import subprocess
 import shutil
 from shutil import copytree
 from amsbuildlib4 import *
 libname = "amsculib2.linux64" #static library name to link against
 binname = "test" #create this executable when compiling main.c or main.cpp
 commondir = "../../linux64" #common directory to pull libraries and includes from
 depdir = "./dependencies/linux64" #local pre-compiled dependency libraries and their includes
 installdir = "../../linux64" #directory to install to when finished
 builddir = "./build_linux64"
 doinstall = True   #copies the build_output to the install dir when finished
 cc = "nvcc" #compiler
 cflags = "-dc --compiler-options '-fPIC -O3'"
 libraries = "-l{}".format(libname)
 libdirs = "-L{} -L{}/lib -L{}/lib".format(builddir,commondir,depdir)
 linkerflags = " -Xlinker=-rpath,."
 srcexts = [".c",".cpp",".cu"]
 binsrc = ["main.c","main.cpp", "main.cu"] #ignore these files when compiling the static library
 #keyword list to control the compilers/linkers
 kwargs = dict()
 include = "-I./include -I{}/include -I{}/include".format(commondir, depdir)
 kwargs["include"] = include
 kwargs["flags"] = cflags
 kwargs["libdir"] = libdirs
 kwargs["libflags"] = libraries
 kwargs["linkerflags"] = linkerflags
 kwargs["recurse"] = True
 kwargs["objstore"] = "{}/objstore".format(builddir)
 #kept as a list of directories, the form the incremental compile helpers iterate over
 kwargs["searchincdirs"] = ["./include"]
 #Pull required binary dynamic libraries to the bin folder
 #shutil.copy('{}/lib/libcamsimg3.linux64.so'.format(commondir),builddir);
 #shutil.copy('{}/lib/libamsimg.dll'.format(commondir),builddir);
 #shutil.copy('{}/lib/glew32.dll','./bin_winx64');
 #Designate source files for main test program
 fsrc = ['./src/main.cu']
 #gs_compile writes each object next to its source, so the objects to link are the sources with .o
 fobj = replaceexts(fsrc,'.o')
 #Compile test programs
 gs_compile_list(cc,fsrc,**kwargs)
 gs_link_list(cc,list_to_sss(fobj),'{}/{}'.format(builddir,binname),**kwargs)
--- a/build/make.msvc64.lib.py
+++ b/build/make.msvc64.lib.py
@ -0,0 +1,61 @@
 #!/usr/bin/python3
 #Build script: incrementally compiles the sources under ./src, archives the objects with
 #the msvc lib tool and, when doinstall is set, copies library and headers to installdir
 import os,sys,math
 import subprocess
 import shutil
 from shutil import copytree
 from amsbuildlib4 import *
 libname = "amsculib2.msvc64" #static library name to generate
 binname = "test" #create this executable when compiling main.c or main.cpp
 commondir = "../../winx64" #common directory to pull libraries and includes from
 depdir = "./dependencies/winx64" #local pre-compiled dependency libraries and their includes
 installdir = "../../winx64" #directory to install to when finished
 builddir = "./build_msvc64"
 doinstall = True   #copies the build_output to the install dir when finished
 cc = "nvcc" #compiler
 #NOTE(review): these are the same gcc-style host options as in the linux64 scripts - confirm they are intended for the msvc build
 cflags = "-dc --compiler-options '-fPIC -O3'"
 libraries = "-l{}".format(libname)
 libdirs = "-L{} -L{}/lib -L{}/lib".format(builddir,commondir,depdir)
 linkerflags = " -Xlinker=-rpath,."
 srcexts = [".c",".cpp",".cu"]
 binsrc = ["main.c","main.cpp","main.cu"] #ignore these files when compiling the static library
 #keyword list to control the compilers/linkers
 kwargs = dict()
 include = "-I./include -I{}/include -I{}/include".format(commondir, depdir)
 kwargs["include"] = include
 kwargs["flags"] = cflags
 kwargs["libdir"] = libdirs
 kwargs["libflags"] = libraries
 kwargs["linkerflags"] = linkerflags
 kwargs["recurse"] = True
 kwargs["objstore"] = "{}/objstore".format(builddir)
 #must be a list of directories: a bare string would be walked one character at a time
 kwargs["searchincdirs"] = ["./include"]
 kwargs["objext"] = ".obj"
 #Find all source files, except the main project files
 srcfiles = flist('./src',exts = srcexts, recurse=True)
 srcfiles = except_contains(srcfiles,binsrc)
 #compile all the source files in the list
 #gs_compile_list(cc,files,**kwargs)
 msvc_incremental_compile_list(cc,srcfiles,**kwargs)
 #archive all the object files into a static library
 objlist = flist(kwargs['objstore'],exts='.obj',recurse=True)
 msvc_lib_list(objlist,'{}/lib{}.lib'.format(builddir,libname))
 if(doinstall):
    #Push any libraries to the common lib folder
    #(without the folder, shutil.copy would create a plain file named lib instead)
    os.makedirs("{}/lib".format(installdir),exist_ok=True)
    shutil.copy(
        '{}/lib{}.lib'.format(builddir,libname),
        "{}/lib".format(installdir)
    )
    #Copy include files to the common include folder
    copytree('./include/',installdir+'/include/',dirs_exist_ok=True)
--- a/build/make.msvc64.test.py
+++ b/build/make.msvc64.test.py
@ -0,0 +1,49 @@
 #!/usr/bin/python3
 #Build script: compiles ./src/main.cpp and links it with the msvc linker into the test executable
 import os,sys,math
 import subprocess
 import shutil
 from shutil import copytree
 from amsbuildlib4 import *
 libname = "amsculib2.msvc64" #static library name to link against
 binname = "test.exe" #create this executable when compiling main.c or main.cpp
 commondir = "../../winx64" #common directory to pull libraries and includes from
 depdir = "./dependencies/winx64" #local pre-compiled dependency libraries and their includes
 installdir = "../../winx64" #directory to install to when finished
 builddir = "./build_msvc64"
 doinstall = False   #copies the build_output to the install dir when finished
 cc = "nvcc" #compiler
 #NOTE(review): these are the same gcc-style host options as in the linux64 scripts - confirm they are intended for the msvc build
 cflags = "-dc --compiler-options '-fPIC -O3'"
 libraries = "-l{}".format(libname)
 libdirs = "-L{} -L{}/lib -L{}/lib".format(builddir,commondir,depdir)
 linkerflags = " -Xlinker=-rpath,."
 srcexts = [".c",".cpp",".cu"]
 binsrc = ["main.c","main.cpp","main.cu"] #ignore these files when compiling the static library
 #keyword list to control the compilers/linkers
 kwargs = dict()
 include = "-I./include -I{}/include -I{}/include".format(commondir, depdir)
 kwargs["include"] = include
 kwargs["flags"] = cflags
 kwargs["libdir"] = libdirs
 kwargs["libflags"] = libraries
 kwargs["linkerflags"] = linkerflags
 kwargs["recurse"] = True
 kwargs["objstore"] = "{}/objstore".format(builddir)
 #kept as a list of directories, the form the incremental compile helpers iterate over
 kwargs["searchincdirs"] = ["./include"]
 #Pull required binary dynamic libraries to the bin folder
 #shutil.copy('{}/lib/libcamsimg3.linux64.so'.format(commondir),builddir);
 #shutil.copy('{}/lib/libamsimg.dll'.format(commondir),builddir);
 #shutil.copy('{}/lib/glew32.dll','./bin_winx64');
 #Designate source files for main test program
 fsrc = ['./src/main.cpp']
 #msvc_compile writes each object next to its source, so the objects to link are the sources with .obj
 fobj = replaceexts(fsrc,'.obj')
 #Compile test programs
 msvc_compile_list(cc,fsrc,**kwargs)
 msvc_link_list(list_to_sss(fobj),'{}/{}'.format(builddir,binname),**kwargs)
--- a/build_linux64/libamsculib2.linux64.a
+++ b/build_linux64/libamsculib2.linux64.a
--- a/build_linux64/objstore/.placeholder
+++ b/build_linux64/objstore/.placeholder
--- a/build_linux64/objstore/amscu_comp128.o
+++ b/build_linux64/objstore/amscu_comp128.o
--- a/build_linux64/objstore/amscu_comp64.o
+++ b/build_linux64/objstore/amscu_comp64.o
--- a/build_linux64/objstore/amscu_cudafunctions.o
+++ b/build_linux64/objstore/amscu_cudafunctions.o
--- a/build_linux64/objstore/amscu_random.o
+++ b/build_linux64/objstore/amscu_random.o
--- a/build_linux64/objstore/amscuarray.o
+++ b/build_linux64/objstore/amscuarray.o
--- a/build_linux64/objstore/amscuarray_dops.o
+++ b/build_linux64/objstore/amscuarray_dops.o
--- a/build_linux64/objstore/amscugeom.o
+++ b/build_linux64/objstore/amscugeom.o
--- a/build_linux64/objstore/amsculib2.o
+++ b/build_linux64/objstore/amsculib2.o
--- a/build_linux64/objstore/amscumath.o
+++ b/build_linux64/objstore/amscumath.o
--- a/build_linux64/objstore/amscurarray.o
+++ b/build_linux64/objstore/amscurarray.o
--- a/build_linux64/objstore/cuvect2.o
+++ b/build_linux64/objstore/cuvect2.o
--- a/build_linux64/objstore/cuvect2f.o
+++ b/build_linux64/objstore/cuvect2f.o
--- a/build_linux64/objstore/cuvect3.o
+++ b/build_linux64/objstore/cuvect3.o
--- a/build_linux64/objstore/cuvect3f.o
+++ b/build_linux64/objstore/cuvect3f.o
--- a/build_linux64/objstore/cuvect4.o
+++ b/build_linux64/objstore/cuvect4.o
--- a/build_linux64/objstore/cuvect4f.o
+++ b/build_linux64/objstore/cuvect4f.o
--- a/build_linux64/test
+++ b/build_linux64/test
--- a/build_msvc64/objstore/.placeholder
+++ b/build_msvc64/objstore/.placeholder
--- a/include/amsculib2/amscu_comp128.hpp
+++ b/include/amsculib2/amscu_comp128.hpp
@ -1,89 +1,89 @@
-#ifndef __AMSCU_COMP128_HPP__
+#ifndef __AMSCU_COMP128_HPP__
-#define __AMSCU_COMP128_HPP__
+#define __AMSCU_COMP128_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-namespace cmp
+namespace cmp
-{
+{
-
+
-    class cucomp128
+    class cucomp128
-    {
+    {
-        public:
+        public:
-        double real;
+        double real;
-        double imag;
+        double imag;
-
+
-        __host__ __device__ cucomp128();
+        __host__ __device__ cucomp128();
-        __host__ __device__ ~cucomp128();
+        __host__ __device__ ~cucomp128();
-        __host__ __device__ cucomp128(const cucomp128 &other);
+        __host__ __device__ cucomp128(const cucomp128 &other);
-        __host__ __device__ cucomp128(const double &other);
+        __host__ __device__ cucomp128(const double &other);
-
+
-        __host__ __device__ cucomp128& operator=(cucomp128& other);
+        __host__ __device__ cucomp128& operator=(cucomp128& other);
-        __host__ __device__ const cucomp128& operator=(const cucomp128& other);
+        __host__ __device__ const cucomp128& operator=(const cucomp128& other);
-        __host__ __device__ cucomp128& operator=(double& other);
+        __host__ __device__ cucomp128& operator=(double& other);
-        __host__ __device__ const cucomp128& operator=(const double& other);
+        __host__ __device__ const cucomp128& operator=(const double& other);
-        
+        
-        __host__ __device__ double& operator[](int& ind);
+        __host__ __device__ double& operator[](int& ind);
-        __host__ __device__ const double& operator[](const int& ind) const;
+        __host__ __device__ const double& operator[](const int& ind) const;
-
+
-        __host__ __device__ cucomp128 operator+(const cucomp128& z);
+        __host__ __device__ cucomp128 operator+(const cucomp128& z);
-        __host__ __device__ cucomp128 operator-(const cucomp128& z);
+        __host__ __device__ cucomp128 operator-(const cucomp128& z);
-        __host__ __device__ cucomp128 operator*(const cucomp128& z);
+        __host__ __device__ cucomp128 operator*(const cucomp128& z);
-        __host__ __device__ cucomp128 operator/(const cucomp128& z);
+        __host__ __device__ cucomp128 operator/(const cucomp128& z);
-
+
-        __host__ __device__ cucomp128 operator+(const double& z);
+        __host__ __device__ cucomp128 operator+(const double& z);
-        __host__ __device__ cucomp128 operator-(const double& z);
+        __host__ __device__ cucomp128 operator-(const double& z);
-        __host__ __device__ cucomp128 operator*(const double& z);
+        __host__ __device__ cucomp128 operator*(const double& z);
-        __host__ __device__ cucomp128 operator/(const double& z);
+        __host__ __device__ cucomp128 operator/(const double& z);
-
+
-        __host__ __device__ friend cucomp128 operator-(const cucomp128& z); //negation sign
+        __host__ __device__ friend cucomp128 operator-(const cucomp128& z); //negation sign
-
+
-        //comparison operators
+        //comparison operators
-        __host__ __device__ bool operator==(const cucomp128& z) const;
+        __host__ __device__ bool operator==(const cucomp128& z) const;
-        __host__ __device__ bool operator!=(const cucomp128& z) const;
+        __host__ __device__ bool operator!=(const cucomp128& z) const;
-        __host__ __device__ bool operator>(const cucomp128& z) const;
+        __host__ __device__ bool operator>(const cucomp128& z) const;
-        __host__ __device__ bool operator<(const cucomp128& z) const;
+        __host__ __device__ bool operator<(const cucomp128& z) const;
-        __host__ __device__ bool operator>=(const cucomp128& z) const;
+        __host__ __device__ bool operator>=(const cucomp128& z) const;
-        __host__ __device__ bool operator<=(const cucomp128& z) const;
+        __host__ __device__ bool operator<=(const cucomp128& z) const;
-
+
-        __host__ __device__ bool isnan() const;
+        __host__ __device__ bool isnan() const;
-        __host__ __device__ bool isinf() const;
+        __host__ __device__ bool isinf() const;
-
+
-        __host__ __device__ bool isreal() const;
+        __host__ __device__ bool isreal() const;
-        __host__ __device__ bool isimag() const;
+        __host__ __device__ bool isimag() const;
-        __host__ __device__ bool iszero() const;
+        __host__ __device__ bool iszero() const;
-        __host__ __device__ double arg() const;
+        __host__ __device__ double arg() const;
-        __host__ __device__ double mag() const;
+        __host__ __device__ double mag() const;
-        __host__ __device__ cucomp128 conj() const;
+        __host__ __device__ cucomp128 conj() const;
-    };
+    };
-
+
-    __host__ __device__ double arg(cucomp128 z);
+    __host__ __device__ double arg(cucomp128 z);
-
+
-    __host__ __device__ cucomp128 dtocomp(double _r, double _i);
+    __host__ __device__ cucomp128 dtocomp(double _r, double _i);
-    __host__ __device__ double real(cucomp128 z);
+    __host__ __device__ double real(cucomp128 z);
-    __host__ __device__ double imag(cucomp128 z);
+    __host__ __device__ double imag(cucomp128 z);
-    __host__ __device__ cucomp128 sin(cucomp128 z);
+    __host__ __device__ cucomp128 sin(cucomp128 z);
-    __host__ __device__ cucomp128 cos(cucomp128 z);
+    __host__ __device__ cucomp128 cos(cucomp128 z);
-    __host__ __device__ cucomp128 tan(cucomp128 z);
+    __host__ __device__ cucomp128 tan(cucomp128 z);
-    __host__ __device__ cucomp128 exp(cucomp128 z);
+    __host__ __device__ cucomp128 exp(cucomp128 z);
-    __host__ __device__ cucomp128 log(cucomp128 z);
+    __host__ __device__ cucomp128 log(cucomp128 z);
-    __host__ __device__ double abs(cucomp128 z);
+    __host__ __device__ double abs(cucomp128 z);
-    __host__ __device__ cucomp128 conj(cucomp128 z);
+    __host__ __device__ cucomp128 conj(cucomp128 z);
-
+
-    // //need hyperbolic trig Functions
+    // //need hyperbolic trig Functions
-    __host__ __device__ cucomp128 cosh(cucomp128 z);
+    __host__ __device__ cucomp128 cosh(cucomp128 z);
-    __host__ __device__ cucomp128 sinh(cucomp128 z);
+    __host__ __device__ cucomp128 sinh(cucomp128 z);
-    __host__ __device__ cucomp128 tanh(cucomp128 z);
+    __host__ __device__ cucomp128 tanh(cucomp128 z);
-
+
-    __host__ __device__ cucomp128 pow(cucomp128 z1, cucomp128 z2);
+    __host__ __device__ cucomp128 pow(cucomp128 z1, cucomp128 z2);
-
+
-    // //returns "complex sign" of complex number - 0, or a unit number with same argument
+    // //returns "complex sign" of complex number - 0, or a unit number with same argument
-    __host__ __device__ cucomp128 csgn(cucomp128 z);
+    __host__ __device__ cucomp128 csgn(cucomp128 z);
-
+
-void test_cucomp128_1();
+void test_cucomp128_1();
-
+
-
+
-}; //end namespace cmp
+}; //end namespace cmp
-}; //end namespace amscuda
+}; //end namespace amscuda
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amscu_comp64.hpp
+++ b/include/amsculib2/amscu_comp64.hpp
@ -1,88 +1,88 @@
-#ifndef __AMSCU_COMP64_HPP__
+#ifndef __AMSCU_COMP64_HPP__
-#define __AMSCU_COMP64_HPP__
+#define __AMSCU_COMP64_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-namespace cmp
+namespace cmp
-{
+{
-
+
-    class cucomp64
+    class cucomp64
-    {
+    {
-        public:
+        public:
-        float real;
+        float real;
-        float imag;
+        float imag;
-
+
-        __host__ __device__ cucomp64();
+        __host__ __device__ cucomp64();
-        __host__ __device__ ~cucomp64();
+        __host__ __device__ ~cucomp64();
-        __host__ __device__ cucomp64(const cucomp64 &other);
+        __host__ __device__ cucomp64(const cucomp64 &other);
-        __host__ __device__ cucomp64(const float &other);
+        __host__ __device__ cucomp64(const float &other);
-
+
-        __host__ __device__ cucomp64& operator=(cucomp64& other);
+        __host__ __device__ cucomp64& operator=(cucomp64& other);
-        __host__ __device__ const cucomp64& operator=(const cucomp64& other);
+        __host__ __device__ const cucomp64& operator=(const cucomp64& other);
-        __host__ __device__ cucomp64& operator=(float& other);
+        __host__ __device__ cucomp64& operator=(float& other);
-        __host__ __device__ const cucomp64& operator=(const float& other);
+        __host__ __device__ const cucomp64& operator=(const float& other);
-        
+        
-        __host__ __device__ float& operator[](int& ind);
+        __host__ __device__ float& operator[](int& ind);
-        __host__ __device__ const float& operator[](const int& ind) const;
+        __host__ __device__ const float& operator[](const int& ind) const;
-
+
-        __host__ __device__ cucomp64 operator+(const cucomp64& z);
+        __host__ __device__ cucomp64 operator+(const cucomp64& z);
-        __host__ __device__ cucomp64 operator-(const cucomp64& z);
+        __host__ __device__ cucomp64 operator-(const cucomp64& z);
-        __host__ __device__ cucomp64 operator*(const cucomp64& z);
+        __host__ __device__ cucomp64 operator*(const cucomp64& z);
-        __host__ __device__ cucomp64 operator/(const cucomp64& z);
+        __host__ __device__ cucomp64 operator/(const cucomp64& z);
-
+
-        __host__ __device__ cucomp64 operator+(const float& z);
+        __host__ __device__ cucomp64 operator+(const float& z);
-        __host__ __device__ cucomp64 operator-(const float& z);
+        __host__ __device__ cucomp64 operator-(const float& z);
-        __host__ __device__ cucomp64 operator*(const float& z);
+        __host__ __device__ cucomp64 operator*(const float& z);
-        __host__ __device__ cucomp64 operator/(const float& z);
+        __host__ __device__ cucomp64 operator/(const float& z);
-
+
-        __host__ __device__ friend cucomp64 operator-(const cucomp64& z); //negation sign
+        __host__ __device__ friend cucomp64 operator-(const cucomp64& z); //negation sign
-
+
-        //comparison operators
+        //comparison operators
-        __host__ __device__ bool operator==(const cucomp64& z) const;
+        __host__ __device__ bool operator==(const cucomp64& z) const;
-        __host__ __device__ bool operator!=(const cucomp64& z) const;
+        __host__ __device__ bool operator!=(const cucomp64& z) const;
-        __host__ __device__ bool operator>(const cucomp64& z) const;
+        __host__ __device__ bool operator>(const cucomp64& z) const;
-        __host__ __device__ bool operator<(const cucomp64& z) const;
+        __host__ __device__ bool operator<(const cucomp64& z) const;
-        __host__ __device__ bool operator>=(const cucomp64& z) const;
+        __host__ __device__ bool operator>=(const cucomp64& z) const;
-        __host__ __device__ bool operator<=(const cucomp64& z) const;
+        __host__ __device__ bool operator<=(const cucomp64& z) const;
-
+
-        __host__ __device__ bool isnan() const;
+        __host__ __device__ bool isnan() const;
-        __host__ __device__ bool isinf() const;
+        __host__ __device__ bool isinf() const;
-
+
-        __host__ __device__ bool isreal() const;
+        __host__ __device__ bool isreal() const;
-        __host__ __device__ bool isimag() const;
+        __host__ __device__ bool isimag() const;
-        __host__ __device__ bool iszero() const;
+        __host__ __device__ bool iszero() const;
-        __host__ __device__ float arg() const;
+        __host__ __device__ float arg() const;
-        __host__ __device__ float mag() const;
+        __host__ __device__ float mag() const;
-        __host__ __device__ cucomp64 conj() const;
+        __host__ __device__ cucomp64 conj() const;
-    };
+    };
-
+
-    __host__ __device__ float arg(cucomp64 z);
+    __host__ __device__ float arg(cucomp64 z);
-
+
-    __host__ __device__ cucomp64 dtocomp64(float _r, float _i);
+    __host__ __device__ cucomp64 dtocomp64(float _r, float _i);
-    __host__ __device__ float real(cucomp64 z);
+    __host__ __device__ float real(cucomp64 z);
-    __host__ __device__ float imag(cucomp64 z);
+    __host__ __device__ float imag(cucomp64 z);
-    __host__ __device__ cucomp64 sin(cucomp64 z);
+    __host__ __device__ cucomp64 sin(cucomp64 z);
-    __host__ __device__ cucomp64 cos(cucomp64 z);
+    __host__ __device__ cucomp64 cos(cucomp64 z);
-    __host__ __device__ cucomp64 tan(cucomp64 z);
+    __host__ __device__ cucomp64 tan(cucomp64 z);
-    __host__ __device__ cucomp64 exp(cucomp64 z);
+    __host__ __device__ cucomp64 exp(cucomp64 z);
-    __host__ __device__ cucomp64 log(cucomp64 z);
+    __host__ __device__ cucomp64 log(cucomp64 z);
-    __host__ __device__ float abs(cucomp64 z);
+    __host__ __device__ float abs(cucomp64 z);
-    __host__ __device__ cucomp64 conj(cucomp64 z);
+    __host__ __device__ cucomp64 conj(cucomp64 z);
-
+
-    // //need hyperbolic trig Functions
+    // //need hyperbolic trig Functions
-    __host__ __device__ cucomp64 cosh(cucomp64 z);
+    __host__ __device__ cucomp64 cosh(cucomp64 z);
-    __host__ __device__ cucomp64 sinh(cucomp64 z);
+    __host__ __device__ cucomp64 sinh(cucomp64 z);
-    __host__ __device__ cucomp64 tanh(cucomp64 z);
+    __host__ __device__ cucomp64 tanh(cucomp64 z);
-
+
-    __host__ __device__ cucomp64 pow(cucomp64 z1, cucomp64 z2);
+    __host__ __device__ cucomp64 pow(cucomp64 z1, cucomp64 z2);
-
+
-    // //returns "complex sign" of complex number - 0, or a unit number with same argument
+    // //returns "complex sign" of complex number - 0, or a unit number with same argument
-    __host__ __device__ cucomp64 csgn(cucomp64 z);
+    __host__ __device__ cucomp64 csgn(cucomp64 z);
-
+
-void test_cucomp64_1();
+void test_cucomp64_1();
-
+
-}; //end namespace cmp
+}; //end namespace cmp
-}; //end namespace amscuda
+}; //end namespace amscuda
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amscu_cudafunctions.hpp
+++ b/include/amsculib2/amscu_cudafunctions.hpp
@ -1,40 +1,40 @@
-#ifndef __AMSCU_CUDAFUNCTIONS_HPP__
+#ifndef __AMSCU_CUDAFUNCTIONS_HPP__
-#define __AMSCU_CUDAFUNCTIONS_HPP__
+#define __AMSCU_CUDAFUNCTIONS_HPP__
-
+
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-    // device memory operations
+    // device memory operations
-    // I'm trying to avoid some of the boilerplate mental overhead involved
+    // I'm trying to avoid some of the boilerplate mental overhead involved
-    // in calling cuda functions and handling errors
+    // in calling cuda functions and handling errors
-
+
-    //frees devbuffer if it is not already NULL, and sets devbuffer to NULL
+    //frees devbuffer if it is not already NULL, and sets devbuffer to NULL
-    //wrapper to cudaFree
+    //wrapper to cudaFree
-    template<typename T> int cuda_free(T **devptr);
+    template<typename T> int cuda_free(T **devptr);
-
+
-    //copies hostbuffer to devbuffer
+    //copies hostbuffer to devbuffer
-    //initializes devbuffer from NULL if devbuffer is NULL
+    //initializes devbuffer from NULL if devbuffer is NULL
-    //if overwrite is true, deletes and reallocates devbuffer on device (for resizing)
+    //if overwrite is true, deletes and reallocates devbuffer on device (for resizing)
-    template<typename T> int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite);
+    template<typename T> int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite);
-
+
-    //copies info from devbuffer to hostbuffer
+    //copies info from devbuffer to hostbuffer
-    //initialzies hostbuffer from NULL if NULL
+    //initializes hostbuffer from NULL if NULL
-    //if overwrite is true, deletes and reallocates hostbuffer on host with new[] (for resizing)
+    //if overwrite is true, deletes and reallocates hostbuffer on host with new[] (for resizing)
-    template<typename T> int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite);
+    template<typename T> int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite);
-
+
-    //wrapper for cudaMemcpy - copies an item or struct (count 1) to the device
+    //wrapper for cudaMemcpy - copies an item or struct (count 1) to the device
-    //initializes devptr from NULL if not already initialized
+    //initializes devptr from NULL if not already initialized
-    template<typename T> int cuda_copytodevice(T *hostptr, T **devptr);
+    template<typename T> int cuda_copytodevice(T *hostptr, T **devptr);
-
+
-    //wrapper for cudaMemcpy - copies an item or struct (count 1) from device
+    //wrapper for cudaMemcpy - copies an item or struct (count 1) from device
-    //initializes hostptr from NULL with new if not already initialized
+    //initializes hostptr from NULL with new if not already initialized
-    template<typename T> int cuda_copyfromdevice(T *devptr, T **hostptr);
+    template<typename T> int cuda_copyfromdevice(T *devptr, T **hostptr);
-    
+    
-    int cuda_errortrap(const char *msgheader);
+    int cuda_errortrap(const char *msgheader);
-
+
-};
+};
-
+
-#include <amsculib2/amscu_cudafunctions_impl.hpp>
+#include <amsculib2/amscu_cudafunctions_impl.hpp>
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amscu_cudafunctions_impl.hpp
+++ b/include/amsculib2/amscu_cudafunctions_impl.hpp
@ -1,228 +1,228 @@
-#ifndef __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
+#ifndef __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
-#define __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
+#define __AMSCU_CUDAFUNCTIONS_IMPL_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-//frees devbuffer if it is not already NULL, and sets devbuffer to NULL
-//wrapper to cudaFree
-template<typename T> int cuda_free(T **devptr)
-{
-    int ret = 0;
-    cudaError_t err = cudaSuccess;
-
-    if(*devptr==NULL)
-    {
-        return ret; //devbuffer is already NULL/freed
-    }
-
-    err = cudaFree(*devptr);
-    if(err!=cudaSuccess)
-    {
-        ret = -1; //failed to free device pointer
-        *devptr = NULL; // - ? should only happen if I'm trying to double-free something
-    }
-    else
-    {
-        ret = 1;
-        *devptr = NULL;
-    }
-
-    return ret;
-}
+// Releases the device allocation referenced by *devptr (wrapper around cudaFree).
+// Return values:
+//    0 : *devptr was already NULL, nothing was done
+//    1 : cudaFree succeeded
+//   -1 : cudaFree reported an error
+// *devptr is NULL on return in every case.
+template<typename T> int cuda_free(T **devptr)
+{
+    if(*devptr==NULL)
+    {
+        return 0; //nothing allocated, nothing to free
+    }
+
+    const cudaError_t status = cudaFree(*devptr);
+
+    //the handle is cleared whether or not cudaFree succeeded
+    *devptr = NULL;
+
+    return (status==cudaSuccess) ? 1 : -1;
+}
-
+
-//copies hostbuffer to devbuffer
-//initializes devbuffer from NULL if devbuffer is NULL
-//if overwrite is true, deletes and reallocates devbuffer on device (for resizing)
-template<typename T> int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite)
-{
-    int ret = 0;
-    cudaError_t err = cudaSuccess;
-
-    if(N<=0)
-    {
-        ret = 0;
-        return ret;
-    }
-
-    if(hostbuffer==NULL)
-    {
-        ret = -2; //host buffer is NULL
-        return ret;
-    }
-
-    if(overwrite==1)
-    {
-        if(*devbuffer !=NULL)
-        {
-            cuda_free(devbuffer);
-        }
-    }
-
-    if(*devbuffer==NULL)
-    {
-        err = cudaMalloc(devbuffer,sizeof(T)*N);
-        if(err!=cudaSuccess)
-        {
-            ret = -3; //failed to allocate
-            *devbuffer = NULL;
-            return ret;
-        }
-    }
-
-    err = cudaMemcpy(*devbuffer,hostbuffer,sizeof(T)*N,cudaMemcpyHostToDevice);
-    if(err!=cudaSuccess)
-    {
-        ret = -4; //failed to copy
-    }
-    else
-    {
-        ret = 1;
-    }
-
-
-    return ret;
-}
+// Host -> device copy of N elements of type T.
+//   hostbuffer : source array on the host
+//   devbuffer  : address of the device pointer; when *devbuffer is NULL a
+//                buffer of N elements is allocated with cudaMalloc first
+//   N          : number of elements to copy
+//   overwrite  : if true, an existing *devbuffer is freed and reallocated
+//                (for resizing); if false an existing *devbuffer is reused
+//                without any size check
+// Returns 1 on success, 0 if N<=0 (nothing done), -2 if hostbuffer is NULL,
+// -3 if the device allocation failed (*devbuffer is left NULL),
+// -4 if the copy failed.
+template<typename T> int buffer_copytodevice(T *hostbuffer, T **devbuffer, long N, bool overwrite)
+{
+    if(N<=0) return 0;              //nothing to copy
+    if(hostbuffer==NULL) return -2; //host buffer is NULL
+
+    //resizing: drop whatever is currently allocated on the device
+    if(overwrite && *devbuffer!=NULL)
+    {
+        cuda_free(devbuffer);
+    }
+
+    const size_t nbytes = sizeof(T)*N;
+
+    if(*devbuffer==NULL)
+    {
+        if(cudaMalloc(devbuffer,nbytes)!=cudaSuccess)
+        {
+            *devbuffer = NULL;
+            return -3; //failed to allocate
+        }
+    }
+
+    const cudaError_t status = cudaMemcpy(*devbuffer,hostbuffer,nbytes,cudaMemcpyHostToDevice);
+    return (status==cudaSuccess) ? 1 : -4; //-4: failed to copy
+}
-
+
-//copies info from devbuffer to hostbuffer
-//initialzies hostbuffer from NULL if NULL
-//if overwrite is true, deletes and reallocates hostbuffer on host (for resizing)
-template<typename T> int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite)
-{
-    int ret = 0;
-    cudaError_t err = cudaSuccess;
-
-    if(N<=0)
-    {
-        ret = 0;
-        return ret;
-    }
-
-    if(devbuffer==NULL)
-    {
-        ret = -5; //null dev buffer
-        return ret;
-    }
-
-    if(overwrite==1 && *hostbuffer!=NULL)
-    {
-        delete[] (*hostbuffer); hostbuffer = NULL;
-    }
-
-    if(*hostbuffer==NULL)
-    {
-        *hostbuffer = new(std::nothrow) T[N];
-        if(*hostbuffer==NULL)
-        {
-            ret = -6; //failed to allocate host buffer
-            return ret;
-        }
-    }
-
-    err = cudaMemcpy(*hostbuffer, devbuffer, sizeof(T)*N, cudaMemcpyDeviceToHost);
-    if(err!=cudaSuccess)
-    {
-        ret = -7; //failed to copy
-    }
-    else
-    {
-        ret = 1;
-    }
-    
-    return ret;
-}
+//copies info from devbuffer to hostbuffer
+//initializes hostbuffer from NULL if NULL
+//if overwrite is true, deletes and reallocates hostbuffer on host (for resizing)
+//
+//  devbuffer  : source buffer on the device
+//  hostbuffer : address of the host pointer; when *hostbuffer is NULL it is
+//               allocated here with new[], so it must be released with delete[]
+//  N          : number of elements of type T to copy
+//  overwrite  : if true, an existing *hostbuffer is delete[]'d and reallocated;
+//               if false an existing *hostbuffer is reused without any size check
+//  returns    : 1 on success, 0 if N<=0 (nothing done), -5 if devbuffer is NULL,
+//               -6 if the host allocation failed, -7 if the copy failed
+template<typename T> int buffer_copyfromdevice(T *devbuffer, T **hostbuffer, long N, bool overwrite)
+{
+    int ret = 0;
+    cudaError_t err = cudaSuccess;
+
+    if(N<=0)
+    {
+        ret = 0;
+        return ret;
+    }
+
+    if(devbuffer==NULL)
+    {
+        ret = -5; //null dev buffer
+        return ret;
+    }
+
+    if(overwrite==1 && *hostbuffer!=NULL)
+    {
+        //reset the caller's pointer (not the local handle) so that the
+        //allocation branch below runs and no dangling pointer is left behind
+        delete[] (*hostbuffer);
+        *hostbuffer = NULL;
+    }
+
+    if(*hostbuffer==NULL)
+    {
+        *hostbuffer = new(std::nothrow) T[N];
+        if(*hostbuffer==NULL)
+        {
+            ret = -6; //failed to allocate host buffer
+            return ret;
+        }
+    }
+
+    err = cudaMemcpy(*hostbuffer, devbuffer, sizeof(T)*N, cudaMemcpyDeviceToHost);
+    if(err!=cudaSuccess)
+    {
+        ret = -7; //failed to copy
+    }
+    else
+    {
+        ret = 1;
+    }
+
+    return ret;
+}
-
+
-//wrapper for cudaMemcpy - copies an item or struct (count 1) to the device
-//initializes devptr from NULL if not already initialized
-template<typename T> int cuda_copytodevice(T *hostptr, T **devptr)
-{
-    int ret = 0;
-    cudaError_t err = cudaSuccess;
-    bool overwrite = 1;
-
-    if(hostptr==NULL)
-    {
-        ret = -2; //host buffer is NULL
-        return ret;
-    }
-
-    if(overwrite==1)
-    {
-        if(*devptr !=NULL)
-        {
-            cuda_free(devptr);
-        }
-    }
-
-    if(*devptr==NULL)
-    {
-        err = cudaMalloc(devptr,sizeof(T));
-        if(err!=cudaSuccess)
-        {
-            ret = -3; //failed to allocate
-            *devptr = NULL;
-            return ret;
-        }
-    }
-
-    err = cudaMemcpy(*devptr,hostptr,sizeof(T),cudaMemcpyHostToDevice);
-    if(err!=cudaSuccess)
-    {
-        ret = -4; //failed to copy
-    }
-    else
-    {
-        ret = 1;
-    }
-
-
-    return ret;
-}
+// Copies a single object of type T from the host to the device
+// (wrapper around cudaMalloc + cudaMemcpy, element count 1).
+// Any existing allocation at *devptr is freed first; sizeof(T) bytes are then
+// allocated on the device and *hostptr is copied into them.
+// Returns 1 on success, -2 if hostptr is NULL, -3 if the device allocation
+// failed (*devptr is left NULL), -4 if the copy failed.
+template<typename T> int cuda_copytodevice(T *hostptr, T **devptr)
+{
+    if(hostptr==NULL) return -2; //host buffer is NULL
+
+    //always start from a fresh device allocation
+    if(*devptr!=NULL)
+    {
+        cuda_free(devptr);
+    }
+
+    if(*devptr==NULL)
+    {
+        if(cudaMalloc(devptr,sizeof(T))!=cudaSuccess)
+        {
+            *devptr = NULL;
+            return -3; //failed to allocate
+        }
+    }
+
+    const cudaError_t status = cudaMemcpy(*devptr,hostptr,sizeof(T),cudaMemcpyHostToDevice);
+    return (status==cudaSuccess) ? 1 : -4; //-4: failed to copy
+}
-
+
-//wrapper for cudaMemcpy - copies an item or struct (count 1) from device
-//initializes hostptr from NULL with new if not already initialized
-template<typename T> int cuda_copyfromdevice(T *devptr, T **hostptr)
-{
-    int ret = 0;
-    cudaError_t err = cudaSuccess;
-    bool overwrite = 1;
-
-    if(devptr==NULL)
-    {
-        ret = -5; //null dev buffer
-        return ret;
-    }
-
-    if(overwrite==1 && *hostptr!=NULL)
-    {
-        delete (*hostptr); hostptr = NULL;
-    }
-
-    if(*hostptr==NULL)
-    {
-        *hostptr = new(std::nothrow) T;
-        if(*hostptr==NULL)
-        {
-            ret = -6; //failed to allocate host buffer
-            return ret;
-        }
-    }
-
-    err = cudaMemcpy(*hostptr, devptr, sizeof(T), cudaMemcpyDeviceToHost);
-    if(err!=cudaSuccess)
-    {
-        ret = -7; //failed to copy
-    }
-    else
-    {
-        ret = 1;
-    }
-    
-    return ret;
-}
+//wrapper for cudaMemcpy - copies an item or struct (count 1) from device
+//initializes hostptr from NULL with new if not already initialized
+//
+//  devptr  : source object on the device
+//  hostptr : address of the host pointer; an existing *hostptr is deleted and
+//            replaced by a fresh `new T`, so it must be NULL or have come from
+//            a scalar new, and the result must be released with delete
+//  returns : 1 on success, -5 if devptr is NULL, -6 if the host allocation
+//            failed, -7 if the copy failed
+template<typename T> int cuda_copyfromdevice(T *devptr, T **hostptr)
+{
+    int ret = 0;
+    cudaError_t err = cudaSuccess;
+    bool overwrite = 1;
+
+    if(devptr==NULL)
+    {
+        ret = -5; //null dev buffer
+        return ret;
+    }
+
+    if(overwrite==1 && *hostptr!=NULL)
+    {
+        //reset the caller's pointer (not the local handle) so that the
+        //allocation branch below runs and no dangling pointer is left behind
+        delete (*hostptr);
+        *hostptr = NULL;
+    }
+
+    if(*hostptr==NULL)
+    {
+        *hostptr = new(std::nothrow) T;
+        if(*hostptr==NULL)
+        {
+            ret = -6; //failed to allocate host buffer
+            return ret;
+        }
+    }
+
+    err = cudaMemcpy(*hostptr, devptr, sizeof(T), cudaMemcpyDeviceToHost);
+    if(err!=cudaSuccess)
+    {
+        ret = -7; //failed to copy
+    }
+    else
+    {
+        ret = 1;
+    }
+
+    return ret;
+}
-
+
-
+
-};
+};
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amscu_random.hpp
+++ b/include/amsculib2/amscu_random.hpp
@ -1,55 +1,55 @@
-#ifndef __AMSCU_RANDOM_HPP__
+#ifndef __AMSCU_RANDOM_HPP__
-#define __AMSCU_RANDOM_HPP__
+#define __AMSCU_RANDOM_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-// Random Number Gerneators
+// Random Number Generators
-
+
-
+
-// faster floating point hash function used in fractal generators
+// faster floating point hash function used in fractal generators
-__device__ __host__ float fhash1d_su(float x);
+__device__ __host__ float fhash1d_su(float x);
-
+
-__device__ __host__ float fhash3d_su(float x, float y, float z);
+__device__ __host__ float fhash3d_su(float x, float y, float z);
-
+
-__device__ __host__ float fhash4d_su(float x, float y, float z, float w);
+__device__ __host__ float fhash4d_su(float x, float y, float z, float w);
-
+
-
+
-//////////////////////////////////////////////////
+//////////////////////////////////////////////////
-// Deterministic Pseudorandom int32_t Generator //
+// Deterministic Pseudorandom int32_t Generator //
-//////////////////////////////////////////////////
+//////////////////////////////////////////////////
-
+
-//Next seed in simple 32 bit integer deterministic psuedo-rand generator
+//Next seed in simple 32 bit integer deterministic pseudo-random generator
-__host__ __device__ void dpr32_nextseed(int32_t *rseed_inout);
+__host__ __device__ void dpr32_nextseed(int32_t *rseed_inout);
-
+
-//Simple 32 bit integer deterministic pseudo-random generator
+//Simple 32 bit integer deterministic pseudo-random generator
-// *not* for cryptography
+// *not* for cryptography
-// Frequency of generated floats should be uniform [0,1)
+// Frequency of generated floats should be uniform [0,1)
-__host__ __device__ float dpr32_randf(int32_t *rseed_inout);
+__host__ __device__ float dpr32_randf(int32_t *rseed_inout);
-
+
-//box muller standard normal pseudorandom variable
+//box muller standard normal pseudorandom variable
-__host__ __device__ float dpr32_randnf(int32_t *rseed_inout);
+__host__ __device__ float dpr32_randnf(int32_t *rseed_inout);
-
+
-//////////////////////////////////////////////////
+//////////////////////////////////////////////////
-// Deterministic Pseudorandom int64_t Generator //
+// Deterministic Pseudorandom int64_t Generator //
-//////////////////////////////////////////////////
+//////////////////////////////////////////////////
-
+
-//operates without side-effects on explicit seed for threaded use
+//operates without side-effects on explicit seed for threaded use
-//deterministic pseudorandom number generator - takes seed and returns next seed
+//deterministic pseudorandom number generator - takes seed and returns next seed
-__host__ __device__ void dpr64_nextseed(int64_t *seedinout);
+__host__ __device__ void dpr64_nextseed(int64_t *seedinout);
-
+
-//deterministic pseudorandom number generator - takes seed and returns next seed
+//deterministic pseudorandom number generator - takes seed and returns next seed
-//returns uniformly distributed double
+//returns uniformly distributed double
-__host__ __device__ double dpr64_randd(int64_t *seedinout);
+__host__ __device__ double dpr64_randd(int64_t *seedinout);
-
+
-__host__ __device__ float dpr64_randf(int64_t *seedinout);
+__host__ __device__ float dpr64_randf(int64_t *seedinout);
-
+
-
+
-void test_dprg64();
+void test_dprg64();
-void test_dprg32();
+void test_dprg32();
-
+
-
+
-}; //end namespace amscuda
+}; //end namespace amscuda
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amscuarray.hpp
+++ b/include/amsculib2/amscuarray.hpp
@ -1,47 +1,47 @@
-#ifndef __CUARRAY_HPP__
+#ifndef __CUARRAY_HPP__
-#define __CUARRAY_HPP__
+#define __CUARRAY_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-template<typename T> class cuarray
+template<typename T> class cuarray
-{
+{
-public:
+public:
-    int length;
+    int length;
-    T* data;
+    T* data;
-
+
-    __device__ __host__ cuarray();
+    __device__ __host__ cuarray();
-    __device__ __host__ ~cuarray();
+    __device__ __host__ ~cuarray();
-
+
-    //Only call this on the device for thread/block local 
+    //Only call this on the device for thread/block local 
-    // dynamic arrays
+    // dynamic arrays
-    __device__ __host__ int resize(const int _length);
+    __device__ __host__ int resize(const int _length);
-
+
-    __device__ __host__ int size() const;
+    __device__ __host__ int size() const;
-    __device__ __host__ T& at(const int I);
+    __device__ __host__ T& at(const int I);
-    __device__ __host__ const T& at(const int I) const;
+    __device__ __host__ const T& at(const int I) const;
-
+
-    __device__ __host__ T& operator[](const int I);
+    __device__ __host__ T& operator[](const int I);
-    __device__ __host__ const T& operator[](const int I) const;
+    __device__ __host__ const T& operator[](const int I) const;
-    
+    
-    
+    
-
+
-    __host__ int device_send(cuarray<T> **dptr);
+    __host__ int device_send(cuarray<T> **dptr);
-    __host__ int _device_send_overwrite(cuarray<T> **dptr);
+    __host__ int _device_send_overwrite(cuarray<T> **dptr);
-    __host__ int _device_send_copy(cuarray<T> *dptr);
+    __host__ int _device_send_copy(cuarray<T> *dptr);
-    
+    
-    __host__ int device_pull(cuarray<T> *dptr);
+    __host__ int device_pull(cuarray<T> *dptr);
-    __host__ int device_free(cuarray<T> **dptr);
+    __host__ int device_free(cuarray<T> **dptr);
-
+
-    __host__ int device_length(cuarray<T> *dptr);
+    __host__ int device_length(cuarray<T> *dptr);
-    __host__ T* device_data_ptr(cuarray<T> *dptr);
+    __host__ T* device_data_ptr(cuarray<T> *dptr);
-    
+    
-};
+};
-
+
-void test_cuarray();
+void test_cuarray();
-
+
-};
+};
-
+
-#include <amsculib2/amscuarray_impl.hpp>
+#include <amsculib2/amscuarray_impl.hpp>
-
+
 #endif
--- a/include/amsculib2/amscuarray_dops.hpp
+++ b/include/amsculib2/amscuarray_dops.hpp
@ -1,76 +1,76 @@
-#ifndef __AMSCUARRAY_DOPS_HPP__
+#ifndef __AMSCUARRAY_DOPS_HPP__
-#define __AMSCUARRAY_DOPS_HPP__
+#define __AMSCUARRAY_DOPS_HPP__
-
+
-//Device Operations on Arrays
+//Device Operations on Arrays
-//
+//
-
+
-//Device Operations on Device Buffers
+//Device Operations on Device Buffers
-// dodb
+// dodb
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-
+
-    //sum
+    //sum
-    template<typename T> T devcuarray_sum(cuarray<T> *devptr);
+    template<typename T> T devcuarray_sum(cuarray<T> *devptr);
-
+
-    template<typename T> T dbuff_sum(T *devbuffer, int N);
+    template<typename T> T dbuff_sum(T *devbuffer, int N);
-
+
-
+
+    //Plain aggregate of float summary values; it is the return type of
+    //dbuff_stats (declared further down in this header).
+    //NOTE(review): the meaning of each field is inferred from its name only;
+    //no implementation of dbuff_stats is visible here - confirm.
-    struct dbuff_statstruct
+    struct dbuff_statstruct
-    {
+    {
-        public:
+        public:
-        float min;
+        float min;
-        float max;
+        float max;
-        float mean;
+        float mean;
-        float stdev;
+        float stdev;
-        float sum;
+        float sum;
-    };
+    };
-    
+    
-    //stats (min,max,mean,stdev)
+    //stats (min,max,mean,stdev)
-
+
-    template<typename T> void dbuff_minmax(T *devbuffer, int N, T *min, T *max);
+    template<typename T> void dbuff_minmax(T *devbuffer, int N, T *min, T *max);
-
+
-    template<typename T> dbuff_statstruct dbuff_stats(T *devbuffer, int N); //
+    template<typename T> dbuff_statstruct dbuff_stats(T *devbuffer, int N); //
-
+
-    //sets all elements to setto
+    //sets all elements to setto
-    template<typename T> void dbuff_setall(T *devbuffer, int N, T setto, int nblocks, int nthreads);
+    template<typename T> void dbuff_setall(T *devbuffer, int N, T setto, int nblocks, int nthreads);
-
+
-    //random device buffer functions
+    //random device buffer functions
-    void dbuff_rand_dpr32(float *devbuffer, int N, int32_t *rseedinout,  int nblocks, int nthreads); //
+    void dbuff_rand_dpr32(float *devbuffer, int N, int32_t *rseedinout,  int nblocks, int nthreads); //
-    void dbuff_rand_dpr32n(float *devbuffer, int N, int32_t *rseedinout,  int nblocks, int nthreads); //
+    void dbuff_rand_dpr32n(float *devbuffer, int N, int32_t *rseedinout,  int nblocks, int nthreads); //
-    
+    
-
+
-    void dbuff_rand_dpr64(float *devbuffer, int N, int64_t *rseedinout,  int nblocks, int nthreads); //
+    void dbuff_rand_dpr64(float *devbuffer, int N, int64_t *rseedinout,  int nblocks, int nthreads); //
-
+
-    //Elementwise device-buffer vector binary operation
+    //Elementwise device-buffer vector binary operation
-    //takes two input arrays ( , ) --> one output array
+    //takes two input arrays ( , ) --> one output array
-    template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads);
+    template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads);
-
+
-    //Elementwise device-buffer vector two-parameter operation
+    //Elementwise device-buffer vector two-parameter operation
-    //takes one input array, and a constant paramter ( ) ---> one output array
+    //takes one input array, and a constant parameter ( ) ---> one output array
-    template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads);
+    template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads);
-
+
-
+
-    //vector_add
+    //vector_add
-    template<typename T> void dbuff_add(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); 
+    template<typename T> void dbuff_add(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); 
-    template<typename T> void dbuff_add(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); 
+    template<typename T> void dbuff_add(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); 
-    template<typename T> void dbuff_sub(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); 
+    template<typename T> void dbuff_sub(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); 
-    template<typename T> void dbuff_sub(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); 
+    template<typename T> void dbuff_sub(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); 
-    template<typename T> void dbuff_mult(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); 
+    template<typename T> void dbuff_mult(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); 
-    template<typename T> void dbuff_mult(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); 
+    template<typename T> void dbuff_mult(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); 
-    template<typename T> void dbuff_div(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); 
+    template<typename T> void dbuff_div(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); 
-    template<typename T> void dbuff_div(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); 
+    template<typename T> void dbuff_div(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); 
-    template<typename T> void dbuff_div(T par_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); 
+    template<typename T> void dbuff_div(T par_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); 
-
+
-
+
-    // Tests //
+    // Tests //
-
+
-    void test_dbuff_rand_dpr32();
+    void test_dbuff_rand_dpr32();
-
+
-};
+};
-
+
-#include <amsculib2/amscuarray_dops_impl.hpp>
+#include <amsculib2/amscuarray_dops_impl.hpp>
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amscuarray_dops_impl.hpp
+++ b/include/amsculib2/amscuarray_dops_impl.hpp
@ -1,404 +1,404 @@
-#ifndef __AMSCUARRAY_DOPS_IMPL_HPP__
+#ifndef __AMSCUARRAY_DOPS_IMPL_HPP__
-#define __AMSCUARRAY_DOPS_IMPL_HPP__
+#define __AMSCUARRAY_DOPS_IMPL_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-template<typename T> __global__ void dbuff_sum_kf(T *devbuffer, int N, T *rets)
-{
-    int I0 = threadIdx.x + blockIdx.x*blockDim.x;
-    int Is = blockDim.x*gridDim.x;
-    int I;
-
-    T ret = (T) 0;
-    for(I=I0;I<N;I=I+Is)
-    {
-        ret = ret + devbuffer[I];
-    }
-    rets[I0] = ret;
-}
+//Kernel: every thread adds up the elements t, t+S, t+2S, ... of
+//devbuffer[0..N), where t is its global thread index and S the total number
+//of launched threads, and stores that partial sum in rets[t].
+//rets therefore needs one T per launched thread.
+template<typename T> __global__ void dbuff_sum_kf(T *devbuffer, int N, T *rets)
+{
+    const int tid = threadIdx.x + blockIdx.x*blockDim.x;
+    const int stride = blockDim.x*gridDim.x;
+
+    T partial = (T) 0;
+    int pos = tid;
+    while(pos<N)
+    {
+        partial = partial + devbuffer[pos];
+        pos = pos + stride;
+    }
+    rets[tid] = partial;
+}
-
+
-template<typename T> T devcuarray_sum(cuarray<T> *devptr)
-{
-    T ret = T();
-    cudaError_t err = cudaSuccess;
-
-    cuarray<T> ldptr;
-    
-    cudaMemcpy(&ldptr,devptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
-    
-    ret = devbuffer_sum(ldptr.data,ldptr.length);
-
-    ldptr.data = NULL;
-    ldptr.length=0;
-
-    return ret;
-}
+//Sums the elements of a cuarray<T> whose header lives in device memory.
+//  devptr - device pointer to a cuarray<T> header
+//Returns the sum computed by dbuff_sum, or T() when devptr is NULL or the
+//header cannot be copied back to the host (the error is printed).
+template<typename T> T devcuarray_sum(cuarray<T> *devptr)
+{
+    T ret = T();
+    cudaError_t err = cudaSuccess;
+
+    if(devptr==NULL)
+    {
+        return ret;
+    }
+
+    //host-side shallow copy of the device header: after the copy,
+    //ldptr.data holds a DEVICE address and must not be dereferenced here
+    cuarray<T> ldptr;
+
+    err = cudaMemcpy(&ldptr,devptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
+    if(err==cudaSuccess)
+    {
+        //dbuff_sum is the buffer reduction this library actually provides
+        ret = dbuff_sum(ldptr.data,ldptr.length);
+    }
+    else
+    {
+        printf("amscu::devcuarray_sum error: %s\n",cudaGetErrorString(err));
+    }
+
+    //detach before ldptr goes out of scope so that its destructor does not
+    //delete[] a device pointer
+    ldptr.data = NULL;
+    ldptr.length=0;
+
+    return ret;
+}
-
+
-template<typename T> T dbuff_sum(T *dbuff, int N)
-{
-    int I;
-    T ret = T();
-    cudaError_t err = cudaSuccess;
-
-    int nblocks;
-    int nthreads;
-
-    if(dbuff==NULL || N<=0)
-    {
-        return ret;
-    }
-
-    if(N>100)
-    {
-        nblocks = 10;
-        nthreads = (int)sqrt((float) (N/nblocks));
-        if(nthreads<=0) nthreads=1;
-        if(nthreads>512) nthreads=512;
-    }
-    else
-    {
-        nblocks = 1;
-        nthreads = 1;
-    }
-
-    T *rets = NULL;
-    T *devrets = NULL;
-
-    rets = new T[nblocks*nthreads];
-    cudaMalloc(&devrets,sizeof(T)*nblocks*nthreads);
-
-    dbuff_sum_kf<<<nblocks,nthreads>>>(dbuff,N,devrets);
-    cudaDeviceSynchronize();
-    err = cudaGetLastError();
-    if(err!=cudaSuccess)
-    {
-        printf("amscu::dbuff_sum error: %s\n",cudaGetErrorString(err));
-    }
-
-    cudaMemcpy(rets,devrets,sizeof(T)*nblocks*nthreads,cudaMemcpyDeviceToHost);
-
-    ret = (T)0;
-    for(I=0;I<nblocks*nthreads;I++)
-    {
-        ret = ret + rets[I];
-    }
-
-    cudaFree(devrets); devrets = NULL;
-    delete[] rets;
-
-    return ret;
-}
+//Host-side sum of the N elements stored in the device buffer dbuff.
+//Launches dbuff_sum_kf to build one partial sum per thread, copies the
+//partial sums back and adds them up on the host.
+//Returns T() when dbuff is NULL, N<=0, or any CUDA call fails (the failure
+//is printed); a failed run never sums uninitialised partials.
+template<typename T> T dbuff_sum(T *dbuff, int N)
+{
+    int I;
+    T ret = T();
+    cudaError_t err = cudaSuccess;
+
+    int nblocks;
+    int nthreads;
+    int npartials;
+
+    if(dbuff==NULL || N<=0)
+    {
+        return ret;
+    }
+
+    //launch heuristic: 10 blocks of about sqrt(N/10) threads (at most 512)
+    //for N>100, a single thread otherwise
+    if(N>100)
+    {
+        nblocks = 10;
+        nthreads = (int)sqrt((float) (N/nblocks));
+        if(nthreads<=0) nthreads=1;
+        if(nthreads>512) nthreads=512;
+    }
+    else
+    {
+        nblocks = 1;
+        nthreads = 1;
+    }
+    npartials = nblocks*nthreads;
+
+    T *rets = NULL;
+    T *devrets = NULL;
+
+    err = cudaMalloc(&devrets,sizeof(T)*npartials);
+    if(err!=cudaSuccess)
+    {
+        printf("amscu::dbuff_sum error: %s\n",cudaGetErrorString(err));
+        return ret;
+    }
+    rets = new T[npartials];
+
+    dbuff_sum_kf<<<nblocks,nthreads>>>(dbuff,N,devrets);
+    err = cudaGetLastError(); //launch-configuration errors
+    if(err==cudaSuccess) err = cudaDeviceSynchronize(); //errors raised while the kernel ran
+    if(err==cudaSuccess)
+    {
+        err = cudaMemcpy(rets,devrets,sizeof(T)*npartials,cudaMemcpyDeviceToHost);
+    }
+
+    if(err==cudaSuccess)
+    {
+        ret = (T)0;
+        for(I=0;I<npartials;I++)
+        {
+            ret = ret + rets[I];
+        }
+    }
+    else
+    {
+        printf("amscu::dbuff_sum error: %s\n",cudaGetErrorString(err));
+    }
+
+    cudaFree(devrets); devrets = NULL;
+    delete[] rets; rets = NULL;
+
+    return ret;
+}
-
+
-
+
-template<typename T> __global__ void dbuff_minmax_kf(T *devbuffer, int N, T *maxs, T *mins)
-{
-    int I0 = threadIdx.x + blockIdx.x*blockDim.x;
-    int Is = blockDim.x*gridDim.x;
-    int I;
-
-    for(I=I0;I<N;I=I+Is)
-    {
-        if(I==I0)
-        {
-            maxs[I0] = devbuffer[I];
-            mins[I0] = devbuffer[I];
-        }
-        else
-        {
-            if(devbuffer[I]>maxs[I0])
-            {
-                maxs[I0] = devbuffer[I];
-            }
-            if(devbuffer[I]<mins[I0])
-            {
-                mins[I0] = devbuffer[I];
-            }
-        }
-    }
-
-    return;
-}
+//Kernel: per-thread minimum and maximum of devbuffer[0..N).
+//Thread t (global index) inspects elements t, t+S, t+2S, ... where S is the
+//total number of launched threads, then writes maxs[t] and mins[t].
+//maxs and mins therefore need one T per launched thread.
+//A thread that owns no element (t>=N) reports devbuffer[0], so every slot is
+//defined and a later reduction over all slots still yields the true extrema.
+template<typename T> __global__ void dbuff_minmax_kf(T *devbuffer, int N, T *maxs, T *mins)
+{
+    int I0 = threadIdx.x + blockIdx.x*blockDim.x;
+    int Is = blockDim.x*gridDim.x;
+    int I;
+
+    if(N<=0)
+    {
+        return; //nothing to reduce, and devbuffer[0] may not exist
+    }
+
+    //seed from the thread's first element (element 0 if it has none) and
+    //keep the running extrema in locals rather than re-reading maxs/mins
+    T lmax = devbuffer[(I0<N) ? I0 : 0];
+    T lmin = lmax;
+
+    for(I=I0+Is;I<N;I=I+Is)
+    {
+        T v = devbuffer[I];
+        if(v>lmax) lmax = v;
+        if(v<lmin) lmin = v;
+    }
+
+    maxs[I0] = lmax;
+    mins[I0] = lmin;
+
+    return;
+}
-
+
-template<typename T> void dbuff_minmax(T *devbuffer, int N, T *min, T *max)
-{
-    cudaError_t err = cudaSuccess;
-    int nblocks;
-    int nthreads;
-    int I;
-
-    T *maxs = NULL;
-    T *dev_maxs = NULL;
-    T *mins = NULL;
-    T *dev_mins = NULL;
-
-    T localmax = T(0);
-    T localmin = T(0);
-
-    if(devbuffer==NULL || N<=0)
-    {
-        if(min!=NULL) *min = T(0);
-        if(max!=NULL) *max = T(0);
-        return;
-    }
-
-    if(N>25)
-    {
-        nblocks = 25;
-        nthreads = (int) sqrt((float)(N/nblocks));
-        if(nthreads<1) nthreads = 1;
-        if(nthreads>512) nthreads = 512;
-    }
-    else
-    {
-        nblocks = 1;
-        nthreads = 1;
-    }
-
-    maxs = new T[nblocks*nthreads];
-    mins = new T[nblocks*nthreads];
-    cudaMalloc(&dev_maxs,nblocks*nthreads);
-    cudaMalloc(&dev_mins,nblocks*nthreads);
-    
-    dbuff_minmax_kf<<<nblocks,nthreads>>>(devbuffer,N,dev_maxs,dev_mins);
-    cudaDeviceSynchronize();
-    err = cudaGetLastError();
-    if(err!=cudaSuccess)
-    {
-        printf("amscu::dbuff_minmax error: %s\n",cudaGetErrorString(err));
-    }
-
-    cudaMemcpy(maxs,dev_maxs,sizeof(T)*nblocks*nthreads,cudaMemcpyDeviceToHost);
-    cudaMemcpy(mins,dev_mins,sizeof(T)*nblocks*nthreads,cudaMemcpyDeviceToHost);
-    
-
-    for(I=0;I<nblocks*nthreads;I++)
-    {
-        if(I==0)
-        {
-            localmax = maxs[0];
-            localmin = mins[0];
-        }
-        else
-        {
-            if(maxs[I]>localmax) localmax = maxs[I];
-            if(mins[I]<localmin) localmin = mins[I];
-        }
-    }
-
-    if(max!=NULL) *max = localmax;
-    if(min!=NULL) *min = localmin;
-
-    cudaFree(dev_maxs); dev_maxs = NULL;
-    cudaFree(dev_mins); dev_mins = NULL;
-    delete[] maxs; maxs = NULL;
-    delete[] mins; mins = NULL;
-
-    return;
-}
+//Host-side minimum and maximum of the N elements in the device buffer
+//devbuffer.
+//  min, max - optional host output pointers (either may be NULL)
+//Launches dbuff_minmax_kf to obtain one (min,max) pair per thread, copies
+//the pairs back and reduces them on the host.
+//Both outputs are set to T(0) when devbuffer is NULL, N<=0, or any CUDA call
+//fails (the failure is printed).
+template<typename T> void dbuff_minmax(T *devbuffer, int N, T *min, T *max)
+{
+    cudaError_t err = cudaSuccess;
+    int nblocks;
+    int nthreads;
+    int nslots;
+    int I;
+
+    T *maxs = NULL;
+    T *dev_maxs = NULL;
+    T *mins = NULL;
+    T *dev_mins = NULL;
+
+    T localmax = T(0);
+    T localmin = T(0);
+
+    if(devbuffer==NULL || N<=0)
+    {
+        if(min!=NULL) *min = T(0);
+        if(max!=NULL) *max = T(0);
+        return;
+    }
+
+    //launch heuristic: 25 blocks of about sqrt(N/25) threads (at most 512)
+    //for N>25, a single thread otherwise
+    if(N>25)
+    {
+        nblocks = 25;
+        nthreads = (int) sqrt((float)(N/nblocks));
+        if(nthreads<1) nthreads = 1;
+        if(nthreads>512) nthreads = 512;
+    }
+    else
+    {
+        nblocks = 1;
+        nthreads = 1;
+    }
+    nslots = nblocks*nthreads;
+
+    maxs = new T[nslots];
+    mins = new T[nslots];
+
+    //cudaMalloc takes a size in BYTES: one T per launched thread
+    err = cudaMalloc(&dev_maxs,sizeof(T)*nslots);
+    if(err==cudaSuccess) err = cudaMalloc(&dev_mins,sizeof(T)*nslots);
+
+    if(err==cudaSuccess)
+    {
+        dbuff_minmax_kf<<<nblocks,nthreads>>>(devbuffer,N,dev_maxs,dev_mins);
+        err = cudaGetLastError(); //launch-configuration errors
+        if(err==cudaSuccess) err = cudaDeviceSynchronize(); //errors raised while the kernel ran
+    }
+    if(err==cudaSuccess)
+    {
+        err = cudaMemcpy(maxs,dev_maxs,sizeof(T)*nslots,cudaMemcpyDeviceToHost);
+    }
+    if(err==cudaSuccess)
+    {
+        err = cudaMemcpy(mins,dev_mins,sizeof(T)*nslots,cudaMemcpyDeviceToHost);
+    }
+
+    if(err==cudaSuccess)
+    {
+        //final reduction over the per-thread results
+        localmax = maxs[0];
+        localmin = mins[0];
+        for(I=1;I<nslots;I++)
+        {
+            if(maxs[I]>localmax) localmax = maxs[I];
+            if(mins[I]<localmin) localmin = mins[I];
+        }
+    }
+    else
+    {
+        printf("amscu::dbuff_minmax error: %s\n",cudaGetErrorString(err));
+    }
+
+    if(max!=NULL) *max = localmax;
+    if(min!=NULL) *min = localmin;
+
+    //cudaFree accepts NULL, so this also covers the partial-allocation case
+    cudaFree(dev_maxs); dev_maxs = NULL;
+    cudaFree(dev_mins); dev_mins = NULL;
+    delete[] maxs; maxs = NULL;
+    delete[] mins; mins = NULL;
+
+    return;
+}
-
+
-template<typename T> __global__ void dbuff_setall_kf(T *devbuffer, int N, T setto)
-{
-    int I0 = threadIdx.x + blockIdx.x*blockDim.x;
-    int Is = blockDim.x*gridDim.x;
-    int I;
-
-    for(I=I0;I<N;I=I+Is)
-    {
-        devbuffer[I] = setto;
-    }
-    return;
-}
+//Kernel: stores setto into every element of devbuffer[0..N).
+//Thread t (global index) writes elements t, t+S, t+2S, ... where S is the
+//total number of launched threads.
+template<typename T> __global__ void dbuff_setall_kf(T *devbuffer, int N, T setto)
+{
+    const int first = threadIdx.x + blockIdx.x*blockDim.x;
+    const int step = blockDim.x*gridDim.x;
+
+    int pos = first;
+    while(pos<N)
+    {
+        devbuffer[pos] = setto;
+        pos = pos + step;
+    }
+}
-
+
-template<typename T> void dbuff_setall(T *devbuffer, int N, T setto, int nblocks, int nthreads)
-{
-    cudaError_t err = cudaSuccess;
-
-    if(devbuffer==NULL || N<=0)
-    {
-        return;
-    }
-
-    dbuff_setall_kf<<<nblocks,nthreads>>>(devbuffer,N,setto);
-    cudaDeviceSynchronize();
-    err = cudaGetLastError();
-    if(err!=cudaSuccess)
-    {
-        printf("amscu::dbuff_setall error: %s\n",cudaGetErrorString(err));
-    }
-
-    return;
-}
+//Host wrapper: fills the N-element device buffer devbuffer with setto by
+//launching dbuff_setall_kf as <<<nblocks,nthreads>>> and waiting for it.
+//Does nothing when devbuffer is NULL or N<=0; a CUDA error is only printed.
+template<typename T> void dbuff_setall(T *devbuffer, int N, T setto, int nblocks, int nthreads)
+{
+    if(devbuffer!=NULL && N>0)
+    {
+        dbuff_setall_kf<<<nblocks,nthreads>>>(devbuffer,N,setto);
+        cudaDeviceSynchronize();
+
+        const cudaError_t status = cudaGetLastError();
+        if(status!=cudaSuccess)
+        {
+            printf("amscu::dbuff_setall error: %s\n",cudaGetErrorString(status));
+        }
+    }
+}
-
+
-template<typename T1, typename T2, typename T3> __global__ void dbuff_vectorbinop_kf1(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2))
-{
-    int I0 = threadIdx.x + blockIdx.x*blockDim.x;
-    int Is = blockDim.x*gridDim.x;
-    int I;
-
-    T1 a;
-    T2 b;
-    T3 c;
-
-    for(I=I0;I<N;I=I+Is)
-    {
-        a = dbuf_a[I];
-        b = dbuf_b[I];
-        c = fpnt(a,b);
-        dbuf_out[I] = c;
-    }
-
-    return;
-}
+//Kernel: dbuf_out[i] = fpnt(dbuf_a[i], dbuf_b[i]) for every i in [0,N).
+//Thread t (global index) handles i = t, t+S, t+2S, ... where S is the total
+//number of launched threads. fpnt is invoked from inside the kernel.
+template<typename T1, typename T2, typename T3> __global__ void dbuff_vectorbinop_kf1(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2))
+{
+    const int first = threadIdx.x + blockIdx.x*blockDim.x;
+    const int step = blockDim.x*gridDim.x;
+
+    for(int pos=first;pos<N;pos=pos+step)
+    {
+        const T1 lhs = dbuf_a[pos];
+        const T2 rhs = dbuf_b[pos];
+        dbuf_out[pos] = fpnt(lhs,rhs);
+    }
+}
-
+
-template<typename T1, typename T2, typename T3> __global__ void dbuff_vectorbinop_kf2(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2))
-{
-    int I0 = threadIdx.x + blockIdx.x*blockDim.x;
-    int Is = blockDim.x*gridDim.x;
-    int I;
-
-    T1 a;
-    T2 b;
-    T3 c;
-
-    for(I=I0;I<N;I=I+Is)
-    {
-        a = dbuf_a[I];
-        b = par_b;
-        c = fpnt(a,b);
-        dbuf_out[I] = c;
-    }
-
-    return;
-}
+//Kernel: dbuf_out[i] = fpnt(dbuf_a[i], par_b) for every i in [0,N), with the
+//same second operand par_b for all elements.
+//Thread t (global index) handles i = t, t+S, t+2S, ... where S is the total
+//number of launched threads. fpnt is invoked from inside the kernel.
+template<typename T1, typename T2, typename T3> __global__ void dbuff_vectorbinop_kf2(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2))
+{
+    const int first = threadIdx.x + blockIdx.x*blockDim.x;
+    const int step = blockDim.x*gridDim.x;
+
+    for(int pos=first;pos<N;pos=pos+step)
+    {
+        const T1 lhs = dbuf_a[pos];
+        const T2 rhs = par_b;
+        dbuf_out[pos] = fpnt(lhs,rhs);
+    }
+}
-
+
-
+
-//Elementwise device-buffer vector binary operation
-//takes two input arrays ( , ) --> one output array
-template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads)
-{
-    cudaError_t err = cudaSuccess;
-
-    if(dbuf_a == NULL || dbuf_b == NULL || dbuf_out == NULL || N<=0)
-    {
-        return;
-    }
-
-    dbuff_vectorbinop_kf1<<<nblocks,nthreads>>>(dbuf_a,dbuf_b,dbuf_out,N);
-    cudaDeviceSynchronize();
-    err = cudaGetLastError();
-    if(err!=cudaSuccess)
-    {
-        printf("amscu::devbuffer_vectorbinop error: %s\n",cudaGetErrorString(err));
-    }
-
-    return;
-}
+//Elementwise device-buffer vector binary operation
+//takes two input arrays ( , ) --> one output array
+//  dbuf_a, dbuf_b  - device input buffers of N elements
+//  dbuf_out        - device output buffer of N elements
+//  fpnt            - binary operation applied per element
+//  nblocks,nthreads- launch configuration for dbuff_vectorbinop_kf1
+//Does nothing if any buffer or fpnt is NULL, or if N<=0. CUDA errors are
+//only printed.
+//NOTE(review): fpnt is called inside the kernel, so it has to be a pointer
+//that is valid on the device. An address taken in host code (as the
+//dbuff_add/sub/mult/div wrappers do) is presumably not - confirm against the
+//CUDA function-pointer rules and consider passing a functor instead.
+template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads)
+{
+    cudaError_t err = cudaSuccess;
+
+    if(dbuf_a == NULL || dbuf_b == NULL || dbuf_out == NULL || fpnt == NULL || N<=0)
+    {
+        return;
+    }
+
+    //the kernel takes five arguments; fpnt must be forwarded
+    dbuff_vectorbinop_kf1<<<nblocks,nthreads>>>(dbuf_a,dbuf_b,dbuf_out,N,fpnt);
+    cudaDeviceSynchronize();
+    err = cudaGetLastError();
+    if(err!=cudaSuccess)
+    {
+        printf("amscu::devbuffer_vectorbinop error: %s\n",cudaGetErrorString(err));
+    }
+
+    return;
+}
-
+
-//Elementwise device-buffer vector two-parameter operation
-//takes one input array, and a constant paramter ( ) ---> one output array
-template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads)
-{
-    cudaError_t err = cudaSuccess;
-
-    if(dbuf_a == NULL || dbuf_out == NULL || N<=0)
-    {
-        return;
-    }
-
-    dbuff_vectorbinop_kf2<<<nblocks,nthreads>>>(dbuf_a,par_b,dbuf_out,N);
-    cudaDeviceSynchronize();
-    err = cudaGetLastError();
-    if(err!=cudaSuccess)
-    {
-        printf("amscu::devbuffer_vectorbinop error: %s\n",cudaGetErrorString(err));
-    }
-
-    return;
-}
+//Elementwise device-buffer vector two-parameter operation
+//takes one input array, and a constant parameter ( ) ---> one output array
+//  dbuf_a          - device input buffer of N elements
+//  par_b           - second operand, identical for every element
+//  dbuf_out        - device output buffer of N elements
+//  fpnt            - binary operation applied per element
+//  nblocks,nthreads- launch configuration for dbuff_vectorbinop_kf2
+//Does nothing if a buffer or fpnt is NULL, or if N<=0. CUDA errors are only
+//printed.
+//NOTE(review): fpnt is called inside the kernel, so it has to be a pointer
+//that is valid on the device. An address taken in host code (as the
+//dbuff_add/sub/mult/div wrappers do) is presumably not - confirm against the
+//CUDA function-pointer rules and consider passing a functor instead.
+template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads)
+{
+    cudaError_t err = cudaSuccess;
+
+    if(dbuf_a == NULL || dbuf_out == NULL || fpnt == NULL || N<=0)
+    {
+        return;
+    }
+
+    //the kernel takes five arguments; fpnt must be forwarded
+    dbuff_vectorbinop_kf2<<<nblocks,nthreads>>>(dbuf_a,par_b,dbuf_out,N,fpnt);
+    cudaDeviceSynchronize();
+    err = cudaGetLastError();
+    if(err!=cudaSuccess)
+    {
+        printf("amscu::devbuffer_vectorbinop error: %s\n",cudaGetErrorString(err));
+    }
+
+    return;
+}
-
+
-template<typename T> T dbuff_add_fn(T a, T b)
-{
-    return a+b;
-}
+//Binary helper: returns a+b.
+template<typename T> T dbuff_add_fn(T a, T b)
+{
+    T total = a+b;
+    return total;
+}
-
+
-template<typename T> void dbuff_add(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
-{
-    dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,&dbuff_add_fn,nblocks,nthreads);
-    return;
-}
+//Elementwise add of two device buffers: forwards to the two-array
+//dbuff_vectorbinop with dbuff_add_fn as the operation.
+//NOTE(review): the address of dbuff_add_fn is taken in host code but used
+//inside a kernel - confirm this is valid on the device.
+template<typename T> void dbuff_add(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
+{
+    T (*op)(T,T) = &dbuff_add_fn<T>;
+    dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,op,nblocks,nthreads);
+}
-
+
-template<typename T> void dbuff_add(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
-{
-    dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,&dbuff_add_fn,nblocks,nthreads);
-    return;
-}
+//Adds the constant par_b to a device buffer: forwards to the
+//array/parameter dbuff_vectorbinop with dbuff_add_fn as the operation.
+//NOTE(review): the address of dbuff_add_fn is taken in host code but used
+//inside a kernel - confirm this is valid on the device.
+template<typename T> void dbuff_add(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
+{
+    T (*op)(T,T) = &dbuff_add_fn<T>;
+    dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,op,nblocks,nthreads);
+}
-
+
-template<typename T> T dbuff_sub_fn(T a, T b)
-{
-    return a-b;
-}
+//Binary helper: returns a-b.
+template<typename T> T dbuff_sub_fn(T a, T b)
+{
+    T difference = a-b;
+    return difference;
+}
-
+
-template<typename T> void dbuff_sub(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
-{
-    dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,&dbuff_sub_fn,nblocks,nthreads);
-    return;
-}
+//Elementwise subtract of two device buffers (a - b): forwards to the
+//two-array dbuff_vectorbinop with dbuff_sub_fn as the operation.
+//NOTE(review): the address of dbuff_sub_fn is taken in host code but used
+//inside a kernel - confirm this is valid on the device.
+template<typename T> void dbuff_sub(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
+{
+    T (*op)(T,T) = &dbuff_sub_fn<T>;
+    dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,op,nblocks,nthreads);
+}
-
+
-template<typename T> void dbuff_sub(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
-{
-    dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,&dbuff_sub_fn,nblocks,nthreads);
-    return;
-}
+//Subtracts the constant par_b from a device buffer: forwards to the
+//array/parameter dbuff_vectorbinop with dbuff_sub_fn as the operation.
+//NOTE(review): the address of dbuff_sub_fn is taken in host code but used
+//inside a kernel - confirm this is valid on the device.
+template<typename T> void dbuff_sub(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
+{
+    T (*op)(T,T) = &dbuff_sub_fn<T>;
+    dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,op,nblocks,nthreads);
+}
-
+
-template<typename T> T dbuff_mult_fn(T a, T b)
-{
-    return a*b;
-}
+//Binary helper: returns a*b.
+template<typename T> T dbuff_mult_fn(T a, T b)
+{
+    T product = a*b;
+    return product;
+}
-
+
-template<typename T> void dbuff_mult(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
-{
-    dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,&dbuff_mult_fn,nblocks,nthreads);
-    return;
-}
+//Elementwise multiply of two device buffers: forwards to the two-array
+//dbuff_vectorbinop with dbuff_mult_fn as the operation.
+//NOTE(review): the address of dbuff_mult_fn is taken in host code but used
+//inside a kernel - confirm this is valid on the device.
+template<typename T> void dbuff_mult(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
+{
+    T (*op)(T,T) = &dbuff_mult_fn<T>;
+    dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,op,nblocks,nthreads);
+}
-
+
-template<typename T> void dbuff_mult(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
-{
-    dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,&dbuff_mult_fn,nblocks,nthreads);
-    return;
-}
+//Multiplies a device buffer by the constant par_b: forwards to the
+//array/parameter dbuff_vectorbinop with dbuff_mult_fn as the operation.
+//NOTE(review): the address of dbuff_mult_fn is taken in host code but used
+//inside a kernel - confirm this is valid on the device.
+template<typename T> void dbuff_mult(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
+{
+    T (*op)(T,T) = &dbuff_mult_fn<T>;
+    dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,op,nblocks,nthreads);
+}
-
+
-template<typename T> T dbuff_div_fn(T a, T b)
-{
-    return a/b;
-}
+//Binary helper: returns a/b.
+template<typename T> T dbuff_div_fn(T a, T b)
+{
+    T quotient = a/b;
+    return quotient;
+}
-
+
-template<typename T> void dbuff_div(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
-{
-    dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,&dbuff_div_fn,nblocks,nthreads);
-    return;
-}
+//Elementwise divide of two device buffers (a / b): forwards to the
+//two-array dbuff_vectorbinop with dbuff_div_fn as the operation.
+//NOTE(review): the address of dbuff_div_fn is taken in host code but used
+//inside a kernel - confirm this is valid on the device.
+template<typename T> void dbuff_div(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
+{
+    T (*op)(T,T) = &dbuff_div_fn<T>;
+    dbuff_vectorbinop(dbuff_a,dbuff_b,dbuff_out,N,op,nblocks,nthreads);
+}
-
+
-template<typename T> void dbuff_div(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
-{
-    dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,&dbuff_div_fn,nblocks,nthreads);
-    return;
-}
+//Divides a device buffer by the constant par_b: forwards to the
+//array/parameter dbuff_vectorbinop with dbuff_div_fn as the operation.
+//NOTE(review): the address of dbuff_div_fn is taken in host code but used
+//inside a kernel - confirm this is valid on the device.
+template<typename T> void dbuff_div(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads)
+{
+    T (*op)(T,T) = &dbuff_div_fn<T>;
+    dbuff_vectorbinop(dbuff_a,par_b,dbuff_out,N,op,nblocks,nthreads);
+}
-
+
-template<typename T> T dbuff_ldiv_fn(T a, T b)
-{
-    return b/a;
-}
+//Binary helper with swapped operands: returns b/a.
+template<typename T> T dbuff_ldiv_fn(T a, T b)
+{
+    T quotient = b/a;
+    return quotient;
+}
-
+
-
+
-template<typename T> void dbuff_div(T par_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
-{
-    dbuff_vectorbinop(dbuff_b,par_a,dbuff_out,N,&dbuff_ldiv_fn,nblocks,nthreads);
-    return;
-}
+//Divides the constant par_a by each element of a device buffer
+//(par_a / dbuff_b[i]): forwards to the array/parameter dbuff_vectorbinop
+//with dbuff_ldiv_fn, which swaps the operand order.
+//NOTE(review): the address of dbuff_ldiv_fn is taken in host code but used
+//inside a kernel - confirm this is valid on the device.
+template<typename T> void dbuff_div(T par_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads)
+{
+    T (*op)(T,T) = &dbuff_ldiv_fn<T>;
+    dbuff_vectorbinop(dbuff_b,par_a,dbuff_out,N,op,nblocks,nthreads);
+}
-
+
-
+
-};
+};
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amscuarray_impl.hpp
+++ b/include/amsculib2/amscuarray_impl.hpp
@ -1,323 +1,323 @@
-#ifndef __CUARRAY_IMPL_HPP__
+#ifndef __CUARRAY_IMPL_HPP__
-#define __CUARRAY_IMPL_HPP__
+#define __CUARRAY_IMPL_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-// New Version cuarray<T>
+// New Version cuarray<T>
-// simpler, less crap going on
+// simpler, less crap going on
-
+
-template<typename T> __device__ __host__ cuarray<T>::cuarray()
-{
-    length = 0;
-    data = NULL;
-}
+//Default constructor: an empty array (no storage, zero length).
+template<typename T> __device__ __host__ cuarray<T>::cuarray()
+{
+    data = NULL;
+    length = 0;
+}
-
+
-template<typename T> __device__ __host__ cuarray<T>::~cuarray()
-{
-    if(data!=NULL)
-    {
-        delete[] data; data = NULL;
-    }
-    length = 0;
-}
+//Destructor: releases the element storage with delete[] (if any) and
+//resets the object to the empty state.
+template<typename T> __device__ __host__ cuarray<T>::~cuarray()
+{
+    length = 0;
+    if(data==NULL)
+    {
+        return;
+    }
+    delete[] data;
+    data = NULL;
+}
-
+
-template<typename T> __device__ __host__ int cuarray<T>::resize(const int _length)
-{
-    int ret = 0;
-
-    T *newbuffer = NULL;
-
-    if(length==_length)
-    {
-        //do nothing
-        ret = 1;
-        return ret;
-    }
-    if(_length<=0)
-    {
-        if(data!=NULL)
-        {
-            delete[] data;
-            data = NULL;
-        }
-        length = 0;
-        ret = 1;
-    }
-
-    newbuffer = new T[_length];
-    if(newbuffer==NULL)
-    {
-        ret = -1; //failed to allocate memory
-        return ret;
-    }
-
-    int I;
-    T def;
-
-    if(data!=NULL)
-    {
-        for(I=0;I<length&&I<_length;I++)
-        {
-            newbuffer[I] = data[I];
-        }
-        for(I=length;I<_length;I++)
-        {
-            newbuffer[I] = def;
-        }
-        delete[] data; data=NULL;
-    }
-    else
-    {
-        for(I=0;I<_length;I++)
-        {
-            newbuffer[I] = def;
-        }
-    }
-
-    data = newbuffer;
-    length = _length;
-    ret = 1;
-
-    return ret;
-}
+//Resizes the array to _length elements.
+//The leading min(old length, _length) elements are preserved; any further
+//elements are set to a value-initialised T().
+//_length<=0 releases the storage and leaves an empty array.
+//Returns 1 on success (including "nothing to do"), -1 if the new buffer
+//could not be allocated; in that case the array is left unchanged.
+template<typename T> __device__ __host__ int cuarray<T>::resize(const int _length)
+{
+    int ret = 0;
+
+    T *newbuffer = NULL;
+
+    if(length==_length)
+    {
+        //do nothing
+        ret = 1;
+        return ret;
+    }
+    if(_length<=0)
+    {
+        if(data!=NULL)
+        {
+            delete[] data;
+            data = NULL;
+        }
+        length = 0;
+        ret = 1;
+        return ret; //must not fall through to new T[_length] with a non-positive size
+    }
+
+    newbuffer = new T[_length];
+    if(newbuffer==NULL)
+    {
+        ret = -1; //failed to allocate memory
+        return ret;
+    }
+
+    int I;
+    T def = T(); //well-defined fill value, also for built-in types
+
+    //number of old elements carried over (none if there was no storage)
+    int ncopy = 0;
+    if(data!=NULL)
+    {
+        ncopy = (length<_length) ? length : _length;
+    }
+
+    for(I=0;I<ncopy;I++)
+    {
+        newbuffer[I] = data[I];
+    }
+    for(I=ncopy;I<_length;I++)
+    {
+        newbuffer[I] = def;
+    }
+
+    if(data!=NULL)
+    {
+        delete[] data; data=NULL;
+    }
+
+    data = newbuffer;
+    length = _length;
+    ret = 1;
+
+    return ret;
+}
-
+
-
+
-template<typename T> __host__ int cuarray<T>::device_send(cuarray<T> **dptr)
-{
-    int ret = 0;
-    int dlength;
-
-    if(*dptr==NULL)
-    {
-        ret = _device_send_overwrite(dptr);
-    }
-    else
-    {
-        dlength = device_length(*dptr);
-        if(dlength=length)
-        {
-            ret = _device_send_copy(*dptr);
-        }
-        else
-        {
-            ret = _device_send_overwrite(dptr);
-        }
-    }
-
-    return ret;
-}
+//Sends this host array to the device.
+//  dptr - address of a device pointer; *dptr is either NULL or an existing
+//         device-side cuarray<T>
+//If *dptr is NULL, or the device array has a different length, the device
+//copy is (re)created with _device_send_overwrite. If the lengths match, the
+//elements are copied into the existing device buffer with
+//_device_send_copy.
+//Returns the result of the helper used (1 on success, negative on failure),
+//or -1 if dptr itself is NULL.
+template<typename T> __host__ int cuarray<T>::device_send(cuarray<T> **dptr)
+{
+    int ret = 0;
+    int dlength;
+
+    if(dptr==NULL)
+    {
+        ret = -1;
+        return ret;
+    }
+
+    if(*dptr==NULL)
+    {
+        ret = _device_send_overwrite(dptr);
+    }
+    else
+    {
+        dlength = device_length(*dptr);
+        //comparison, not assignment: the in-place copy is only safe when
+        //the device buffer has exactly the host length
+        if(dlength==length)
+        {
+            ret = _device_send_copy(*dptr);
+        }
+        else
+        {
+            ret = _device_send_overwrite(dptr);
+        }
+    }
+
+    return ret;
+}
-
+
-template<typename T> __host__ int cuarray<T>::_device_send_overwrite(cuarray<T> **dptr)
-{
-    int ret = 0;
-    cuarray<T> dlocal;
-    cudaError_t err = cudaSuccess;
-    device_free(dptr);
-    
-    if(length>=0 && data!=NULL)
-    {
-        err = cudaMalloc(dptr,sizeof(cuarray<T>));
-        if(err==cudaSuccess)
-        {
-            err = cudaMalloc(&(dlocal.data),sizeof(T)*length);
-            dlocal.length = length;
-
-            if(err==cudaSuccess)
-            {
-                cudaMemcpy(*dptr,&dlocal,sizeof(cuarray<T>),cudaMemcpyHostToDevice);
-                if(data!=NULL)
-                    err = cudaMemcpy(dlocal.data,data,sizeof(T)*length,cudaMemcpyHostToDevice);
-                else
-                    err = cudaSuccess;
-                if(err==cudaSuccess)
-                {
-                    ret = 1;
-                }
-                else
-                {
-                    ret = -3;
-                }
-            }
-            else
-            {
-                ret = -2;
-            }
-        }
-        else
-        {
-            ret = -1;
-        }
-    }
-    else
-    {
-        dlocal.data = NULL;
-        dlocal.length = 0;
-        err = cudaMalloc(dptr,sizeof(cuarray<T>));
-        if(err==cudaSuccess)
-        {
-            cudaMemcpy(*dptr,&dlocal,sizeof(cuarray<T>),cudaMemcpyHostToDevice);
-            ret = 1;
-        }
-        else
-        {
-            ret = -4;
-        }
-    }
-
-
-    dlocal.data = NULL;
-    dlocal.length = -1;
-
-    return ret;
-}
+//Frees whatever *dptr refers to (via device_free) and builds a fresh device
+//copy of this array: a cuarray<T> header in device memory whose data member
+//points at a device buffer holding the elements.
+//Return codes:
+//   1  success, *dptr refers to the new device array
+//  -1  header allocation failed
+//  -2  element buffer allocation failed
+//  -3  copying the header or the elements to the device failed
+//  -4  allocating or copying the header of an empty array failed
+//On every failure all device memory allocated here is released and *dptr is
+//set to NULL, so the caller never holds a half-initialised device array.
+template<typename T> __host__ int cuarray<T>::_device_send_overwrite(cuarray<T> **dptr)
+{
+    int ret = 0;
+    cuarray<T> dlocal; //host staging copy of the header; dlocal.data holds a DEVICE pointer
+    cuarray<T> *dhead = NULL; //device header, published to *dptr only on success
+    cudaError_t err = cudaSuccess;
+    device_free(dptr);
+
+    if(length>=0 && data!=NULL)
+    {
+        err = cudaMalloc(&dhead,sizeof(cuarray<T>));
+        if(err!=cudaSuccess)
+        {
+            dhead = NULL;
+            ret = -1;
+        }
+        else
+        {
+            err = cudaMalloc(&(dlocal.data),sizeof(T)*length);
+            if(err!=cudaSuccess)
+            {
+                dlocal.data = NULL;
+                ret = -2;
+            }
+            else
+            {
+                dlocal.length = length;
+                err = cudaMemcpy(dhead,&dlocal,sizeof(cuarray<T>),cudaMemcpyHostToDevice);
+                if(err==cudaSuccess)
+                {
+                    err = cudaMemcpy(dlocal.data,data,sizeof(T)*length,cudaMemcpyHostToDevice);
+                }
+                ret = (err==cudaSuccess) ? 1 : -3;
+            }
+        }
+    }
+    else
+    {
+        //empty host array: send a header with no element buffer
+        dlocal.data = NULL;
+        dlocal.length = 0;
+        err = cudaMalloc(&dhead,sizeof(cuarray<T>));
+        if(err!=cudaSuccess)
+        {
+            dhead = NULL;
+        }
+        else
+        {
+            err = cudaMemcpy(dhead,&dlocal,sizeof(cuarray<T>),cudaMemcpyHostToDevice);
+        }
+        ret = (err==cudaSuccess) ? 1 : -4;
+    }
+
+    if(ret!=1)
+    {
+        //roll back partial work so nothing leaks
+        if(dlocal.data!=NULL) cudaFree(dlocal.data);
+        if(dhead!=NULL) cudaFree(dhead);
+        dhead = NULL;
+    }
+    *dptr = dhead;
+
+    //detach so that dlocal's destructor never delete[]s a device pointer
+    dlocal.data = NULL;
+    dlocal.length = 0;
+
+    return ret;
+}
-
+
-template<typename T> __host__ int cuarray<T>::_device_send_copy(cuarray<T> *dptr)
-{
-    int ret = 0;
-    cudaError_t err = cudaSuccess;
-    T* ddata = NULL;
-    ddata = device_data_ptr(dptr);
-
-    err = cudaMemcpy(ddata,data,sizeof(T)*length,cudaMemcpyHostToDevice);
-    if(err==cudaSuccess)
-    {
-        ret = 1;
-    }
-    else
-    {
-        ret = -1;
-    }
-
-    return ret;
-}
+// Copies this host array's elements into the data buffer of the device-side
+// cuarray<T> struct at dptr. No allocation is done here.
+// Returns 1 if cudaMemcpy succeeds, -1 otherwise.
+template<typename T> __host__ int cuarray<T>::_device_send_copy(cuarray<T> *dptr)
+{
+    // Data pointer stored in the device-side struct (NULL when dptr is NULL).
+    T* const devbuf = device_data_ptr(dptr);
+
+    // Host -> device copy of length elements.
+    const cudaError_t status = cudaMemcpy(devbuf,data,sizeof(T)*length,cudaMemcpyHostToDevice);
+
+    return (status==cudaSuccess) ? 1 : -1;
+}
-
+
-template<typename T> __host__ int cuarray<T>::device_pull(cuarray<T> *dptr)
+// Copies the contents of the device-side array at dptr into this host array.
+//   dptr - device pointer to a cuarray<T> struct
+// The host array is resized to the device length when the two differ.
+// Returns  1 when the elements were copied,
+//          0 when there was nothing to copy (empty array or NULL data pointer),
+//         -1 when dptr is NULL,
+//         -2 when cudaMemcpy reported an error.
+template<typename T> __host__ int cuarray<T>::device_pull(cuarray<T> *dptr)
-{
+{
-    int ret = 0;
+    int ret = 0;
-    int dlength;
+    int dlength;
-    T* ddata;
+    T* ddata;
-    cudaError_t err;
+    cudaError_t err;
-
+
-    if(dptr==NULL) 
+    if(dptr==NULL) 
-    {
+    {
-        ret = -1; // null d pointer
+        ret = -1; // null d pointer
-        return ret;
+        return ret;
-    }
+    }
-
+
-    dlength = device_length(dptr);
+    dlength = device_length(dptr);
-    if(dlength!=length)
+    if(dlength!=length)
-    {
+    {
-        this->resize(dlength);
+        this->resize(dlength);
-    }
+    }
-
+
-    ddata = device_data_ptr(dptr);
+    ddata = device_data_ptr(dptr);
-
+
-    if(length>0 && data!=NULL && ddata!=NULL)
+    if(length>0 && data!=NULL && ddata!=NULL)
-    {
+    {
-        err = cudaMemcpy(data,dptr,length*sizeof(T),cudaMemcpyDeviceToHost);
+        // The elements live in the device data buffer (ddata); dptr only points
+        // at the small struct that holds the buffer pointer and the length.
+        err = cudaMemcpy(data,ddata,length*sizeof(T),cudaMemcpyDeviceToHost);
-        if(err==cudaSuccess)
+        if(err==cudaSuccess)
-        {
+        {
-            ret = 1;
+            ret = 1;
-        }
+        }
-        else
+        else
-        {
+        {
-            ret = -2;
+            ret = -2;
-        }
+        }
-    }
+    }
-
+
-    return ret;
+    return ret;
-}
+}
-
+
-template<typename T> __host__ int cuarray<T>::device_free(cuarray<T> **dptr)
+// Frees a device-side cuarray<T>: first its data buffer, then the struct itself.
+//   dptr - address of the device pointer; *dptr is set to NULL after freeing
+// Returns 1 if something was freed, 0 if dptr or *dptr was NULL.
+template<typename T> __host__ int cuarray<T>::device_free(cuarray<T> **dptr)
-{
+{
-    int ret = 0;
+    int ret = 0;
-    cuarray<T> dlocal;
+    cuarray<T> dlocal;
-
+
-    if(*dptr!=NULL)
+    if(dptr!=NULL && *dptr!=NULL)
-    {
+    {
-        cudaMemcpy(&dlocal,dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
+        // Read the struct from the device address (*dptr), not from the host
+        // address of the pointer variable, to learn where the data buffer is.
+        // Only trust dlocal.data if that read succeeded.
+        cudaError_t err = cudaMemcpy(&dlocal,*dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
-        if(dlocal.data!=NULL)
+        if(err==cudaSuccess && dlocal.data!=NULL)
-        {
+        {
-            cudaFree(dlocal.data);
+            cudaFree(dlocal.data);
-            dlocal.data = NULL;
+            dlocal.data = NULL;
-        }
+        }
-
+
-        cudaFree(*dptr);
+        cudaFree(*dptr);
-        *dptr = NULL;
+        *dptr = NULL;
-        ret = 1;
+        ret = 1;
-    }
+    }
-
+
-    dlocal.data = NULL;
+    // Clear the host copy so it no longer holds a device pointer when it goes out of scope.
+    dlocal.data = NULL;
-    dlocal.length = -1;
+    dlocal.length = -1;
-
+
-    return ret;
+    return ret;
-}
+}
-
+
-template<typename T> __host__ int cuarray<T>::device_length(cuarray<T> *dptr)
+// Returns the length field of the cuarray<T> struct stored at device pointer dptr.
+// Returns -1 when dptr is NULL. The result of cudaMemcpy is not checked here.
+template<typename T> __host__ int cuarray<T>::device_length(cuarray<T> *dptr)
-{
+{
-    int ret = -1;
+    int ret = -1;
-    cuarray<T> dlocal;
+    cuarray<T> dlocal;
-
+
-    if(dptr==NULL)
+    if(dptr==NULL)
-    {
+    {
-        return ret;
+        return ret;
-    }
+    }
-
+
-    cudaMemcpy(&dlocal,dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
+    // Bring the device-side struct to the host so its fields can be read.
+    cudaMemcpy(&dlocal,dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
-    ret = dlocal.length;
+    ret = dlocal.length;
-
+
-    dlocal.data = NULL;
+    // NOTE(review): dlocal now holds a device pointer; it is cleared before dlocal
+    // goes out of scope, presumably so ~cuarray does not act on it - confirm.
+    dlocal.data = NULL;
-    dlocal.length = -1;
+    dlocal.length = -1;
-
+
-    return ret;
+    return ret;
-}
+}
-
+
-template<typename T> __host__ T* cuarray<T>::device_data_ptr(cuarray<T> *dptr)
+// Returns the data pointer stored in the cuarray<T> struct at device pointer dptr.
+// Returns NULL when dptr is NULL. The result of cudaMemcpy is not checked here.
+template<typename T> __host__ T* cuarray<T>::device_data_ptr(cuarray<T> *dptr)
-{
+{
-    T* ret = NULL;
+    T* ret = NULL;
-    cuarray<T> dlocal;
+    cuarray<T> dlocal;
-
+
-    if(dptr==NULL)
+    if(dptr==NULL)
-    {
+    {
-        return ret;
+        return ret;
-    }
+    }
-
+
-    cudaMemcpy(&dlocal,dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
+    // Bring the device-side struct to the host so its data field can be read.
+    cudaMemcpy(&dlocal,dptr,sizeof(cuarray<T>),cudaMemcpyDeviceToHost);
-    ret = dlocal.data;
+    ret = dlocal.data;
-
+
-    dlocal.data = NULL;
+    // NOTE(review): dlocal is cleared before it goes out of scope, presumably so
+    // ~cuarray does not act on the device pointer it copied - confirm.
+    dlocal.data = NULL;
-    dlocal.length = -1;
+    dlocal.length = -1;
-
+
-    return ret;
+    return ret;
-}
+}
-
+
-template<typename T>  __device__ __host__ int cuarray<T>::size() const
-{
-    return this->length;
-}
+// Returns the value of the length member.
+template<typename T>  __device__ __host__ int cuarray<T>::size() const
+{
+    return length;
+}
-
+
-template<typename T>  __device__ __host__ T& cuarray<T>::at(const int I)
-{
-    return this->data[I];
-}
+// Mutable reference to element I. No bounds check is performed.
+template<typename T>  __device__ __host__ T& cuarray<T>::at(const int I)
+{
+    return data[I];
+}
-
+
-template<typename T>  __device__ __host__ const T& cuarray<T>::at(const int I) const
-{
-    return this->data[I];
-}
+// Read-only reference to element I. No bounds check is performed.
+template<typename T>  __device__ __host__ const T& cuarray<T>::at(const int I) const
+{
+    return data[I];
+}
-
+
-template<typename T> __device__ __host__ T& cuarray<T>::operator[](const int I)
-{
-    return this->data[I];
-}
+// Subscript access: mutable reference to element I. No bounds check is performed.
+template<typename T> __device__ __host__ T& cuarray<T>::operator[](const int I)
+{
+    return data[I];
+}
-
+
-template<typename T> __device__ __host__ const T& cuarray<T>::operator[](const int I) const
-{
-    return this->data[I];
-}
+// Subscript access: read-only reference to element I. No bounds check is performed.
+template<typename T> __device__ __host__ const T& cuarray<T>::operator[](const int I) const
+{
+    return data[I];
+}
-
+
-};
+};
-
+
-
+
 #endif
--- a/include/amsculib2/amscuda_binarrrw.hpp
+++ b/include/amsculib2/amscuda_binarrrw.hpp
@ -1,19 +1,19 @@
-#ifndef __AMSCUDA_BINARRRW_HPP__
+#ifndef __AMSCUDA_BINARRRW_HPP__
-#define __AMSCUDA_BINARRRW_HPP__
+#define __AMSCUDA_BINARRRW_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-template<typename T> int fread_ndarray(FILE *fp, cuarray<int32_t> *shape, cuarray<T> *buffer);
+template<typename T> int fread_ndarray(FILE *fp, cuarray<int32_t> *shape, cuarray<T> *buffer);
-template<typename T> int fwrite_ndarray(FILE *fp, const cuarray<int32_t> *shape, const cuarray<T> *buffer);
+template<typename T> int fwrite_ndarray(FILE *fp, const cuarray<int32_t> *shape, const cuarray<T> *buffer);
-
+
-template<typename T> int fwrite_buffer(FILE *fp, const int N, const T *buffer);
+template<typename T> int fwrite_buffer(FILE *fp, const int N, const T *buffer);
-template<typename T> int fread_buffer(FILE *fp, const int Nmax, const T *buffer);
+template<typename T> int fread_buffer(FILE *fp, const int Nmax, const T *buffer);
-
+
-
+
-}; //end namespace amscuda
+}; //end namespace amscuda
-
+
-#include <amsculib2/amscuda_binarrrw_impl.hpp>
+#include <amsculib2/amscuda_binarrrw_impl.hpp>
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amscuda_binarrrw_impl.hpp
+++ b/include/amsculib2/amscuda_binarrrw_impl.hpp
@ -1,194 +1,194 @@
-#ifndef __AMSCUDA_BINARRRW_IMPL_HPP__
+#ifndef __AMSCUDA_BINARRRW_IMPL_HPP__
-#define __AMSCUDA_BINARRRW_IMPL_HPP__
+#define __AMSCUDA_BINARRRW_IMPL_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-template<typename T> int fread_ndarray(FILE *fp, cuarray<int32_t> *shape, cuarray<T> *buffer)
-{
-    int ret = 1;
-    int I;
-    long piprod;
-    int32_t q;
-    int cnt;
-
-    int32_t Nd;
-
-    if(fp!=NULL)
-    {
-        if(!feof(fp))
-        {
-            cnt = fread(&Nd,sizeof(int32_t),1,fp);
-            if(Nd>0 && cnt>0)
-            {
-                shape->resize(Nd);
-                piprod = 1;
-                for(I=0;I<Nd;I++)
-                {
-                    cnt = fread(&q,sizeof(int32_t),1,fp);
-                    shape->at(I) = q;
-                    if(q>0)
-                    {
-                        piprod = piprod*q;
-                    }
-                    else
-                    {
-                        piprod = 0;
-                    }
-                }
-
-                buffer->resize(piprod);
-                if(piprod>0)
-                {
-                    cnt = fread((buffer->data),sizeof(T),piprod,fp);
-                    if(piprod==cnt)
-                    {
-                        ret = 1;
-                    }
-                    else
-                    {
-                        printf("fread_ndarray, read %d values, expecting %ld\n",cnt,piprod);
-                        ret = 0;
-                    }
-                }
-            }
-            else
-            {
-                printf("fread_ndarray: Read a number of dimensions<=0.\n");
-                Nd = 0;
-                shape->resize(0);
-                buffer->resize(0);
-            }
-        }
-        else
-        {
-            printf("fread_ndarray: fp=NULL.\n");
-            ret = 0;
-        }
-    }
-    else
-    {
-        ret = 0;
-    }
-
-    return ret;
-}
+// Reads one n-dimensional array from the binary stream fp.
+// Layout read: int32 Nd, then Nd int32 dimensions, then prod(dimensions)
+// elements of T (the layout fwrite_ndarray writes).
+//   fp     - input stream
+//   shape  - resized to Nd and filled with the dimensions
+//   buffer - resized to the product of the dimensions and filled with the data
+// Returns 1 on success (a stored Nd<=0 yields empty arrays and still returns 1),
+// 0 on a NULL or end-of-file stream, a truncated header, an array too large for
+// an int length, a failed resize, or a short data read.
+template<typename T> int fread_ndarray(FILE *fp, cuarray<int32_t> *shape, cuarray<T> *buffer)
+{
+    int ret = 1;
+    int I;
+    long piprod;
+    int32_t q;
+    int cnt;
+    int toobig = 0;  // set when the element count would exceed a 32-bit int
+
+    int32_t Nd = 0;  // initialised so a failed header read is never inspected as garbage
+
+    if(fp==NULL)
+    {
+        printf("fread_ndarray: fp=NULL.\n");
+        return 0;
+    }
+    if(feof(fp))
+    {
+        printf("fread_ndarray: end of file.\n");
+        return 0;
+    }
+
+    cnt = (int) fread(&Nd,sizeof(int32_t),1,fp);
+    if(cnt<1)
+    {
+        // Nothing was read (e.g. first read past the last record): this is a failure.
+        printf("fread_ndarray: could not read the number of dimensions.\n");
+        shape->resize(0);
+        buffer->resize(0);
+        return 0;
+    }
+    if(Nd<=0)
+    {
+        printf("fread_ndarray: Read a number of dimensions<=0.\n");
+        shape->resize(0);
+        buffer->resize(0);
+        return ret;
+    }
+
+    shape->resize(Nd);
+    piprod = 1;
+    for(I=0;I<Nd;I++)
+    {
+        q = 0;
+        cnt = (int) fread(&q,sizeof(int32_t),1,fp);
+        if(cnt<1)
+        {
+            printf("fread_ndarray: truncated shape header.\n");
+            shape->resize(0);
+            buffer->resize(0);
+            return 0;
+        }
+        shape->at(I) = q;
+        if(q<=0)
+        {
+            piprod = 0; // any non-positive dimension makes the array empty
+        }
+        else if(piprod > 0x7fffffffL/q)
+        {
+            toobig = 1; // product would not fit the int length returned by size()
+        }
+        else
+        {
+            piprod = piprod*q;
+        }
+    }
+
+    if(toobig && piprod>0)
+    {
+        printf("fread_ndarray: array has too many elements.\n");
+        buffer->resize(0);
+        return 0;
+    }
+
+    buffer->resize((int)piprod);
+    if(piprod>0)
+    {
+        if(buffer->data==NULL || buffer->size()<piprod)
+        {
+            printf("fread_ndarray: could not allocate buffer.\n");
+            return 0;
+        }
+        cnt = (int) fread((buffer->data),sizeof(T),(size_t)piprod,fp);
+        if(piprod!=cnt)
+        {
+            printf("fread_ndarray, read %d values, expecting %ld\n",cnt,piprod);
+            ret = 0;
+        }
+    }
+
+    return ret;
+}
-
+
-template<typename T> int fwrite_ndarray(FILE *fp, const cuarray<int32_t> *shape, const cuarray<T> *buffer)
+// Writes one n-dimensional array to the binary stream fp.
+// Layout written: int32 Nd, then Nd int32 dimensions, then the elements of buffer.
+//   fp     - output stream
+//   shape  - dimensions; their product must equal buffer->size()
+//   buffer - the elements, stored contiguously
+// Returns 1 on success, 0 if fp is NULL, the sizes disagree, or a write comes up short.
+template<typename T> int fwrite_ndarray(FILE *fp, const cuarray<int32_t> *shape, const cuarray<T> *buffer)
-{
+{
-    int ret = 1;
+    int ret = 1;
-    long piprod;
+    long piprod;
-    int I;
+    int I;
-    int32_t Nd;
+    int32_t Nd;
-
+
-    if(fp==NULL)
+    if(fp==NULL)
-    {
+    {
-        ret = 0;
+        ret = 0;
-        printf("fwrite_ndarray: fp=NULL\n");
+        printf("fwrite_ndarray: fp=NULL\n");
-        return ret;
+        return ret;
-    }
+    }
-
+
-    piprod = 1;
+    piprod = 1;
-    for(I=0;I<shape->size();I++)
+    for(I=0;I<shape->size();I++)
-    {
+    {
-        if(shape->at(I)>0)
+        if(shape->at(I)>0)
-        {
+        {
-            piprod = piprod*shape->at(I);
+            piprod = piprod*shape->at(I);
-        }
+        }
-        else
+        else
-        {
+        {
-            piprod = 0;
+            piprod = 0;
-        }
+        }
-    }
+    }
-
+
-    Nd = (int32_t) shape->size();
+    Nd = (int32_t) shape->size();
-
+
-    if(piprod!=buffer->size())
+    if(piprod!=buffer->size())
-    {
+    {
-        ret = 0;
+        ret = 0;
-        printf("fwrite_ndarray: buffer is size %ld, while shape is size %ld\n",(long)buffer->size(),(long)piprod);
+        printf("fwrite_ndarray: buffer is size %ld, while shape is size %ld\n",(long)buffer->size(),(long)piprod);
-        return ret;
+        return ret;
-    }
+    }
-
+
-    fwrite(&Nd,sizeof(int32_t),1,fp);
+    // Every fwrite is checked: a short write (disk full, closed stream) must not
+    // be reported as success.
+    if(fwrite(&Nd,sizeof(int32_t),1,fp)!=1) ret = 0;
-    if(Nd>0)
+    if(ret==1 && Nd>0)
-    {
+    {
-        fwrite(shape->data,sizeof(int32_t),Nd,fp);
+        if(fwrite(shape->data,sizeof(int32_t),Nd,fp)!=(size_t)Nd) ret = 0;
-        if(piprod>0)
+        if(ret==1 && piprod>0)
-        {
+        {
-            fwrite(buffer->data,sizeof(T),buffer->size(),fp);
+            if(fwrite(buffer->data,sizeof(T),buffer->size(),fp)!=(size_t)buffer->size()) ret = 0;
-        }
+        }
-    }
+    }
-
+    if(ret==0) printf("fwrite_ndarray: write failed\n");
+
-    return ret;
+    return ret;
-}
+}
-
+
-template<typename T> int fwrite_buffer(FILE *fp, const int N, const T *buffer)
-{
-    int ret = 0;
-    int Nd = 1;
-    
-    if(fp==NULL)
-    {
-        ret = 0;
-        printf("fwrite_buffer: fp=NULL\n");
-        return ret;
-    }
-
-    fwrite(&Nd,sizeof(int32_t),1,fp);
-    fwrite(&N,sizeof(int32_t),1,fp);
-    fwrite(buffer,sizeof(T),N,fp);
-    
-    return ret;
-}
+// Writes a plain C buffer to fp as a one-dimensional array.
+// Layout written: int32 1 (number of dimensions), int32 element count, elements.
+//   fp     - output stream
+//   N      - number of elements in buffer; N<=0 or a NULL buffer writes an empty array
+//   buffer - the elements to write
+// Returns 1 when everything was written, 0 if fp is NULL or a write comes up short.
+template<typename T> int fwrite_buffer(FILE *fp, const int N, const T *buffer)
+{
+    int ret = 0;
+    const int32_t Nd = 1;
+    // Never hand a negative count to fwrite (it would convert to a huge size_t).
+    const int32_t Nw = (N>0 && buffer!=NULL) ? (int32_t)N : 0;
+
+    if(fp==NULL)
+    {
+        ret = 0;
+        printf("fwrite_buffer: fp=NULL\n");
+        return ret;
+    }
+
+    if(fwrite(&Nd,sizeof(int32_t),1,fp)==1 &&
+       fwrite(&Nw,sizeof(int32_t),1,fp)==1 &&
+       (Nw==0 || fwrite(buffer,sizeof(T),(size_t)Nw,fp)==(size_t)Nw))
+    {
+        ret = 1;
+    }
+    else
+    {
+        printf("fwrite_buffer: write failed\n");
+    }
+
+    return ret;
+}
-
+
-template<typename T> int fread_buffer(FILE *fp, const int Nmax, const T *buffer)
-{
-    int ret = 0;
-    
-    int cnt;
-    int32_t Nd;
-    int32_t *dims = NULL;
-    int piprod;
-    int32_t q;
-    int I;
-
-    int Nr;
-
-
-    if(fp==NULL) {ret = -1; return ret;}
-    if(feof(fp)) {ret = -2; return ret;}
-
-    cnt = fread(&Nd,sizeof(int32_t),1,fp);
-    if(Nd>0 && cnt>0)
-    {
-        piprod = 1;
-        dims = new(std::nothrow) int32_t[Nd];
-        for(I=0;I<Nd;I++)
-        {
-            cnt = fread(&q,sizeof(int32_t),1,fp);
-            dims[I] = q;
-            piprod = piprod*dims[I];
-            if(piprod==cnt)
-            {
-                ret = 1;
-            }
-            else
-            {
-                printf("fwrite_buffer, read %d values, expecting %d\n",cnt,piprod);
-            }
-        }
-
-        Nr = amscuda::min<int32_t>(Nmax,piprod);
-        cnt = fread(buffer,sizeof(T),Nr,fp);
-    }
-
-    if(dims!=NULL) {delete[] dims; dims=NULL;}
-
-    return ret;
-}
+// Reads a stored array (int32 Nd, Nd int32 dimensions, elements) from fp into a
+// plain C buffer, reading at most Nmax elements.
+//   fp     - input stream
+//   Nmax   - capacity of buffer, in elements
+//   buffer - destination; must point to writable storage
+// Returns 1 when min(Nmax, stored element count) elements were read,
+//         0 on a missing/truncated header, a NULL buffer, or a short read,
+//        -1 if fp is NULL, -2 if fp is already at end of file.
+// If the stored array holds more than Nmax elements the remainder is left unread
+// in the stream.
+// NOTE(review): buffer is declared const T* although it is written to; the
+// signature is kept for compatibility and the const is cast away below. Consider
+// changing the declaration and definition to T* together.
+template<typename T> int fread_buffer(FILE *fp, const int Nmax, const T *buffer)
+{
+    int ret = 0;
+
+    int cnt;
+    int32_t Nd = 0;
+    long long piprod;   // stored element count, not grown further once it exceeds Nmax
+    int32_t q;
+    int I;
+
+    int Nr;
+
+    if(fp==NULL) {ret = -1; return ret;}
+    if(feof(fp)) {ret = -2; return ret;}
+
+    cnt = (int) fread(&Nd,sizeof(int32_t),1,fp);
+    if(cnt<1 || Nd<=0) {return ret;}
+
+    piprod = 1;
+    for(I=0;I<Nd;I++)
+    {
+        q = 0;
+        cnt = (int) fread(&q,sizeof(int32_t),1,fp);
+        if(cnt<1)
+        {
+            printf("fread_buffer: truncated shape header.\n");
+            return ret;
+        }
+        if(q<=0)
+        {
+            piprod = 0; // a non-positive dimension means an empty array
+        }
+        else if(piprod<=Nmax)
+        {
+            // piprod<=Nmax here, so the product cannot overflow long long
+            piprod = piprod*q;
+        }
+    }
+
+    // Read no more than the caller's buffer can hold.
+    Nr = (piprod<(long long)Nmax) ? (int)piprod : Nmax;
+    if(Nr<0) {Nr = 0;}
+
+    if(Nr==0)
+    {
+        ret = 1; // nothing to read
+        return ret;
+    }
+    if(buffer==NULL) {return ret;}
+
+    // fread needs a writable pointer (see the note in the header comment).
+    cnt = (int) fread(const_cast<T*>(buffer),sizeof(T),(size_t)Nr,fp);
+    if(cnt==Nr)
+    {
+        ret = 1;
+    }
+    else
+    {
+        printf("fread_buffer, read %d values, expecting %d\n",cnt,Nr);
+    }
+
+    return ret;
+}
-
+
-}; //end namespace amscuda
+}; //end namespace amscuda
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amscugeom.hpp
+++ b/include/amsculib2/amscugeom.hpp
@ -1,11 +1,11 @@
-#ifndef __AMSCUGEOM_HPP__
+#ifndef __AMSCUGEOM_HPP__
-#define __AMSCUGEOM_HPP__
+#define __AMSCUGEOM_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-
+
-}; //end namespace amscuda
+}; //end namespace amscuda
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amsculib2.hpp
+++ b/include/amsculib2/amsculib2.hpp
@ -1,70 +1,70 @@
-#ifndef __AMSCULIB2_HPP__
+#ifndef __AMSCULIB2_HPP__
-#define __AMSCULIB2_HPP__
+#define __AMSCULIB2_HPP__
-
+
-//Std Lib Includes
+//Std Lib Includes
-#include <stdio.h>
+#include <stdio.h>
-#include <stdlib.h>
+#include <stdlib.h>
-#include <math.h>
+#include <math.h>
-#include <stdint.h>
+#include <stdint.h>
-#include <time.h>
+#include <time.h>
-#include <new>
+#include <new>
-
+
-#include <cuda_runtime_api.h> //where all the cuda functions live
+#include <cuda_runtime_api.h> //where all the cuda functions live
-#include <cuda_runtime.h>
+#include <cuda_runtime.h>
-#include <cuda.h>
+#include <cuda.h>
-
+
-//Dependencies
+//Dependencies
-
+
-//Predeclarations
+//Predeclarations
+//NOTE(review): these are declared at global scope, while cuvect2 is defined inside
+//namespace amscuda (cuvect2.hpp). As written they name ::cuvect2 rather than
+//amscuda::cuvect2, and presumably the same holds for the other five - confirm
+//whether they are still needed or belong inside the namespace.
-class cuvect2;
+class cuvect2;
-class cuvect3;
+class cuvect3;
-class cuvect4;
+class cuvect4;
-class cuvect2f;
+class cuvect2f;
-class cuvect3f;
+class cuvect3f;
-class cuvect4f;
+class cuvect4f;
-
+
-//Need a way to define the same symbols using both host and device code
+//Need a way to define the same symbols using both host and device code
-//A solution was found here: https://stackoverflow.com/questions/9457572/cuda-host-and-device-using-same-constant-memory
+//A solution was found here: https://stackoverflow.com/questions/9457572/cuda-host-and-device-using-same-constant-memory
-#ifdef __CUDA_ARCH__
+#ifdef __CUDA_ARCH__
-#define AMSCU_CONST __constant__
+#define AMSCU_CONST __constant__
-#else
+#else
-#define AMSCU_CONST
+#define AMSCU_CONST
-#endif
+#endif
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-    //default thread and block execution
+    //default thread and block execution
-    AMSCU_CONST static const int amscu_defnblocks = 256;
+    AMSCU_CONST static const int amscu_defnblocks = 256;
-    AMSCU_CONST static const int amscu_defnthreads = 512;
+    AMSCU_CONST static const int amscu_defnthreads = 512;
-
+
-    //default numthreads to execute on cpu
+    //default numthreads to execute on cpu
-    AMSCU_CONST static const int amscu_defcputhreads = 8;
+    AMSCU_CONST static const int amscu_defcputhreads = 8;
-
+
-}; //end namespace amscuda
+}; //end namespace amscuda
-
+
-//Components
+//Components
-#include <amsculib2/amscu_cudafunctions.hpp>
+#include <amsculib2/amscu_cudafunctions.hpp>
-#include <amsculib2/amscumath.hpp>
+#include <amsculib2/amscumath.hpp>
-#include <amsculib2/amscu_comp64.hpp>
+#include <amsculib2/amscu_comp64.hpp>
-#include <amsculib2/amscu_comp128.hpp>
+#include <amsculib2/amscu_comp128.hpp>
-#include <amsculib2/cuvect2.hpp>
+#include <amsculib2/cuvect2.hpp>
-#include <amsculib2/cuvect3.hpp>
+#include <amsculib2/cuvect3.hpp>
-#include <amsculib2/cuvect4.hpp>
+#include <amsculib2/cuvect4.hpp>
-#include <amsculib2/cuvect2f.hpp>
+#include <amsculib2/cuvect2f.hpp>
-#include <amsculib2/cuvect3f.hpp>
+#include <amsculib2/cuvect3f.hpp>
-#include <amsculib2/cuvect4f.hpp>
+#include <amsculib2/cuvect4f.hpp>
-#include <amsculib2/amscugeom.hpp>
+#include <amsculib2/amscugeom.hpp>
-#include <amsculib2/amscuarray.hpp>
+#include <amsculib2/amscuarray.hpp>
-#include <amsculib2/amscuda_binarrrw.hpp>
+#include <amsculib2/amscuda_binarrrw.hpp>
-#include <amsculib2/amscu_random.hpp>
+#include <amsculib2/amscu_random.hpp>
-
+
-#include <amsculib2/amscuarray_dops.hpp>
+#include <amsculib2/amscuarray_dops.hpp>
-
+
-#include <amsculib2/amscurarray.cuh>
+#include <amsculib2/amscurarray.cuh>
-
+
-
+
-
+
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amscumath.hpp
+++ b/include/amsculib2/amscumath.hpp
@ -1,56 +1,56 @@
-#ifndef __AMSCUMATH_HPP__
+#ifndef __AMSCUMATH_HPP__
-#define __AMSCUMATH_HPP__
+#define __AMSCUMATH_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-
+
-
+
-    //Problem: These are not in the namespace
+    //Problem: These are not in the namespace
-    //#define nan NAN
+    //#define nan NAN
-    //#define fnan (float) NAN
+    //#define fnan (float) NAN
-    //#define inf INFINITY
+    //#define inf INFINITY
-    //#define finf (float) INFINITY
+    //#define finf (float) INFINITY
-    //#define pi 3.1415926535897936
+    //#define pi 3.1415926535897936
-
+
-    //These need to be the same symbol for both host and device code
+    //These need to be the same symbol for both host and device code
-    AMSCU_CONST static const double nan = NAN;
+    AMSCU_CONST static const double nan = NAN;
-    AMSCU_CONST static const float fnan = (float) NAN;
+    AMSCU_CONST static const float fnan = (float) NAN;
-    AMSCU_CONST static const double inf = INFINITY;
+    AMSCU_CONST static const double inf = INFINITY;
-    AMSCU_CONST static const float finf = (float) INFINITY;
+    AMSCU_CONST static const float finf = (float) INFINITY;
-    AMSCU_CONST static const double pi = 3.1415926535897936;
+    //pi is written with more digits than a double holds so that the literal rounds
+    //to the nearest double; the previous literal (...7936) rounded one ulp too high
+    AMSCU_CONST static const double pi = 3.14159265358979323846;
-    AMSCU_CONST static const float pif = 3.1415926535897936;
+    //single-precision pi, written as a float literal (same float value as before)
+    AMSCU_CONST static const float pif = 3.14159265358979323846f;
-
+
-    __host__ __device__ double dabs(double x);
+    __host__ __device__ double dabs(double x);
-    __host__ __device__ float fabs(float x);
+    __host__ __device__ float fabs(float x);
-    
+    
-    template<typename T> __host__ __device__ T abs(const T in)
+    template<typename T> __host__ __device__ T abs(const T in)
-    {
+    {
-        T ret = in;
+        T ret = in;
-        if(in<0) ret = -in;
+        if(in<0) ret = -in;
-        return ret;
+        return ret;
-    }
+    }
-
+
-    __host__ __device__ double mod(double a, double md);
+    __host__ __device__ double mod(double a, double md);
-    __host__ __device__ float mod(float a, float md);
+    __host__ __device__ float mod(float a, float md);
-    __host__ __device__ int mod(int x, int n);
+    __host__ __device__ int mod(int x, int n);
-    __host__ __device__ long mod(long x, long n);
+    __host__ __device__ long mod(long x, long n);
-
+
-    __host__ __device__ int truediv(int x, int y);
+    __host__ __device__ int truediv(int x, int y);
-    __host__ __device__ long truediv(long x, long y);
+    __host__ __device__ long truediv(long x, long y);
-
+
-    template<typename T> __host__ __device__ T min(T a, T b);
+    template<typename T> __host__ __device__ T min(T a, T b);
-    template<typename T> __host__ __device__ T max(T a, T b);
+    template<typename T> __host__ __device__ T max(T a, T b);
-
+
-    __device__ __host__ double arg(double x, double y);
+    __device__ __host__ double arg(double x, double y);
-    __device__ __host__ void get_azel(double x, double y, double z, double *az, double *el);
+    __device__ __host__ void get_azel(double x, double y, double z, double *az, double *el);
-
+
-    void test_amscumath1();
+    void test_amscumath1();
-
+
-
+
-}; //end namespace amscuda
+}; //end namespace amscuda
-
+
-#include <amsculib2/amscumath_impl.hpp>
+#include <amsculib2/amscumath_impl.hpp>
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amscumath_impl.hpp
+++ b/include/amsculib2/amscumath_impl.hpp
@ -1,42 +1,42 @@
-#ifndef __AMSCUMATH_IMPL_HPP__
+#ifndef __AMSCUMATH_IMPL_HPP__
-#define __AMSCUMATH_IMPL_HPP__
+#define __AMSCUMATH_IMPL_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-template<typename T> __host__ __device__ T min(T a, T b)
-{
-    if(a>b)
-    {
-        return b;
-    }
-    else
-    {
-        return a;
-    }
-    return a;
-}
+// Returns the smaller of a and b (a when neither compares greater).
+template<typename T> __host__ __device__ T min(T a, T b)
+{
+    return (a>b) ? b : a;
+}
-
+
-template<typename T> __host__ __device__ T max(T a, T b)
-{
-    if(a>b)
-    {
-        return a;
-    }
-    else
-    {
-        return b;
-    }
-    return a;
-}
+// Returns the larger of a and b (b when a does not compare greater).
+template<typename T> __host__ __device__ T max(T a, T b)
+{
+    return (a>b) ? a : b;
+}
-
+
-template<> __host__ __device__ double min(double a, double b);
+template<> __host__ __device__ double min(double a, double b);
-template<> __host__ __device__ float min(float a, float b);
+template<> __host__ __device__ float min(float a, float b);
-template<> __host__ __device__ double max(double a, double b);
+template<> __host__ __device__ double max(double a, double b);
-template<> __host__ __device__ float max(float a, float b);
+template<> __host__ __device__ float max(float a, float b);
-
+
-
+
-}; //end namespace amscuda
+}; //end namespace amscuda
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/amscurarray.cuh
+++ b/include/amsculib2/amscurarray.cuh
@ -1,66 +1,66 @@
-#ifndef __AMSCURARRAY_HPP__
+#ifndef __AMSCURARRAY_HPP__
-#define __AMSCURARRAY_HPP__
+#define __AMSCURARRAY_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-//Cuda ragged array class
+//Cuda ragged array class
+//Holds Narrays separate arrays of T, each with its own length N[i], together with
+//a pointer (devptr) to a mirror object on the device.
+//NOTE(review): the semantics of push/pull/resizearray are not visible in this
+//header; see amscurarray_impl.cuh before relying on the comments below.
-template<typename T> class curarray
+template<typename T> class curarray
-{
+{
-public:
+public:
-    int device;
+    int device; //presumably the CUDA device index the mirror lives on - TODO confirm
-    curarray* devptr; //pointer to mirror class on the device
+    curarray* devptr; //pointer to mirror class on the device
-
+
-    int Narrays; //number of arrays
+    int Narrays; //number of arrays
-    
+    
-    int *N; //dimension of each array
+    int *N; //dimension of each array
-    T** hostarrayptrs; //pointers to each array on the host - null on the device
+    T** hostarrayptrs; //pointers to each array on the host - null on the device
-    T** devarrayptrs; //pointers to each array on the device
+    T** devarrayptrs; //pointers to each array on the device
-        //the double pointer is a host pointer to device pointers on the host class
+        //the double pointer is a host pointer to device pointers on the host class
-        //for the device class, only the second set of arrays is in use
+        //for the device class, only the second set of arrays is in use
-    
+    
-    //the constructor and destructor set all pointers to NULL, they
+    //the constructor and destructor set all pointers to NULL, they
-    // do *not* manage memory. This is done with curarray_new and curarray_delete
+    // do *not* manage memory. This is done with curarray_new and curarray_delete
-    __device__ __host__ curarray();
+    __device__ __host__ curarray();
-    __device__ __host__ ~curarray();
+    __device__ __host__ ~curarray();
-    
+    
-    __host__ int push();
+    //host-only transfer and resize operations, each returning an int status
+    //(presumably push = host to device, pull = device to host - TODO confirm)
+    __host__ int push();
-    __host__ int pull();
+    __host__ int pull();
-    //__device__ int dev_resizearray(int arraynum, int arraysize);
+    //__device__ int dev_resizearray(int arraynum, int arraysize);
-    __host__ int resizearray(int arraynum, int arraysize);
+    __host__ int resizearray(int arraynum, int arraysize);
-    // I may want a way to resize arrays on the device without pushing/pulling all the array contents
+    // I may want a way to resize arrays on the device without pushing/pulling all the array contents
-    
+    
-
+
-};
+};
-
+
-template<typename T> int curarray_new(curarray<T>** ptr, int Narrays);
+template<typename T> int curarray_new(curarray<T>** ptr, int Narrays);
-
+
-template<typename T> int curarray_delete(curarray<T>** ptr);
+template<typename T> int curarray_delete(curarray<T>** ptr);
-
+
-template<typename T> int curarray_device_new(curarray<T> *hostptr);
+template<typename T> int curarray_device_new(curarray<T> *hostptr);
-
+
-template<typename T> int curarray_device_delete(curarray<T> *hostptr);
+template<typename T> int curarray_device_delete(curarray<T> *hostptr);
-
+
-template<typename T> int curarray_push(curarray<T> *hostptr);
+template<typename T> int curarray_push(curarray<T> *hostptr);
-
+
-template<typename T> int curarray_pull(curarray<T> *hostptr);
+template<typename T> int curarray_pull(curarray<T> *hostptr);
-
+
-
+
-//template<typename T> int curarray_host_fillall(curarray<T> *hostptr, const T &val);
+//template<typename T> int curarray_host_fillall(curarray<T> *hostptr, const T &val);
-//template<typename T> int curarray_device_fillall(curarray<T> *hostptr, const T &val);
+//template<typename T> int curarray_device_fillall(curarray<T> *hostptr, const T &val);
-
+
-
+
-//template<typename T> __host__ int curarray_deletearray(curarray<T> *hostptr, int arrayindex);
+//template<typename T> __host__ int curarray_deletearray(curarray<T> *hostptr, int arrayindex);
-//template<typename T> __device__ int curarray_dev_deletearray(curarray<T> *devptr, int arrayindex);
+//template<typename T> __device__ int curarray_dev_deletearray(curarray<T> *devptr, int arrayindex);
-
+
-//template<typename T> __host__ int curarray_allocarray(curarray<T> *hostptr, int arrayindex, int size);
+//template<typename T> __host__ int curarray_allocarray(curarray<T> *hostptr, int arrayindex, int size);
-//template<typename T> __device__ int curarray_dev_allocarray(curarray<T> *devptr, int arrayindex, int size);
+//template<typename T> __device__ int curarray_dev_allocarray(curarray<T> *devptr, int arrayindex, int size);
-
+
-
+
-void test_amscurarray1();
+void test_amscurarray1();
-
+
-};
+};
-
+
-#include <amsculib2/amscurarray_impl.cuh>
+#include <amsculib2/amscurarray_impl.cuh>
-
+
 #endif
--- a/include/amsculib2/amscurarray_impl.cuh
+++ b/include/amsculib2/amscurarray_impl.cuh
--- a/include/amsculib2/cuvect2.hpp
+++ b/include/amsculib2/cuvect2.hpp
@ -1,84 +1,85 @@
-#ifndef __CUVECT2_HPP__
+#ifndef __CUVECT2_HPP__
-#define __CUVECT2_HPP__
+#define __CUVECT2_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-    class cuvect2
+    class cuvect2
-    {
+    {
-        public:
+        public:
-        double x;
+        double x;
-        double y;
+        double y;
-
+
-
+
-        __host__ __device__ cuvect2();
+        __host__ __device__ cuvect2();
-        __host__ __device__ ~cuvect2();
+        __host__ __device__ ~cuvect2();
-        __host__ __device__ cuvect2(double _x, double _y);
+        __host__ __device__ cuvect2(double _x, double _y);
-
+
-        __host__ __device__ double& operator[](const int I);
+        __host__ __device__ double& operator[](const int I);
-        __host__ __device__ const double& operator[](const int I) const;
+        __host__ __device__ const double& operator[](const int I) const;
-
+
-        __host__ __device__ cuvect2 operator+(cuvect2 lhs);
+        __host__ __device__ cuvect2 operator+(cuvect2 lhs);
-        __host__ __device__ cuvect2 operator-(cuvect2 lhs);
+        __host__ __device__ cuvect2 operator-(cuvect2 lhs);
-        __host__ __device__ cuvect2 operator*(double lhs);
+        __host__ __device__ cuvect2 operator*(double lhs);
-        __host__ __device__ cuvect2 operator/(double lhs);
+        __host__ __device__ cuvect2 operator/(double lhs);
-    };
+    };
-
+
-    class cumat2
+    class cumat2
-    {
+    {
-        public:
+        public:
-        double dat[4];
+        double dat[4];
-
+
-        __host__ __device__ cumat2();
+        __host__ __device__ cumat2();
-        __host__ __device__ ~cumat2();
+        __host__ __device__ ~cumat2();
-        __host__ __device__ double& operator[](const int I);
+        __host__ __device__ double& operator[](const int I);
-        __host__ __device__ double& operator()(const int I, const int J);
+        __host__ __device__ double& operator()(const int I, const int J);
-        __host__ __device__ double& at(const int I, const int J);
+        __host__ __device__ double& at(const int I, const int J);
-
+
-        __host__ __device__ cumat2 operator+(cumat2 lhs);
+        __host__ __device__ cumat2 operator+(cumat2 lhs);
-        __host__ __device__ cumat2 operator-(cumat2 lhs);
+        __host__ __device__ cumat2 operator-(cumat2 lhs);
-        __host__ __device__ cumat2 operator*(double lhs);
+        __host__ __device__ cumat2 operator*(double lhs);
-        __host__ __device__ cumat2 operator/(double lhs);
+        __host__ __device__ cumat2 operator/(double lhs);
-        __host__ __device__ cuvect2 operator*(cuvect2 lhs);
+        __host__ __device__ cuvect2 operator*(cuvect2 lhs);
-        __host__ __device__ cumat2 operator*(cumat2 lhs);
+        __host__ __device__ cumat2 operator*(cumat2 lhs);
-
+
-        __host__ __device__ double det();
+        __host__ __device__ double det();
-        __host__ __device__ cumat2 transpose();
+        __host__ __device__ cumat2 transpose();
-        __host__ __device__ cumat2 inverse();
+        __host__ __device__ cumat2 inverse();
-    };
+    };
-
+
-    __host__ __device__ double cuvect2_dot(cuvect2 a, cuvect2 b);
+    __host__ __device__ double cuvect2_dot(cuvect2 a, cuvect2 b);
-    __host__ __device__ double cuvect2_cross(cuvect2 a, cuvect2 b);
+    __host__ __device__ double cuvect2_cross(cuvect2 a, cuvect2 b);
-    __host__ __device__ double cuvect2_norm(cuvect2 a);
+    __host__ __device__ double cuvect2_norm(cuvect2 a);
-    __host__ __device__ cuvect2 cuvect2_normalize(cuvect2 a);
+    __host__ __device__ cuvect2 cuvect2_normalize(cuvect2 a);
-    __host__ __device__ cuvect2 cuvect2_proj(cuvect2 a, cuvect2 b);
+    __host__ __device__ cuvect2 cuvect2_proj(cuvect2 a, cuvect2 b);
-
+
-    //2x2 matrix operations
+    //2x2 matrix operations
-    //matrix order is assumed to be mat[I,J] = mat[I+3*J]
+    //matrix order is assumed to be mat[I,J] = mat[I+2*J]
-
+
-    //transpose a 2x2 matrix in place
+    //transpose a 2x2 matrix in place
-    __host__ __device__ void mat2_transpose(double *mat2inout);
+    __host__ __device__ void mat2_transpose(double *mat2inout);
-    
+    
-    //copies src to dest
+    //copies src to dest
-    __host__ __device__ void mat2_copy(double *mat2_dest, const double *mat2_src);
+    __host__ __device__ void mat2_copy(double *mat2_dest, const double *mat2_src);
-    
+    
-    //inverts mat?inout[4]
+    //inverts mat2inout[4] in place
-    __host__ __device__ void mat2_inverse(double *mat2inout);
+    __host__ __device__ void mat2_inverse(double *mat2inout);
-    
+    
-    //rotatin matrix from angle
+    //rotation matrix from angle
-    __host__ __device__ void mat2_rot_from_angle(double angle, double *mat2);
+    __host__ __device__ void mat2_rot_from_angle(double angle, double *mat2);
-
+
-    //multiplies c = a*b
+    //multiplies c = a*b
-    __host__ __device__ void mat2_mult(double *mat2a, double *mat2b, double *mat2c);
+    __host__ __device__ void mat2_mult(double *mat2a, double *mat2b, double *mat2c);
-
+
-    // ret = a*b
+    // ret = a*b
-    __host__ __device__ cuvect2 mat2_mult(double *mat2a, cuvect2 b);
+    __host__ __device__ cuvect2 mat2_mult(double *mat2a, cuvect2 b);
-
+
-
+
-    void test_cuvect2_1();
+
-    
+    void test_cuvect2_1();
-
+    
-}; //end namespace amscuda
+
-
+}; //end namespace amscuda
-#endif
+
-
+#endif
--- a/include/amsculib2/cuvect2f.hpp
+++ b/include/amsculib2/cuvect2f.hpp
@ -1,84 +1,85 @@
-#ifndef __CUVECT2F_HPP__
+#ifndef __CUVECT2F_HPP__
-#define __CUVECT2F_HPP__
+#define __CUVECT2F_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-    class cuvect2f
+    class cuvect2f
-    {
+    {
-        public:
+        public:
-        float x;
+        float x;
-        float y;
+        float y;
-
+
-
+
-        __host__ __device__ cuvect2f();
+        __host__ __device__ cuvect2f();
-        __host__ __device__ ~cuvect2f();
+        __host__ __device__ ~cuvect2f();
-        __host__ __device__ cuvect2f(float _x, float _y);
+        __host__ __device__ cuvect2f(float _x, float _y);
-
+
-        __host__ __device__ float& operator[](const int I);
+        __host__ __device__ float& operator[](const int I);
-        __host__ __device__ const float& operator[](const int I) const;
+        __host__ __device__ const float& operator[](const int I) const;
-
+
-        __host__ __device__ cuvect2f operator+(cuvect2f lhs);
+        __host__ __device__ cuvect2f operator+(cuvect2f lhs);
-        __host__ __device__ cuvect2f operator-(cuvect2f lhs);
+        __host__ __device__ cuvect2f operator-(cuvect2f lhs);
-        __host__ __device__ cuvect2f operator*(float lhs);
+        __host__ __device__ cuvect2f operator*(float lhs);
-        __host__ __device__ cuvect2f operator/(float lhs);
+        __host__ __device__ cuvect2f operator/(float lhs);
-    };
+        __host__ __device__ friend cuvect2f operator-(cuvect2f rhs);
-
+    };
-    class cumat2f
+
-    {
+    class cumat2f
-        public:
+    {
-        float dat[4];
+        public:
-
+        float dat[4];
-        __host__ __device__ cumat2f();
+
-        __host__ __device__ ~cumat2f();
+        __host__ __device__ cumat2f();
-        __host__ __device__ float& operator[](const int I);
+        __host__ __device__ ~cumat2f();
-        __host__ __device__ float& operator()(const int I, const int J);
+        __host__ __device__ float& operator[](const int I);
-        __host__ __device__ float& at(const int I, const int J);
+        __host__ __device__ float& operator()(const int I, const int J);
-
+        __host__ __device__ float& at(const int I, const int J);
-        __host__ __device__ cumat2f operator+(cumat2f lhs);
+
-        __host__ __device__ cumat2f operator-(cumat2f lhs);
+        __host__ __device__ cumat2f operator+(cumat2f lhs);
-        __host__ __device__ cumat2f operator*(float lhs);
+        __host__ __device__ cumat2f operator-(cumat2f lhs);
-        __host__ __device__ cumat2f operator/(float lhs);
+        __host__ __device__ cumat2f operator*(float lhs);
-        __host__ __device__ cuvect2f operator*(cuvect2f lhs);
+        __host__ __device__ cumat2f operator/(float lhs);
-        __host__ __device__ cumat2f operator*(cumat2f lhs);
+        __host__ __device__ cuvect2f operator*(cuvect2f lhs);
-
+        __host__ __device__ cumat2f operator*(cumat2f lhs);
-        __host__ __device__ float det();
+
-        __host__ __device__ cumat2f transpose();
+        __host__ __device__ float det();
-        __host__ __device__ cumat2f inverse();
+        __host__ __device__ cumat2f transpose();
-    };
+        __host__ __device__ cumat2f inverse();
-
+    };
-    __host__ __device__ float cuvect2f_dot(cuvect2f a, cuvect2f b);
+
-    __host__ __device__ float cuvect2f_cross(cuvect2f a, cuvect2f b);
+    __host__ __device__ float cuvect2f_dot(cuvect2f a, cuvect2f b);
-    __host__ __device__ float cuvect2f_norm(cuvect2f a);
+    __host__ __device__ float cuvect2f_cross(cuvect2f a, cuvect2f b);
-    __host__ __device__ cuvect2f cuvect2f_normalize(cuvect2f a);
+    __host__ __device__ float cuvect2f_norm(cuvect2f a);
-    __host__ __device__ cuvect2f cuvect2f_proj(cuvect2f a, cuvect2f b);
+    __host__ __device__ cuvect2f cuvect2f_normalize(cuvect2f a);
-
+    __host__ __device__ cuvect2f cuvect2f_proj(cuvect2f a, cuvect2f b);
-    //2x2 matrix operations
+
-    //matrix order is assumed to be mat[I,J] = mat[I+3*J]
+    //2x2 matrix operations
-
+    //matrix order is assumed to be mat[I,J] = mat[I+2*J]
-    //transpose a 2x2 matrix in place
+
-    __host__ __device__ void mat2f_transpose(float *mat2inout);
+    //transpose a 2x2 matrix in place
-    
+    __host__ __device__ void mat2f_transpose(float *mat2inout);
-    //copies src to dest
+    
-    __host__ __device__ void mat2f_copy(float *mat2f_dest, const float *mat2f_src);
+    //copies src to dest
-    
+    __host__ __device__ void mat2f_copy(float *mat2f_dest, const float *mat2f_src);
-    //inverts mat?inout[4]
+    
-    __host__ __device__ void mat2f_inverse(float *mat2inout);
+    //inverts mat?inout[4]
-    
+    __host__ __device__ void mat2f_inverse(float *mat2inout); //inverts the 4-element matrix in place
-    //rotatin matrix from angle
+    
-    __host__ __device__ void mat2f_rot_from_angle(float angle, float *mat2);
+    //rotation matrix from angle
-
+    __host__ __device__ void mat2f_rot_from_angle(float angle, float *mat2);
-    //multiplies c = a*b
+
-    __host__ __device__ void mat2f_mult(float *mat2a, float *mat2b, float *mat2c);
+    //multiplies c = a*b
-
+    __host__ __device__ void mat2f_mult(float *mat2a, float *mat2b, float *mat2c);
-    // ret = a*b
+
-    __host__ __device__ cuvect2f mat2f_mult(float *mat2a, cuvect2f b);
+    // ret = a*b
-
+    __host__ __device__ cuvect2f mat2f_mult(float *mat2a, cuvect2f b);
-
+
-    void test_cuvect2f_1();
+
-
+    void test_cuvect2f_1();
-
+
-};
+
-
+};
-#endif
+
-
+#endif
--- a/include/amsculib2/cuvect3.hpp
+++ b/include/amsculib2/cuvect3.hpp
@ -1,86 +1,86 @@
-#ifndef __CUVECT3_HPP__
+#ifndef __CUVECT3_HPP__
-#define __CUVECT3_HPP__
+#define __CUVECT3_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-    class cuvect3
+    class cuvect3
-    {
+    {
-        public:
+        public:
-        double x;
+        double x;
-        double y;
+        double y;
-        double z;
+        double z;
-
+
-        __host__ __device__ cuvect3();
+        __host__ __device__ cuvect3();
-        __host__ __device__ ~cuvect3();
+        __host__ __device__ ~cuvect3();
-        __host__ __device__ cuvect3(double _x, double _y, double _z);
+        __host__ __device__ cuvect3(double _x, double _y, double _z);
-        
+        
-        
+        
-        __host__ __device__ double& operator[](const int I);
+        __host__ __device__ double& operator[](const int I);
-        __host__ __device__ const double& operator[](const int I) const;
+        __host__ __device__ const double& operator[](const int I) const;
-
+
-        __host__ __device__ cuvect3 operator+(cuvect3 lhs);
+        __host__ __device__ cuvect3 operator+(cuvect3 lhs);
-        __host__ __device__ cuvect3 operator-(cuvect3 lhs);
+        __host__ __device__ cuvect3 operator-(cuvect3 lhs);
-        __host__ __device__ cuvect3 operator*(double lhs);
+        __host__ __device__ cuvect3 operator*(double lhs);
-        __host__ __device__ cuvect3 operator/(double lhs);
+        __host__ __device__ cuvect3 operator/(double lhs);
-    };
+    };
-
+
-    class cumat3
+    class cumat3
-    {
+    {
-        public:
+        public:
-        double dat[9];
+        double dat[9];
-
+
-        __host__ __device__ cumat3();
+        __host__ __device__ cumat3();
-        __host__ __device__ ~cumat3();
+        __host__ __device__ ~cumat3();
-        __host__ __device__ double& operator[](const int I);
+        __host__ __device__ double& operator[](const int I);
-        __host__ __device__ double& operator()(const int I, const int J);
+        __host__ __device__ double& operator()(const int I, const int J);
-        __host__ __device__ double& at(const int I, const int J);
+        __host__ __device__ double& at(const int I, const int J);
-
+
-        __host__ __device__ cumat3 operator+(cumat3 lhs);
+        __host__ __device__ cumat3 operator+(cumat3 lhs);
-        __host__ __device__ cumat3 operator-(cumat3 lhs);
+        __host__ __device__ cumat3 operator-(cumat3 lhs);
-        __host__ __device__ cumat3 operator*(double lhs);
+        __host__ __device__ cumat3 operator*(double lhs);
-        __host__ __device__ cumat3 operator/(double lhs);
+        __host__ __device__ cumat3 operator/(double lhs);
-        __host__ __device__ cuvect3 operator*(cuvect3 lhs);
+        __host__ __device__ cuvect3 operator*(cuvect3 lhs);
-        __host__ __device__ cumat3 operator*(cumat3 lhs);
+        __host__ __device__ cumat3 operator*(cumat3 lhs);
-
+
-        __host__ __device__ double det();
+        __host__ __device__ double det();
-        __host__ __device__ cumat3 transpose();
+        __host__ __device__ cumat3 transpose();
-        __host__ __device__ cumat3 inverse();
+        __host__ __device__ cumat3 inverse();
-    };
+    };
-
+
-    __host__ __device__ double cuvect3_dot(cuvect3 a, cuvect3 b);
+    __host__ __device__ double cuvect3_dot(cuvect3 a, cuvect3 b);
-    __host__ __device__ cuvect3 cuvect3_cross(cuvect3 a, cuvect3 b);
+    __host__ __device__ cuvect3 cuvect3_cross(cuvect3 a, cuvect3 b);
-    __host__ __device__ double cuvect3_norm(cuvect3 a);
+    __host__ __device__ double cuvect3_norm(cuvect3 a);
-    __host__ __device__ cuvect3 cuvect3_normalize(cuvect3 a);
+    __host__ __device__ cuvect3 cuvect3_normalize(cuvect3 a);
-    __host__ __device__ cuvect3 cuvect3_proj(cuvect3 a, cuvect3 b);
+    __host__ __device__ cuvect3 cuvect3_proj(cuvect3 a, cuvect3 b);
-
+
-    //3x3 matrix operations
+    //3x3 matrix operations
-    //matrix order is assumed to be mat[I,J] = mat[I+3*J]
+    //matrix order is assumed to be mat[I,J] = mat[I+3*J]
-    
+    
-    //transposes a 3x3 (9 element) matrix
+    //transposes a 3x3 (9 element) matrix
-    __host__ __device__ void mat3_transpose(double *mat3inout);
+    __host__ __device__ void mat3_transpose(double *mat3inout);
-
+
-    //copies src to dest
+    //copies src to dest
-    __host__ __device__ void mat3_copy(double *mat3_dest, const double *mat3_src);
+    __host__ __device__ void mat3_copy(double *mat3_dest, const double *mat3_src);
-
+
-    //returns determinant of 3x3 matrix
+    //returns determinant of 3x3 matrix
-    __host__ __device__ double mat3_det(double *mat3in);
+    __host__ __device__ double mat3_det(double *mat3in);
-
+
-    //inverts a 3x3 (9 element) matrix
+    //inverts a 3x3 (9 element) matrix
-    __host__ __device__ void mat3_inverse(double *mat3inout);
+    __host__ __device__ void mat3_inverse(double *mat3inout);
-
+
-    __host__ __device__ cuvect3 mat3_mult(double *mat3in, cuvect3 cvin);
+    __host__ __device__ cuvect3 mat3_mult(double *mat3in, cuvect3 cvin);
-    __host__ __device__ void mat3_mult(double *matina, double *matinb, double *matout);
+    __host__ __device__ void mat3_mult(double *matina, double *matinb, double *matout);
-
+
-    __host__ __device__ void mat3_hodgedual(cuvect3 vecin, double *matout);
+    __host__ __device__ void mat3_hodgedual(cuvect3 vecin, double *matout);
-    __host__ __device__ void mat3_hodgedual(double *matin, cuvect3 vecout);
+    __host__ __device__ void mat3_hodgedual(double *matin, cuvect3 vecout);
-
+
-    //returns direction cosine rotation matrix from axis and angle
+    //returns direction cosine rotation matrix from axis and angle
-    __host__ __device__ void mat3_rot_from_axisangle(cuvect3 axis, double angle, double *matout);
+    __host__ __device__ void mat3_rot_from_axisangle(cuvect3 axis, double angle, double *matout);
-
+
-    __host__ void test_cudavect_logic1();
+    __host__ void test_cudavect_logic1();
-
+
-}; //end namespace amscuda
+}; //end namespace amscuda
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/cuvect3f.hpp
+++ b/include/amsculib2/cuvect3f.hpp
@ -1,86 +1,87 @@
-#ifndef __CUVECT3F_HPP__
+#ifndef __CUVECT3F_HPP__
-#define __CUVECT3F_HPP__
+#define __CUVECT3F_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-    class cuvect3f
+    class cuvect3f
-    {
+    {
-        public:
+        public:
-        float x;
+        float x;
-        float y;
+        float y;
-        float z;
+        float z;
-
+
-        __host__ __device__ cuvect3f();
+        __host__ __device__ cuvect3f();
-        __host__ __device__ ~cuvect3f();
+        __host__ __device__ ~cuvect3f();
-        __host__ __device__ cuvect3f(float _x, float _y, float _z);
+        __host__ __device__ cuvect3f(float _x, float _y, float _z);
-        
+        
-        
+        
-        __host__ __device__ float& operator[](const int I);
+        __host__ __device__ float& operator[](const int I);
-        __host__ __device__ const float& operator[](const int I) const;
+        __host__ __device__ const float& operator[](const int I) const;
-
+
-        __host__ __device__ cuvect3f operator+(cuvect3f lhs);
+        __host__ __device__ cuvect3f operator+(cuvect3f lhs);
-        __host__ __device__ cuvect3f operator-(cuvect3f lhs);
+        __host__ __device__ cuvect3f operator-(cuvect3f lhs);
-        __host__ __device__ cuvect3f operator*(float lhs);
+        __host__ __device__ cuvect3f operator*(float lhs);
-        __host__ __device__ cuvect3f operator/(float lhs);
+        __host__ __device__ cuvect3f operator/(float lhs);
-    };
+        __host__ __device__ friend cuvect3f operator-(cuvect3f rhs);
-
+    };
-    class cumat3f
+
-    {
+    class cumat3f
-        public:
+    {
-        float dat[9];
+        public:
-
+        float dat[9];
-        __host__ __device__ cumat3f();
+
-        __host__ __device__ ~cumat3f();
+        __host__ __device__ cumat3f();
-        __host__ __device__ float& operator[](const int I);
+        __host__ __device__ ~cumat3f();
-        __host__ __device__ float& operator()(const int I, const int J);
+        __host__ __device__ float& operator[](const int I);
-        __host__ __device__ float& at(const int I, const int J);
+        __host__ __device__ float& operator()(const int I, const int J);
-
+        __host__ __device__ float& at(const int I, const int J);
-        __host__ __device__ cumat3f operator+(cumat3f lhs);
+
-        __host__ __device__ cumat3f operator-(cumat3f lhs);
+        __host__ __device__ cumat3f operator+(cumat3f lhs);
-        __host__ __device__ cumat3f operator*(float lhs);
+        __host__ __device__ cumat3f operator-(cumat3f lhs);
-        __host__ __device__ cumat3f operator/(float lhs);
+        __host__ __device__ cumat3f operator*(float lhs);
-        __host__ __device__ cuvect3f operator*(cuvect3f lhs);
+        __host__ __device__ cumat3f operator/(float lhs);
-        __host__ __device__ cumat3f operator*(cumat3f lhs);
+        __host__ __device__ cuvect3f operator*(cuvect3f lhs);
-
+        __host__ __device__ cumat3f operator*(cumat3f lhs);
-        __host__ __device__ float det();
+
-        __host__ __device__ cumat3f transpose();
+        __host__ __device__ float det();
-        __host__ __device__ cumat3f inverse();
+        __host__ __device__ cumat3f transpose();
-    };
+        __host__ __device__ cumat3f inverse();
-
+    };
-    __host__ __device__ float cuvect3f_dot(cuvect3f a, cuvect3f b);
+
-    __host__ __device__ cuvect3f cuvect3f_cross(cuvect3f a, cuvect3f b);
+    __host__ __device__ float cuvect3f_dot(cuvect3f a, cuvect3f b);
-    __host__ __device__ float cuvect3f_norm(cuvect3f a);
+    __host__ __device__ cuvect3f cuvect3f_cross(cuvect3f a, cuvect3f b);
-    __host__ __device__ cuvect3f cuvect3f_normalize(cuvect3f a);
+    __host__ __device__ float cuvect3f_norm(cuvect3f a);
-    __host__ __device__ cuvect3f cuvect3f_proj(cuvect3f a, cuvect3f b);
+    __host__ __device__ cuvect3f cuvect3f_normalize(cuvect3f a);
-
+    __host__ __device__ cuvect3f cuvect3f_proj(cuvect3f a, cuvect3f b);
-    //3x3 matrix operations
+
-    //matrix order is assumed to be mat[I,J] = mat[I+3*J]
+    //3x3 matrix operations
-    
+    //matrix order is assumed to be mat[I,J] = mat[I+3*J]
-    //transposes a 3x3 (9 element) matrix
+    
-    __host__ __device__ void mat3f_transpose(float *mat3inout);
+    //transposes a 3x3 (9 element) matrix
-
+    __host__ __device__ void mat3f_transpose(float *mat3inout);
-    //copies src to dest
+
-    __host__ __device__ void mat3f_copy(float *mat3f_dest, const float *mat3f_src);
+    //copies src to dest
-
+    __host__ __device__ void mat3f_copy(float *mat3f_dest, const float *mat3f_src);
-    //returns determinant of 3x3 matrix
+
-    __host__ __device__ float mat3f_det(float *mat3in);
+    //returns determinant of 3x3 matrix
-
+    __host__ __device__ float mat3f_det(float *mat3in);
-    //inverts a 3x3 (9 element) matrix
+
-    __host__ __device__ void mat3f_inverse(float *mat3inout);
+    //inverts a 3x3 (9 element) matrix
-
+    __host__ __device__ void mat3f_inverse(float *mat3inout);
-    __host__ __device__ cuvect3f mat3f_mult(float *mat3in, cuvect3f cvin);
+
-    __host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout);
+    __host__ __device__ cuvect3f mat3f_mult(float *mat3in, cuvect3f cvin);
-
+    __host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout);
-    __host__ __device__ void mat3f_hodgedual(cuvect3f vecin, float *matout);
+
-    __host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f vecout);
+    __host__ __device__ void mat3f_hodgedual(cuvect3f vecin, float *matout);
-
+    __host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f vecout);
-    //returns direction cosine rotation matrix from axis and angle
+
-    __host__ __device__ void mat3f_rot_from_axisangle(cuvect3f axis, float angle, float *matout);
+    //returns direction cosine rotation matrix from axis and angle
-
+    __host__ __device__ void mat3f_rot_from_axisangle(cuvect3f axis, float angle, float *matout);
-    __host__ void test_cudavectf_logic1();
+
-
+    __host__ void test_cudavectf_logic1();
-};
+
-
+};
-#endif
+
-
+#endif
--- a/include/amsculib2/cuvect4.hpp
+++ b/include/amsculib2/cuvect4.hpp
@ -1,59 +1,59 @@
-#ifndef __CUVECT4_HPP__
+#ifndef __CUVECT4_HPP__
-#define __CUVECT4_HPP__
+#define __CUVECT4_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-    class cuvect4
+    class cuvect4
-    {
+    {
-        public:
+        public:
-        double x;
+        double x;
-        double y;
+        double y;
-        double z;
+        double z;
-        double w;
+        double w;
-
+
-        __host__ __device__ cuvect4();
+        __host__ __device__ cuvect4();
-        __host__ __device__ ~cuvect4();
+        __host__ __device__ ~cuvect4();
-        __host__ __device__ cuvect4(double _x, double _y, double _z, double _w);
+        __host__ __device__ cuvect4(double _x, double _y, double _z, double _w);
-                
+                
-        __host__ __device__ double& operator[](const int I);
+        __host__ __device__ double& operator[](const int I);
-        __host__ __device__ const double& operator[](const int I) const;
+        __host__ __device__ const double& operator[](const int I) const;
-
+
-        __host__ __device__ cuvect4 operator+(cuvect4 lhs);
+        __host__ __device__ cuvect4 operator+(cuvect4 lhs);
-        __host__ __device__ cuvect4 operator-(cuvect4 lhs);
+        __host__ __device__ cuvect4 operator-(cuvect4 lhs);
-        __host__ __device__ cuvect4 operator*(double lhs);
+        __host__ __device__ cuvect4 operator*(double lhs);
-        __host__ __device__ cuvect4 operator/(double lhs);
+        __host__ __device__ cuvect4 operator/(double lhs);
-    };
+    };
-
+
-    class cumat4
+    class cumat4
-    {
+    {
-        public:
+        public:
-        double dat[16];
+        double dat[16];
-
+
-        __host__ __device__ cumat4();
+        __host__ __device__ cumat4();
-        __host__ __device__ ~cumat4();
+        __host__ __device__ ~cumat4();
-        __host__ __device__ double& operator[](const int I);
+        __host__ __device__ double& operator[](const int I);
-        __host__ __device__ double& operator()(const int I, const int J);
+        __host__ __device__ double& operator()(const int I, const int J);
-        __host__ __device__ double& at(const int I, const int J);
+        __host__ __device__ double& at(const int I, const int J);
-
+
-        __host__ __device__ cumat4 operator+(cumat4 lhs);
+        __host__ __device__ cumat4 operator+(cumat4 lhs);
-        __host__ __device__ cumat4 operator-(cumat4 lhs);
+        __host__ __device__ cumat4 operator-(cumat4 lhs);
-        __host__ __device__ cumat4 operator*(double lhs);
+        __host__ __device__ cumat4 operator*(double lhs);
-        __host__ __device__ cumat4 operator/(double lhs);
+        __host__ __device__ cumat4 operator/(double lhs);
-        __host__ __device__ cuvect4 operator*(cuvect4 lhs);
+        __host__ __device__ cuvect4 operator*(cuvect4 lhs);
-        __host__ __device__ cumat4 operator*(cumat4 lhs);
+        __host__ __device__ cumat4 operator*(cumat4 lhs);
-
+
-        __host__ __device__ double det();
+        __host__ __device__ double det();
-        __host__ __device__ cumat4 transpose();
+        __host__ __device__ cumat4 transpose();
-        __host__ __device__ cumat4 inverse();
+        __host__ __device__ cumat4 inverse();
-    };
+    };
-
+
-    __host__ __device__ double cuvect4_dot(cuvect4 a, cuvect4 b);
+    __host__ __device__ double cuvect4_dot(cuvect4 a, cuvect4 b);
-    __host__ __device__ double cuvect4_norm(cuvect4 a);
+    __host__ __device__ double cuvect4_norm(cuvect4 a);
-    __host__ __device__ cuvect4 cuvect4_normalize(cuvect4 a);
+    __host__ __device__ cuvect4 cuvect4_normalize(cuvect4 a);
-    __host__ __device__ cuvect4 cuvect4_proj(cuvect4 a, cuvect4 b);
+    __host__ __device__ cuvect4 cuvect4_proj(cuvect4 a, cuvect4 b);
-
+
-}; //end namespace amscuda
+}; //end namespace amscuda
-
+
-#endif
+#endif
-
+
--- a/include/amsculib2/cuvect4f.hpp
+++ b/include/amsculib2/cuvect4f.hpp
@ -1,60 +1,61 @@
-#ifndef __CUVECT4F_HPP__
+#ifndef __CUVECT4F_HPP__
-#define __CUVECT4F_HPP__
+#define __CUVECT4F_HPP__
-
+
-namespace amscuda
+namespace amscuda
-{
+{
-
+
-    class cuvect4f
+    class cuvect4f
-    {
+    {
-        public:
+        public:
-        float x;
+        float x;
-        float y;
+        float y;
-        float z;
+        float z;
-        float w;
+        float w;
-
+
-        __host__ __device__ cuvect4f();
+        __host__ __device__ cuvect4f();
-        __host__ __device__ ~cuvect4f();
+        __host__ __device__ ~cuvect4f();
-        __host__ __device__ cuvect4f(float _x, float _y, float _z, float _w);
+        __host__ __device__ cuvect4f(float _x, float _y, float _z, float _w);
-                
+                
-        __host__ __device__ float& operator[](const int I);
+        __host__ __device__ float& operator[](const int I);
-        __host__ __device__ const float& operator[](const int I) const;
+        __host__ __device__ const float& operator[](const int I) const;
-
+
-        __host__ __device__ cuvect4f operator+(cuvect4f lhs);
+        __host__ __device__ cuvect4f operator+(cuvect4f lhs);
-        __host__ __device__ cuvect4f operator-(cuvect4f lhs);
+        __host__ __device__ cuvect4f operator-(cuvect4f lhs);
-        __host__ __device__ cuvect4f operator*(float lhs);
+        __host__ __device__ cuvect4f operator*(float lhs);
-        __host__ __device__ cuvect4f operator/(float lhs);
+        __host__ __device__ cuvect4f operator/(float lhs);
-    };
+        __host__ __device__ friend cuvect4f operator-(cuvect4f rhs);
-
+    };
-    class cumat4f
+
-    {
+    class cumat4f
-        public:
+    {
-        float dat[16];
+        public:
-
+        float dat[16];
-        __host__ __device__ cumat4f();
+
-        __host__ __device__ ~cumat4f();
+        __host__ __device__ cumat4f();
-        __host__ __device__ float& operator[](const int I);
+        __host__ __device__ ~cumat4f();
-        __host__ __device__ float& operator()(const int I, const int J);
+        __host__ __device__ float& operator[](const int I);
-        __host__ __device__ float& at(const int I, const int J);
+        __host__ __device__ float& operator()(const int I, const int J);
-
+        __host__ __device__ float& at(const int I, const int J);
-        __host__ __device__ cumat4f operator+(cumat4f lhs);
+
-        __host__ __device__ cumat4f operator-(cumat4f lhs);
+        __host__ __device__ cumat4f operator+(cumat4f lhs);
-        __host__ __device__ cumat4f operator*(float lhs);
+        __host__ __device__ cumat4f operator-(cumat4f lhs);
-        __host__ __device__ cumat4f operator/(float lhs);
+        __host__ __device__ cumat4f operator*(float lhs);
-        __host__ __device__ cuvect4f operator*(cuvect4f lhs);
+        __host__ __device__ cumat4f operator/(float lhs);
-        __host__ __device__ cumat4f operator*(cumat4f lhs);
+        __host__ __device__ cuvect4f operator*(cuvect4f lhs);
-
+        __host__ __device__ cumat4f operator*(cumat4f lhs);
-        __host__ __device__ float det();
+
-        __host__ __device__ cumat4f transpose();
+        __host__ __device__ float det();
-        __host__ __device__ cumat4f inverse();
+        __host__ __device__ cumat4f transpose();
-    };
+        __host__ __device__ cumat4f inverse();
-
+    };
-    __host__ __device__ float cuvect4f_dot(cuvect4f a, cuvect4f b);
+
-    __host__ __device__ float cuvect4f_norm(cuvect4f a);
+    __host__ __device__ float cuvect4f_dot(cuvect4f a, cuvect4f b);
-    __host__ __device__ cuvect4f cuvect4f_normalize(cuvect4f a);
+    __host__ __device__ float cuvect4f_norm(cuvect4f a);
-    __host__ __device__ cuvect4f cuvect4f_proj(cuvect4f a, cuvect4f b);
+    __host__ __device__ cuvect4f cuvect4f_normalize(cuvect4f a);
-
+    __host__ __device__ cuvect4f cuvect4f_proj(cuvect4f a, cuvect4f b);
-
+
-};
+
-
+};
-#endif
+
-
+#endif
--- a/make_linux.py
+++ b/make_linux.py
@ -0,0 +1,22 @@
 #!/usr/bin/python3 
 # Linux64 driver script: with a command-line argument it optionally cleans
 # ./build_linux64 and exits; with no argument it runs the library and test
 # build scripts, removes object files left under ./src, and runs the test.
 import os,sys,math
 # Star import supplies flist() (file listing with recurse/exts filters) and
 # callproc() used below.
 from build.amsbuildlib4 import *
 if(len(sys.argv)>=2):
    if(sys.argv[1]=="clean"):
        # Delete every .o file found recursively under ./build_linux64.
        obj_list = flist('./build_linux64',recurse=True,exts=['.o'])
        for o in obj_list:
            os.remove('{}'.format(o))
    # NOTE(review): as indented here, exit() runs for ANY first argument,
    # not only "clean" - confirm that unknown arguments should skip the build.
    exit()
 # The return values of os.system() are not checked, so the test build is
 # attempted even if the library build reported a failure.
 os.system('python3 ./build/make.linux64.lib.py')
 os.system('python3 ./build/make.linux64.test.py')
 # Remove .o files found recursively under ./src after the build scripts ran.
 obj_list = flist('./src',recurse=True,exts=['.o'])
 for o in obj_list:
    os.remove('{}'.format(o))
 # Run ./test with ./build_linux64 as the working directory, then return to
 # the parent directory.
 # presumably callproc() executes the given command line - verify in
 # build/amsbuildlib4.py.
 os.chdir('./build_linux64')
 callproc('./test')
 os.chdir('..')
--- a/make_mingw.py
+++ b/make_mingw.py
@ -0,0 +1,28 @@
 #!/usr/bin/python3
 #!/usr/bin/python3 
 import os,sys,math
 from build.amsbuildlib4 import *
 # Any command-line argument short-circuits the build: "clean" first removes
 # the object files under ./build_mingw64, every other argument just exits.
 if(len(sys.argv)>=2):
    if(sys.argv[1]=="clean"):
        obj_list = flist('./build_mingw64',recurse=True,exts=['.o'])
        for o in obj_list:
            os.remove('{}'.format(o))
    # sys.exit() rather than the site-provided exit(), which is meant for the
    # interactive shell and is not guaranteed to exist in every interpreter
    sys.exit()
 # Build the static library, then the test program. Stop at the first step
 # that reports a non-zero status so a stale test binary is never run.
 build_failed = False
 for step in ('python3 ./build/make.mingw64.lib.py','python3 ./build/make.mingw64.test.py'):
    if(os.system(step)!=0):
        print('build step failed: {}'.format(step))
        build_failed = True
        break
 # Remove object files left next to the sources (done even after a failure)
 obj_list = flist('./src',recurse=True,exts=['.o','.obj'])
 for o in obj_list:
    os.remove('{}'.format(o))
 if(build_failed):
    sys.exit(1)
 # Run the test executable from its build directory; outside of Windows the
 # executable is launched through wine.
 os.chdir('./build_mingw64')
 if(sys.platform!="win32"):
    callproc('wine ./test.exe')
 else:
    callproc('test.exe')
 os.chdir('..')
--- a/objstore/amscu_comp128.o
+++ b/objstore/amscu_comp128.o
--- a/objstore/amscu_comp64.o
+++ b/objstore/amscu_comp64.o
--- a/objstore/amscu_cudafunctions.o
+++ b/objstore/amscu_cudafunctions.o
--- a/objstore/amscu_random.o
+++ b/objstore/amscu_random.o
--- a/objstore/amscuarray.o
+++ b/objstore/amscuarray.o
--- a/objstore/amscuarray_dops.o
+++ b/objstore/amscuarray_dops.o
--- a/objstore/amscugeom.o
+++ b/objstore/amscugeom.o
--- a/objstore/amsculib2.o
+++ b/objstore/amsculib2.o
--- a/objstore/amscumath.o
+++ b/objstore/amscumath.o
--- a/objstore/amscurarray.o
+++ b/objstore/amscurarray.o
--- a/objstore/cuvect2.o
+++ b/objstore/cuvect2.o
--- a/objstore/cuvect2f.o
+++ b/objstore/cuvect2f.o
--- a/objstore/cuvect3.o
+++ b/objstore/cuvect3.o
--- a/objstore/cuvect3f.o
+++ b/objstore/cuvect3f.o
--- a/objstore/cuvect4.o
+++ b/objstore/cuvect4.o
--- a/objstore/cuvect4f.o
+++ b/objstore/cuvect4f.o
--- a/old/bin_linux64/libamsculib2.linux64.a
+++ b/old/bin_linux64/libamsculib2.linux64.a
--- a/old/bin_linux64/test
+++ b/old/bin_linux64/test
--- a/old/bin_winx64/libamsculib2.msvc64.lib
+++ b/old/bin_winx64/libamsculib2.msvc64.lib
--- a/old/bin_winx64/test.exe
+++ b/old/bin_winx64/test.exe
--- a/old/bin_winx64/test.exp
+++ b/old/bin_winx64/test.exp
--- a/old/bin_winx64/test.lib
+++ b/old/bin_winx64/test.lib
--- a/old/compscripts/pycache/complib2.cpython-310.pyc
+++ b/old/compscripts/pycache/complib2.cpython-310.pyc
--- a/old/compscripts/pycache/complib2.cpython-312.pyc
+++ b/old/compscripts/pycache/complib2.cpython-312.pyc
--- a/old/compscripts/pycache/complib2.cpython-36.pyc
+++ b/old/compscripts/pycache/complib2.cpython-36.pyc
--- a/old/compscripts/pycache/complib2.cpython-38.pyc
+++ b/old/compscripts/pycache/complib2.cpython-38.pyc
--- a/old/compscripts/pycache/complib2.cpython-39.pyc
+++ b/old/compscripts/pycache/complib2.cpython-39.pyc
--- a/old/compscripts/pycache/complib3.cpython-310.pyc
+++ b/old/compscripts/pycache/complib3.cpython-310.pyc
--- a/old/compscripts/pycache/complib3.cpython-312.pyc
+++ b/old/compscripts/pycache/complib3.cpython-312.pyc
--- a/old/compscripts/pycache/complib3.cpython-39.pyc
+++ b/old/compscripts/pycache/complib3.cpython-39.pyc
--- a/old/compscripts/complib2.py
+++ b/old/compscripts/complib2.py
@ -0,0 +1,639 @@
 #!/usr/bin/python3
 #Python3 compilation library
 #Aaron M. Schinder
 #29 Dec 2020
 #
 #Cleanup and refactor from 2017 python2 version compilation libraries
 import os,sys,math,subprocess
 #####################
 #Directory Functions#
 #####################
 ##flist - list all files in a given directory pth
 ##optional arguments:
 #   recurse - (T/F): Whether to recursively search for files in directory tree
 #   exts - (list): A list of file extensions to filter on
 #   normpath (T/F): whether to normalize path variables after
 #filelist = flist(pth,**kwargs): 
 def flist(pth,**kwargs): 
    """
    flist - list the files found in directory pth
    optional arguments:
       recurse (T/F): also descend into sub-directories (default False)
       exts (str/list): keep only files with these extensions (see filterexts)
       normpath (T/F): run os.path.normpath over every result (default True)
       followlinks (T/F): descend into symlinked directories (default False)
    Files of pth come first, followed by the files of each sub-directory.
    """
    want_recurse = kwargs.get('recurse',False)
    want_normpath = kwargs.get('normpath',True)
    want_links = kwargs.get('followlinks',False)
    found = []
    subdirs = []
    for entry in os.listdir(pth):
        full = os.path.join(pth,entry)
        if(os.path.isdir(full)):
            #symlinked directories are only kept when followlinks is set
            if((want_links or not os.path.islink(full)) and full not in (".","..")):
                subdirs.append(full)
        elif(os.path.isfile(full)):
            found.append(full)
    #Recurse through directories, forwarding the same options
    if(want_recurse):
        for sub in subdirs:
            found.extend(flist(sub,**kwargs))
    #Keep only the selected extensions when a filter was supplied
    if('exts' in kwargs):
        found = filterexts(found,kwargs['exts'])
    #Normalize filename path according to os
    if(want_normpath):
        found = [os.path.normpath(p) for p in found]
    return found
 #Filters by extensions in a list of files
 #flst = def filterexts(flst,exts):
 def filterexts(flst,exts):
    """
    filterexts - keep the entries of flst whose extension is listed in exts
    exts may be a single string or a list; a missing leading '.' is added.
    The comparison is case sensitive and the input order is preserved.
    """
    if(isinstance(exts,str)):
        exts = [exts]
    kept = []
    for path in flst:
        suffix = os.path.splitext(path)[1]
        #every requested extension is normalized and compared, no early exit
        hits = [suffix == (e if e[0]=='.' else '.'+e) for e in exts]
        if(any(hits)):
            kept.append(path)
    return kept
 #Find a file fname, starting in pth and recursing
 #Used for finding library files to link
 def findfile(fname,pth,**kwargs):
    """
    findfile - search pth recursively for a file whose basename equals fname
    Returns the path of the last match in flist order, or "" if none matched.
    """
    hit = ""
    for candidate in flist(pth,recurse=True):
        if(os.path.split(candidate)[1] == fname):
            hit = candidate
    return hit
 #List to space-separated-string
 def list_to_sss(lst):
    """
    list_to_sss - join a list of strings into one string, separated by single
    spaces. An empty list gives "".
    """
    return " ".join(lst)
 def strip_whitespace(strin):
    """
    strip_whitespace - remove leading and trailing spaces, tabs, carriage
    returns and newlines from strin. Interior whitespace is left alone; a
    string made only of those characters gives "".
    """
    return strin.strip(' \t\r\n')
 def sss_to_list(sss):
    """
    sss_to_list - split a space-separated string on single spaces and strip
    the surrounding whitespace from each piece. Consecutive spaces therefore
    produce empty-string entries.
    """
    return [strip_whitespace(piece) for piece in sss.split(' ')]
 def replaceext(fname,ext):
    """
    replaceext - swap the extension of fname for ext
    A missing leading '.' on ext is added; an empty ext just removes the
    existing extension.
    """
    stem = os.path.splitext(fname)[0]
    if(len(ext)==0):
        return stem
    if(ext[0]!='.'):
        ext = '.'+ext
    return stem+ext
 def replaceexts(fnamelist,ext):
    """
    replaceexts - apply replaceext(name,ext) to every name in fnamelist and
    return the results as a new list.
    """
    return [replaceext(name,ext) for name in fnamelist]
 # def except_contains_oldv(lst1,exc):
 #     lst2 = []
 #     for item in lst1:
 #         b = 1
 #         for item2 in exc:
 #             if(item.find(item2)>=0):
 #                 b = 0
 #                 break
 #         if(b==1):
 #             lst2.append(item)
 #     return lst2
 #filenames must match
 def except_contains(lst1,exc):
    """
    except_contains - return the paths of lst1 whose file name (last path
    component) is not equal to any entry of exc. Order is preserved.
    """
    kept = []
    for path in lst1:
        excluded = any(os.path.split(path)[1]==banned for banned in exc)
        if(not excluded):
            kept.append(path)
    return kept
 ##########################
 ##System Call Procedures##
 ##########################
 def callproc(cmd, **kwargs):
    """
    callproc - run cmd through the shell and capture its combined
    stdout/stderr output
    optional arguments:
       logfile (str): when non-empty, append the command and its output to
                      this file
       echo (T/F): print the command and its output (default True)
    Returns nothing; the exit status of the command is not inspected.
    """
    logfile = kwargs.get('logfile',"")
    echo = kwargs.get('echo',True)
    if(echo):
        print(cmd)
    #encoding/decoding to/from bytes is necessary to use the subprocess command
    #in python3.7
    #However, only do this in linux
    if(sys.platform!='win32'):
        cmd2 = cmd.encode(encoding='utf-8')
    else:
        cmd2 = cmd
    proc = subprocess.Popen(cmd2,stderr = subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
    (out, err) = proc.communicate()
    #Tool output is not guaranteed to be valid UTF-8; replace undecodable
    #bytes instead of aborting the whole build with a UnicodeDecodeError
    out = out.decode(encoding='utf-8',errors='replace')
    if(echo):
        print(out)
    #The log is opened only once there is something to write, and the with
    #block closes it even if a write fails
    if(logfile!=""):
        with open(logfile,'a+') as fp:
            fp.write(cmd+'\n')
            fp.write(out+'\n')
 #######################################
 ##Compiler, Archive, and Linker Calls##
 #######################################
 def smartcompile(srcfile,objext='.o'):
    """
    smartcompile - decide whether srcfile needs compiling
    The object file is expected next to the source, with the extension
    replaced by objext. Returns True when it is missing or older than the
    source, False otherwise.
    """
    src_mtime = os.path.getmtime(srcfile)
    target = replaceext(srcfile,objext)
    if(not os.path.exists(target)):
        return True
    return not (os.path.getmtime(target)>=src_mtime)
 #MSVC compiler wrapper
 def msvc_compile(compilername, srcfile, **kwargs):
    """
    msvc_compile - compile one source file with an MSVC-style command line
    optional arguments (defaults in parentheses):
       include (''), flags (''): string or list of strings
       objext ('.obj'), srcfileflag ('/c'), outfileflag ('/Fo:'), logfile ('')
    The object file is written next to the source file.
    """
    include = kwargs.get('include','')
    if(isinstance(include,list)):
        include = list_to_sss(include)
    flags = kwargs.get('flags','')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext','.obj')
    srcfileflag = kwargs.get('srcfileflag','/c')
    outfileflag = kwargs.get('outfileflag','/Fo:')
    logfile = kwargs.get('logfile',"")
    outfile = replaceext(srcfile,objext)
    #the empty element reproduces the double space after the flags
    pieces = [compilername,flags,"",srcfileflag,srcfile,outfileflag+outfile,include]
    callproc(" ".join(pieces),echo=True,logfile=logfile)
    return
 #MSVC compiler wrapper
 def msvc_compile_list(compiler,srclist,**kwargs):
    """
    msvc_compile_list - call msvc_compile on every file of srclist, in order,
    forwarding the keyword options unchanged.
    """
    for src in srclist:
        msvc_compile(compiler,src,**kwargs)
 #gnu-style compiler compile: Should work with gcc, g++, gfortran
 def gs_compile(compiler,srcfile,**kwargs):
    """
    gs_compile - compile one source file with a gnu-style command line
    optional arguments (defaults in parentheses):
       include (''), flags (''): string or list of strings
       objext ('.o'), srcfileflag ('-c'), outfileflag ('-o'), logfile ('')
       smartcompile (True): skip the compile when the object file next to
                            the source is not older than the source
    """
    include = kwargs.get('include','')
    if(isinstance(include,list)):
        include = list_to_sss(include)
    flags = kwargs.get('flags','')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext','.o')
    srcfileflag = kwargs.get('srcfileflag','-c')
    outfileflag = kwargs.get('outfileflag','-o')
    logfile = kwargs.get('logfile',"")
    use_smart = kwargs.get('smartcompile',True)
    #with smartcompile disabled the timestamp check is not even evaluated
    if(use_smart and not smartcompile(srcfile,objext)):
        return
    outfile = replaceext(srcfile,objext)
    pieces = [compiler,flags,outfileflag,outfile,srcfileflag,srcfile,include]
    callproc(" ".join(pieces),echo=True,logfile=logfile)
    return
 def gs_compile_list(compiler,srclist,**kwargs):
    """
    gs_compile_list - call gs_compile on every file of srclist, in order,
    forwarding the keyword options unchanged.
    """
    for src in srclist:
        gs_compile(compiler,src,**kwargs)
 def gs_compile_all(compiler,srcdir,srcexts,**kwargs):
    """
    gs_compile_all - compile every file under srcdir whose extension is in
    srcexts. The search is recursive unless recurse=False is passed; all
    keyword options are forwarded to gs_compile.
    """
    recurse = kwargs.get('recurse',True)
    for src in flist(srcdir,exts=srcexts,recurse=recurse):
        gs_compile(compiler,src,**kwargs)
 def gs_link_all(linker,srcpath,target,**kwargs):
    """
    gs_link_all - link every object file found under srcpath into target
    optional arguments: objext ('.o'), recurse (True); all keyword options
    are also forwarded to gs_link_list.
    """
    objext = kwargs.get('objext','.o')
    recurse = kwargs.get('recurse',True)
    objs = list_to_sss(flist(srcpath,exts=objext,recurse=recurse))
    gs_link_list(linker,objs,target,**kwargs)
 def gs_link_list(linker,objlist,target,**kwargs):
    """
    gs_link_list - link object files into target with a gnu-style linker
    objlist: the object files, either as one space-separated string or as a
             list/tuple of file names (joined with spaces here)
    optional arguments (all default to ''):
       libdir, staticlibs, libflags, linkerflags: text placed on the command
       line in that order around the object list
       logfile: forwarded to callproc
    """
    #Accept a real list as the name promises, not only a pre-joined string
    if(isinstance(objlist,(list,tuple))):
        objlist = " ".join(objlist)
    libdir = kwargs.get('libdir','')
    staticlibs = kwargs.get('staticlibs','')
    libflags = kwargs.get('libflags','')
    linkerflags = kwargs.get('linkerflags','')
    logfile = kwargs.get('logfile','')
    ln = linker+" -o "+target+" "+libdir
    ln = ln+" "+objlist+" "+staticlibs+" "+libflags+" "+linkerflags
    callproc(ln,logfile=logfile)
    return
 def msvc_link_list(objlist,target,**kwargs):
    """
    msvc_link_list - link object files into target with the MSVC 'link' tool
    objlist: the object files, either as one space-separated string or as a
             list/tuple of file names (joined with spaces here)
    optional arguments (all default to ''):
       libdir, staticlibs, linkerflags: placed before /out:target
       libflags: placed after /out:target
       logfile: forwarded to callproc
    """
    linker = 'link'
    #Accept a real list as the name promises, not only a pre-joined string
    if(isinstance(objlist,(list,tuple))):
        objlist = " ".join(objlist)
    libdir = kwargs.get('libdir','')
    staticlibs = kwargs.get('staticlibs','')
    libflags = kwargs.get('libflags','')
    linkerflags = kwargs.get('linkerflags','')
    logfile = kwargs.get('logfile','')
    ln = linker+" "+libdir
    ln = ln+" "+objlist+" "+staticlibs+" "+linkerflags
    ln = ln+" /out:"+target+" "+libflags
    callproc(ln,logfile=logfile)
    return
 def ar_all(srcpath,arname,**kwargs):
    """
    ar_all - archive every object file found under srcpath into arname
    optional arguments: recurse (True), objext ('.o'); all keyword options
    are also forwarded to ar_list.
    """
    recurse = kwargs.get('recurse',True)
    objext = kwargs.get('objext','.o')
    ar_list(flist(srcpath,exts=objext,recurse=recurse),arname,**kwargs)
 def msvc_lib_list(objlist,arname,**kwargs):
    """
    msvc_lib_list - run the MSVC 'lib' tool on the object files of objlist
    (a list of names), writing the library to arname.
    """
    callproc("lib "+list_to_sss(objlist)+" /out:"+arname)
 def ar_list(objlist,arname,**kwargs):
    """
    ar_list - run 'ar cr' to put the object files of objlist (a list of
    names) into the archive arname.
    """
    callproc("ar cr "+arname+" "+list_to_sss(objlist))
 def ar_add_list(objlist,arname,**kwargs):
    """
    ar_add_list - add the object files of objlist (a list of names) to the
    existing archive arname.
    Uses the 'r' operation (insert, replacing members of the same name); the
    previous 't' operation only printed the archive's table of contents and
    never modified the archive.
    """
    objlist2 = list_to_sss(objlist)
    ln = "ar r "+arname+" "+objlist2
    callproc(ln)
    return
 ##############################
 ##Derived Compiler Functions##
 ##############################
 def gcc_compile(srcfile,**kwargs):
    """gcc_compile - compile one source file with gcc; objext is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile('gcc',srcfile,**kwargs)
 def gcc_compile_all(srcdir,**kwargs):
    """gcc_compile_all - compile every '.c' file under srcdir with gcc; objext is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile_all('gcc',srcdir,['.c'],**kwargs)
 def gcc_compile_list(srclist,**kwargs):
    """gcc_compile_list - compile each file of srclist with gcc; objext is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile_list('gcc',srclist,**kwargs)
 def gpp_compile(srcfile,**kwargs):
    """gpp_compile - compile one source file with g++; objext is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile('g++',srcfile,**kwargs)
 def gpp_compile_all(srcdir,**kwargs):
    """gpp_compile_all - compile every '.c'/'.cpp' file under srcdir with g++; objext is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile_all('g++',srcdir,['.c','.cpp'],**kwargs)
 def gpp_compile_list(srclist,**kwargs):
    """gpp_compile_list - compile each file of srclist with g++; objext is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile_list('g++',srclist,**kwargs)
 def gfortran_compile(srcfile,**kwargs):
    """gfortran_compile - compile one source file with gfortran; objext is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile('gfortran',srcfile,**kwargs)
 def gfortran_compile_all(srcdir,**kwargs):
    """gfortran_compile_all - compile every '.f'/'.f90'/'.f77' file under srcdir with gfortran; objext is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile_all('gfortran',srcdir,['.f','.f90','.f77'],**kwargs)
 def gfortran_compile_list(srclist,**kwargs):
    """gfortran_compile_list - compile each file of srclist with gfortran; objext is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile_list('gfortran',srclist,**kwargs)
 def clang_compile(srcfile,**kwargs):
    """clang_compile - compile one source file with clang++; objext is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile('clang++',srcfile,**kwargs)
 def clang_compile_all(srcdir,**kwargs):
    """clang_compile_all - compile every '.c'/'.cpp' file under srcdir with clang++; objext is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile_all('clang++',srcdir,['.c','.cpp'],**kwargs)
 def clang_compile_list(srclist,**kwargs):
    """clang_compile_list - compile each file of srclist with clang++; objext is forced to '.o'."""
    kwargs.update(objext='.o')
    gs_compile_list('clang++',srclist,**kwargs)
--- a/old/compscripts/complib3.py
+++ b/old/compscripts/complib3.py
@ -0,0 +1,524 @@
 #!/usr/bin/python3
 import os,sys,math
 import subprocess
 ##flist - list all files in a given directory pth
 ##optional arguments:
 #   recurse - (T/F): Whether to recursively search for files in directory tree
 #   exts - (list): A list of file extensions to filter on
 #   normpath (T/F): whether to normalize path variables after
 #filelist = flist(pth,**kwargs): 
 def flist(pth,**kwargs): 
    """
    flist - list the files found in directory pth
    optional arguments:
       recurse (T/F): also descend into sub-directories (default False)
       exts (str/list): keep only files with these extensions (see filterexts)
       normpath (T/F): run os.path.normpath over every result (default True)
       followlinks (T/F): descend into symlinked directories (default False)
    Files of pth come first, followed by the files of each sub-directory.
    """
    want_recurse = kwargs.get('recurse',False)
    want_normpath = kwargs.get('normpath',True)
    want_links = kwargs.get('followlinks',False)
    found = []
    subdirs = []
    for entry in os.listdir(pth):
        full = os.path.join(pth,entry)
        if(os.path.isdir(full)):
            #symlinked directories are only kept when followlinks is set
            if((want_links or not os.path.islink(full)) and full not in (".","..")):
                subdirs.append(full)
        elif(os.path.isfile(full)):
            found.append(full)
    #Recurse through directories, forwarding the same options
    if(want_recurse):
        for sub in subdirs:
            found.extend(flist(sub,**kwargs))
    #Keep only the selected extensions when a filter was supplied
    if('exts' in kwargs):
        found = filterexts(found,kwargs['exts'])
    #Normalize filename path according to os
    if(want_normpath):
        found = [os.path.normpath(p) for p in found]
    return found
 #Filters by extensions in a list of files
 #flst = def filterexts(flst,exts):
 def filterexts(flst,exts):
    """
    filterexts - keep the entries of flst whose extension is listed in exts
    exts may be a single string or a list; a missing leading '.' is added.
    The comparison is case sensitive and the input order is preserved.
    """
    if(isinstance(exts,str)):
        exts = [exts]
    kept = []
    for path in flst:
        suffix = os.path.splitext(path)[1]
        #every requested extension is normalized and compared, no early exit
        hits = [suffix == (e if e[0]=='.' else '.'+e) for e in exts]
        if(any(hits)):
            kept.append(path)
    return kept
 #Find a file fname, starting in pth and recursing
 #Used for finding library files to link
 def findfile(fname,pth,**kwargs):
    """
    findfile - search pth recursively for a file whose basename equals fname
    Returns the path of the last match in flist order, or "" if none matched.
    """
    hit = ""
    for candidate in flist(pth,recurse=True):
        if(os.path.split(candidate)[1] == fname):
            hit = candidate
    return hit
 def replaceext(fname,ext):
    """
    replaceext - swap the extension of fname for ext
    A missing leading '.' on ext is added; an empty ext just removes the
    existing extension.
    """
    stem = os.path.splitext(fname)[0]
    if(len(ext)==0):
        return stem
    if(ext[0]!='.'):
        ext = '.'+ext
    return stem+ext
 def replaceexts(fnamelist,ext):
    """
    replaceexts - apply replaceext(name,ext) to every name in fnamelist and
    return the results as a new list.
    """
    return [replaceext(name,ext) for name in fnamelist]
 #filenames must match
 def except_contains(lst1,exc):
    """
    except_contains - return the paths of lst1 whose file name (last path
    component) is not equal to any entry of exc. Order is preserved.
    """
    kept = []
    for path in lst1:
        excluded = any(os.path.split(path)[1]==banned for banned in exc)
        if(not excluded):
            kept.append(path)
    return kept
 ##########################
 ##System Call Procedures##
 ##########################
 def callproc(cmd, **kwargs):
    """
    callproc - run cmd through the shell and capture its combined
    stdout/stderr output
    optional arguments:
       logfile (str): when non-empty, append the command and its output to
                      this file
       echo (T/F): print the command and its output (default True)
    Returns nothing; the exit status of the command is not inspected.
    """
    logfile = kwargs.get('logfile',"")
    echo = kwargs.get('echo',True)
    if(echo):
        print(cmd)
    #encoding/decoding to/from bytes is necessary to use the subprocess command
    #in python3.7
    #However, only do this in linux
    if(sys.platform!='win32'):
        cmd2 = cmd.encode(encoding='utf-8')
    else:
        cmd2 = cmd
    proc = subprocess.Popen(cmd2,stderr = subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
    (out, err) = proc.communicate()
    #Tool output is not guaranteed to be valid UTF-8; replace undecodable
    #bytes instead of aborting the whole build with a UnicodeDecodeError
    out = out.decode(encoding='utf-8',errors='replace')
    if(echo):
        print(out)
    #The log is opened only once there is something to write, and the with
    #block closes it even if a write fails
    if(logfile!=""):
        with open(logfile,'a+') as fp:
            fp.write(cmd+'\n')
            fp.write(out+'\n')
 #List to space-separated-string
 def list_to_sss(lst):
    """
    list_to_sss - join a list of strings into one string, separated by single
    spaces. An empty list gives "".
    """
    return " ".join(lst)
 #####################################
 ## Incremental Compilation Library ##
 #####################################
 #silently read lines from a text file if exists
 def readtextlines(fname):
    """
    readtextlines - silently read the lines of a text file
    Returns [] when fname is not a regular file or cannot be opened.
    Otherwise returns the lines (line endings kept) followed by one trailing
    "" entry; that end-of-file marker is kept so callers see the same list
    as before.
    """
    txtlns = []
    if(not os.path.isfile(fname)):
        return txtlns
    #Only a failure to open is swallowed; a bare except would also hide
    #KeyboardInterrupt and programming errors
    try:
        fp = open(fname,"r")
    except OSError:
        return txtlns
    #the with block closes the file even if reading raises
    with fp:
        txtlns = fp.readlines()
    txtlns.append("")
    return txtlns
 def getincludefnfrage(includeline):
    """
    getincludefnfrage - extract the file name fragment from an include line
    The fragment is the text between the first '<' or '"' and the next '>'
    or '"' after it. Returns "" when no opening or no closing delimiter is
    found.
    """
    fnfrag = ""
    #locate the opening delimiter
    I1 = -1
    for I in range(0,len(includeline)):
        if(includeline[I]=='<' or includeline[I]=='"'):
            I1 = I
            break
    if(I1<0):
        return fnfrag
    #search for the closing delimiter strictly after the opening one, so an
    #opening '"' is never mistaken for its own closing quote
    for I in range(I1+1,len(includeline)):
        if(includeline[I]=='>' or includeline[I]=='"'):
            fnfrag = includeline[I1+1:I]
            break
    return fnfrag
 #Returns the name of the source file fname (if it exists)
 #and all included filenames
 def getsrcandincludes(fname, incdirs):
    """
    getsrcandincludes - list a source file and the files it includes
    fname: the source file; when it does not exist the result is []
    incdirs: directories searched, in order, for each included file name
    Returns [fname, ...] followed by every include that was found in one of
    incdirs, scanning the found files for further includes in turn. Each
    file appears once, so circular or repeated includes cannot make the
    scan run forever.
    """
    srcs = []
    if(os.path.isfile(fname)):
        srcs.append(fname)
        seen = set([os.path.normpath(fname)])
        Ilist = 0
        while(Ilist<len(srcs)):
            #scan the next file; srcs grows while it is being walked
            lns = readtextlines(srcs[Ilist])
            for ln in lns:
                if(ln.find("#include")>=0):
                    fnfrag = getincludefnfrage(ln)
                    if(fnfrag==""):
                        continue
                    for incdir in incdirs:
                        tfn = os.path.join(incdir,fnfrag)
                        if(os.path.isfile(tfn)):
                            key = os.path.normpath(tfn)
                            if(key not in seen):
                                seen.add(key)
                                srcs.append(tfn)
                            #first matching include directory wins
                            break
            Ilist = Ilist + 1
    return srcs
 #Returns the name of the object file associated with the source file
 #within the object store folder (if it exists)
 def getobjfile(fname,objstore,objext = ".o"):
    """
    getobjfile - path of the object file belonging to source file fname
    The object name is the base name of fname with every extension removed,
    plus objext, inside the objstore directory. Returns that path when it
    exists and "" otherwise.
    """
    stem = os.path.split(fname)[1]
    #peel off extensions until none is left (a.b.cu -> a)
    while True:
        root, ext = os.path.splitext(stem)
        if(ext==""):
            break
        stem = root
    candidate = os.path.join(objstore,"{}.{}".format(stem,objext.strip('.')))
    if(os.path.exists(candidate)):
        return candidate
    return ""
 def getsrctimes(fname, incdirs):
    """
    getsrctimes - modification times of fname and of the files it includes,
    in the order returned by getsrcandincludes(fname, incdirs).
    """
    return [os.path.getmtime(path) for path in getsrcandincludes(fname, incdirs)]
 def getobjtime(fname,objstore,objext=".o"):
    """
    getobjtime - modification time of the object file that getobjfile finds
    for fname, or -1 when there is no such object file.
    """
    objfile = getobjfile(fname,objstore,objext)
    if(objfile==""):
        return -1
    return os.path.getmtime(objfile)
 #Decide whether or not to compile source file
 def decidecompile(fname,**kwargs):
    """
    decidecompile - decide whether source file fname has to be compiled
    optional arguments (defaults in parentheses):
       searchincdirs (["./include"]): directories searched for includes
       objext (".o"): extension of the object file
       objstore ("./objstore"): directory holding the object files
    Returns False when fname does not exist. Returns True when no object
    file exists yet, or when fname or any include found for it is newer
    than the object file; False otherwise.
    """
    if(not os.path.isfile(fname)):
        return False
    ##unpack kwargs
    incdirs = kwargs.get("searchincdirs",["./include"])
    objext = kwargs.get("objext",".o")
    objstore = kwargs.get("objstore","./objstore")
    obj = getobjfile(fname,objstore,objext)
    if(obj==""):
        return True
    objt = getobjtime(fname,objstore,objext)
    #the include tree is walked once here; the former extra
    #getsrcandincludes call produced a list that was never used
    srctlist = getsrctimes(fname,incdirs)
    for srct in srctlist:
        if(srct>objt):
            return True
    return False
 def gs_incremental_compile(compiler,srcfile,**kwargs):
    """
    gs_incremental_compile - compile srcfile with a gnu-style command line,
    but only when decidecompile(srcfile,**kwargs) says it is needed
    optional arguments (defaults in parentheses):
       include (''), flags (''): string or list of strings
       objext ('.o'), srcfileflag ('-c'), outfileflag ('-o'), logfile ('')
       objstore ("./objstore"): directory receiving the object file
    The object file is named after the source base name with every
    extension removed, plus objext.
    """
    include = kwargs.get('include','')
    if(isinstance(include,list)):
        include = list_to_sss(include)
    flags = kwargs.get('flags','')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext','.o')
    srcfileflag = kwargs.get('srcfileflag','-c')
    outfileflag = kwargs.get('outfileflag','-o')
    logfile = kwargs.get('logfile',"")
    objstore = kwargs.get('objstore',"./objstore")
    if(not decidecompile(srcfile,**kwargs)):
        return
    #strip every extension from the base name (a.b.cu -> a)
    stem = os.path.split(srcfile)[1]
    while(os.path.splitext(stem)[1]!=""):
        stem = os.path.splitext(stem)[0]
    outfile = os.path.join(objstore,"{}{}".format(stem,objext))
    pieces = [compiler,flags,outfileflag,outfile,srcfileflag,srcfile,include]
    callproc(" ".join(pieces),echo=True,logfile=logfile)
    return
 def gs_incremental_compile_list(compiler,srclist,**kwargs):
    """
    gs_incremental_compile_list - call gs_incremental_compile on every file
    of srclist, in order, forwarding the keyword options unchanged.
    """
    for src in srclist:
        gs_incremental_compile(compiler,src,**kwargs)
 #MSVC compiler wrapper
 def msvc_incremental_compile(compilername, srcfile, **kwargs):
    """
    msvc_incremental_compile - compile srcfile with an MSVC-style command
    line, but only when decidecompile(srcfile,**kwargs) says it is needed
    optional arguments (defaults in parentheses):
       include (''), flags (''): string or list of strings
       objext ('.obj'), srcfileflag ('/c'), outfileflag ('/Fo:'), logfile ('')
       objstore ("./objstore"): directory receiving the object file
    The object file is named after the source base name with every
    extension removed, plus objext.
    """
    include = kwargs.get('include','')
    if(isinstance(include,list)):
        include  = list_to_sss(include)
    flags = kwargs.get('flags','')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext','.obj')
    srcfileflag = kwargs.get('srcfileflag','/c')
    outfileflag = kwargs.get('outfileflag','/Fo:')
    logfile = kwargs.get('logfile',"")
    objstore = kwargs.get('objstore',"./objstore")
    docompile = decidecompile(srcfile,**kwargs)
    if(docompile):
        #strip every extension from the base name (a.b.cu -> a)
        f2 = os.path.split(srcfile)[1]
        while(os.path.splitext(f2)[1]!=""):
            f2 = os.path.splitext(f2)[0]
        outfile = os.path.join(objstore,"{}{}".format(f2,objext))
        ln = compilername+" "+flags+" "+srcfileflag+" "+srcfile+" "+outfileflag+" "+outfile
        ln = ln + " " + include
        #the command runs exactly once, and only when a compile is needed;
        #the former second call outside this branch compiled twice and
        #raised NameError whenever the object file was already up to date
        callproc(ln,echo=True,logfile=logfile)
    return
 def msvc_incremental_compile_list(compiler,srclist,**kwargs):
    """
    msvc_incremental_compile_list - call msvc_incremental_compile on every
    file of srclist, in order, forwarding the keyword options unchanged.
    """
    for src in srclist:
        msvc_incremental_compile(compiler,src,**kwargs)
 #######################
 ## Main Script Tests ##
 #######################
 def testtimes(args):
    """
    testtimes - manual check of the incremental-compile helpers
    args: argument list in sys.argv layout; args[1] is the source file.
    Prints the source file and its includes with their modification times,
    the associated object file in ./objstore (if any), and the compile
    decision. Does nothing when fewer than two arguments are given.
    """
    if(len(args)<2):
        return
    src = args[1]
    names = getsrcandincludes(src,["./include"])
    times = getsrctimes(src,["./include"])
    for idx in range(0,len(names)):
        print("{}\t\t{}".format(names[idx],times[idx]))
    print("associated obj file:")
    fobj = getobjfile(src,"./objstore")
    ftobj = getobjtime(src,"./objstore")
    if(fobj==""):
        print("none found")
    else:
        print("{}\t\t{}".format(fobj,ftobj))
    print("compile? : {}".format(decidecompile(src)))
    return
 # if(__name__ == "__main__"):
 #     args = sys.argv
 #     testtimes(args)
--- a/old/compscripts/linux64.makelib.py
+++ b/old/compscripts/linux64.makelib.py
@ -0,0 +1,52 @@
 #!/usr/bin/python3
 import os,sys,subprocess,math
 from complib2 import *
 from complib3 import gs_incremental_compile, gs_incremental_compile_list
 import shutil
 #from distutils.dir_util import copy_tree as copy_tree #this version does overwrites
 from shutil import copytree
 #Build products: name stem of the static library, and the test executable name
 #(targetname is not used by this script)
 libname = 'amsculib2.linux64'
 targetname = 'test'
 #Shared include/lib folders and this project's local binary folder
 commonincdir = "../../linux64/include"
 commonlibdir = "../../linux64/lib"
 localbindir = "./bin_linux64"
 #Compiler and the source extensions collected from ./src
 cc = 'nvcc'
 srcexts = ['.c','.cpp','.cu']
 #Main-program sources: excluded from the static library
 mainsrc = ['main.cu']
 include = "-I./include -I{}".format(commonincdir)
 #Options forwarded to the compile helpers.
 #-dc flag: relocatable device code - needed for device functions to link in different "execution units"
 #(optional extra flag kept for reference: --ptxas-options=-v)
 kwargs = {
     'include': include,
     'flags': "-dc --compiler-options '-fPIC -O3'",
     'libdir': "-L{} -L{}".format(localbindir,commonlibdir),
     'libflags': "-l{}".format(libname),
     'linkerflags': "",
     'recurse': True,
     'objstore': "./objstore",
     'searchincdirs': ['./include'],
 }
 #Every source under ./src, minus the main program files
 files = except_contains(flist('./src',exts = srcexts, recurse=True),mainsrc)
 objfiles = replaceexts(files,'.o')
 objfiles_sss = list_to_sss(objfiles)
 #Compile the library sources through the incremental-compile helper
 #(the plain alternative would be gs_compile_list(cc,files,**kwargs))
 gs_incremental_compile_list(cc,files,**kwargs)
 #Archive every .o found in the object store into the static library
 libpath = '{}/lib{}.a'.format(localbindir,libname)
 objlist = flist(kwargs['objstore'],exts='.o',recurse=True)
 ar_list(objlist,libpath)
 #Push the library to the common lib folder
 shutil.copy(libpath,commonlibdir)
 #Copy include files to the common include folder (existing folders are merged into)
 copytree('./include/',commonincdir+'/',dirs_exist_ok=True)
--- a/old/compscripts/linux64.maketest.py
+++ b/old/compscripts/linux64.maketest.py
@ -0,0 +1,43 @@
 #!/usr/bin/python3
 import os,sys,subprocess,math
 from complib2 import *
 from complib3 import gs_incremental_compile, gs_incremental_compile_list
 import shutil
 #Library to link against and name of the test executable to produce
 libname = 'amsculib2.linux64'
 targetname = 'test'
 #Shared include/lib folders and this project's local binary folder
 commonincdir = "../../linux64/include"
 commonlibdir = "../../linux64/lib"
 localbindir = "./bin_linux64"
 #Compiler; srcexts and mainsrc are defined for symmetry with the makelib script
 #but are not used below
 cc = 'nvcc'
 srcexts = ['.c','.cpp','.cu']
 mainsrc = ['main.cu']
 include = "-I./include -I{}".format(commonincdir)
 #Options forwarded to the compile and link helpers.
 #-dc flag: relocatable device code - needed for device functions to link in different "execution units"
 kwargs = {
     'include': include,
     'flags': "-dc --compiler-options '-fPIC'",
     'libdir': "-L{} -L{}".format(localbindir,commonlibdir),
     'libflags': "-l{} -lamsculib2.linux64".format(libname),
     'linkerflags': "",
     'recurse': True,
     'objstore': "./objstore",
     'searchincdirs': ['./include'],
 }
 #Other libraries that have been linked in the past, kept for reference:
 #-lamsmathlib3.linux64 -lamsstring3.linux64 -lamsmatrix_cpp.linux64 -llapack -lblas -lgfortran -lamsmathutilthread.linux64 -lamsmathutil2.linux64
 #Pull required binary dynamic libraries to the bin folder (currently disabled)
 #shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir);
 #shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir);
 #shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64');
 #Source and object file of the main test program
 fsrc = ['./src/main.cu']
 fobj = replaceexts(fsrc,'.o')
 #Compile the test program, then link it into the local bin folder
 gs_compile_list(cc,fsrc,**kwargs)
 exepath = '{}/{}'.format(localbindir,targetname)
 gs_link_list(cc,list_to_sss(fobj),exepath,**kwargs)
--- a/old/compscripts/old/complib2.py
+++ b/old/compscripts/old/complib2.py
@ -0,0 +1,639 @@
 #!/usr/bin/python3
 #Python3 compilation library
 #Aaron M. Schinder
 #29 Dec 2020
 #
 #Cleanup and refactor from 2017 python2 version compilation libraries
 import os,sys,math,subprocess
 #####################
 #Directory Functions#
 #####################
 def flist(pth,**kwargs):
    """
    flist - list all files in a given directory pth
    optional arguments:
       recurse (T/F): also list files in subdirectories (default False)
       exts (str or list): keep only files with these extensions (default: keep all)
       normpath (T/F): pass every result through os.path.normpath (default True)
       followlinks (T/F): also descend into symlinked directories (default False)
       linuxpath: accepted, currently has no effect
    filelist = flist(pth,**kwargs)
    Every returned path starts with pth.
    """
    recurse_ = kwargs.get('recurse',False)
    normpath_ = kwargs.get('normpath',True)
    followlinks_ = kwargs.get('followlinks',False)
    found = []
    subdirs = []
    for entry in os.listdir(pth):
        full = os.path.join(pth,entry)
        if(os.path.isdir(full)):
            #symlinked directories are only kept when followlinks is set
            if(not(followlinks_) and os.path.islink(full)):
                continue
            if(full not in (".","..")):
                subdirs.append(full)
        elif(os.path.isfile(full)):
            found.append(full)
    #Recurse through directories
    if(recurse_):
        for sub in subdirs:
            found.extend(flist(sub,**kwargs))
    #Postprocess: keep only the selected extensions
    if('exts' in kwargs):
        found = filterexts(found,kwargs['exts'])
    #Normalize filename path according to os
    if(normpath_):
        found = [os.path.normpath(p) for p in found]
    return found
 def filterexts(flst,exts):
    """
    Filter a list of files by extension.
    flst: list of file names/paths
    exts: one extension or a list of them, each with or without the leading '.'
    Returns the entries of flst (original order) whose os.path.splitext
    extension equals one of exts. The comparison is case sensitive.
    """
    if(isinstance(exts,str)):
        exts = [exts]
    kept = []
    for fname in flst:
        #every requested extension is normalised to start with '.' before comparing
        hits = [(e if e[0]=='.' else '.'+e)==os.path.splitext(fname)[1] for e in exts]
        if(True in hits):
            kept.append(fname)
    return kept
 def findfile(fname,pth,**kwargs):
    """
    Find a file named fname, starting in pth and recursing.
    Used for finding library files to link.
    Returns the path of the last match in the listing produced by
    flist(pth,recurse=True), or "" when nothing matches.
    Extra keyword arguments are accepted but ignored.
    """
    matches = [F for F in flist(pth,recurse=True) if os.path.split(F)[1]==fname]
    if(len(matches)>0):
        return matches[-1]
    return ""
 def list_to_sss(lst):
    """
    List to space-separated string.
    Joins the strings in lst with single spaces; an empty list gives "".
    """
    return " ".join(lst)
 def strip_whitespace(strin):
    """
    Return strin without leading/trailing spaces, tabs, carriage returns and
    newlines. A string made only of those characters (or an empty one) gives "".
    """
    return strin.strip(' \t\r\n')
 def sss_to_list(sss):
    """
    Space-separated string to list.
    The string is split on ' ' and each piece is trimmed of spaces, tabs,
    carriage returns and newlines. Pieces that are empty after trimming
    (from repeated, leading or trailing spaces, or an empty input) are dropped,
    so sss_to_list("") is [] and joining the result with single spaces
    gives back a clean list of tokens.
    """
    tokens = []
    for piece in sss.split(' '):
        piece = piece.strip(' \t\r\n')
        #skip the empty pieces produced by consecutive/leading/trailing spaces
        if(piece!=""):
            tokens.append(piece)
    return tokens
 def replaceext(fname,ext):
    """
    Swap the last extension of fname for ext.
    ext may be given with or without the leading '.'; an empty ext just
    removes the extension. Only the final extension is replaced
    (os.path.splitext semantics).
    """
    stem = os.path.splitext(fname)[0]
    if(len(ext)==0):
        return stem
    if(ext[0]!='.'):
        ext = '.'+ext
    return stem+ext
 def replaceexts(fnamelist,ext):
    """
    Apply replaceext(name,ext) to every name in fnamelist and return the
    results as a new list, in the same order.
    """
    return [replaceext(name,ext) for name in fnamelist]
 # def except_contains_oldv(lst1,exc):
 #     lst2 = []
 #     for item in lst1:
 #         b = 1
 #         for item2 in exc:
 #             if(item.find(item2)>=0):
 #                 b = 0
 #                 break
 #         if(b==1):
 #             lst2.append(item)
 #     return lst2
 def except_contains(lst1,exc):
    """
    Return the paths of lst1 whose file name (last path component) is not
    listed in exc. Names must match exactly; directories are not compared.
    Order of lst1 is preserved.
    """
    kept = []
    for path in lst1:
        excluded = any(os.path.split(path)[-1]==name for name in exc)
        if(not excluded):
            kept.append(path)
    return kept
 ##########################
 ##System Call Procedures##
 ##########################
 def callproc(cmd, **kwargs):
    """
    Run cmd through the system shell and wait for it to finish.
    stderr is merged into stdout; the combined output is decoded as utf-8.
    optional arguments:
       logfile (str): when present and non-empty, the command line and its
                      output are appended to this file
       echo (T/F): print the command and its output (default True)
    Bytes that are not valid utf-8 (and characters the log file's encoding
    cannot represent) are replaced instead of raising, so unusual tool output
    never aborts the build script. The log file is only opened after the
    command has run and is always closed again.
    """
    logfile = kwargs.get('logfile',"")
    echo = kwargs.get('echo',True)
    if(echo):
        print(cmd)
    #the command is passed as utf-8 bytes everywhere except on windows,
    #where the plain string is used
    if(sys.platform!='win32'):
        cmd2 = cmd.encode(encoding='utf-8')
    else:
        cmd2 = cmd
    proc = subprocess.Popen(cmd2,stderr = subprocess.STDOUT, stdout=subprocess.PIPE, shell=True)
    (out, err) = proc.communicate()
    #errors='replace': compilers may emit output in a local code page
    out = out.decode(encoding='utf-8',errors='replace')
    if(echo):
        print(out)
    if(logfile!=""):
        with open(logfile,'a+',errors='replace') as fp:
            fp.write(cmd+'\n')
            fp.write(out+'\n')
 #######################################
 ##Compiler, Archive, and Linker Calls##
 #######################################
 def smartcompile(srcfile,objext='.o'):
    """
    Decide whether srcfile needs to be compiled.
    The object file is expected next to the source, with the source's
    extension replaced by objext. Returns True when that object file does not
    exist or is older than the source, False otherwise.
    Only the source file's own modification time is looked at.
    """
    srctime = os.path.getmtime(srcfile)
    objfile = replaceext(srcfile,objext)
    if(not os.path.exists(objfile)):
        return True
    return os.path.getmtime(objfile)<srctime
 def msvc_compile(compilername, srcfile, **kwargs):
    """
    MSVC-style compiler wrapper: compile one source file.
    The object file is written next to the source (extension replaced by objext).
    optional arguments:
       include (str or list): include options, appended at the end of the command (default '')
       flags (str or list): compiler flags (default '')
       objext: object file extension (default '.obj')
       srcfileflag: option placed before the source file (default '/c')
       outfileflag: option prefixed directly to the object file name (default '/Fo:')
       logfile: log file handed to callproc (default "")
    """
    include = kwargs.get('include','')
    if(isinstance(include,list)):
        include = list_to_sss(include)
    flags = kwargs.get('flags','')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext','.obj')
    srcfileflag = kwargs.get('srcfileflag','/c')
    outfileflag = kwargs.get('outfileflag','/Fo:')
    logfile = kwargs.get('logfile',"")
    outfile = replaceext(srcfile,objext)
    #the empty entry reproduces the double space after the flags
    parts = [compilername,flags,"",srcfileflag,srcfile,outfileflag+outfile,include]
    callproc(" ".join(parts),echo=True,logfile=logfile)
    return
 def msvc_compile_list(compiler,srclist,**kwargs):
    """
    Call msvc_compile for every source file in srclist, in order,
    forwarding compiler and all keyword arguments.
    """
    for srcfile in srclist:
        msvc_compile(compiler,srcfile,**kwargs)
 def gs_compile(compiler,srcfile,**kwargs):
    """
    gnu-style compiler call for one source file (intended for gcc, g++, gfortran
    and compilers with the same command-line shape).
    The object file is written next to the source (extension replaced by objext).
    optional arguments:
       include (str or list): include options, appended at the end of the command (default '')
       flags (str or list): compiler flags (default '')
       objext: object file extension (default '.o')
       srcfileflag: option placed before the source file (default '-c')
       outfileflag: option placed before the object file (default '-o')
       logfile: log file handed to callproc (default "")
       smartcompile (T/F): when true (default), the compile is skipped if
                           smartcompile(srcfile,objext) says the object is up to date
    """
    include = kwargs.get('include','')
    if(isinstance(include,list)):
        include = list_to_sss(include)
    flags = kwargs.get('flags','')
    if(isinstance(flags,list)):
        flags = list_to_sss(flags)
    objext = kwargs.get('objext','.o')
    srcfileflag = kwargs.get('srcfileflag','-c')
    outfileflag = kwargs.get('outfileflag','-o')
    logfile = kwargs.get('logfile',"")
    checktimes = kwargs.get('smartcompile',True)
    #nothing to do when the timestamp check is on and reports an up-to-date object
    if(checktimes and not smartcompile(srcfile,objext)):
        return
    outfile = replaceext(srcfile,objext)
    parts = [compiler,flags,outfileflag,outfile,srcfileflag,srcfile,include]
    callproc(" ".join(parts),echo=True,logfile=logfile)
    return
 def gs_compile_list(compiler,srclist,**kwargs):
    """
    Call gs_compile for every source file in srclist, in order,
    forwarding compiler and all keyword arguments.
    """
    for srcfile in srclist:
        gs_compile(compiler,srcfile,**kwargs)
 def gs_compile_all(compiler,srcdir,srcexts,**kwargs):
    """
    Compile every file under srcdir whose extension is in srcexts.
    optional arguments:
       recurse (T/F): search subdirectories too (default True)
    All keyword arguments are also forwarded to gs_compile.
    """
    sources = flist(srcdir,exts=srcexts,recurse=kwargs.get('recurse',True))
    for srcfile in sources:
        gs_compile(compiler,srcfile,**kwargs)
 def gs_link_all(linker,srcpath,target,**kwargs):
    """
    Link every object file found under srcpath into target.
    optional arguments:
       objext: object file extension to collect (default '.o')
       recurse (T/F): search subdirectories too (default True)
    All keyword arguments are also forwarded to gs_link_list.
    """
    objects = flist(srcpath,exts=kwargs.get('objext','.o'),recurse=kwargs.get('recurse',True))
    gs_link_list(linker,list_to_sss(objects),target,**kwargs)
 def gs_link_list(linker,objlist,target,**kwargs):
    """
    Link objects into target with a gnu-style linker driver.
    objlist is a ready-made space-separated string of object files.
    optional arguments (each defaults to ''):
       libdir, staticlibs, libflags, linkerflags: inserted into the command
          in the order  libdir objlist staticlibs libflags linkerflags
       logfile: log file handed to callproc
    objext and recurse are accepted but have no effect here.
    """
    def opt(key):
        return kwargs.get(key,'')
    tail = [opt('libdir'),objlist,opt('staticlibs'),opt('libflags'),opt('linkerflags')]
    ln = linker+" -o "+target+" "+" ".join(tail)
    callproc(ln,logfile=opt('logfile'))
    return
 def msvc_link_list(objlist,target,**kwargs):
    """
    Link objects into target using the 'link' command.
    objlist is a ready-made space-separated string of object files.
    optional arguments (each defaults to ''):
       libdir, staticlibs, linkerflags: placed before /out:
       libflags: placed after the /out:target option
       logfile: log file handed to callproc
    objext and recurse are accepted but have no effect here.
    """
    def opt(key):
        return kwargs.get(key,'')
    head = " ".join(['link',opt('libdir'),objlist,opt('staticlibs'),opt('linkerflags')])
    ln = head+" /out:"+target+" "+opt('libflags')
    callproc(ln,logfile=opt('logfile'))
    return
 def ar_all(srcpath,arname,**kwargs):
    """
    Archive every object file found under srcpath into arname.
    optional arguments:
       recurse (T/F): search subdirectories too (default True)
       objext: object file extension to collect (default '.o')
    All keyword arguments are also forwarded to ar_list.
    """
    objects = flist(srcpath,exts=kwargs.get('objext','.o'),recurse=kwargs.get('recurse',True))
    ar_list(objects,arname,**kwargs)
 def msvc_lib_list(objlist,arname,**kwargs):
    """
    Build the library arname from the object files in objlist by running
    "lib <objects> /out:<arname>". Keyword arguments are accepted but unused.
    """
    callproc("lib "+list_to_sss(objlist)+" /out:"+arname)
 def ar_list(objlist,arname,**kwargs):
    """
    Put the object files in objlist into the archive arname by running
    "ar cr <arname> <objects>". Keyword arguments are accepted but unused.
    """
    callproc("ar cr "+ arname+" "+list_to_sss(objlist))
 def ar_add_list(objlist,arname,**kwargs):
    """
    Add the object files in objlist to the archive arname.
    Runs "ar r <arname> <objects>": members are inserted, replacing any
    existing member of the same name. (The previous "ar t" only printed the
    archive's table of contents and never added anything.)
    Keyword arguments are accepted but unused.
    """
    objlist2 = list_to_sss(objlist)
    ln = "ar r "+arname+" "+objlist2
    callproc(ln)
    return
 ##############################
 ##Derived Compiler Functions##
 ##############################
 def gcc_compile(srcfile,**kwargs):
    """Compile one source file with gcc via gs_compile; objext is forced to '.o'."""
    gs_compile('gcc',srcfile,**dict(kwargs,objext='.o'))
 def gcc_compile_all(srcdir,**kwargs):
    """Compile every '.c' file under srcdir with gcc via gs_compile_all; objext is forced to '.o'."""
    gs_compile_all('gcc',srcdir,['.c'],**dict(kwargs,objext='.o'))
 def gcc_compile_list(srclist,**kwargs):
    """Compile each file in srclist with gcc via gs_compile_list; objext is forced to '.o'."""
    gs_compile_list('gcc',srclist,**dict(kwargs,objext='.o'))
 def gpp_compile(srcfile,**kwargs):
    """Compile one source file with g++ via gs_compile; objext is forced to '.o'."""
    gs_compile('g++',srcfile,**dict(kwargs,objext='.o'))
 def gpp_compile_all(srcdir,**kwargs):
    """Compile every '.c'/'.cpp' file under srcdir with g++ via gs_compile_all; objext is forced to '.o'."""
    gs_compile_all('g++',srcdir,['.c','.cpp'],**dict(kwargs,objext='.o'))
 def gpp_compile_list(srclist,**kwargs):
    """Compile each file in srclist with g++ via gs_compile_list; objext is forced to '.o'."""
    gs_compile_list('g++',srclist,**dict(kwargs,objext='.o'))
 def gfortran_compile(srcfile,**kwargs):
    """Compile one source file with gfortran via gs_compile; objext is forced to '.o'."""
    gs_compile('gfortran',srcfile,**dict(kwargs,objext='.o'))
 def gfortran_compile_all(srcdir,**kwargs):
    """Compile every '.f'/'.f90'/'.f77' file under srcdir with gfortran via gs_compile_all; objext is forced to '.o'."""
    gs_compile_all('gfortran',srcdir,['.f','.f90','.f77'],**dict(kwargs,objext='.o'))
 def gfortran_compile_list(srclist,**kwargs):
    """Compile each file in srclist with gfortran via gs_compile_list; objext is forced to '.o'."""
    gs_compile_list('gfortran',srclist,**dict(kwargs,objext='.o'))
 def clang_compile(srcfile,**kwargs):
    """Compile one source file with clang++ via gs_compile; objext is forced to '.o'."""
    gs_compile('clang++',srcfile,**dict(kwargs,objext='.o'))
 def clang_compile_all(srcdir,**kwargs):
    """Compile every '.c'/'.cpp' file under srcdir with clang++ via gs_compile_all; objext is forced to '.o'."""
    gs_compile_all('clang++',srcdir,['.c','.cpp'],**dict(kwargs,objext='.o'))
 def clang_compile_list(srclist,**kwargs):
    """Compile each file in srclist with clang++ via gs_compile_list; objext is forced to '.o'."""
    gs_compile_list('clang++',srclist,**dict(kwargs,objext='.o'))
--- a/old/compscripts/old/linux64.makelib.py
+++ b/old/compscripts/old/linux64.makelib.py
@ -0,0 +1,45 @@
 #!/usr/bin/python3
 import os,sys,subprocess,math
 from complib2 import *
 import shutil
 #from distutils.dir_util import copy_tree as copy_tree #this version does overwrites
 from shutil import copytree as copytree
 #Build products: name stem of the static library, and the test executable name
 #(targetname is not used by this script)
 libname = 'amsculib2.linux64'
 targetname = 'test'
 #Shared include/lib folders and this project's local binary folder
 commonincdir = "../../linux64/include"
 commonlibdir = "../../linux64/lib"
 localbindir = "./bin_linux64"
 #Compiler and the source extensions collected from ./src
 cc = 'nvcc'
 srcexts = ['.c','.cpp','.cu']
 #Main-program sources: excluded from the static library
 mainsrc = ['main.c','main.cpp','main.cu']
 include = "-I./include -I{}".format(commonincdir)
 #Options forwarded to the compile helpers.
 #-dc flag: relocatable device code - needed for device functions to link in different "execution units"
 kwargs = {
     'include': include,
     'flags': "-dc",
     'libdir': "-L{} -L{}".format(localbindir,commonlibdir),
     'libflags': "-l{}".format(libname),
     'linkerflags': "",
     'recurse': True,
 }
 #Every source under ./src, minus the main program files
 files = except_contains(flist('./src',exts = srcexts, recurse=True),mainsrc)
 objfiles = replaceexts(files,'.o')
 objfiles_sss = list_to_sss(objfiles)
 #Compile all the source files in the list (objects land next to their sources)
 gs_compile_list(cc,files,**kwargs)
 #Archive the matching object files into a static library
 libpath = '{}/lib{}.a'.format(localbindir,libname)
 ar_list(objfiles,libpath)
 #Push the library to the common lib folder
 shutil.copy(libpath,commonlibdir)
 #Copy include files to the common include folder (existing folders are merged into)
 copytree('./include/',commonincdir+'/',dirs_exist_ok=True)
--- a/old/compscripts/old/linux64.maketest.py
+++ b/old/compscripts/old/linux64.maketest.py
@ -0,0 +1,38 @@
 #!/usr/bin/python3
 import os,sys,subprocess,math
 from complib2 import *
 import shutil
 #Library to link against and name of the test executable to produce
 libname = 'amsculib2.linux64'
 targetname = 'test'
 #Shared include/lib folders and this project's local binary folder
 commonincdir = "../../linux64/include"
 commonlibdir = "../../linux64/lib"
 localbindir = "./bin_linux64"
 #Compiler; srcexts and mainsrc are defined but not used below
 cc = 'nvcc'
 srcexts = ['.c','.cpp','.cu']
 mainsrc = ['main.c','main.cpp','main.cu']
 include = "-I./include -I{}".format(commonincdir)
 #Options forwarded to the compile and link helpers.
 #-dc flag: relocatable device code - needed for device functions to link in different "execution units"
 kwargs = {
     'include': include,
     'flags': "-dc",
     'libdir': "-L{} -L{}".format(localbindir,commonlibdir),
     'libflags': "-l{}".format(libname),
     'linkerflags': "",
     'recurse': True,
 }
 #Pull required binary dynamic libraries to the bin folder (currently disabled)
 #shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir);
 #shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir);
 #shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64');
 #Source and object file of the main test program
 fsrc = ['./src/main.cu']
 fobj = replaceexts(fsrc,'.o')
 #Compile the test program, then link it into the local bin folder
 gs_compile_list(cc,fsrc,**kwargs)
 exepath = '{}/{}'.format(localbindir,targetname)
 gs_link_list(cc,list_to_sss(fobj),exepath,**kwargs)
--- a/old/compscripts/old/msvc.makelib.py
+++ b/old/compscripts/old/msvc.makelib.py
@ -0,0 +1,45 @@
 #!/usr/bin/python3
 import os,sys,subprocess,math
 from complib2 import *
 import shutil
 from shutil import copytree as copytree
 #Build products: name stem of the static library, and the executable name
 #(targetname is not used by this script)
 libname = 'assetcuda.msvc64'
 targetname = 'main'
 #Shared include/lib folders and this project's local binary folder
 commonincdir = "../../winx64/include"
 commonlibdir = "../../winx64/lib"
 localbindir = "./bin_winx64"
 #Compiler and the source extensions collected from ./src
 cc = 'nvcc'
 srcexts = ['.c','.cpp']
 #Main-program sources: excluded from the static library
 mainsrc = ['main.c','main.cpp','main.cu']
 include = "-I./include -I{}".format(commonincdir)
 #Options forwarded to the compile helpers
 kwargs = {
     'include': include,
     'flags': "/O2",
     'libdir': "/LIBPATH:{} /LIBPATH:{}".format(localbindir,commonlibdir),
     'libflags': "-l{}".format(libname),
     'linkerflags': "",
     'recurse': True,
 }
 #Every source under ./src, minus the main program files
 files = except_contains(flist('./src',exts = srcexts, recurse=True),mainsrc)
 objfiles = replaceexts(files,'.obj')
 objfiles_sss = list_to_sss(objfiles)
 #Compile all the source files in the list with the msvc-style wrapper
 #(gnu-style alternative: gs_compile_list(cc,files,**kwargs))
 msvc_compile_list(cc,files,**kwargs)
 #Archive the matching object files into a static library
 #(gnu-style alternative: ar_list(objfiles,'{}/lib{}.a'.format(localbindir,libname)))
 libpath = '{}/lib{}.lib'.format(localbindir,libname)
 msvc_lib_list(objfiles,libpath)
 #Push the library to the common lib folder
 shutil.copy(libpath,commonlibdir)
 #Copy include files to the common include folder (existing folders are merged into)
 copytree('./include/',commonincdir+'/',dirs_exist_ok=True)
--- a/old/compscripts/old/msvc.maketest.py
+++ b/old/compscripts/old/msvc.maketest.py
@ -0,0 +1,39 @@
 #!/usr/bin/python3
 import os,sys,subprocess,math
 from complib2 import *
 import shutil
 #NOTE: the former "from distutils.dir_util import copy_tree" was dropped here.
 #copy_tree was never used in this script, and distutils no longer exists in
 #Python 3.12+, so the import alone made the script fail before doing any work.
 #Library to link against and name of the test executable to produce
 libname = 'assetcuda.msvc64' #prefix static library name to generate
 targetname = 'tests.exe' #create this executable when compiling tests
 #Shared include/lib folders and this project's local binary folder
 commonincdir = "../../winx64/include"
 commonlibdir = "../../winx64/lib"
 localbindir = "./bin_winx64"
 cc = 'nvcc' #compiler
 #srcexts and mainsrc are defined but not used below
 srcexts = ['.c','.cpp']
 mainsrc = ['main.c','main.cpp','main.cu'] #ignore these files when compiling the static library
 #Options forwarded to the compile and link helpers
 kwargs = dict()
 include = "-I./include -I{}".format(commonincdir)
 kwargs['include'] = include
 kwargs['flags'] = "/O2"
 kwargs['libdir'] = "/LIBPATH:{} /LIBPATH:{}".format(localbindir,commonlibdir)
 #Longer library list used in the past, kept for reference:
 #kwargs['libflags'] = "lib{}.lib libamsearthtools.msvc64.lib libamsmeshtools.msvc64.lib libamsmathlib3.msvc64.lib libamsmatrix_cpp.msvc64.lib liblapack.a libblas.a libamsstring3.msvc64.lib libamsmathutil2.msvc64.lib".format(libname)
 kwargs['libflags'] = "lib{}.lib".format(libname)
 kwargs['linkerflags'] = ""
 kwargs['recurse'] = True
 #Pull required binary dynamic libraries to the bin folder (currently disabled)
 #shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir);
 #shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir);
 #shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64');
 #Designate source files for main test program
 fsrc = ['./src/main.cu']
 fobj = replaceexts(fsrc,'.obj')
 #Compile test programs, then link them into the local bin folder
 msvc_compile_list(cc,fsrc,**kwargs)
 msvc_link_list(list_to_sss(fobj),'{}/{}'.format(localbindir,targetname),**kwargs)
--- a/old/compscripts/old/winnvcc.makelib.py
+++ b/old/compscripts/old/winnvcc.makelib.py
@ -0,0 +1,44 @@
 #!/usr/bin/python3
 import os,sys,subprocess,math
 from complib2 import *
 import shutil
 #shutil.copytree replaces distutils' copy_tree: distutils no longer exists in
 #Python 3.12+, and copytree(...,dirs_exist_ok=True) likewise copies into an
 #already existing folder, overwriting files (same call the other makelib scripts use)
 from shutil import copytree
 #Build products: name stem of the static library, and the test executable name
 libname = 'amsculib2.msvc64' #prefix static library name to generate
 targetname = 'test' #create this executable when compiling tests
 #Shared include/lib folders and this project's local binary folder
 commonincdir = "../../winx64/include"
 commonlibdir = "../../winx64/lib"
 localbindir = "./bin_winx64"
 cc = 'nvcc' #compiler
 srcexts = ['.c','.cpp','.cu']
 mainsrc = ['main.c','main.cpp'] #ignore these files when compiling the static library
 #Options forwarded to the compile helpers
 kwargs = dict()
 include = "-I./include -I{}".format(commonincdir)
 kwargs['include'] = include
 kwargs['flags'] = "-dc"
 kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir)
 kwargs['libflags'] = "-l{}".format(libname)
 kwargs['linkerflags'] = ""
 kwargs['recurse'] = True
 #find all source files, except the main project files
 files = flist('./src',exts = srcexts, recurse=True)
 files = except_contains(files,mainsrc)
 objfiles = replaceexts(files,'.o')
 objfiles_sss = list_to_sss(objfiles)
 #compile all the source files in the list
 gs_compile_list(cc,files,**kwargs)
 #archive all the source files into a static library
 #ar_list(objfiles,'{}/lib{}.a'.format(localbindir,libname))
 msvc_lib_list(objfiles,'{}/lib{}.lib'.format(localbindir,libname))
 #Push any libraries to the common lib folder
 shutil.copy('{}/lib{}.lib'.format(localbindir,libname),commonlibdir)
 #Copy include files to the common include folder
 copytree('./include/',commonincdir+'/',dirs_exist_ok=True)
--- a/old/compscripts/old/winnvcc.maketest.py
+++ b/old/compscripts/old/winnvcc.maketest.py
@ -0,0 +1,38 @@
 #!/usr/bin/python3
 import os,sys,subprocess,math
 from complib2 import *
 import shutil
 #NOTE: the former "from distutils.dir_util import copy_tree" was dropped here.
 #copy_tree was never used in this script, and distutils no longer exists in
 #Python 3.12+, so the import alone made the script fail before doing any work.
 #Library to link against and name of the test executable to produce
 libname = 'amsculib2.msvc64' #prefix static library name to generate
 targetname = 'test' #create this executable when compiling tests
 #Shared include/lib folders and this project's local binary folder
 commonincdir = "../../winx64/include"
 commonlibdir = "../../winx64/lib"
 localbindir = "./bin_winx64"
 cc = 'nvcc' #compiler
 #srcexts and mainsrc are defined but not used below
 srcexts = ['.c','.cpp','.cu']
 mainsrc = ['main.c','main.cpp'] #ignore these files when compiling the static library
 #Options forwarded to the compile and link helpers
 kwargs = dict()
 include = "-I./include -I{}".format(commonincdir)
 kwargs['include'] = include
 kwargs['flags'] = "-dc"
 kwargs['libdir'] = "-L{} -L{}".format(localbindir,commonlibdir)
 kwargs['libflags'] = "-llib{}".format(libname)
 kwargs['linkerflags'] = ""
 kwargs['recurse'] = True
 #Pull required binary dynamic libraries to the bin folder (currently disabled)
 #shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir);
 #shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir);
 #shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64');
 #Designate source files for main test program
 fsrc = ['./src/main.cpp']
 fobj = replaceexts(fsrc,'.o')
 #Compile test programs, then link them into the local bin folder
 gs_compile_list(cc,fsrc,**kwargs)
 gs_link_list(cc,list_to_sss(fobj),'{}/{}'.format(localbindir,targetname),**kwargs)
--- a/old/compscripts/winnvcc.makelib.py
+++ b/old/compscripts/winnvcc.makelib.py
@ -0,0 +1,49 @@
 #!/usr/bin/python3
 import os,sys,subprocess,math
 from complib2 import *
 from complib3 import gs_incremental_compile, gs_incremental_compile_list
 import shutil
 from shutil import copytree
 #Build products: name stem of the static library, and the test executable name
 #(targetname is not used by this script)
 libname = 'amsculib2.msvc64'
 targetname = 'test'
 #Shared include/lib folders and this project's local binary folder
 commonincdir = "../../winx64/include"
 commonlibdir = "../../winx64/lib"
 localbindir = "./bin_winx64"
 #Compiler and the source extensions collected from ./src
 cc = 'nvcc'
 srcexts = ['.c','.cpp','.cu']
 #Main-program sources: excluded from the static library
 mainsrc = ['main.cu']
 include = "-I./include -I{}".format(commonincdir)
 #Options forwarded to the compile helpers
 kwargs = {
     'include': include,
     'flags': "-dc",
     'libdir': "-L{} -L{}".format(localbindir,commonlibdir),
     'libflags': "-l{}".format(libname),
     'linkerflags': "",
     'recurse': True,
     'objstore': "./objstore",
     'searchincdirs': ['./include'],
 }
 #Every source under ./src, minus the main program files
 files = except_contains(flist('./src',exts = srcexts, recurse=True),mainsrc)
 objfiles = replaceexts(files,'.o')
 objfiles_sss = list_to_sss(objfiles)
 #Compile the library sources through the incremental-compile helper
 #(the plain alternative would be gs_compile_list(cc,files,**kwargs))
 gs_incremental_compile_list(cc,files,**kwargs)
 #Archive every .o found in the object store into the static library
 #(gnu-style alternative: ar_list(objfiles,'{}/lib{}.a'.format(localbindir,libname)))
 libpath = '{}/lib{}.lib'.format(localbindir,libname)
 objlist = flist(kwargs['objstore'],exts='.o',recurse=True)
 msvc_lib_list(objlist,libpath)
 #Push the library to the common lib folder
 shutil.copy(libpath,commonlibdir)
 #Copy include files to the common include folder (existing folders are merged into)
 copytree('./include/',commonincdir+'/',dirs_exist_ok=True)
--- a/old/compscripts/winnvcc.maketest.py
+++ b/old/compscripts/winnvcc.maketest.py
@ -0,0 +1,43 @@
 #!/usr/bin/python3
 import os,sys,subprocess,math
 from complib2 import *
 from complib3 import gs_incremental_compile, gs_incremental_compile_list
 import shutil
 from shutil import copytree
 #Library to link against and name of the test executable to produce
 libname = 'amsculib2.msvc64'
 targetname = 'test'
 #Shared include/lib folders and this project's local binary folder
 commonincdir = "../../winx64/include"
 commonlibdir = "../../winx64/lib"
 localbindir = "./bin_winx64"
 #Compiler; srcexts and mainsrc are defined but not used below
 cc = 'nvcc'
 srcexts = ['.c','.cpp','.cu']
 mainsrc = ['main.cu']
 include = "-I./include -I{}".format(commonincdir)
 #Options forwarded to the compile and link helpers
 kwargs = {
     'include': include,
     'flags': "-dc",
     'libdir': "-L{} -L{}".format(localbindir,commonlibdir),
     'libflags': "-llib{} -llibamsculib2.msvc64".format(libname),
     'linkerflags': "",
     'recurse': True,
     'objstore': "./objstore",
     'searchincdirs': ['./include'],
 }
 #Other libraries that have been linked in the past, kept for reference:
 #-lamsmathlib3.linux64 -lamsstring3.linux64 -lamsmatrix_cpp.linux64 -llapack -lblas -lgfortran -lamsmathutilthread.linux64 -lamsmathutil2.linux64
 #Pull required binary dynamic libraries to the bin folder (currently disabled)
 #shutil.copy('{}/libamsimg.dll.a'.format(commonlibdir),localbindir);
 #shutil.copy('{}/libamsimg.dll'.format(commonlibdir),localbindir);
 #shutil.copy('../../lib_winx64/glew32.dll','./bin_winx64');
 #Source and object file of the main test program
 fsrc = ['./src/main.cu']
 fobj = replaceexts(fsrc,'.o')
 #Compile the test program, then link it into the local bin folder
 gs_compile_list(cc,fsrc,**kwargs)
 exepath = '{}/{}'.format(localbindir,targetname)
 gs_link_list(cc,list_to_sss(fobj),exepath,**kwargs)
--- a/Show More
+++ b/Show More
`@ -1 +1 @@`
	`Copyright Aaron M. Schinder, 2023`	`Copyright Aaron M. Schinder, 2023`