diff --git a/build_linux64/libamsculib2.linux64.a b/build_linux64/libamsculib2.linux64.a index 03f8a6f..6b696bb 100644 Binary files a/build_linux64/libamsculib2.linux64.a and b/build_linux64/libamsculib2.linux64.a differ diff --git a/build_linux64/objstore/amscu_comp128.o b/build_linux64/objstore/amscu_comp128.o index 31b4869..ff579cb 100644 Binary files a/build_linux64/objstore/amscu_comp128.o and b/build_linux64/objstore/amscu_comp128.o differ diff --git a/build_linux64/objstore/amscu_comp64.o b/build_linux64/objstore/amscu_comp64.o index 600fd01..43bc637 100644 Binary files a/build_linux64/objstore/amscu_comp64.o and b/build_linux64/objstore/amscu_comp64.o differ diff --git a/build_linux64/objstore/amscu_cudafunctions.o b/build_linux64/objstore/amscu_cudafunctions.o index c8def50..0b9561a 100644 Binary files a/build_linux64/objstore/amscu_cudafunctions.o and b/build_linux64/objstore/amscu_cudafunctions.o differ diff --git a/build_linux64/objstore/amscu_random.o b/build_linux64/objstore/amscu_random.o index c9163e1..5b69888 100644 Binary files a/build_linux64/objstore/amscu_random.o and b/build_linux64/objstore/amscu_random.o differ diff --git a/build_linux64/objstore/amscuarray.o b/build_linux64/objstore/amscuarray.o index 7b9bf27..777dd9e 100644 Binary files a/build_linux64/objstore/amscuarray.o and b/build_linux64/objstore/amscuarray.o differ diff --git a/build_linux64/objstore/amscuarray_dops.o b/build_linux64/objstore/amscuarray_dops.o index 25e8f42..280372b 100644 Binary files a/build_linux64/objstore/amscuarray_dops.o and b/build_linux64/objstore/amscuarray_dops.o differ diff --git a/build_linux64/objstore/amscugeom.o b/build_linux64/objstore/amscugeom.o index 7260fa5..3794d5d 100644 Binary files a/build_linux64/objstore/amscugeom.o and b/build_linux64/objstore/amscugeom.o differ diff --git a/build_linux64/objstore/amsculib2.o b/build_linux64/objstore/amsculib2.o index 838b8fa..e93abb5 100644 Binary files a/build_linux64/objstore/amsculib2.o and b/build_linux64/objstore/amsculib2.o differ diff --git a/build_linux64/objstore/amscumath.o b/build_linux64/objstore/amscumath.o index b947051..375cf4c 100644 Binary files a/build_linux64/objstore/amscumath.o and b/build_linux64/objstore/amscumath.o differ diff --git a/build_linux64/objstore/amscurarray.o b/build_linux64/objstore/amscurarray.o index 6e2bd50..de7d765 100644 Binary files a/build_linux64/objstore/amscurarray.o and b/build_linux64/objstore/amscurarray.o differ diff --git a/build_linux64/objstore/cuvect2.o b/build_linux64/objstore/cuvect2.o index 1a20557..3d1a0cc 100644 Binary files a/build_linux64/objstore/cuvect2.o and b/build_linux64/objstore/cuvect2.o differ diff --git a/build_linux64/objstore/cuvect2f.o b/build_linux64/objstore/cuvect2f.o index 9d85181..bf2666f 100644 Binary files a/build_linux64/objstore/cuvect2f.o and b/build_linux64/objstore/cuvect2f.o differ diff --git a/build_linux64/objstore/cuvect3.o b/build_linux64/objstore/cuvect3.o index 629ecd0..4251c08 100644 Binary files a/build_linux64/objstore/cuvect3.o and b/build_linux64/objstore/cuvect3.o differ diff --git a/build_linux64/objstore/cuvect3f.o b/build_linux64/objstore/cuvect3f.o index dbf481c..0dfc3e2 100644 Binary files a/build_linux64/objstore/cuvect3f.o and b/build_linux64/objstore/cuvect3f.o differ diff --git a/build_linux64/objstore/cuvect4.o b/build_linux64/objstore/cuvect4.o index 038e6a4..d5e8af5 100644 Binary files a/build_linux64/objstore/cuvect4.o and b/build_linux64/objstore/cuvect4.o differ diff --git a/build_linux64/objstore/cuvect4f.o b/build_linux64/objstore/cuvect4f.o index 78c5350..3a789cd 100644 Binary files a/build_linux64/objstore/cuvect4f.o and b/build_linux64/objstore/cuvect4f.o differ diff --git a/build_linux64/test b/build_linux64/test index aa56a4f..7862fc4 100644 Binary files a/build_linux64/test and b/build_linux64/test differ diff --git a/include/amsculib2/cuvect2f.hpp b/include/amsculib2/cuvect2f.hpp index 237f736..a0ba6ce 100644 --- a/include/amsculib2/cuvect2f.hpp +++ b/include/amsculib2/cuvect2f.hpp @@ -13,16 +13,16 @@ namespace amscuda __host__ __device__ cuvect2f(); __host__ __device__ ~cuvect2f(); - __host__ __device__ cuvect2f(float _x, float _y); + __host__ __device__ cuvect2f(const float &_x, const float &_y); - __host__ __device__ float& operator[](const int I); - __host__ __device__ const float& operator[](const int I) const; + __host__ __device__ float& operator[](const int &I); + __host__ __device__ const float& operator[](const int &I) const; - __host__ __device__ cuvect2f operator+(cuvect2f lhs); - __host__ __device__ cuvect2f operator-(cuvect2f lhs); - __host__ __device__ cuvect2f operator*(float lhs); - __host__ __device__ cuvect2f operator/(float lhs); - __host__ __device__ friend cuvect2f operator-(cuvect2f rhs); + __host__ __device__ cuvect2f operator+(const cuvect2f &lhs); + __host__ __device__ cuvect2f operator-(const cuvect2f &lhs); + __host__ __device__ cuvect2f operator*(const float &lhs); + __host__ __device__ cuvect2f operator/(const float &lhs); + __host__ __device__ friend cuvect2f operator-(const cuvect2f &rhs); }; class cumat2f @@ -49,11 +49,11 @@ namespace amscuda __host__ __device__ cumat2f inverse(); }; - __host__ __device__ float cuvect2f_dot(cuvect2f a, cuvect2f b); - __host__ __device__ float cuvect2f_cross(cuvect2f a, cuvect2f b); - __host__ __device__ float cuvect2f_norm(cuvect2f a); - __host__ __device__ cuvect2f cuvect2f_normalize(cuvect2f a); - __host__ __device__ cuvect2f cuvect2f_proj(cuvect2f a, cuvect2f b); + __host__ __device__ float cuvect2f_dot(const cuvect2f &a, const cuvect2f &b); + __host__ __device__ float cuvect2f_cross(const cuvect2f &a, const cuvect2f &b); + __host__ __device__ float cuvect2f_norm(const cuvect2f &a); + __host__ __device__ cuvect2f cuvect2f_normalize(const cuvect2f &a); + __host__ __device__ cuvect2f cuvect2f_proj(const cuvect2f &a, const cuvect2f &b); //2x2 matrix operations //matrix order is assumed to be mat[I,J] = mat[I+3*J] @@ -74,7 +74,7 @@ namespace amscuda __host__ __device__ void mat2f_mult(float *mat2a, float *mat2b, float *mat2c); // ret = a*b - __host__ __device__ cuvect2f mat2f_mult(float *mat2a, cuvect2f b); + __host__ __device__ cuvect2f mat2f_mult(float *mat2a, const cuvect2f &b); void test_cuvect2f_1(); diff --git a/include/amsculib2/cuvect3f.hpp b/include/amsculib2/cuvect3f.hpp index bc66d68..0e06f45 100644 --- a/include/amsculib2/cuvect3f.hpp +++ b/include/amsculib2/cuvect3f.hpp @@ -13,49 +13,82 @@ namespace amscuda __host__ __device__ cuvect3f(); __host__ __device__ ~cuvect3f(); - __host__ __device__ cuvect3f(float _x, float _y, float _z); + __host__ __device__ cuvect3f(const float &_x, const float &_y, const float &_z); - __host__ __device__ float& operator[](const int I); - __host__ __device__ const float& operator[](const int I) const; + __host__ __device__ float& operator[](const int &I); + __host__ __device__ const float& operator[](const int &I) const; + + __host__ __device__ cuvect3f operator+(const cuvect3f &rhs); + __host__ __device__ cuvect3f operator-(const cuvect3f &rhs); + __host__ __device__ cuvect3f operator*(const float &rhs); + __host__ __device__ cuvect3f operator/(const float &rhs); + __host__ __device__ friend cuvect3f operator-(const cuvect3f &rhs); + + __host__ __device__ cuvect3f& operator+=(const cuvect3f &rhs); + __host__ __device__ cuvect3f& operator-=(const cuvect3f &rhs); + __host__ __device__ cuvect3f& operator/=(const float &rhs); + __host__ __device__ cuvect3f& operator*=(const float &rhs); - __host__ __device__ cuvect3f operator+(cuvect3f lhs); - __host__ __device__ cuvect3f operator-(cuvect3f lhs); - __host__ __device__ cuvect3f operator*(float lhs); - __host__ __device__ cuvect3f operator/(float lhs); - __host__ __device__ friend cuvect3f operator-(cuvect3f rhs); }; class cumat3f { public: - float dat[9]; + float m00,m10,m20; //named references to force register use? + float m01,m11,m21; //switched to column-major-order to match GLSL/lapack + float m02,m12,m22; __host__ __device__ cumat3f(); __host__ __device__ ~cumat3f(); - __host__ __device__ float& operator[](const int I); - __host__ __device__ float& operator()(const int I, const int J); - __host__ __device__ float& at(const int I, const int J); + __host__ __device__ cumat3f( + const float & _m00, const float & _m01, const float & _m02, + const float & _m10, const float & _m11, const float & _m12, + const float & _m20, const float & _m21, const float & _m22 + ); - __host__ __device__ cumat3f operator+(cumat3f lhs); - __host__ __device__ cumat3f operator-(cumat3f lhs); - __host__ __device__ cumat3f operator*(float lhs); - __host__ __device__ cumat3f operator/(float lhs); - __host__ __device__ cuvect3f operator*(cuvect3f lhs); - __host__ __device__ cumat3f operator*(cumat3f lhs); - __host__ __device__ friend cumat3f operator-(cumat3f rhs); + __host__ __device__ float& operator[](const int &I); + __host__ __device__ float& operator()(const int &I, const int &J); + __host__ __device__ float& at(const int &I, const int &J); + + __host__ __device__ const float& operator[](const int &I) const; + __host__ __device__ const float& operator()(const int &I, const int &J) const; + __host__ __device__ const float& at(const int &I, const int &J) const; + + __host__ __device__ cumat3f operator+(const cumat3f &rhs); + __host__ __device__ cumat3f operator-(const cumat3f &rhs); + __host__ __device__ cumat3f operator*(const float &rhs); + __host__ __device__ cumat3f operator/(const float &rhs); + __host__ __device__ cuvect3f operator*(const cuvect3f &rhs); + __host__ __device__ cumat3f operator*(const cumat3f &rhs); + __host__ __device__ friend cumat3f operator-(const cumat3f &rhs); __host__ __device__ float det(); __host__ __device__ cumat3f transpose(); __host__ __device__ cumat3f inverse(); + + __host__ __device__ float* data(); //pointer to float[9] representation of matrix + __host__ __device__ const float* data() const; //pointer to float[9] representation of matrix + + //In place operations (to save GPU register use) + __host__ __device__ cumat3f& operator+=(const cumat3f &rhs); + __host__ __device__ cumat3f& operator-=(const cumat3f &rhs); + __host__ __device__ cumat3f& operator/=(const float &rhs); + __host__ __device__ cumat3f& operator*=(const float &rhs); + __host__ __device__ cumat3f& operator*=(const cumat3f &rhs); }; - __host__ __device__ float cuvect3f_dot(cuvect3f a, cuvect3f b); - __host__ __device__ cuvect3f cuvect3f_cross(cuvect3f a, cuvect3f b); - __host__ __device__ float cuvect3f_norm(cuvect3f a); - __host__ __device__ cuvect3f cuvect3f_normalize(cuvect3f a); - __host__ __device__ cuvect3f cuvect3f_proj(cuvect3f a, cuvect3f b); + __host__ __device__ float cuvect3f_dot(const cuvect3f &a,const cuvect3f &b); + __host__ __device__ cuvect3f cuvect3f_cross(const cuvect3f &a, const cuvect3f &b); + __host__ __device__ float cuvect3f_norm(const cuvect3f &a); + __host__ __device__ cuvect3f cuvect3f_normalize(const cuvect3f &a); + __host__ __device__ cuvect3f cuvect3f_proj(const cuvect3f &a, const cuvect3f &b); + + __host__ __device__ cumat3f hodge_dual(const cuvect3f &vin); + __host__ __device__ cuvect3f hodge_dual(const cumat3f &min); + __host__ __device__ cumat3f rotmat_from_axisangle(const cuvect3f &axis, const float &angle); + //3x3 matrix operations //matrix order is assumed to be mat[I,J] = mat[I+3*J] @@ -72,11 +105,11 @@ namespace amscuda //inverts a 3x3 (9 element) matrix __host__ __device__ void mat3f_inverse(float *mat3inout); - __host__ __device__ cuvect3f mat3f_mult(float *mat3in, cuvect3f cvin); + __host__ __device__ cuvect3f mat3f_mult(float *mat3in, const cuvect3f &cvin); __host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout); - __host__ __device__ void mat3f_hodgedual(cuvect3f vecin, float *matout); - __host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f vecout); + __host__ __device__ void mat3f_hodgedual(const cuvect3f &vecin, float *matout); + __host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f &vecout); //returns direction cosine rotation matrix from axis and angle __host__ __device__ void mat3f_rot_from_axisangle(cuvect3f axis, float angle, float *matout); diff --git a/src/amsculib2/cuvect2f.cu b/src/amsculib2/cuvect2f.cu index 42c63db..9b53d79 100644 --- a/src/amsculib2/cuvect2f.cu +++ b/src/amsculib2/cuvect2f.cu @@ -15,27 +15,27 @@ namespace amscuda return; } - __host__ __device__ cuvect2f::cuvect2f(float _x, float _y) + __host__ __device__ cuvect2f::cuvect2f(const float &_x, const float &_y) { x = _x; y = _y; return; } - __host__ __device__ float& cuvect2f::operator[](const int I) + __host__ __device__ float& cuvect2f::operator[](const int &I) { if(I==0) return x; if(I==1) return y; return x; } - __host__ __device__ const float& cuvect2f::operator[](const int I) const + __host__ __device__ const float& cuvect2f::operator[](const int &I) const { if(I==0) return x; if(I==1) return y; return x; } - __host__ __device__ cuvect2f cuvect2f::operator+(cuvect2f lhs) + __host__ __device__ cuvect2f cuvect2f::operator+(const cuvect2f &lhs) { cuvect2f ret; ret.x = x + lhs.x; @@ -43,14 +43,14 @@ namespace amscuda return ret; } - __host__ __device__ cuvect2f cuvect2f::operator-(cuvect2f lhs) + __host__ __device__ cuvect2f cuvect2f::operator-(const cuvect2f &lhs) { cuvect2f ret; ret.x = x - lhs.x; ret.y = y - lhs.y; return ret; } - __host__ __device__ cuvect2f cuvect2f::operator*(float lhs) + __host__ __device__ cuvect2f cuvect2f::operator*(const float &lhs) { cuvect2f ret; ret.x = x*lhs; @@ -58,7 +58,7 @@ namespace amscuda return ret; } - __host__ __device__ cuvect2f cuvect2f::operator/(float lhs) + __host__ __device__ cuvect2f cuvect2f::operator/(const float &lhs) { cuvect2f ret; ret.x = x/lhs; @@ -66,25 +66,25 @@ namespace amscuda return ret; } - __host__ __device__ float cuvect2f_dot(cuvect2f a, cuvect2f b) + __host__ __device__ float cuvect2f_dot(const cuvect2f &a, const cuvect2f &b) { float ret = a.x*b.x+a.y*b.y; return ret; } - __host__ __device__ float cuvect2f_cross(cuvect2f a, cuvect2f b) + __host__ __device__ float cuvect2f_cross(const cuvect2f &a, const cuvect2f &b) { float ret = a.x*b.y-a.y*b.x; return ret; } - __host__ __device__ float cuvect2f_norm(cuvect2f a) + __host__ __device__ float cuvect2f_norm(const cuvect2f &a) { float ret = ::sqrtf(a.x*a.x+a.y*a.y); return ret; } - __host__ __device__ cuvect2f cuvect2f_normalize(cuvect2f a) + __host__ __device__ cuvect2f cuvect2f_normalize(const cuvect2f &a) { cuvect2f ret; float m = cuvect2f_norm(a); @@ -99,7 +99,7 @@ namespace amscuda return ret; } - __host__ __device__ cuvect2f cuvect2f_proj(cuvect2f a, cuvect2f b) + __host__ __device__ cuvect2f cuvect2f_proj(const cuvect2f &a, const cuvect2f &b) { cuvect2f ret; cuvect2f bn = cuvect2f_normalize(b); @@ -343,7 +343,7 @@ __host__ __device__ void mat2f_mult(float *mat2a, float *mat2b, float *mat2c) } // ret = a*b -__host__ __device__ cuvect2f mat2f_mult(float *mat2a, cuvect2f b) +__host__ __device__ cuvect2f mat2f_mult(float *mat2a, const cuvect2f &b) { cuvect2f ret; ret.x = b.x*mat2a[0+0*2] + b.y*mat2a[1+0*2]; diff --git a/src/amsculib2/cuvect3f.cu b/src/amsculib2/cuvect3f.cu index 8be5f3d..309d6bb 100644 --- a/src/amsculib2/cuvect3f.cu +++ b/src/amsculib2/cuvect3f.cu @@ -15,7 +15,7 @@ namespace amscuda return; } - __host__ __device__ float& cuvect3f::operator[](const int I) + __host__ __device__ float& cuvect3f::operator[](const int &I) { if(I==0) return x; if(I==1) return y; @@ -23,7 +23,7 @@ namespace amscuda return x; } - __host__ __device__ const float& cuvect3f::operator[](const int I) const + __host__ __device__ const float& cuvect3f::operator[](const int &I) const { if(I==0) return x; if(I==1) return y; @@ -31,59 +31,92 @@ namespace amscuda return x; } - __host__ __device__ cuvect3f cuvect3f::operator+(cuvect3f lhs) + __host__ __device__ cuvect3f cuvect3f::operator+(const cuvect3f &rhs) { cuvect3f ret; - ret.x = x+lhs.x; - ret.y = y+lhs.y; - ret.z = z+lhs.z; + ret.x = x+rhs.x; + ret.y = y+rhs.y; + ret.z = z+rhs.z; return ret; } - __host__ __device__ cuvect3f cuvect3f::operator-(cuvect3f lhs) + __host__ __device__ cuvect3f cuvect3f::operator-(const cuvect3f &rhs) { cuvect3f ret; - ret.x = x-lhs.x; - ret.y = y-lhs.y; - ret.z = z-lhs.z; + ret.x = x-rhs.x; + ret.y = y-rhs.y; + ret.z = z-rhs.z; return ret; } - __host__ __device__ cuvect3f cuvect3f::operator*(float lhs) + __host__ __device__ cuvect3f cuvect3f::operator*(const float &rhs) { cuvect3f ret; - ret.x = x*lhs; - ret.y = y*lhs; - ret.z = z*lhs; + ret.x = x*rhs; + ret.y = y*rhs; + ret.z = z*rhs; return ret; } - __host__ __device__ cuvect3f cuvect3f::operator/(float lhs) + __host__ __device__ cuvect3f cuvect3f::operator/(const float &rhs) { cuvect3f ret; - ret.x = x/lhs; - ret.y = y/lhs; - ret.z = z/lhs; + ret.x = x/rhs; + ret.y = y/rhs; + ret.z = z/rhs; return ret; } - __host__ __device__ cuvect3f::cuvect3f(float _x, float _y, float _z) + __host__ __device__ cuvect3f& cuvect3f::operator+=(const cuvect3f &rhs) + { + x = x + rhs.x; + y = y + rhs.y; + z = z + rhs.z; + return *this; + } + + __host__ __device__ cuvect3f& cuvect3f::operator-=(const cuvect3f &rhs) + { + x = x - rhs.x; + y = y - rhs.y; + z = z - rhs.z; + return *this; + } + + __host__ __device__ cuvect3f& cuvect3f::operator*=(const float &rhs) + { + x = x * rhs; + y = y * rhs; + z = z * rhs; + return *this; + } + + __host__ __device__ cuvect3f& cuvect3f::operator/=(const float &rhs) + { + x = x / rhs; + y = y / rhs; + z = z / rhs; + return *this; + } + + + __host__ __device__ cuvect3f::cuvect3f(const float &_x, const float &_y, const float &_z) { x = _x; y = _y; z = _z; return; } - __host__ __device__ float cuvect3f_dot(cuvect3f a, cuvect3f b) + __host__ __device__ float cuvect3f_dot(const cuvect3f &a, const cuvect3f &b) { float ret = a.x*b.x+a.y*b.y+a.z*b.z; return ret; } - __host__ __device__ cuvect3f cuvect3f_cross(cuvect3f a, cuvect3f b) + __host__ __device__ cuvect3f cuvect3f_cross(const cuvect3f &a, const cuvect3f &b) { cuvect3f ret; ret[0] = a[1]*b[2]-a[2]*b[1]; @@ -93,14 +126,14 @@ namespace amscuda return ret; } - __host__ __device__ float cuvect3f_norm(cuvect3f a) + __host__ __device__ float cuvect3f_norm(const cuvect3f &a) { float ret; ret = ::sqrtf(a.x*a.x+a.y*a.y+a.z*a.z); return ret; } - __host__ __device__ cuvect3f cuvect3f_normalize(cuvect3f a) + __host__ __device__ cuvect3f cuvect3f_normalize(const cuvect3f &a) { cuvect3f ret; float m; @@ -117,7 +150,7 @@ namespace amscuda return ret; } - __host__ __device__ cuvect3f cuvect3f_proj(cuvect3f a, cuvect3f b) + __host__ __device__ cuvect3f cuvect3f_proj(const cuvect3f &a, const cuvect3f &b) { cuvect3f ret; cuvect3f bn = cuvect3f_normalize(b); @@ -126,6 +159,420 @@ namespace amscuda return ret; } + + + +__host__ __device__ cumat3f::cumat3f() +{ + m00 = 0.0f; + m01 = 0.0f; + m02 = 0.0f; + m10 = 0.0f; + m11 = 0.0f; + m12 = 0.0f; + m20 = 0.0f; + m21 = 0.0f; + m22 = 0.0f; + + return; +} + +__host__ __device__ cumat3f::~cumat3f() +{ + m00 = 0.0f; + m01 = 0.0f; + m02 = 0.0f; + m10 = 0.0f; + m11 = 0.0f; + m12 = 0.0f; + m20 = 0.0f; + m21 = 0.0f; + m22 = 0.0f; + return; +} + +__host__ __device__ float& cumat3f::operator[](const int &I) +{ + if(I==0) return m00; + if(I==1) return m10; + if(I==2) return m20; + if(I==3) return m01; + if(I==4) return m11; + if(I==5) return m21; + if(I==6) return m02; + if(I==7) return m12; + if(I==8) return m22; + + return m00; +} + +__host__ __device__ float& cumat3f::operator()(const int &I, const int &J) +{ + if(I==0 && J==0) return m00; + if(I==1 && J==0) return m10; + if(I==2 && J==0) return m20; + if(I==0 && J==1) return m01; + if(I==1 && J==1) return m11; + if(I==2 && J==1) return m21; + if(I==0 && J==2) return m02; + if(I==1 && J==2) return m12; + if(I==2 && J==2) return m22; + + return m00; +} + + +__host__ __device__ float& cumat3f::at(const int &I, const int &J) +{ + if(I==0 && J==0) return m00; + if(I==1 && J==0) return m10; + if(I==2 && J==0) return m20; + if(I==0 && J==1) return m01; + if(I==1 && J==1) return m11; + if(I==2 && J==1) return m21; + if(I==0 && J==2) return m02; + if(I==1 && J==2) return m12; + if(I==2 && J==2) return m22; + + return m00; +} + +__host__ __device__ const float& cumat3f::operator[](const int &I) const +{ + if(I==0) return m00; + if(I==1) return m10; + if(I==2) return m20; + if(I==3) return m01; + if(I==4) return m11; + if(I==5) return m21; + if(I==6) return m02; + if(I==7) return m12; + if(I==8) return m22; + + return m00; +} + +__host__ __device__ const float& cumat3f::operator()(const int &I, const int &J) const +{ + if(I==0 && J==0) return m00; + if(I==1 && J==0) return m10; + if(I==2 && J==0) return m20; + if(I==0 && J==1) return m01; + if(I==1 && J==1) return m11; + if(I==2 && J==1) return m21; + if(I==0 && J==2) return m02; + if(I==1 && J==2) return m12; + if(I==2 && J==2) return m22; + + return m00; +} + + +__host__ __device__ const float& cumat3f::at(const int &I, const int &J) const +{ + if(I==0 && J==0) return m00; + if(I==1 && J==0) return m10; + if(I==2 && J==0) return m20; + if(I==0 && J==1) return m01; + if(I==1 && J==1) return m11; + if(I==2 && J==1) return m21; + if(I==0 && J==2) return m02; + if(I==1 && J==2) return m12; + if(I==2 && J==2) return m22; + + return m00; +} + + +__host__ __device__ cumat3f cumat3f::operator+(const cumat3f &rhs) +{ + cumat3f ret; + ret.m00 = m00 + rhs.m00; + ret.m10 = m10 + rhs.m10; + ret.m20 = m20 + rhs.m20; + ret.m01 = m01 + rhs.m01; + ret.m11 = m11 + rhs.m11; + ret.m21 = m21 + rhs.m21; + ret.m02 = m02 + rhs.m02; + ret.m12 = m12 + rhs.m12; + ret.m22 = m22 + rhs.m22; + + + return ret; +} + +__host__ __device__ cumat3f cumat3f::operator-(const cumat3f &rhs) +{ + cumat3f ret; + ret.m00 = m00 - rhs.m00; + ret.m10 = m10 - rhs.m10; + ret.m20 = m20 - rhs.m20; + ret.m01 = m01 - rhs.m01; + ret.m11 = m11 - rhs.m11; + ret.m21 = m21 - rhs.m21; + ret.m02 = m02 - rhs.m02; + ret.m12 = m12 - rhs.m12; + ret.m22 = m22 - rhs.m22; + return ret; +} + +__host__ __device__ cumat3f cumat3f::operator*(const float &rhs) +{ + cumat3f ret; + ret.m00 = m00 * rhs; + ret.m10 = m10 * rhs; + ret.m20 = m20 * rhs; + ret.m01 = m01 * rhs; + ret.m11 = m11 * rhs; + ret.m21 = m21 * rhs; + ret.m02 = m02 * rhs; + ret.m12 = m12 * rhs; + ret.m22 = m22 * rhs; + return ret; +} + +__host__ __device__ cumat3f cumat3f::operator/(const float &rhs) +{ + cumat3f ret; + ret.m00 = m00 / rhs; + ret.m10 = m10 / rhs; + ret.m20 = m20 / rhs; + ret.m01 = m01 / rhs; + ret.m11 = m11 / rhs; + ret.m21 = m21 / rhs; + ret.m02 = m02 / rhs; + ret.m12 = m12 / rhs; + ret.m22 = m22 / rhs; + return ret; +} + +__host__ __device__ cuvect3f cumat3f::operator*(const cuvect3f &rhs) +{ + cuvect3f ret; + + ret.x = m00*rhs.x + m01*rhs.y + m02*rhs.z; + ret.y = m10*rhs.x + m11*rhs.y + m12*rhs.z; + ret.z = m20*rhs.x + m21*rhs.y + m22*rhs.z; + + return ret; +} + +__host__ __device__ cumat3f cumat3f::operator*(const cumat3f &rhs) +{ + cumat3f ret; + + ret.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20; + ret.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21; + ret.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22; + ret.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20; + ret.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21; + ret.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22; + ret.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20; + ret.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21; + ret.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22; + + return ret; +} + +__host__ __device__ float cumat3f::det() +{ + float ret = 0.0; + + ret += m00*m11*m22; + ret += m01*m12*m20; + ret += m02*m10*m21; + ret -= m20*m11*m02; + ret -= m21*m12*m00; + ret -= m22*m10*m01; + + return ret; +} + +__host__ __device__ cumat3f cumat3f::transpose() +{ + cumat3f ret; + + ret.m00 = m00; + ret.m01 = m10; + ret.m02 = m20; + ret.m10 = m01; + ret.m11 = m11; + ret.m12 = m21; + ret.m20 = m02; + ret.m21 = m12; + ret.m22 = m22; + + return ret; +} + +__host__ __device__ cumat3f cumat3f::inverse() +{ + cumat3f q; + float dt = det(); + if(dt!=0.0) + { + q(0,0) = (at(1,1)*at(2,2)-at(1,2)*at(2,1))/dt; + q(1,0) = -(at(1,0)*at(2,2)-at(1,2)*at(2,0))/dt; + q(2,0) = (at(1,0)*at(2,1)-at(1,1)*at(2,0))/dt; + q(0,1) = -(at(0,1)*at(2,2)-at(0,2)*at(2,1))/dt; + q(1,1) = (at(0,0)*at(2,2)-at(0,2)*at(2,0))/dt; + q(2,1) = -(at(0,0)*at(2,1)-at(0,1)*at(2,0))/dt; + q(0,2) = (at(0,1)*at(1,2)-at(0,2)*at(1,1))/dt; + q(1,2) = -(at(0,0)*at(1,2)-at(0,2)*at(1,0))/dt; + q(2,2) = (at(0,0)*at(1,1)-at(0,1)*at(1,0))/dt; + // q(0,0) = (at(1,1)*at(2,2)-at(1,2)*at(2,1))/dt; + // q(0,1) = -(at(1,0)*at(2,2)-at(1,2)*at(2,0))/dt; + // q(0,2) = (at(1,0)*at(2,1)-at(1,1)*at(2,0))/dt; + // q(1,0) = -(at(0,1)*at(2,2)-at(0,2)*at(2,1))/dt; + // q(1,1) = (at(0,0)*at(2,2)-at(0,2)*at(2,0))/dt; + // q(1,2) = -(at(0,0)*at(2,1)-at(0,1)*at(2,0))/dt; + // q(2,0) = (at(0,1)*at(1,2)-at(0,2)*at(1,1))/dt; + // q(2,1) = -(at(0,0)*at(1,2)-at(0,2)*at(1,0))/dt; + // q(2,2) = (at(0,0)*at(1,1)-at(0,1)*at(1,0))/dt; + // q = q.transpose(); + } + else + { + q(0,0) = inf; + q(0,1) = inf; + q(0,2) = inf; + q(1,0) = inf; + q(1,1) = inf; + q(1,2) = inf; + q(2,0) = inf; + q(2,1) = inf; + q(2,2) = inf; + } + + return q; +} + +__host__ __device__ cumat3f operator-(const cumat3f &rhs) +{ + cumat3f ret; + ret.m00 = -rhs.m00; + ret.m10 = -rhs.m10; + ret.m20 = -rhs.m20; + ret.m01 = -rhs.m01; + ret.m11 = -rhs.m11; + ret.m21 = -rhs.m21; + ret.m02 = -rhs.m02; + ret.m12 = -rhs.m12; + ret.m22 = -rhs.m22; + + return ret; +} + +__host__ __device__ cumat3f& cumat3f::operator+=(const cumat3f &rhs) +{ + m00 += rhs.m00; + m10 += rhs.m10; + m20 += rhs.m20; + m01 += rhs.m01; + m11 += rhs.m11; + m21 += rhs.m21; + m02 += rhs.m02; + m12 += rhs.m12; + m22 += rhs.m22; + + return *this; +} + +__host__ __device__ cumat3f& cumat3f::operator-=(const cumat3f &rhs) +{ + m00 -= rhs.m00; + m10 -= rhs.m10; + m20 -= rhs.m20; + m01 -= rhs.m01; + m11 -= rhs.m11; + m21 -= rhs.m21; + m02 -= rhs.m02; + m12 -= rhs.m12; + m22 -= rhs.m22; + + return *this; +} + + + +__host__ __device__ cumat3f& cumat3f::operator/=(const float &rhs) +{ + m00 /= rhs; + m10 /= rhs; + m20 /= rhs; + m01 /= rhs; + m11 /= rhs; + m21 /= rhs; + m02 /= rhs; + m12 /= rhs; + m22 /= rhs; + + return *this; +} + +__host__ __device__ cumat3f& cumat3f::operator*=(const float &rhs) +{ + m00 *= rhs; + m10 *= rhs; + m20 *= rhs; + m01 *= rhs; + m11 *= rhs; + m21 *= rhs; + m02 *= rhs; + m12 *= rhs; + m22 *= rhs; + + return *this; +} + +__host__ __device__ cumat3f& cumat3f::operator*=(const cumat3f &rhs) +{ + cumat3f tmp; + + tmp.m00 = m00*rhs.m00 + m01*rhs.m10 + m02*rhs.m20; + tmp.m01 = m00*rhs.m01 + m01*rhs.m11 + m02*rhs.m21; + tmp.m02 = m00*rhs.m02 + m01*rhs.m12 + m02*rhs.m22; + tmp.m10 = m10*rhs.m00 + m11*rhs.m10 + m12*rhs.m20; + tmp.m11 = m10*rhs.m01 + m11*rhs.m11 + m12*rhs.m21; + tmp.m12 = m10*rhs.m02 + m11*rhs.m12 + m12*rhs.m22; + tmp.m20 = m20*rhs.m00 + m21*rhs.m10 + m22*rhs.m20; + tmp.m21 = m20*rhs.m01 + m21*rhs.m11 + m22*rhs.m21; + tmp.m22 = m20*rhs.m02 + m21*rhs.m12 + m22*rhs.m22; + + (*this) = tmp; + + return *this; +} + +__host__ __device__ cumat3f::cumat3f( + const float & _m00, const float & _m01, const float & _m02, + const float & _m10, const float & _m11, const float & _m12, + const float & _m20, const float & _m21, const float & _m22 +) +{ + m00 = _m00; + m01 = _m01; + m02 = _m02; + m10 = _m10; + m11 = _m11; + m12 = _m12; + m20 = _m20; + m21 = _m21; + m22 = _m22; +} + +__host__ __device__ float* cumat3f::data() +{ + //pointer to float[9] representation of matrix + return (float*) this; +} + +__host__ __device__ const float* cumat3f::data() const +{ + //pointer to float[9] representation of matrix + return (const float*) this; +} + //transposes a 3x3 (9 element) matrix __host__ __device__ void mat3f_transpose(float *mat3inout) { @@ -202,7 +649,7 @@ __host__ __device__ void mat3f_inverse(float *mat3inout) return; } -__host__ __device__ cuvect3f mat3f_mult(float *mat3in, cuvect3f cvin) +__host__ __device__ cuvect3f mat3f_mult(float *mat3in, const cuvect3f &cvin) { int I,J; cuvect3f ret; @@ -253,7 +700,7 @@ __host__ __device__ void mat3f_mult(float *matina, float *matinb, float *matout) return; } -__host__ __device__ void mat3f_hodgedual(cuvect3f vecin, float *matout) +__host__ __device__ void mat3f_hodgedual(const cuvect3f &vecin, float *matout) { matout[0 + 0*3] = 0.0f; matout[1 + 0*3] = -vecin[2]; @@ -269,7 +716,7 @@ __host__ __device__ void mat3f_hodgedual(cuvect3f vecin, float *matout) return; } -__host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f vecout) +__host__ __device__ void mat3f_hodgedual(float *matin, cuvect3f &vecout) { vecout[0] = 0.5*(matin[1 + 2*3] - matin[2 + 1*3]); vecout[1] = 0.5*(matin[2 + 0*3] - matin[0 + 2*3]); @@ -400,199 +847,69 @@ __host__ void test_cudavectf_logic1() return; } - - -__host__ __device__ cumat3f::cumat3f() -{ - int I; - for(I=0;I<9;I++) - { - dat[I] = 0.0; - } - return; -} - -__host__ __device__ cumat3f::~cumat3f() -{ - int I; - for(I=0;I<9;I++) - { - dat[I] = 0.0; - } - return; -} - -__host__ __device__ float& cumat3f::operator[](const int I) -{ - return dat[I]; -} - -__host__ __device__ float& cumat3f::operator()(const int I, const int J) -{ - return dat[I+3*J]; -} - -__host__ __device__ cumat3f cumat3f::operator+(cumat3f lhs) -{ - int I; - cumat3f ret; - for(I=0;I<9;I++) - { - ret.dat[I] = this->dat[I] + lhs.dat[I]; - } - return ret; -} - -__host__ __device__ cumat3f cumat3f::operator-(cumat3f lhs) -{ - int I; - cumat3f ret; - for(I=0;I<9;I++) - { - ret.dat[I] = this->dat[I] - lhs.dat[I]; - } - return ret; -} - -__host__ __device__ cumat3f cumat3f::operator*(float lhs) +__host__ __device__ cumat3f hodge_dual(const cuvect3f &vin) { cumat3f ret; - int I; - for(I=0;I<9;I++) - { - ret.dat[I] = this->dat[I]*lhs; - } - return ret; -} -__host__ __device__ cumat3f cumat3f::operator/(float lhs) -{ - cumat3f ret; - int I; - for(I=0;I<9;I++) - { - ret.dat[I] = this->dat[I]/lhs; - } - return ret; -} + ret.m00 = 0.0f; + ret.m11 = 0.0f; + ret.m22 = 0.0f; -__host__ __device__ float& cumat3f::at(const int I, const int J) -{ - return dat[I+3*J]; -} - + ret.m01 = vin.z; + ret.m12 = vin.x; + ret.m20 = vin.y; -__host__ __device__ cuvect3f cumat3f::operator*(cuvect3f lhs) -{ - cuvect3f ret = cuvect3f(0.0,0.0,0.0); - int I,J; - for(I=0;I<3;I++) - { - for(J=0;J<3;J++) - { - ret[I] = ret[I] + this->at(I,J)*lhs[J]; - } - } - return ret; -} - -__host__ __device__ cumat3f cumat3f::operator*(cumat3f lhs) -{ - cumat3f ret; - int I,J,K; - - for(I=0;I<3;I++) - { - for(J=0;J<3;J++) - { - ret(I,J) = 0.0; - for(K=0;K<3;K++) - { - ret(I,J) = ret(I,J) + this->at(I,K)*lhs(K,J); - } - } - } + ret.m10 = -vin.z; + ret.m21 = -vin.x; + ret.m02 = -vin.y; return ret; } -__host__ __device__ float cumat3f::det() -{ - float ret = 0.0; - - ret = ret + this->at(0,0)*this->at(1,1)*this->at(2,2); - ret = ret + this->at(0,1)*this->at(1,2)*this->at(2,0); - ret = ret + this->at(0,2)*this->at(1,0)*this->at(2,1); - ret = ret - this->at(0,0)*this->at(1,2)*this->at(2,1); - ret = ret - this->at(0,1)*this->at(1,0)*this->at(2,2); - ret = ret - this->at(0,2)*this->at(1,1)*this->at(2,0); - - return ret; -} - -__host__ __device__ cumat3f cumat3f::transpose() -{ - cumat3f q; - int I,J; - for(I=0;I<3;I++) - { - for(J=0;J<3;J++) - { - q.at(I,J) = this->at(J,I); - } - } - return q; -} - -__host__ __device__ cumat3f cumat3f::inverse() -{ - cumat3f q; - float dt = q.det(); - if(dt!=0.0) - { - q(0,0) = (this->at(1,1)*this->at(2,2)-this->at(1,2)*this->at(2,1))/dt; - q(0,1) = -(this->at(1,0)*this->at(2,2)-this->at(1,2)*this->at(2,0))/dt; - q(0,2) = (this->at(1,0)*this->at(2,1)-this->at(1,1)*this->at(2,0))/dt; - q(1,0) = -(this->at(0,1)*this->at(2,2)-this->at(0,2)*this->at(2,1))/dt; - q(1,1) = (this->at(0,0)*this->at(2,2)-this->at(0,2)*this->at(2,0))/dt; - q(1,2) = -(this->at(0,0)*this->at(2,1)-this->at(0,1)*this->at(2,0))/dt; - q(2,0) = (this->at(0,1)*this->at(1,2)-this->at(0,2)*this->at(1,1))/dt; - q(2,1) = -(this->at(0,0)*this->at(1,2)-this->at(0,2)*this->at(1,0))/dt; - q(2,2) = (this->at(0,0)*this->at(1,1)-this->at(0,1)*this->at(1,0))/dt; - - q = q.transpose(); - } - else - { - q(0,0) = inf; - q(0,1) = inf; - q(0,2) = inf; - q(1,0) = inf; - q(1,1) = inf; - q(1,2) = inf; - q(2,0) = inf; - q(2,1) = inf; - q(2,2) = inf; - } - - return q; -} - -__host__ __device__ cuvect3f operator-(cuvect3f rhs) +__host__ __device__ cuvect3f hodge_dual(const cumat3f &min) { cuvect3f ret; - ret[0] = -rhs[0]; - ret[1] = -rhs[1]; - ret[2] = -rhs[2]; + + ret.x = 0.5f*(min.m12 - min.m21); + ret.y = 0.5f*(min.m20 - min.m02); + ret.z = 0.5f*(min.m01 - min.m10); + return ret; } -__host__ __device__ cumat3f operator-(cumat3f rhs) +__host__ __device__ const cumat3f cumat3f_eye() { cumat3f ret; - int I; - for(I=0;I<9;I++) ret[I] = -rhs[I]; + ret.m00 = 1.0f; + ret.m11 = 1.0f; + ret.m22 = 1.0f; + return ret; } +__host__ __device__ const cumat3f cumat3f_zeros() +{ + cumat3f ret; + return ret; +} + +__host__ __device__ cumat3f rotmat_from_axisangle(const cuvect3f &axis, const float &angle) +{ + cumat3f ret = cumat3f_zeros(); + cumat3f H; + cuvect3f _naxis; + + _naxis = cuvect3f_normalize(axis); + H = hodge_dual(_naxis); + + ret += H*sinf(angle); + H = H*H; + ret -= H*cosf(angle); + ret += (H + cumat3f_eye()); + + return ret; +} + + + }; \ No newline at end of file