#ifndef __AMSCUARRAY_DOPS_HPP__ #define __AMSCUARRAY_DOPS_HPP__ //Device Operations on Arrays // //Device Operations on Device Buffers // dodb namespace amscuda { //sum template T devcuarray_sum(cuarray *devptr); template T dbuff_sum(T *devbuffer, int N); struct dbuff_statstruct { public: float min; float max; float mean; float stdev; float sum; }; //stats (min,max,mean,stdev) template void dbuff_minmax(T *devbuffer, int N, T *min, T *max); template dbuff_statstruct dbuff_stats(T *devbuffer, int N); // //sets all elements to setto template void dbuff_setall(T *devbuffer, int N, T setto, int nblocks, int nthreads); //random device buffer functions void dbuff_rand_dpr32(float *devbuffer, int N, int32_t *rseedinout, int nblocks, int nthreads); // void dbuff_rand_dpr32n(float *devbuffer, int N, int32_t *rseedinout, int nblocks, int nthreads); // void dbuff_rand_dpr64(float *devbuffer, int N, int64_t *rseedinout, int nblocks, int nthreads); // //Elementwise device-buffer vector binary operation //takes two input arrays ( , ) --> one output array template void dbuff_vectorbinop(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads); //Elementwise device-buffer vector two-parameter operation //takes one input array, and a constant paramter ( ) ---> one output array template void dbuff_vectorbinop(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads); //vector_add template void dbuff_add(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); template void dbuff_add(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); template void dbuff_sub(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); template void dbuff_sub(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); template void dbuff_mult(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); template void dbuff_mult(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); template void dbuff_div(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); template void dbuff_div(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads); template void dbuff_div(T par_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads); // Tests // void test_dbuff_rand_dpr32(); }; #include #endif