This commit is contained in:
2026-04-10 13:29:50 -04:00
commit a8ed51a904
162 changed files with 19902 additions and 0 deletions

View File

@ -0,0 +1,76 @@
#ifndef __AMSCUARRAY_DOPS_HPP__
#define __AMSCUARRAY_DOPS_HPP__
//Device Operations on Arrays
//
//Device Operations on Device Buffers
// dodb
namespace amscuda
{
//sum
// Sum reductions (per the names); the result is returned by value as T.
// Only declared here; definitions are presumably in the _impl header included at the bottom of this file.
// devcuarray_sum: takes a pointer to a cuarray<T>. The "dev" prefix suggests the cuarray object
// itself lives on the device -- TODO confirm against the implementation.
template<typename T> T devcuarray_sum(cuarray<T> *devptr);
// dbuff_sum: takes a raw buffer pointer plus N (presumably the number of elements to sum -- verify).
template<typename T> T dbuff_sum(T *devbuffer, int N);
// Summary-statistics record; this is the type returned by dbuff_stats<T>().
// Every field is a float, whatever element type T the analysed buffer has.
// Plain aggregate: all members are public and the field order
// (min, max, mean, stdev, sum) is part of the layout.
struct dbuff_statstruct
{
    float min;    // minimum value
    float max;    // maximum value
    float mean;   // mean value
    float stdev;  // standard deviation (NOTE(review): population vs. sample form not visible here)
    float sum;    // sum of values
};
//stats (min,max,mean,stdev)
// dbuff_minmax: returns void; results are presumably written through the min / max pointers.
// NOTE(review): whether min / max must be host or device addresses is not visible in this header -- confirm.
template<typename T> void dbuff_minmax(T *devbuffer, int N, T *min, T *max);
// dbuff_stats: returns a dbuff_statstruct by value. Its fields are float regardless of T.
// N is presumably the element count of devbuffer -- verify against the implementation.
template<typename T> dbuff_statstruct dbuff_stats(T *devbuffer, int N); //
//sets all elements to setto
// devbuffer: buffer to fill; N: presumably the number of elements in it.
// setto:     value assigned to each element (passed by value).
// nblocks / nthreads: presumably the launch configuration (block count, threads per block) -- TODO confirm.
template<typename T> void dbuff_setall(T *devbuffer, int N, T setto, int nblocks, int nthreads);
//random device buffer functions
// All three take a float buffer, a count N, a seed pointer and nblocks / nthreads
// (presumably the launch configuration -- TODO confirm).
// rseedinout: per its name an in/out seed; presumably read and then updated by the call -- verify.
// NOTE(review): the meaning of the dpr32 / dpr32n / dpr64 suffixes is not visible in this header;
//               check the implementation before relying on a particular distribution or range.
// NOTE(review): int32_t / int64_t require <cstdint>, which the visible part of this header does not
//               include; presumably it is supplied by whichever header includes this one.
// NOTE(review): dbuff_rand_dpr64 takes a 64-bit seed but still a float buffer -- confirm this is intended.
void dbuff_rand_dpr32(float *devbuffer, int N, int32_t *rseedinout, int nblocks, int nthreads); //
void dbuff_rand_dpr32n(float *devbuffer, int N, int32_t *rseedinout, int nblocks, int nthreads); //
void dbuff_rand_dpr64(float *devbuffer, int N, int64_t *rseedinout, int nblocks, int nthreads); //
//Elementwise device-buffer vector binary operation
//takes two input arrays (dbuf_a, dbuf_b) --> one output array (dbuf_out)
// fpnt: the binary function, with signature T3 fpnt(T1, T2).
// N is presumably the element count shared by all three buffers -- verify.
// NOTE(review): if fpnt is called from device code it must be a device-side function pointer;
//               this cannot be checked from the declaration alone.
template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 *dbuf_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads);
//Elementwise device-buffer vector two-parameter operation
//takes one input array (dbuf_a) and a constant parameter (par_b) ---> one output array (dbuf_out)
// Same name as the array/array form; differs only in taking the second operand par_b by value.
template<typename T1, typename T2, typename T3> void dbuff_vectorbinop(T1 *dbuf_a, T2 par_b, T3 *dbuf_out, int N, T3 (*fpnt)(T1,T2), int nblocks, int nthreads);
//vector_add
// Elementwise arithmetic declarations: add, sub, mult, div (operation per the function name).
// Each operation has two overloads:
//   (dbuff_a, dbuff_b, dbuff_out, ...)  array  op array
//   (dbuff_a, par_b,   dbuff_out, ...)  array  op constant
// dbuff_div has a third overload:
//   (par_a,   dbuff_b, dbuff_out, ...)  constant op array
// Inputs, constant and output all share the single element type T.
// N is presumably the element count; nblocks / nthreads presumably the launch configuration -- TODO confirm.
// NOTE(review): whether dbuff_out may alias an input buffer is not visible in this header.
template<typename T> void dbuff_add(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
template<typename T> void dbuff_add(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads);
template<typename T> void dbuff_sub(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
template<typename T> void dbuff_sub(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads);
template<typename T> void dbuff_mult(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
template<typename T> void dbuff_mult(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads);
template<typename T> void dbuff_div(T *dbuff_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
template<typename T> void dbuff_div(T *dbuff_a, T par_b, T *dbuff_out, int N, int nblocks, int nthreads);
template<typename T> void dbuff_div(T par_a, T *dbuff_b, T *dbuff_out, int N, int nblocks, int nthreads);
// Tests //
// Test routine for dbuff_rand_dpr32 (per its name). Takes no arguments and returns void,
// so any pass/fail reporting must happen inside the implementation -- not visible here.
void test_dbuff_rand_dpr32();
};
#include <amsculib2/amscuarray_dops_impl.hpp>
#endif