77 lines
2.9 KiB
C++
77 lines
2.9 KiB
C++
#ifndef __AMSCUARRAY_DOPS_HPP__
|
|
#define __AMSCUARRAY_DOPS_HPP__
|
|
|
|
//Device Operations on Arrays
|
|
//
|
|
|
|
//Device Operations on Device Buffers
|
|
// dodb
|
|
|
|
namespace amscuda
|
|
{
|
|
|
|
|
|
//sum
|
|
// Sum over the cuarray<T> referenced by devptr; the result is returned by value.
// NOTE(review): the name suggests devptr points at a cuarray object living in
// device memory -- the definition is not in this header, confirm in the impl file.
template <typename T>
T devcuarray_sum(cuarray<T>* devptr);
|
|
|
|
// Sum over a raw buffer of N elements of type T; the result is returned by value.
// NOTE(review): devbuffer is presumably a device-memory pointer (this header is
// described as "Device Operations on Device Buffers") -- confirm in the impl file.
template <typename T>
T dbuff_sum(T* devbuffer, int N);
|
|
|
|
|
|
// Plain record of summary statistics; this is the return type of dbuff_stats.
// Every field is a float regardless of the element type of the buffer analysed.
// Members are left uninitialised on default construction (no constructors or
// default member initialisers are declared), so the type stays a trivial aggregate.
struct dbuff_statstruct
{
    float min;    // minimum value
    float max;    // maximum value
    float mean;   // mean value
    float stdev;  // standard deviation
    float sum;    // sum of values
};
|
|
|
|
//stats (min,max,mean,stdev)
|
|
|
|
// Minimum / maximum over a buffer of N elements of type T.
// min and max are pointer out-parameters of type T*; nothing is returned.
// NOTE(review): presumably *min / *max receive the smallest / largest element;
// whether min and max must be host or device pointers is not visible in this
// header -- confirm in the impl file.
template <typename T>
void dbuff_minmax(T* devbuffer, int N, T* min, T* max);
|
|
|
|
// Statistics over a buffer of N elements of type T, returned by value as a
// dbuff_statstruct (fields: min, max, mean, stdev, sum -- all float).
// NOTE(review): how the values are computed or converted to float is defined in
// the impl file, not here.
template <typename T>
dbuff_statstruct dbuff_stats(T* devbuffer, int N);
|
|
|
|
// Sets all N elements of devbuffer to the value setto.
// NOTE(review): nblocks / nthreads are presumably the kernel launch
// configuration (number of blocks, threads per block) -- confirm in the impl file.
template <typename T>
void dbuff_setall(T* devbuffer, int N, T setto, int nblocks, int nthreads);
|
|
|
|
//random device buffer functions
|
|
// Random fill of a float buffer of N elements, 32-bit seed variant.
// rseedinout is a pointer to an int32_t seed; the "inout" name suggests it is
// both read and updated by the call -- TODO confirm in the impl file.
// NOTE(review): nblocks / nthreads are presumably the kernel launch
// configuration; the distribution and range of the values are not visible here.
void dbuff_rand_dpr32(
    float* devbuffer,
    int N,
    int32_t* rseedinout,
    int nblocks,
    int nthreads);
|
|
// Random fill of a float buffer of N elements, 32-bit seed variant with an "n"
// suffix; same parameter list as dbuff_rand_dpr32.
// NOTE(review): the "n" presumably stands for normally distributed output --
// TODO confirm in the impl file. rseedinout is presumably read and updated;
// nblocks / nthreads are presumably the kernel launch configuration.
void dbuff_rand_dpr32n(
    float* devbuffer,
    int N,
    int32_t* rseedinout,
    int nblocks,
    int nthreads);
|
|
|
|
|
|
// Random fill of a float buffer of N elements, 64-bit seed variant.
// rseedinout is a pointer to an int64_t seed; the "inout" name suggests it is
// both read and updated by the call -- TODO confirm in the impl file.
// NOTE(review): the output buffer is float* even though the seed is 64-bit --
// confirm this is intended. nblocks / nthreads are presumably the kernel launch
// configuration.
void dbuff_rand_dpr64(
    float* devbuffer,
    int N,
    int64_t* rseedinout,
    int nblocks,
    int nthreads);
|
|
|
|
// Elementwise device-buffer vector binary operation.
// Takes two input arrays (dbuf_a of T1, dbuf_b of T2) --> one output array
// (dbuf_out of T3), each addressed over N elements.
// fpnt is the binary function, with signature T3(T1, T2), applied per element.
// NOTE(review): presumably dbuf_out[i] = fpnt(dbuf_a[i], dbuf_b[i]); if fpnt is
// called from device code it must be a device-callable function pointer --
// confirm in the impl file. nblocks / nthreads are presumably the kernel launch
// configuration.
template <typename T1, typename T2, typename T3>
void dbuff_vectorbinop(
    T1* dbuf_a,
    T2* dbuf_b,
    T3* dbuf_out,
    int N,
    T3 (*fpnt)(T1, T2),
    int nblocks,
    int nthreads);
|
|
|
|
// Elementwise device-buffer vector two-parameter operation.
// Takes one input array (dbuf_a of T1) and a constant parameter (par_b of T2,
// passed by value) --> one output array (dbuf_out of T3), over N elements.
// fpnt is the binary function, with signature T3(T1, T2), applied per element.
// NOTE(review): presumably dbuf_out[i] = fpnt(dbuf_a[i], par_b); if fpnt is
// called from device code it must be a device-callable function pointer --
// confirm in the impl file. nblocks / nthreads are presumably the kernel launch
// configuration.
template <typename T1, typename T2, typename T3>
void dbuff_vectorbinop(
    T1* dbuf_a,
    T2 par_b,
    T3* dbuf_out,
    int N,
    T3 (*fpnt)(T1, T2),
    int nblocks,
    int nthreads);
|
|
|
|
|
|
//elementwise vector arithmetic: add, sub, mult, div (buffer-buffer and buffer-scalar forms)
|
|
// Elementwise addition, buffer + buffer form: inputs dbuff_a and dbuff_b,
// output dbuff_out, N elements of type T each.
// NOTE(review): presumably dbuff_out[i] = dbuff_a[i] + dbuff_b[i]; nblocks /
// nthreads are presumably the kernel launch configuration -- confirm in impl.
template <typename T>
void dbuff_add(T* dbuff_a, T* dbuff_b, T* dbuff_out, int N, int nblocks, int nthreads);

// Elementwise addition, buffer + scalar form: par_b is a single value of type T
// passed by value instead of a second buffer.
// NOTE(review): presumably dbuff_out[i] = dbuff_a[i] + par_b -- confirm in impl.
template <typename T>
void dbuff_add(T* dbuff_a, T par_b, T* dbuff_out, int N, int nblocks, int nthreads);
|
|
// Elementwise subtraction, buffer - buffer form: inputs dbuff_a and dbuff_b,
// output dbuff_out, N elements of type T each.
// NOTE(review): presumably dbuff_out[i] = dbuff_a[i] - dbuff_b[i]; nblocks /
// nthreads are presumably the kernel launch configuration -- confirm in impl.
template <typename T>
void dbuff_sub(T* dbuff_a, T* dbuff_b, T* dbuff_out, int N, int nblocks, int nthreads);

// Elementwise subtraction, buffer - scalar form: par_b is a single value of
// type T passed by value instead of a second buffer.
// NOTE(review): presumably dbuff_out[i] = dbuff_a[i] - par_b -- confirm in impl.
template <typename T>
void dbuff_sub(T* dbuff_a, T par_b, T* dbuff_out, int N, int nblocks, int nthreads);
|
|
// Elementwise multiplication, buffer * buffer form: inputs dbuff_a and dbuff_b,
// output dbuff_out, N elements of type T each.
// NOTE(review): presumably dbuff_out[i] = dbuff_a[i] * dbuff_b[i]; nblocks /
// nthreads are presumably the kernel launch configuration -- confirm in impl.
template <typename T>
void dbuff_mult(T* dbuff_a, T* dbuff_b, T* dbuff_out, int N, int nblocks, int nthreads);

// Elementwise multiplication, buffer * scalar form: par_b is a single value of
// type T passed by value instead of a second buffer.
// NOTE(review): presumably dbuff_out[i] = dbuff_a[i] * par_b -- confirm in impl.
template <typename T>
void dbuff_mult(T* dbuff_a, T par_b, T* dbuff_out, int N, int nblocks, int nthreads);
|
|
// Elementwise division, buffer / buffer form: inputs dbuff_a and dbuff_b,
// output dbuff_out, N elements of type T each.
// NOTE(review): presumably dbuff_out[i] = dbuff_a[i] / dbuff_b[i]; nblocks /
// nthreads are presumably the kernel launch configuration; handling of a zero
// divisor is not visible in this header -- confirm in impl.
template <typename T>
void dbuff_div(T* dbuff_a, T* dbuff_b, T* dbuff_out, int N, int nblocks, int nthreads);

// Elementwise division, buffer / scalar form: par_b is a single value of type T
// passed by value instead of a second buffer.
// NOTE(review): presumably dbuff_out[i] = dbuff_a[i] / par_b -- confirm in impl.
template <typename T>
void dbuff_div(T* dbuff_a, T par_b, T* dbuff_out, int N, int nblocks, int nthreads);

// Elementwise division, scalar / buffer form: par_a is a single value of type T
// passed by value in the first operand position.
// NOTE(review): presumably dbuff_out[i] = par_a / dbuff_b[i] -- confirm in impl.
template <typename T>
void dbuff_div(T par_a, T* dbuff_b, T* dbuff_out, int N, int nblocks, int nthreads);
|
|
|
|
|
|
// Tests //
|
|
|
|
// Test routine associated (by name) with dbuff_rand_dpr32; takes no arguments
// and returns nothing.
// NOTE(review): what it checks or prints is defined in the implementation file,
// not in this header.
void test_dbuff_rand_dpr32();
|
|
|
|
};
|
|
|
|
#include <amsculib2/amscuarray_dops_impl.hpp>
|
|
|
|
#endif
|
|
|