|
|
@ -27,6 +27,7 @@ void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); |
|
|
|
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); |
|
|
|
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); |
|
|
|
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); |
|
|
|
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); |
|
|
|
void scal_cpu(int N, float ALPHA, float *X, int INCX); |
|
|
|
void scal_cpu(int N, float ALPHA, float *X, int INCX); |
|
|
|
|
|
|
|
void scal_add_cpu(int N, float ALPHA, float BETA, float *X, int INCX); |
|
|
|
void fill_cpu(int N, float ALPHA, float * X, int INCX); |
|
|
|
void fill_cpu(int N, float ALPHA, float * X, int INCX); |
|
|
|
float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); |
|
|
|
float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); |
|
|
|
void test_gpu_blas(); |
|
|
|
void test_gpu_blas(); |
|
|
@ -61,6 +62,7 @@ void simple_copy_ongpu(int size, float *src, float *dst); |
|
|
|
void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY); |
|
|
|
void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY); |
|
|
|
void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); |
|
|
|
void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); |
|
|
|
void scal_ongpu(int N, float ALPHA, float * X, int INCX); |
|
|
|
void scal_ongpu(int N, float ALPHA, float * X, int INCX); |
|
|
|
|
|
|
|
void scal_add_ongpu(int N, float ALPHA, float BETA, float * X, int INCX); |
|
|
|
void supp_ongpu(int N, float ALPHA, float * X, int INCX); |
|
|
|
void supp_ongpu(int N, float ALPHA, float * X, int INCX); |
|
|
|
void mask_gpu_new_api(int N, float * X, float mask_num, float * mask, float val); |
|
|
|
void mask_gpu_new_api(int N, float * X, float mask_num, float * mask, float val); |
|
|
|
void mask_ongpu(int N, float * X, float mask_num, float * mask); |
|
|
|
void mask_ongpu(int N, float * X, float mask_num, float * mask); |
|
|
|