Compile fix: define WARP_SIZE at the top of the file and use plain `#pragma unroll`

7 years ago · 0224ba3d0d
parent 03e95320a1
commit 0224ba3d0d
1 changed file with 7 additions and 5 deletions
--- a/src/im2col_kernels.cu
+++ b/src/im2col_kernels.cu
@@ -12,6 +12,9 @@ extern "C" {
 #include <assert.h>
 #include <cuda.h>

+#define WARP_SIZE 32
+
+
 // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu
 // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE

@@ -66,7 +69,6 @@ void im2col_ongpu(float *im,
 }
 // --------------------------------

-#define WARP_SIZE 32

 __global__ void im2col_align_gpu_kernel(const int n, const float* data_im,
    const int height, const int width, const int ksize,
@@ -568,7 +570,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
                int64_t B_cur_index = (j*ldb + k) / 8;
                if (i >= M) A_cur_index = 0;

-                #pragma unroll WARP_SIZE
+                #pragma unroll
                for (int t = 0; t < WARP_SIZE; ++t) {
                    const int lane_id = threadIdx.x % WARP_SIZE;

@@ -598,7 +600,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
                int64_t B_cur_index = (j*ldb + k) / 8;
                if (i >= M) A_cur_index = 0;

-                #pragma unroll WARP_SIZE
+                #pragma unroll
                for (int t = 0; t < WARP_SIZE; ++t) {
                    const int lane_id = threadIdx.x % WARP_SIZE;

@@ -705,7 +707,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
                int64_t B_cur_index = (local_j*ldb + k) / 8;
                if (i >= M) A_cur_index = 0;

-                #pragma unroll WARP_SIZE
+                #pragma unroll
                for (int t = 0; t < WARP_SIZE; ++t) {
                    const int lane_id = threadIdx.x % WARP_SIZE;

@@ -735,7 +737,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
                int64_t B_cur_index = (local_j*ldb + k) / 8;
                if (i >= M) A_cur_index = 0;

-                #pragma unroll WARP_SIZE
+                #pragma unroll
                for (int t = 0; t < WARP_SIZE; ++t) {
                    const int lane_id = threadIdx.x % WARP_SIZE;