Compile fix

pull/1724/head
AlexeyAB 7 years ago
parent 03e95320a1
commit 0224ba3d0d
  1. 12
      src/im2col_kernels.cu

@@ -12,6 +12,9 @@ extern "C" {
 #include <assert.h>
 #include <cuda.h>
+#define WARP_SIZE 32
 // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu
 // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE
@@ -66,7 +69,6 @@ void im2col_ongpu(float *im,
 }
 // --------------------------------
-#define WARP_SIZE 32
 __global__ void im2col_align_gpu_kernel(const int n, const float* data_im,
 const int height, const int width, const int ksize,
@@ -568,7 +570,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
 int64_t B_cur_index = (j*ldb + k) / 8;
 if (i >= M) A_cur_index = 0;
-#pragma unroll WARP_SIZE
+#pragma unroll
 for (int t = 0; t < WARP_SIZE; ++t) {
 const int lane_id = threadIdx.x % WARP_SIZE;
@@ -598,7 +600,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
 int64_t B_cur_index = (j*ldb + k) / 8;
 if (i >= M) A_cur_index = 0;
-#pragma unroll WARP_SIZE
+#pragma unroll
 for (int t = 0; t < WARP_SIZE; ++t) {
 const int lane_id = threadIdx.x % WARP_SIZE;
@@ -705,7 +707,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
 int64_t B_cur_index = (local_j*ldb + k) / 8;
 if (i >= M) A_cur_index = 0;
-#pragma unroll WARP_SIZE
+#pragma unroll
 for (int t = 0; t < WARP_SIZE; ++t) {
 const int lane_id = threadIdx.x % WARP_SIZE;
@@ -735,7 +737,7 @@ __global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int
 int64_t B_cur_index = (local_j*ldb + k) / 8;
 if (i >= M) A_cur_index = 0;
-#pragma unroll WARP_SIZE
+#pragma unroll
 for (int t = 0; t < WARP_SIZE; ++t) {
 const int lane_id = threadIdx.x % WARP_SIZE;

Loading…
Cancel
Save