From 9920410ba9cc756c46d6ee84f7b7a2a9fe941448 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 13 Jul 2017 14:36:54 +0300 Subject: [PATCH] minor fix --- build/darknet/darknet.vcxproj | 2 +- src/blas_kernels.cu | 4 ++++ src/convolutional_kernels.cu | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj index b5fff824..8c154ed5 100644 --- a/build/darknet/darknet.vcxproj +++ b/build/darknet/darknet.vcxproj @@ -132,7 +132,7 @@ true true C:\opencv_2.4.9\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include - OPENCV;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + CUDNN;OPENCV;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) c11 c++1y CompileAsCpp diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index d9401766..79fc1c1d 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -223,6 +223,7 @@ __global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch, local[id] += (i+id < spatial) ? delta[index] : 0; } } + __syncthreads(); if(id == 0){ mean_delta[filter] = 0; @@ -251,6 +252,7 @@ __global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean, local[id] += (i+id < spatial) ? delta[index]*(x[index] - mean[filter]) : 0; } } + __syncthreads(); if(id == 0){ variance_delta[filter] = 0; @@ -446,6 +448,7 @@ __global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, local[id] += (i+id < spatial) ? x[index] : 0; } } + __syncthreads(); if(id == 0){ mean[filter] = 0; @@ -474,6 +477,7 @@ __global__ void fast_variance_kernel(float *x, float *mean, int batch, int filt local[id] += (i+id < spatial) ? pow((x[index] - mean[filter]), 2) : 0; } } + __syncthreads(); if(id == 0){ variance[filter] = 0; diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 005269b0..03c9ab79 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -127,6 +127,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); //if(l.dot > 0) dot_error_gpu(l); if(l.binary || l.xnor) swap_binary(&l); + //cudaDeviceSynchronize(); // for correct profiling of performance } void backward_convolutional_layer_gpu(convolutional_layer l, network_state state)