From ef979a1fd2d14f41c65aa4b15a5f1db390b7bb43 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Sun, 1 Mar 2020 14:11:08 +0300
Subject: [PATCH] For CPU and GPU structures in the darknet.h have the same
 size

---
 include/darknet.h     | 27 +++++++++++++++++++++------
 src/batchnorm_layer.c |  8 +++++---
 src/blas.h            |  2 +-
 src/blas_kernels.cu   | 13 ++++++++-----
 src/darknet.c         |  1 +
 5 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/include/darknet.h b/include/darknet.h
index a13168da..429af213 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -34,6 +34,8 @@
 
 #define SECRET_NUM -1234
 
+typedef enum { UNUSED_DEF_VAL } UNUSED_ENUM_TYPE;
+
 #ifdef GPU
 
 #include <cuda_runtime.h>
@@ -42,8 +44,8 @@
 
 #ifdef CUDNN
 #include <cudnn.h>
-#endif
-#endif
+#endif // CUDNN
+#endif // GPU
 
 #ifdef __cplusplus
 extern "C" {
@@ -495,7 +497,7 @@ struct layer {
 
     size_t workspace_size;
 
-#ifdef GPU
+//#ifdef GPU
     int *indexes_gpu;
 
     float *z_gpu;
@@ -610,8 +612,21 @@ struct layer {
     cudnnConvolutionBwdDataAlgo_t bd_algo, bd_algo16;
     cudnnConvolutionBwdFilterAlgo_t bf_algo, bf_algo16;
     cudnnPoolingDescriptor_t poolingDesc;
+#else // CUDNN
+    void* srcTensorDesc, *dstTensorDesc;
+    void* srcTensorDesc16, *dstTensorDesc16;
+    void* dsrcTensorDesc, *ddstTensorDesc;
+    void* dsrcTensorDesc16, *ddstTensorDesc16;
+    void* normTensorDesc, *normDstTensorDesc, *normDstTensorDescF16;
+    void* weightDesc, *weightDesc16;
+    void* dweightDesc, *dweightDesc16;
+    void* convDesc;
+    UNUSED_ENUM_TYPE fw_algo, fw_algo16;
+    UNUSED_ENUM_TYPE bd_algo, bd_algo16;
+    UNUSED_ENUM_TYPE bf_algo, bf_algo16;
+    void* poolingDesc;
 #endif // CUDNN
-#endif // GPU
+//#endif // GPU
 };
 
 
@@ -701,7 +716,7 @@ typedef struct network {
     float *cost;
     float clip;
 
-#ifdef GPU
+//#ifdef GPU
     //float *input_gpu;
     //float *truth_gpu;
     float *delta_gpu;
@@ -722,7 +737,7 @@ typedef struct network {
     float *global_delta_gpu;
     float *state_delta_gpu;
     size_t max_delta_gpu_size;
-#endif
+//#endif // GPU
     int optimized_memory;
     size_t workspace_size_limit;
 } network;
diff --git a/src/batchnorm_layer.c b/src/batchnorm_layer.c
index 7d9b172d..8920d414 100644
--- a/src/batchnorm_layer.c
+++ b/src/batchnorm_layer.c
@@ -258,15 +258,17 @@ void forward_batchnorm_layer_gpu(layer l, network_state state)
             fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu);
 
             //fast_v_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.v_cbn_gpu);
-            int minibatch_index = state.net.current_subdivision + 1;
-            float alpha = 0.01;
+            const int minibatch_index = state.net.current_subdivision + 1;
+            const int max_minibatch_index = state.net.subdivisions;
+            //printf("\n minibatch_index = %d, max_minibatch_index = %d \n", minibatch_index, max_minibatch_index);
+            const float alpha = 0.01;
 
             int inverse_variance = 0;
 #ifdef CUDNN
             inverse_variance = 1;
 #endif // CUDNN
 
-            fast_v_cbn_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, minibatch_index, l.m_cbn_avg_gpu, l.v_cbn_avg_gpu, l.variance_gpu,
+            fast_v_cbn_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, minibatch_index, max_minibatch_index, l.m_cbn_avg_gpu, l.v_cbn_avg_gpu, l.variance_gpu,
                 alpha, l.rolling_mean_gpu, l.rolling_variance_gpu, inverse_variance, .00001);
 
             normalize_scale_bias_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.scales_gpu, l.biases_gpu, l.batch, l.out_c, l.out_h*l.out_w, inverse_variance, .00001f);
diff --git a/src/blas.h b/src/blas.h
index 252497a8..9cf665d8 100644
--- a/src/blas.h
+++ b/src/blas.h
@@ -87,7 +87,7 @@ void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *varianc
 
 void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
 void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
-void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, float *m_avg, float *v_avg, float *variance,
+void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
     const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon);
 void normalize_scale_bias_gpu(float *x, float *mean, float *variance, float *scales, float *biases, int batch, int filters, int spatial, int inverse_variance, float epsilon);
 void compare_2_arrays_gpu(float *one, float *two, int size);
diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu
index d7ad75ea..331b53e7 100644
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -572,7 +572,7 @@ extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters,
 }
 
 
-__global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, float *m_avg, float *v_avg, float *variance,
+__global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
     const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
 {
     const int threads = BLOCK;
@@ -615,16 +615,19 @@ __global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int f
         if (inverse_variance) variance[filter] = 1.0f / sqrtf(variance_tmp + epsilon);
         else variance[filter] = variance_tmp;
 
-        rolling_mean_gpu[filter] = alpha * mean[filter] + (1 - alpha) * rolling_mean_gpu[filter];
+        //if (max_minibatch_index == minibatch_index)
+        {
+            rolling_mean_gpu[filter] = alpha * mean[filter] + (1 - alpha) * rolling_mean_gpu[filter];
 
-        rolling_variance_gpu[filter] = alpha * variance_tmp + (1 - alpha) * rolling_variance_gpu[filter];
+            rolling_variance_gpu[filter] = alpha * variance_tmp + (1 - alpha) * rolling_variance_gpu[filter];
+        }
     }
 }
 
-extern "C" void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, float *m_avg, float *v_avg, float *variance,
+extern "C" void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance,
     const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon)
 {
-    fast_v_cbn_kernel << <filters, BLOCK, 0, get_cuda_stream() >> >(x, mean, batch, filters, spatial, minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon);
+    fast_v_cbn_kernel << <filters, BLOCK, 0, get_cuda_stream() >> >(x, mean, batch, filters, spatial, minibatch_index, max_minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
diff --git a/src/darknet.c b/src/darknet.c
index ec47b05a..d4f88b02 100644
--- a/src/darknet.c
+++ b/src/darknet.c
@@ -455,6 +455,7 @@ int main(int argc, char **argv)
 
 #ifndef GPU
     gpu_index = -1;
+    printf(" GPU isn't used \n");
     init_cpu();
 #else
     if(gpu_index >= 0){
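
Background sketch (illustrative, not part of the patch): the darknet.h hunks keep struct layer and struct network the same size in CPU-only and GPU builds by always declaring the *_gpu members and by pairing every cuDNN descriptor or algorithm-enum field with a same-sized placeholder, void* for the opaque descriptor handles and UNUSED_ENUM_TYPE for the enums. The minimal C program below only demonstrates that idea; the struct and field names are simplified stand-ins, not the real darknet definitions.

/* Sketch: both struct variants must report the same sizeof(), otherwise a
 * library built with GPU=1 and an application built without it would
 * disagree on the field offsets of the shared structs. */
#include <stdio.h>

typedef enum { UNUSED_DEF_VAL } UNUSED_ENUM_TYPE;

/* Roughly what the GPU/CUDNN build sees: cuDNN descriptors are opaque
 * pointers, cuDNN algorithm types are plain C enums. */
typedef struct {
    float *output_gpu;
    void *srcTensorDesc;        /* stands in for cudnnTensorDescriptor_t   */
    int fw_algo;                /* stands in for cudnnConvolutionFwdAlgo_t */
} layer_gpu_build;

/* Roughly what the CPU-only build sees after the patch: placeholders of the
 * same size as the cuDNN members above. */
typedef struct {
    float *output_gpu;
    void *srcTensorDesc;        /* void* placeholder for the descriptor handle */
    UNUSED_ENUM_TYPE fw_algo;   /* dummy enum placeholder for the algorithm    */
} layer_cpu_build;

int main(void)
{
    /* On common ABIs an enum is int-sized, which is what the placeholder
     * approach relies on, so the two layouts come out identical. */
    printf("GPU-build layer: %zu bytes, CPU-build layer: %zu bytes\n",
           sizeof(layer_gpu_build), sizeof(layer_cpu_build));
    return 0;
}

Compiling and running the sketch should print the same byte count for both variants, which is the invariant the patch restores for the real layer and network structs shared across darknet's CPU and GPU translation units.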