diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 8cecbc2f..1db47fc1 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -231,10 +231,10 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_ // 2. Loss Scaling - required only for: activation gradients. We do not use. // 3. FP32 Master Copy of Weights // More: http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor_ops - CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH)); CHECK_CUDNN(cudnnSetConvolutionGroupCount(l->convDesc, l->groups)); + if (l->groups == 1) CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH)); #if((CUDNN_MAJOR*10 + CUDNN_MINOR) >= 72) // cuDNN >= 7.2 - CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION)); + //CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION)); // reduces the speed of regular and group convolution #endif #else //if(CUDNN_MAJOR >= 7) if (l->groups > 1) {