Disabled CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION, and only if groups==1 use CUDNN_TENSOR_OP_MATH

5 years ago · e66d3b1bdb
parent dc7277f152
commit e66d3b1bdb
1 changed files with 2 additions and 2 deletions
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@ -231,10 +231,10 @@ void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_
    // 2. Loss Scaling - required only for: activation gradients. We do not use.
    // 3. FP32 Master Copy of Weights
    // More: http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor_ops
-    CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH));
    CHECK_CUDNN(cudnnSetConvolutionGroupCount(l->convDesc, l->groups));
+    if (l->groups == 1) CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH));
 #if((CUDNN_MAJOR*10 + CUDNN_MINOR) >= 72)   // cuDNN >= 7.2
-    CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION));
+    //CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION)); // reduces the speed of regular and group convolution
 #endif
 #else   //if(CUDNN_MAJOR >= 7)
    if (l->groups > 1) {