Increased Yolo performance on CPU by 2.3x using OpenMP in both darknet_no_gpu and yolo_cpp_dll_no_gpu

pull/5342/head
AlexeyAB 8 years ago
parent 37921caad3
commit 51477ab274
  1. 2
      build/darknet/darknet_no_gpu.vcxproj
  2. 1
      build/darknet/yolo_cpp_dll_no_gpu.vcxproj
  3. 21
      src/gemm.c

@@ -89,6 +89,7 @@
 <AdditionalIncludeDirectories>C:\opencv_2.4.9\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
 <PreprocessorDefinitions>_MBCS;OPENCV;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 <UndefinePreprocessorDefinitions>CUDNN</UndefinePreprocessorDefinitions>
+<OpenMPSupport>true</OpenMPSupport>
 </ClCompile>
 <Link>
 <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -133,6 +134,7 @@
 <PrecompiledHeaderCompileAs>CompileAsCpp</PrecompiledHeaderCompileAs>
 <CompileAs>Default</CompileAs>
 <UndefinePreprocessorDefinitions>CUDNN</UndefinePreprocessorDefinitions>
+<OpenMPSupport>true</OpenMPSupport>
 </ClCompile>
 <Link>
 <GenerateDebugInformation>true</GenerateDebugInformation>

@@ -139,6 +139,7 @@
 <UndefinePreprocessorDefinitions>
 </UndefinePreprocessorDefinitions>
 <MultiProcessorCompilation>true</MultiProcessorCompilation>
+<OpenMPSupport>true</OpenMPSupport>
 </ClCompile>
 <Link>
 <GenerateDebugInformation>true</GenerateDebugInformation>

@@ -151,14 +151,19 @@ void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA,
             C[i*ldc + j] *= BETA;
         }
     }
-    if(!TA && !TB)
-        gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
-    else if(TA && !TB)
-        gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
-    else if(!TA && TB)
-        gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
-    else
-        gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
+
+    int t;
+    #pragma omp parallel for
+    for (t = 0; t < M; ++t) {
+        if (!TA && !TB)
+            gemm_nn(1, N, K, ALPHA, A + t*lda, lda, B, ldb, C + t*ldc, ldc);
+        else if (TA && !TB)
+            gemm_tn(1, N, K, ALPHA, A + t, lda, B, ldb, C + t*ldc, ldc);
+        else if (!TA && TB)
+            gemm_nt(1, N, K, ALPHA, A + t*lda, lda, B, ldb, C + t*ldc, ldc);
+        else
+            gemm_tt(1, N, K, ALPHA, A + t, lda, B, ldb, C + t*ldc, ldc);
+    }
 }
 #ifdef GPU

Loading…
Cancel
Save