Fixed bug for 32-bit compilation without GPU.

pull/1816/head
AlexeyAB 7 years ago
parent d487bdf471
commit 31ac46ba22
  1. 21
      build/darknet/darknet_no_gpu.vcxproj
  2. 2
      src/convolutional_layer.c
  3. 25
      src/gemm.c
  4. 10
      src/layer.c
  5. 4
      src/yolo_layer.c

@ -78,9 +78,15 @@
<WarningLevel>Level3</WarningLevel> <WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization> <Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;_MBCS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<UndefinePreprocessorDefinitions>OPENCV;</UndefinePreprocessorDefinitions>
<ForcedIncludeFiles>stdlib.h;crtdbg.h;%(ForcedIncludeFiles)</ForcedIncludeFiles>
</ClCompile> </ClCompile>
<Link> <Link>
<GenerateDebugInformation>true</GenerateDebugInformation> <GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>..\..\3rdparty\lib\x86\pthreadVC2.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
@ -89,9 +95,10 @@
<Optimization>Disabled</Optimization> <Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_MBCS;OPENCV;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>_MBCS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<UndefinePreprocessorDefinitions>CUDNN</UndefinePreprocessorDefinitions> <UndefinePreprocessorDefinitions>OPENCV;CUDNN</UndefinePreprocessorDefinitions>
<OpenMPSupport>true</OpenMPSupport> <OpenMPSupport>true</OpenMPSupport>
<ForcedIncludeFiles>stdlib.h;crtdbg.h;%(ForcedIncludeFiles)</ForcedIncludeFiles>
</ClCompile> </ClCompile>
<Link> <Link>
<GenerateDebugInformation>true</GenerateDebugInformation> <GenerateDebugInformation>true</GenerateDebugInformation>
@ -111,15 +118,17 @@
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>C:\opencv_2.4.9\opencv\build\include;..\..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);</AdditionalIncludeDirectories>
<PreprocessorDefinitions>OPENCV;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<UndefinePreprocessorDefinitions>
</UndefinePreprocessorDefinitions>
</ClCompile> </ClCompile>
<Link> <Link>
<GenerateDebugInformation>true</GenerateDebugInformation> <GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding> <EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences> <OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>C:\opencv_2.4.9\opencv\build\x64\vc12\lib;$(CUDA_PATH)lib\$(PlatformName);$(cudnn)\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> <AdditionalLibraryDirectories>C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>..\..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cudnn.lib;%(AdditionalDependencies)</AdditionalDependencies> <AdditionalDependencies>..\..\3rdparty\lib\x86\pthreadVC2.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">

@ -615,7 +615,7 @@ void binary_align_weights(convolutional_layer *l)
binarize_weights(l->weights, m, k, l->binary_weights); binarize_weights(l->weights, m, k, l->binary_weights);
size_t align_weights_size = new_lda * m; size_t align_weights_size = new_lda * m;
l->align_bit_weights_size = align_weights_size / 8;// +1; l->align_bit_weights_size = align_weights_size / 8 + 1;
float *align_weights = calloc(align_weights_size, sizeof(float)); float *align_weights = calloc(align_weights_size, sizeof(float));
l->align_bit_weights = calloc(l->align_bit_weights_size, sizeof(char)); l->align_bit_weights = calloc(l->align_bit_weights_size, sizeof(char));

@ -1719,6 +1719,25 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride,
} }
} }
// Returns the number of set bits in the 64-bit value `val64`.
//
// A native 64-bit population-count intrinsic is used where the build target
// provides one; otherwise the value is split into its low and high 32-bit
// halves and the two 32-bit counts are summed.
static inline int popcnt_64(uint64_t val64) {
#ifdef WIN32  // Windows
#ifdef _WIN64 // Windows 64-bit
    int tmp_count = (int)__popcnt64(val64);
#else         // Windows 32-bit
    // __popcnt takes a 32-bit operand: count the low word, then the high word.
    int tmp_count = (int)__popcnt((unsigned int)val64);
    tmp_count += (int)__popcnt((unsigned int)(val64 >> 32));
#endif
#else         // Linux
#ifdef __x86_64__ // Linux 64-bit
    int tmp_count = __builtin_popcountll(val64);
#else         // Linux 32-bit
    // __builtin_popcount takes an unsigned int: count the low word, then shift
    // the high word down so it is counted too (instead of the low word twice).
    int tmp_count = __builtin_popcount((unsigned int)val64);
    tmp_count += __builtin_popcount((unsigned int)(val64 >> 32));
#endif
#endif
    return tmp_count;
}
void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
unsigned char *A, int lda, unsigned char *A, int lda,
unsigned char *B, int ldb, unsigned char *B, int ldb,
@ -1739,11 +1758,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8)); uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8));
uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64);
#ifdef WIN32 int tmp_count = popcnt_64(c_bit64);
int tmp_count = __popcnt64(c_bit64);
#else
int tmp_count = __builtin_popcountll(c_bit64);
#endif
if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits
count += tmp_count; count += tmp_count;

@ -33,8 +33,8 @@ void free_layer(layer l)
if (l.scale_updates) free(l.scale_updates); if (l.scale_updates) free(l.scale_updates);
if (l.weights) free(l.weights); if (l.weights) free(l.weights);
if (l.weight_updates) free(l.weight_updates); if (l.weight_updates) free(l.weight_updates);
if (l.weights) free(l.align_bit_weights); if (l.align_bit_weights) free(l.align_bit_weights);
if (l.weights) free(l.mean_arr); if (l.mean_arr) free(l.mean_arr);
if (l.delta) free(l.delta); if (l.delta) free(l.delta);
if (l.output) free(l.output); if (l.output) free(l.output);
if (l.squared) free(l.squared); if (l.squared) free(l.squared);
@ -84,6 +84,12 @@ void free_layer(layer l)
if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu);
if (l.x_gpu) cuda_free(l.x_gpu); if (l.x_gpu) cuda_free(l.x_gpu);
if (l.x_norm_gpu) cuda_free(l.x_norm_gpu); if (l.x_norm_gpu) cuda_free(l.x_norm_gpu);
if (l.align_bit_weights_gpu) cuda_free(l.align_bit_weights_gpu);
if (l.mean_arr_gpu) cuda_free(l.mean_arr_gpu);
if (l.align_workspace_gpu) cuda_free(l.align_workspace_gpu);
if (l.transposed_align_workspace_gpu) cuda_free(l.transposed_align_workspace_gpu);
if (l.weights_gpu) cuda_free(l.weights_gpu); if (l.weights_gpu) cuda_free(l.weights_gpu);
if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu);
if (l.weights_gpu16) cuda_free(l.weights_gpu16); if (l.weights_gpu16) cuda_free(l.weights_gpu16);

@ -374,7 +374,8 @@ int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh,
for(n = 0; n < l.n; ++n){ for(n = 0; n < l.n; ++n){
int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4);
float objectness = predictions[obj_index]; float objectness = predictions[obj_index];
if(objectness <= thresh) continue; //if(objectness <= thresh) continue;
if (objectness > thresh) {
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
dets[count].objectness = objectness; dets[count].objectness = objectness;
@ -387,6 +388,7 @@ int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh,
++count; ++count;
} }
} }
}
correct_yolo_boxes(dets, count, w, h, netw, neth, relative, letter); correct_yolo_boxes(dets, count, w, h, netw, neth, relative, letter);
return count; return count;
} }

Loading…
Cancel
Save