Fixed bug for 32-bit compilation without GPU.

pull/1816/head
AlexeyAB 7 years ago
parent d487bdf471
commit 31ac46ba22
  1. 21
      build/darknet/darknet_no_gpu.vcxproj
  2. 2
      src/convolutional_layer.c
  3. 25
      src/gemm.c
  4. 10
      src/layer.c
  5. 22
      src/yolo_layer.c

@ -78,9 +78,15 @@
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;_MBCS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<UndefinePreprocessorDefinitions>OPENCV;</UndefinePreprocessorDefinitions>
<ForcedIncludeFiles>stdlib.h;crtdbg.h;%(ForcedIncludeFiles)</ForcedIncludeFiles>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>..\..\3rdparty\lib\x86\pthreadVC2.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
@ -89,9 +95,10 @@
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_MBCS;OPENCV;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<UndefinePreprocessorDefinitions>CUDNN</UndefinePreprocessorDefinitions>
<PreprocessorDefinitions>_MBCS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<UndefinePreprocessorDefinitions>OPENCV;CUDNN</UndefinePreprocessorDefinitions>
<OpenMPSupport>true</OpenMPSupport>
<ForcedIncludeFiles>stdlib.h;crtdbg.h;%(ForcedIncludeFiles)</ForcedIncludeFiles>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
@ -111,15 +118,17 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<AdditionalIncludeDirectories>C:\opencv_2.4.9\opencv\build\include;..\..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include</AdditionalIncludeDirectories>
<PreprocessorDefinitions>OPENCV;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<UndefinePreprocessorDefinitions>
</UndefinePreprocessorDefinitions>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>C:\opencv_2.4.9\opencv\build\x64\vc12\lib;$(CUDA_PATH)lib\$(PlatformName);$(cudnn)\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>..\..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cudnn.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>..\..\3rdparty\lib\x86\pthreadVC2.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">

@ -615,7 +615,7 @@ void binary_align_weights(convolutional_layer *l)
binarize_weights(l->weights, m, k, l->binary_weights);
size_t align_weights_size = new_lda * m;
l->align_bit_weights_size = align_weights_size / 8;// +1;
l->align_bit_weights_size = align_weights_size / 8 + 1;
float *align_weights = calloc(align_weights_size, sizeof(float));
l->align_bit_weights = calloc(l->align_bit_weights_size, sizeof(char));

@ -1719,6 +1719,25 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride,
}
}
/*
 * Count the number of set bits in a 64-bit word.
 *
 * val64: the value whose 1-bits are counted.
 * Returns the population count, in the range 0..64.
 *
 * Where a 64-bit popcount intrinsic is not selected, the word is split into
 * its low and high 32-bit halves and each half is counted separately.
 */
static inline int popcnt_64(uint64_t val64) {
#ifdef WIN32  // Windows
#ifdef _WIN64 // Windows 64-bit
    int tmp_count = (int)__popcnt64(val64);
#else         // Windows 32-bit: __popcnt64 is not available, count each half
    int tmp_count = (int)__popcnt((uint32_t)val64);
    tmp_count += (int)__popcnt((uint32_t)(val64 >> 32));
#endif
#else         // Linux
#ifdef __x86_64__ // Linux 64-bit
    int tmp_count = __builtin_popcountll(val64);
#else
    // Taken by every non-Windows target that is not x86_64 (32-bit x86, but
    // also other architectures), so both halves must be counted explicitly.
    int tmp_count = __builtin_popcount((uint32_t)val64);
    // The high half must be shifted down; counting val64 again would count
    // the low 32 bits twice and ignore the high 32 bits entirely.
    tmp_count += __builtin_popcount((uint32_t)(val64 >> 32));
#endif
#endif
    return tmp_count;
}
void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
unsigned char *A, int lda,
unsigned char *B, int ldb,
@ -1739,11 +1758,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8));
uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64);
#ifdef WIN32
int tmp_count = __popcnt64(c_bit64);
#else
int tmp_count = __builtin_popcountll(c_bit64);
#endif
int tmp_count = popcnt_64(c_bit64);
if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits
count += tmp_count;

@ -33,8 +33,8 @@ void free_layer(layer l)
if (l.scale_updates) free(l.scale_updates);
if (l.weights) free(l.weights);
if (l.weight_updates) free(l.weight_updates);
if (l.weights) free(l.align_bit_weights);
if (l.weights) free(l.mean_arr);
if (l.align_bit_weights) free(l.align_bit_weights);
if (l.mean_arr) free(l.mean_arr);
if (l.delta) free(l.delta);
if (l.output) free(l.output);
if (l.squared) free(l.squared);
@ -84,6 +84,12 @@ void free_layer(layer l)
if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu);
if (l.x_gpu) cuda_free(l.x_gpu);
if (l.x_norm_gpu) cuda_free(l.x_norm_gpu);
if (l.align_bit_weights_gpu) cuda_free(l.align_bit_weights_gpu);
if (l.mean_arr_gpu) cuda_free(l.mean_arr_gpu);
if (l.align_workspace_gpu) cuda_free(l.align_workspace_gpu);
if (l.transposed_align_workspace_gpu) cuda_free(l.transposed_align_workspace_gpu);
if (l.weights_gpu) cuda_free(l.weights_gpu);
if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu);
if (l.weights_gpu16) cuda_free(l.weights_gpu16);

@ -374,17 +374,19 @@ int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh,
for(n = 0; n < l.n; ++n){
int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4);
float objectness = predictions[obj_index];
if(objectness <= thresh) continue;
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
dets[count].objectness = objectness;
dets[count].classes = l.classes;
for(j = 0; j < l.classes; ++j){
int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j);
float prob = objectness*predictions[class_index];
dets[count].prob[j] = (prob > thresh) ? prob : 0;
//if(objectness <= thresh) continue;
if (objectness > thresh) {
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
dets[count].objectness = objectness;
dets[count].classes = l.classes;
for (j = 0; j < l.classes; ++j) {
int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j);
float prob = objectness*predictions[class_index];
dets[count].prob[j] = (prob > thresh) ? prob : 0;
}
++count;
}
++count;
}
}
correct_yolo_boxes(dets, count, w, h, netw, neth, relative, letter);

Loading…
Cancel
Save