diff --git a/CMakeLists.txt b/CMakeLists.txt index d902e82f..6a5076ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,7 +82,7 @@ set(CMAKE_DEBUG_POSTFIX d) add_definitions(-DUSE_CMAKE_LIBS) if(MSVC) - set(CMAKE_CXX_FLAGS "/wd4013 /wd4018 /wd4028 /wd4047 /wd4068 /wd4090 /wd4101 /wd4113 /wd4133 /wd4190 /wd4244 /wd4267 /wd4305 /wd4477 /wd4996 /fp:fast ${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "/wd4013 /wd4018 /wd4028 /wd4047 /wd4068 /wd4090 /wd4101 /wd4113 /wd4133 /wd4190 /wd4244 /wd4267 /wd4305 /wd4477 /wd4996 /wd4819 /fp:fast ${CMAKE_CXX_FLAGS}") string(REGEX REPLACE "/O2" "/Ox" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) add_definitions(-D_CRT_RAND_S) add_definitions(-DNOMINMAX) @@ -138,15 +138,15 @@ if(ENABLE_CUDA) if (MSVC) if(CUDNN_FOUND) if(OpenCV_FOUND) - set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/DGPU,/DCUDNN,/DOPENCV\" ${CMAKE_CUDA_FLAGS}") + set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/wd4819,/DGPU,/DCUDNN,/DOPENCV\" ${CMAKE_CUDA_FLAGS}") else() - set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/DGPU,/DCUDNN\" ${CMAKE_CUDA_FLAGS}") + set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/wd4819,/DGPU,/DCUDNN\" ${CMAKE_CUDA_FLAGS}") endif() else() if(OpenCV_FOUND) - set(CMAKE_CUDA_FLAGS "-gencode 
arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/DGPU,/DOPENCV\" ${CMAKE_CUDA_FLAGS}") + set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/wd4819,/DGPU,/DOPENCV\" ${CMAKE_CUDA_FLAGS}") else() - set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/DGPU\" ${CMAKE_CUDA_FLAGS}") + set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/wd4819,/DGPU\" ${CMAKE_CUDA_FLAGS}") endif() endif() else() @@ -166,6 +166,25 @@ if(ENABLE_CUDA) endif() endif() +set(ENABLE_ZED_CAMERA "TRUE" CACHE BOOL "Enable ZED Camera support") +if(ENABLE_CUDA) + if(ENABLE_ZED_CAMERA) + find_package(ZED 2 QUIET) + if(ZED_FOUND) + include_directories(${ZED_INCLUDE_DIRS}) + link_directories(${ZED_LIBRARY_DIR}) + add_definitions(-DZED_STEREO) + message("ZED Camera support enabled") + else() + message(WARNING "ZED SDK not found !") + set(ENABLE_ZED_CAMERA "FALSE" CACHE BOOL "Enable ZED Camera support" FORCE) + endif() + endif() +else() + message(WARNING "ZED SDK requires CUDA !") + set(ENABLE_ZED_CAMERA "FALSE" CACHE BOOL "Enable ZED Camera support" FORCE) +endif() + set(CMAKE_THREAD_PREFER_PTHREAD ON) find_package(Threads REQUIRED) if(MSVC) @@ -286,9 +305,14 @@ endif() target_link_libraries(darknet PRIVATE Threads::Threads) target_link_libraries(darklib PRIVATE Threads::Threads) +if(ENABLE_ZED_CAMERA) + target_link_libraries(darknet PRIVATE 
${ZED_LIBRARIES}) + target_link_libraries(uselib PRIVATE ${ZED_LIBRARIES}) +endif() + if(ENABLE_CUDA) - target_link_libraries(darknet PRIVATE curand cublas) - target_link_libraries(darklib PRIVATE curand cublas) + target_link_libraries(darknet PRIVATE curand cublas cuda) + target_link_libraries(darklib PRIVATE curand cublas cuda) set_target_properties(darklib PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) endif() diff --git a/Makefile b/Makefile index dd6e6e2e..d184ca17 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ OPENCV=0 AVX=0 OPENMP=0 LIBSO=0 +ZED_CAMERA=0 # set GPU=1 and CUDNN=1 to speedup on GPU # set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision on Tensor Cores) GPU: Volta, Xavier, Turing and higher @@ -111,9 +112,15 @@ CFLAGS+= -DCUDNN_HALF ARCH+= -gencode arch=compute_70,code=[sm_70,compute_70] endif -OBJ=http_stream.o gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o upsample_layer.o lstm_layer.o -ifeq ($(GPU), 1) -LDFLAGS+= -lstdc++ +ifeq ($(ZED_CAMERA), 1) +CFLAGS+= -DZED_STEREO -I/usr/local/zed/include +LDFLAGS+= -L/usr/local/zed/lib -lsl_core -lsl_input -lsl_zed +#-lstdc++ -D_GLIBCXX_USE_CXX11_ABI=0 +endif + +OBJ=http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o 
cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o upsample_layer.o lstm_layer.o +ifeq ($(GPU), 1) +LDFLAGS+= -lstdc++ OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o endif diff --git a/README.md b/README.md index 245a0f06..9b740ae2 100644 --- a/README.md +++ b/README.md @@ -76,9 +76,9 @@ You can get cfg-files by path: `darknet/cfg/` ##### Examples of results -[![Everything Is AWESOME](http://img.youtube.com/vi/VOC3huqHrss/0.jpg)](https://www.youtube.com/watch?v=VOC3huqHrss "Everything Is AWESOME") +[![Yolo v3](http://img.youtube.com/vi/VOC3huqHrss/0.jpg)](https://www.youtube.com/watch?v=MPU2HistivI "Yolo v3") -Others: https://www.youtube.com/channel/UC7ev3hNVkx4DzZ3LO19oebg +Others: https://www.youtube.com/user/pjreddie/videos ### Improvements in this repository @@ -236,7 +236,8 @@ Then add to your created project: * file `darknet.h` from `\include` directory - (right click on project) -> properties -> Linker -> General -> Additional Library Directories, put here: -`C:\opencv_3.0\opencv\build\x64\vc14\lib;$(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;%(AdditionalLibraryDirectories)` +`C:\opencv_3.0\opencv\build\x64\vc14\lib;$(CUDA_PATH)\lib\$(PlatformName);$(CUDNN)\lib\x64;%(AdditionalLibraryDirectories)` + - (right click on project) -> properties -> Linker -> Input -> Additional dependecies, put here: 
`..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cudnn.lib;%(AdditionalDependencies)` @@ -278,7 +279,7 @@ Then add to your created project: (**Note:** To disable Loss-Window use flag `-dont_show`. If you are using CPU, try `darknet_no_gpu.exe` instead of `darknet.exe`.) -If required change pathes in the file `build\darknet\x64\data\voc.data` +If required change pathes in the file `build\darknet\cfg\voc.data` More information about training by the link: http://pjreddie.com/darknet/yolo/#train-voc @@ -290,7 +291,7 @@ More information about training by the link: http://pjreddie.com/darknet/yolo/#t 2. Then stop and by using partially-trained model `/backup/yolov3-voc_1000.weights` run training with multigpu (up to 4 GPUs): `darknet.exe detector train cfg/voc.data cfg/yolov3-voc.cfg /backup/yolov3-voc_1000.weights -gpus 0,1,2,3` -Only for small datasets sometimes better to decrease learning rate, for 4 GPUs set `learning_rate = 0.00025` (i.e. learning_rate = 0.001 / GPUs). In this case also increase 4x times `burn_in =` and `max_batches =` in your cfg-file. I.e. use `burn_in = 4000` instead of `1000`. +Only for small datasets sometimes better to decrease learning rate, for 4 GPUs set `learning_rate = 0.00025` (i.e. learning_rate = 0.001 / GPUs). In this case also increase 4x times `burn_in =` and `max_batches =` in your cfg-file. I.e. use `burn_in = 4000` instead of `1000`. Same goes for `steps=` if `policy=steps` is set. https://groups.google.com/d/msg/darknet/NbJqonJBTSY/Te5PfIpuCAAJ @@ -344,11 +345,13 @@ Training Yolo v3: 5. You should label each object on images from your dataset. 
Use this visual GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 & v3: https://github.com/AlexeyAB/Yolo_mark -It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line: `<object-class> <x> <y> <width> <height>` +It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line: + +`<object-class> <x> <y> <width> <height>` Where: * `<object-class>` - integer object number from `0` to `(classes-1)` - * `<x> <y> <width> <height>` - float values relative to width and height of image, it can be equal from (0.0 to 1.0] + * `<x> <y> <width> <height>` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]` * for example: `<x> = <absolute_x> / <image_width>` or `<height> = <absolute_height> / <image_height>` * atention: `<x> <y>` - are center of rectangle (are not top-left corner) diff --git a/build.ps1 b/build.ps1 index 1bed249c..1dfcc4fd 100755 --- a/build.ps1 +++ b/build.ps1 @@ -31,12 +31,16 @@ if ($vcpkg_triplet -Match "x86") { if ($null -eq (Get-Command "cl.exe" -ErrorAction SilentlyContinue)) { $vstype = "Professional" if (Test-Path "C:\Program Files (x86)\Microsoft Visual Studio\2017\${vstype}\Common7\Tools") { - Write-Host "Found VS 2017 Professional" } else { - $vstype = "Community" - Write-Host "Found VS 2017 Community" + $vstype = "Enterprise" + if (Test-Path "C:\Program Files (x86)\Microsoft Visual Studio\2017\${vstype}\Common7\Tools") { + } + else { + $vstype = "Community" + } } + Write-Host "Found VS 2017 ${vstype}" Push-Location "C:\Program Files (x86)\Microsoft Visual Studio\2017\${vstype}\Common7\Tools" cmd /c "VsDevCmd.bat -arch=x64 & set" | ForEach-Object { diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj index 1858f063..f46efa15 100644 --- a/build/darknet/darknet.vcxproj +++ b/build/darknet/darknet.vcxproj @@ -96,7 +96,7 @@ true - 
C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;C:\opencv_2.4.13\opencv\build\x64\vc14\lib;$(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) + C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;C:\opencv_2.4.13\opencv\build\x64\vc14\lib;$(CUDA_PATH)\lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) $(OutDir)\$(TargetName)$(TargetExt) ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;%(AdditionalDependencies) true @@ -133,7 +133,7 @@ true true C:\opencv_3.0\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(CUDNN)\include;$(cudnn)\include - CUDNN_HALF;OPENCV;CUDNN;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + OPENCV;CUDNN_HALF;CUDNN;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) c11 c++1y CompileAsCpp @@ -146,7 +146,7 @@ true true true - C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;$(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) + C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;$(CUDA_PATH)\lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;%(AdditionalDependencies) $(OutDir)\$(TargetName)$(TargetExt) @@ -188,8 +188,8 @@ - + @@ -251,7 +251,7 @@ - + diff --git a/build/darknet/x64/cfg/crnn.train.cfg b/build/darknet/x64/cfg/crnn.train.cfg index dcc12d74..e0e0b54c 100644 --- a/build/darknet/x64/cfg/crnn.train.cfg +++ 
b/build/darknet/x64/cfg/crnn.train.cfg @@ -11,8 +11,16 @@ policy=steps steps=1000,1500 scales=.1,.1 -[rnn] +try_fix_nan=1 + +[connected] +output=256 +activation=leaky + +[crnn] batch_normalize=1 +size=1 +pad=0 output = 1024 hidden=1024 activation=leaky diff --git a/build/darknet/yolo_console_dll.vcxproj b/build/darknet/yolo_console_dll.vcxproj index a965fcaa..c93844b7 100644 --- a/build/darknet/yolo_console_dll.vcxproj +++ b/build/darknet/yolo_console_dll.vcxproj @@ -51,8 +51,7 @@ MultiByte - - + @@ -93,7 +92,7 @@ _CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) - C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib + C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;$(CUDA_PATH)\lib\x64\;$(ZED_SDK_ROOT_DIR)\lib @@ -117,7 +116,7 @@ true true true - ..\..\include;C:\opencv_source\opencv\bin\install\include + ..\..\include;C:\opencv_source\opencv\bin\install\include;$(CUDA_PATH)\include;$(ZED_SDK_ROOT_DIR)\include OPENCV;_CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) Async true @@ -125,7 +124,8 @@ true true - C:\opencv_source\opencv\bin\install\x64\vc14\lib;C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib + C:\opencv_source\opencv\bin\install\x64\vc14\lib;C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;$(CUDA_PATH)\lib\x64\;$(ZED_SDK_ROOT_DIR)\lib + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) @@ -138,6 +138,5 @@ - - + \ No newline at end of file diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj index 97ff65c6..16050e9e 100644 --- a/build/darknet/yolo_cpp_dll.vcxproj +++ b/build/darknet/yolo_cpp_dll.vcxproj @@ -100,9 +100,9 @@ true - $(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) + 
$(CUDA_PATH)\lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) $(OutDir)\$(TargetName)$(TargetExt) - ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;%(AdditionalDependencies) + ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cuda.lib;%(AdditionalDependencies) true @@ -142,16 +142,15 @@ c++1y CompileAsCpp Default - - + OPENCV; true true true true - C:\opencv_3.0\opencv\build\x64\vc14\lib;$(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) - ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;%(AdditionalDependencies) + C:\opencv_3.0\opencv\build\x64\vc14\lib;$(CUDA_PATH)\lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) + ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cuda.lib;%(AdditionalDependencies) $(OutDir)\$(TargetName)$(TargetExt) @@ -192,8 +191,8 @@ - + @@ -257,7 +256,7 @@ - + diff --git a/cfg/crnn.train.cfg b/cfg/crnn.train.cfg index dcc12d74..e0e0b54c 100644 --- a/cfg/crnn.train.cfg +++ b/cfg/crnn.train.cfg @@ -11,8 +11,16 @@ policy=steps steps=1000,1500 scales=.1,.1 -[rnn] +try_fix_nan=1 + +[connected] +output=256 +activation=leaky + +[crnn] batch_normalize=1 +size=1 +pad=0 output = 1024 hidden=1024 activation=leaky diff --git a/include/darknet.h b/include/darknet.h index 8ca8b493..0a1451e3 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -792,10 +792,11 @@ LIB_API void free_data(data d); LIB_API pthread_t load_data(load_args args); LIB_API pthread_t load_data_in_thread(load_args args); -// cuda.h +// dark_cuda.h LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n); LIB_API void cuda_pull_array_async(float *x_gpu, float *x, size_t n); LIB_API void cuda_set_device(int n); +LIB_API void *cuda_get_context(); // utils.h LIB_API void free_ptrs(void **ptrs, int n); @@ -809,6 +810,8 @@ LIB_API metadata get_metadata(char *file); // 
http_stream.h +LIB_API void delete_json_sender(); +LIB_API void send_json_custom(char const* send_buf, int port, int timeout); LIB_API double get_time_point(); void start_timer(); void stop_timer(); diff --git a/include/yolo_v2_class.hpp b/include/yolo_v2_class.hpp index efe2b5f3..bf3b28aa 100644 --- a/include/yolo_v2_class.hpp +++ b/include/yolo_v2_class.hpp @@ -25,6 +25,7 @@ struct bbox_t { unsigned int obj_id; // class of object - from range [0, classes-1] unsigned int track_id; // tracking id for video (0 - untracked, 1 - inf - tracked object) unsigned int frames_counter; // counter of frames on which the object was detected + float x_3d, y_3d, z_3d; // center of object (in Meters) if ZED 3D Camera is used }; struct image_t { @@ -43,6 +44,10 @@ struct bbox_t_container { #include #include #include +#include +#include +#include +#include #ifdef OPENCV #include // C++ @@ -60,8 +65,8 @@ extern "C" LIB_API int get_device_name(int gpu, char* deviceName); class Detector { std::shared_ptr detector_gpu_ptr; std::deque> prev_bbox_vec_deque; - const int cur_gpu_id; public: + const int cur_gpu_id; float nms = .4; bool wait_stream; @@ -79,6 +84,11 @@ public: LIB_API std::vector tracking_id(std::vector cur_bbox_vec, bool const change_history = true, int const frames_story = 5, int const max_dist = 40); + LIB_API void *get_cuda_context(); + + LIB_API bool send_json_http(std::vector cur_bbox_vec, std::vector obj_names, int frame_id, + std::string filename = std::string(), int timeout = 400000, int port = 8070); + std::vector detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false) { if (img.data == NULL) @@ -115,7 +125,10 @@ public: static std::shared_ptr mat_to_image(cv::Mat img_src) { cv::Mat img; - cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR); + if (img_src.channels() == 4) cv::cvtColor(img_src, img, cv::COLOR_RGBA2BGR); + else if (img_src.channels() == 3) cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR); + else if (img_src.channels() == 
1) cv::cvtColor(img_src, img, cv::COLOR_GRAY2BGR); + else std::cerr << " Warning: img_src.channels() is not 1, 3 or 4. It is = " << img_src.channels() << std::endl; std::shared_ptr image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; }); std::shared_ptr ipl_small = std::make_shared(img); *image_ptr = ipl_to_image(ipl_small.get()); @@ -166,7 +179,7 @@ private: #endif // OPENCV }; - +// -------------------------------------------------------------------------------- #if defined(TRACK_OPTFLOW) && defined(OPENCV) && defined(GPU) @@ -183,7 +196,7 @@ public: const int flow_error; - Tracker_optflow(int _gpu_id = 0, int win_size = 9, int max_level = 3, int iterations = 8000, int _flow_error = -1) : + Tracker_optflow(int _gpu_id = 0, int win_size = 15, int max_level = 3, int iterations = 8000, int _flow_error = -1) : gpu_count(cv::cuda::getCudaEnabledDeviceCount()), gpu_id(std::min(_gpu_id, gpu_count-1)), flow_error((_flow_error > 0)? _flow_error:(win_size*4)) { @@ -249,18 +262,32 @@ public: if (old_gpu_id != gpu_id) cv::cuda::setDevice(gpu_id); - if (src_mat.channels() == 3) { + if (src_mat.channels() == 1 || src_mat.channels() == 3 || src_mat.channels() == 4) { if (src_mat_gpu.cols == 0) { src_mat_gpu = cv::cuda::GpuMat(src_mat.size(), src_mat.type()); src_grey_gpu = cv::cuda::GpuMat(src_mat.size(), CV_8UC1); } - update_cur_bbox_vec(_cur_bbox_vec); + if (src_mat.channels() == 1) { + src_mat_gpu.upload(src_mat, stream); + src_mat_gpu.copyTo(src_grey_gpu); + } + else if (src_mat.channels() == 3) { + src_mat_gpu.upload(src_mat, stream); + cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 1, stream); + } + else if (src_mat.channels() == 4) { + src_mat_gpu.upload(src_mat, stream); + cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGRA2GRAY, 1, stream); + } + else { + std::cerr << " Warning: src_mat.channels() is not: 1, 3 or 4. 
It is = " << src_mat.channels() << " \n"; + return; + } - //src_grey_gpu.upload(src_mat, stream); // use BGR - src_mat_gpu.upload(src_mat, stream); - cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 1, stream); } + update_cur_bbox_vec(_cur_bbox_vec); + if (old_gpu_id != gpu_id) cv::cuda::setDevice(old_gpu_id); } @@ -355,7 +382,7 @@ public: const int flow_error; - Tracker_optflow(int win_size = 9, int max_level = 3, int iterations = 8000, int _flow_error = -1) : + Tracker_optflow(int win_size = 15, int max_level = 3, int iterations = 8000, int _flow_error = -1) : flow_error((_flow_error > 0)? _flow_error:(win_size*4)) { sync_PyrLKOpticalFlow = cv::SparsePyrLKOpticalFlow::create(); @@ -396,12 +423,20 @@ public: void update_tracking_flow(cv::Mat new_src_mat, std::vector _cur_bbox_vec) { - if (new_src_mat.channels() == 3) { - - update_cur_bbox_vec(_cur_bbox_vec); - + if (new_src_mat.channels() == 1) { + src_grey = new_src_mat.clone(); + } + else if (new_src_mat.channels() == 3) { cv::cvtColor(new_src_mat, src_grey, CV_BGR2GRAY, 1); } + else if (new_src_mat.channels() == 4) { + cv::cvtColor(new_src_mat, src_grey, CV_BGRA2GRAY, 1); + } + else { + std::cerr << " Warning: new_src_mat.channels() is not: 1, 3 or 4. 
It is = " << new_src_mat.channels() << " \n"; + return; + } + update_cur_bbox_vec(_cur_bbox_vec); } @@ -416,6 +451,7 @@ public: if (src_grey.rows != dst_grey.rows || src_grey.cols != dst_grey.cols) { src_grey = dst_grey.clone(); + //std::cerr << " Warning: src_grey.rows != dst_grey.rows || src_grey.cols != dst_grey.cols \n"; return cur_bbox_vec; } @@ -611,56 +647,361 @@ public: } } }; -#endif // OPENCV -//extern "C" { -#endif // __cplusplus -/* - // C - wrappers - LIB_API void create_detector(char const* cfg_filename, char const* weight_filename, int gpu_id); - LIB_API void delete_detector(); - LIB_API bbox_t* detect_custom(image_t img, float thresh, bool use_mean, int *result_size); - LIB_API bbox_t* detect_resized(image_t img, int init_w, int init_h, float thresh, bool use_mean, int *result_size); - LIB_API bbox_t* detect(image_t img, int *result_size); - LIB_API image_t load_img(char *image_filename); - LIB_API void free_img(image_t m); +class track_kalman_t +{ + int track_id_counter; + std::chrono::steady_clock::time_point global_last_time; + float dT; -#ifdef __cplusplus -} // extern "C" +public: + int max_objects; // max objects for tracking + int min_frames; // min frames to consider an object as detected + const float max_dist; // max distance (in px) to track with the same ID + cv::Size img_size; // max value of x,y,w,h + + struct tst_t { + int track_id; + int state_id; + std::chrono::steady_clock::time_point last_time; + int detection_count; + tst_t() : track_id(-1), state_id(-1) {} + }; + std::vector track_id_state_id_time; + std::vector result_vec_pred; -static std::shared_ptr c_detector_ptr; -static std::vector c_result_vec; + struct one_kalman_t; + std::vector kalman_vec; -void create_detector(char const* cfg_filename, char const* weight_filename, int gpu_id) { - c_detector_ptr = std::make_shared(cfg_filename, weight_filename, gpu_id); -} + struct one_kalman_t + { + cv::KalmanFilter kf; + cv::Mat state; + cv::Mat meas; + int measSize, stateSize, 
contrSize; + + void set_delta_time(float dT) { + kf.transitionMatrix.at(2) = dT; + kf.transitionMatrix.at(9) = dT; + } -void delete_detector() { c_detector_ptr.reset(); } + void set(bbox_t box) + { + initialize_kalman(); + + kf.errorCovPre.at(0) = 1; // px + kf.errorCovPre.at(7) = 1; // px + kf.errorCovPre.at(14) = 1; + kf.errorCovPre.at(21) = 1; + kf.errorCovPre.at(28) = 1; // px + kf.errorCovPre.at(35) = 1; // px + + state.at(0) = box.x; + state.at(1) = box.y; + state.at(2) = 0; + state.at(3) = 0; + state.at(4) = box.w; + state.at(5) = box.h; + // <<<< Initialization + + kf.statePost = state; + } -bbox_t* detect_custom(image_t img, float thresh, bool use_mean, int *result_size) { - c_result_vec = static_cast(c_detector_ptr.get())->detect(img, thresh, use_mean); - *result_size = c_result_vec.size(); - return c_result_vec.data(); -} + // Kalman.correct() calculates: statePost = statePre + gain * (z(k)-measurementMatrix*statePre); + // corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) + void correct(bbox_t box) { + meas.at(0) = box.x; + meas.at(1) = box.y; + meas.at(2) = box.w; + meas.at(3) = box.h; + + kf.correct(meas); + + bbox_t new_box = predict(); + if (new_box.w == 0 || new_box.h == 0) { + set(box); + //std::cerr << " force set(): track_id = " << box.track_id << + // ", x = " << box.x << ", y = " << box.y << ", w = " << box.w << ", h = " << box.h << std::endl; + } + } -bbox_t* detect_resized(image_t img, int init_w, int init_h, float thresh, bool use_mean, int *result_size) { - c_result_vec = static_cast(c_detector_ptr.get())->detect_resized(img, init_w, init_h, thresh, use_mean); - *result_size = c_result_vec.size(); - return c_result_vec.data(); -} + // Kalman.predict() calculates: statePre = TransitionMatrix * statePost; + // predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) + bbox_t predict() { + bbox_t box; + state = kf.predict(); + + box.x = state.at(0); + box.y = state.at(1); + box.w = state.at(4); + box.h = state.at(5); + return box; + } -bbox_t* 
detect(image_t img, int *result_size) { - return detect_custom(img, 0.24, true, result_size); -} + void initialize_kalman() + { + kf = cv::KalmanFilter(stateSize, measSize, contrSize, CV_32F); + + // Transition State Matrix A + // Note: set dT at each processing step! + // [ 1 0 dT 0 0 0 ] + // [ 0 1 0 dT 0 0 ] + // [ 0 0 1 0 0 0 ] + // [ 0 0 0 1 0 0 ] + // [ 0 0 0 0 1 0 ] + // [ 0 0 0 0 0 1 ] + cv::setIdentity(kf.transitionMatrix); + + // Measure Matrix H + // [ 1 0 0 0 0 0 ] + // [ 0 1 0 0 0 0 ] + // [ 0 0 0 0 1 0 ] + // [ 0 0 0 0 0 1 ] + kf.measurementMatrix = cv::Mat::zeros(measSize, stateSize, CV_32F); + kf.measurementMatrix.at(0) = 1.0f; + kf.measurementMatrix.at(7) = 1.0f; + kf.measurementMatrix.at(16) = 1.0f; + kf.measurementMatrix.at(23) = 1.0f; + + // Process Noise Covariance Matrix Q - result smoother with lower values (1e-2) + // [ Ex 0 0 0 0 0 ] + // [ 0 Ey 0 0 0 0 ] + // [ 0 0 Ev_x 0 0 0 ] + // [ 0 0 0 Ev_y 0 0 ] + // [ 0 0 0 0 Ew 0 ] + // [ 0 0 0 0 0 Eh ] + //cv::setIdentity(kf.processNoiseCov, cv::Scalar(1e-3)); + kf.processNoiseCov.at(0) = 1e-2; + kf.processNoiseCov.at(7) = 1e-2; + kf.processNoiseCov.at(14) = 1e-2;// 5.0f; + kf.processNoiseCov.at(21) = 1e-2;// 5.0f; + kf.processNoiseCov.at(28) = 5e-3; + kf.processNoiseCov.at(35) = 5e-3; + + // Measures Noise Covariance Matrix R - result smoother with higher values (1e-1) + cv::setIdentity(kf.measurementNoiseCov, cv::Scalar(1e-1)); + + //cv::setIdentity(kf.errorCovPost, cv::Scalar::all(1e-2)); + // <<<< Kalman Filter + + set_delta_time(0); + } -image_t load_img(char *image_filename) { - return static_cast(c_detector_ptr.get())->load_image(image_filename); -} -void free_img(image_t m) { - static_cast(c_detector_ptr.get())->free_image(m); -} + + one_kalman_t(int _stateSize = 6, int _measSize = 4, int _contrSize = 0) : + kf(_stateSize, _measSize, _contrSize, CV_32F), measSize(_measSize), stateSize(_stateSize), contrSize(_contrSize) + { + state = cv::Mat(stateSize, 1, CV_32F); // [x,y,v_x,v_y,w,h] + 
meas = cv::Mat(measSize, 1, CV_32F); // [z_x,z_y,z_w,z_h] + //cv::Mat procNoise(stateSize, 1, type) + // [E_x,E_y,E_v_x,E_v_y,E_w,E_h] + + initialize_kalman(); + } + }; + // ------------------------------------------ + + + + track_kalman_t(int _max_objects = 1000, int _min_frames = 3, float _max_dist = 40, cv::Size _img_size = cv::Size(10000, 10000)) : + max_objects(_max_objects), min_frames(_min_frames), max_dist(_max_dist), img_size(_img_size), + track_id_counter(0) + { + kalman_vec.resize(max_objects); + track_id_state_id_time.resize(max_objects); + result_vec_pred.resize(max_objects); + } + + float calc_dt() { + dT = std::chrono::duration(std::chrono::steady_clock::now() - global_last_time).count(); + return dT; + } + + static float get_distance(float src_x, float src_y, float dst_x, float dst_y) { + return sqrtf((src_x - dst_x)*(src_x - dst_x) + (src_y - dst_y)*(src_y - dst_y)); + } + + void clear_old_states() { + // clear old bboxes + for (size_t state_id = 0; state_id < track_id_state_id_time.size(); ++state_id) + { + float time_sec = std::chrono::duration(std::chrono::steady_clock::now() - track_id_state_id_time[state_id].last_time).count(); + float time_wait = 0.5; // 0.5 second + if (track_id_state_id_time[state_id].track_id > -1) + { + if ((result_vec_pred[state_id].x > img_size.width) || + (result_vec_pred[state_id].y > img_size.height)) + { + track_id_state_id_time[state_id].track_id = -1; + } + + if (time_sec >= time_wait || track_id_state_id_time[state_id].detection_count < 0) { + //std::cerr << " remove track_id = " << track_id_state_id_time[state_id].track_id << ", state_id = " << state_id << std::endl; + track_id_state_id_time[state_id].track_id = -1; // remove bbox + } + } + } + } + + tst_t get_state_id(bbox_t find_box, std::vector &busy_vec) + { + tst_t tst; + tst.state_id = -1; + + float min_dist = std::numeric_limits::max(); + + for (size_t i = 0; i < max_objects; ++i) + { + if (track_id_state_id_time[i].track_id > -1 && 
result_vec_pred[i].obj_id == find_box.obj_id && busy_vec[i] == false) + { + bbox_t pred_box = result_vec_pred[i]; + + float dist = get_distance(pred_box.x, pred_box.y, find_box.x, find_box.y); + + float movement_dist = std::max(max_dist, static_cast(std::max(pred_box.w, pred_box.h)) ); + + if ((dist < movement_dist) && (dist < min_dist)) { + min_dist = dist; + tst.state_id = i; + } + } + } + + if (tst.state_id > -1) { + track_id_state_id_time[tst.state_id].last_time = std::chrono::steady_clock::now(); + track_id_state_id_time[tst.state_id].detection_count = std::max(track_id_state_id_time[tst.state_id].detection_count + 2, 10); + tst = track_id_state_id_time[tst.state_id]; + busy_vec[tst.state_id] = true; + } + else { + //std::cerr << " Didn't find: obj_id = " << find_box.obj_id << ", x = " << find_box.x << ", y = " << find_box.y << + // ", track_id_counter = " << track_id_counter << std::endl; + } + + return tst; + } + + tst_t new_state_id(std::vector &busy_vec) + { + tst_t tst; + // find empty cell to add new track_id + auto it = std::find_if(track_id_state_id_time.begin(), track_id_state_id_time.end(), [&](tst_t &v) { return v.track_id == -1; }); + if (it != track_id_state_id_time.end()) { + it->state_id = it - track_id_state_id_time.begin(); + //it->track_id = track_id_counter++; + it->track_id = 0; + it->last_time = std::chrono::steady_clock::now(); + it->detection_count = 1; + tst = *it; + busy_vec[it->state_id] = true; + } + + return tst; + } + + std::vector find_state_ids(std::vector result_vec) + { + std::vector tst_vec(result_vec.size()); + + std::vector busy_vec(max_objects, false); + + for (size_t i = 0; i < result_vec.size(); ++i) + { + tst_t tst = get_state_id(result_vec[i], busy_vec); + int state_id = tst.state_id; + int track_id = tst.track_id; + + // if new state_id + if (state_id < 0) { + tst = new_state_id(busy_vec); + state_id = tst.state_id; + track_id = tst.track_id; + if (state_id > -1) { + kalman_vec[state_id].set(result_vec[i]); + 
//std::cerr << " post: "; + } + } + + //std::cerr << " track_id = " << track_id << ", state_id = " << state_id << + // ", x = " << result_vec[i].x << ", det_count = " << tst.detection_count << std::endl; + + if (state_id > -1) { + tst_vec[i] = tst; + result_vec_pred[state_id] = result_vec[i]; + result_vec_pred[state_id].track_id = track_id; + } + } + + return tst_vec; + } + + std::vector predict() + { + clear_old_states(); + std::vector result_vec; + + for (size_t i = 0; i < max_objects; ++i) + { + tst_t tst = track_id_state_id_time[i]; + if (tst.track_id > -1) { + bbox_t box = kalman_vec[i].predict(); + + result_vec_pred[i].x = box.x; + result_vec_pred[i].y = box.y; + result_vec_pred[i].w = box.w; + result_vec_pred[i].h = box.h; + + if (tst.detection_count >= min_frames) + { + if (track_id_state_id_time[i].track_id == 0) { + track_id_state_id_time[i].track_id = ++track_id_counter; + result_vec_pred[i].track_id = track_id_counter; + } + + result_vec.push_back(result_vec_pred[i]); + } + } + } + //std::cerr << " result_vec.size() = " << result_vec.size() << std::endl; + + //global_last_time = std::chrono::steady_clock::now(); + + return result_vec; + } + + + std::vector correct(std::vector result_vec) + { + calc_dt(); + clear_old_states(); + + for (size_t i = 0; i < max_objects; ++i) + track_id_state_id_time[i].detection_count--; + + std::vector tst_vec = find_state_ids(result_vec); + + for (size_t i = 0; i < tst_vec.size(); ++i) { + tst_t tst = tst_vec[i]; + int state_id = tst.state_id; + if (state_id > -1) + { + kalman_vec[state_id].set_delta_time(dT); + kalman_vec[state_id].correct(result_vec_pred[state_id]); + } + } + + result_vec = predict(); + + global_last_time = std::chrono::steady_clock::now(); + + return result_vec; + } + +}; +// ---------------------------------------------- +#endif // OPENCV #endif // __cplusplus -*/ -#endif + +#endif // YOLO_V2_CLASS_HPP diff --git a/scripts/README.md b/scripts/README.md index 0c8327f0..36985f81 100644 --- 
a/scripts/README.md +++ b/scripts/README.md @@ -16,6 +16,12 @@ Cityscapes: https://www.cityscapes-dataset.com/ Object Tracking Benchmark: http://cvlab.hanyang.ac.kr/tracker_benchmark/datasets.html +MOT (Multiple object tracking benchmark): https://motchallenge.net/ + +VOT (Visual object tracking): http://www.votchallenge.net/challenges.html + +FREE FLIR Thermal Dataset (infrared): https://www.flir.eu/oem/adas/adas-dataset-form/ + MARS: http://www.liangzheng.com.cn/Project/project_mars.html Market-1501: http://www.liangzheng.org/Project/project_reid.html @@ -30,7 +36,22 @@ Visual Question Answering: https://visualqa.org/download.html Large Movie Review Dataset: http://ai.stanford.edu/~amaas/data/sentiment/ +KITTI (for autonomous driving): http://www.cvlibs.net/datasets/kitti/ + +nuScenes (for autonomous driving): https://www.nuscenes.org/overview + +---- + +Wikipedia's List of datasets: https://en.wikipedia.org/wiki/List_of_datasets_for_machine-learning_research Other datasets (Music, Natural Images, Artificial Datasets, Faces, Text, Speech, Recommendation Systems, Misc): http://deeplearning.net/datasets/ -25 datasets: https://www.analyticsvidhya.com/blog/2018/03/comprehensive-collection-deep-learning-datasets/ \ No newline at end of file +25 datasets: https://www.analyticsvidhya.com/blog/2018/03/comprehensive-collection-deep-learning-datasets/ + +List of datasets: https://riemenschneider.hayko.at/vision/dataset/index.php + +Another list of datasets: http://homepages.inf.ed.ac.uk/rbf/CVonline/Imagedbase.htm + +Pedestrian DATASETs for Vision based Detection and Tracking: https://hemprasad.wordpress.com/2014/11/08/pedestrian-datasets-for-vision-based-detection-and-tracking/ + +TrackingNet: https://tracking-net.org/ \ No newline at end of file diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu index 0144ca51..6c9445a6 100644 --- a/src/activation_kernels.cu +++ b/src/activation_kernels.cu @@ -4,7 +4,7 @@ #include "cublas_v2.h" #include "activations.h" 
-#include "cuda.h" +#include "dark_cuda.h" __device__ float lhtan_activate_kernel(float x) diff --git a/src/activation_layer.c b/src/activation_layer.c index 2c323b8d..b8b5d023 100644 --- a/src/activation_layer.c +++ b/src/activation_layer.c @@ -1,6 +1,6 @@ #include "activation_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/activations.h b/src/activations.h index 4ecf97d7..4a382b2d 100644 --- a/src/activations.h +++ b/src/activations.h @@ -1,7 +1,7 @@ #ifndef ACTIVATIONS_H #define ACTIVATIONS_H #include "darknet.h" -#include "cuda.h" +#include "dark_cuda.h" #include "math.h" //typedef enum{ diff --git a/src/avgpool_layer.c b/src/avgpool_layer.c index bae5ff4d..20838bbd 100644 --- a/src/avgpool_layer.c +++ b/src/avgpool_layer.c @@ -1,5 +1,5 @@ #include "avgpool_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) diff --git a/src/avgpool_layer.h b/src/avgpool_layer.h index f7679aa9..2277ec6d 100644 --- a/src/avgpool_layer.h +++ b/src/avgpool_layer.h @@ -2,7 +2,7 @@ #define AVGPOOL_LAYER_H #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/avgpool_layer_kernels.cu b/src/avgpool_layer_kernels.cu index b38ba450..676faa69 100644 --- a/src/avgpool_layer_kernels.cu +++ b/src/avgpool_layer_kernels.cu @@ -3,7 +3,7 @@ #include "cublas_v2.h" #include "avgpool_layer.h" -#include "cuda.h" +#include "dark_cuda.h" __global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) { diff --git a/src/blas.h b/src/blas.h index 12d4b9a3..8e91fff2 100644 --- a/src/blas.h +++ b/src/blas.h @@ -1,7 +1,7 @@ #ifndef BLAS_H #define BLAS_H #ifdef GPU -#include "cuda.h" +#include "dark_cuda.h" #include "tree.h" #endif #ifdef __cplusplus diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index 98592c81..2070bc1f 100644 --- 
a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -4,7 +4,7 @@ #include #include "blas.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include "tree.h" diff --git a/src/captcha.c b/src/captcha.c index 0bb15b84..0cc15915 100644 --- a/src/captcha.c +++ b/src/captcha.c @@ -85,7 +85,7 @@ void train_captcha(char *cfgfile, char *weightfile) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if(i%100==0){ char buff[256]; diff --git a/src/cifar.c b/src/cifar.c index 04dec155..24a13a57 100644 --- a/src/cifar.c +++ b/src/cifar.c @@ -33,7 +33,7 @@ void train_cifar(char *cfgfile, char *weightfile) float loss = train_network_sgd(net, train, 1); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); if(*net.seen/N > epoch){ epoch = *net.seen/N; char buff[256]; @@ -89,7 +89,7 @@ void train_cifar_distill(char *cfgfile, char *weightfile) float loss = train_network_sgd(net, train, 1); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), 
*net.seen); if(*net.seen/N > epoch){ epoch = *net.seen/N; char buff[256]; diff --git a/src/classifier.c b/src/classifier.c index 6207385d..961e4cbb 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -5,7 +5,7 @@ #include "blas.h" #include "assert.h" #include "classifier.h" -#include "cuda.h" +#include "dark_cuda.h" #ifdef WIN32 #include #include "gettimeofday.h" @@ -100,7 +100,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, args.exposure = net.exposure; args.saturation = net.saturation; args.hue = net.hue; - args.size = net.w; + args.size = net.w > net.h ? net.w : net.h; args.paths = paths; args.classes = classes; @@ -177,7 +177,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, draw_precision = 1; } - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); #ifdef OPENCV draw_train_loss(img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, "top5", dont_show, mjpeg_port); #endif // OPENCV @@ -198,7 +198,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, if (ngpus != 1) sync_nets(nets, ngpus, 0); #endif char buff[256]; - sprintf(buff, "%s/%s_last.weights", backup_directory, base, i); + sprintf(buff, "%s/%s_last.weights", backup_directory, base); save_weights(net, buff); } free_data(train); @@ -791,7 +791,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi int* indexes = (int*)calloc(top, sizeof(int)); char buff[256]; char *input = buff; - int size = net.w; + //int size = net.w; while(1){ if(filename){ strncpy(input, filename, 256); diff --git 
a/src/col2im_kernels.cu b/src/col2im_kernels.cu index e3b1d233..2f18a0fd 100644 --- a/src/col2im_kernels.cu +++ b/src/col2im_kernels.cu @@ -3,7 +3,7 @@ #include "cublas_v2.h" #include "col2im.h" -#include "cuda.h" +#include "dark_cuda.h" // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE diff --git a/src/compare.c b/src/compare.c index 5c1e0451..bb842261 100644 --- a/src/compare.c +++ b/src/compare.c @@ -54,7 +54,7 @@ void train_compare(char *cfgfile, char *weightfile) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%.3f: %f, %f avg, %lf seconds, %d images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); + printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if(i%100 == 0){ char buff[256]; diff --git a/src/connected_layer.c b/src/connected_layer.c index 3520e914..242ab8fb 100644 --- a/src/connected_layer.c +++ b/src/connected_layer.c @@ -2,7 +2,7 @@ #include "batchnorm_layer.h" #include "convolutional_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 9625ddc6..8ae99710 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -2,12 +2,6 @@ #include "curand.h" #include "cublas_v2.h" -#ifdef CUDNN -#ifndef USE_CMAKE_LIBS -#pragma comment(lib, "cudnn.lib") -#endif -#endif - #include "convolutional_layer.h" #include "batchnorm_layer.h" #include "gemm.h" @@ -15,7 +9,7 @@ #include "im2col.h" #include "col2im.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" __global__ void binarize_kernel(float *x, int n, float *binary) @@ -598,6 +592,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) 
void backward_convolutional_layer_gpu(convolutional_layer l, network_state state) { + if(state.net.try_fix_nan) constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1); gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); if (!l.batch_normalize) diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 756fe917..84d36d93 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -8,12 +8,6 @@ #include #include -#ifdef CUDNN -#ifndef USE_CMAKE_LIBS -#pragma comment(lib, "cudnn.lib") -#endif -#endif - #ifdef AI2 #include "xnor_layer.h" #endif @@ -685,7 +679,7 @@ void float_to_bit(float *src, unsigned char *dst, size_t size) { void bit_to_float(unsigned char *src, float *dst, size_t size, size_t filters, float *mean_arr) { memset(dst, 0, size *sizeof(float)); - size_t i, src_i, src_shift; + size_t i; for (i = 0; i < size; ++i) { float mean_val = 1; @@ -732,7 +726,7 @@ void binary_align_weights(convolutional_layer *l) const int items_per_channel = l->size*l->size; for (i = 0; i < items_per_channel; ++i) { - uint32_t val = 0; + //uint32_t val = 0; int c_pack; for (c_pack = 0; c_pack < 32; ++c_pack) { float src = l->binary_weights[fil*items_per_filter + (chan + c_pack)*items_per_channel + i]; @@ -755,8 +749,8 @@ void binary_align_weights(convolutional_layer *l) //if (l->n >= 32) if(gpu_index >= 0) { - int M = l->n; - int N = l->out_w*l->out_h; + //int M = l->n; + //int N = l->out_w*l->out_h; //printf("\n M = %d, N = %d, M %% 8 = %d, N %% 8 = %d - weights \n", M, N, M % 8, N % 8); //printf("\n l.w = %d, l.c = %d, l.n = %d \n", l->w, l->c, l->n); for (i = 0; i < align_weights_size / 8; ++i) l->align_bit_weights[i] = ~(l->align_bit_weights[i]); @@ -812,7 +806,7 @@ size_t binary_transpose_align_input(int k, int n, float *b, char **t_bit_input, size_t t_bit_input_size = t_intput_size / 8;// +1; memset(*t_bit_input, 0, t_bit_input_size * sizeof(char)); - int src_size = k * bit_align; + //int src_size = k * bit_align; // 
b - [bit_align, k] - [l.bit_align, l.size*l.size*l.c] = src_size // t_input - [bit_align, k] - [n', k] @@ -868,8 +862,8 @@ void forward_convolutional_layer(convolutional_layer l, network_state state) int ldb_align = l.lda_align; size_t new_ldb = k + (ldb_align - k%ldb_align); // (k / 8 + 1) * 8; - size_t t_intput_size = new_ldb * l.bit_align;// n; - size_t t_bit_input_size = t_intput_size / 8;// +1; + //size_t t_intput_size = new_ldb * l.bit_align;// n; + //size_t t_bit_input_size = t_intput_size / 8;// +1; int re_packed_input_size = l.c * l.w * l.h; memset(state.workspace, 0, re_packed_input_size * sizeof(float)); @@ -934,17 +928,18 @@ void forward_convolutional_layer(convolutional_layer l, network_state state) //im2col_cpu_custom_align(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b, l.bit_align); im2col_cpu_custom_bin(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, state.workspace, l.bit_align); - size_t output_size = l.outputs; + //size_t output_size = l.outputs; //float *count_output = calloc(output_size, sizeof(float)); //size_t bit_output_size = output_size / 8 + 1; //char *bit_output = calloc(bit_output_size, sizeof(char)); - size_t intput_size = n * k; // (out_h*out_w) X (l.size*l.size*l.c) : after im2col() - size_t bit_input_size = intput_size / 8 + 1; + //size_t intput_size = n * k; // (out_h*out_w) X (l.size*l.size*l.c) : after im2col() + //size_t bit_input_size = intput_size / 8 + 1; //char *bit_input = calloc(bit_input_size, sizeof(char)); - size_t weights_size = k * m; //l.size*l.size*l.c*l.n; - size_t bit_weights_size = weights_size / 8 + 1; + //size_t weights_size = k * m; //l.size*l.size*l.c*l.n; + //size_t bit_weights_size = weights_size / 8 + 1; + //char *bit_weights = calloc(bit_weights_size, sizeof(char)); //float *mean_arr = calloc(l.n, sizeof(float)); diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index bad3b84e..da7b8feb 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -1,7 +1,7 @@ 
#ifndef CONVOLUTIONAL_LAYER_H #define CONVOLUTIONAL_LAYER_H -#include "cuda.h" +#include "dark_cuda.h" #include "image.h" #include "activations.h" #include "layer.h" @@ -51,6 +51,7 @@ image get_convolutional_image(convolutional_layer layer); image get_convolutional_delta(convolutional_layer layer); image get_convolutional_weight(convolutional_layer layer, int i); + int convolutional_out_height(convolutional_layer layer); int convolutional_out_width(convolutional_layer layer); void rescale_weights(convolutional_layer l, float scale, float trans); diff --git a/src/cost_layer.c b/src/cost_layer.c index 33fd8572..c967c29d 100644 --- a/src/cost_layer.c +++ b/src/cost_layer.c @@ -1,6 +1,6 @@ #include "cost_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include #include @@ -25,8 +25,9 @@ char *get_cost_string(COST_TYPE a) return "masked"; case SMOOTH: return "smooth"; + default: + return "sse"; } - return "sse"; } cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) diff --git a/src/crnn_layer.c b/src/crnn_layer.c index 0fc6da72..59cac509 100644 --- a/src/crnn_layer.c +++ b/src/crnn_layer.c @@ -1,7 +1,7 @@ #include "crnn_layer.h" #include "convolutional_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" @@ -268,16 +268,18 @@ void forward_crnn_layer_gpu(layer l, network_state state) layer input_layer = *(l.input_layer); layer self_layer = *(l.self_layer); layer output_layer = *(l.output_layer); - /* -#ifdef CUDNN_HALF -// slow and bad - s.index = state.index; - s.net = state.net; - cuda_convert_f32_to_f16(input_layer.weights_gpu, input_layer.c*input_layer.n*input_layer.size*input_layer.size, input_layer.weights_gpu16); - cuda_convert_f32_to_f16(self_layer.weights_gpu, self_layer.c*self_layer.n*self_layer.size*self_layer.size, self_layer.weights_gpu16); - cuda_convert_f32_to_f16(output_layer.weights_gpu, 
output_layer.c*output_layer.n*output_layer.size*output_layer.size, output_layer.weights_gpu16); + +/* +#ifdef CUDNN_HALF // slow and bad for training + if (!state.train && state.net.cudnn_half) { + s.index = state.index; + cuda_convert_f32_to_f16(input_layer.weights_gpu, input_layer.c*input_layer.n*input_layer.size*input_layer.size, input_layer.weights_gpu16); + cuda_convert_f32_to_f16(self_layer.weights_gpu, self_layer.c*self_layer.n*self_layer.size*self_layer.size, self_layer.weights_gpu16); + cuda_convert_f32_to_f16(output_layer.weights_gpu, output_layer.c*output_layer.n*output_layer.size*output_layer.size, output_layer.weights_gpu16); + } #endif //CUDNN_HALF - */ +*/ + if (state.train) { fill_ongpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); diff --git a/src/crop_layer.c b/src/crop_layer.c index 258030be..092237f4 100644 --- a/src/crop_layer.c +++ b/src/crop_layer.c @@ -1,5 +1,5 @@ #include "crop_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include image get_crop_image(crop_layer l) diff --git a/src/crop_layer_kernels.cu b/src/crop_layer_kernels.cu index 5b084fe5..46afca6c 100644 --- a/src/crop_layer_kernels.cu +++ b/src/crop_layer_kernels.cu @@ -4,7 +4,7 @@ #include "crop_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "image.h" __device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) diff --git a/src/cuda.c b/src/dark_cuda.c similarity index 95% rename from src/cuda.c rename to src/dark_cuda.c index 9e7745e2..5b57ea1d 100644 --- a/src/cuda.c +++ b/src/dark_cuda.c @@ -8,12 +8,22 @@ int gpu_index = 0; #ifdef GPU -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include "blas.h" #include "assert.h" #include #include +#include +#include + +#pragma comment(lib, "cuda.lib") + +#ifdef CUDNN +#ifndef USE_CMAKE_LIBS +#pragma comment(lib, "cudnn.lib") +#endif // USE_CMAKE_LIBS +#endif // CUDNN void cuda_set_device(int n) { @@ -30,6 +40,14 @@ int cuda_get_device() return n; 
} +void *cuda_get_context() +{ + CUcontext pctx; + CUresult status = cuCtxGetCurrent(&pctx); + if(status != CUDA_SUCCESS) fprintf(stderr, " Error: cuCtxGetCurrent() is failed \n"); + return (void *)pctx; +} + void check_error(cudaError_t status) { cudaError_t status2 = cudaGetLastError(); diff --git a/src/cuda.h b/src/dark_cuda.h similarity index 96% rename from src/cuda.h rename to src/dark_cuda.h index fe33a258..34614dc1 100644 --- a/src/cuda.h +++ b/src/dark_cuda.h @@ -18,11 +18,12 @@ extern int gpu_index; #define WARP_SIZE 32 #define BLOCK_TRANSPOSE32 256 +#include #include #include #include #include -#include +//#include #ifdef CUDNN #include @@ -87,4 +88,4 @@ void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line #else // GPU //LIB_API void cuda_set_device(int n); #endif // GPU -#endif // CUDA_H +#endif // DARKCUDA_H diff --git a/src/darknet.c b/src/darknet.c index bc7a7b7d..67c7d9a4 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -8,7 +8,7 @@ #include "darknet.h" #include "parser.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "connected_layer.h" @@ -479,7 +479,7 @@ int main(int argc, char **argv) float thresh = find_float_arg(argc, argv, "-thresh", .24); int ext_output = find_arg(argc, argv, "-ext_output"); char *filename = (argc > 4) ? argv[4]: 0; - test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, 1, 0, NULL); + test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, ext_output, 0, NULL); } else if (0 == strcmp(argv[1], "cifar")){ run_cifar(argc, argv); } else if (0 == strcmp(argv[1], "go")){ diff --git a/src/darkunistd.h b/src/darkunistd.h index 1ba3ef2e..d2d555c3 100644 --- a/src/darkunistd.h +++ b/src/darkunistd.h @@ -21,7 +21,7 @@ These may be OR'd together. */ #define R_OK 4 /* Test for read permission. */ #define W_OK 2 /* Test for write permission. 
*/ -#define X_OK R_OK /* execute permission - unsupported in Windows, \ +#define X_OK R_OK /* execute permission - unsupported in Windows, */ #define F_OK 0 /* Test for existence. */ #define access _access diff --git a/src/data.c b/src/data.c index e6fa95c5..a3612519 100644 --- a/src/data.c +++ b/src/data.c @@ -1,7 +1,7 @@ #include "data.h" #include "utils.h" #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include @@ -790,8 +790,8 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo d.X.vals = (float**)calloc(d.X.rows, sizeof(float*)); d.X.cols = h*w*c; - float r1, r2, r3, r4; - float dhue, dsat, dexp, flip; + float r1 = 0, r2 = 0, r3 = 0, r4 = 0; + float dhue = 0, dsat = 0, dexp = 0, flip = 0; int augmentation_calculated = 0; d.y = make_matrix(n, 5*boxes); @@ -890,8 +890,8 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo d.X.vals = (float**)calloc(d.X.rows, sizeof(float*)); d.X.cols = h*w*c; - float r1, r2, r3, r4; - float dhue, dsat, dexp, flip; + float r1 = 0, r2 = 0, r3 = 0, r4 = 0; + float dhue = 0, dsat = 0, dexp = 0, flip = 0; int augmentation_calculated = 0; d.y = make_matrix(n, 5 * boxes); diff --git a/src/deconvolutional_kernels.cu b/src/deconvolutional_kernels.cu index b0ba1a86..472b2247 100644 --- a/src/deconvolutional_kernels.cu +++ b/src/deconvolutional_kernels.cu @@ -9,7 +9,7 @@ #include "im2col.h" #include "col2im.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) { diff --git a/src/deconvolutional_layer.h b/src/deconvolutional_layer.h index 2e1c4527..bb15a429 100644 --- a/src/deconvolutional_layer.h +++ b/src/deconvolutional_layer.h @@ -1,7 +1,7 @@ #ifndef DECONVOLUTIONAL_LAYER_H #define DECONVOLUTIONAL_LAYER_H -#include "cuda.h" +#include "dark_cuda.h" #include "image.h" #include "activations.h" #include "layer.h" diff --git 
a/src/detection_layer.c b/src/detection_layer.c index 60fdc90d..64d133f9 100644 --- a/src/detection_layer.c +++ b/src/detection_layer.c @@ -3,7 +3,7 @@ #include "softmax_layer.h" #include "blas.h" #include "box.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include #include diff --git a/src/detector.c b/src/detector.c index e82cc069..62a8a078 100644 --- a/src/detector.c +++ b/src/detector.c @@ -326,7 +326,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i if (ngpus != 1) sync_nets(nets, ngpus, 0); #endif char buff[256]; - sprintf(buff, "%s/%s_last.weights", backup_directory, base, i); + sprintf(buff, "%s/%s_last.weights", backup_directory, base); save_weights(net, buff); } free_data(train); @@ -594,7 +594,7 @@ void validate_detector_recall(char *datacfg, char *cfgfile, char *weightfile) list *plist = get_paths(valid_images); char **paths = (char **)list_to_array(plist); - layer l = net.layers[net.n - 1]; + //layer l = net.layers[net.n - 1]; int j, k; @@ -681,16 +681,16 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa char *difficult_valid_images = option_find_str(options, "difficult", NULL); char *name_list = option_find_str(options, "names", "data/names.list"); char **names = get_labels(name_list); - char *mapf = option_find_str(options, "map", 0); - int *map = 0; - if (mapf) map = read_map(mapf); + //char *mapf = option_find_str(options, "map", 0); + //int *map = 0; + //if (mapf) map = read_map(mapf); FILE* reinforcement_fd = NULL; network net; - int initial_batch; + //int initial_batch; if (existing_net) { char *train_images = option_find_str(options, "train", "data/train.txt"); - char *valid_images = option_find_str(options, "valid", train_images); + valid_images = option_find_str(options, "valid", train_images); net = *existing_net; } else { @@ -923,6 +923,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa printf("\n detections_count = 
%d, unique_truth_count = %d \n", detections_count, unique_truth_count); + int* detection_per_class_count = (int*)calloc(classes, sizeof(int)); + for (j = 0; j < detections_count; ++j) { + detection_per_class_count[detections[j].class_id]++; + } + int* truth_flags = (int*)calloc(unique_truth_count, sizeof(int)); int rank; @@ -945,7 +950,8 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa { truth_flags[d.unique_truth_index] = 1; pr[d.class_id][rank].tp++; // true-positive - } + } else + pr[d.class_id][rank].fp++; } else { pr[d.class_id][rank].fp++; // false-positive @@ -963,6 +969,10 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa if ((tp + fn) > 0) pr[i][rank].recall = (double)tp / (double)(tp + fn); else pr[i][rank].recall = 0; + + if (rank == (detections_count - 1) && detection_per_class_count[i] != (tp + fp)) { // check for last rank + printf(" class_id: %d - detections = %d, tp+fp = %d, tp = %d, fp = %d \n", i, detection_per_class_count[i], tp+fp, tp, fp); + } } } @@ -1014,6 +1024,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa free(pr); free(detections); free(truth_classes_count); + free(detection_per_class_count); fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); if (reinforcement_fd != NULL) fclose(reinforcement_fd); @@ -1033,7 +1044,6 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa return mean_average_precision; } -//#ifdef OPENCV typedef struct { float w, h; } anchors_t; @@ -1120,8 +1130,8 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int printf("\n"); for (i = 0; i < number_of_boxes; ++i) { - float w = boxes_data.vals[i][0] = rel_width_height_array[i * 2]; - float h = boxes_data.vals[i][1] = rel_width_height_array[i * 2 + 1]; + boxes_data.vals[i][0] = rel_width_height_array[i * 2]; + boxes_data.vals[i][1] = rel_width_height_array[i * 2 + 1]; //if (w 
> 410 || h > 410) printf("i:%d, w = %f, h = %f \n", i, w, h); } @@ -1159,7 +1169,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int float anchor_w = anchors_data.centers.vals[cluster_idx][0]; //centers->data.fl[cluster_idx * 2]; float anchor_h = anchors_data.centers.vals[cluster_idx][1]; //centers->data.fl[cluster_idx * 2 + 1]; if (best_iou > 1 || best_iou < 0) { // || box_w > width || box_h > height) { - printf(" Wrong label: i = %d, box_w = %d, box_h = %d, anchor_w = %d, anchor_h = %d, iou = %f \n", + printf(" Wrong label: i = %d, box_w = %f, box_h = %f, anchor_w = %f, anchor_h = %f, iou = %f \n", i, box_w, box_h, anchor_w, anchor_h, best_iou); } else avg_iou += best_iou; @@ -1235,6 +1245,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int cvCircle(img, pt, 1, CV_RGB(red_id, green_id, blue_id), CV_FILLED, 8, 0); //if(pt.x > img_size || pt.y > img_size) printf("\n pt.x = %d, pt.y = %d \n", pt.x, pt.y); } + save_cv_png(img, "cloud.png"); cvShowImage("clusters", img); cvWaitKey(0); cvReleaseImage(&img); @@ -1248,11 +1259,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int getchar(); } -//#else -//void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int show) { -// printf(" k-means++ can't be used without OpenCV, because there is used cvKMeans2 implementation \n"); -//} -//#endif // OPENCV + void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile) @@ -1276,7 +1283,6 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam if (net.layers[net.n - 1].classes > names_size) getchar(); } srand(2222222); - double time; char buff[256]; char *input = buff; char *json_buf = NULL; diff --git a/src/dice.c b/src/dice.c index 94155271..8a0393a8 100644 --- a/src/dice.c +++ b/src/dice.c @@ -33,7 +33,7 @@ void 
train_dice(char *cfgfile, char *weightfile) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if((i % 100) == 0) net.learning_rate *= .1; if(i%100==0){ diff --git a/src/dropout_layer.c b/src/dropout_layer.c index 9eb22982..3c2abed4 100644 --- a/src/dropout_layer.c +++ b/src/dropout_layer.c @@ -1,6 +1,6 @@ #include "dropout_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include diff --git a/src/dropout_layer_kernels.cu b/src/dropout_layer_kernels.cu index f6a93c91..ceba0fa3 100644 --- a/src/dropout_layer_kernels.cu +++ b/src/dropout_layer_kernels.cu @@ -3,7 +3,7 @@ #include "cublas_v2.h" #include "dropout_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) diff --git a/src/gemm.c b/src/gemm.c index b110da61..861a1906 100644 --- a/src/gemm.c +++ b/src/gemm.c @@ -1,7 +1,7 @@ #include "gemm.h" #include "utils.h" #include "im2col.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include #include @@ -321,7 +321,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, // is not used void transpose_32x32_bits_my(uint32_t *A, uint32_t *B, int lda, int ldb) { - unsigned x, y, t; + unsigned int x, y; for (y = 0; y < 32; ++y) { for (x = 0; x < 32; ++x) { if (A[y * lda] & (1 << x)) B[x * ldb] |= (uint32_t)1 << y; @@ -400,7 +400,7 @@ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, in void transpose_8x8_bits_my(unsigned char *A, unsigned char *B, int lda, int ldb) { - unsigned x, y, t; + unsigned x, y; for (y = 0; y < 8; ++y) { for (x = 0; x < 8; ++x) { if (A[y * lda] & (1 << x)) 
B[x * ldb] |= 1 << y; @@ -755,7 +755,7 @@ void gemm_nn_fast(int M, int N, int K, float ALPHA, for (i = 0; i < (M / TILE_M)*TILE_M; i += TILE_M) { int j, k; - int i_d, j_d, k_d; + int i_d, k_d; for (k = 0; k < (K / TILE_K)*TILE_K; k += TILE_K) { @@ -768,8 +768,8 @@ void gemm_nn_fast(int M, int N, int K, float ALPHA, __m256 result256; __m256 a256_0, b256_0; // AVX __m256 a256_1, b256_1; // AVX - __m256 a256_2, b256_2; // AVX - __m256 a256_3, b256_3; // AVX + __m256 a256_2;// , b256_2; // AVX + __m256 a256_3;// , b256_3; // AVX __m256 c256_0, c256_1, c256_2, c256_3; __m256 c256_4, c256_5, c256_6, c256_7; @@ -943,8 +943,8 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, void convolution_2d_old(int w, int h, int ksize, int n, int c, int pad, int stride, float *weights, float *input, float *output) { - const int out_h = (h + 2 * pad - ksize) / stride + 1; // output_height=input_height for stride=1 and pad=1 - const int out_w = (w + 2 * pad - ksize) / stride + 1; // output_width=input_width for stride=1 and pad=1 + //const int out_h = (h + 2 * pad - ksize) / stride + 1; // output_height=input_height for stride=1 and pad=1 + //const int out_w = (w + 2 * pad - ksize) / stride + 1; // output_width=input_width for stride=1 and pad=1 int fil; // filter index @@ -991,8 +991,8 @@ void convolution_2d_old(int w, int h, int ksize, int n, int c, int pad, int stri void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, float *weights, float *input, float *output, float *mean) { - const int out_h = (h + 2 * pad - ksize) / stride + 1; // output_height=input_height for stride=1 and pad=1 - const int out_w = (w + 2 * pad - ksize) / stride + 1; // output_width=input_width for stride=1 and pad=1 + //const int out_h = (h + 2 * pad - ksize) / stride + 1; // output_height=input_height for stride=1 and pad=1 + //const int out_w = (w + 2 * pad - ksize) / stride + 1; // output_width=input_width for stride=1 and pad=1 int i; #if defined(_OPENMP) @@ 
-1203,7 +1203,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, float mean_val_0 = mean_arr[i + 0]; float mean_val_1 = mean_arr[i + 1]; int j, k; - __m256i all_1 = _mm256_set1_epi8(255); + //__m256i all_1 = _mm256_set1_epi8(255); //for (j = 0; j < N; ++j) for (j = 0; j < (N/2)*2; j += 2) @@ -1770,7 +1770,7 @@ void float_to_bit(float *src, unsigned char *dst, size_t size) memset(dst, 0, dst_size); size_t i; - __m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); + //__m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); __m256 float_zero256 = _mm256_set1_ps(0.0); for (i = 0; i < size; i+=8) @@ -1881,8 +1881,8 @@ void forward_maxpool_layer_avx(float *src, float *dst, int *indexes, int size, i else if (size == 2 && stride == 2 && is_avx() == 1) { for (j = 0; j < out_w - 4; j += 4) { int out_index = j + out_w*(i + out_h*(k + c*b)); - float max = -FLT_MAX; - int max_i = -1; + //float max = -FLT_MAX; + //int max_i = -1; __m128 max128 = _mm_set1_ps(-FLT_MAX); for (n = 0; n < size; ++n) { @@ -2513,7 +2513,7 @@ void convolution_repacked(uint32_t *packed_input, uint32_t *packed_weights, floa #pragma omp parallel for for (fil = 0; fil < n; ++fil) { float mean_val = mean_arr[fil]; - int chan, c_pack, y, x, f_y, f_x; + int chan, y, x, f_y, f_x; // c_pack // channel index for (chan = 0; chan < c / 32; ++chan) //for (chan = 0; chan < l.c; chan += 32) diff --git a/src/go.c b/src/go.c index 54a739d6..79529432 100644 --- a/src/go.c +++ b/src/go.c @@ -144,7 +144,7 @@ void train_go(char *cfgfile, char *weightfile) float loss = train_network_datum(net, board, move) / net.batch; if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, 
get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); if(*net.seen/N > epoch){ epoch = *net.seen/N; char buff[256]; diff --git a/src/gru_layer.c b/src/gru_layer.c index eac751a5..29acdaa2 100644 --- a/src/gru_layer.c +++ b/src/gru_layer.c @@ -1,7 +1,7 @@ #include "gru_layer.h" #include "connected_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/http_stream.cpp b/src/http_stream.cpp index b911f22f..77096d2b 100644 --- a/src/http_stream.cpp +++ b/src/http_stream.cpp @@ -1,7 +1,8 @@ +#define _XOPEN_SOURCE #include "image.h" #include "http_stream.h" -#ifdef OPENCV + // // a single-threaded, multi client(using select), debug webserver - streaming out mjpg. // on win, _WIN32 has to be defined, must link against ws2_32.lib (socks on linux are for free) @@ -11,6 +12,8 @@ #include #include #include +#include +#include using std::cerr; using std::endl; @@ -87,24 +90,12 @@ static int close_socket(SOCKET s) { #endif // _WIN32 -#include -#include -#include -#include -#ifndef CV_VERSION_EPOCH -#include -#endif -using namespace cv; - - - -class MJPG_sender +class JSON_sender { SOCKET sock; SOCKET maxfd; fd_set master; int timeout; // master sock timeout, shutdown after timeout usec. 
- int quality; // jpeg compression [1..100] int close_all_sockets; int _write(int sock, char const*const s, int len) @@ -115,10 +106,9 @@ class MJPG_sender public: - MJPG_sender(int port = 0, int _timeout = 400000, int _quality = 30) + JSON_sender(int port = 0, int _timeout = 400000) : sock(INVALID_SOCKET) , timeout(_timeout) - , quality(_quality) { close_all_sockets = 0; FD_ZERO(&master); @@ -126,7 +116,7 @@ public: open(port); } - ~MJPG_sender() + ~JSON_sender() { close_all(); release(); @@ -143,8 +133,7 @@ public: void close_all() { close_all_sockets = 1; - cv::Mat tmp(cv::Size(10, 10), CV_8UC3); - write(tmp); + write("\n]"); // close JSON array } bool open(int port) @@ -165,12 +154,12 @@ public: #endif if (::bind(sock, (SOCKADDR*)&address, sizeof(SOCKADDR_IN)) == SOCKET_ERROR) { - cerr << "error MJPG_sender: couldn't bind sock " << sock << " to port " << port << "!" << endl; + cerr << "error JSON_sender: couldn't bind sock " << sock << " to port " << port << "!" << endl; return release(); } if (::listen(sock, 10) == SOCKET_ERROR) { - cerr << "error MJPG_sender: couldn't listen on sock " << sock << " on port " << port << " !" << endl; + cerr << "error JSON_sender: couldn't listen on sock " << sock << " on port " << port << " !" << endl; return release(); } FD_ZERO(&master); @@ -184,7 +173,7 @@ public: return sock != INVALID_SOCKET; } - bool write(const Mat & frame) + bool write(char const* outputbuf) { fd_set rread = master; struct timeval select_timeout = { 0, 0 }; @@ -192,14 +181,7 @@ public: if (::select(maxfd + 1, &rread, NULL, NULL, &select_timeout) <= 0) return true; // nothing broken, there's just noone listening - std::vector outbuf; - std::vector params; - params.push_back(IMWRITE_JPEG_QUALITY); - params.push_back(quality); - cv::imencode(".jpg", frame, outbuf, params); //REMOVED FOR COMPATIBILITY - // https://docs.opencv.org/3.4/d4/da8/group__imgcodecs.html#ga292d81be8d76901bff7988d18d2b42ac - //std::cerr << "cv::imencode call disabled!" 
<< std::endl; - size_t outlen = outbuf.size(); + int outlen = static_cast(strlen(outputbuf)); #ifdef _WIN32 for (unsigned i = 0; iclient ? maxfd : client); FD_SET(client, &master); @@ -239,60 +221,106 @@ public: "Expires: 0\r\n" "Cache-Control: no-cache, private\r\n" "Pragma: no-cache\r\n" - "Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\r\n" + "Content-Type: application/json\r\n" + //"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n" "\r\n", 0); - cerr << "MJPG_sender: new client " << client << endl; + _write(client, "[\n", 0); // open JSON array + int n = _write(client, outputbuf, outlen); + cerr << "JSON_sender: new client " << client << endl; } else // existing client, just stream pix { - if (close_all_sockets) { - int result = close_socket(s); - cerr << "MJPG_sender: close clinet: " << result << " \n"; - continue; - } - - char head[400]; - sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen); - _write(s, head, 0); - int n = _write(s, (char*)(&outbuf[0]), outlen); - //cerr << "known client " << s << " " << n << endl; + //char head[400]; + // application/x-resource+json or application/x-collection+json - when you are representing REST resources and collections + // application/json or text/json or text/javascript or text/plain. 
+ // https://stackoverflow.com/questions/477816/what-is-the-correct-json-content-type + //sprintf(head, "\r\nContent-Length: %zu\r\n\r\n", outlen); + //sprintf(head, "--boundary\r\nContent-Type: application/json\r\nContent-Length: %zu\r\n\r\n", outlen); + //_write(s, head, 0); + if (!close_all_sockets) _write(s, ", \n", 0); + int n = _write(s, outputbuf, outlen); if (n < outlen) { - cerr << "MJPG_sender: kill client " << s << endl; + cerr << "JSON_sender: kill client " << s << endl; ::shutdown(s, 2); FD_CLR(s, &master); } + + if (close_all_sockets) { + int result = close_socket(s); + cerr << "JSON_sender: close clinet: " << result << " \n"; + continue; + } } } if (close_all_sockets) { int result = close_socket(sock); - cerr << "MJPG_sender: close acceptor: " << result << " \n\n"; + cerr << "JSON_sender: close acceptor: " << result << " \n\n"; } return true; - } + } }; // ---------------------------------------- -void send_mjpeg(IplImage* ipl, int port, int timeout, int quality) +static std::unique_ptr js_ptr; +static std::mutex mtx; + +void delete_json_sender() +{ + std::lock_guard lock(mtx); + js_ptr.release(); +} + +void send_json_custom(char const* send_buf, int port, int timeout) { try { - static MJPG_sender wri(port, timeout, quality); - cv::Mat mat = cv::cvarrToMat(ipl); - wri.write(mat); - std::cout << " MJPEG-stream sent. \n"; + std::lock_guard lock(mtx); + if(!js_ptr) js_ptr.reset(new JSON_sender(port, timeout)); + + js_ptr->write(send_buf); } catch (...) { - cerr << " Error in send_mjpeg() function \n"; + cerr << " Error in send_json_custom() function \n"; + } +} + +void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout) +{ + try { + char *send_buf = detection_to_json(dets, nboxes, classes, names, frame_id, NULL); + + send_json_custom(send_buf, port, timeout); + std::cout << " JSON-stream sent. \n"; + + free(send_buf); + } + catch (...) 
{ + cerr << " Error in send_json() function \n"; } } // ---------------------------------------- -class JSON_sender + +#ifdef OPENCV + +#include +#include +#include +#include +#ifndef CV_VERSION_EPOCH +#include +#endif +using namespace cv; + + + +class MJPG_sender { SOCKET sock; SOCKET maxfd; fd_set master; int timeout; // master sock timeout, shutdown after timeout usec. + int quality; // jpeg compression [1..100] int close_all_sockets; int _write(int sock, char const*const s, int len) @@ -303,9 +331,10 @@ class JSON_sender public: - JSON_sender(int port = 0, int _timeout = 400000) + MJPG_sender(int port = 0, int _timeout = 400000, int _quality = 30) : sock(INVALID_SOCKET) , timeout(_timeout) + , quality(_quality) { close_all_sockets = 0; FD_ZERO(&master); @@ -313,7 +342,7 @@ public: open(port); } - ~JSON_sender() + ~MJPG_sender() { close_all(); release(); @@ -330,7 +359,8 @@ public: void close_all() { close_all_sockets = 1; - write("\n]"); // close JSON array + cv::Mat tmp(cv::Size(10, 10), CV_8UC3); + write(tmp); } bool open(int port) @@ -351,12 +381,12 @@ public: #endif if (::bind(sock, (SOCKADDR*)&address, sizeof(SOCKADDR_IN)) == SOCKET_ERROR) { - cerr << "error JSON_sender: couldn't bind sock " << sock << " to port " << port << "!" << endl; + cerr << "error MJPG_sender: couldn't bind sock " << sock << " to port " << port << "!" << endl; return release(); } if (::listen(sock, 10) == SOCKET_ERROR) { - cerr << "error JSON_sender: couldn't listen on sock " << sock << " on port " << port << " !" << endl; + cerr << "error MJPG_sender: couldn't listen on sock " << sock << " on port " << port << " !" 
<< endl; return release(); } FD_ZERO(&master); @@ -370,7 +400,7 @@ public: return sock != INVALID_SOCKET; } - bool write(char *outputbuf) + bool write(const Mat & frame) { fd_set rread = master; struct timeval select_timeout = { 0, 0 }; @@ -378,7 +408,14 @@ public: if (::select(maxfd + 1, &rread, NULL, NULL, &select_timeout) <= 0) return true; // nothing broken, there's just noone listening - size_t outlen = strlen(outputbuf); + std::vector outbuf; + std::vector params; + params.push_back(IMWRITE_JPEG_QUALITY); + params.push_back(quality); + cv::imencode(".jpg", frame, outbuf, params); //REMOVED FOR COMPATIBILITY + // https://docs.opencv.org/3.4/d4/da8/group__imgcodecs.html#ga292d81be8d76901bff7988d18d2b42ac + //std::cerr << "cv::imencode call disabled!" << std::endl; + size_t outlen = outbuf.size(); #ifdef _WIN32 for (unsigned i = 0; iclient ? maxfd : client); FD_SET(client, &master); @@ -418,62 +455,52 @@ public: "Expires: 0\r\n" "Cache-Control: no-cache, private\r\n" "Pragma: no-cache\r\n" - "Content-Type: application/json\r\n" - //"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n" + "Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\r\n" "\r\n", 0); - _write(client, "[\n", 0); // open JSON array - int n = _write(client, outputbuf, outlen); - cerr << "JSON_sender: new client " << client << endl; + cerr << "MJPG_sender: new client " << client << endl; } else // existing client, just stream pix { - //char head[400]; - // application/x-resource+json or application/x-collection+json - when you are representing REST resources and collections - // application/json or text/json or text/javascript or text/plain. 
- // https://stackoverflow.com/questions/477816/what-is-the-correct-json-content-type - //sprintf(head, "\r\nContent-Length: %zu\r\n\r\n", outlen); - //sprintf(head, "--boundary\r\nContent-Type: application/json\r\nContent-Length: %zu\r\n\r\n", outlen); - //_write(s, head, 0); - if(!close_all_sockets) _write(s, ", \n", 0); - int n = _write(s, outputbuf, outlen); + if (close_all_sockets) { + int result = close_socket(s); + cerr << "MJPG_sender: close clinet: " << result << " \n"; + continue; + } + + char head[400]; + sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen); + _write(s, head, 0); + int n = _write(s, (char*)(&outbuf[0]), outlen); + //cerr << "known client " << s << " " << n << endl; if (n < outlen) { - cerr << "JSON_sender: kill client " << s << endl; + cerr << "MJPG_sender: kill client " << s << endl; ::shutdown(s, 2); FD_CLR(s, &master); } - - if (close_all_sockets) { - int result = close_socket(s); - cerr << "JSON_sender: close clinet: " << result << " \n"; - continue; - } } } if (close_all_sockets) { int result = close_socket(sock); - cerr << "JSON_sender: close acceptor: " << result << " \n\n"; + cerr << "MJPG_sender: close acceptor: " << result << " \n\n"; } return true; } }; // ---------------------------------------- -void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout) +void send_mjpeg(IplImage* ipl, int port, int timeout, int quality) { try { - static JSON_sender js(port, timeout); - char *send_buf = detection_to_json(dets, nboxes, classes, names, frame_id, NULL); - - js.write(send_buf); - std::cout << " JSON-stream sent. \n"; - free(send_buf); + static MJPG_sender wri(port, timeout, quality); + cv::Mat mat = cv::cvarrToMat(ipl); + wri.write(mat); + std::cout << " MJPEG-stream sent. \n"; } catch (...) 
{ - cerr << " Error in send_json() function \n"; + cerr << " Error in send_mjpeg() function \n"; } } - // ---------------------------------------- CvCapture* get_capture_video_stream(const char *path) { @@ -641,8 +668,7 @@ image load_image_resize(char *filename, int w, int h, int c, image *im) return out; } - -#endif // OPENCV +#endif // OPENCV // ----------------------------------------------------- diff --git a/src/im2col_kernels.cu b/src/im2col_kernels.cu index da952df5..539824a9 100644 --- a/src/im2col_kernels.cu +++ b/src/im2col_kernels.cu @@ -4,11 +4,10 @@ #include #include "im2col.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include -//#include template diff --git a/src/image.c b/src/image.c index dadaa1e5..72b4bdfb 100644 --- a/src/image.c +++ b/src/image.c @@ -1,7 +1,7 @@ #include "image.h" #include "utils.h" #include "blas.h" -#include "cuda.h" +#include "dark_cuda.h" #include #ifndef _USE_MATH_DEFINES #define _USE_MATH_DEFINES diff --git a/src/image.h b/src/image.h index be947d34..91c6fde7 100644 --- a/src/image.h +++ b/src/image.h @@ -96,6 +96,7 @@ image **load_alphabet(); image get_image_from_stream(CvCapture* cap); image get_image_from_stream_cpp(CvCapture* cap); image ipl_to_image(IplImage* src); +void save_cv_png(IplImage *img, const char *name); #endif //float get_pixel(image m, int x, int y, int c); //float get_pixel_extend(image m, int x, int y, int c); diff --git a/src/layer.c b/src/layer.c index 2361c1e0..ae87065f 100644 --- a/src/layer.c +++ b/src/layer.c @@ -1,5 +1,5 @@ #include "layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include void free_layer(layer l) diff --git a/src/local_layer.h b/src/local_layer.h index 198ca362..8ef0dbad 100644 --- a/src/local_layer.h +++ b/src/local_layer.h @@ -1,7 +1,7 @@ #ifndef LOCAL_LAYER_H #define LOCAL_LAYER_H -#include "cuda.h" +#include "dark_cuda.h" #include "image.h" #include "activations.h" #include "layer.h" diff --git a/src/lstm_layer.c b/src/lstm_layer.c index 2894d3d3..bf1e303b 
100644 --- a/src/lstm_layer.c +++ b/src/lstm_layer.c @@ -1,7 +1,7 @@ #include "lstm_layer.h" #include "connected_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/matrix.c b/src/matrix.c index c7df22b6..edacb07b 100644 --- a/src/matrix.c +++ b/src/matrix.c @@ -257,7 +257,7 @@ void kmeans_maximization(matrix data, int *assignments, matrix centers) void random_centers(matrix data, matrix centers) { - int i, j; + int i; int *s = sample(data.rows); for (i = 0; i < centers.rows; ++i) { copy(data.vals[s[i]], centers.vals[i], data.cols); @@ -281,7 +281,6 @@ int *sample(int n) float dist(float *x, float *y, int n) { - int i; //printf(" x0 = %f, x1 = %f, y0 = %f, y1 = %f \n", x[0], x[1], y[0], y[1]); float mw = (x[0] < y[0]) ? x[0] : y[0]; float mh = (x[1] < y[1]) ? x[1] : y[1]; diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index cf99e619..2e217459 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -1,5 +1,5 @@ #include "maxpool_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "gemm.h" #include @@ -32,8 +32,8 @@ void cudnn_maxpool_setup(layer *l) CUDNN_NOT_PROPAGATE_NAN, // CUDNN_PROPAGATE_NAN, CUDNN_NOT_PROPAGATE_NAN l->size, l->size, - 0, //l.pad, - 0, //l.pad, + l->pad/2, //0, //l.pad, + l->pad/2, //0, //l.pad, l->stride, l->stride); diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h index ba6a2020..0a90c376 100644 --- a/src/maxpool_layer.h +++ b/src/maxpool_layer.h @@ -2,7 +2,7 @@ #define MAXPOOL_LAYER_H #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu index 29aa257c..36fccf32 100644 --- a/src/maxpool_layer_kernels.cu +++ b/src/maxpool_layer_kernels.cu @@ -3,7 +3,7 @@ #include "cublas_v2.h" #include "maxpool_layer.h" -#include "cuda.h" +#include "dark_cuda.h" __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int 
in_c, int stride, int size, int pad, float *input, float *output, int *indexes) { diff --git a/src/network.c b/src/network.c index 30f99c4d..cfc747cb 100644 --- a/src/network.c +++ b/src/network.c @@ -488,8 +488,8 @@ int resize_network(network *net, int w, int h) h = l.out_h; if(l.type == AVGPOOL) break; } - const int size = get_network_input_size(*net) * net->batch; #ifdef GPU + const int size = get_network_input_size(*net) * net->batch; if(gpu_index >= 0){ printf(" try to allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000); net->workspace = cuda_make_array(0, workspace_size/sizeof(float) + 1); @@ -728,10 +728,10 @@ char *detection_to_json(detection *dets, int nboxes, int classes, char **names, char *send_buf = (char *)calloc(1024, sizeof(char)); if (filename) { - sprintf(send_buf, "{\n \"frame_id\":%d, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename); + sprintf(send_buf, "{\n \"frame_id\":%lld, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename); } else { - sprintf(send_buf, "{\n \"frame_id\":%d, \n \"objects\": [ \n", frame_id); + sprintf(send_buf, "{\n \"frame_id\":%lld, \n \"objects\": [ \n", frame_id); } int i, j; diff --git a/src/network_kernels.cu b/src/network_kernels.cu index a40dae0a..298372df 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -1,7 +1,4 @@ -//#include "cuda_runtime.h" -//#include "curand.h" -//#include "cublas_v2.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include diff --git a/src/region_layer.c b/src/region_layer.c index 3221e77e..4610d637 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -2,7 +2,7 @@ #include "activations.h" #include "blas.h" #include "box.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include #include @@ -53,8 +53,10 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int void resize_region_layer(layer *l, int w, int h) { +#ifdef GPU int old_w = l->w; int old_h = l->h; 
+#endif l->w = w; l->h = h; diff --git a/src/reorg_layer.c b/src/reorg_layer.c index 47c5efa4..72d05773 100644 --- a/src/reorg_layer.c +++ b/src/reorg_layer.c @@ -1,5 +1,5 @@ #include "reorg_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include diff --git a/src/reorg_layer.h b/src/reorg_layer.h index b6ed379c..f2b90930 100644 --- a/src/reorg_layer.h +++ b/src/reorg_layer.h @@ -2,7 +2,7 @@ #define REORG_LAYER_H #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/reorg_old_layer.c b/src/reorg_old_layer.c index defc034d..530da202 100644 --- a/src/reorg_old_layer.c +++ b/src/reorg_old_layer.c @@ -1,5 +1,5 @@ #include "reorg_old_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include diff --git a/src/reorg_old_layer.h b/src/reorg_old_layer.h index b66769b5..15c61f8d 100644 --- a/src/reorg_old_layer.h +++ b/src/reorg_old_layer.h @@ -2,7 +2,7 @@ #define REORG_OLD_LAYER_H #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/rnn.c b/src/rnn.c index 93107585..5aee0863 100644 --- a/src/rnn.c +++ b/src/rnn.c @@ -163,7 +163,7 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int i = (*net.seen)/net.batch; int streams = batch/steps; - printf("\n batch = %d, steps = %d, streams = %d, subdivisions = %d, text_size = %d \n", batch, steps, streams, net.subdivisions, size); + printf("\n batch = %d, steps = %d, streams = %d, subdivisions = %d, text_size = %ld \n", batch, steps, streams, net.subdivisions, size); printf(" global_batch = %d \n", batch*net.subdivisions); size_t* offsets = (size_t*)calloc(streams, sizeof(size_t)); int j; diff --git a/src/rnn_layer.c b/src/rnn_layer.c index 22aade09..28163d75 100644 --- a/src/rnn_layer.c +++ b/src/rnn_layer.c @@ -1,7 +1,7 @@ #include "rnn_layer.h" #include "connected_layer.h" #include "utils.h" -#include "cuda.h" +#include 
"dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/route_layer.c b/src/route_layer.c index 8bd6817a..b502fbe7 100644 --- a/src/route_layer.c +++ b/src/route_layer.c @@ -1,5 +1,5 @@ #include "route_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c index b84b72f2..06cd6056 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -1,5 +1,5 @@ #include "shortcut_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include #include diff --git a/src/softmax_layer.c b/src/softmax_layer.c index 3cd607be..9bbff9a0 100644 --- a/src/softmax_layer.c +++ b/src/softmax_layer.c @@ -1,6 +1,6 @@ #include "softmax_layer.h" #include "blas.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include "blas.h" diff --git a/src/tag.c b/src/tag.c index 4033216e..0942d503 100644 --- a/src/tag.c +++ b/src/tag.c @@ -64,7 +64,7 @@ void train_tag(char *cfgfile, char *weightfile, int clear) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); free_data(train); if(*net.seen/N > epoch){ epoch = *net.seen/N; diff --git a/src/upsample_layer.c b/src/upsample_layer.c index d31dd49e..1a2783c2 100644 --- a/src/upsample_layer.c +++ b/src/upsample_layer.c @@ -1,5 +1,5 @@ #include "upsample_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include diff --git a/src/upsample_layer.h b/src/upsample_layer.h index 68aff329..4461cb15 100644 --- a/src/upsample_layer.h +++ b/src/upsample_layer.h @@ -1,6 +1,6 @@ #ifndef UPSAMPLE_LAYER_H 
#define UPSAMPLE_LAYER_H -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/utils.c b/src/utils.c index b4d17c23..6afc187c 100644 --- a/src/utils.c +++ b/src/utils.c @@ -252,6 +252,18 @@ void replace_image_to_label(const char* input_path, char* output_path) find_replace_extension(output_path, ".BMP", ".txt", output_path); find_replace_extension(output_path, ".ppm", ".txt", output_path); find_replace_extension(output_path, ".PPM", ".txt", output_path); + find_replace_extension(output_path, ".tiff", ".txt", output_path); + find_replace_extension(output_path, ".TIFF", ".txt", output_path); + + // Check file ends with txt: + if(strlen(output_path) > 4) { + char *output_path_ext = output_path + strlen(output_path) - 4; + if( strcmp(".txt", output_path_ext) != 0){ + fprintf(stderr, "Failed to infer label file name (check image extension is supported): %s \n", output_path); + } + }else{ + fprintf(stderr, "Label file name is too short: %s \n", output_path); + } } float sec(clock_t clocks) @@ -756,13 +768,12 @@ float rand_uniform(float min, float max) max = swap; } - if (RAND_MAX < 65536) { +#if (RAND_MAX < 65536) int rnd = rand()*(RAND_MAX + 1) + rand(); return ((float)rnd / (RAND_MAX*RAND_MAX) * (max - min)) + min; - } - else { +#else return ((float)rand() / RAND_MAX * (max - min)) + min; - } +#endif //return (random_float() * (max - min)) + min; } @@ -790,12 +801,12 @@ unsigned int random_gen() unsigned int rnd = 0; #ifdef WIN32 rand_s(&rnd); -#else +#else // WIN32 rnd = rand(); - if (RAND_MAX < 65536) { +#if (RAND_MAX < 65536) rnd = rand()*(RAND_MAX + 1) + rnd; - } -#endif +#endif //(RAND_MAX < 65536) +#endif // WIN32 return rnd; } diff --git a/src/writing.c b/src/writing.c index 4acda60a..e0d8019e 100644 --- a/src/writing.c +++ b/src/writing.c @@ -69,7 +69,7 @@ void train_writing(char *cfgfile, char *weightfile) if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d, %.3f: %f, %f avg, %f rate, 
%lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); free_data(train); if(get_current_batch(net)%100 == 0){ char buff[256]; diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp index be84f4d5..b0e25e2d 100644 --- a/src/yolo_console_dll.cpp +++ b/src/yolo_console_dll.cpp @@ -5,21 +5,148 @@ #include #include #include +#include #include -#include // std::mutex, std::unique_lock -#include // std::condition_variable +#include // std::mutex, std::unique_lock +#include -// To use tracking - uncomment the following line. Tracking is supported only by OpenCV 3.x +// It makes sense only for video-Camera (not for video-File) +// To use - uncomment the following line. Optical-flow is supported only by OpenCV 3.x - 4.x //#define TRACK_OPTFLOW +//#define GPU + +// To use 3D-stereo camera ZED - uncomment the following line. ZED_SDK should be installed. 
+//#define ZED_STEREO -//#include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.1\include\cuda_runtime.h" -//#pragma comment(lib, "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.1/lib/x64/cudart.lib") -//static std::shared_ptr device_ptr(NULL, [](void *img) { cudaDeviceReset(); }); #include "yolo_v2_class.hpp" // imported functions from DLL #ifdef OPENCV +#ifdef ZED_STEREO +#include +#pragma comment(lib, "sl_core64.lib") +#pragma comment(lib, "sl_input64.lib") +#pragma comment(lib, "sl_zed64.lib") + +float getMedian(std::vector &v) { + size_t n = v.size() / 2; + std::nth_element(v.begin(), v.begin() + n, v.end()); + return v[n]; +} + +std::vector get_3d_coordinates(std::vector bbox_vect, cv::Mat xyzrgba) +{ + bool valid_measure; + int i, j; + const unsigned int R_max_global = 10; + + std::vector bbox3d_vect; + + for (auto &cur_box : bbox_vect) { + + const unsigned int obj_size = std::min(cur_box.w, cur_box.h); + const unsigned int R_max = std::min(R_max_global, obj_size / 2); + int center_i = cur_box.x + cur_box.w * 0.5f, center_j = cur_box.y + cur_box.h * 0.5f; + + std::vector x_vect, y_vect, z_vect; + for (int R = 0; R < R_max; R++) { + for (int y = -R; y <= R; y++) { + for (int x = -R; x <= R; x++) { + i = center_i + x; + j = center_j + y; + sl::float4 out(NAN, NAN, NAN, NAN); + if (i >= 0 && i < xyzrgba.cols && j >= 0 && j < xyzrgba.rows) { + cv::Vec4f &elem = xyzrgba.at(j, i); // x,y,z,w + out.x = elem[0]; + out.y = elem[1]; + out.z = elem[2]; + out.w = elem[3]; + } + valid_measure = std::isfinite(out.z); + if (valid_measure) + { + x_vect.push_back(out.x); + y_vect.push_back(out.y); + z_vect.push_back(out.z); + } + } + } + } + + if (x_vect.size() * y_vect.size() * z_vect.size() > 0) + { + cur_box.x_3d = getMedian(x_vect); + cur_box.y_3d = getMedian(y_vect); + cur_box.z_3d = getMedian(z_vect); + } + else { + cur_box.x_3d = NAN; + cur_box.y_3d = NAN; + cur_box.z_3d = NAN; + } + + bbox3d_vect.emplace_back(cur_box); + } + + return bbox3d_vect; +} + 
+cv::Mat slMat2cvMat(sl::Mat &input) { + // Mapping between MAT_TYPE and CV_TYPE + int cv_type = -1; + switch (input.getDataType()) { + case sl::MAT_TYPE_32F_C1: + cv_type = CV_32FC1; + break; + case sl::MAT_TYPE_32F_C2: + cv_type = CV_32FC2; + break; + case sl::MAT_TYPE_32F_C3: + cv_type = CV_32FC3; + break; + case sl::MAT_TYPE_32F_C4: + cv_type = CV_32FC4; + break; + case sl::MAT_TYPE_8U_C1: + cv_type = CV_8UC1; + break; + case sl::MAT_TYPE_8U_C2: + cv_type = CV_8UC2; + break; + case sl::MAT_TYPE_8U_C3: + cv_type = CV_8UC3; + break; + case sl::MAT_TYPE_8U_C4: + cv_type = CV_8UC4; + break; + default: + break; + } + return cv::Mat(input.getHeight(), input.getWidth(), cv_type, input.getPtr(sl::MEM_CPU)); +} + +cv::Mat zed_capture_rgb(sl::Camera &zed) { + sl::Mat left; + zed.retrieveImage(left); + return slMat2cvMat(left).clone(); +} + +cv::Mat zed_capture_3d(sl::Camera &zed) { + sl::Mat cur_cloud; + zed.retrieveMeasure(cur_cloud, sl::MEASURE_XYZ); + return slMat2cvMat(cur_cloud).clone(); +} + +static sl::Camera zed; // ZED-camera + +#else // ZED_STEREO +std::vector get_3d_coordinates(std::vector bbox_vect, cv::Mat xyzrgba) { + return bbox_vect; +} +#endif // ZED_STEREO + + #include // C++ #include #ifndef CV_VERSION_EPOCH @@ -44,139 +171,6 @@ #endif // USE_CMAKE_LIBS #endif // CV_VERSION_EPOCH -class track_kalman { -public: - cv::KalmanFilter kf; - int state_size, meas_size, contr_size; - - - track_kalman(int _state_size = 10, int _meas_size = 10, int _contr_size = 0) - : state_size(_state_size), meas_size(_meas_size), contr_size(_contr_size) - { - kf.init(state_size, meas_size, contr_size, CV_32F); - - cv::setIdentity(kf.measurementMatrix); - cv::setIdentity(kf.measurementNoiseCov, cv::Scalar::all(1e-1)); - cv::setIdentity(kf.processNoiseCov, cv::Scalar::all(1e-5)); - cv::setIdentity(kf.errorCovPost, cv::Scalar::all(1e-2)); - cv::setIdentity(kf.transitionMatrix); - } - - void set(std::vector result_vec) { - for (size_t i = 0; i < result_vec.size() && i < 
state_size*2; ++i) { - kf.statePost.at(i * 2 + 0) = result_vec[i].x; - kf.statePost.at(i * 2 + 1) = result_vec[i].y; - } - } - - // Kalman.correct() calculates: statePost = statePre + gain * (z(k)-measurementMatrix*statePre); - // corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) - std::vector correct(std::vector result_vec) { - cv::Mat measurement(meas_size, 1, CV_32F); - for (size_t i = 0; i < result_vec.size() && i < meas_size * 2; ++i) { - measurement.at(i * 2 + 0) = result_vec[i].x; - measurement.at(i * 2 + 1) = result_vec[i].y; - } - cv::Mat estimated = kf.correct(measurement); - for (size_t i = 0; i < result_vec.size() && i < meas_size * 2; ++i) { - result_vec[i].x = estimated.at(i * 2 + 0); - result_vec[i].y = estimated.at(i * 2 + 1); - } - return result_vec; - } - - // Kalman.predict() calculates: statePre = TransitionMatrix * statePost; - // predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) - std::vector predict() { - std::vector result_vec; - cv::Mat control; - cv::Mat prediction = kf.predict(control); - for (size_t i = 0; i < prediction.rows && i < state_size * 2; ++i) { - result_vec[i].x = prediction.at(i * 2 + 0); - result_vec[i].y = prediction.at(i * 2 + 1); - } - return result_vec; - } - -}; - - - - -class extrapolate_coords_t { -public: - std::vector old_result_vec; - std::vector dx_vec, dy_vec, time_vec; - std::vector old_dx_vec, old_dy_vec; - - void new_result(std::vector new_result_vec, float new_time) { - old_dx_vec = dx_vec; - old_dy_vec = dy_vec; - if (old_dx_vec.size() != old_result_vec.size()) std::cout << "old_dx != old_res \n"; - dx_vec = std::vector(new_result_vec.size(), 0); - dy_vec = std::vector(new_result_vec.size(), 0); - update_result(new_result_vec, new_time, false); - old_result_vec = new_result_vec; - time_vec = std::vector(new_result_vec.size(), new_time); - } - - void update_result(std::vector new_result_vec, float new_time, bool update = true) { - for (size_t i = 0; i < new_result_vec.size(); ++i) { - for (size_t k = 0; k < 
old_result_vec.size(); ++k) { - if (old_result_vec[k].track_id == new_result_vec[i].track_id && old_result_vec[k].obj_id == new_result_vec[i].obj_id) { - float const delta_time = new_time - time_vec[k]; - if (abs(delta_time) < 1) break; - size_t index = (update) ? k : i; - float dx = ((float)new_result_vec[i].x - (float)old_result_vec[k].x) / delta_time; - float dy = ((float)new_result_vec[i].y - (float)old_result_vec[k].y) / delta_time; - float old_dx = dx, old_dy = dy; - - // if it's shaking - if (update) { - if (dx * dx_vec[i] < 0) dx = dx / 2; - if (dy * dy_vec[i] < 0) dy = dy / 2; - } else { - if (dx * old_dx_vec[k] < 0) dx = dx / 2; - if (dy * old_dy_vec[k] < 0) dy = dy / 2; - } - dx_vec[index] = dx; - dy_vec[index] = dy; - - //if (old_dx == dx && old_dy == dy) std::cout << "not shakin \n"; - //else std::cout << "shakin \n"; - - if (dx_vec[index] > 1000 || dy_vec[index] > 1000) { - //std::cout << "!!! bad dx or dy, dx = " << dx_vec[index] << ", dy = " << dy_vec[index] << - // ", delta_time = " << delta_time << ", update = " << update << std::endl; - dx_vec[index] = 0; - dy_vec[index] = 0; - } - old_result_vec[k].x = new_result_vec[i].x; - old_result_vec[k].y = new_result_vec[i].y; - time_vec[k] = new_time; - break; - } - } - } - } - - std::vector predict(float cur_time) { - std::vector result_vec = old_result_vec; - for (size_t i = 0; i < old_result_vec.size(); ++i) { - float const delta_time = cur_time - time_vec[i]; - auto &bbox = result_vec[i]; - float new_x = (float) bbox.x + dx_vec[i] * delta_time; - float new_y = (float) bbox.y + dy_vec[i] * delta_time; - if (new_x > 0) bbox.x = new_x; - else bbox.x = 0; - if (new_y > 0) bbox.y = new_y; - else bbox.y = 0; - } - return result_vec; - } - -}; - void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector obj_names, int current_det_fps = -1, int current_cap_fps = -1) @@ -190,11 +184,24 @@ void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector 0) obj_name += " - " + 
std::to_string(i.track_id); cv::Size const text_size = getTextSize(obj_name, cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, 2, 0); - int const max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2); - cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 1, 0), std::max((int)i.y - 30, 0)), - cv::Point2f(std::min((int)i.x + max_width, mat_img.cols-1), std::min((int)i.y, mat_img.rows-1)), + int max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2); + max_width = std::max(max_width, (int)i.w + 2); + //max_width = std::max(max_width, 283); + std::string coords_3d; + if (!std::isnan(i.z_3d)) { + std::stringstream ss; + ss << std::fixed << std::setprecision(2) << "x:" << i.x_3d << "m y:" << i.y_3d << "m z:" << i.z_3d << "m "; + coords_3d = ss.str(); + cv::Size const text_size_3d = getTextSize(ss.str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, 1, 0); + int const max_width_3d = (text_size_3d.width > i.w + 2) ? text_size_3d.width : (i.w + 2); + if (max_width_3d > max_width) max_width = max_width_3d; + } + + cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 1, 0), std::max((int)i.y - 35, 0)), + cv::Point2f(std::min((int)i.x + max_width, mat_img.cols - 1), std::min((int)i.y, mat_img.rows - 1)), color, CV_FILLED, 8, 0); - putText(mat_img, obj_name, cv::Point2f(i.x, i.y - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(0, 0, 0), 2); + putText(mat_img, obj_name, cv::Point2f(i.x, i.y - 16), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(0, 0, 0), 2); + if(!coords_3d.empty()) putText(mat_img, coords_3d, cv::Point2f(i.x, i.y-1), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cv::Scalar(0, 0, 0), 1); } } if (current_det_fps >= 0 && current_cap_fps >= 0) { @@ -205,7 +212,8 @@ void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector const result_vec, std::vector const obj_names) { +void show_console_result(std::vector const result_vec, std::vector const obj_names, int frame_id = -1) { + if (frame_id >= 0) std::cout << " Frame: " << frame_id << std::endl; 
for (auto &i : result_vec) { if (obj_names.size() > i.obj_id) std::cout << obj_names[i.obj_id] << " - "; std::cout << "obj_id = " << i.obj_id << ", x = " << i.x << ", y = " << i.y @@ -223,6 +231,38 @@ std::vector objects_names_from_file(std::string const filename) { return file_lines; } +template +class send_one_replaceable_object_t { + const bool sync; + std::atomic a_ptr; +public: + + void send(T const& _obj) { + T *new_ptr = new T; + *new_ptr = _obj; + if (sync) { + while (a_ptr.load()) std::this_thread::sleep_for(std::chrono::milliseconds(3)); + } + std::unique_ptr old_ptr(a_ptr.exchange(new_ptr)); + } + + T receive() { + std::unique_ptr ptr; + do { + while(!a_ptr.load()) std::this_thread::sleep_for(std::chrono::milliseconds(3)); + ptr.reset(a_ptr.exchange(NULL)); + } while (!ptr); + T obj = *ptr; + return obj; + } + + bool is_object_present() { + return (a_ptr.load() != NULL); + } + + send_one_replaceable_object_t(bool _sync) : sync(_sync), a_ptr(NULL) + {} +}; int main(int argc, char *argv[]) { @@ -239,17 +279,23 @@ int main(int argc, char *argv[]) } else if (argc > 1) filename = argv[1]; - float const thresh = (argc > 5) ? std::stof(argv[5]) : 0.20; + float const thresh = (argc > 5) ? 
std::stof(argv[5]) : 0.2; Detector detector(cfg_file, weights_file); auto obj_names = objects_names_from_file(names_file); std::string out_videofile = "result.avi"; - bool const save_output_videofile = true; -#ifdef TRACK_OPTFLOW + bool const save_output_videofile = false; // true - for history + bool const send_network = false; // true - for remote detection + bool const use_kalman_filter = false; // true - for stationary camera + + bool detection_sync = true; // true - for video-file +#ifdef TRACK_OPTFLOW // for slow GPU + detection_sync = false; Tracker_optflow tracker_flow; - detector.wait_stream = true; -#endif + //detector.wait_stream = true; +#endif // TRACK_OPTFLOW + while (true) { @@ -259,187 +305,325 @@ int main(int argc, char *argv[]) try { #ifdef OPENCV - extrapolate_coords_t extrapolate_coords; - bool extrapolate_flag = false; - float cur_time_extrapolate = 0, old_time_extrapolate = 0; preview_boxes_t large_preview(100, 150, false), small_preview(50, 50, true); bool show_small_boxes = false; std::string const file_ext = filename.substr(filename.find_last_of(".") + 1); std::string const protocol = filename.substr(0, 7); if (file_ext == "avi" || file_ext == "mp4" || file_ext == "mjpg" || file_ext == "mov" || // video file - protocol == "rtmp://" || protocol == "rtsp://" || protocol == "http://" || protocol == "https:/") // video network stream + protocol == "rtmp://" || protocol == "rtsp://" || protocol == "http://" || protocol == "https:/" || // video network stream + filename == "zed_camera" || file_ext == "svo" || filename == "web_camera") // ZED stereo camera + { - cv::Mat cap_frame, cur_frame, det_frame, write_frame; - std::queue track_optflow_queue; - int passed_flow_frames = 0; - std::shared_ptr det_image; - std::vector result_vec, thread_result_vec; - detector.nms = 0.02; // comment it - if track_id is not required - std::atomic consumed, videowrite_ready; - bool exit_flag = false; - consumed = true; - videowrite_ready = true; - std::atomic 
fps_det_counter, fps_cap_counter; - fps_det_counter = 0; - fps_cap_counter = 0; - int current_det_fps = 0, current_cap_fps = 0; - std::thread t_detect, t_cap, t_videowrite; - std::mutex mtx; - std::condition_variable cv_detected, cv_pre_tracked; + if (protocol == "rtsp://" || protocol == "http://" || protocol == "https:/" || filename == "zed_camera" || filename == "web_camera") + detection_sync = false; + + cv::Mat cur_frame; + std::atomic fps_cap_counter(0), fps_det_counter(0); + std::atomic current_fps_cap(0), current_fps_det(0); + std::atomic exit_flag(false); std::chrono::steady_clock::time_point steady_start, steady_end; - cv::VideoCapture cap(filename); cap >> cur_frame; - int const video_fps = cap.get(CV_CAP_PROP_FPS); + int video_fps = 25; + bool use_zed_camera = false; + + track_kalman_t track_kalman; + +#ifdef ZED_STEREO + sl::InitParameters init_params; + init_params.depth_minimum_distance = 0.5; + init_params.depth_mode = sl::DEPTH_MODE_ULTRA; + init_params.camera_resolution = sl::RESOLUTION_HD720; + init_params.coordinate_units = sl::UNIT_METER; + //init_params.sdk_cuda_ctx = (CUcontext)detector.get_cuda_context(); + init_params.sdk_gpu_id = detector.cur_gpu_id; + init_params.camera_buffer_count_linux = 2; + if (file_ext == "svo") init_params.svo_input_filename.set(filename.c_str()); + if (filename == "zed_camera" || file_ext == "svo") { + std::cout << "ZED 3D Camera " << zed.open(init_params) << std::endl; + if (!zed.isOpened()) { + std::cout << " Error: ZED Camera should be connected to USB 3.0. And ZED_SDK should be installed. 
\n"; + getchar(); + return 0; + } + cur_frame = zed_capture_rgb(zed); + use_zed_camera = true; + } +#endif // ZED_STEREO + + cv::VideoCapture cap; + if (filename == "web_camera") { + cap.open(0); + video_fps = cap.get(CV_CAP_PROP_FPS); + cap >> cur_frame; + } else if (!use_zed_camera) { + cap.open(filename); + video_fps = cap.get(CV_CAP_PROP_FPS); + cap >> cur_frame; + } cv::Size const frame_size = cur_frame.size(); + //cv::Size const frame_size(cap.get(CV_CAP_PROP_FRAME_WIDTH), cap.get(CV_CAP_PROP_FRAME_HEIGHT)); + std::cout << "\n Video size: " << frame_size << std::endl; + cv::VideoWriter output_video; if (save_output_videofile) output_video.open(out_videofile, CV_FOURCC('D', 'I', 'V', 'X'), std::max(35, video_fps), frame_size, true); - while (!cur_frame.empty()) + struct detection_data_t { + cv::Mat cap_frame; + std::shared_ptr det_image; + std::vector result_vec; + cv::Mat draw_frame; + bool new_detection; + uint64_t frame_id; + bool exit_flag; + cv::Mat zed_cloud; + std::queue track_optflow_queue; + detection_data_t() : exit_flag(false), new_detection(false) {} + }; + + const bool sync = detection_sync; // sync data exchange + send_one_replaceable_object_t cap2prepare(sync), cap2draw(sync), + prepare2detect(sync), detect2draw(sync), draw2show(sync), draw2write(sync), draw2net(sync); + + std::thread t_cap, t_prepare, t_detect, t_post, t_draw, t_write, t_network; + + // capture new video-frame + if (t_cap.joinable()) t_cap.join(); + t_cap = std::thread([&]() { - // always sync - if (t_cap.joinable()) { - t_cap.join(); - ++fps_cap_counter; - cur_frame = cap_frame.clone(); - } - t_cap = std::thread([&]() { cap >> cap_frame; }); - ++cur_time_extrapolate; + uint64_t frame_id = 0; + detection_data_t detection_data; + do { + detection_data = detection_data_t(); +#ifdef ZED_STEREO + if (use_zed_camera) { + while (zed.grab() != sl::SUCCESS) std::this_thread::sleep_for(std::chrono::milliseconds(2)); + detection_data.cap_frame = zed_capture_rgb(zed); + 
detection_data.zed_cloud = zed_capture_3d(zed); + } + else +#endif // ZED_STEREO + { + cap >> detection_data.cap_frame; + } + fps_cap_counter++; + detection_data.frame_id = frame_id++; + if (detection_data.cap_frame.empty() || exit_flag) { + std::cout << " exit_flag: detection_data.cap_frame.size = " << detection_data.cap_frame.size() << std::endl; + detection_data.exit_flag = true; + detection_data.cap_frame = cv::Mat(frame_size, CV_8UC3); + } - // swap result bouned-boxes and input-frame - if(consumed) - { - std::unique_lock lock(mtx); - det_image = detector.mat_to_image_resize(cur_frame); - auto old_result_vec = detector.tracking_id(result_vec); - auto detected_result_vec = thread_result_vec; - result_vec = detected_result_vec; -#ifdef TRACK_OPTFLOW - // track optical flow - if (track_optflow_queue.size() > 0) { - //std::cout << "\n !!!! all = " << track_optflow_queue.size() << ", cur = " << passed_flow_frames << std::endl; - cv::Mat first_frame = track_optflow_queue.front(); - tracker_flow.update_tracking_flow(track_optflow_queue.front(), result_vec); + if (!detection_sync) { + cap2draw.send(detection_data); // skip detection + } + cap2prepare.send(detection_data); + } while (!detection_data.exit_flag); + std::cout << " t_cap exit \n"; + }); - while (track_optflow_queue.size() > 1) { - track_optflow_queue.pop(); - result_vec = tracker_flow.tracking_flow(track_optflow_queue.front(), true); - } - track_optflow_queue.pop(); - passed_flow_frames = 0; - result_vec = detector.tracking_id(result_vec); - auto tmp_result_vec = detector.tracking_id(detected_result_vec, false); - small_preview.set(first_frame, tmp_result_vec); + // pre-processing video frame (resize, convertion) + t_prepare = std::thread([&]() + { + std::shared_ptr det_image; + detection_data_t detection_data; + do { + detection_data = cap2prepare.receive(); + + det_image = detector.mat_to_image_resize(detection_data.cap_frame); + detection_data.det_image = det_image; + 
prepare2detect.send(detection_data); // detection + + } while (!detection_data.exit_flag); + std::cout << " t_prepare exit \n"; + }); - extrapolate_coords.new_result(tmp_result_vec, old_time_extrapolate); - old_time_extrapolate = cur_time_extrapolate; - extrapolate_coords.update_result(result_vec, cur_time_extrapolate - 1); + + // detection by Yolo + if (t_detect.joinable()) t_detect.join(); + t_detect = std::thread([&]() + { + std::shared_ptr det_image; + detection_data_t detection_data; + do { + detection_data = prepare2detect.receive(); + det_image = detection_data.det_image; + std::vector result_vec; + + if(det_image) + result_vec = detector.detect_resized(*det_image, frame_size.width, frame_size.height, thresh, true); // true + fps_det_counter++; + //std::this_thread::sleep_for(std::chrono::milliseconds(150)); + + detection_data.new_detection = true; + detection_data.result_vec = result_vec; + detect2draw.send(detection_data); + } while (!detection_data.exit_flag); + std::cout << " t_detect exit \n"; + }); + + // draw rectangles (and track objects) + t_draw = std::thread([&]() + { + std::queue track_optflow_queue; + detection_data_t detection_data; + do { + + // for Video-file + if (detection_sync) { + detection_data = detect2draw.receive(); } -#else - result_vec = detector.tracking_id(result_vec); // comment it - if track_id is not required - extrapolate_coords.new_result(result_vec, cur_time_extrapolate - 1); -#endif - // add old tracked objects - for (auto &i : old_result_vec) { - auto it = std::find_if(result_vec.begin(), result_vec.end(), - [&i](bbox_t const& b) { return b.track_id == i.track_id && b.obj_id == i.obj_id; }); - bool track_id_absent = (it == result_vec.end()); - if (track_id_absent) { - if (i.frames_counter-- > 1) - result_vec.push_back(i); + // for Video-camera + else + { + // get new Detection result if present + if (detect2draw.is_object_present()) { + cv::Mat old_cap_frame = detection_data.cap_frame; // use old captured frame + 
detection_data = detect2draw.receive(); + if (!old_cap_frame.empty()) detection_data.cap_frame = old_cap_frame; } + // get new Captured frame else { - it->frames_counter = std::min((unsigned)3, i.frames_counter + 1); + std::vector old_result_vec = detection_data.result_vec; // use old detections + detection_data = cap2draw.receive(); + detection_data.result_vec = old_result_vec; } } + + cv::Mat cap_frame = detection_data.cap_frame; + cv::Mat draw_frame = detection_data.cap_frame.clone(); + std::vector result_vec = detection_data.result_vec; + #ifdef TRACK_OPTFLOW - tracker_flow.update_cur_bbox_vec(result_vec); - result_vec = tracker_flow.tracking_flow(cur_frame, true); // track optical flow -#endif - consumed = false; - cv_pre_tracked.notify_all(); - } - // launch thread once - Detection - if (!t_detect.joinable()) { - t_detect = std::thread([&]() { - auto current_image = det_image; - consumed = true; - while (current_image.use_count() > 0 && !exit_flag) { - auto result = detector.detect_resized(*current_image, frame_size.width, frame_size.height, - thresh, false); // true - ++fps_det_counter; - std::unique_lock lock(mtx); - thread_result_vec = result; - consumed = true; - cv_detected.notify_all(); - if (detector.wait_stream) { - while (consumed && !exit_flag) cv_pre_tracked.wait(lock); - } - current_image = det_image; + if (detection_data.new_detection) { + tracker_flow.update_tracking_flow(detection_data.cap_frame, detection_data.result_vec); + while (track_optflow_queue.size() > 0) { + draw_frame = track_optflow_queue.back(); + result_vec = tracker_flow.tracking_flow(track_optflow_queue.front(), false); + track_optflow_queue.pop(); } - }); - } - //while (!consumed); // sync detection - - if (!cur_frame.empty()) { - steady_end = std::chrono::steady_clock::now(); - if (std::chrono::duration(steady_end - steady_start).count() >= 1) { - current_det_fps = fps_det_counter; - current_cap_fps = fps_cap_counter; - steady_start = steady_end; - fps_det_counter = 0; - 
fps_cap_counter = 0; } + else { + track_optflow_queue.push(cap_frame); + result_vec = tracker_flow.tracking_flow(cap_frame, false); + } + detection_data.new_detection = true; // to correct kalman filter +#endif //TRACK_OPTFLOW - large_preview.set(cur_frame, result_vec); -#ifdef TRACK_OPTFLOW - ++passed_flow_frames; - track_optflow_queue.push(cur_frame.clone()); - result_vec = tracker_flow.tracking_flow(cur_frame); // track optical flow - extrapolate_coords.update_result(result_vec, cur_time_extrapolate); - small_preview.draw(cur_frame, show_small_boxes); -#endif - auto result_vec_draw = result_vec; - if (extrapolate_flag) { - result_vec_draw = extrapolate_coords.predict(cur_time_extrapolate); - cv::putText(cur_frame, "extrapolate", cv::Point2f(10, 40), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.0, cv::Scalar(50, 50, 0), 2); + // track ID by using kalman filter + if (use_kalman_filter) { + if (detection_data.new_detection) { + result_vec = track_kalman.correct(result_vec); + } + else { + result_vec = track_kalman.predict(); + } } - draw_boxes(cur_frame, result_vec_draw, obj_names, current_det_fps, current_cap_fps); - //show_console_result(result_vec, obj_names); - large_preview.draw(cur_frame); - - cv::imshow("window name", cur_frame); - int key = cv::waitKey(3); // 3 or 16ms - if (key == 'f') show_small_boxes = !show_small_boxes; - if (key == 'p') while (true) if(cv::waitKey(100) == 'p') break; - if (key == 'e') extrapolate_flag = !extrapolate_flag; - if (key == 27) { exit_flag = true; break; } - - if (output_video.isOpened() && videowrite_ready) { - if (t_videowrite.joinable()) t_videowrite.join(); - write_frame = cur_frame.clone(); - videowrite_ready = false; - t_videowrite = std::thread([&]() { - output_video << write_frame; videowrite_ready = true; - }); + // track ID by using custom function + else { + int frame_story = std::max(5, current_fps_cap.load()); + result_vec = detector.tracking_id(result_vec, true, frame_story, 40); } + + if (use_zed_camera && 
!detection_data.zed_cloud.empty()) { + result_vec = get_3d_coordinates(result_vec, detection_data.zed_cloud); + } + + //small_preview.set(draw_frame, result_vec); + //large_preview.set(draw_frame, result_vec); + draw_boxes(draw_frame, result_vec, obj_names, current_fps_det, current_fps_cap); + //show_console_result(result_vec, obj_names, detection_data.frame_id); + //large_preview.draw(draw_frame); + //small_preview.draw(draw_frame, true); + + detection_data.result_vec = result_vec; + detection_data.draw_frame = draw_frame; + draw2show.send(detection_data); + if (send_network) draw2net.send(detection_data); + if (output_video.isOpened()) draw2write.send(detection_data); + } while (!detection_data.exit_flag); + std::cout << " t_draw exit \n"; + }); + + + // write frame to videofile + t_write = std::thread([&]() + { + if (output_video.isOpened()) { + detection_data_t detection_data; + cv::Mat output_frame; + do { + detection_data = draw2write.receive(); + if(detection_data.draw_frame.channels() == 4) cv::cvtColor(detection_data.draw_frame, output_frame, CV_RGBA2RGB); + else output_frame = detection_data.draw_frame; + output_video << output_frame; + } while (!detection_data.exit_flag); + output_video.release(); } + std::cout << " t_write exit \n"; + }); + + // send detection to the network + t_network = std::thread([&]() + { + if (send_network) { + detection_data_t detection_data; + do { + detection_data = draw2net.receive(); + + detector.send_json_http(detection_data.result_vec, obj_names, detection_data.frame_id, filename); -#ifndef TRACK_OPTFLOW - // wait detection result for video-file only (not for net-cam) - if (protocol != "rtsp://" && protocol != "http://" && protocol != "https:/") { - std::unique_lock lock(mtx); - while (!consumed) cv_detected.wait(lock); + } while (!detection_data.exit_flag); } -#endif - } - exit_flag = true; + std::cout << " t_network exit \n"; + }); + + + // show detection + detection_data_t detection_data; + do { + + steady_end = 
std::chrono::steady_clock::now(); + float time_sec = std::chrono::duration(steady_end - steady_start).count(); + if (time_sec >= 1) { + current_fps_det = fps_det_counter.load() / time_sec; + current_fps_cap = fps_cap_counter.load() / time_sec; + steady_start = steady_end; + fps_det_counter = 0; + fps_cap_counter = 0; + } + + detection_data = draw2show.receive(); + cv::Mat draw_frame = detection_data.draw_frame; + + //if (extrapolate_flag) { + // cv::putText(draw_frame, "extrapolate", cv::Point2f(10, 40), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.0, cv::Scalar(50, 50, 0), 2); + //} + + cv::imshow("window name", draw_frame); + int key = cv::waitKey(3); // 3 or 16ms + if (key == 'f') show_small_boxes = !show_small_boxes; + if (key == 'p') while (true) if (cv::waitKey(100) == 'p') break; + //if (key == 'e') extrapolate_flag = !extrapolate_flag; + if (key == 27) { exit_flag = true;} + + //std::cout << " current_fps_det = " << current_fps_det << ", current_fps_cap = " << current_fps_cap << std::endl; + } while (!detection_data.exit_flag); + std::cout << " show detection exit \n"; + + cv::destroyWindow("window name"); + // wait for all threads if (t_cap.joinable()) t_cap.join(); + if (t_prepare.joinable()) t_prepare.join(); if (t_detect.joinable()) t_detect.join(); - if (t_videowrite.joinable()) t_videowrite.join(); - std::cout << "Video ended \n"; + if (t_post.joinable()) t_post.join(); + if (t_draw.joinable()) t_draw.join(); + if (t_write.joinable()) t_write.join(); + if (t_network.joinable()) t_network.join(); + break; + } else if (file_ext == "txt") { // list of image files std::ifstream file(filename); @@ -470,14 +654,14 @@ int main(int argc, char *argv[]) show_console_result(result_vec, obj_names); cv::waitKey(0); } -#else +#else // OPENCV //std::vector result_vec = detector.detect(filename); auto img = detector.load_image(filename); std::vector result_vec = detector.detect(img); detector.free_image(img); show_console_result(result_vec, obj_names); -#endif +#endif // OPENCV 
} catch (std::exception &e) { std::cerr << "exception: " << e.what() << "\n"; getchar(); } catch (...) { std::cerr << "unknown exception \n"; getchar(); } diff --git a/src/yolo_layer.c b/src/yolo_layer.c index a9309d1d..d303b5aa 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -2,7 +2,7 @@ #include "activations.h" #include "blas.h" #include "box.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp index 1b07b43b..ab582bad 100644 --- a/src/yolo_v2_class.cpp +++ b/src/yolo_v2_class.cpp @@ -20,6 +20,7 @@ extern "C" { #include #include #include +#include //static Detector* detector = NULL; @@ -103,8 +104,8 @@ struct detector_gpu_t { LIB_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id) : cur_gpu_id(gpu_id) { wait_stream = 0; - int old_gpu_index; #ifdef GPU + int old_gpu_index; check_cuda( cudaGetDevice(&old_gpu_index) ); #endif @@ -150,7 +151,7 @@ LIB_API Detector::Detector(std::string cfg_filename, std::string weight_filename LIB_API Detector::~Detector() { detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - layer l = detector_gpu.net.layers[detector_gpu.net.n - 1]; + //layer l = detector_gpu.net.layers[detector_gpu.net.n - 1]; free(detector_gpu.track_id); @@ -158,8 +159,8 @@ LIB_API Detector::~Detector() for (int j = 0; j < NFRAMES; ++j) free(detector_gpu.predictions[j]); for (int j = 0; j < NFRAMES; ++j) if (detector_gpu.images[j].data) free(detector_gpu.images[j].data); - int old_gpu_index; #ifdef GPU + int old_gpu_index; cudaGetDevice(&old_gpu_index); cuda_set_device(detector_gpu.net.gpu_index); #endif @@ -240,8 +241,8 @@ LIB_API std::vector Detector::detect(image_t img, float thresh, bool use { detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); network &net = detector_gpu.net; - int old_gpu_index; #ifdef GPU + int old_gpu_index; cudaGetDevice(&old_gpu_index); if(cur_gpu_id != old_gpu_index) 
cudaSetDevice(net.gpu_index); @@ -250,8 +251,6 @@ LIB_API std::vector Detector::detect(image_t img, float thresh, bool use #endif //std::cout << "net.gpu_index = " << net.gpu_index << std::endl; - //float nms = .4; - image im; im.c = img.c; im.data = img.data; @@ -290,7 +289,7 @@ LIB_API std::vector Detector::detect(image_t img, float thresh, bool use std::vector bbox_vec; - for (size_t i = 0; i < nboxes; ++i) { + for (int i = 0; i < nboxes; ++i) { box b = dets[i].bbox; int const obj_id = max_index(dets[i].prob, l.classes); float const prob = dets[i].prob[obj_id]; @@ -305,6 +304,10 @@ LIB_API std::vector Detector::detect(image_t img, float thresh, bool use bbox.obj_id = obj_id; bbox.prob = prob; bbox.track_id = 0; + bbox.frames_counter = 0; + bbox.x_3d = NAN; + bbox.y_3d = NAN; + bbox.z_3d = NAN; bbox_vec.push_back(bbox); } @@ -379,3 +382,70 @@ LIB_API std::vector Detector::tracking_id(std::vector cur_bbox_v return cur_bbox_vec; } + + +LIB_API bool Detector::send_json_http(std::vector cur_bbox_vec, std::vector obj_names, int frame_id, std::string filename, int timeout, int port) +{ + //int timeout = 400000; + //int port = 8070; + //send_json(local_dets, local_nboxes, l.classes, demo_names, frame_id, demo_json_port, timeout); + + std::string send_str; + + char *tmp_buf = (char *)calloc(1024, sizeof(char)); + if (!filename.empty()) { + sprintf(tmp_buf, "{\n \"frame_id\":%d, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename.c_str()); + } + else { + sprintf(tmp_buf, "{\n \"frame_id\":%d, \n \"objects\": [ \n", frame_id); + } + send_str = tmp_buf; + free(tmp_buf); + + for (auto & i : cur_bbox_vec) { + char *buf = (char *)calloc(2048, sizeof(char)); + + sprintf(buf, " {\"class_id\":%d, \"name\":\"%s\", \"absolute_coordinates\":{\"center_x\":%d, \"center_y\":%d, \"width\":%d, \"height\":%d}, \"confidence\":%f", + i.obj_id, obj_names[i.obj_id].c_str(), i.x, i.y, i.w, i.h, i.prob); + + //sprintf(buf, " {\"class_id\":%d, \"name\":\"%s\", 
\"relative_coordinates\":{\"center_x\":%f, \"center_y\":%f, \"width\":%f, \"height\":%f}, \"confidence\":%f", + // i.obj_id, obj_names[i.obj_id], i.x, i.y, i.w, i.h, i.prob); + + send_str += buf; + + if (!std::isnan(i.z_3d)) { + sprintf(buf, "\n , \"coordinates_in_meters\":{\"x_3d\":%.2f, \"y_3d\":%.2f, \"z_3d\":%.2f}", + i.x_3d, i.y_3d, i.z_3d); + send_str += buf; + } + + send_str += "}\n"; + + free(buf); + } + + //send_str += "\n ] \n}, \n"; + send_str += "\n ] \n}"; + + send_json_custom(send_str.c_str(), port, timeout); + return true; +} + +void *Detector::get_cuda_context() +{ +#ifdef GPU + int old_gpu_index; + cudaGetDevice(&old_gpu_index); + if (cur_gpu_id != old_gpu_index) + cudaSetDevice(cur_gpu_id); + + void *cuda_context = cuda_get_context(); + + if (cur_gpu_id != old_gpu_index) + cudaSetDevice(old_gpu_index); + + return cuda_context; +#else // GPU + return NULL; +#endif // GPU +} \ No newline at end of file