From 3659d84f24ddc95102483cca430e01dc05568cbb Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Fri, 31 Mar 2017 21:42:51 +0300 Subject: [PATCH] Added tracking: numerating the detected objects on video --- src/yolo_console_dll.cpp | 44 +++++++++++++++++++++++-------- src/yolo_v2_class.cpp | 3 ++- src/yolo_v2_class.hpp | 57 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 12 deletions(-) diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp index 4938ba86..5172b5e3 100644 --- a/src/yolo_console_dll.cpp +++ b/src/yolo_console_dll.cpp @@ -1,9 +1,10 @@ #include +#include #include #include #include -//#define OPENCV +#define OPENCV #include "yolo_v2_class.hpp" // imported functions from DLL @@ -13,21 +14,27 @@ #pragma comment(lib, "opencv_core249.lib") #pragma comment(lib, "opencv_imgproc249.lib") #pragma comment(lib, "opencv_highgui249.lib") -void draw_boxes(cv::Mat mat_img, std::vector result_vec) { +void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector obj_names, unsigned int wait_msec = 0) { for (auto &i : result_vec) { - cv::rectangle(mat_img, cv::Rect(i.x, i.y, i.w, i.h), cv::Scalar(50, 200, 50), 3); + cv::Scalar color(60, 160, 260); + cv::rectangle(mat_img, cv::Rect(i.x, i.y, i.w, i.h), color, 3); + if(obj_names.size() > i.obj_id) + putText(mat_img, obj_names[i.obj_id], cv::Point2f(i.x, i.y - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, color); + if(i.track_id > 0) + putText(mat_img, std::to_string(i.track_id), cv::Point2f(i.x+5, i.y + 15), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, color); } cv::imshow("window name", mat_img); - cv::waitKey(0); + cv::waitKey(wait_msec); } #endif // OPENCV + void show_result(std::vector const result_vec, std::vector const obj_names) { for (auto &i : result_vec) { if (obj_names.size() > i.obj_id) std::cout << obj_names[i.obj_id] << " - "; - std::cout << "obj_id = " << i.obj_id << " - x = " << i.x << ", y = " << i.y + std::cout << "obj_id = " << i.obj_id << ", x = " << i.x << ", y = " << i.y << ", w = " << i.w << ", h = " << i.h - << ", prob = " << i.prob << std::endl; + << std::setprecision(3) << ", prob = " << i.prob << std::endl; } } @@ -50,23 +57,38 @@ int main() while (true) { std::string filename; - std::cout << "input image filename: "; + std::cout << "input image or video filename: "; std::cin >> filename; if (filename.size() == 0) break; try { #ifdef OPENCV - cv::Mat mat_img = cv::imread(filename); - std::vector result_vec = detector.detect(mat_img); - draw_boxes(mat_img, result_vec); + std::string const file_ext = filename.substr(filename.find_last_of(".") + 1); + if (file_ext == "avi" || file_ext == "mp4" || file_ext == "mjpg" || file_ext == "mov") { // video file + cv::Mat frame; + detector.nms = 0.02; // comment it - if track_id is not required + for(cv::VideoCapture cap(filename); cap >> frame, cap.isOpened();) { + std::vector result_vec = detector.detect(frame, 0.2); + result_vec = detector.tracking(result_vec); // comment it - if track_id is not required + + draw_boxes(frame, result_vec, obj_names, 3); + show_result(result_vec, obj_names); + } + } + else { // image file + cv::Mat mat_img = cv::imread(filename); + std::vector result_vec = detector.detect(mat_img); + draw_boxes(mat_img, result_vec, obj_names); + show_result(result_vec, obj_names); + } #else //std::vector result_vec = detector.detect(filename); auto img = detector.load_image(filename); std::vector result_vec = detector.detect(img); detector.free_image(img); -#endif show_result(result_vec, obj_names); +#endif } catch (std::exception &e) { std::cerr << "exception: " << e.what() << "\n"; getchar(); } catch (...) { std::cerr << "unknown exception \n"; getchar(); } diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp index ea13ea34..8643a22a 100644 --- a/src/yolo_v2_class.cpp +++ b/src/yolo_v2_class.cpp @@ -154,7 +154,7 @@ YOLODLL_API std::vector Detector::detect(image_t img, float thresh) cudaSetDevice(net.gpu_index); //std::cout << "net.gpu_index = " << net.gpu_index << std::endl; - float nms = .4; + //float nms = .4; image im; im.c = img.c; @@ -189,6 +189,7 @@ YOLODLL_API std::vector Detector::detect(image_t img, float thresh) bbox.h = b.h*im.h; bbox.obj_id = obj_id; bbox.prob = prob; + bbox.track_id = 0; bbox_vec.push_back(bbox); } diff --git a/src/yolo_v2_class.hpp b/src/yolo_v2_class.hpp index e3d79331..37fcd61a 100644 --- a/src/yolo_v2_class.hpp +++ b/src/yolo_v2_class.hpp @@ -1,6 +1,8 @@ #pragma once #include #include +#include +#include #ifdef OPENCV #include // C++ @@ -18,6 +20,7 @@ struct bbox_t { unsigned int x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box float prob; // confidence - probability that the object was found correctly unsigned int obj_id; // class of object - from range [0, classes-1] + unsigned int track_id; // tracking id for video (0 - untracked, 1 - inf - tracked object) }; struct image_t { @@ -31,6 +34,7 @@ struct image_t { class Detector { std::shared_ptr detector_gpu_ptr; public: + float nms = .4; YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0); YOLODLL_API ~Detector(); @@ -107,6 +111,59 @@ private: } #endif // OPENCV + + std::deque> prev_bbox_vec_deque; + +public: + std::vector tracking(std::vector cur_bbox_vec, int const frames_story = 4) + { + bool prev_track_id_present = false; + for (auto &i : prev_bbox_vec_deque) + if (i.size() > 0) prev_track_id_present = true; + + static unsigned int track_id = 1; + + if(!prev_track_id_present) { + //track_id = 1; + for (size_t i = 0; i < cur_bbox_vec.size(); ++i) + cur_bbox_vec[i].track_id = track_id++; + prev_bbox_vec_deque.push_front(cur_bbox_vec); + if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back(); + return cur_bbox_vec; + } + + std::vector dist_vec(cur_bbox_vec.size(), std::numeric_limits::max()); + + for (auto &prev_bbox_vec : prev_bbox_vec_deque) { + for (auto &i : prev_bbox_vec) { + int cur_index = -1; + for (size_t m = 0; m < cur_bbox_vec.size(); ++m) { + bbox_t const& k = cur_bbox_vec[m]; + if (i.obj_id == k.obj_id) { + unsigned int cur_dist = sqrt(((float)i.x - k.x)*((float)i.x - k.x) + ((float)i.y - k.y)*((float)i.y - k.y)); + if (cur_dist < 100 && (k.track_id == 0 || dist_vec[m] > cur_dist)) { + dist_vec[m] = cur_dist; + cur_index = m; + } + } + } + + bool track_id_absent = !std::any_of(cur_bbox_vec.begin(), cur_bbox_vec.end(), [&](bbox_t const& b) { return b.track_id == i.track_id; }); + + if (cur_index >= 0 && track_id_absent) + cur_bbox_vec[cur_index].track_id = i.track_id; + } + } + + for (size_t i = 0; i < cur_bbox_vec.size(); ++i) + if (cur_bbox_vec[i].track_id == 0) + cur_bbox_vec[i].track_id = track_id++; + + prev_bbox_vec_deque.push_front(cur_bbox_vec); + if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back(); + + return cur_bbox_vec; + } };