Improved speed of yolo_console_dll.cpp - 40 FPS on 4K using GeForce GTX 960

8 years ago · 8fa9f44211
parent 6f1ce1f3e8
commit 8fa9f44211
3 changed files with 31 additions and 16 deletions
--- a/src/yolo_console_dll.cpp
+++ b/src/yolo_console_dll.cpp
@@ -101,6 +101,7 @@ int main(int argc, char *argv[])
 				protocol == "rtsp://" || protocol == "http://" || protocol == "https:/")	// video network stream
 			{
 				cv::Mat cap_frame, cur_frame, det_frame, write_frame;
+				std::shared_ptr<image_t> det_image;
 				std::vector<bbox_t> result_vec, thread_result_vec;
 				detector.nms = 0.02;	// comment it - if track_id is not required
 				std::atomic<bool> consumed, videowrite_ready;
@@ -116,9 +117,10 @@ int main(int argc, char *argv[])
 				std::chrono::steady_clock::time_point steady_start, steady_end;
 				cv::VideoCapture cap(filename); cap >> cur_frame;
 				int const video_fps = cap.get(CV_CAP_PROP_FPS);
+				cv::Size const frame_size = cur_frame.size();
 				cv::VideoWriter output_video;
 				if (save_output_videofile)
-					output_video.open(out_videofile, CV_FOURCC('D', 'I', 'V', 'X'), std::max(35, video_fps), cur_frame.size(), true);
+					output_video.open(out_videofile, CV_FOURCC('D', 'I', 'V', 'X'), std::max(35, video_fps), frame_size, true);

 				while (!cur_frame.empty()) {
 					if (t_cap.joinable()) {
@@ -132,7 +134,7 @@ int main(int argc, char *argv[])
 					if(consumed)
 					{
 						std::unique_lock<std::mutex> lock(mtx);
-						cur_frame.copyTo(det_frame);
+						det_image = detector.mat_to_image_resize(cur_frame);
 						result_vec = thread_result_vec;
 						result_vec = detector.tracking(result_vec);	// comment it - if track_id is not required
 						consumed = false;
@@ -140,14 +142,14 @@ int main(int argc, char *argv[])
 					// launch thread once
 					if (!t_detect.joinable()) {
 						t_detect = std::thread([&]() {
-							cv::Mat current_mat = det_frame.clone();
+							auto current_image = det_image;
 							consumed = true;
-							while (!current_mat.empty()) {
-								auto result = detector.detect(current_mat, 0.24, true);
+							while (current_image.use_count() > 0) {
+								auto result = detector.detect_resized(*current_image, frame_size, 0.24, true);
 								++fps_det_counter;
 								std::unique_lock<std::mutex> lock(mtx);
 								thread_result_vec = result;
-								current_mat = det_frame.clone();
+								current_image = det_image;
 								consumed = true;
 								cv.notify_all();
 							}
--- a/src/yolo_v2_class.cpp
+++ b/src/yolo_v2_class.cpp
@@ -109,11 +109,11 @@ YOLODLL_API Detector::~Detector()
 #endif
 }

-YOLODLL_API int Detector::get_net_width() {
+YOLODLL_API int Detector::get_net_width() const {
 	detector_gpu_t &detector_gpu = *reinterpret_cast<detector_gpu_t *>(detector_gpu_ptr.get());
 	return detector_gpu.net.w;
 }
-YOLODLL_API int Detector::get_net_height() {
+YOLODLL_API int Detector::get_net_height() const {
 	detector_gpu_t &detector_gpu = *reinterpret_cast<detector_gpu_t *>(detector_gpu_ptr.get());
 	return detector_gpu.net.h;
 }
--- a/src/yolo_v2_class.hpp
+++ b/src/yolo_v2_class.hpp
@@ -51,8 +51,8 @@ public:
 	YOLODLL_API std::vector<bbox_t> detect(image_t img, float thresh = 0.2, bool use_mean = false);
 	static YOLODLL_API image_t load_image(std::string image_filename);
 	static YOLODLL_API void free_image(image_t m);
-	YOLODLL_API int get_net_width();
-	YOLODLL_API int get_net_height();
+	YOLODLL_API int get_net_width() const;
+	YOLODLL_API int get_net_height() const;

 	YOLODLL_API std::vector<bbox_t> tracking(std::vector<bbox_t> cur_bbox_vec, int const frames_story = 6);

@@ -60,16 +60,29 @@ public:
 	std::vector<bbox_t> detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false)
 	{
 		if(mat.data == NULL)
-			throw std::runtime_error("file not found");
-		cv::Mat det_mat;
-		cv::resize(mat, det_mat, cv::Size(get_net_width(), get_net_height()));
-		auto image_ptr = mat_to_image(det_mat);
-		auto detection_boxes = detect(*image_ptr, thresh, use_mean);
-		float wk = (float)mat.cols / det_mat.cols, hk = (float)mat.rows / det_mat.rows;
+			throw std::runtime_error("Image is empty");
+		auto image_ptr = mat_to_image_resize(mat);
+		return detect_resized(*image_ptr, mat.size(), thresh, use_mean);
+	}
+
+	std::vector<bbox_t> detect_resized(image_t img, cv::Size init_size, float thresh = 0.2, bool use_mean = false)
+	{
+		if (img.data == NULL)
+			throw std::runtime_error("Image is empty");
+		auto detection_boxes = detect(img, thresh, use_mean);
+		float wk = (float)init_size.width / img.w, hk = (float)init_size.height / img.h;
 		for (auto &i : detection_boxes) i.x *= wk, i.w *= wk, i.y *= hk, i.h *= hk;
 		return detection_boxes;
 	}

+	std::shared_ptr<image_t> mat_to_image_resize(cv::Mat mat) const
+	{
+		if (mat.data == NULL) return std::shared_ptr<image_t>(NULL);
+		cv::Mat det_mat;
+		cv::resize(mat, det_mat, cv::Size(get_net_width(), get_net_height()));
+		return mat_to_image(det_mat);
+	}
+
 	static std::shared_ptr<image_t> mat_to_image(cv::Mat img)
 	{
 		std::shared_ptr<image_t> image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; });