Some stream fixes

pull/355/head
AlexeyAB 7 years ago
parent 0419c54042
commit 0cb81e5f50
  1. 1
      src/cuda.h
  2. 4
      src/network_kernels.cu
  3. 3
      src/yolo_console_dll.cpp
  4. 8
      src/yolo_v2_class.cpp
  5. 25
      src/yolo_v2_class.hpp

@ -26,6 +26,7 @@ int *cuda_make_int_array(size_t n);
void cuda_push_array(float *x_gpu, float *x, size_t n); void cuda_push_array(float *x_gpu, float *x, size_t n);
void cuda_pull_array(float *x_gpu, float *x, size_t n); void cuda_pull_array(float *x_gpu, float *x, size_t n);
void cuda_set_device(int n); void cuda_set_device(int n);
int cuda_get_device();
void cuda_free(float *x_gpu); void cuda_free(float *x_gpu);
void cuda_random(float *x_gpu, size_t n); void cuda_random(float *x_gpu, size_t n);
float cuda_compare(float *x_gpu, float *x, size_t n, char *s); float cuda_compare(float *x_gpu, float *x, size_t n, char *s);

@ -51,6 +51,7 @@ void forward_network_gpu(network net, network_state state)
fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1); fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
} }
l.forward_gpu(l, state); l.forward_gpu(l, state);
cudaStreamSynchronize(get_cuda_stream());
state.input = l.output_gpu; state.input = l.output_gpu;
} }
} }
@ -392,7 +393,8 @@ float *get_network_output_gpu(network net)
float *network_predict_gpu(network net, float *input) float *network_predict_gpu(network net, float *input)
{ {
cuda_set_device(net.gpu_index); if (net.gpu_index != cuda_get_device())
cuda_set_device(net.gpu_index);
int size = get_network_input_size(net) * net.batch; int size = get_network_input_size(net) * net.batch;
network_state state; network_state state;
state.index = 0; state.index = 0;

@ -158,7 +158,6 @@ int main(int argc, char *argv[])
det_image = detector.mat_to_image_resize(cur_frame); det_image = detector.mat_to_image_resize(cur_frame);
result_vec = thread_result_vec; result_vec = thread_result_vec;
result_vec = detector.tracking(result_vec); // comment it - if track_id is not required result_vec = detector.tracking(result_vec); // comment it - if track_id is not required
#ifdef TRACK_OPTFLOW #ifdef TRACK_OPTFLOW
// track optical flow // track optical flow
if (track_optflow_queue.size() > 0) { if (track_optflow_queue.size() > 0) {
@ -189,7 +188,7 @@ int main(int argc, char *argv[])
//std::vector<bbox_t> result; //std::vector<bbox_t> result;
auto result = detector.detect_resized(*current_image, frame_size, 0.24, false); // true auto result = detector.detect_resized(*current_image, frame_size, 0.24, false); // true
//Sleep(200); //Sleep(200);
Sleep(50); //Sleep(50);
++fps_det_counter; ++fps_det_counter;
std::unique_lock<std::mutex> lock(mtx); std::unique_lock<std::mutex> lock(mtx);
thread_result_vec = result; thread_result_vec = result;

@ -34,7 +34,7 @@ struct detector_gpu_t{
}; };
YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id) YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id) : cur_gpu_id(gpu_id)
{ {
int old_gpu_index; int old_gpu_index;
#ifdef GPU #ifdef GPU
@ -178,7 +178,8 @@ YOLODLL_API std::vector<bbox_t> Detector::detect(image_t img, float thresh, bool
int old_gpu_index; int old_gpu_index;
#ifdef GPU #ifdef GPU
cudaGetDevice(&old_gpu_index); cudaGetDevice(&old_gpu_index);
cudaSetDevice(net.gpu_index); if(cur_gpu_id != old_gpu_index)
cudaSetDevice(net.gpu_index);
#endif #endif
//std::cout << "net.gpu_index = " << net.gpu_index << std::endl; //std::cout << "net.gpu_index = " << net.gpu_index << std::endl;
@ -242,7 +243,8 @@ YOLODLL_API std::vector<bbox_t> Detector::detect(image_t img, float thresh, bool
free(sized.data); free(sized.data);
#ifdef GPU #ifdef GPU
cudaSetDevice(old_gpu_index); if (cur_gpu_id != old_gpu_index)
cudaSetDevice(old_gpu_index);
#endif #endif
return bbox_vec; return bbox_vec;

@ -47,6 +47,7 @@ struct image_t {
class Detector { class Detector {
std::shared_ptr<void> detector_gpu_ptr; std::shared_ptr<void> detector_gpu_ptr;
std::deque<std::vector<bbox_t>> prev_bbox_vec_deque; std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
const int cur_gpu_id;
public: public:
float nms = .4; float nms = .4;
@ -170,8 +171,8 @@ public:
sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create(); sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create();
sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(21, 21)); // 15, 21, 31 sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(21, 21)); // 15, 21, 31
sync_PyrLKOpticalFlow_gpu->setMaxLevel(5); // +- 50 ptx sync_PyrLKOpticalFlow_gpu->setMaxLevel(3); // +- 5 ptx
sync_PyrLKOpticalFlow_gpu->setNumIters(2000); // def: 30 sync_PyrLKOpticalFlow_gpu->setNumIters(1000); // def: 30
cv::cuda::setDevice(old_gpu_id); cv::cuda::setDevice(old_gpu_id);
} }
@ -190,9 +191,8 @@ public:
void update_tracking_flow(cv::Mat src_mat) void update_tracking_flow(cv::Mat src_mat)
{ {
int const old_gpu_id = cv::cuda::getDevice(); int const old_gpu_id = cv::cuda::getDevice();
cv::cuda::setDevice(gpu_id); if (old_gpu_id != gpu_id)
cv::cuda::setDevice(gpu_id);
//cv::cuda::Stream stream;
if (src_mat.channels() == 3) { if (src_mat.channels() == 3) {
if (src_mat_gpu.cols == 0) { if (src_mat_gpu.cols == 0) {
@ -203,7 +203,8 @@ public:
src_mat_gpu.upload(src_mat, stream); src_mat_gpu.upload(src_mat, stream);
cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 0, stream); cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 0, stream);
} }
cv::cuda::setDevice(old_gpu_id); if (old_gpu_id != gpu_id)
cv::cuda::setDevice(old_gpu_id);
} }
@ -215,9 +216,8 @@ public:
} }
int const old_gpu_id = cv::cuda::getDevice(); int const old_gpu_id = cv::cuda::getDevice();
cv::cuda::setDevice(gpu_id); if(old_gpu_id != gpu_id)
cv::cuda::setDevice(gpu_id);
//cv::cuda::Stream stream;
if (dst_mat_gpu.cols == 0) { if (dst_mat_gpu.cols == 0) {
dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type()); dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type());
@ -225,9 +225,9 @@ public:
tmp_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1); tmp_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1);
} }
dst_mat_gpu.upload(dst_mat, stream); dst_mat_gpu.upload(dst_mat, stream);
cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, CV_BGR2GRAY, 0, stream); cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, CV_BGR2GRAY, 0, stream);
if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) { if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) {
@ -237,6 +237,8 @@ public:
return cur_bbox_vec; return cur_bbox_vec;
} }
//return cur_bbox_vec;
cv::Mat prev_pts, prev_pts_flow_cpu, cur_pts_flow_cpu; cv::Mat prev_pts, prev_pts_flow_cpu, cur_pts_flow_cpu;
for (auto &i : cur_bbox_vec) { for (auto &i : cur_bbox_vec) {
@ -298,7 +300,8 @@ public:
} }
} }
cv::cuda::setDevice(old_gpu_id); if (old_gpu_id != gpu_id)
cv::cuda::setDevice(old_gpu_id);
return result_bbox_vec; return result_bbox_vec;
} }

Loading…
Cancel
Save