diff --git a/src/detector.c b/src/detector.c index 5f2cec83..f401d20a 100644 --- a/src/detector.c +++ b/src/detector.c @@ -70,6 +70,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i //int N = plist->size; char **paths = (char **)list_to_array(plist); + int init_w = net.w; + int init_h = net.h; + load_args args = {0}; args.w = net.w; args.h = net.h; @@ -95,8 +98,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i while(get_current_batch(net) < net.max_batches){ if(l.random && count++%10 == 0){ printf("Resizing\n"); - int dim = (rand() % 10 + 10) * 32; - if (get_current_batch(net)+100 > net.max_batches) dim = 544; + int dim = (rand() % 12 + (init_w/32 - 5)) * 32; // +-160 + //int dim = (rand() % 10 + 10) * 32; + //if (get_current_batch(net)+100 > net.max_batches) dim = 544; //int dim = (rand() % 4 + 16) * 32; printf("%d\n", dim); args.w = dim; @@ -152,7 +156,8 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i i = get_current_batch(net); printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); - if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) { + //if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) { + if (i % 100 == 0) { #ifdef GPU if (ngpus != 1) sync_nets(nets, ngpus, 0); #endif diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 9ecabdf3..64f4f9b8 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -114,6 +114,7 @@ void forward_backward_network_gpu(network net, float *x, float *y) state.truth = *net.truth_gpu; state.train = 1; forward_network_gpu(net, state); + cudaStreamSynchronize(get_cuda_stream()); backward_network_gpu(net, state); } diff --git a/src/region_layer.c b/src/region_layer.c index 0638301f..7772bc30 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -419,6 +419,7 @@ void forward_region_layer_gpu(const region_layer l, network_state state) free(cpu_state.input); if(!state.train) return; cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); + cudaStreamSynchronize(get_cuda_stream()); if(cpu_state.truth) free(cpu_state.truth); } diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp index 5acca7cf..a9ce6b9e 100644 --- a/src/yolo_console_dll.cpp +++ b/src/yolo_console_dll.cpp @@ -194,7 +194,7 @@ int main(int argc, char *argv[]) auto current_image = det_image; consumed = true; while (current_image.use_count() > 0) { - auto result = detector.detect_resized(*current_image, frame_size, 0.24, false); // true + auto result = detector.detect_resized(*current_image, frame_size, 0.20, false); // true ++fps_det_counter; std::unique_lock lock(mtx); thread_result_vec = result; @@ -236,11 +236,13 @@ int main(int argc, char *argv[]) } } +#ifndef TRACK_OPTFLOW // wait detection result for video-file only (not for net-cam) - //if (protocol != "rtsp://" && protocol != "http://" && protocol != "https:/") { - // std::unique_lock lock(mtx); - // while (!consumed) cv_detected.wait(lock); - //} + if (protocol != "rtsp://" && protocol != "http://" && protocol != "https:/") { + std::unique_lock lock(mtx); + while (!consumed) cv_detected.wait(lock); + } +#endif } if (t_cap.joinable()) t_cap.join(); if (t_detect.joinable()) t_detect.join();