From af4cc09a38ee33e69885c929aae0cf25dba29bfa Mon Sep 17 00:00:00 2001 From: Chih-Hsu Lin Date: Fri, 1 Jun 2018 00:07:04 -0500 Subject: [PATCH 01/23] Fix the path to voc.data --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e56efa8c..35bb7549 100644 --- a/README.md +++ b/README.md @@ -210,11 +210,11 @@ Then add to your created project: 7. Start training by using `train_voc.cmd` or by using the command line: - `darknet.exe detector train data/voc.data cfg/yolov3-voc.cfg darknet53.conv.74` + `darknet.exe detector train cfg/voc.data cfg/yolov3-voc.cfg darknet53.conv.74` (**Note:** To disable Loss-Window use flag `-dont_show`. If you are using CPU, try `darknet_no_gpu.exe` instead of `darknet.exe`.) -If required change pathes in the file `build\darknet\x64\data\voc.data` +If required change pathes in the file `build\darknet\cfg\voc.data` More information about training by the link: http://pjreddie.com/darknet/yolo/#train-voc From 440c942cd318f56c145ba0eceb98f11173be9708 Mon Sep 17 00:00:00 2001 From: apatsekin Date: Sun, 23 Dec 2018 12:03:17 -0500 Subject: [PATCH 02/23] Update README.md (steps= for multi-gpu) steps= remark for multi-gpu training --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c8f1a1c6..03913cb6 100644 --- a/README.md +++ b/README.md @@ -255,7 +255,7 @@ More information about training by the link: http://pjreddie.com/darknet/yolo/#t 2. Then stop and by using partially-trained model `/backup/yolov3-voc_1000.weights` run training with multigpu (up to 4 GPUs): `darknet.exe detector train data/voc.data cfg/yolov3-voc.cfg /backup/yolov3-voc_1000.weights -gpus 0,1,2,3` -Only for small datasets sometimes better to decrease learning rate, for 4 GPUs set `learning_rate = 0.00025` (i.e. learning_rate = 0.001 / GPUs). In this case also increase 4x times `burn_in =` and `max_batches =` in your cfg-file. I.e. use `burn_in = 4000` instead of `1000`. +Only for small datasets sometimes better to decrease learning rate, for 4 GPUs set `learning_rate = 0.00025` (i.e. learning_rate = 0.001 / GPUs). In this case also increase 4x times `burn_in =` and `max_batches =` in your cfg-file. I.e. use `burn_in = 4000` instead of `1000`. Same goes for `steps=` if `policy=steps` is set. https://groups.google.com/d/msg/darknet/NbJqonJBTSY/Te5PfIpuCAAJ From 7612131abb6b6510d4136068028cf54130f6bf54 Mon Sep 17 00:00:00 2001 From: duohappy Date: Sat, 29 Dec 2018 20:14:08 +0800 Subject: [PATCH 03/23] =?UTF-8?q?in=20yolo=5Fcpp=5Fdll.sln,=20miss=20a=20?= =?UTF-8?q?=E2=80=9C\=E2=80=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I open yolo_cpp_dll.sln, and check the config. "\" is missed. 
Linker -> General -> Additional Library Directories $(CUDA_PATH)lib\$(PlatformName)-->$(CUDA_PATH)\lib\$(PlatformName) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ad506796..a9feedaa 100644 --- a/README.md +++ b/README.md @@ -201,7 +201,7 @@ Then add to your created project: - add to project all `.c` & `.cu` files and file `http_stream.cpp` from `\src` - (right click on project) -> properties -> Linker -> General -> Additional Library Directories, put here: -`C:\opencv_3.0\opencv\build\x64\vc14\lib;$(CUDA_PATH)lib\$(PlatformName);$(cudnn)\lib\x64;%(AdditionalLibraryDirectories)` +`C:\opencv_3.0\opencv\build\x64\vc14\lib;$(CUDA_PATH)\lib\$(PlatformName);$(cudnn)\lib\x64;%(AdditionalLibraryDirectories)` - (right click on project) -> properties -> Linker -> Input -> Additional dependecies, put here: `..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cudnn.lib;%(AdditionalDependencies)` From ba1f1363c07a0f1085d37febedaf20d689eec8e4 Mon Sep 17 00:00:00 2001 From: David Smith Date: Mon, 11 Feb 2019 18:20:16 -0600 Subject: [PATCH 04/23] fix assorted compile warnings --- src/captcha.c | 2 +- src/cifar.c | 4 ++-- src/classifier.c | 4 ++-- src/compare.c | 2 +- src/convolutional_layer.h | 1 + src/cost_layer.c | 3 ++- src/crnn_layer.c | 6 +++--- src/go.c | 2 +- src/matrix.c | 3 +-- src/network.c | 6 +++--- src/region_layer.c | 4 +++- src/rnn.c | 2 +- src/tag.c | 2 +- src/writing.c | 2 +- 14 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/captcha.c b/src/captcha.c index 3d449b26..755388aa 100644 --- a/src/captcha.c +++ b/src/captcha.c @@ -85,7 +85,7 @@ void train_captcha(char *cfgfile, char *weightfile) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if(i%100==0){ char buff[256]; diff --git a/src/cifar.c b/src/cifar.c index 4a27e795..d671a7ca 100644 --- a/src/cifar.c +++ b/src/cifar.c @@ -33,7 +33,7 @@ void train_cifar(char *cfgfile, char *weightfile) float loss = train_network_sgd(net, train, 1); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); if(*net.seen/N > epoch){ epoch = *net.seen/N; char buff[256]; @@ -89,7 +89,7 @@ void train_cifar_distill(char *cfgfile, char *weightfile) float loss = train_network_sgd(net, train, 1); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); if(*net.seen/N > epoch){ epoch = *net.seen/N; char buff[256]; diff --git a/src/classifier.c b/src/classifier.c index 11b6968d..68becc8a 100644 --- a/src/classifier.c +++ 
b/src/classifier.c @@ -179,7 +179,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, draw_precision = 1; } - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); #ifdef OPENCV draw_train_loss(img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, "top5", dont_show, mjpeg_port); #endif // OPENCV @@ -200,7 +200,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, if (ngpus != 1) sync_nets(nets, ngpus, 0); #endif char buff[256]; - sprintf(buff, "%s/%s_last.weights", backup_directory, base, i); + sprintf(buff, "%s/%s_last.weights", backup_directory, base); save_weights(net, buff); } free_data(train); diff --git a/src/compare.c b/src/compare.c index 803d812d..39bb4530 100644 --- a/src/compare.c +++ b/src/compare.c @@ -54,7 +54,7 @@ void train_compare(char *cfgfile, char *weightfile) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%.3f: %f, %f avg, %lf seconds, %d images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); + printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if(i%100 == 0){ char buff[256]; diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index d6ec9551..360cd43b 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -48,6 +48,7 @@ image get_convolutional_image(convolutional_layer layer); image get_convolutional_delta(convolutional_layer layer); image get_convolutional_weight(convolutional_layer layer, int i); + int convolutional_out_height(convolutional_layer layer); int convolutional_out_width(convolutional_layer layer); void rescale_weights(convolutional_layer l, float scale, float trans); diff --git a/src/cost_layer.c b/src/cost_layer.c index 39d2398b..c8d7661a 100644 --- a/src/cost_layer.c +++ b/src/cost_layer.c @@ -25,8 +25,9 @@ char *get_cost_string(COST_TYPE a) return "masked"; case SMOOTH: return "smooth"; + default: + return "sse"; } - return "sse"; } cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) diff --git a/src/crnn_layer.c b/src/crnn_layer.c index 5a9d6f58..3fcbd65c 100644 --- a/src/crnn_layer.c +++ b/src/crnn_layer.c @@ -47,19 +47,19 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou l.state = calloc(l.hidden*batch*(steps+1), sizeof(float)); l.input_layer = malloc(sizeof(layer)); - fprintf(stderr, ""); + //fprintf(stderr, ""); *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0); l.input_layer->batch = batch; if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size; l.self_layer = malloc(sizeof(layer)); - fprintf(stderr, ""); + //fprintf(stderr, ""); *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0); l.self_layer->batch = batch; if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = 
l.self_layer->workspace_size; l.output_layer = malloc(sizeof(layer)); - fprintf(stderr, ""); + //fprintf(stderr, ""); *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0); l.output_layer->batch = batch; if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size; diff --git a/src/go.c b/src/go.c index 223d136d..f973a717 100644 --- a/src/go.c +++ b/src/go.c @@ -144,7 +144,7 @@ void train_go(char *cfgfile, char *weightfile) float loss = train_network_datum(net, board, move) / net.batch; if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); if(*net.seen/N > epoch){ epoch = *net.seen/N; char buff[256]; diff --git a/src/matrix.c b/src/matrix.c index 74300280..98ef5535 100644 --- a/src/matrix.c +++ b/src/matrix.c @@ -257,7 +257,7 @@ void kmeans_maximization(matrix data, int *assignments, matrix centers) void random_centers(matrix data, matrix centers) { - int i, j; + int i; int *s = sample(data.rows); for (i = 0; i < centers.rows; ++i) { copy(data.vals[s[i]], centers.vals[i], data.cols); @@ -281,7 +281,6 @@ int *sample(int n) float dist(float *x, float *y, int n) { - int i; //printf(" x0 = %f, x1 = %f, y0 = %f, y1 = %f \n", x[0], x[1], y[0], y[1]); float mw = (x[0] < y[0]) ? x[0] : y[0]; float mh = (x[1] < y[1]) ? x[1] : y[1]; diff --git a/src/network.c b/src/network.c index 226fe410..9f53847b 100644 --- a/src/network.c +++ b/src/network.c @@ -485,8 +485,8 @@ int resize_network(network *net, int w, int h) h = l.out_h; if(l.type == AVGPOOL) break; } - const int size = get_network_input_size(*net) * net->batch; #ifdef GPU + const int size = get_network_input_size(*net) * net->batch; if(gpu_index >= 0){ printf(" try to allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000); net->workspace = cuda_make_array(0, workspace_size/sizeof(float) + 1); @@ -725,10 +725,10 @@ char *detection_to_json(detection *dets, int nboxes, int classes, char **names, char *send_buf = (char *)calloc(1024, sizeof(char)); if (filename) { - sprintf(send_buf, "{\n \"frame_id\":%d, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename); + sprintf(send_buf, "{\n \"frame_id\":%lld, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename); } else { - sprintf(send_buf, "{\n \"frame_id\":%d, \n \"objects\": [ \n", frame_id); + sprintf(send_buf, "{\n \"frame_id\":%lld, \n \"objects\": [ \n", frame_id); } int i, j; diff --git a/src/region_layer.c b/src/region_layer.c index 42730e09..7f3988c2 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -53,8 +53,10 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int void resize_region_layer(layer *l, int w, int h) { +#ifdef GPU int old_w = l->w; int old_h = l->h; +#endif l->w = w; l->h = h; @@ -589,4 +591,4 @@ void zero_objectness(layer l) l.output[obj_index] = 0; } } -} \ No newline at end of file +} diff --git a/src/rnn.c b/src/rnn.c index 3fb634e4..96de670f 100644 --- a/src/rnn.c +++ b/src/rnn.c @@ -163,7 +163,7 @@ void train_char_rnn(char *cfgfile, char *weightfile, char 
*filename, int clear, int i = (*net.seen)/net.batch; int streams = batch/steps; - printf("\n batch = %d, steps = %d, streams = %d, subdivisions = %d, text_size = %d \n", batch, steps, streams, net.subdivisions, size); + printf("\n batch = %d, steps = %d, streams = %d, subdivisions = %d, text_size = %ld \n", batch, steps, streams, net.subdivisions, size); printf(" global_batch = %d \n", batch*net.subdivisions); size_t *offsets = calloc(streams, sizeof(size_t)); int j; diff --git a/src/tag.c b/src/tag.c index 1e43e7d9..a53af044 100644 --- a/src/tag.c +++ b/src/tag.c @@ -64,7 +64,7 @@ void train_tag(char *cfgfile, char *weightfile, int clear) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); free_data(train); if(*net.seen/N > epoch){ epoch = *net.seen/N; diff --git a/src/writing.c b/src/writing.c index 0a76d48e..345ad271 100644 --- a/src/writing.c +++ b/src/writing.c @@ -69,7 +69,7 @@ void train_writing(char *cfgfile, char *weightfile) if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); free_data(train); if(get_current_batch(net)%100 == 0){ char buff[256]; From 17fed7c4e254708699c905e958c76ed0cf2ed9d9 Mon Sep 17 00:00:00 2001 From: Steve Zaretti Date: Wed, 6 Mar 2019 10:19:13 +0100 Subject: [PATCH 05/23] Segfault if height > width ./darknet classifier train data/sort.data cfg/alexnet.cfg -dont_show alexnet 1 compute_capability = 750, cudnn_half = 1 layer filters size input output 0 conv 96 11 x11 / 4 128 x 256 x 3 -> 30 x 62 x 96 0.130 BF 1 max 3 x 3 / 2 30 x 62 x 96 -> 14 x 30 x 96 0.000 BF 2 conv 256 5 x 5 / 1 14 x 30 x 96 -> 14 x 30 x 256 0.516 BF 3 max 3 x 3 / 2 14 x 30 x 256 -> 6 x 14 x 256 0.000 BF 4 conv 384 3 x 3 / 1 6 x 14 x 256 -> 6 x 14 x 384 0.149 BF 5 conv 384 3 x 3 / 1 6 x 14 x 384 -> 6 x 14 x 384 0.223 BF 6 conv 256 3 x 3 / 1 6 x 14 x 384 -> 6 x 14 x 256 0.149 BF 7 max 3 x 3 / 2 6 x 14 x 256 -> 2 x 6 x 256 0.000 BF 8 connected 3072 -> 4096 9 dropout p = 0.50 4096 -> 4096 10 connected 4096 -> 4096 11 dropout p = 0.50 4096 -> 4096 12 connected 4096 -> 1261 13 softmax 1261 14 cost 1261 Total BFLOPS 1.167 Allocate additional workspace_size = 1940.23 MB Learning Rate: 0.01, Momentum: 0.9, Decay: 0.0005 893253 Loaded: 0.205056 seconds Segmentation fault (core dumped) This PR fix this issue: ./darknet classifier train data/sort.data cfg/alexnet.cfg -dont_show alexnet 1 compute_capability = 750, cudnn_half = 1 layer filters size input output 0 conv 96 11 x11 / 4 128 x 256 x 3 -> 30 x 62 x 96 0.130 BF 1 max 3 x 3 / 2 30 x 62 x 96 -> 14 x 30 x 96 0.000 BF 2 conv 256 5 x 5 / 1 14 x 30 x 96 -> 14 x 30 x 256 0.516 BF 3 max 3 x 3 / 2 14 x 30 x 256 -> 6 x 14 x 256 0.000 BF 4 conv 384 3 x 3 / 1 6 x 14 x 256 -> 6 x 14 x 384 0.149 BF 5 conv 384 3 x 3 / 1 6 x 14 x 384 -> 6 x 14 x 384 0.223 BF 6 
conv 256 3 x 3 / 1 6 x 14 x 384 -> 6 x 14 x 256 0.149 BF 7 max 3 x 3 / 2 6 x 14 x 256 -> 2 x 6 x 256 0.000 BF 8 connected 3072 -> 4096 9 dropout p = 0.50 4096 -> 4096 10 connected 4096 -> 4096 11 dropout p = 0.50 4096 -> 4096 12 connected 4096 -> 1261 13 softmax 1261 14 cost 1261 Total BFLOPS 1.167 Allocate additional workspace_size = 1940.23 MB Learning Rate: 0.01, Momentum: 0.9, Decay: 0.0005 893253 Loaded: 0.509795 seconds 1, 0.000: 4.070450, 4.070450 avg, 0.010000 rate, 0.310202 seconds, 128 images Loaded: 0.416345 seconds 2, 0.000: 4.069521, 4.070357 avg, 0.010000 rate, 0.055622 seconds, 256 images Loaded: 0.451087 seconds I'll tell you later if training is converging. --- src/classifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/classifier.c b/src/classifier.c index 6207385d..b5c78d65 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -100,7 +100,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, args.exposure = net.exposure; args.saturation = net.saturation; args.hue = net.hue; - args.size = net.w; + args.size = net.w > net.h ? net.w : net.h; args.paths = paths; args.classes = classes; From fc2996968ecaa8ce36508f4036bea082684850a1 Mon Sep 17 00:00:00 2001 From: Josh Veitch-Michaelis Date: Fri, 15 Mar 2019 21:58:53 +0000 Subject: [PATCH 06/23] find annotation files for tiff images --- src/utils.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utils.c b/src/utils.c index b4d17c23..904aa2e0 100644 --- a/src/utils.c +++ b/src/utils.c @@ -252,6 +252,8 @@ void replace_image_to_label(const char* input_path, char* output_path) find_replace_extension(output_path, ".BMP", ".txt", output_path); find_replace_extension(output_path, ".ppm", ".txt", output_path); find_replace_extension(output_path, ".PPM", ".txt", output_path); + find_replace_extension(output_path, ".tiff", ".txt", output_path); + find_replace_extension(output_path, ".TIFF", ".txt", output_path); } float sec(clock_t clocks) From 1a3971f5f72db513f388dfa0617aa07afcb3022b Mon Sep 17 00:00:00 2001 From: Josh Veitch-Michaelis Date: Fri, 15 Mar 2019 22:01:04 +0000 Subject: [PATCH 07/23] alert user if annotation file doesn't exist Currently darknet will continue silently if an annotation file isn't found. This can happen when the user provides an unsupported image format and the extension is not replaced. Probably this function should be modified to change any extension to .txt, since OpenCV (for example) supports many image extensions. 
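As a rough illustration of the approach suggested in this commit message (deriving the label file by replacing whatever extension the image has with ".txt", rather than enumerating every supported image format), a minimal standalone C sketch follows. This is not part of the patch: the function name replace_any_extension_with_txt and the sample path are hypothetical.

/* Hypothetical sketch, not part of this patch: strip whatever extension the
 * image path has and append ".txt", instead of enumerating extensions. */
#include <stdio.h>
#include <string.h>

static void replace_any_extension_with_txt(const char *input_path, char *output_path, size_t out_size)
{
    /* copy the input path into the output buffer */
    snprintf(output_path, out_size, "%s", input_path);

    /* find the last '.' and the last path separator (handle both '/' and '\\') */
    char *last_dot = strrchr(output_path, '.');
    char *last_sep = strrchr(output_path, '/');
    char *last_bsep = strrchr(output_path, '\\');
    if (last_bsep && (!last_sep || last_bsep > last_sep)) last_sep = last_bsep;

    /* cut the extension only if the '.' belongs to the file name, not a directory */
    if (last_dot && (!last_sep || last_dot > last_sep)) *last_dot = '\0';

    /* append the label extension */
    strncat(output_path, ".txt", out_size - strlen(output_path) - 1);
}

int main(void)
{
    char label_path[4096];
    replace_any_extension_with_txt("data/obj/img_0001.webp", label_path, sizeof(label_path));
    printf("%s\n", label_path);  /* prints: data/obj/img_0001.txt */
    return 0;
}

This sketch assumes every label file sits next to its image with a ".txt" extension; the actual patches below keep the existing find_replace_extension() calls and only add a warning when no ".txt" name could be inferred.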
--- src/utils.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/utils.c b/src/utils.c index 904aa2e0..e11b4f3b 100644 --- a/src/utils.c +++ b/src/utils.c @@ -254,6 +254,13 @@ void replace_image_to_label(const char* input_path, char* output_path) find_replace_extension(output_path, ".PPM", ".txt", output_path); find_replace_extension(output_path, ".tiff", ".txt", output_path); find_replace_extension(output_path, ".TIFF", ".txt", output_path); + + // Check file ends with txt and exists: + char output_path_ext[3]; + memcpy( output_path_ext, &output_path[strlen(output_path)-3], 3); + if( strcmp("txt", output_path_ext) != 0){ + fprintf(stderr, "Failed to find valid annotation file: %s \n", output_path_ext); + } } float sec(clock_t clocks) From f73ace38fafa605d21d5c8e86f16c30f72dfb8ac Mon Sep 17 00:00:00 2001 From: Josh Veitch-Michaelis Date: Fri, 15 Mar 2019 22:10:38 +0000 Subject: [PATCH 08/23] fix output message --- src/utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils.c b/src/utils.c index e11b4f3b..2ab20acc 100644 --- a/src/utils.c +++ b/src/utils.c @@ -255,11 +255,11 @@ void replace_image_to_label(const char* input_path, char* output_path) find_replace_extension(output_path, ".tiff", ".txt", output_path); find_replace_extension(output_path, ".TIFF", ".txt", output_path); - // Check file ends with txt and exists: + // Check file ends with txt: char output_path_ext[3]; memcpy( output_path_ext, &output_path[strlen(output_path)-3], 3); if( strcmp("txt", output_path_ext) != 0){ - fprintf(stderr, "Failed to find valid annotation file: %s \n", output_path_ext); + fprintf(stderr, "Failed to infer label file name (check image extension is supported): %s \n", output_path); } } From 328cce0bf53c22ed2ea829a13614060bf41b47d2 Mon Sep 17 00:00:00 2001 From: Josh Veitch-Michaelis Date: Fri, 15 Mar 2019 22:51:15 +0000 Subject: [PATCH 09/23] cleaner check for extension --- src/utils.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/utils.c b/src/utils.c index 2ab20acc..a2a6178a 100644 --- a/src/utils.c +++ b/src/utils.c @@ -256,10 +256,13 @@ void replace_image_to_label(const char* input_path, char* output_path) find_replace_extension(output_path, ".TIFF", ".txt", output_path); // Check file ends with txt: - char output_path_ext[3]; - memcpy( output_path_ext, &output_path[strlen(output_path)-3], 3); - if( strcmp("txt", output_path_ext) != 0){ - fprintf(stderr, "Failed to infer label file name (check image extension is supported): %s \n", output_path); + if(strlen(output_path) > 4) { + char *output_path_ext = output_path + strlen(output_path) - 4; + if( strcmp(".txt", output_path_ext) != 0){ + fprintf(stderr, "Failed to infer label file name (check image extension is supported): %s \n", output_path); + } + }else{ + fprintf(stderr, "Label file name is too short: %s \n", output_path); } } From b6e15f16561c401c993092d180198555c1360cf7 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 18 Mar 2019 02:48:52 +0300 Subject: [PATCH 10/23] ZED 3D Camera support added to ./uselib (yolo_console_cpp.exe) example --- Makefile | 2 +- README.md | 10 +- build/darknet/darknet.vcxproj | 6 +- build/darknet/x64/cfg/crnn.train.cfg | 10 +- build/darknet/yolo_console_dll.vcxproj | 13 +- build/darknet/yolo_cpp_dll.vcxproj | 11 +- cfg/crnn.train.cfg | 10 +- include/darknet.h | 5 +- include/yolo_v2_class.hpp | 447 ++++++++++++-- scripts/README.md | 23 +- src/activation_kernels.cu | 2 +- src/activation_layer.c | 2 +- src/activations.h | 2 +- 
src/avgpool_layer.c | 2 +- src/avgpool_layer.h | 2 +- src/avgpool_layer_kernels.cu | 2 +- src/blas.h | 2 +- src/blas_kernels.cu | 2 +- src/classifier.c | 2 +- src/col2im_kernels.cu | 2 +- src/connected_layer.c | 2 +- src/convolutional_kernels.cu | 9 +- src/convolutional_layer.c | 6 - src/convolutional_layer.h | 2 +- src/cost_layer.c | 2 +- src/crnn_layer.c | 22 +- src/crop_layer.c | 2 +- src/crop_layer_kernels.cu | 2 +- src/{cuda.c => dark_cuda.c} | 17 + src/{cuda.h => dark_cuda.h} | 5 +- src/darknet.c | 2 +- src/data.c | 2 +- src/deconvolutional_kernels.cu | 2 +- src/deconvolutional_layer.h | 2 +- src/detection_layer.c | 2 +- src/detector.c | 13 +- src/dropout_layer.c | 2 +- src/dropout_layer_kernels.cu | 2 +- src/gemm.c | 2 +- src/gru_layer.c | 2 +- src/http_stream.cpp | 224 ++++---- src/im2col_kernels.cu | 3 +- src/image.c | 2 +- src/layer.c | 2 +- src/local_layer.h | 2 +- src/lstm_layer.c | 2 +- src/maxpool_layer.c | 6 +- src/maxpool_layer.h | 2 +- src/maxpool_layer_kernels.cu | 2 +- src/network_kernels.cu | 5 +- src/region_layer.c | 2 +- src/reorg_layer.c | 2 +- src/reorg_layer.h | 2 +- src/reorg_old_layer.c | 2 +- src/reorg_old_layer.h | 2 +- src/rnn_layer.c | 2 +- src/route_layer.c | 2 +- src/shortcut_layer.c | 2 +- src/softmax_layer.c | 2 +- src/upsample_layer.c | 2 +- src/upsample_layer.h | 2 +- src/utils.c | 2 + src/yolo_console_dll.cpp | 767 +++++++++++++++---------- src/yolo_layer.c | 2 +- src/yolo_v2_class.cpp | 72 ++- 65 files changed, 1217 insertions(+), 555 deletions(-) rename src/{cuda.c => dark_cuda.c} (96%) rename src/{cuda.h => dark_cuda.h} (96%) diff --git a/Makefile b/Makefile index d06e6ace..c59088af 100644 --- a/Makefile +++ b/Makefile @@ -111,7 +111,7 @@ CFLAGS+= -DCUDNN_HALF ARCH+= -gencode arch=compute_70,code=[sm_70,compute_70] endif -OBJ=http_stream.o gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o upsample_layer.o lstm_layer.o +OBJ=http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o upsample_layer.o lstm_layer.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o diff --git a/README.md b/README.md index 245a0f06..c605ce1f 100644 --- a/README.md +++ 
b/README.md @@ -76,9 +76,9 @@ You can get cfg-files by path: `darknet/cfg/` ##### Examples of results -[![Everything Is AWESOME](http://img.youtube.com/vi/VOC3huqHrss/0.jpg)](https://www.youtube.com/watch?v=VOC3huqHrss "Everything Is AWESOME") +[![Yolo v3](http://img.youtube.com/vi/VOC3huqHrss/0.jpg)](https://www.youtube.com/watch?v=MPU2HistivI "Yolo v3") -Others: https://www.youtube.com/channel/UC7ev3hNVkx4DzZ3LO19oebg +Others: https://www.youtube.com/user/pjreddie/videos ### Improvements in this repository @@ -344,11 +344,13 @@ Training Yolo v3: 5. You should label each object on images from your dataset. Use this visual GUI-software for marking bounded boxes of objects and generating annotation files for Yolo v2 & v3: https://github.com/AlexeyAB/Yolo_mark -It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line: ` ` +It will create `.txt`-file for each `.jpg`-image-file - in the same directory and with the same name, but with `.txt`-extension, and put to file: object number and object coordinates on this image, for each object in new line: + +` ` Where: * `` - integer object number from `0` to `(classes-1)` - * ` ` - float values relative to width and height of image, it can be equal from (0.0 to 1.0] + * ` ` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]` * for example: ` = / ` or ` = / ` * atention: ` ` - are center of rectangle (are not top-left corner) diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj index 1858f063..d629cac5 100644 --- a/build/darknet/darknet.vcxproj +++ b/build/darknet/darknet.vcxproj @@ -133,7 +133,7 @@ true true C:\opencv_3.0\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(CUDNN)\include;$(cudnn)\include - CUDNN_HALF;OPENCV;CUDNN;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + OPENCV;CUDNN_HALF;CUDNN;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) c11 c++1y CompileAsCpp @@ -188,8 +188,8 @@ - + @@ -251,7 +251,7 @@ - + diff --git a/build/darknet/x64/cfg/crnn.train.cfg b/build/darknet/x64/cfg/crnn.train.cfg index dcc12d74..e0e0b54c 100644 --- a/build/darknet/x64/cfg/crnn.train.cfg +++ b/build/darknet/x64/cfg/crnn.train.cfg @@ -11,8 +11,16 @@ policy=steps steps=1000,1500 scales=.1,.1 -[rnn] +try_fix_nan=1 + +[connected] +output=256 +activation=leaky + +[crnn] batch_normalize=1 +size=1 +pad=0 output = 1024 hidden=1024 activation=leaky diff --git a/build/darknet/yolo_console_dll.vcxproj b/build/darknet/yolo_console_dll.vcxproj index a965fcaa..c93844b7 100644 --- a/build/darknet/yolo_console_dll.vcxproj +++ b/build/darknet/yolo_console_dll.vcxproj @@ -51,8 +51,7 @@ MultiByte - - + @@ -93,7 +92,7 @@ _CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) - C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib + C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;$(CUDA_PATH)\lib\x64\;$(ZED_SDK_ROOT_DIR)\lib @@ -117,7 +116,7 @@ true true true - ..\..\include;C:\opencv_source\opencv\bin\install\include + ..\..\include;C:\opencv_source\opencv\bin\install\include;$(CUDA_PATH)\include;$(ZED_SDK_ROOT_DIR)\include 
OPENCV;_CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) Async true @@ -125,7 +124,8 @@ true true - C:\opencv_source\opencv\bin\install\x64\vc14\lib;C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib + C:\opencv_source\opencv\bin\install\x64\vc14\lib;C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;$(CUDA_PATH)\lib\x64\;$(ZED_SDK_ROOT_DIR)\lib + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) @@ -138,6 +138,5 @@ - - + \ No newline at end of file diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj index 97ff65c6..c03bbea3 100644 --- a/build/darknet/yolo_cpp_dll.vcxproj +++ b/build/darknet/yolo_cpp_dll.vcxproj @@ -102,7 +102,7 @@ true $(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) $(OutDir)\$(TargetName)$(TargetExt) - ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;%(AdditionalDependencies) + ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cuda.lib;%(AdditionalDependencies) true @@ -142,8 +142,7 @@ c++1y CompileAsCpp Default - - + OPENCV; true @@ -151,7 +150,7 @@ true true C:\opencv_3.0\opencv\build\x64\vc14\lib;$(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) - ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;%(AdditionalDependencies) + ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cuda.lib;%(AdditionalDependencies) $(OutDir)\$(TargetName)$(TargetExt) @@ -192,8 +191,8 @@ - + @@ -257,7 +256,7 @@ - + diff --git a/cfg/crnn.train.cfg b/cfg/crnn.train.cfg index dcc12d74..e0e0b54c 100644 --- a/cfg/crnn.train.cfg +++ b/cfg/crnn.train.cfg @@ -11,8 +11,16 @@ policy=steps steps=1000,1500 scales=.1,.1 -[rnn] +try_fix_nan=1 + +[connected] +output=256 +activation=leaky + +[crnn] batch_normalize=1 +size=1 +pad=0 output = 1024 hidden=1024 activation=leaky diff --git a/include/darknet.h b/include/darknet.h index 8ca8b493..0a1451e3 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -792,10 +792,11 @@ LIB_API void free_data(data d); LIB_API pthread_t load_data(load_args args); LIB_API pthread_t load_data_in_thread(load_args args); -// cuda.h +// dark_cuda.h LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n); LIB_API void cuda_pull_array_async(float *x_gpu, float *x, size_t n); LIB_API void cuda_set_device(int n); +LIB_API void *cuda_get_context(); // utils.h LIB_API void free_ptrs(void **ptrs, int n); @@ -809,6 +810,8 @@ LIB_API metadata get_metadata(char *file); // http_stream.h +LIB_API void delete_json_sender(); +LIB_API void send_json_custom(char const* send_buf, int port, int timeout); LIB_API double get_time_point(); void start_timer(); void stop_timer(); diff --git a/include/yolo_v2_class.hpp b/include/yolo_v2_class.hpp index efe2b5f3..f9278721 100644 --- a/include/yolo_v2_class.hpp +++ b/include/yolo_v2_class.hpp @@ -25,6 +25,7 @@ struct bbox_t { unsigned int obj_id; // class of object - from range [0, classes-1] unsigned int track_id; // tracking id for video (0 - untracked, 1 - inf - tracked object) unsigned int frames_counter; // counter of frames on which the object was detected + float x_3d, y_3d, z_3d; // center of object (in Meters) if ZED 3D Camera is used }; struct image_t { @@ -60,8 +61,8 @@ extern "C" LIB_API int get_device_name(int gpu, char* deviceName); 
class Detector { std::shared_ptr detector_gpu_ptr; std::deque> prev_bbox_vec_deque; - const int cur_gpu_id; public: + const int cur_gpu_id; float nms = .4; bool wait_stream; @@ -79,6 +80,11 @@ public: LIB_API std::vector tracking_id(std::vector cur_bbox_vec, bool const change_history = true, int const frames_story = 5, int const max_dist = 40); + LIB_API void *get_cuda_context(); + + LIB_API bool send_json_http(std::vector cur_bbox_vec, std::vector obj_names, int frame_id, + std::string filename = "", int timeout = 400000, int port = 8070); + std::vector detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false) { if (img.data == NULL) @@ -115,7 +121,10 @@ public: static std::shared_ptr mat_to_image(cv::Mat img_src) { cv::Mat img; - cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR); + if (img_src.channels() == 4) cv::cvtColor(img_src, img, cv::COLOR_RGBA2BGR); + else if (img_src.channels() == 3) cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR); + else if (img_src.channels() == 1) cv::cvtColor(img_src, img, cv::COLOR_GRAY2BGR); + else std::cerr << " Warning: img_src.channels() is not 1, 3 or 4. It is = " << img_src.channels() << std::endl; std::shared_ptr image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; }); std::shared_ptr ipl_small = std::make_shared(img); *image_ptr = ipl_to_image(ipl_small.get()); @@ -166,7 +175,7 @@ private: #endif // OPENCV }; - +// -------------------------------------------------------------------------------- #if defined(TRACK_OPTFLOW) && defined(OPENCV) && defined(GPU) @@ -183,7 +192,7 @@ public: const int flow_error; - Tracker_optflow(int _gpu_id = 0, int win_size = 9, int max_level = 3, int iterations = 8000, int _flow_error = -1) : + Tracker_optflow(int _gpu_id = 0, int win_size = 15, int max_level = 3, int iterations = 8000, int _flow_error = -1) : gpu_count(cv::cuda::getCudaEnabledDeviceCount()), gpu_id(std::min(_gpu_id, gpu_count-1)), flow_error((_flow_error > 0)? _flow_error:(win_size*4)) { @@ -249,18 +258,32 @@ public: if (old_gpu_id != gpu_id) cv::cuda::setDevice(gpu_id); - if (src_mat.channels() == 3) { + if (src_mat.channels() == 1 || src_mat.channels() == 3 || src_mat.channels() == 4) { if (src_mat_gpu.cols == 0) { src_mat_gpu = cv::cuda::GpuMat(src_mat.size(), src_mat.type()); src_grey_gpu = cv::cuda::GpuMat(src_mat.size(), CV_8UC1); } - update_cur_bbox_vec(_cur_bbox_vec); + if (src_mat.channels() == 1) { + src_mat_gpu.upload(src_mat, stream); + src_mat_gpu.copyTo(src_grey_gpu); + } + else if (src_mat.channels() == 3) { + src_mat_gpu.upload(src_mat, stream); + cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 1, stream); + } + else if (src_mat.channels() == 4) { + src_mat_gpu.upload(src_mat, stream); + cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGRA2GRAY, 1, stream); + } + else { + std::cerr << " Warning: src_mat.channels() is not: 1, 3 or 4. It is = " << src_mat.channels() << " \n"; + return; + } - //src_grey_gpu.upload(src_mat, stream); // use BGR - src_mat_gpu.upload(src_mat, stream); - cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 1, stream); } + update_cur_bbox_vec(_cur_bbox_vec); + if (old_gpu_id != gpu_id) cv::cuda::setDevice(old_gpu_id); } @@ -355,7 +378,7 @@ public: const int flow_error; - Tracker_optflow(int win_size = 9, int max_level = 3, int iterations = 8000, int _flow_error = -1) : + Tracker_optflow(int win_size = 15, int max_level = 3, int iterations = 8000, int _flow_error = -1) : flow_error((_flow_error > 0)? 
_flow_error:(win_size*4)) { sync_PyrLKOpticalFlow = cv::SparsePyrLKOpticalFlow::create(); @@ -396,12 +419,20 @@ public: void update_tracking_flow(cv::Mat new_src_mat, std::vector _cur_bbox_vec) { - if (new_src_mat.channels() == 3) { - - update_cur_bbox_vec(_cur_bbox_vec); - + if (new_src_mat.channels() == 1) { + src_grey = new_src_mat.clone(); + } + else if (new_src_mat.channels() == 3) { cv::cvtColor(new_src_mat, src_grey, CV_BGR2GRAY, 1); } + else if (new_src_mat.channels() == 4) { + cv::cvtColor(new_src_mat, src_grey, CV_BGRA2GRAY, 1); + } + else { + std::cerr << " Warning: new_src_mat.channels() is not: 1, 3 or 4. It is = " << new_src_mat.channels() << " \n"; + return; + } + update_cur_bbox_vec(_cur_bbox_vec); } @@ -416,6 +447,7 @@ public: if (src_grey.rows != dst_grey.rows || src_grey.cols != dst_grey.cols) { src_grey = dst_grey.clone(); + //std::cerr << " Warning: src_grey.rows != dst_grey.rows || src_grey.cols != dst_grey.cols \n"; return cur_bbox_vec; } @@ -611,56 +643,361 @@ public: } } }; -#endif // OPENCV -//extern "C" { -#endif // __cplusplus -/* - // C - wrappers - LIB_API void create_detector(char const* cfg_filename, char const* weight_filename, int gpu_id); - LIB_API void delete_detector(); - LIB_API bbox_t* detect_custom(image_t img, float thresh, bool use_mean, int *result_size); - LIB_API bbox_t* detect_resized(image_t img, int init_w, int init_h, float thresh, bool use_mean, int *result_size); - LIB_API bbox_t* detect(image_t img, int *result_size); - LIB_API image_t load_img(char *image_filename); - LIB_API void free_img(image_t m); +class track_kalman_t +{ + int track_id_counter; + std::chrono::steady_clock::time_point global_last_time; + float dT; -#ifdef __cplusplus -} // extern "C" +public: + int max_objects; // max objects for tracking + int min_frames; // min frames to consider an object as detected + const float max_dist; // max distance (in px) to track with the same ID + cv::Size img_size; // max value of x,y,w,h + + struct tst_t { + int track_id; + int state_id; + std::chrono::steady_clock::time_point last_time; + int detection_count; + tst_t() : track_id(-1), state_id(-1) {} + }; + std::vector track_id_state_id_time; + std::vector result_vec_pred; -static std::shared_ptr c_detector_ptr; -static std::vector c_result_vec; + struct one_kalman_t; + std::vector kalman_vec; -void create_detector(char const* cfg_filename, char const* weight_filename, int gpu_id) { - c_detector_ptr = std::make_shared(cfg_filename, weight_filename, gpu_id); -} + struct one_kalman_t + { + cv::KalmanFilter kf; + cv::Mat state; + cv::Mat meas; + int measSize, stateSize, contrSize; + + void set_delta_time(float dT) { + kf.transitionMatrix.at(2) = dT; + kf.transitionMatrix.at(9) = dT; + } -void delete_detector() { c_detector_ptr.reset(); } + void set(bbox_t box) + { + initialize_kalman(); + + kf.errorCovPre.at(0) = 1; // px + kf.errorCovPre.at(7) = 1; // px + kf.errorCovPre.at(14) = 1; + kf.errorCovPre.at(21) = 1; + kf.errorCovPre.at(28) = 1; // px + kf.errorCovPre.at(35) = 1; // px + + state.at(0) = box.x; + state.at(1) = box.y; + state.at(2) = 0; + state.at(3) = 0; + state.at(4) = box.w; + state.at(5) = box.h; + // <<<< Initialization + + kf.statePost = state; + } -bbox_t* detect_custom(image_t img, float thresh, bool use_mean, int *result_size) { - c_result_vec = static_cast(c_detector_ptr.get())->detect(img, thresh, use_mean); - *result_size = c_result_vec.size(); - return c_result_vec.data(); -} + // Kalman.correct() calculates: statePost = statePre + gain * 
(z(k)-measurementMatrix*statePre); + // corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) + void correct(bbox_t box) { + meas.at(0) = box.x; + meas.at(1) = box.y; + meas.at(2) = box.w; + meas.at(3) = box.h; + + kf.correct(meas); + + bbox_t new_box = predict(); + if (new_box.w == 0 || new_box.h == 0) { + set(box); + //std::cerr << " force set(): track_id = " << box.track_id << + // ", x = " << box.x << ", y = " << box.y << ", w = " << box.w << ", h = " << box.h << std::endl; + } + } -bbox_t* detect_resized(image_t img, int init_w, int init_h, float thresh, bool use_mean, int *result_size) { - c_result_vec = static_cast(c_detector_ptr.get())->detect_resized(img, init_w, init_h, thresh, use_mean); - *result_size = c_result_vec.size(); - return c_result_vec.data(); -} + // Kalman.predict() calculates: statePre = TransitionMatrix * statePost; + // predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) + bbox_t predict() { + bbox_t box; + state = kf.predict(); + + box.x = state.at(0); + box.y = state.at(1); + box.w = state.at(4); + box.h = state.at(5); + return box; + } -bbox_t* detect(image_t img, int *result_size) { - return detect_custom(img, 0.24, true, result_size); -} + void initialize_kalman() + { + kf = cv::KalmanFilter(stateSize, measSize, contrSize, CV_32F); + + // Transition State Matrix A + // Note: set dT at each processing step! + // [ 1 0 dT 0 0 0 ] + // [ 0 1 0 dT 0 0 ] + // [ 0 0 1 0 0 0 ] + // [ 0 0 0 1 0 0 ] + // [ 0 0 0 0 1 0 ] + // [ 0 0 0 0 0 1 ] + cv::setIdentity(kf.transitionMatrix); + + // Measure Matrix H + // [ 1 0 0 0 0 0 ] + // [ 0 1 0 0 0 0 ] + // [ 0 0 0 0 1 0 ] + // [ 0 0 0 0 0 1 ] + kf.measurementMatrix = cv::Mat::zeros(measSize, stateSize, CV_32F); + kf.measurementMatrix.at(0) = 1.0f; + kf.measurementMatrix.at(7) = 1.0f; + kf.measurementMatrix.at(16) = 1.0f; + kf.measurementMatrix.at(23) = 1.0f; + + // Process Noise Covariance Matrix Q - result smoother with lower values (1e-2) + // [ Ex 0 0 0 0 0 ] + // [ 0 Ey 0 0 0 0 ] + // [ 0 0 Ev_x 0 0 0 ] + // [ 0 0 0 Ev_y 0 0 ] + // [ 0 0 0 0 Ew 0 ] + // [ 0 0 0 0 0 Eh ] + //cv::setIdentity(kf.processNoiseCov, cv::Scalar(1e-3)); + kf.processNoiseCov.at(0) = 1e-2; + kf.processNoiseCov.at(7) = 1e-2; + kf.processNoiseCov.at(14) = 1e-2;// 5.0f; + kf.processNoiseCov.at(21) = 1e-2;// 5.0f; + kf.processNoiseCov.at(28) = 1e-2; + kf.processNoiseCov.at(35) = 1e-2; + + // Measures Noise Covariance Matrix R - result smoother with higher values (1e-1) + cv::setIdentity(kf.measurementNoiseCov, cv::Scalar(1e-1)); + + //cv::setIdentity(kf.errorCovPost, cv::Scalar::all(1e-2)); + // <<<< Kalman Filter + + set_delta_time(0); + } -image_t load_img(char *image_filename) { - return static_cast(c_detector_ptr.get())->load_image(image_filename); -} -void free_img(image_t m) { - static_cast(c_detector_ptr.get())->free_image(m); -} + + one_kalman_t(int _stateSize = 6, int _measSize = 4, int _contrSize = 0) : + kf(_stateSize, _measSize, _contrSize, CV_32F), measSize(_measSize), stateSize(_stateSize), contrSize(_contrSize) + { + state = cv::Mat(stateSize, 1, CV_32F); // [x,y,v_x,v_y,w,h] + meas = cv::Mat(measSize, 1, CV_32F); // [z_x,z_y,z_w,z_h] + //cv::Mat procNoise(stateSize, 1, type) + // [E_x,E_y,E_v_x,E_v_y,E_w,E_h] + + initialize_kalman(); + } + }; + // ------------------------------------------ + + + + track_kalman_t(int _max_objects = 1000, int _min_frames = 3, float _max_dist = 40, cv::Size _img_size = cv::Size(10000, 10000)) : + max_objects(_max_objects), min_frames(_min_frames), max_dist(_max_dist), img_size(_img_size), + track_id_counter(0) 
+ { + kalman_vec.resize(max_objects); + track_id_state_id_time.resize(max_objects); + result_vec_pred.resize(max_objects); + } + + float calc_dt() { + dT = std::chrono::duration(std::chrono::steady_clock::now() - global_last_time).count(); + return dT; + } + + static float get_distance(float src_x, float src_y, float dst_x, float dst_y) { + return sqrtf((src_x - dst_x)*(src_x - dst_x) + (src_y - dst_y)*(src_y - dst_y)); + } + + void clear_old_states() { + // clear old bboxes + for (size_t state_id = 0; state_id < track_id_state_id_time.size(); ++state_id) + { + float time_sec = std::chrono::duration(std::chrono::steady_clock::now() - track_id_state_id_time[state_id].last_time).count(); + float time_wait = 0.5; // 0.5 second + if (track_id_state_id_time[state_id].track_id > -1) + { + if ((result_vec_pred[state_id].x > img_size.width) || + (result_vec_pred[state_id].y > img_size.height)) + { + track_id_state_id_time[state_id].track_id = -1; + } + + if (time_sec >= time_wait || track_id_state_id_time[state_id].detection_count < 0) { + //std::cerr << " remove track_id = " << track_id_state_id_time[state_id].track_id << ", state_id = " << state_id << std::endl; + track_id_state_id_time[state_id].track_id = -1; // remove bbox + } + } + } + } + + tst_t get_state_id(bbox_t find_box, std::vector &busy_vec) + { + tst_t tst; + tst.state_id = -1; + + float min_dist = std::numeric_limits::max(); + + for (size_t i = 0; i < max_objects; ++i) + { + if (track_id_state_id_time[i].track_id > -1 && result_vec_pred[i].obj_id == find_box.obj_id && busy_vec[i] == false) + { + bbox_t pred_box = result_vec_pred[i]; + + float dist = get_distance(pred_box.x, pred_box.y, find_box.x, find_box.y); + + float movement_dist = std::max(max_dist, static_cast(std::max(pred_box.w, pred_box.h)) ); + + if ((dist < movement_dist) && (dist < min_dist)) { + min_dist = dist; + tst.state_id = i; + } + } + } + + if (tst.state_id > -1) { + track_id_state_id_time[tst.state_id].last_time = std::chrono::steady_clock::now(); + track_id_state_id_time[tst.state_id].detection_count = std::max(track_id_state_id_time[tst.state_id].detection_count + 2, 10); + tst = track_id_state_id_time[tst.state_id]; + busy_vec[tst.state_id] = true; + } + else { + //std::cerr << " Didn't find: obj_id = " << find_box.obj_id << ", x = " << find_box.x << ", y = " << find_box.y << + // ", track_id_counter = " << track_id_counter << std::endl; + } + + return tst; + } + + tst_t new_state_id(std::vector &busy_vec) + { + tst_t tst; + // find empty cell to add new track_id + auto it = std::find_if(track_id_state_id_time.begin(), track_id_state_id_time.end(), [&](tst_t &v) { return v.track_id == -1; }); + if (it != track_id_state_id_time.end()) { + it->state_id = it - track_id_state_id_time.begin(); + //it->track_id = track_id_counter++; + it->track_id = 0; + it->last_time = std::chrono::steady_clock::now(); + it->detection_count = 1; + tst = *it; + busy_vec[it->state_id] = true; + } + + return tst; + } + + std::vector find_state_ids(std::vector result_vec) + { + std::vector tst_vec(result_vec.size()); + + std::vector busy_vec(max_objects, false); + + for (size_t i = 0; i < result_vec.size(); ++i) + { + tst_t tst = get_state_id(result_vec[i], busy_vec); + int state_id = tst.state_id; + int track_id = tst.track_id; + + // if new state_id + if (state_id < 0) { + tst = new_state_id(busy_vec); + state_id = tst.state_id; + track_id = tst.track_id; + if (state_id > -1) { + kalman_vec[state_id].set(result_vec[i]); + //std::cerr << " post: "; + } + } + + //std::cerr << " 
track_id = " << track_id << ", state_id = " << state_id << + // ", x = " << result_vec[i].x << ", det_count = " << tst.detection_count << std::endl; + + if (state_id > -1) { + tst_vec[i] = tst; + result_vec_pred[state_id] = result_vec[i]; + result_vec_pred[state_id].track_id = track_id; + } + } + + return tst_vec; + } + + std::vector predict() + { + clear_old_states(); + std::vector result_vec; + + for (size_t i = 0; i < max_objects; ++i) + { + tst_t tst = track_id_state_id_time[i]; + if (tst.track_id > -1) { + bbox_t box = kalman_vec[i].predict(); + + result_vec_pred[i].x = box.x; + result_vec_pred[i].y = box.y; + result_vec_pred[i].w = box.w; + result_vec_pred[i].h = box.h; + + if (tst.detection_count >= min_frames) + { + if (track_id_state_id_time[i].track_id == 0) { + track_id_state_id_time[i].track_id = ++track_id_counter; + result_vec_pred[i].track_id = track_id_counter; + } + + result_vec.push_back(result_vec_pred[i]); + } + } + } + //std::cerr << " result_vec.size() = " << result_vec.size() << std::endl; + + //global_last_time = std::chrono::steady_clock::now(); + + return result_vec; + } + + + std::vector correct(std::vector result_vec) + { + calc_dt(); + clear_old_states(); + + for (size_t i = 0; i < max_objects; ++i) + track_id_state_id_time[i].detection_count--; + + std::vector tst_vec = find_state_ids(result_vec); + + for (size_t i = 0; i < tst_vec.size(); ++i) { + tst_t tst = tst_vec[i]; + int state_id = tst.state_id; + if (state_id > -1) + { + kalman_vec[state_id].set_delta_time(dT); + kalman_vec[state_id].correct(result_vec_pred[state_id]); + } + } + + result_vec = predict(); + + global_last_time = std::chrono::steady_clock::now(); + + return result_vec; + } + +}; +// ---------------------------------------------- +#endif // OPENCV #endif // __cplusplus -*/ -#endif + +#endif // YOLO_V2_CLASS_HPP diff --git a/scripts/README.md b/scripts/README.md index 0c8327f0..36985f81 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -16,6 +16,12 @@ Cityscapes: https://www.cityscapes-dataset.com/ Object Tracking Benchmark: http://cvlab.hanyang.ac.kr/tracker_benchmark/datasets.html +MOT (Multiple object tracking benchmark): https://motchallenge.net/ + +VOT (Visual object tracking): http://www.votchallenge.net/challenges.html + +FREE FLIR Thermal Dataset (infrared): https://www.flir.eu/oem/adas/adas-dataset-form/ + MARS: http://www.liangzheng.com.cn/Project/project_mars.html Market-1501: http://www.liangzheng.org/Project/project_reid.html @@ -30,7 +36,22 @@ Visual Question Answering: https://visualqa.org/download.html Large Movie Review Dataset: http://ai.stanford.edu/~amaas/data/sentiment/ +KITTI (for autonomous driving): http://www.cvlibs.net/datasets/kitti/ + +nuScenes (for autonomous driving): https://www.nuscenes.org/overview + +---- + +Wikipedia's List of datasets: https://en.wikipedia.org/wiki/List_of_datasets_for_machine-learning_research Other datasets (Music, Natural Images, Artificial Datasets, Faces, Text, Speech, Recommendation Systems, Misc): http://deeplearning.net/datasets/ -25 datasets: https://www.analyticsvidhya.com/blog/2018/03/comprehensive-collection-deep-learning-datasets/ \ No newline at end of file +25 datasets: https://www.analyticsvidhya.com/blog/2018/03/comprehensive-collection-deep-learning-datasets/ + +List of datasets: https://riemenschneider.hayko.at/vision/dataset/index.php + +Another list of datasets: http://homepages.inf.ed.ac.uk/rbf/CVonline/Imagedbase.htm + +Pedestrian DATASETs for Vision based Detection and Tracking: 
https://hemprasad.wordpress.com/2014/11/08/pedestrian-datasets-for-vision-based-detection-and-tracking/ + +TrackingNet: https://tracking-net.org/ \ No newline at end of file diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu index 0144ca51..6c9445a6 100644 --- a/src/activation_kernels.cu +++ b/src/activation_kernels.cu @@ -4,7 +4,7 @@ #include "cublas_v2.h" #include "activations.h" -#include "cuda.h" +#include "dark_cuda.h" __device__ float lhtan_activate_kernel(float x) diff --git a/src/activation_layer.c b/src/activation_layer.c index 2c323b8d..b8b5d023 100644 --- a/src/activation_layer.c +++ b/src/activation_layer.c @@ -1,6 +1,6 @@ #include "activation_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/activations.h b/src/activations.h index 4ecf97d7..4a382b2d 100644 --- a/src/activations.h +++ b/src/activations.h @@ -1,7 +1,7 @@ #ifndef ACTIVATIONS_H #define ACTIVATIONS_H #include "darknet.h" -#include "cuda.h" +#include "dark_cuda.h" #include "math.h" //typedef enum{ diff --git a/src/avgpool_layer.c b/src/avgpool_layer.c index bae5ff4d..20838bbd 100644 --- a/src/avgpool_layer.c +++ b/src/avgpool_layer.c @@ -1,5 +1,5 @@ #include "avgpool_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) diff --git a/src/avgpool_layer.h b/src/avgpool_layer.h index f7679aa9..2277ec6d 100644 --- a/src/avgpool_layer.h +++ b/src/avgpool_layer.h @@ -2,7 +2,7 @@ #define AVGPOOL_LAYER_H #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/avgpool_layer_kernels.cu b/src/avgpool_layer_kernels.cu index b38ba450..676faa69 100644 --- a/src/avgpool_layer_kernels.cu +++ b/src/avgpool_layer_kernels.cu @@ -3,7 +3,7 @@ #include "cublas_v2.h" #include "avgpool_layer.h" -#include "cuda.h" +#include "dark_cuda.h" __global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) { diff --git a/src/blas.h b/src/blas.h index 12d4b9a3..8e91fff2 100644 --- a/src/blas.h +++ b/src/blas.h @@ -1,7 +1,7 @@ #ifndef BLAS_H #define BLAS_H #ifdef GPU -#include "cuda.h" +#include "dark_cuda.h" #include "tree.h" #endif #ifdef __cplusplus diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index 98592c81..2070bc1f 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -4,7 +4,7 @@ #include #include "blas.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include "tree.h" diff --git a/src/classifier.c b/src/classifier.c index b5c78d65..5471957a 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -5,7 +5,7 @@ #include "blas.h" #include "assert.h" #include "classifier.h" -#include "cuda.h" +#include "dark_cuda.h" #ifdef WIN32 #include #include "gettimeofday.h" diff --git a/src/col2im_kernels.cu b/src/col2im_kernels.cu index e3b1d233..2f18a0fd 100644 --- a/src/col2im_kernels.cu +++ b/src/col2im_kernels.cu @@ -3,7 +3,7 @@ #include "cublas_v2.h" #include "col2im.h" -#include "cuda.h" +#include "dark_cuda.h" // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE diff --git a/src/connected_layer.c b/src/connected_layer.c index 3520e914..242ab8fb 100644 --- a/src/connected_layer.c +++ b/src/connected_layer.c @@ -2,7 +2,7 @@ #include "batchnorm_layer.h" #include "convolutional_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include 
"blas.h" #include "gemm.h" diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 9625ddc6..8ae99710 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -2,12 +2,6 @@ #include "curand.h" #include "cublas_v2.h" -#ifdef CUDNN -#ifndef USE_CMAKE_LIBS -#pragma comment(lib, "cudnn.lib") -#endif -#endif - #include "convolutional_layer.h" #include "batchnorm_layer.h" #include "gemm.h" @@ -15,7 +9,7 @@ #include "im2col.h" #include "col2im.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" __global__ void binarize_kernel(float *x, int n, float *binary) @@ -598,6 +592,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) void backward_convolutional_layer_gpu(convolutional_layer l, network_state state) { + if(state.net.try_fix_nan) constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1); gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); if (!l.batch_normalize) diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 756fe917..e42e86a7 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -8,12 +8,6 @@ #include #include -#ifdef CUDNN -#ifndef USE_CMAKE_LIBS -#pragma comment(lib, "cudnn.lib") -#endif -#endif - #ifdef AI2 #include "xnor_layer.h" #endif diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index bad3b84e..bfc1229f 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -1,7 +1,7 @@ #ifndef CONVOLUTIONAL_LAYER_H #define CONVOLUTIONAL_LAYER_H -#include "cuda.h" +#include "dark_cuda.h" #include "image.h" #include "activations.h" #include "layer.h" diff --git a/src/cost_layer.c b/src/cost_layer.c index 33fd8572..09188106 100644 --- a/src/cost_layer.c +++ b/src/cost_layer.c @@ -1,6 +1,6 @@ #include "cost_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include #include diff --git a/src/crnn_layer.c b/src/crnn_layer.c index 0fc6da72..59cac509 100644 --- a/src/crnn_layer.c +++ b/src/crnn_layer.c @@ -1,7 +1,7 @@ #include "crnn_layer.h" #include "convolutional_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" @@ -268,16 +268,18 @@ void forward_crnn_layer_gpu(layer l, network_state state) layer input_layer = *(l.input_layer); layer self_layer = *(l.self_layer); layer output_layer = *(l.output_layer); - /* -#ifdef CUDNN_HALF -// slow and bad - s.index = state.index; - s.net = state.net; - cuda_convert_f32_to_f16(input_layer.weights_gpu, input_layer.c*input_layer.n*input_layer.size*input_layer.size, input_layer.weights_gpu16); - cuda_convert_f32_to_f16(self_layer.weights_gpu, self_layer.c*self_layer.n*self_layer.size*self_layer.size, self_layer.weights_gpu16); - cuda_convert_f32_to_f16(output_layer.weights_gpu, output_layer.c*output_layer.n*output_layer.size*output_layer.size, output_layer.weights_gpu16); + +/* +#ifdef CUDNN_HALF // slow and bad for training + if (!state.train && state.net.cudnn_half) { + s.index = state.index; + cuda_convert_f32_to_f16(input_layer.weights_gpu, input_layer.c*input_layer.n*input_layer.size*input_layer.size, input_layer.weights_gpu16); + cuda_convert_f32_to_f16(self_layer.weights_gpu, self_layer.c*self_layer.n*self_layer.size*self_layer.size, self_layer.weights_gpu16); + cuda_convert_f32_to_f16(output_layer.weights_gpu, output_layer.c*output_layer.n*output_layer.size*output_layer.size, output_layer.weights_gpu16); + } #endif //CUDNN_HALF - */ +*/ + if (state.train) { 
fill_ongpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); diff --git a/src/crop_layer.c b/src/crop_layer.c index 258030be..092237f4 100644 --- a/src/crop_layer.c +++ b/src/crop_layer.c @@ -1,5 +1,5 @@ #include "crop_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include image get_crop_image(crop_layer l) diff --git a/src/crop_layer_kernels.cu b/src/crop_layer_kernels.cu index 5b084fe5..46afca6c 100644 --- a/src/crop_layer_kernels.cu +++ b/src/crop_layer_kernels.cu @@ -4,7 +4,7 @@ #include "crop_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "image.h" __device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) diff --git a/src/cuda.c b/src/dark_cuda.c similarity index 96% rename from src/cuda.c rename to src/dark_cuda.c index 9e7745e2..42aa6fc8 100644 --- a/src/cuda.c +++ b/src/dark_cuda.c @@ -14,6 +14,15 @@ int gpu_index = 0; #include "assert.h" #include #include +#include + +#pragma comment(lib, "cuda.lib") + +#ifdef CUDNN +#ifndef USE_CMAKE_LIBS +#pragma comment(lib, "cudnn.lib") +#endif // USE_CMAKE_LIBS +#endif // CUDNN void cuda_set_device(int n) { @@ -30,6 +39,14 @@ int cuda_get_device() return n; } +void *cuda_get_context() +{ + CUcontext pctx; + CUresult status = cuCtxGetCurrent(&pctx); + if(status != CUDA_SUCCESS) fprintf(stderr, " Error: cuCtxGetCurrent() is failed \n"); + return (void *)pctx; +} + void check_error(cudaError_t status) { cudaError_t status2 = cudaGetLastError(); diff --git a/src/cuda.h b/src/dark_cuda.h similarity index 96% rename from src/cuda.h rename to src/dark_cuda.h index fe33a258..34614dc1 100644 --- a/src/cuda.h +++ b/src/dark_cuda.h @@ -18,11 +18,12 @@ extern int gpu_index; #define WARP_SIZE 32 #define BLOCK_TRANSPOSE32 256 +#include #include #include #include #include -#include +//#include #ifdef CUDNN #include @@ -87,4 +88,4 @@ void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line #else // GPU //LIB_API void cuda_set_device(int n); #endif // GPU -#endif // CUDA_H +#endif // DARKCUDA_H diff --git a/src/darknet.c b/src/darknet.c index bc7a7b7d..c10a7b2f 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -8,7 +8,7 @@ #include "darknet.h" #include "parser.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "connected_layer.h" diff --git a/src/data.c b/src/data.c index e6fa95c5..e364ed76 100644 --- a/src/data.c +++ b/src/data.c @@ -1,7 +1,7 @@ #include "data.h" #include "utils.h" #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include diff --git a/src/deconvolutional_kernels.cu b/src/deconvolutional_kernels.cu index b0ba1a86..472b2247 100644 --- a/src/deconvolutional_kernels.cu +++ b/src/deconvolutional_kernels.cu @@ -9,7 +9,7 @@ #include "im2col.h" #include "col2im.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) { diff --git a/src/deconvolutional_layer.h b/src/deconvolutional_layer.h index 2e1c4527..bb15a429 100644 --- a/src/deconvolutional_layer.h +++ b/src/deconvolutional_layer.h @@ -1,7 +1,7 @@ #ifndef DECONVOLUTIONAL_LAYER_H #define DECONVOLUTIONAL_LAYER_H -#include "cuda.h" +#include "dark_cuda.h" #include "image.h" #include "activations.h" #include "layer.h" diff --git a/src/detection_layer.c b/src/detection_layer.c index 60fdc90d..64d133f9 100644 --- a/src/detection_layer.c +++ b/src/detection_layer.c @@ -3,7 +3,7 @@ #include "softmax_layer.h" #include "blas.h" #include 
"box.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include #include diff --git a/src/detector.c b/src/detector.c index e82cc069..96222379 100644 --- a/src/detector.c +++ b/src/detector.c @@ -923,6 +923,11 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa printf("\n detections_count = %d, unique_truth_count = %d \n", detections_count, unique_truth_count); + int* detection_per_class_count = (int*)calloc(classes, sizeof(int)); + for (j = 0; j < detections_count; ++j) { + detection_per_class_count[detections[j].class_id]++; + } + int* truth_flags = (int*)calloc(unique_truth_count, sizeof(int)); int rank; @@ -945,7 +950,8 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa { truth_flags[d.unique_truth_index] = 1; pr[d.class_id][rank].tp++; // true-positive - } + } else + pr[d.class_id][rank].fp++; } else { pr[d.class_id][rank].fp++; // false-positive @@ -963,6 +969,10 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa if ((tp + fn) > 0) pr[i][rank].recall = (double)tp / (double)(tp + fn); else pr[i][rank].recall = 0; + + if (rank == (detections_count - 1) && detection_per_class_count[i] != (tp + fp)) { // check for last rank + printf(" class_id: %d - detections = %d, tp+fp = %d, tp = %d, fp = %d \n", i, detection_per_class_count[i], tp+fp, tp, fp); + } } } @@ -1014,6 +1024,7 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa free(pr); free(detections); free(truth_classes_count); + free(detection_per_class_count); fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); if (reinforcement_fd != NULL) fclose(reinforcement_fd); diff --git a/src/dropout_layer.c b/src/dropout_layer.c index 9eb22982..3c2abed4 100644 --- a/src/dropout_layer.c +++ b/src/dropout_layer.c @@ -1,6 +1,6 @@ #include "dropout_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include diff --git a/src/dropout_layer_kernels.cu b/src/dropout_layer_kernels.cu index f6a93c91..ceba0fa3 100644 --- a/src/dropout_layer_kernels.cu +++ b/src/dropout_layer_kernels.cu @@ -3,7 +3,7 @@ #include "cublas_v2.h" #include "dropout_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) diff --git a/src/gemm.c b/src/gemm.c index b110da61..fec1e16a 100644 --- a/src/gemm.c +++ b/src/gemm.c @@ -1,7 +1,7 @@ #include "gemm.h" #include "utils.h" #include "im2col.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include #include diff --git a/src/gru_layer.c b/src/gru_layer.c index eac751a5..29acdaa2 100644 --- a/src/gru_layer.c +++ b/src/gru_layer.c @@ -1,7 +1,7 @@ #include "gru_layer.h" #include "connected_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/http_stream.cpp b/src/http_stream.cpp index b911f22f..e61e506d 100644 --- a/src/http_stream.cpp +++ b/src/http_stream.cpp @@ -1,7 +1,8 @@ +#define _XOPEN_SOURCE #include "image.h" #include "http_stream.h" -#ifdef OPENCV + // // a single-threaded, multi client(using select), debug webserver - streaming out mjpg. 
// on win, _WIN32 has to be defined, must link against ws2_32.lib (socks on linux are for free) @@ -11,6 +12,8 @@ #include #include #include +#include +#include using std::cerr; using std::endl; @@ -87,24 +90,12 @@ static int close_socket(SOCKET s) { #endif // _WIN32 -#include -#include -#include -#include -#ifndef CV_VERSION_EPOCH -#include -#endif -using namespace cv; - - - -class MJPG_sender +class JSON_sender { SOCKET sock; SOCKET maxfd; fd_set master; int timeout; // master sock timeout, shutdown after timeout usec. - int quality; // jpeg compression [1..100] int close_all_sockets; int _write(int sock, char const*const s, int len) @@ -115,10 +106,9 @@ class MJPG_sender public: - MJPG_sender(int port = 0, int _timeout = 400000, int _quality = 30) + JSON_sender(int port = 0, int _timeout = 400000) : sock(INVALID_SOCKET) , timeout(_timeout) - , quality(_quality) { close_all_sockets = 0; FD_ZERO(&master); @@ -126,7 +116,7 @@ public: open(port); } - ~MJPG_sender() + ~JSON_sender() { close_all(); release(); @@ -143,8 +133,7 @@ public: void close_all() { close_all_sockets = 1; - cv::Mat tmp(cv::Size(10, 10), CV_8UC3); - write(tmp); + write("\n]"); // close JSON array } bool open(int port) @@ -165,12 +154,12 @@ public: #endif if (::bind(sock, (SOCKADDR*)&address, sizeof(SOCKADDR_IN)) == SOCKET_ERROR) { - cerr << "error MJPG_sender: couldn't bind sock " << sock << " to port " << port << "!" << endl; + cerr << "error JSON_sender: couldn't bind sock " << sock << " to port " << port << "!" << endl; return release(); } if (::listen(sock, 10) == SOCKET_ERROR) { - cerr << "error MJPG_sender: couldn't listen on sock " << sock << " on port " << port << " !" << endl; + cerr << "error JSON_sender: couldn't listen on sock " << sock << " on port " << port << " !" << endl; return release(); } FD_ZERO(&master); @@ -184,7 +173,7 @@ public: return sock != INVALID_SOCKET; } - bool write(const Mat & frame) + bool write(char const* outputbuf) { fd_set rread = master; struct timeval select_timeout = { 0, 0 }; @@ -192,14 +181,7 @@ public: if (::select(maxfd + 1, &rread, NULL, NULL, &select_timeout) <= 0) return true; // nothing broken, there's just noone listening - std::vector outbuf; - std::vector params; - params.push_back(IMWRITE_JPEG_QUALITY); - params.push_back(quality); - cv::imencode(".jpg", frame, outbuf, params); //REMOVED FOR COMPATIBILITY - // https://docs.opencv.org/3.4/d4/da8/group__imgcodecs.html#ga292d81be8d76901bff7988d18d2b42ac - //std::cerr << "cv::imencode call disabled!" << std::endl; - size_t outlen = outbuf.size(); + size_t outlen = strlen(outputbuf); #ifdef _WIN32 for (unsigned i = 0; iclient ? 
maxfd : client); FD_SET(client, &master); @@ -239,60 +221,106 @@ public: "Expires: 0\r\n" "Cache-Control: no-cache, private\r\n" "Pragma: no-cache\r\n" - "Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\r\n" + "Content-Type: application/json\r\n" + //"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n" "\r\n", 0); - cerr << "MJPG_sender: new client " << client << endl; + _write(client, "[\n", 0); // open JSON array + int n = _write(client, outputbuf, outlen); + cerr << "JSON_sender: new client " << client << endl; } else // existing client, just stream pix { - if (close_all_sockets) { - int result = close_socket(s); - cerr << "MJPG_sender: close clinet: " << result << " \n"; - continue; - } - - char head[400]; - sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen); - _write(s, head, 0); - int n = _write(s, (char*)(&outbuf[0]), outlen); - //cerr << "known client " << s << " " << n << endl; + //char head[400]; + // application/x-resource+json or application/x-collection+json - when you are representing REST resources and collections + // application/json or text/json or text/javascript or text/plain. + // https://stackoverflow.com/questions/477816/what-is-the-correct-json-content-type + //sprintf(head, "\r\nContent-Length: %zu\r\n\r\n", outlen); + //sprintf(head, "--boundary\r\nContent-Type: application/json\r\nContent-Length: %zu\r\n\r\n", outlen); + //_write(s, head, 0); + if (!close_all_sockets) _write(s, ", \n", 0); + int n = _write(s, outputbuf, outlen); if (n < outlen) { - cerr << "MJPG_sender: kill client " << s << endl; + cerr << "JSON_sender: kill client " << s << endl; ::shutdown(s, 2); FD_CLR(s, &master); } + + if (close_all_sockets) { + int result = close_socket(s); + cerr << "JSON_sender: close clinet: " << result << " \n"; + continue; + } } } if (close_all_sockets) { int result = close_socket(sock); - cerr << "MJPG_sender: close acceptor: " << result << " \n\n"; + cerr << "JSON_sender: close acceptor: " << result << " \n\n"; } return true; - } + } }; // ---------------------------------------- -void send_mjpeg(IplImage* ipl, int port, int timeout, int quality) +static std::unique_ptr js_ptr; +static std::mutex mtx; + +void delete_json_sender() +{ + std::lock_guard lock(mtx); + js_ptr.release(); +} + +void send_json_custom(char const* send_buf, int port, int timeout) { try { - static MJPG_sender wri(port, timeout, quality); - cv::Mat mat = cv::cvarrToMat(ipl); - wri.write(mat); - std::cout << " MJPEG-stream sent. \n"; + std::lock_guard lock(mtx); + if(!js_ptr) js_ptr.reset(new JSON_sender(port, timeout)); + + js_ptr->write(send_buf); } catch (...) { - cerr << " Error in send_mjpeg() function \n"; + cerr << " Error in send_json_custom() function \n"; + } +} + +void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout) +{ + try { + char *send_buf = detection_to_json(dets, nboxes, classes, names, frame_id, NULL); + + send_json_custom(send_buf, port, timeout); + std::cout << " JSON-stream sent. \n"; + + free(send_buf); + } + catch (...) { + cerr << " Error in send_json() function \n"; } } // ---------------------------------------- -class JSON_sender + +#ifdef OPENCV + +#include +#include +#include +#include +#ifndef CV_VERSION_EPOCH +#include +#endif +using namespace cv; + + + +class MJPG_sender { SOCKET sock; SOCKET maxfd; fd_set master; int timeout; // master sock timeout, shutdown after timeout usec. 
+ int quality; // jpeg compression [1..100] int close_all_sockets; int _write(int sock, char const*const s, int len) @@ -303,9 +331,10 @@ class JSON_sender public: - JSON_sender(int port = 0, int _timeout = 400000) + MJPG_sender(int port = 0, int _timeout = 400000, int _quality = 30) : sock(INVALID_SOCKET) , timeout(_timeout) + , quality(_quality) { close_all_sockets = 0; FD_ZERO(&master); @@ -313,7 +342,7 @@ public: open(port); } - ~JSON_sender() + ~MJPG_sender() { close_all(); release(); @@ -330,7 +359,8 @@ public: void close_all() { close_all_sockets = 1; - write("\n]"); // close JSON array + cv::Mat tmp(cv::Size(10, 10), CV_8UC3); + write(tmp); } bool open(int port) @@ -351,12 +381,12 @@ public: #endif if (::bind(sock, (SOCKADDR*)&address, sizeof(SOCKADDR_IN)) == SOCKET_ERROR) { - cerr << "error JSON_sender: couldn't bind sock " << sock << " to port " << port << "!" << endl; + cerr << "error MJPG_sender: couldn't bind sock " << sock << " to port " << port << "!" << endl; return release(); } if (::listen(sock, 10) == SOCKET_ERROR) { - cerr << "error JSON_sender: couldn't listen on sock " << sock << " on port " << port << " !" << endl; + cerr << "error MJPG_sender: couldn't listen on sock " << sock << " on port " << port << " !" << endl; return release(); } FD_ZERO(&master); @@ -370,7 +400,7 @@ public: return sock != INVALID_SOCKET; } - bool write(char *outputbuf) + bool write(const Mat & frame) { fd_set rread = master; struct timeval select_timeout = { 0, 0 }; @@ -378,7 +408,14 @@ public: if (::select(maxfd + 1, &rread, NULL, NULL, &select_timeout) <= 0) return true; // nothing broken, there's just noone listening - size_t outlen = strlen(outputbuf); + std::vector outbuf; + std::vector params; + params.push_back(IMWRITE_JPEG_QUALITY); + params.push_back(quality); + cv::imencode(".jpg", frame, outbuf, params); //REMOVED FOR COMPATIBILITY + // https://docs.opencv.org/3.4/d4/da8/group__imgcodecs.html#ga292d81be8d76901bff7988d18d2b42ac + //std::cerr << "cv::imencode call disabled!" << std::endl; + size_t outlen = outbuf.size(); #ifdef _WIN32 for (unsigned i = 0; iclient ? maxfd : client); FD_SET(client, &master); @@ -418,62 +455,52 @@ public: "Expires: 0\r\n" "Cache-Control: no-cache, private\r\n" "Pragma: no-cache\r\n" - "Content-Type: application/json\r\n" - //"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n" + "Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\r\n" "\r\n", 0); - _write(client, "[\n", 0); // open JSON array - int n = _write(client, outputbuf, outlen); - cerr << "JSON_sender: new client " << client << endl; + cerr << "MJPG_sender: new client " << client << endl; } else // existing client, just stream pix { - //char head[400]; - // application/x-resource+json or application/x-collection+json - when you are representing REST resources and collections - // application/json or text/json or text/javascript or text/plain. 
- // https://stackoverflow.com/questions/477816/what-is-the-correct-json-content-type - //sprintf(head, "\r\nContent-Length: %zu\r\n\r\n", outlen); - //sprintf(head, "--boundary\r\nContent-Type: application/json\r\nContent-Length: %zu\r\n\r\n", outlen); - //_write(s, head, 0); - if(!close_all_sockets) _write(s, ", \n", 0); - int n = _write(s, outputbuf, outlen); + if (close_all_sockets) { + int result = close_socket(s); + cerr << "MJPG_sender: close clinet: " << result << " \n"; + continue; + } + + char head[400]; + sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen); + _write(s, head, 0); + int n = _write(s, (char*)(&outbuf[0]), outlen); + //cerr << "known client " << s << " " << n << endl; if (n < outlen) { - cerr << "JSON_sender: kill client " << s << endl; + cerr << "MJPG_sender: kill client " << s << endl; ::shutdown(s, 2); FD_CLR(s, &master); } - - if (close_all_sockets) { - int result = close_socket(s); - cerr << "JSON_sender: close clinet: " << result << " \n"; - continue; - } } } if (close_all_sockets) { int result = close_socket(sock); - cerr << "JSON_sender: close acceptor: " << result << " \n\n"; + cerr << "MJPG_sender: close acceptor: " << result << " \n\n"; } return true; } }; // ---------------------------------------- -void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout) +void send_mjpeg(IplImage* ipl, int port, int timeout, int quality) { try { - static JSON_sender js(port, timeout); - char *send_buf = detection_to_json(dets, nboxes, classes, names, frame_id, NULL); - - js.write(send_buf); - std::cout << " JSON-stream sent. \n"; - free(send_buf); + static MJPG_sender wri(port, timeout, quality); + cv::Mat mat = cv::cvarrToMat(ipl); + wri.write(mat); + std::cout << " MJPEG-stream sent. \n"; } catch (...) 
{ - cerr << " Error in send_json() function \n"; + cerr << " Error in send_mjpeg() function \n"; } } - // ---------------------------------------- CvCapture* get_capture_video_stream(const char *path) { @@ -641,8 +668,7 @@ image load_image_resize(char *filename, int w, int h, int c, image *im) return out; } - -#endif // OPENCV +#endif // OPENCV // ----------------------------------------------------- diff --git a/src/im2col_kernels.cu b/src/im2col_kernels.cu index da952df5..539824a9 100644 --- a/src/im2col_kernels.cu +++ b/src/im2col_kernels.cu @@ -4,11 +4,10 @@ #include #include "im2col.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include -//#include template diff --git a/src/image.c b/src/image.c index dadaa1e5..72b4bdfb 100644 --- a/src/image.c +++ b/src/image.c @@ -1,7 +1,7 @@ #include "image.h" #include "utils.h" #include "blas.h" -#include "cuda.h" +#include "dark_cuda.h" #include #ifndef _USE_MATH_DEFINES #define _USE_MATH_DEFINES diff --git a/src/layer.c b/src/layer.c index 2361c1e0..ae87065f 100644 --- a/src/layer.c +++ b/src/layer.c @@ -1,5 +1,5 @@ #include "layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include void free_layer(layer l) diff --git a/src/local_layer.h b/src/local_layer.h index 198ca362..8ef0dbad 100644 --- a/src/local_layer.h +++ b/src/local_layer.h @@ -1,7 +1,7 @@ #ifndef LOCAL_LAYER_H #define LOCAL_LAYER_H -#include "cuda.h" +#include "dark_cuda.h" #include "image.h" #include "activations.h" #include "layer.h" diff --git a/src/lstm_layer.c b/src/lstm_layer.c index 2894d3d3..bf1e303b 100644 --- a/src/lstm_layer.c +++ b/src/lstm_layer.c @@ -1,7 +1,7 @@ #include "lstm_layer.h" #include "connected_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index cf99e619..2e217459 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -1,5 +1,5 @@ #include "maxpool_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "gemm.h" #include @@ -32,8 +32,8 @@ void cudnn_maxpool_setup(layer *l) CUDNN_NOT_PROPAGATE_NAN, // CUDNN_PROPAGATE_NAN, CUDNN_NOT_PROPAGATE_NAN l->size, l->size, - 0, //l.pad, - 0, //l.pad, + l->pad/2, //0, //l.pad, + l->pad/2, //0, //l.pad, l->stride, l->stride); diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h index ba6a2020..0a90c376 100644 --- a/src/maxpool_layer.h +++ b/src/maxpool_layer.h @@ -2,7 +2,7 @@ #define MAXPOOL_LAYER_H #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu index 29aa257c..36fccf32 100644 --- a/src/maxpool_layer_kernels.cu +++ b/src/maxpool_layer_kernels.cu @@ -3,7 +3,7 @@ #include "cublas_v2.h" #include "maxpool_layer.h" -#include "cuda.h" +#include "dark_cuda.h" __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) { diff --git a/src/network_kernels.cu b/src/network_kernels.cu index a40dae0a..298372df 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -1,7 +1,4 @@ -//#include "cuda_runtime.h" -//#include "curand.h" -//#include "cublas_v2.h" -#include "cuda.h" +#include "dark_cuda.h" #include #include diff --git a/src/region_layer.c b/src/region_layer.c index 3221e77e..74c10e13 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -2,7 +2,7 @@ #include "activations.h" #include "blas.h" #include "box.h" -#include "cuda.h" +#include "dark_cuda.h" 
#include "utils.h" #include #include diff --git a/src/reorg_layer.c b/src/reorg_layer.c index 47c5efa4..72d05773 100644 --- a/src/reorg_layer.c +++ b/src/reorg_layer.c @@ -1,5 +1,5 @@ #include "reorg_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include diff --git a/src/reorg_layer.h b/src/reorg_layer.h index b6ed379c..f2b90930 100644 --- a/src/reorg_layer.h +++ b/src/reorg_layer.h @@ -2,7 +2,7 @@ #define REORG_LAYER_H #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/reorg_old_layer.c b/src/reorg_old_layer.c index defc034d..530da202 100644 --- a/src/reorg_old_layer.c +++ b/src/reorg_old_layer.c @@ -1,5 +1,5 @@ #include "reorg_old_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include diff --git a/src/reorg_old_layer.h b/src/reorg_old_layer.h index b66769b5..15c61f8d 100644 --- a/src/reorg_old_layer.h +++ b/src/reorg_old_layer.h @@ -2,7 +2,7 @@ #define REORG_OLD_LAYER_H #include "image.h" -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/rnn_layer.c b/src/rnn_layer.c index 22aade09..28163d75 100644 --- a/src/rnn_layer.c +++ b/src/rnn_layer.c @@ -1,7 +1,7 @@ #include "rnn_layer.h" #include "connected_layer.h" #include "utils.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include "gemm.h" diff --git a/src/route_layer.c b/src/route_layer.c index 8bd6817a..b502fbe7 100644 --- a/src/route_layer.c +++ b/src/route_layer.c @@ -1,5 +1,5 @@ #include "route_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c index b84b72f2..06cd6056 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -1,5 +1,5 @@ #include "shortcut_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include #include diff --git a/src/softmax_layer.c b/src/softmax_layer.c index 3cd607be..9bbff9a0 100644 --- a/src/softmax_layer.c +++ b/src/softmax_layer.c @@ -1,6 +1,6 @@ #include "softmax_layer.h" #include "blas.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include "blas.h" diff --git a/src/upsample_layer.c b/src/upsample_layer.c index d31dd49e..1a2783c2 100644 --- a/src/upsample_layer.c +++ b/src/upsample_layer.c @@ -1,5 +1,5 @@ #include "upsample_layer.h" -#include "cuda.h" +#include "dark_cuda.h" #include "blas.h" #include diff --git a/src/upsample_layer.h b/src/upsample_layer.h index 68aff329..4461cb15 100644 --- a/src/upsample_layer.h +++ b/src/upsample_layer.h @@ -1,6 +1,6 @@ #ifndef UPSAMPLE_LAYER_H #define UPSAMPLE_LAYER_H -#include "cuda.h" +#include "dark_cuda.h" #include "layer.h" #include "network.h" diff --git a/src/utils.c b/src/utils.c index b4d17c23..904aa2e0 100644 --- a/src/utils.c +++ b/src/utils.c @@ -252,6 +252,8 @@ void replace_image_to_label(const char* input_path, char* output_path) find_replace_extension(output_path, ".BMP", ".txt", output_path); find_replace_extension(output_path, ".ppm", ".txt", output_path); find_replace_extension(output_path, ".PPM", ".txt", output_path); + find_replace_extension(output_path, ".tiff", ".txt", output_path); + find_replace_extension(output_path, ".TIFF", ".txt", output_path); } float sec(clock_t clocks) diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp index be84f4d5..12ab208c 100644 --- a/src/yolo_console_dll.cpp +++ b/src/yolo_console_dll.cpp @@ -5,21 +5,146 @@ #include #include #include +#include #include -#include // std::mutex, 
std::unique_lock -#include // std::condition_variable +#include // std::mutex, std::unique_lock +#include // std::unordered_map -// To use tracking - uncomment the following line. Tracking is supported only by OpenCV 3.x +// It makes sense only for video-Camera (not for video-File) +// To use - uncomment the following line. Optical-flow is supported only by OpenCV 3.x - 4.x //#define TRACK_OPTFLOW +//#define GPU + +// To use 3D-stereo camera ZED - uncomment the following line. ZED_SDK should be installed. +//#define ZED_STEREO -//#include "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.1\include\cuda_runtime.h" -//#pragma comment(lib, "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.1/lib/x64/cudart.lib") -//static std::shared_ptr device_ptr(NULL, [](void *img) { cudaDeviceReset(); }); #include "yolo_v2_class.hpp" // imported functions from DLL #ifdef OPENCV +#ifdef ZED_STEREO +#include +#pragma comment(lib, "sl_core64.lib") +#pragma comment(lib, "sl_input64.lib") +#pragma comment(lib, "sl_zed64.lib") + +float getMedian(std::vector &v) { + size_t n = v.size() / 2; + std::nth_element(v.begin(), v.begin() + n, v.end()); + return v[n]; +} + +std::vector get_3d_coordinates(std::vector bbox_vect, cv::Mat xyzrgba) +{ + bool valid_measure; + int i, j; + const int R_max = 4; + + std::vector bbox3d_vect; + + for (auto &cur_box : bbox_vect) { + + int center_i = cur_box.x + cur_box.w * 0.5f, center_j = cur_box.y + cur_box.h * 0.5f; + + std::vector x_vect, y_vect, z_vect; + for (int R = 0; R < R_max; R++) { + for (int y = -R; y <= R; y++) { + for (int x = -R; x <= R; x++) { + i = center_i + x; + j = center_j + y; + sl::float4 out(NAN, NAN, NAN, NAN); + if (i >= 0 && i < xyzrgba.cols && j >= 0 && j < xyzrgba.rows) { + cv::Vec4f &elem = xyzrgba.at(j, i); // x,y,z,w + out.x = elem[0]; + out.y = elem[1]; + out.z = elem[2]; + out.w = elem[3]; + } + valid_measure = std::isfinite(out.z); + if (valid_measure) + { + x_vect.push_back(out.x); + y_vect.push_back(out.y); + z_vect.push_back(out.z); + } + } + } + } + + if (x_vect.size() * y_vect.size() * z_vect.size() > 0) + { + cur_box.x_3d = getMedian(x_vect); + cur_box.y_3d = getMedian(y_vect); + cur_box.z_3d = getMedian(z_vect); + } + else { + cur_box.x_3d = NAN; + cur_box.y_3d = NAN; + cur_box.z_3d = NAN; + } + + bbox3d_vect.emplace_back(cur_box); + } + + return bbox3d_vect; +} + +cv::Mat slMat2cvMat(sl::Mat &input) { + // Mapping between MAT_TYPE and CV_TYPE + int cv_type = -1; + switch (input.getDataType()) { + case sl::MAT_TYPE_32F_C1: + cv_type = CV_32FC1; + break; + case sl::MAT_TYPE_32F_C2: + cv_type = CV_32FC2; + break; + case sl::MAT_TYPE_32F_C3: + cv_type = CV_32FC3; + break; + case sl::MAT_TYPE_32F_C4: + cv_type = CV_32FC4; + break; + case sl::MAT_TYPE_8U_C1: + cv_type = CV_8UC1; + break; + case sl::MAT_TYPE_8U_C2: + cv_type = CV_8UC2; + break; + case sl::MAT_TYPE_8U_C3: + cv_type = CV_8UC3; + break; + case sl::MAT_TYPE_8U_C4: + cv_type = CV_8UC4; + break; + default: + break; + } + return cv::Mat(input.getHeight(), input.getWidth(), cv_type, input.getPtr(sl::MEM_CPU)); +} + +cv::Mat zed_capture_rgb(sl::Camera &zed) { + sl::Mat left; + zed.retrieveImage(left); + return slMat2cvMat(left).clone(); +} + +cv::Mat zed_capture_3d(sl::Camera &zed) { + sl::Mat cur_cloud; + zed.retrieveMeasure(cur_cloud, sl::MEASURE_XYZ); + return slMat2cvMat(cur_cloud).clone(); +} + +static sl::Camera zed; // ZED-camera + +#else // ZED_STEREO +std::vector get_3d_coordinates(std::vector bbox_vect, cv::Mat xyzrgba) { + return bbox_vect; +} +#endif // ZED_STEREO + + 
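// Summary of the ZED helpers above: get_3d_coordinates() samples the point cloud in
// growing square windows (R = 0..3) around each bbox center, keeps only samples whose
// depth is finite, and stores the per-axis median in x_3d / y_3d / z_3d (meters, per the
// sl::UNIT_METER setting used below); boxes with no valid sample keep NAN. slMat2cvMat()
// only wraps the sl::Mat buffer in a cv::Mat header, which is why zed_capture_rgb() and
// zed_capture_3d() return a .clone() of the converted frame.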
#include // C++ #include #ifndef CV_VERSION_EPOCH @@ -44,139 +169,6 @@ #endif // USE_CMAKE_LIBS #endif // CV_VERSION_EPOCH -class track_kalman { -public: - cv::KalmanFilter kf; - int state_size, meas_size, contr_size; - - - track_kalman(int _state_size = 10, int _meas_size = 10, int _contr_size = 0) - : state_size(_state_size), meas_size(_meas_size), contr_size(_contr_size) - { - kf.init(state_size, meas_size, contr_size, CV_32F); - - cv::setIdentity(kf.measurementMatrix); - cv::setIdentity(kf.measurementNoiseCov, cv::Scalar::all(1e-1)); - cv::setIdentity(kf.processNoiseCov, cv::Scalar::all(1e-5)); - cv::setIdentity(kf.errorCovPost, cv::Scalar::all(1e-2)); - cv::setIdentity(kf.transitionMatrix); - } - - void set(std::vector result_vec) { - for (size_t i = 0; i < result_vec.size() && i < state_size*2; ++i) { - kf.statePost.at(i * 2 + 0) = result_vec[i].x; - kf.statePost.at(i * 2 + 1) = result_vec[i].y; - } - } - - // Kalman.correct() calculates: statePost = statePre + gain * (z(k)-measurementMatrix*statePre); - // corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) - std::vector correct(std::vector result_vec) { - cv::Mat measurement(meas_size, 1, CV_32F); - for (size_t i = 0; i < result_vec.size() && i < meas_size * 2; ++i) { - measurement.at(i * 2 + 0) = result_vec[i].x; - measurement.at(i * 2 + 1) = result_vec[i].y; - } - cv::Mat estimated = kf.correct(measurement); - for (size_t i = 0; i < result_vec.size() && i < meas_size * 2; ++i) { - result_vec[i].x = estimated.at(i * 2 + 0); - result_vec[i].y = estimated.at(i * 2 + 1); - } - return result_vec; - } - - // Kalman.predict() calculates: statePre = TransitionMatrix * statePost; - // predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) - std::vector predict() { - std::vector result_vec; - cv::Mat control; - cv::Mat prediction = kf.predict(control); - for (size_t i = 0; i < prediction.rows && i < state_size * 2; ++i) { - result_vec[i].x = prediction.at(i * 2 + 0); - result_vec[i].y = prediction.at(i * 2 + 1); - } - return result_vec; - } - -}; - - - - -class extrapolate_coords_t { -public: - std::vector old_result_vec; - std::vector dx_vec, dy_vec, time_vec; - std::vector old_dx_vec, old_dy_vec; - - void new_result(std::vector new_result_vec, float new_time) { - old_dx_vec = dx_vec; - old_dy_vec = dy_vec; - if (old_dx_vec.size() != old_result_vec.size()) std::cout << "old_dx != old_res \n"; - dx_vec = std::vector(new_result_vec.size(), 0); - dy_vec = std::vector(new_result_vec.size(), 0); - update_result(new_result_vec, new_time, false); - old_result_vec = new_result_vec; - time_vec = std::vector(new_result_vec.size(), new_time); - } - - void update_result(std::vector new_result_vec, float new_time, bool update = true) { - for (size_t i = 0; i < new_result_vec.size(); ++i) { - for (size_t k = 0; k < old_result_vec.size(); ++k) { - if (old_result_vec[k].track_id == new_result_vec[i].track_id && old_result_vec[k].obj_id == new_result_vec[i].obj_id) { - float const delta_time = new_time - time_vec[k]; - if (abs(delta_time) < 1) break; - size_t index = (update) ? 
k : i; - float dx = ((float)new_result_vec[i].x - (float)old_result_vec[k].x) / delta_time; - float dy = ((float)new_result_vec[i].y - (float)old_result_vec[k].y) / delta_time; - float old_dx = dx, old_dy = dy; - - // if it's shaking - if (update) { - if (dx * dx_vec[i] < 0) dx = dx / 2; - if (dy * dy_vec[i] < 0) dy = dy / 2; - } else { - if (dx * old_dx_vec[k] < 0) dx = dx / 2; - if (dy * old_dy_vec[k] < 0) dy = dy / 2; - } - dx_vec[index] = dx; - dy_vec[index] = dy; - - //if (old_dx == dx && old_dy == dy) std::cout << "not shakin \n"; - //else std::cout << "shakin \n"; - - if (dx_vec[index] > 1000 || dy_vec[index] > 1000) { - //std::cout << "!!! bad dx or dy, dx = " << dx_vec[index] << ", dy = " << dy_vec[index] << - // ", delta_time = " << delta_time << ", update = " << update << std::endl; - dx_vec[index] = 0; - dy_vec[index] = 0; - } - old_result_vec[k].x = new_result_vec[i].x; - old_result_vec[k].y = new_result_vec[i].y; - time_vec[k] = new_time; - break; - } - } - } - } - - std::vector predict(float cur_time) { - std::vector result_vec = old_result_vec; - for (size_t i = 0; i < old_result_vec.size(); ++i) { - float const delta_time = cur_time - time_vec[i]; - auto &bbox = result_vec[i]; - float new_x = (float) bbox.x + dx_vec[i] * delta_time; - float new_y = (float) bbox.y + dy_vec[i] * delta_time; - if (new_x > 0) bbox.x = new_x; - else bbox.x = 0; - if (new_y > 0) bbox.y = new_y; - else bbox.y = 0; - } - return result_vec; - } - -}; - void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector obj_names, int current_det_fps = -1, int current_cap_fps = -1) @@ -190,11 +182,22 @@ void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector 0) obj_name += " - " + std::to_string(i.track_id); cv::Size const text_size = getTextSize(obj_name, cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, 2, 0); - int const max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2); - cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 1, 0), std::max((int)i.y - 30, 0)), - cv::Point2f(std::min((int)i.x + max_width, mat_img.cols-1), std::min((int)i.y, mat_img.rows-1)), + int max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2); + std::string coords_3d; + if (!isnan(i.z_3d)) { + std::stringstream ss; + ss << std::fixed << std::setprecision(2) << "x:" << i.x_3d << "m y:" << i.y_3d << "m z:" << i.z_3d << "m "; + coords_3d = ss.str(); + cv::Size const text_size_3d = getTextSize(ss.str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, 1, 0); + int const max_width_3d = (text_size_3d.width > i.w + 2) ? 
text_size_3d.width : (i.w + 2); + if (max_width_3d > max_width) max_width = max_width_3d; + } + + cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 1, 0), std::max((int)i.y - 35, 0)), + cv::Point2f(std::min((int)i.x + max_width, mat_img.cols - 1), std::min((int)i.y, mat_img.rows - 1)), color, CV_FILLED, 8, 0); - putText(mat_img, obj_name, cv::Point2f(i.x, i.y - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(0, 0, 0), 2); + putText(mat_img, obj_name, cv::Point2f(i.x, i.y - 16), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(0, 0, 0), 2); + if(!coords_3d.empty()) putText(mat_img, coords_3d, cv::Point2f(i.x, i.y-1), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cv::Scalar(0, 0, 0), 1); } } if (current_det_fps >= 0 && current_cap_fps >= 0) { @@ -205,7 +208,8 @@ void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector const result_vec, std::vector const obj_names) { +void show_console_result(std::vector const result_vec, std::vector const obj_names, int frame_id = -1) { + if (frame_id >= 0) std::cout << " Frame: " << frame_id << std::endl; for (auto &i : result_vec) { if (obj_names.size() > i.obj_id) std::cout << obj_names[i.obj_id] << " - "; std::cout << "obj_id = " << i.obj_id << ", x = " << i.x << ", y = " << i.y @@ -223,6 +227,38 @@ std::vector objects_names_from_file(std::string const filename) { return file_lines; } +template +class send_one_replaceable_object_t { + const bool sync; + std::atomic a_ptr; +public: + + void send(T const& _obj) { + T *new_ptr = new T; + *new_ptr = _obj; + if (sync) { + while (a_ptr.load()) std::this_thread::sleep_for(std::chrono::milliseconds(3)); + } + std::unique_ptr old_ptr(a_ptr.exchange(new_ptr)); + } + + T receive() { + std::unique_ptr ptr; + do { + while(!a_ptr.load()) std::this_thread::sleep_for(std::chrono::milliseconds(3)); + ptr.reset(a_ptr.exchange(NULL)); + } while (!ptr); + T obj = *ptr; + return obj; + } + + bool is_object_present() { + return (a_ptr.load() != NULL); + } + + send_one_replaceable_object_t(bool _sync) : sync(_sync), a_ptr(NULL) + {} +}; int main(int argc, char *argv[]) { @@ -239,17 +275,23 @@ int main(int argc, char *argv[]) } else if (argc > 1) filename = argv[1]; - float const thresh = (argc > 5) ? std::stof(argv[5]) : 0.20; + float const thresh = (argc > 5) ? 
std::stof(argv[5]) : 0.2; Detector detector(cfg_file, weights_file); auto obj_names = objects_names_from_file(names_file); std::string out_videofile = "result.avi"; - bool const save_output_videofile = true; -#ifdef TRACK_OPTFLOW + bool const save_output_videofile = false; // true - for history + bool const send_network = false; // true - for remote detection + bool const use_kalman_filter = false; // true - for stationary camera + + bool detection_sync = true; // true - for video-file +#ifdef TRACK_OPTFLOW // for slow GPU + detection_sync = false; Tracker_optflow tracker_flow; - detector.wait_stream = true; -#endif + //detector.wait_stream = true; +#endif // TRACK_OPTFLOW + while (true) { @@ -259,187 +301,318 @@ int main(int argc, char *argv[]) try { #ifdef OPENCV - extrapolate_coords_t extrapolate_coords; - bool extrapolate_flag = false; - float cur_time_extrapolate = 0, old_time_extrapolate = 0; preview_boxes_t large_preview(100, 150, false), small_preview(50, 50, true); bool show_small_boxes = false; std::string const file_ext = filename.substr(filename.find_last_of(".") + 1); std::string const protocol = filename.substr(0, 7); if (file_ext == "avi" || file_ext == "mp4" || file_ext == "mjpg" || file_ext == "mov" || // video file - protocol == "rtmp://" || protocol == "rtsp://" || protocol == "http://" || protocol == "https:/") // video network stream + protocol == "rtmp://" || protocol == "rtsp://" || protocol == "http://" || protocol == "https:/" || // video network stream + filename == "zed_camera" || file_ext == "svo" || filename == "web_camera") // ZED stereo camera + { - cv::Mat cap_frame, cur_frame, det_frame, write_frame; - std::queue track_optflow_queue; - int passed_flow_frames = 0; - std::shared_ptr det_image; - std::vector result_vec, thread_result_vec; - detector.nms = 0.02; // comment it - if track_id is not required - std::atomic consumed, videowrite_ready; - bool exit_flag = false; - consumed = true; - videowrite_ready = true; - std::atomic fps_det_counter, fps_cap_counter; - fps_det_counter = 0; - fps_cap_counter = 0; - int current_det_fps = 0, current_cap_fps = 0; - std::thread t_detect, t_cap, t_videowrite; - std::mutex mtx; - std::condition_variable cv_detected, cv_pre_tracked; + if (protocol == "rtsp://" || protocol == "http://" || protocol == "https:/" || filename == "zed_camera" || filename == "web_camera") + detection_sync = false; + + cv::Mat cur_frame; + std::atomic fps_cap_counter(0), fps_det_counter(0); + std::atomic current_fps_cap(0), current_fps_det(0); + std::atomic exit_flag(false); std::chrono::steady_clock::time_point steady_start, steady_end; - cv::VideoCapture cap(filename); cap >> cur_frame; - int const video_fps = cap.get(CV_CAP_PROP_FPS); + int video_fps = 25; + bool use_zed_camera = false; + + track_kalman_t track_kalman; + +#ifdef ZED_STEREO + sl::InitParameters init_params; + init_params.camera_resolution = sl::RESOLUTION_HD720; + init_params.coordinate_units = sl::UNIT_METER; + //init_params.sdk_cuda_ctx = (CUcontext)detector.get_cuda_context(); + init_params.sdk_gpu_id = detector.cur_gpu_id; + init_params.camera_buffer_count_linux = 2; + if (file_ext == "svo") init_params.svo_input_filename.set(filename.c_str()); + if (filename == "zed_camera" || file_ext == "svo") { + std::cout << "ZED 3D Camera " << zed.open(init_params) << std::endl; + cur_frame = zed_capture_rgb(zed); + use_zed_camera = true; + } +#endif // ZED_STEREO + + cv::VideoCapture cap; + if (filename == "web_camera") { + cap.open(0); + video_fps = cap.get(CV_CAP_PROP_FPS); + cap 
>> cur_frame; + } else if (!use_zed_camera) { + cap.open(filename); + video_fps = cap.get(CV_CAP_PROP_FPS); + cap >> cur_frame; + } cv::Size const frame_size = cur_frame.size(); + //cv::Size const frame_size(cap.get(CV_CAP_PROP_FRAME_WIDTH), cap.get(CV_CAP_PROP_FRAME_HEIGHT)); + std::cout << "\n Video size: " << frame_size << std::endl; + cv::VideoWriter output_video; if (save_output_videofile) output_video.open(out_videofile, CV_FOURCC('D', 'I', 'V', 'X'), std::max(35, video_fps), frame_size, true); - while (!cur_frame.empty()) + struct detection_data_t { + cv::Mat cap_frame; + std::shared_ptr det_image; + std::vector result_vec; + cv::Mat draw_frame; + bool new_detection; + uint64_t frame_id; + bool exit_flag; + cv::Mat zed_cloud; + std::queue track_optflow_queue; + detection_data_t() : exit_flag(false), new_detection(false) {} + }; + + const bool sync = detection_sync; // sync data exchange + send_one_replaceable_object_t cap2prepare(sync), cap2draw(sync), + prepare2detect(sync), detect2draw(sync), draw2show(sync), draw2write(sync), draw2net(sync); + + std::thread t_cap, t_prepare, t_detect, t_post, t_draw, t_write, t_network; + + // capture new video-frame + if (t_cap.joinable()) t_cap.join(); + t_cap = std::thread([&]() { - // always sync - if (t_cap.joinable()) { - t_cap.join(); - ++fps_cap_counter; - cur_frame = cap_frame.clone(); - } - t_cap = std::thread([&]() { cap >> cap_frame; }); - ++cur_time_extrapolate; + uint64_t frame_id = 0; + detection_data_t detection_data; + do { + detection_data = detection_data_t(); +#ifdef ZED_STEREO + if (use_zed_camera) { + while (zed.grab() != sl::SUCCESS) std::this_thread::sleep_for(std::chrono::milliseconds(2)); + detection_data.cap_frame = zed_capture_rgb(zed); + detection_data.zed_cloud = zed_capture_3d(zed); + } + else +#endif // ZED_STEREO + { + cap >> detection_data.cap_frame; + } + fps_cap_counter++; + detection_data.frame_id = frame_id++; + if (detection_data.cap_frame.empty() || exit_flag) { + std::cout << " exit_flag: detection_data.cap_frame.size = " << detection_data.cap_frame.size() << std::endl; + detection_data.exit_flag = true; + detection_data.cap_frame = cv::Mat(frame_size, CV_8UC3); + } - // swap result bouned-boxes and input-frame - if(consumed) - { - std::unique_lock lock(mtx); - det_image = detector.mat_to_image_resize(cur_frame); - auto old_result_vec = detector.tracking_id(result_vec); - auto detected_result_vec = thread_result_vec; - result_vec = detected_result_vec; -#ifdef TRACK_OPTFLOW - // track optical flow - if (track_optflow_queue.size() > 0) { - //std::cout << "\n !!!! 
all = " << track_optflow_queue.size() << ", cur = " << passed_flow_frames << std::endl; - cv::Mat first_frame = track_optflow_queue.front(); - tracker_flow.update_tracking_flow(track_optflow_queue.front(), result_vec); + if (!detection_sync) { + cap2draw.send(detection_data); // skip detection + } + cap2prepare.send(detection_data); + } while (!detection_data.exit_flag); + std::cout << " t_cap exit \n"; + }); - while (track_optflow_queue.size() > 1) { - track_optflow_queue.pop(); - result_vec = tracker_flow.tracking_flow(track_optflow_queue.front(), true); - } - track_optflow_queue.pop(); - passed_flow_frames = 0; - result_vec = detector.tracking_id(result_vec); - auto tmp_result_vec = detector.tracking_id(detected_result_vec, false); - small_preview.set(first_frame, tmp_result_vec); + // pre-processing video frame (resize, convertion) + t_prepare = std::thread([&]() + { + std::shared_ptr det_image; + detection_data_t detection_data; + do { + detection_data = cap2prepare.receive(); + + det_image = detector.mat_to_image_resize(detection_data.cap_frame); + detection_data.det_image = det_image; + prepare2detect.send(detection_data); // detection + + } while (!detection_data.exit_flag); + std::cout << " t_prepare exit \n"; + }); - extrapolate_coords.new_result(tmp_result_vec, old_time_extrapolate); - old_time_extrapolate = cur_time_extrapolate; - extrapolate_coords.update_result(result_vec, cur_time_extrapolate - 1); + + // detection by Yolo + if (t_detect.joinable()) t_detect.join(); + t_detect = std::thread([&]() + { + std::shared_ptr det_image; + detection_data_t detection_data; + do { + detection_data = prepare2detect.receive(); + det_image = detection_data.det_image; + std::vector result_vec; + + if(det_image) + result_vec = detector.detect_resized(*det_image, frame_size.width, frame_size.height, thresh, true); // true + fps_det_counter++; + //std::this_thread::sleep_for(std::chrono::milliseconds(150)); + + detection_data.new_detection = true; + detection_data.result_vec = result_vec; + detect2draw.send(detection_data); + } while (!detection_data.exit_flag); + std::cout << " t_detect exit \n"; + }); + + // draw rectangles (and track objects) + t_draw = std::thread([&]() + { + std::queue track_optflow_queue; + detection_data_t detection_data; + do { + + // for Video-file + if (detection_sync) { + detection_data = detect2draw.receive(); } -#else - result_vec = detector.tracking_id(result_vec); // comment it - if track_id is not required - extrapolate_coords.new_result(result_vec, cur_time_extrapolate - 1); -#endif - // add old tracked objects - for (auto &i : old_result_vec) { - auto it = std::find_if(result_vec.begin(), result_vec.end(), - [&i](bbox_t const& b) { return b.track_id == i.track_id && b.obj_id == i.obj_id; }); - bool track_id_absent = (it == result_vec.end()); - if (track_id_absent) { - if (i.frames_counter-- > 1) - result_vec.push_back(i); + // for Video-camera + else + { + // get new Detection result if present + if (detect2draw.is_object_present()) { + cv::Mat old_cap_frame = detection_data.cap_frame; // use old captured frame + detection_data = detect2draw.receive(); + if (!old_cap_frame.empty()) detection_data.cap_frame = old_cap_frame; } + // get new Captured frame else { - it->frames_counter = std::min((unsigned)3, i.frames_counter + 1); + std::vector old_result_vec = detection_data.result_vec; // use old detections + detection_data = cap2draw.receive(); + detection_data.result_vec = old_result_vec; } } + + cv::Mat cap_frame = detection_data.cap_frame; + cv::Mat 
draw_frame = detection_data.cap_frame.clone(); + std::vector result_vec = detection_data.result_vec; + #ifdef TRACK_OPTFLOW - tracker_flow.update_cur_bbox_vec(result_vec); - result_vec = tracker_flow.tracking_flow(cur_frame, true); // track optical flow -#endif - consumed = false; - cv_pre_tracked.notify_all(); - } - // launch thread once - Detection - if (!t_detect.joinable()) { - t_detect = std::thread([&]() { - auto current_image = det_image; - consumed = true; - while (current_image.use_count() > 0 && !exit_flag) { - auto result = detector.detect_resized(*current_image, frame_size.width, frame_size.height, - thresh, false); // true - ++fps_det_counter; - std::unique_lock lock(mtx); - thread_result_vec = result; - consumed = true; - cv_detected.notify_all(); - if (detector.wait_stream) { - while (consumed && !exit_flag) cv_pre_tracked.wait(lock); - } - current_image = det_image; + if (detection_data.new_detection) { + tracker_flow.update_tracking_flow(detection_data.cap_frame, detection_data.result_vec); + while (track_optflow_queue.size() > 0) { + draw_frame = track_optflow_queue.back(); + result_vec = tracker_flow.tracking_flow(track_optflow_queue.front(), false); + track_optflow_queue.pop(); } - }); - } - //while (!consumed); // sync detection - - if (!cur_frame.empty()) { - steady_end = std::chrono::steady_clock::now(); - if (std::chrono::duration(steady_end - steady_start).count() >= 1) { - current_det_fps = fps_det_counter; - current_cap_fps = fps_cap_counter; - steady_start = steady_end; - fps_det_counter = 0; - fps_cap_counter = 0; } + else { + track_optflow_queue.push(cap_frame); + result_vec = tracker_flow.tracking_flow(cap_frame, false); + } + detection_data.new_detection = true; // to correct kalman filter +#endif //TRACK_OPTFLOW - large_preview.set(cur_frame, result_vec); -#ifdef TRACK_OPTFLOW - ++passed_flow_frames; - track_optflow_queue.push(cur_frame.clone()); - result_vec = tracker_flow.tracking_flow(cur_frame); // track optical flow - extrapolate_coords.update_result(result_vec, cur_time_extrapolate); - small_preview.draw(cur_frame, show_small_boxes); -#endif - auto result_vec_draw = result_vec; - if (extrapolate_flag) { - result_vec_draw = extrapolate_coords.predict(cur_time_extrapolate); - cv::putText(cur_frame, "extrapolate", cv::Point2f(10, 40), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.0, cv::Scalar(50, 50, 0), 2); + // track ID by using kalman filter + if (use_kalman_filter) { + if (detection_data.new_detection) { + result_vec = track_kalman.correct(result_vec); + } + else { + result_vec = track_kalman.predict(); + } } - draw_boxes(cur_frame, result_vec_draw, obj_names, current_det_fps, current_cap_fps); - //show_console_result(result_vec, obj_names); - large_preview.draw(cur_frame); - - cv::imshow("window name", cur_frame); - int key = cv::waitKey(3); // 3 or 16ms - if (key == 'f') show_small_boxes = !show_small_boxes; - if (key == 'p') while (true) if(cv::waitKey(100) == 'p') break; - if (key == 'e') extrapolate_flag = !extrapolate_flag; - if (key == 27) { exit_flag = true; break; } - - if (output_video.isOpened() && videowrite_ready) { - if (t_videowrite.joinable()) t_videowrite.join(); - write_frame = cur_frame.clone(); - videowrite_ready = false; - t_videowrite = std::thread([&]() { - output_video << write_frame; videowrite_ready = true; - }); + // track ID by using custom function + else { + int frame_story = std::max(5, current_fps_cap.load()); + result_vec = detector.tracking_id(result_vec, true, frame_story, 40); } + + if (use_zed_camera && 
!detection_data.zed_cloud.empty()) { + result_vec = get_3d_coordinates(result_vec, detection_data.zed_cloud); + } + + //small_preview.set(draw_frame, result_vec); + //large_preview.set(draw_frame, result_vec); + draw_boxes(draw_frame, result_vec, obj_names, current_fps_det, current_fps_cap); + //show_console_result(result_vec, obj_names, detection_data.frame_id); + //large_preview.draw(draw_frame); + //small_preview.draw(draw_frame, true); + + detection_data.result_vec = result_vec; + detection_data.draw_frame = draw_frame; + draw2show.send(detection_data); + if (send_network) draw2net.send(detection_data); + if (output_video.isOpened()) draw2write.send(detection_data); + } while (!detection_data.exit_flag); + std::cout << " t_draw exit \n"; + }); + + + // write frame to videofile + t_write = std::thread([&]() + { + if (output_video.isOpened()) { + detection_data_t detection_data; + cv::Mat output_frame; + do { + detection_data = draw2write.receive(); + if(detection_data.draw_frame.channels() == 4) cv::cvtColor(detection_data.draw_frame, output_frame, CV_RGBA2RGB); + else output_frame = detection_data.draw_frame; + output_video << output_frame; + } while (!detection_data.exit_flag); + output_video.release(); } + std::cout << " t_write exit \n"; + }); + + // send detection to the network + t_network = std::thread([&]() + { + if (send_network) { + detection_data_t detection_data; + do { + detection_data = draw2net.receive(); + + detector.send_json_http(detection_data.result_vec, obj_names, detection_data.frame_id, filename); -#ifndef TRACK_OPTFLOW - // wait detection result for video-file only (not for net-cam) - if (protocol != "rtsp://" && protocol != "http://" && protocol != "https:/") { - std::unique_lock lock(mtx); - while (!consumed) cv_detected.wait(lock); + } while (!detection_data.exit_flag); } -#endif - } - exit_flag = true; + std::cout << " t_network exit \n"; + }); + + + // show detection + detection_data_t detection_data; + do { + + steady_end = std::chrono::steady_clock::now(); + float time_sec = std::chrono::duration(steady_end - steady_start).count(); + if (time_sec >= 1) { + current_fps_det = fps_det_counter.load() / time_sec; + current_fps_cap = fps_cap_counter.load() / time_sec; + steady_start = steady_end; + fps_det_counter = 0; + fps_cap_counter = 0; + } + + detection_data = draw2show.receive(); + cv::Mat draw_frame = detection_data.draw_frame; + + //if (extrapolate_flag) { + // cv::putText(draw_frame, "extrapolate", cv::Point2f(10, 40), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.0, cv::Scalar(50, 50, 0), 2); + //} + + cv::imshow("window name", draw_frame); + int key = cv::waitKey(3); // 3 or 16ms + if (key == 'f') show_small_boxes = !show_small_boxes; + if (key == 'p') while (true) if (cv::waitKey(100) == 'p') break; + //if (key == 'e') extrapolate_flag = !extrapolate_flag; + if (key == 27) { exit_flag = true;} + + //std::cout << " current_fps_det = " << current_fps_det << ", current_fps_cap = " << current_fps_cap << std::endl; + } while (!detection_data.exit_flag); + std::cout << " show detection exit \n"; + + cv::destroyWindow("window name"); + // wait for all threads if (t_cap.joinable()) t_cap.join(); + if (t_prepare.joinable()) t_prepare.join(); if (t_detect.joinable()) t_detect.join(); - if (t_videowrite.joinable()) t_videowrite.join(); - std::cout << "Video ended \n"; + if (t_post.joinable()) t_post.join(); + if (t_draw.joinable()) t_draw.join(); + if (t_write.joinable()) t_write.join(); + if (t_network.joinable()) t_network.join(); + break; + } else if (file_ext == 
"txt") { // list of image files std::ifstream file(filename); @@ -470,14 +643,14 @@ int main(int argc, char *argv[]) show_console_result(result_vec, obj_names); cv::waitKey(0); } -#else +#else // OPENCV //std::vector result_vec = detector.detect(filename); auto img = detector.load_image(filename); std::vector result_vec = detector.detect(img); detector.free_image(img); show_console_result(result_vec, obj_names); -#endif +#endif // OPENCV } catch (std::exception &e) { std::cerr << "exception: " << e.what() << "\n"; getchar(); } catch (...) { std::cerr << "unknown exception \n"; getchar(); } diff --git a/src/yolo_layer.c b/src/yolo_layer.c index a9309d1d..d303b5aa 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -2,7 +2,7 @@ #include "activations.h" #include "blas.h" #include "box.h" -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp index 1b07b43b..a7418c16 100644 --- a/src/yolo_v2_class.cpp +++ b/src/yolo_v2_class.cpp @@ -250,8 +250,6 @@ LIB_API std::vector Detector::detect(image_t img, float thresh, bool use #endif //std::cout << "net.gpu_index = " << net.gpu_index << std::endl; - //float nms = .4; - image im; im.c = img.c; im.data = img.data; @@ -305,6 +303,9 @@ LIB_API std::vector Detector::detect(image_t img, float thresh, bool use bbox.obj_id = obj_id; bbox.prob = prob; bbox.track_id = 0; + bbox.x_3d = NAN; + bbox.y_3d = NAN; + bbox.z_3d = NAN; bbox_vec.push_back(bbox); } @@ -379,3 +380,70 @@ LIB_API std::vector Detector::tracking_id(std::vector cur_bbox_v return cur_bbox_vec; } + + +LIB_API bool Detector::send_json_http(std::vector cur_bbox_vec, std::vector obj_names, int frame_id, std::string filename, int timeout, int port) +{ + //int timeout = 400000; + //int port = 8070; + //send_json(local_dets, local_nboxes, l.classes, demo_names, frame_id, demo_json_port, timeout); + + std::string send_str; + + char *tmp_buf = (char *)calloc(1024, sizeof(char)); + if (!filename.empty()) { + sprintf(tmp_buf, "{\n \"frame_id\":%d, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename); + } + else { + sprintf(tmp_buf, "{\n \"frame_id\":%d, \n \"objects\": [ \n", frame_id); + } + send_str = tmp_buf; + free(tmp_buf); + + for (auto & i : cur_bbox_vec) { + char *buf = (char *)calloc(2048, sizeof(char)); + + sprintf(buf, " {\"class_id\":%d, \"name\":\"%s\", \"absolute_coordinates\":{\"center_x\":%d, \"center_y\":%d, \"width\":%d, \"height\":%d}, \"confidence\":%f", + i.obj_id, obj_names[i.obj_id], i.x, i.y, i.w, i.h, i.prob); + + //sprintf(buf, " {\"class_id\":%d, \"name\":\"%s\", \"relative_coordinates\":{\"center_x\":%f, \"center_y\":%f, \"width\":%f, \"height\":%f}, \"confidence\":%f", + // i.obj_id, obj_names[i.obj_id], i.x, i.y, i.w, i.h, i.prob); + + send_str += buf; + + if (!isnan(i.z_3d)) { + sprintf(buf, "\n , \"coordinates_in_meters\":{\"x_3d\":%.2f, \"y_3d\":%.2f, \"z_3d\":%.2f}", + i.x_3d*100, i.y_3d, i.z_3d); + send_str += buf; + } + + send_str += "}\n"; + + free(buf); + } + + //send_str += "\n ] \n}, \n"; + send_str += "\n ] \n}"; + + send_json_custom(send_str.c_str(), port, timeout); + return true; +} + +void *Detector::get_cuda_context() +{ +#ifdef GPU + int old_gpu_index; + cudaGetDevice(&old_gpu_index); + if (cur_gpu_id != old_gpu_index) + cudaSetDevice(cur_gpu_id); + + void *cuda_context = cuda_get_context(); + + if (cur_gpu_id != old_gpu_index) + cudaSetDevice(old_gpu_index); + + return cuda_context; +#else // GPU + return NULL; +#endif // GPU +} \ No newline at end of file From 
d762df531f14aaf19ef370ed39dfd53368c6c0e4 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 18 Mar 2019 02:53:48 +0300 Subject: [PATCH 11/23] Compile fix --- include/yolo_v2_class.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/yolo_v2_class.hpp b/include/yolo_v2_class.hpp index f9278721..6d86195a 100644 --- a/include/yolo_v2_class.hpp +++ b/include/yolo_v2_class.hpp @@ -44,6 +44,7 @@ struct bbox_t_container { #include #include #include +#include #ifdef OPENCV #include // C++ From 24dc5b785343a97e6af30cf3e633fed201ba3c93 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 18 Mar 2019 02:56:32 +0300 Subject: [PATCH 12/23] Another compile fix --- src/yolo_console_dll.cpp | 4 ++-- src/yolo_v2_class.cpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp index 12ab208c..42c36322 100644 --- a/src/yolo_console_dll.cpp +++ b/src/yolo_console_dll.cpp @@ -8,7 +8,7 @@ #include #include #include // std::mutex, std::unique_lock -#include // std::unordered_map +#include // It makes sense only for video-Camera (not for video-File) @@ -184,7 +184,7 @@ void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector i.w + 2) ? text_size.width : (i.w + 2); std::string coords_3d; - if (!isnan(i.z_3d)) { + if (!std::isnan(i.z_3d)) { std::stringstream ss; ss << std::fixed << std::setprecision(2) << "x:" << i.x_3d << "m y:" << i.y_3d << "m z:" << i.z_3d << "m "; coords_3d = ss.str(); diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp index a7418c16..86c7acbd 100644 --- a/src/yolo_v2_class.cpp +++ b/src/yolo_v2_class.cpp @@ -20,6 +20,7 @@ extern "C" { #include #include #include +#include //static Detector* detector = NULL; @@ -411,7 +412,7 @@ LIB_API bool Detector::send_json_http(std::vector cur_bbox_vec, std::vec send_str += buf; - if (!isnan(i.z_3d)) { + if (!std::isnan(i.z_3d)) { sprintf(buf, "\n , \"coordinates_in_meters\":{\"x_3d\":%.2f, \"y_3d\":%.2f, \"z_3d\":%.2f}", i.x_3d*100, i.y_3d, i.z_3d); send_str += buf; From 44c881233a5904adbb9fe93b39a93aa417ab8140 Mon Sep 17 00:00:00 2001 From: none Date: Mon, 18 Mar 2019 09:17:17 +0900 Subject: [PATCH 13/23] add Visual studio 2017 "Enterprise" edition to edition check. --- build.ps1 | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/build.ps1 b/build.ps1 index e5a1e29b..d937b427 100755 --- a/build.ps1 +++ b/build.ps1 @@ -30,12 +30,16 @@ if ($vcpkg_triplet -Match "x86") { if ($null -eq (Get-Command "cl.exe" -ErrorAction SilentlyContinue)) { $vstype = "Professional" if (Test-Path "C:\Program Files (x86)\Microsoft Visual Studio\2017\${vstype}\Common7\Tools") { - Write-Host "Found VS 2017 Professional" } else { - $vstype = "Community" - Write-Host "Found VS 2017 Community" + $vstype = "Enterprise" + if (Test-Path "C:\Program Files (x86)\Microsoft Visual Studio\2017\${vstype}\Common7\Tools") { + } + else { + $vstype = "Community" + } } + Write-Host "Found VS 2017 ${vstype}" Push-Location "C:\Program Files (x86)\Microsoft Visual Studio\2017\${vstype}\Common7\Tools" cmd /c "VsDevCmd.bat -arch=x64 & set" | ForEach-Object { From 9c3237acc810cce4216297bc6167713345064858 Mon Sep 17 00:00:00 2001 From: none Date: Mon, 18 Mar 2019 09:44:43 +0900 Subject: [PATCH 14/23] suppress C4819 warnings to the cuda file in CP932 environment. 
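With a CP932 (Shift-JIS) code page, cl.exe reports warning C4819 (a character that cannot
be represented in the current code page) for source files containing non-ASCII text, and the
same warning comes back through nvcc for the .cu files. The change below therefore adds
/wd4819 to CMAKE_CXX_FLAGS and to the option list forwarded to the host compiler via
-Xcompiler. Roughly, the host-compiler part of the resulting nvcc invocation looks like this
(illustrative only; the real command is generated by CMake with the configured
CUDA_COMPUTE_MODEL and full define/warning list):

    nvcc -gencode arch=compute_XX,code=sm_XX -Xcompiler="/wd4244,/wd4267,/wd4819,/DGPU" -c src/im2col_kernels.cu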
--- CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e6e6323a..145d286d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,7 +84,7 @@ set(CMAKE_DEBUG_POSTFIX d) add_definitions(-DUSE_CMAKE_LIBS) if(MSVC) - set(CMAKE_CXX_FLAGS "/wd4013 /wd4018 /wd4028 /wd4047 /wd4068 /wd4090 /wd4101 /wd4113 /wd4133 /wd4190 /wd4244 /wd4267 /wd4305 /wd4477 /wd4996 /fp:fast ${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "/wd4013 /wd4018 /wd4028 /wd4047 /wd4068 /wd4090 /wd4101 /wd4113 /wd4133 /wd4190 /wd4244 /wd4267 /wd4305 /wd4477 /wd4996 /wd4819 /fp:fast ${CMAKE_CXX_FLAGS}") string(REGEX REPLACE "/O2" "/Ox" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) add_definitions(-D_CRT_RAND_S) add_definitions(-DNOMINMAX) @@ -139,15 +139,15 @@ if(ENABLE_CUDA) if (MSVC) if(CUDNN_FOUND) if(OpenCV_FOUND) - set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/DGPU,/DCUDNN,/DOPENCV\" ${CMAKE_CUDA_FLAGS}") + set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/wd4819,/DGPU,/DCUDNN,/DOPENCV\" ${CMAKE_CUDA_FLAGS}") else() - set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/DGPU,/DCUDNN\" ${CMAKE_CUDA_FLAGS}") + set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/wd4819,/DGPU,/DCUDNN\" ${CMAKE_CUDA_FLAGS}") endif() else() if(OpenCV_FOUND) - set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/DGPU,/DOPENCV\" ${CMAKE_CUDA_FLAGS}") + set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/wd4819,/DGPU,/DOPENCV\" ${CMAKE_CUDA_FLAGS}") else() - set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/DGPU\" ${CMAKE_CUDA_FLAGS}") + set(CMAKE_CUDA_FLAGS "-gencode arch=compute_${CUDA_COMPUTE_MODEL},code=[sm_${CUDA_COMPUTE_MODEL},compute_${CUDA_COMPUTE_MODEL}] -Wno-deprecated-declarations -Xcompiler=\"/wd4028,/wd4190,/wd4244,/wd4267,/wd4305,/wd4477,/wd4996,/wd4819,/DGPU\" ${CMAKE_CUDA_FLAGS}") endif() endif() else() From 2f0592d540cdf67396a15f1514075dd615cfba88 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 18 Mar 2019 15:22:38 +0300 Subject: [PATCH 15/23] Compile fixes --- CMakeLists.txt | 4 ++-- src/dark_cuda.c | 3 ++- src/yolo_v2_class.cpp | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e6e6323a..05acdad3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -288,8 +288,8 @@ 
target_link_libraries(darknet PRIVATE Threads::Threads) target_link_libraries(darklib PRIVATE Threads::Threads) if(ENABLE_CUDA) - target_link_libraries(darknet PRIVATE curand cublas) - target_link_libraries(darklib PRIVATE curand cublas) + target_link_libraries(darknet PRIVATE curand cublas cuda) + target_link_libraries(darklib PRIVATE curand cublas cuda) set_target_properties(darklib PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) endif() diff --git a/src/dark_cuda.c b/src/dark_cuda.c index 42aa6fc8..5b57ea1d 100644 --- a/src/dark_cuda.c +++ b/src/dark_cuda.c @@ -8,13 +8,14 @@ int gpu_index = 0; #ifdef GPU -#include "cuda.h" +#include "dark_cuda.h" #include "utils.h" #include "blas.h" #include "assert.h" #include #include #include +#include #pragma comment(lib, "cuda.lib") diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp index 86c7acbd..3ebc8024 100644 --- a/src/yolo_v2_class.cpp +++ b/src/yolo_v2_class.cpp @@ -393,7 +393,7 @@ LIB_API bool Detector::send_json_http(std::vector cur_bbox_vec, std::vec char *tmp_buf = (char *)calloc(1024, sizeof(char)); if (!filename.empty()) { - sprintf(tmp_buf, "{\n \"frame_id\":%d, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename); + sprintf(tmp_buf, "{\n \"frame_id\":%d, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename.c_str()); } else { sprintf(tmp_buf, "{\n \"frame_id\":%d, \n \"objects\": [ \n", frame_id); @@ -405,7 +405,7 @@ LIB_API bool Detector::send_json_http(std::vector cur_bbox_vec, std::vec char *buf = (char *)calloc(2048, sizeof(char)); sprintf(buf, " {\"class_id\":%d, \"name\":\"%s\", \"absolute_coordinates\":{\"center_x\":%d, \"center_y\":%d, \"width\":%d, \"height\":%d}, \"confidence\":%f", - i.obj_id, obj_names[i.obj_id], i.x, i.y, i.w, i.h, i.prob); + i.obj_id, obj_names[i.obj_id].c_str(), i.x, i.y, i.w, i.h, i.prob); //sprintf(buf, " {\"class_id\":%d, \"name\":\"%s\", \"relative_coordinates\":{\"center_x\":%f, \"center_y\":%f, \"width\":%f, \"height\":%f}, \"confidence\":%f", // i.obj_id, obj_names[i.obj_id], i.x, i.y, i.w, i.h, i.prob); @@ -414,7 +414,7 @@ LIB_API bool Detector::send_json_http(std::vector cur_bbox_vec, std::vec if (!std::isnan(i.z_3d)) { sprintf(buf, "\n , \"coordinates_in_meters\":{\"x_3d\":%.2f, \"y_3d\":%.2f, \"z_3d\":%.2f}", - i.x_3d*100, i.y_3d, i.z_3d); + i.x_3d, i.y_3d, i.z_3d); send_str += buf; } From 4cd10ee547f59fcf45df1a23a49da9a9769b633b Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 18 Mar 2019 16:05:42 +0300 Subject: [PATCH 16/23] Another one fix --- include/yolo_v2_class.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/yolo_v2_class.hpp b/include/yolo_v2_class.hpp index 6d86195a..de13e489 100644 --- a/include/yolo_v2_class.hpp +++ b/include/yolo_v2_class.hpp @@ -84,7 +84,7 @@ public: LIB_API void *get_cuda_context(); LIB_API bool send_json_http(std::vector cur_bbox_vec, std::vector obj_names, int frame_id, - std::string filename = "", int timeout = 400000, int port = 8070); + std::string filename = std::string(), int timeout = 400000, int port = 8070); std::vector detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false) { From 84be2e8316d643a6d1b63800d527fd20320db782 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 18 Mar 2019 18:11:39 +0300 Subject: [PATCH 17/23] Minor fix --- include/yolo_v2_class.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/yolo_v2_class.hpp b/include/yolo_v2_class.hpp index de13e489..2d42576d 100644 --- a/include/yolo_v2_class.hpp +++ 
b/include/yolo_v2_class.hpp @@ -45,6 +45,9 @@ struct bbox_t_container { #include #include #include +#include +#include +#include #ifdef OPENCV #include // C++ From b3254ed523a1160c8c97d3da8a4841bbb08e689f Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Mon, 18 Mar 2019 19:25:48 +0300 Subject: [PATCH 18/23] Fixed many warnings --- src/captcha.c | 2 +- src/cifar.c | 4 ++-- src/classifier.c | 6 +++--- src/compare.c | 2 +- src/convolutional_layer.c | 25 +++++++++++++------------ src/darknet.c | 2 +- src/darkunistd.h | 2 +- src/data.c | 8 ++++---- src/detector.c | 21 ++++++++++----------- src/dice.c | 2 +- src/gemm.c | 28 ++++++++++++++-------------- src/go.c | 2 +- src/http_stream.cpp | 2 +- src/matrix.c | 3 +-- src/network.c | 6 +++--- src/rnn.c | 2 +- src/tag.c | 2 +- src/utils.c | 17 ++++++++--------- src/writing.c | 2 +- src/yolo_v2_class.cpp | 10 +++++----- 20 files changed, 73 insertions(+), 75 deletions(-) diff --git a/src/captcha.c b/src/captcha.c index 0bb15b84..0cc15915 100644 --- a/src/captcha.c +++ b/src/captcha.c @@ -85,7 +85,7 @@ void train_captcha(char *cfgfile, char *weightfile) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if(i%100==0){ char buff[256]; diff --git a/src/cifar.c b/src/cifar.c index 04dec155..24a13a57 100644 --- a/src/cifar.c +++ b/src/cifar.c @@ -33,7 +33,7 @@ void train_cifar(char *cfgfile, char *weightfile) float loss = train_network_sgd(net, train, 1); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); if(*net.seen/N > epoch){ epoch = *net.seen/N; char buff[256]; @@ -89,7 +89,7 @@ void train_cifar_distill(char *cfgfile, char *weightfile) float loss = train_network_sgd(net, train, 1); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); if(*net.seen/N > epoch){ epoch = *net.seen/N; char buff[256]; diff --git a/src/classifier.c b/src/classifier.c index 5471957a..961e4cbb 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -177,7 +177,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, draw_precision = 1; } - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); #ifdef OPENCV draw_train_loss(img, img_size, avg_loss, max_img_loss, i, 
net.max_batches, topk, draw_precision, "top5", dont_show, mjpeg_port); #endif // OPENCV @@ -198,7 +198,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, if (ngpus != 1) sync_nets(nets, ngpus, 0); #endif char buff[256]; - sprintf(buff, "%s/%s_last.weights", backup_directory, base, i); + sprintf(buff, "%s/%s_last.weights", backup_directory, base); save_weights(net, buff); } free_data(train); @@ -791,7 +791,7 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi int* indexes = (int*)calloc(top, sizeof(int)); char buff[256]; char *input = buff; - int size = net.w; + //int size = net.w; while(1){ if(filename){ strncpy(input, filename, 256); diff --git a/src/compare.c b/src/compare.c index 5c1e0451..bb842261 100644 --- a/src/compare.c +++ b/src/compare.c @@ -54,7 +54,7 @@ void train_compare(char *cfgfile, char *weightfile) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%.3f: %f, %f avg, %lf seconds, %d images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); + printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if(i%100 == 0){ char buff[256]; diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index e42e86a7..84d36d93 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -679,7 +679,7 @@ void float_to_bit(float *src, unsigned char *dst, size_t size) { void bit_to_float(unsigned char *src, float *dst, size_t size, size_t filters, float *mean_arr) { memset(dst, 0, size *sizeof(float)); - size_t i, src_i, src_shift; + size_t i; for (i = 0; i < size; ++i) { float mean_val = 1; @@ -726,7 +726,7 @@ void binary_align_weights(convolutional_layer *l) const int items_per_channel = l->size*l->size; for (i = 0; i < items_per_channel; ++i) { - uint32_t val = 0; + //uint32_t val = 0; int c_pack; for (c_pack = 0; c_pack < 32; ++c_pack) { float src = l->binary_weights[fil*items_per_filter + (chan + c_pack)*items_per_channel + i]; @@ -749,8 +749,8 @@ void binary_align_weights(convolutional_layer *l) //if (l->n >= 32) if(gpu_index >= 0) { - int M = l->n; - int N = l->out_w*l->out_h; + //int M = l->n; + //int N = l->out_w*l->out_h; //printf("\n M = %d, N = %d, M %% 8 = %d, N %% 8 = %d - weights \n", M, N, M % 8, N % 8); //printf("\n l.w = %d, l.c = %d, l.n = %d \n", l->w, l->c, l->n); for (i = 0; i < align_weights_size / 8; ++i) l->align_bit_weights[i] = ~(l->align_bit_weights[i]); @@ -806,7 +806,7 @@ size_t binary_transpose_align_input(int k, int n, float *b, char **t_bit_input, size_t t_bit_input_size = t_intput_size / 8;// +1; memset(*t_bit_input, 0, t_bit_input_size * sizeof(char)); - int src_size = k * bit_align; + //int src_size = k * bit_align; // b - [bit_align, k] - [l.bit_align, l.size*l.size*l.c] = src_size // t_input - [bit_align, k] - [n', k] @@ -862,8 +862,8 @@ void forward_convolutional_layer(convolutional_layer l, network_state state) int ldb_align = l.lda_align; size_t new_ldb = k + (ldb_align - k%ldb_align); // (k / 8 + 1) * 8; - size_t t_intput_size = new_ldb * l.bit_align;// n; - size_t t_bit_input_size = t_intput_size / 8;// +1; + //size_t t_intput_size = new_ldb * l.bit_align;// n; + //size_t t_bit_input_size = t_intput_size / 8;// +1; int re_packed_input_size = l.c * l.w * l.h; memset(state.workspace, 0, re_packed_input_size * sizeof(float)); @@ -928,17 +928,18 @@ void 
forward_convolutional_layer(convolutional_layer l, network_state state) //im2col_cpu_custom_align(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b, l.bit_align); im2col_cpu_custom_bin(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, state.workspace, l.bit_align); - size_t output_size = l.outputs; + //size_t output_size = l.outputs; //float *count_output = calloc(output_size, sizeof(float)); //size_t bit_output_size = output_size / 8 + 1; //char *bit_output = calloc(bit_output_size, sizeof(char)); - size_t intput_size = n * k; // (out_h*out_w) X (l.size*l.size*l.c) : after im2col() - size_t bit_input_size = intput_size / 8 + 1; + //size_t intput_size = n * k; // (out_h*out_w) X (l.size*l.size*l.c) : after im2col() + //size_t bit_input_size = intput_size / 8 + 1; //char *bit_input = calloc(bit_input_size, sizeof(char)); - size_t weights_size = k * m; //l.size*l.size*l.c*l.n; - size_t bit_weights_size = weights_size / 8 + 1; + //size_t weights_size = k * m; //l.size*l.size*l.c*l.n; + //size_t bit_weights_size = weights_size / 8 + 1; + //char *bit_weights = calloc(bit_weights_size, sizeof(char)); //float *mean_arr = calloc(l.n, sizeof(float)); diff --git a/src/darknet.c b/src/darknet.c index c10a7b2f..67c7d9a4 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -479,7 +479,7 @@ int main(int argc, char **argv) float thresh = find_float_arg(argc, argv, "-thresh", .24); int ext_output = find_arg(argc, argv, "-ext_output"); char *filename = (argc > 4) ? argv[4]: 0; - test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, 1, 0, NULL); + test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, ext_output, 0, NULL); } else if (0 == strcmp(argv[1], "cifar")){ run_cifar(argc, argv); } else if (0 == strcmp(argv[1], "go")){ diff --git a/src/darkunistd.h b/src/darkunistd.h index 1ba3ef2e..d2d555c3 100644 --- a/src/darkunistd.h +++ b/src/darkunistd.h @@ -21,7 +21,7 @@ These may be OR'd together. */ #define R_OK 4 /* Test for read permission. */ #define W_OK 2 /* Test for write permission. */ -#define X_OK R_OK /* execute permission - unsupported in Windows, \ +#define X_OK R_OK /* execute permission - unsupported in Windows, */ #define F_OK 0 /* Test for existence. 
*/ #define access _access diff --git a/src/data.c b/src/data.c index e364ed76..a3612519 100644 --- a/src/data.c +++ b/src/data.c @@ -790,8 +790,8 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo d.X.vals = (float**)calloc(d.X.rows, sizeof(float*)); d.X.cols = h*w*c; - float r1, r2, r3, r4; - float dhue, dsat, dexp, flip; + float r1 = 0, r2 = 0, r3 = 0, r4 = 0; + float dhue = 0, dsat = 0, dexp = 0, flip = 0; int augmentation_calculated = 0; d.y = make_matrix(n, 5*boxes); @@ -890,8 +890,8 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo d.X.vals = (float**)calloc(d.X.rows, sizeof(float*)); d.X.cols = h*w*c; - float r1, r2, r3, r4; - float dhue, dsat, dexp, flip; + float r1 = 0, r2 = 0, r3 = 0, r4 = 0; + float dhue = 0, dsat = 0, dexp = 0, flip = 0; int augmentation_calculated = 0; d.y = make_matrix(n, 5 * boxes); diff --git a/src/detector.c b/src/detector.c index 96222379..173bcf78 100644 --- a/src/detector.c +++ b/src/detector.c @@ -326,7 +326,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i if (ngpus != 1) sync_nets(nets, ngpus, 0); #endif char buff[256]; - sprintf(buff, "%s/%s_last.weights", backup_directory, base, i); + sprintf(buff, "%s/%s_last.weights", backup_directory, base); save_weights(net, buff); } free_data(train); @@ -594,7 +594,7 @@ void validate_detector_recall(char *datacfg, char *cfgfile, char *weightfile) list *plist = get_paths(valid_images); char **paths = (char **)list_to_array(plist); - layer l = net.layers[net.n - 1]; + //layer l = net.layers[net.n - 1]; int j, k; @@ -681,16 +681,16 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa char *difficult_valid_images = option_find_str(options, "difficult", NULL); char *name_list = option_find_str(options, "names", "data/names.list"); char **names = get_labels(name_list); - char *mapf = option_find_str(options, "map", 0); - int *map = 0; - if (mapf) map = read_map(mapf); + //char *mapf = option_find_str(options, "map", 0); + //int *map = 0; + //if (mapf) map = read_map(mapf); FILE* reinforcement_fd = NULL; network net; - int initial_batch; + //int initial_batch; if (existing_net) { char *train_images = option_find_str(options, "train", "data/train.txt"); - char *valid_images = option_find_str(options, "valid", train_images); + valid_images = option_find_str(options, "valid", train_images); net = *existing_net; } else { @@ -1131,8 +1131,8 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int printf("\n"); for (i = 0; i < number_of_boxes; ++i) { - float w = boxes_data.vals[i][0] = rel_width_height_array[i * 2]; - float h = boxes_data.vals[i][1] = rel_width_height_array[i * 2 + 1]; + boxes_data.vals[i][0] = rel_width_height_array[i * 2]; + boxes_data.vals[i][1] = rel_width_height_array[i * 2 + 1]; //if (w > 410 || h > 410) printf("i:%d, w = %f, h = %f \n", i, w, h); } @@ -1170,7 +1170,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int float anchor_w = anchors_data.centers.vals[cluster_idx][0]; //centers->data.fl[cluster_idx * 2]; float anchor_h = anchors_data.centers.vals[cluster_idx][1]; //centers->data.fl[cluster_idx * 2 + 1]; if (best_iou > 1 || best_iou < 0) { // || box_w > width || box_h > height) { - printf(" Wrong label: i = %d, box_w = %d, box_h = %d, anchor_w = %d, anchor_h = %d, iou = %f \n", + printf(" Wrong label: i = %d, box_w = %f, box_h = %f, anchor_w = %f, anchor_h = %f, iou = %f \n", i, box_w, box_h, anchor_w, 
anchor_h, best_iou); } else avg_iou += best_iou; @@ -1287,7 +1287,6 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam if (net.layers[net.n - 1].classes > names_size) getchar(); } srand(2222222); - double time; char buff[256]; char *input = buff; char *json_buf = NULL; diff --git a/src/dice.c b/src/dice.c index 94155271..8a0393a8 100644 --- a/src/dice.c +++ b/src/dice.c @@ -33,7 +33,7 @@ void train_dice(char *cfgfile, char *weightfile) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); + printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); free_data(train); if((i % 100) == 0) net.learning_rate *= .1; if(i%100==0){ diff --git a/src/gemm.c b/src/gemm.c index fec1e16a..861a1906 100644 --- a/src/gemm.c +++ b/src/gemm.c @@ -321,7 +321,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, // is not used void transpose_32x32_bits_my(uint32_t *A, uint32_t *B, int lda, int ldb) { - unsigned x, y, t; + unsigned int x, y; for (y = 0; y < 32; ++y) { for (x = 0; x < 32; ++x) { if (A[y * lda] & (1 << x)) B[x * ldb] |= (uint32_t)1 << y; @@ -400,7 +400,7 @@ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, in void transpose_8x8_bits_my(unsigned char *A, unsigned char *B, int lda, int ldb) { - unsigned x, y, t; + unsigned x, y; for (y = 0; y < 8; ++y) { for (x = 0; x < 8; ++x) { if (A[y * lda] & (1 << x)) B[x * ldb] |= 1 << y; @@ -755,7 +755,7 @@ void gemm_nn_fast(int M, int N, int K, float ALPHA, for (i = 0; i < (M / TILE_M)*TILE_M; i += TILE_M) { int j, k; - int i_d, j_d, k_d; + int i_d, k_d; for (k = 0; k < (K / TILE_K)*TILE_K; k += TILE_K) { @@ -768,8 +768,8 @@ void gemm_nn_fast(int M, int N, int K, float ALPHA, __m256 result256; __m256 a256_0, b256_0; // AVX __m256 a256_1, b256_1; // AVX - __m256 a256_2, b256_2; // AVX - __m256 a256_3, b256_3; // AVX + __m256 a256_2;// , b256_2; // AVX + __m256 a256_3;// , b256_3; // AVX __m256 c256_0, c256_1, c256_2, c256_3; __m256 c256_4, c256_5, c256_6, c256_7; @@ -943,8 +943,8 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, void convolution_2d_old(int w, int h, int ksize, int n, int c, int pad, int stride, float *weights, float *input, float *output) { - const int out_h = (h + 2 * pad - ksize) / stride + 1; // output_height=input_height for stride=1 and pad=1 - const int out_w = (w + 2 * pad - ksize) / stride + 1; // output_width=input_width for stride=1 and pad=1 + //const int out_h = (h + 2 * pad - ksize) / stride + 1; // output_height=input_height for stride=1 and pad=1 + //const int out_w = (w + 2 * pad - ksize) / stride + 1; // output_width=input_width for stride=1 and pad=1 int fil; // filter index @@ -991,8 +991,8 @@ void convolution_2d_old(int w, int h, int ksize, int n, int c, int pad, int stri void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, float *weights, float *input, float *output, float *mean) { - const int out_h = (h + 2 * pad - ksize) / stride + 1; // output_height=input_height for stride=1 and pad=1 - const int out_w = (w + 2 * pad - ksize) / stride + 1; // output_width=input_width for stride=1 and pad=1 + //const int out_h = (h + 2 * pad - ksize) / stride + 1; // output_height=input_height for stride=1 and pad=1 + //const int out_w = (w + 2 * pad - ksize) / stride + 1; // output_width=input_width 
for stride=1 and pad=1 int i; #if defined(_OPENMP) @@ -1203,7 +1203,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, float mean_val_0 = mean_arr[i + 0]; float mean_val_1 = mean_arr[i + 1]; int j, k; - __m256i all_1 = _mm256_set1_epi8(255); + //__m256i all_1 = _mm256_set1_epi8(255); //for (j = 0; j < N; ++j) for (j = 0; j < (N/2)*2; j += 2) @@ -1770,7 +1770,7 @@ void float_to_bit(float *src, unsigned char *dst, size_t size) memset(dst, 0, dst_size); size_t i; - __m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); + //__m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); __m256 float_zero256 = _mm256_set1_ps(0.0); for (i = 0; i < size; i+=8) @@ -1881,8 +1881,8 @@ void forward_maxpool_layer_avx(float *src, float *dst, int *indexes, int size, i else if (size == 2 && stride == 2 && is_avx() == 1) { for (j = 0; j < out_w - 4; j += 4) { int out_index = j + out_w*(i + out_h*(k + c*b)); - float max = -FLT_MAX; - int max_i = -1; + //float max = -FLT_MAX; + //int max_i = -1; __m128 max128 = _mm_set1_ps(-FLT_MAX); for (n = 0; n < size; ++n) { @@ -2513,7 +2513,7 @@ void convolution_repacked(uint32_t *packed_input, uint32_t *packed_weights, floa #pragma omp parallel for for (fil = 0; fil < n; ++fil) { float mean_val = mean_arr[fil]; - int chan, c_pack, y, x, f_y, f_x; + int chan, y, x, f_y, f_x; // c_pack // channel index for (chan = 0; chan < c / 32; ++chan) //for (chan = 0; chan < l.c; chan += 32) diff --git a/src/go.c b/src/go.c index 54a739d6..79529432 100644 --- a/src/go.c +++ b/src/go.c @@ -144,7 +144,7 @@ void train_go(char *cfgfile, char *weightfile) float loss = train_network_datum(net, board, move) / net.batch; if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); if(*net.seen/N > epoch){ epoch = *net.seen/N; char buff[256]; diff --git a/src/http_stream.cpp b/src/http_stream.cpp index e61e506d..77096d2b 100644 --- a/src/http_stream.cpp +++ b/src/http_stream.cpp @@ -181,7 +181,7 @@ public: if (::select(maxfd + 1, &rread, NULL, NULL, &select_timeout) <= 0) return true; // nothing broken, there's just noone listening - size_t outlen = strlen(outputbuf); + int outlen = static_cast(strlen(outputbuf)); #ifdef _WIN32 for (unsigned i = 0; ibatch; #ifdef GPU + const int size = get_network_input_size(*net) * net->batch; if(gpu_index >= 0){ printf(" try to allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000); net->workspace = cuda_make_array(0, workspace_size/sizeof(float) + 1); @@ -728,10 +728,10 @@ char *detection_to_json(detection *dets, int nboxes, int classes, char **names, char *send_buf = (char *)calloc(1024, sizeof(char)); if (filename) { - sprintf(send_buf, "{\n \"frame_id\":%d, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename); + sprintf(send_buf, "{\n \"frame_id\":%lld, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename); } else { - sprintf(send_buf, "{\n \"frame_id\":%d, \n \"objects\": [ \n", frame_id); + sprintf(send_buf, "{\n \"frame_id\":%lld, \n 
\"objects\": [ \n", frame_id); } int i, j; diff --git a/src/rnn.c b/src/rnn.c index 93107585..5aee0863 100644 --- a/src/rnn.c +++ b/src/rnn.c @@ -163,7 +163,7 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int i = (*net.seen)/net.batch; int streams = batch/steps; - printf("\n batch = %d, steps = %d, streams = %d, subdivisions = %d, text_size = %d \n", batch, steps, streams, net.subdivisions, size); + printf("\n batch = %d, steps = %d, streams = %d, subdivisions = %d, text_size = %ld \n", batch, steps, streams, net.subdivisions, size); printf(" global_batch = %d \n", batch*net.subdivisions); size_t* offsets = (size_t*)calloc(streams, sizeof(size_t)); int j; diff --git a/src/tag.c b/src/tag.c index 4033216e..0942d503 100644 --- a/src/tag.c +++ b/src/tag.c @@ -64,7 +64,7 @@ void train_tag(char *cfgfile, char *weightfile, int clear) float loss = train_network(net, train); if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); free_data(train); if(*net.seen/N > epoch){ epoch = *net.seen/N; diff --git a/src/utils.c b/src/utils.c index a2a6178a..6afc187c 100644 --- a/src/utils.c +++ b/src/utils.c @@ -254,7 +254,7 @@ void replace_image_to_label(const char* input_path, char* output_path) find_replace_extension(output_path, ".PPM", ".txt", output_path); find_replace_extension(output_path, ".tiff", ".txt", output_path); find_replace_extension(output_path, ".TIFF", ".txt", output_path); - + // Check file ends with txt: if(strlen(output_path) > 4) { char *output_path_ext = output_path + strlen(output_path) - 4; @@ -768,13 +768,12 @@ float rand_uniform(float min, float max) max = swap; } - if (RAND_MAX < 65536) { +#if (RAND_MAX < 65536) int rnd = rand()*(RAND_MAX + 1) + rand(); return ((float)rnd / (RAND_MAX*RAND_MAX) * (max - min)) + min; - } - else { +#else return ((float)rand() / RAND_MAX * (max - min)) + min; - } +#endif //return (random_float() * (max - min)) + min; } @@ -802,12 +801,12 @@ unsigned int random_gen() unsigned int rnd = 0; #ifdef WIN32 rand_s(&rnd); -#else +#else // WIN32 rnd = rand(); - if (RAND_MAX < 65536) { +#if (RAND_MAX < 65536) rnd = rand()*(RAND_MAX + 1) + rnd; - } -#endif +#endif //(RAND_MAX < 65536) +#endif // WIN32 return rnd; } diff --git a/src/writing.c b/src/writing.c index 4acda60a..e0d8019e 100644 --- a/src/writing.c +++ b/src/writing.c @@ -69,7 +69,7 @@ void train_writing(char *cfgfile, char *weightfile) if(avg_loss == -1) avg_loss = loss; avg_loss = avg_loss*.9 + loss*.1; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); + printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); free_data(train); if(get_current_batch(net)%100 == 0){ char buff[256]; diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp index 3ebc8024..93cccf44 100644 --- a/src/yolo_v2_class.cpp +++ b/src/yolo_v2_class.cpp @@ -104,8 +104,8 @@ struct detector_gpu_t { LIB_API Detector::Detector(std::string cfg_filename, std::string 
weight_filename, int gpu_id) : cur_gpu_id(gpu_id) { wait_stream = 0; - int old_gpu_index; #ifdef GPU + int old_gpu_index; check_cuda( cudaGetDevice(&old_gpu_index) ); #endif @@ -151,7 +151,7 @@ LIB_API Detector::Detector(std::string cfg_filename, std::string weight_filename LIB_API Detector::~Detector() { detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - layer l = detector_gpu.net.layers[detector_gpu.net.n - 1]; + //layer l = detector_gpu.net.layers[detector_gpu.net.n - 1]; free(detector_gpu.track_id); @@ -159,8 +159,8 @@ LIB_API Detector::~Detector() for (int j = 0; j < NFRAMES; ++j) free(detector_gpu.predictions[j]); for (int j = 0; j < NFRAMES; ++j) if (detector_gpu.images[j].data) free(detector_gpu.images[j].data); - int old_gpu_index; #ifdef GPU + int old_gpu_index; cudaGetDevice(&old_gpu_index); cuda_set_device(detector_gpu.net.gpu_index); #endif @@ -241,8 +241,8 @@ LIB_API std::vector Detector::detect(image_t img, float thresh, bool use { detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); network &net = detector_gpu.net; - int old_gpu_index; #ifdef GPU + int old_gpu_index; cudaGetDevice(&old_gpu_index); if(cur_gpu_id != old_gpu_index) cudaSetDevice(net.gpu_index); @@ -289,7 +289,7 @@ LIB_API std::vector Detector::detect(image_t img, float thresh, bool use std::vector bbox_vec; - for (size_t i = 0; i < nboxes; ++i) { + for (int i = 0; i < nboxes; ++i) { box b = dets[i].bbox; int const obj_id = max_index(dets[i].prob, l.classes); float const prob = dets[i].prob[obj_id]; From 8bcba6c105a631b7e6fb4215edc7c7cb48cf8ec3 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 19 Mar 2019 00:17:09 +0300 Subject: [PATCH 19/23] Minor fixes. Added ZED_CAMERA to the Makefile. --- Makefile | 7 +++++++ build/darknet/darknet.vcxproj | 4 ++-- build/darknet/yolo_cpp_dll.vcxproj | 4 ++-- src/yolo_console_dll.cpp | 5 +++++ src/yolo_v2_class.cpp | 1 + 5 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index c59088af..aa7eec39 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ OPENCV=0 AVX=0 OPENMP=0 LIBSO=0 +ZED_CAMERA=0 # set GPU=1 and CUDNN=1 to speedup on GPU # set CUDNN_HALF=1 to further speedup 3 x times (Mixed-precision on Tensor Cores) GPU: Volta, Xavier, Turing and higher @@ -111,6 +112,12 @@ CFLAGS+= -DCUDNN_HALF ARCH+= -gencode arch=compute_70,code=[sm_70,compute_70] endif +ifeq ($(ZED_CAMERA), 1) +CFLAGS+= -DZED_CAMERA -I/usr/local/zed/include +LDFLAGS+= -L/usr/local/zed/lib -lsl_core -lsl_input -lsl_zed +#-lstdc++ -D_GLIBCXX_USE_CXX11_ABI=0 +endif + OBJ=http_stream.o gemm.o utils.o dark_cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o upsample_layer.o lstm_layer.o ifeq ($(GPU), 1) LDFLAGS+= -lstdc++ diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj index d629cac5..f46efa15 100644 --- a/build/darknet/darknet.vcxproj +++ b/build/darknet/darknet.vcxproj @@ -96,7 +96,7 @@ true - 
C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;C:\opencv_2.4.13\opencv\build\x64\vc14\lib;$(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) + C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;C:\opencv_2.4.13\opencv\build\x64\vc14\lib;$(CUDA_PATH)\lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) $(OutDir)\$(TargetName)$(TargetExt) ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;%(AdditionalDependencies) true @@ -146,7 +146,7 @@ true true true - C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;$(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) + C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;$(CUDA_PATH)\lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;%(AdditionalDependencies) $(OutDir)\$(TargetName)$(TargetExt) diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj index c03bbea3..16050e9e 100644 --- a/build/darknet/yolo_cpp_dll.vcxproj +++ b/build/darknet/yolo_cpp_dll.vcxproj @@ -100,7 +100,7 @@ true - $(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) + $(CUDA_PATH)\lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) $(OutDir)\$(TargetName)$(TargetExt) ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cuda.lib;%(AdditionalDependencies) true @@ -149,7 +149,7 @@ true true true - C:\opencv_3.0\opencv\build\x64\vc14\lib;$(CUDA_PATH)lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) + C:\opencv_3.0\opencv\build\x64\vc14\lib;$(CUDA_PATH)\lib\$(PlatformName);$(CUDNN)\lib\x64;$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;cuda.lib;%(AdditionalDependencies) $(OutDir)\$(TargetName)$(TargetExt) diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp index 42c36322..2d7330fa 100644 --- a/src/yolo_console_dll.cpp +++ b/src/yolo_console_dll.cpp @@ -334,6 +334,11 @@ int main(int argc, char *argv[]) if (file_ext == "svo") init_params.svo_input_filename.set(filename.c_str()); if (filename == "zed_camera" || file_ext == "svo") { std::cout << "ZED 3D Camera " << zed.open(init_params) << std::endl; + if (!zed.isOpened()) { + std::cout << " Error: ZED Camera should be connected to USB 3.0. And ZED_SDK should be installed. 
\n"; + getchar(); + return 0; + } cur_frame = zed_capture_rgb(zed); use_zed_camera = true; } diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp index 93cccf44..ab582bad 100644 --- a/src/yolo_v2_class.cpp +++ b/src/yolo_v2_class.cpp @@ -304,6 +304,7 @@ LIB_API std::vector Detector::detect(image_t img, float thresh, bool use bbox.obj_id = obj_id; bbox.prob = prob; bbox.track_id = 0; + bbox.frames_counter = 0; bbox.x_3d = NAN; bbox.y_3d = NAN; bbox.z_3d = NAN; From 0557f4314108c7efba4ee16e792e58ac5e347865 Mon Sep 17 00:00:00 2001 From: Aymeric Dujardin Date: Tue, 19 Mar 2019 12:41:47 +0100 Subject: [PATCH 20/23] ZED Camera support CMakeLists + Fix Makefile --- CMakeLists.txt | 24 ++++++++++++++++++++++++ Makefile | 4 ++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d007115d..f7f5fd6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -167,6 +167,25 @@ if(ENABLE_CUDA) endif() endif() +set(ENABLE_ZED_CAMERA "TRUE" CACHE BOOL "Enable ZED Camera support") +if(ENABLE_CUDA) + if(ENABLE_ZED_CAMERA) + find_package(ZED 2 REQUIRED) + if(ZED_FOUND) + include_directories(${ZED_INCLUDE_DIRS}) + link_directories(${ZED_LIBRARY_DIR}) + add_definitions(-DZED_STEREO) + message("ZED Camera support enabled") + else() + message(WARNING "ZED SDK not found !") + set(ENABLE_ZED_CAMERA "FALSE" CACHE BOOL "Enable ZED Camera support" FORCE) + endif() + endif() +else() + message(WARNING "ZED SDK requires CUDA !") + set(ENABLE_ZED_CAMERA "FALSE" CACHE BOOL "Enable ZED Camera support" FORCE) +endif() + set(CMAKE_THREAD_PREFER_PTHREAD ON) find_package(Threads REQUIRED) if(MSVC) @@ -287,6 +306,11 @@ endif() target_link_libraries(darknet PRIVATE Threads::Threads) target_link_libraries(darklib PRIVATE Threads::Threads) +if(ENABLE_ZED_CAMERA) + target_link_libraries(darknet PRIVATE ${ZED_LIBRARIES}) + target_link_libraries(uselib PRIVATE ${ZED_LIBRARIES}) +endif() + if(ENABLE_CUDA) target_link_libraries(darknet PRIVATE curand cublas cuda) target_link_libraries(darklib PRIVATE curand cublas cuda) diff --git a/Makefile b/Makefile index aa7eec39..b9bed33b 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,7 @@ OS := $(shell uname) # GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4 # ARCH= -gencode arch=compute_61,code=sm_61 -gencode arch=compute_61,code=compute_61 -# GP100/Tesla P100 – DGX-1 +# GP100/Tesla P100 DGX-1 # ARCH= -gencode arch=compute_60,code=sm_60 # For Jetson TX1, Tegra X1, DRIVE CX, DRIVE PX - uncomment: @@ -113,7 +113,7 @@ ARCH+= -gencode arch=compute_70,code=[sm_70,compute_70] endif ifeq ($(ZED_CAMERA), 1) -CFLAGS+= -DZED_CAMERA -I/usr/local/zed/include +CFLAGS+= -DZED_STEREO -I/usr/local/zed/include LDFLAGS+= -L/usr/local/zed/lib -lsl_core -lsl_input -lsl_zed #-lstdc++ -D_GLIBCXX_USE_CXX11_ABI=0 endif From f1d47703b4c03cf591d8190c63c59fd2e469fdf1 Mon Sep 17 00:00:00 2001 From: Aymeric Dujardin Date: Tue, 19 Mar 2019 14:23:28 +0100 Subject: [PATCH 21/23] ZED CMake fail silently if not found --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f7f5fd6b..16febf08 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -170,7 +170,7 @@ endif() set(ENABLE_ZED_CAMERA "TRUE" CACHE BOOL "Enable ZED Camera support") if(ENABLE_CUDA) if(ENABLE_ZED_CAMERA) - find_package(ZED 2 REQUIRED) + find_package(ZED 2 QUIET) if(ZED_FOUND) include_directories(${ZED_INCLUDE_DIRS}) link_directories(${ZED_LIBRARY_DIR}) From d2abf0fa57d1f13c85b31929950ba65bc5ea9fde Mon Sep 17 
00:00:00 2001 From: AlexeyAB Date: Tue, 19 Mar 2019 19:06:24 +0300 Subject: [PATCH 22/23] Minor fix --- src/detector.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/detector.c b/src/detector.c index 173bcf78..62a8a078 100644 --- a/src/detector.c +++ b/src/detector.c @@ -1044,7 +1044,6 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa return mean_average_precision; } -//#ifdef OPENCV typedef struct { float w, h; } anchors_t; @@ -1246,6 +1245,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int cvCircle(img, pt, 1, CV_RGB(red_id, green_id, blue_id), CV_FILLED, 8, 0); //if(pt.x > img_size || pt.y > img_size) printf("\n pt.x = %d, pt.y = %d \n", pt.x, pt.y); } + save_cv_png(img, "cloud.png"); cvShowImage("clusters", img); cvWaitKey(0); cvReleaseImage(&img); @@ -1259,11 +1259,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int getchar(); } -//#else -//void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int show) { -// printf(" k-means++ can't be used without OpenCV, because there is used cvKMeans2 implementation \n"); -//} -//#endif // OPENCV + void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile) From 9dddf82ab6ae6a0ed666515fb5a93574f2fc510e Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Tue, 19 Mar 2019 20:21:34 +0300 Subject: [PATCH 23/23] Compile fix. And minor fixes --- include/yolo_v2_class.hpp | 4 ++-- src/image.h | 1 + src/yolo_console_dll.cpp | 8 +++++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/yolo_v2_class.hpp b/include/yolo_v2_class.hpp index 2d42576d..bf3b28aa 100644 --- a/include/yolo_v2_class.hpp +++ b/include/yolo_v2_class.hpp @@ -776,8 +776,8 @@ public: kf.processNoiseCov.at(7) = 1e-2; kf.processNoiseCov.at(14) = 1e-2;// 5.0f; kf.processNoiseCov.at(21) = 1e-2;// 5.0f; - kf.processNoiseCov.at(28) = 1e-2; - kf.processNoiseCov.at(35) = 1e-2; + kf.processNoiseCov.at(28) = 5e-3; + kf.processNoiseCov.at(35) = 5e-3; // Measures Noise Covariance Matrix R - result smoother with higher values (1e-1) cv::setIdentity(kf.measurementNoiseCov, cv::Scalar(1e-1)); diff --git a/src/image.h b/src/image.h index be947d34..91c6fde7 100644 --- a/src/image.h +++ b/src/image.h @@ -96,6 +96,7 @@ image **load_alphabet(); image get_image_from_stream(CvCapture* cap); image get_image_from_stream_cpp(CvCapture* cap); image ipl_to_image(IplImage* src); +void save_cv_png(IplImage *img, const char *name); #endif //float get_pixel(image m, int x, int y, int c); //float get_pixel_extend(image m, int x, int y, int c); diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp index 2d7330fa..b0e25e2d 100644 --- a/src/yolo_console_dll.cpp +++ b/src/yolo_console_dll.cpp @@ -39,12 +39,14 @@ std::vector get_3d_coordinates(std::vector bbox_vect, cv::Mat xy { bool valid_measure; int i, j; - const int R_max = 4; + const unsigned int R_max_global = 10; std::vector bbox3d_vect; for (auto &cur_box : bbox_vect) { + const unsigned int obj_size = std::min(cur_box.w, cur_box.h); + const unsigned int R_max = std::min(R_max_global, obj_size / 2); int center_i = cur_box.x + cur_box.w * 0.5f, center_j = cur_box.y + cur_box.h * 0.5f; std::vector x_vect, y_vect, z_vect; @@ -183,6 +185,8 @@ void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector 0) obj_name += " - " + std::to_string(i.track_id); cv::Size 
const text_size = getTextSize(obj_name, cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, 2, 0); int max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2); + max_width = std::max(max_width, (int)i.w + 2); + //max_width = std::max(max_width, 283); std::string coords_3d; if (!std::isnan(i.z_3d)) { std::stringstream ss; @@ -326,6 +330,8 @@ int main(int argc, char *argv[]) #ifdef ZED_STEREO sl::InitParameters init_params; + init_params.depth_minimum_distance = 0.5; + init_params.depth_mode = sl::DEPTH_MODE_ULTRA; init_params.camera_resolution = sl::RESOLUTION_HD720; init_params.coordinate_units = sl::UNIT_METER; //init_params.sdk_cuda_ctx = (CUcontext)detector.get_cuda_context();