From a6c51e3b758aee7fd3a6f1d37daa8dcad4891e52 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Fri, 9 Mar 2018 01:42:58 +0300
Subject: [PATCH] Fixes for small objects. And fixes of densenet201_yolo.cfg,
 resnet50_yolo.cfg, resnet152_yolo.cfg

---
 build/darknet/x64/densenet201_yolo.cfg | 31 +++++++++-----------------
 build/darknet/x64/partial.cmd          |  4 ++++
 build/darknet/x64/resnet152_yolo.cfg   | 19 ++++++----------
 build/darknet/x64/resnet50_yolo.cfg    | 19 +++++-----------
 src/data.c                             |  2 +-
 src/detector.c                         |  2 +-
 src/region_layer.c                     |  8 +++----
 7 files changed, 33 insertions(+), 52 deletions(-)

diff --git a/build/darknet/x64/densenet201_yolo.cfg b/build/darknet/x64/densenet201_yolo.cfg
index a9791fa1..2c78ec5d 100644
--- a/build/darknet/x64/densenet201_yolo.cfg
+++ b/build/darknet/x64/densenet201_yolo.cfg
@@ -1,30 +1,21 @@
 [net]
-# Training
 batch=64
-subdivisions=32
-
-# Testing
-#batch=1
-#subdivisions=1
-
+subdivisions=8
 height=416
 width=416
-#max_crop=448
 channels=3
 momentum=0.9
 decay=0.0005
-
-burn_in=1000
-learning_rate=0.1
-policy=poly
-power=4
-max_batches=1600000
-
-angle=7
+angle=0
+saturation = 1.5
+exposure = 1.5
 hue=.1
-saturation=.75
-exposure=.75
-aspect=.75
+
+learning_rate=0.0001
+max_batches = 45000
+policy=steps
+steps=100,25000,35000
+scales=10,.1,.1
 
 [convolutional]
 batch_normalize=1
@@ -1935,7 +1926,7 @@ activation=leaky
 
 [route]
 layers=-1,-3
-stopbackward=1
+#stopbackward=1
 
 
 [convolutional]
diff --git a/build/darknet/x64/partial.cmd b/build/darknet/x64/partial.cmd
index 1adf974e..a7cea0b7 100644
--- a/build/darknet/x64/partial.cmd
+++ b/build/darknet/x64/partial.cmd
@@ -6,6 +6,10 @@ rem Download Yolo9000: http://pjreddie.com/media/files/yolo9000.weights
 darknet.exe partial cfg/tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-voc.conv.13 13
 
 
+darknet.exe partial cfg/tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-voc.conv.12 12
+
+
+
 darknet.exe partial cfg/yolo-voc.cfg yolo-voc.weights yolo-voc.conv.23 23
 
 
diff --git a/build/darknet/x64/resnet152_yolo.cfg b/build/darknet/x64/resnet152_yolo.cfg
index 435695e0..0ea61122 100644
--- a/build/darknet/x64/resnet152_yolo.cfg
+++ b/build/darknet/x64/resnet152_yolo.cfg
@@ -5,22 +5,17 @@ height=416
 width=416
 channels=3
 momentum=0.9
-decay=0.0001
-#decay=0.0005
+decay=0.0005
 angle=0
 saturation = 1.5
 exposure = 1.5
 hue=.1
 
-learning_rate=0.01
-max_batches = 90000
+learning_rate=0.0001
+max_batches = 45000
 policy=steps
-steps=60000,80000
-scales=.1,.1
-
-#learning_rate=0.0001
-#steps=100,25000,35000
-#scales=10,.1,.1
+steps=100,25000,35000
+scales=10,.1,.1
 
 [convolutional]
 batch_normalize=1
@@ -1436,7 +1431,7 @@ activation=linear
 [shortcut]
 from=-4
 activation=leaky
-stopbackward=1
+#stopbackward=1
 
 
 
@@ -1466,7 +1461,7 @@ num=5
 softmax=1
 jitter=.3
 rescore=1
-focal_loss=1
+#focal_loss=1
 
 object_scale=5
 noobject_scale=1
diff --git a/build/darknet/x64/resnet50_yolo.cfg b/build/darknet/x64/resnet50_yolo.cfg
index 0d0ca999..9a911938 100644
--- a/build/darknet/x64/resnet50_yolo.cfg
+++ b/build/darknet/x64/resnet50_yolo.cfg
@@ -1,18 +1,15 @@
 [net]
-# Training
 batch=64
-subdivisions=16
-
-# Testing
-#batch=1
-#subdivisions=1
-
+subdivisions=8
 height=416
 width=416
-#max_crop=448
 channels=3
 momentum=0.9
 decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
 
 learning_rate=0.0001
 max_batches = 45000
@@ -20,12 +17,6 @@ policy=steps
 steps=100,25000,35000
 scales=10,.1,.1
 
-angle=7
-hue=.1
-saturation=.75
-exposure=.75
-aspect=.75
-
 [convolutional]
 batch_normalize=1
 filters=64
diff --git a/src/data.c b/src/data.c
index ea6313a1..6187b34c 100644
--- a/src/data.c
+++ b/src/data.c
@@ -326,7 +326,7 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
         id = boxes[i].id;
 
 		// not detect small objects
-		if ((w < 0.01 || h < 0.01)) continue;
+		if ((w < 0.001 || h < 0.001)) { printf("small w = %f, h = %f \n", w, h); continue; }
 
         truth[i*5+0] = x;
         truth[i*5+1] = y;
diff --git a/src/detector.c b/src/detector.c
index f8515d49..59182c4e 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -918,7 +918,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int final_width, int final
 		float box_h = points->data.fl[i * 2 + 1];
 		//int cluster_idx = labels->data.i[i];		
 		int cluster_idx = 0;
-		float min_dist = 1000000;
+		float min_dist = FLT_MAX;
 		for (j = 0; j < num_of_clusters; ++j) {
 			float anchor_w = centers->data.fl[j * 2];
 			float anchor_h = centers->data.fl[j * 2 + 1];
diff --git a/src/region_layer.c b/src/region_layer.c
index a83831c6..9ca71c66 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -131,11 +131,11 @@ void delta_region_class(float *output, float *delta, int index, int class_id, in
 		// Focal loss
 		if (focal_loss) {
 			// Focal Loss for Dense Object Detection: http://blog.csdn.net/linmingan/article/details/77885832
-			//printf("Used Focal-loss \n");
-			float alpha = 0.5;	// 0.25
-			float gamma = 2.0;
+			float alpha = 0.5;	// 0.25 or 0.5
+			//float gamma = 2;	// gamma is fixed at 2 and baked into the grad formula below (the -2 factor and the squared (1 - output[ti]) term)
+
 			int ti = index + class_id;
-			float grad = -gamma * (1 - output[ti])*logf(fmaxf(output[ti], 0.0000001))*output[ti] + (1 - output[ti])*(1 - output[ti]);
+			float grad = -2 * (1 - output[ti])*logf(fmaxf(output[ti], 0.0000001))*output[ti] + (1 - output[ti])*(1 - output[ti]);
 
 			for (n = 0; n < classes; ++n) {
 				delta[index + n] = scale * (((n == class_id) ? 1 : 0) - output[index + n]);