Fixes for small objects, plus fixes to densenet201_yolo.cfg, resnet50_yolo.cfg, and resnet152_yolo.cfg

7 years ago · a6c51e3b75
parent 100d6f7801
commit a6c51e3b75
7 changed files with 33 additions and 52 deletions
--- a/build/darknet/x64/densenet201_yolo.cfg
+++ b/build/darknet/x64/densenet201_yolo.cfg
@@ -1,30 +1,21 @@
 [net]
-# Training
 batch=64
-subdivisions=32
-
-# Testing
-#batch=1
-#subdivisions=1
-
+subdivisions=8
 height=416
 width=416
-#max_crop=448
 channels=3
 momentum=0.9
 decay=0.0005
-
-burn_in=1000
-learning_rate=0.1
-policy=poly
-power=4
-max_batches=1600000
-
-angle=7
+angle=0
+saturation = 1.5
+exposure = 1.5
 hue=.1
-saturation=.75
-exposure=.75
-aspect=.75
+
+learning_rate=0.0001
+max_batches = 45000
+policy=steps
+steps=100,25000,35000
+scales=10,.1,.1

 [convolutional]
 batch_normalize=1
@@ -1935,7 +1926,7 @@ activation=leaky

 [route]
 layers=-1,-3
-stopbackward=1
+#stopbackward=1


 [convolutional]
--- a/build/darknet/x64/partial.cmd
+++ b/build/darknet/x64/partial.cmd
@@ -6,6 +6,10 @@ rem Download Yolo9000: http://pjreddie.com/media/files/yolo9000.weights
 darknet.exe partial cfg/tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-voc.conv.13 13


+darknet.exe partial cfg/tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-voc.conv.12 12
+
+
+
 darknet.exe partial cfg/yolo-voc.cfg yolo-voc.weights yolo-voc.conv.23 23


--- a/build/darknet/x64/resnet152_yolo.cfg
+++ b/build/darknet/x64/resnet152_yolo.cfg
@@ -5,22 +5,17 @@ height=416
 width=416
 channels=3
 momentum=0.9
-decay=0.0001
-#decay=0.0005
+decay=0.0005
 angle=0
 saturation = 1.5
 exposure = 1.5
 hue=.1

-learning_rate=0.01
-max_batches = 90000
+learning_rate=0.0001
+max_batches = 45000
 policy=steps
-steps=60000,80000
-scales=.1,.1
-
-#learning_rate=0.0001
-#steps=100,25000,35000
-#scales=10,.1,.1
+steps=100,25000,35000
+scales=10,.1,.1

 [convolutional]
 batch_normalize=1
@@ -1436,7 +1431,7 @@ activation=linear
 [shortcut]
 from=-4
 activation=leaky
-stopbackward=1
+#stopbackward=1



@@ -1466,7 +1461,7 @@ num=5
 softmax=1
 jitter=.3
 rescore=1
-focal_loss=1
+#focal_loss=1

 object_scale=5
 noobject_scale=1
--- a/build/darknet/x64/resnet50_yolo.cfg
+++ b/build/darknet/x64/resnet50_yolo.cfg
@@ -1,18 +1,15 @@
 [net]
-# Training
 batch=64
-subdivisions=16
-
-# Testing
-#batch=1
-#subdivisions=1
-
+subdivisions=8
 height=416
 width=416
-#max_crop=448
 channels=3
 momentum=0.9
 decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1

 learning_rate=0.0001
 max_batches = 45000
@@ -20,12 +17,6 @@ policy=steps
 steps=100,25000,35000
 scales=10,.1,.1

-angle=7
-hue=.1
-saturation=.75
-exposure=.75
-aspect=.75
-
 [convolutional]
 batch_normalize=1
 filters=64
--- a/src/data.c
+++ b/src/data.c
@@ -326,7 +326,7 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
        id = boxes[i].id;

 		// not detect small objects
-		if ((w < 0.01 || h < 0.01)) continue;
+		if ((w < 0.001 || h < 0.001)) { printf("small w = %f, h = %f \n", w, h); continue; }

        truth[i*5+0] = x;
        truth[i*5+1] = y;
--- a/src/detector.c
+++ b/src/detector.c
@@ -918,7 +918,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int final_width, int final
 		float box_h = points->data.fl[i * 2 + 1];
 		//int cluster_idx = labels->data.i[i];		
 		int cluster_idx = 0;
-		float min_dist = 1000000;
+		float min_dist = FLT_MAX;
 		for (j = 0; j < num_of_clusters; ++j) {
 			float anchor_w = centers->data.fl[j * 2];
 			float anchor_h = centers->data.fl[j * 2 + 1];
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -131,11 +131,11 @@ void delta_region_class(float *output, float *delta, int index, int class_id, in
 		// Focal loss
 		if (focal_loss) {
 			// Focal Loss for Dense Object Detection: http://blog.csdn.net/linmingan/article/details/77885832
-			//printf("Used Focal-loss \n");
-			float alpha = 0.5;	// 0.25
-			float gamma = 2.0;
+			float alpha = 0.5;	// 0.25 or 0.5
+			//float gamma = 2;	// hardcoded in many places of the grad-formula	
+
 			int ti = index + class_id;
-			float grad = -gamma * (1 - output[ti])*logf(fmaxf(output[ti], 0.0000001))*output[ti] + (1 - output[ti])*(1 - output[ti]);
+			float grad = -2 * (1 - output[ti])*logf(fmaxf(output[ti], 0.0000001))*output[ti] + (1 - output[ti])*(1 - output[ti]);

 			for (n = 0; n < classes; ++n) {
 				delta[index + n] = scale * (((n == class_id) ? 1 : 0) - output[index + n]);