From a6c51e3b758aee7fd3a6f1d37daa8dcad4891e52 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Fri, 9 Mar 2018 01:42:58 +0300 Subject: [PATCH] Fixes for small objects. And fixes of densenet201_yolo.cfg, resnet50_yolo.cfg, resnet152_yolo.cfg --- build/darknet/x64/densenet201_yolo.cfg | 31 +++++++++----------------- build/darknet/x64/partial.cmd | 4 ++++ build/darknet/x64/resnet152_yolo.cfg | 19 ++++++---------- build/darknet/x64/resnet50_yolo.cfg | 19 +++++----------- src/data.c | 2 +- src/detector.c | 2 +- src/region_layer.c | 8 +++---- 7 files changed, 33 insertions(+), 52 deletions(-) diff --git a/build/darknet/x64/densenet201_yolo.cfg b/build/darknet/x64/densenet201_yolo.cfg index a9791fa1..2c78ec5d 100644 --- a/build/darknet/x64/densenet201_yolo.cfg +++ b/build/darknet/x64/densenet201_yolo.cfg @@ -1,30 +1,21 @@ [net] -# Training batch=64 -subdivisions=32 - -# Testing -#batch=1 -#subdivisions=1 - +subdivisions=8 height=416 width=416 -#max_crop=448 channels=3 momentum=0.9 decay=0.0005 - -burn_in=1000 -learning_rate=0.1 -policy=poly -power=4 -max_batches=1600000 - -angle=7 +angle=0 +saturation = 1.5 +exposure = 1.5 hue=.1 -saturation=.75 -exposure=.75 -aspect=.75 + +learning_rate=0.0001 +max_batches = 45000 +policy=steps +steps=100,25000,35000 +scales=10,.1,.1 [convolutional] batch_normalize=1 @@ -1935,7 +1926,7 @@ activation=leaky [route] layers=-1,-3 -stopbackward=1 +#stopbackward=1 [convolutional] diff --git a/build/darknet/x64/partial.cmd b/build/darknet/x64/partial.cmd index 1adf974e..a7cea0b7 100644 --- a/build/darknet/x64/partial.cmd +++ b/build/darknet/x64/partial.cmd @@ -6,6 +6,10 @@ rem Download Yolo9000: http://pjreddie.com/media/files/yolo9000.weights darknet.exe partial cfg/tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-voc.conv.13 13 +darknet.exe partial cfg/tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-voc.conv.12 12 + + + darknet.exe partial cfg/yolo-voc.cfg yolo-voc.weights yolo-voc.conv.23 23 diff --git a/build/darknet/x64/resnet152_yolo.cfg b/build/darknet/x64/resnet152_yolo.cfg index 435695e0..0ea61122 100644 --- a/build/darknet/x64/resnet152_yolo.cfg +++ b/build/darknet/x64/resnet152_yolo.cfg @@ -5,22 +5,17 @@ height=416 width=416 channels=3 momentum=0.9 -decay=0.0001 -#decay=0.0005 +decay=0.0005 angle=0 saturation = 1.5 exposure = 1.5 hue=.1 -learning_rate=0.01 -max_batches = 90000 +learning_rate=0.0001 +max_batches = 45000 policy=steps -steps=60000,80000 -scales=.1,.1 - -#learning_rate=0.0001 -#steps=100,25000,35000 -#scales=10,.1,.1 +steps=100,25000,35000 +scales=10,.1,.1 [convolutional] batch_normalize=1 @@ -1436,7 +1431,7 @@ activation=linear [shortcut] from=-4 activation=leaky -stopbackward=1 +#stopbackward=1 @@ -1466,7 +1461,7 @@ num=5 softmax=1 jitter=.3 rescore=1 -focal_loss=1 +#focal_loss=1 object_scale=5 noobject_scale=1 diff --git a/build/darknet/x64/resnet50_yolo.cfg b/build/darknet/x64/resnet50_yolo.cfg index 0d0ca999..9a911938 100644 --- a/build/darknet/x64/resnet50_yolo.cfg +++ b/build/darknet/x64/resnet50_yolo.cfg @@ -1,18 +1,15 @@ [net] -# Training batch=64 -subdivisions=16 - -# Testing -#batch=1 -#subdivisions=1 - +subdivisions=8 height=416 width=416 -#max_crop=448 channels=3 momentum=0.9 decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 learning_rate=0.0001 max_batches = 45000 @@ -20,12 +17,6 @@ policy=steps steps=100,25000,35000 scales=10,.1,.1 -angle=7 -hue=.1 -saturation=.75 -exposure=.75 -aspect=.75 - [convolutional] batch_normalize=1 filters=64 diff --git a/src/data.c b/src/data.c index ea6313a1..6187b34c 100644 --- a/src/data.c +++ b/src/data.c @@ -326,7 +326,7 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, id = boxes[i].id; // not detect small objects - if ((w < 0.01 || h < 0.01)) continue; + if ((w < 0.001 || h < 0.001)) { printf("small w = %f, h = %f \n", w, h); continue; } truth[i*5+0] = x; truth[i*5+1] = y; diff --git a/src/detector.c b/src/detector.c index f8515d49..59182c4e 100644 --- a/src/detector.c +++ b/src/detector.c @@ -918,7 +918,7 @@ void calc_anchors(char *datacfg, int num_of_clusters, int final_width, int final float box_h = points->data.fl[i * 2 + 1]; //int cluster_idx = labels->data.i[i]; int cluster_idx = 0; - float min_dist = 1000000; + float min_dist = FLT_MAX; for (j = 0; j < num_of_clusters; ++j) { float anchor_w = centers->data.fl[j * 2]; float anchor_h = centers->data.fl[j * 2 + 1]; diff --git a/src/region_layer.c b/src/region_layer.c index a83831c6..9ca71c66 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -131,11 +131,11 @@ void delta_region_class(float *output, float *delta, int index, int class_id, in // Focal loss if (focal_loss) { // Focal Loss for Dense Object Detection: http://blog.csdn.net/linmingan/article/details/77885832 - //printf("Used Focal-loss \n"); - float alpha = 0.5; // 0.25 - float gamma = 2.0; + float alpha = 0.5; // 0.25 or 0.5 + //float gamma = 2; // hardcoded in many places of the grad-formula + int ti = index + class_id; - float grad = -gamma * (1 - output[ti])*logf(fmaxf(output[ti], 0.0000001))*output[ti] + (1 - output[ti])*(1 - output[ti]); + float grad = -2 * (1 - output[ti])*logf(fmaxf(output[ti], 0.0000001))*output[ti] + (1 - output[ti])*(1 - output[ti]); for (n = 0; n < classes; ++n) { delta[index + n] = scale * (((n == class_id) ? 1 : 0) - output[index + n]);