Added [yolo] param resize=2.0 for randomly resizing the image within the range [1/2x - 2x] without changing its aspect ratio (whereas jitter=0.3 changes the aspect ratio)

pull/5620/head
AlexeyAB 5 years ago
parent f5105342be
commit 88a39b31d3
  1. 2
      include/darknet.h
  2. 84
      src/data.c
  3. 2
      src/data.h
  4. 4
      src/detector.c
  5. 4
      src/parser.c

@ -271,6 +271,7 @@ struct layer {
int stretch_sway; int stretch_sway;
float angle; float angle;
float jitter; float jitter;
float resize;
float saturation; float saturation;
float exposure; float exposure;
float shift; float shift;
@ -890,6 +891,7 @@ typedef struct load_args {
int show_imgs; int show_imgs;
int dontuse_opencv; int dontuse_opencv;
float jitter; float jitter;
float resize;
int flip; int flip;
int gaussian_noise; int gaussian_noise;
int blur; int blur;

@ -998,7 +998,7 @@ void blend_truth_mosaic(float *new_truth, int boxes, float *old_truth, int w, in
#include "http_stream.h" #include "http_stream.h"
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_gaussian_noise, int use_blur, int use_mixup, data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_gaussian_noise, int use_blur, int use_mixup,
float jitter, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs) float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs)
{ {
const int random_index = random_gen(); const int random_index = random_gen();
c = c ? c : 3; c = c ? c : 3;
@ -1036,6 +1036,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
d.X.cols = h*w*c; d.X.cols = h*w*c;
float r1 = 0, r2 = 0, r3 = 0, r4 = 0, r_scale = 0; float r1 = 0, r2 = 0, r3 = 0, r4 = 0, r_scale = 0;
float resize_r1 = 0, resize_r2 = 0;
float dhue = 0, dsat = 0, dexp = 0, flip = 0, blur = 0; float dhue = 0, dsat = 0, dexp = 0, flip = 0, blur = 0;
int augmentation_calculated = 0, gaussian_noise = 0; int augmentation_calculated = 0, gaussian_noise = 0;
@ -1070,9 +1071,22 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
int dw = (ow*jitter); int dw = (ow*jitter);
int dh = (oh*jitter); int dh = (oh*jitter);
float resize_down = resize, resize_up = resize;
if (resize_down > 1.0) resize_down = 1 / resize_down;
int min_rdw = ow*(1 - (1 / resize_down)) / 2; // < 0
int min_rdh = oh*(1 - (1 / resize_down)) / 2; // < 0
if (resize_up < 1.0) resize_up = 1 / resize_up;
int max_rdw = ow*(1 - (1 / resize_up)) / 2; // > 0
int max_rdh = oh*(1 - (1 / resize_up)) / 2; // > 0
//printf(" down = %f, up = %f \n", (1 - (1 / resize_down)) / 2, (1 - (1 / resize_up)) / 2);
if (!augmentation_calculated || !track) if (!augmentation_calculated || !track)
{ {
augmentation_calculated = 1; augmentation_calculated = 1;
resize_r1 = random_float();
resize_r2 = random_float();
r1 = random_float(); r1 = random_float();
r2 = random_float(); r2 = random_float();
r3 = random_float(); r3 = random_float();
@ -1101,6 +1115,21 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
int pright = rand_precalc_random(-dw, dw, r2); int pright = rand_precalc_random(-dw, dw, r2);
int ptop = rand_precalc_random(-dh, dh, r3); int ptop = rand_precalc_random(-dh, dh, r3);
int pbot = rand_precalc_random(-dh, dh, r4); int pbot = rand_precalc_random(-dh, dh, r4);
if (resize < 1) {
// downsize only
pleft += rand_precalc_random(min_rdw, 0, resize_r1);
pright += rand_precalc_random(min_rdw, 0, resize_r2);
ptop += rand_precalc_random(min_rdh, 0, resize_r1);
pbot += rand_precalc_random(min_rdh, 0, resize_r2);
}
else {
pleft += rand_precalc_random(min_rdw, max_rdw, resize_r1);
pright += rand_precalc_random(min_rdw, max_rdw, resize_r2);
ptop += rand_precalc_random(min_rdh, max_rdh, resize_r1);
pbot += rand_precalc_random(min_rdh, max_rdh, resize_r2);
}
//printf("\n pleft = %d, pright = %d, ptop = %d, pbot = %d, ow = %d, oh = %d \n", pleft, pright, ptop, pbot, ow, oh); //printf("\n pleft = %d, pright = %d, ptop = %d, pbot = %d, ow = %d, oh = %d \n", pleft, pright, ptop, pbot, ow, oh);
//float scale = rand_precalc_random(.25, 2, r_scale); // unused currently //float scale = rand_precalc_random(.25, 2, r_scale); // unused currently
@ -1127,7 +1156,27 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
pright = pright - delta_w; pright = pright - delta_w;
//printf(" result_ar = %f, ow_tmp = %f, delta_w = %d, pleft = %f, pright = %f \n", result_ar, ow_tmp, delta_w, pleft, pright); //printf(" result_ar = %f, ow_tmp = %f, delta_w = %d, pleft = %f, pright = %f \n", result_ar, ow_tmp, delta_w, pleft, pright);
} }
//printf("\n pleft = %d, pright = %d, ptop = %d, pbot = %d, ow = %d, oh = %d \n", pleft, pright, ptop, pbot, ow, oh);
}
/*
// move each 2nd image to the corner - so that most of it was visible
if (use_mixup == 3 && random_gen() % 2 == 0) {
if (flip) {
if (i_mixup == 0) pleft += pright, pright = 0, pbot += ptop, ptop = 0;
if (i_mixup == 1) pright += pleft, pleft = 0, pbot += ptop, ptop = 0;
if (i_mixup == 2) pleft += pright, pright = 0, ptop += pbot, pbot = 0;
if (i_mixup == 3) pright += pleft, pleft = 0, ptop += pbot, pbot = 0;
}
else {
if (i_mixup == 0) pright += pleft, pleft = 0, pbot += ptop, ptop = 0;
if (i_mixup == 1) pleft += pright, pright = 0, pbot += ptop, ptop = 0;
if (i_mixup == 2) pright += pleft, pleft = 0, ptop += pbot, pbot = 0;
if (i_mixup == 3) pleft += pright, pright = 0, ptop += pbot, pbot = 0;
}
} }
*/
int swidth = ow - pleft - pright; int swidth = ow - pleft - pright;
int sheight = oh - ptop - pbot; int sheight = oh - ptop - pbot;
@ -1264,8 +1313,8 @@ void blend_images(image new_img, float alpha, image old_img, float beta)
new_img.data[i] = new_img.data[i] * alpha + old_img.data[i] * beta; new_img.data[i] = new_img.data[i] * alpha + old_img.data[i] * beta;
} }
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup, float jitter, data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup,
float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs) float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs)
{ {
const int random_index = random_gen(); const int random_index = random_gen();
c = c ? c : 3; c = c ? c : 3;
@ -1299,6 +1348,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
d.X.cols = h*w*c; d.X.cols = h*w*c;
float r1 = 0, r2 = 0, r3 = 0, r4 = 0, r_scale; float r1 = 0, r2 = 0, r3 = 0, r4 = 0, r_scale;
float resize_r1 = 0, resize_r2 = 0;
float dhue = 0, dsat = 0, dexp = 0, flip = 0; float dhue = 0, dsat = 0, dexp = 0, flip = 0;
int augmentation_calculated = 0; int augmentation_calculated = 0;
@ -1318,9 +1368,21 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
int dw = (ow*jitter); int dw = (ow*jitter);
int dh = (oh*jitter); int dh = (oh*jitter);
float resize_down = resize, resize_up = resize;
if (resize_down > 1.0) resize_down = 1 / resize_down;
int min_rdw = ow*(1 - (1 / resize_down)) / 2;
int min_rdh = oh*(1 - (1 / resize_down)) / 2;
if (resize_up < 1.0) resize_up = 1 / resize_up;
int max_rdw = ow*(1 - (1 / resize_up)) / 2;
int max_rdh = oh*(1 - (1 / resize_up)) / 2;
if (!augmentation_calculated || !track) if (!augmentation_calculated || !track)
{ {
augmentation_calculated = 1; augmentation_calculated = 1;
resize_r1 = random_float();
resize_r2 = random_float();
r1 = random_float(); r1 = random_float();
r2 = random_float(); r2 = random_float();
r3 = random_float(); r3 = random_float();
@ -1340,7 +1402,19 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo
int ptop = rand_precalc_random(-dh, dh, r3); int ptop = rand_precalc_random(-dh, dh, r3);
int pbot = rand_precalc_random(-dh, dh, r4); int pbot = rand_precalc_random(-dh, dh, r4);
float scale = rand_precalc_random(.25, 2, r_scale); // unused currently if (resize < 1) {
// downsize only
pleft += rand_precalc_random(min_rdw, 0, resize_r1);
pright += rand_precalc_random(min_rdw, 0, resize_r2);
ptop += rand_precalc_random(min_rdh, 0, resize_r1);
pbot += rand_precalc_random(min_rdh, 0, resize_r2);
}
else {
pleft += rand_precalc_random(min_rdw, max_rdw, resize_r1);
pright += rand_precalc_random(min_rdw, max_rdw, resize_r2);
ptop += rand_precalc_random(min_rdh, max_rdh, resize_r1);
pbot += rand_precalc_random(min_rdh, max_rdh, resize_r2);
}
if (letter_box) if (letter_box)
{ {
@ -1454,7 +1528,7 @@ void *load_thread(void *ptr)
} else if (a.type == REGION_DATA){ } else if (a.type == REGION_DATA){
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
} else if (a.type == DETECTION_DATA){ } else if (a.type == DETECTION_DATA){
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.classes, a.flip, a.gaussian_noise, a.blur, a.mixup, a.jitter, *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.classes, a.flip, a.gaussian_noise, a.blur, a.mixup, a.jitter, a.resize,
a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.letter_box, a.show_imgs); a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.letter_box, a.show_imgs);
} else if (a.type == SWAG_DATA){ } else if (a.type == SWAG_DATA){
*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);

@ -87,7 +87,7 @@ data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
data load_data_captcha_encode(char **paths, int n, int m, int w, int h); data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup, data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup,
float jitter, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs); float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs);
data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure); data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv); matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv);
data load_data_super(char **paths, int n, int m, int w, int h, int scale); data load_data_super(char **paths, int n, int m, int w, int h, int scale);

@ -104,7 +104,6 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
layer l = net.layers[net.n - 1]; layer l = net.layers[net.n - 1];
int classes = l.classes; int classes = l.classes;
float jitter = l.jitter;
list *plist = get_paths(train_images); list *plist = get_paths(train_images);
int train_images_num = plist->size; int train_images_num = plist->size;
@ -129,7 +128,8 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
args.m = plist->size; args.m = plist->size;
args.classes = classes; args.classes = classes;
args.flip = net.flip; args.flip = net.flip;
args.jitter = jitter; args.jitter = l.jitter;
args.resize = l.resize;
args.num_boxes = l.max_boxes; args.num_boxes = l.max_boxes;
net.num_boxes = args.num_boxes; net.num_boxes = args.num_boxes;
net.train_images_num = train_images_num; net.train_images_num = train_images_num;

@ -449,6 +449,7 @@ layer parse_yolo(list *options, size_params params)
} }
l.jitter = option_find_float(options, "jitter", .2); l.jitter = option_find_float(options, "jitter", .2);
l.resize = option_find_float_quiet(options, "resize", 1.0);
l.focal_loss = option_find_int_quiet(options, "focal_loss", 0); l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
@ -564,6 +565,7 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3
iou_loss, l.iou_loss, l.iou_normalizer, l.cls_normalizer, l.scale_x_y, l.yolo_point); iou_loss, l.iou_loss, l.iou_normalizer, l.cls_normalizer, l.scale_x_y, l.yolo_point);
l.jitter = option_find_float(options, "jitter", .2); l.jitter = option_find_float(options, "jitter", .2);
l.resize = option_find_float_quiet(options, "resize", 1.0);
l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
l.truth_thresh = option_find_float(options, "truth_thresh", 1); l.truth_thresh = option_find_float(options, "truth_thresh", 1);
@ -612,6 +614,7 @@ layer parse_region(list *options, size_params params)
l.focal_loss = option_find_int_quiet(options, "focal_loss", 0); l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
//l.max_boxes = option_find_int_quiet(options, "max",30); //l.max_boxes = option_find_int_quiet(options, "max",30);
l.jitter = option_find_float(options, "jitter", .2); l.jitter = option_find_float(options, "jitter", .2);
l.resize = option_find_float_quiet(options, "resize", 1.0);
l.rescore = option_find_int_quiet(options, "rescore",0); l.rescore = option_find_int_quiet(options, "rescore",0);
l.thresh = option_find_float(options, "thresh", .5); l.thresh = option_find_float(options, "thresh", .5);
@ -666,6 +669,7 @@ detection_layer parse_detection(list *options, size_params params)
layer.noobject_scale = option_find_float(options, "noobject_scale", 1); layer.noobject_scale = option_find_float(options, "noobject_scale", 1);
layer.class_scale = option_find_float(options, "class_scale", 1); layer.class_scale = option_find_float(options, "class_scale", 1);
layer.jitter = option_find_float(options, "jitter", .2); layer.jitter = option_find_float(options, "jitter", .2);
layer.resize = option_find_float_quiet(options, "resize", 1.0);
layer.random = option_find_float_quiet(options, "random", 0); layer.random = option_find_float_quiet(options, "random", 0);
layer.reorg = option_find_int_quiet(options, "reorg", 0); layer.reorg = option_find_int_quiet(options, "reorg", 0);
return layer; return layer;

Loading…
Cancel
Save