Fixes for training Yolo on small objects

pull/398/head
AlexeyAB 7 years ago
parent 64aa0180bb
commit 9ff40baa4f
  1. 17
      src/data.c
  2. 1
      src/data.h
  3. 1
      src/detector.c
  4. 1
      src/layer.h
  5. 1
      src/parser.c

@ -292,7 +292,7 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
free(boxes);
}
void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy)
void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy, int small_object)
{
char labelpath[4096];
find_replace(path, "images", "labels", labelpath);
@ -305,6 +305,12 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
find_replace(labelpath, ".JPEG", ".txt", labelpath);
int count = 0;
box_label *boxes = read_boxes(labelpath, &count);
if (small_object == 1) {
for (int i = 0; i < count; ++i) {
if (boxes[i].w < 0.01) boxes[i].w = 0.01;
if (boxes[i].h < 0.01) boxes[i].h = 0.01;
}
}
randomize_boxes(boxes, count);
correct_boxes(boxes, count, dx, dy, sx, sy, flip);
if(count > num_boxes) count = num_boxes;
@ -319,7 +325,8 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes,
h = boxes[i].h;
id = boxes[i].id;
if ((w < .01 || h < .01)) continue;
// not detect small objects
if ((w < 0.01 || h < 0.01)) continue;
truth[i*5+0] = x;
truth[i*5+1] = y;
@ -661,7 +668,7 @@ data load_data_swag(char **paths, int n, int classes, float jitter)
return d;
}
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure)
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure, int small_object)
{
char **random_paths = get_random_paths(paths, n, m);
int i;
@ -704,7 +711,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, in
random_distort_image(sized, hue, saturation, exposure);
d.X.vals[i] = sized.data;
fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy);
fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy, small_object);
free_image(orig);
free_image(cropped);
@ -734,7 +741,7 @@ void *load_thread(void *ptr)
} else if (a.type == REGION_DATA){
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
} else if (a.type == DETECTION_DATA){
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure, a.small_object);
} else if (a.type == SWAG_DATA){
*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
} else if (a.type == COMPARE_DATA){

@ -53,6 +53,7 @@ typedef struct load_args{
int classes;
int background;
int scale;
int small_object;
float jitter;
float angle;
float aspect;

@ -82,6 +82,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
args.classes = classes;
args.jitter = jitter;
args.num_boxes = l.max_boxes;
args.small_object = l.small_object;
args.d = &buffer;
args.type = DETECTION_DATA;
args.threads = 4;// 8;

@ -63,6 +63,7 @@ struct layer{
int out_h, out_w, out_c;
int n;
int max_boxes;
int small_object;
int groups;
int size;
int side;

@ -245,6 +245,7 @@ layer parse_region(list *options, size_params params)
l.log = option_find_int_quiet(options, "log", 0);
l.sqrt = option_find_int_quiet(options, "sqrt", 0);
l.small_object = option_find_int(options, "small_object", 0);
l.softmax = option_find_int(options, "softmax", 0);
l.max_boxes = option_find_int_quiet(options, "max",30);
l.jitter = option_find_float(options, "jitter", .2);

Loading…
Cancel
Save