diff --git a/src/data.c b/src/data.c index 6c9e565d..1b9117d9 100644 --- a/src/data.c +++ b/src/data.c @@ -875,7 +875,11 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo dexp = rand_scale(exposure); flip = use_flip ? random_gen() % 2 : 0; - blur = rand_int(0, 1) ? (use_blur) : 0; + + //blur = rand_int(0, 1) ? (use_blur) : 0; + int tmp_blur = rand_int(0, 2); // 0 - disable, 1 - blur background, 2 - blur the whole image + if (tmp_blur == 2) blur = use_blur; + else blur = tmp_blur; } int pleft = rand_precalc_random(-dw, dw, r1); @@ -922,7 +926,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo int min_w_h = fill_truth_detection(filename, boxes, truth, classes, flip, dx, dy, 1. / sx, 1. / sy, w, h); - if (min_w_h/4 < blur) blur = min_w_h / 4; // disable blur if one of the objects is too small + if (min_w_h/4 < blur && blur > 1) blur = min_w_h / 4; // disable blur if one of the objects is too small image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, dhue, dsat, dexp, blur, boxes, d.y.vals[i]); diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index ddb43939..3b58cc5a 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -95,8 +95,8 @@ void resize_gaussian_yolo_layer(layer *l, int w, int h) l->outputs = h*w*l->n*(l->classes + 8 + 1); l->inputs = l->outputs; - l->output = realloc(l->output, l->batch*l->outputs * sizeof(float)); - l->delta = realloc(l->delta, l->batch*l->outputs * sizeof(float)); + l->output = (float *)realloc(l->output, l->batch*l->outputs * sizeof(float)); + l->delta = (float *)realloc(l->delta, l->batch*l->outputs * sizeof(float)); //if (!l->output_pinned) l->output = (float*)realloc(l->output, l->batch*l->outputs * sizeof(float)); //if (!l->delta_pinned) l->delta = (float*)realloc(l->delta, l->batch*l->outputs * sizeof(float)); @@ -187,17 +187,17 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind } -void delta_gaussian_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat) +void delta_gaussian_yolo_class(float *output, float *delta, int index, int class_id, int classes, int stride, float *avg_cat) { int n; if (delta[index]){ - delta[index + stride*class] = 1 - output[index + stride*class]; - if(avg_cat) *avg_cat += output[index + stride*class]; + delta[index + stride*class_id] = 1 - output[index + stride*class_id]; + if(avg_cat) *avg_cat += output[index + stride*class_id]; return; } for(n = 0; n < classes; ++n){ - delta[index + stride*n] = ((n == class)?1 : 0) - output[index + stride*n]; - if(n == class && avg_cat) *avg_cat += output[index + stride*n]; + delta[index + stride*n] = ((n == class_id)?1 : 0) - output[index + stride*n]; + if(n == class_id && avg_cat) *avg_cat += output[index + stride*n]; } } @@ -219,9 +219,11 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) // x : mu, sigma int index = entry_gaussian_index(l, b, n*l.w*l.h, 0); activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + scal_add_cpu(l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output + index, 1); // scale x // y : mu, sigma index = entry_gaussian_index(l, b, n*l.w*l.h, 2); activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); + scal_add_cpu(l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output + index, 1); // scale y // w : sigma index = entry_gaussian_index(l, b, n*l.w*l.h, 5); activate_array(l.output + index, l.w*l.h, LOGISTIC); @@ -272,10 +274,10 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) if (best_iou > l.truth_thresh) { l.delta[obj_index] = 1 - l.output[obj_index]; - int class = state.truth[best_t*(4 + 1) + b*l.truths + 4]; - if (l.map) class = l.map[class]; + int class_id = state.truth[best_t*(4 + 1) + b*l.truths + 4]; + if (l.map) class_id = l.map[class_id]; int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); - delta_gaussian_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0); + delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); } @@ -312,10 +314,10 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) avg_obj += l.output[obj_index]; l.delta[obj_index] = 1 - l.output[obj_index]; - int class = state.truth[t*(4 + 1) + b*l.truths + 4]; - if (l.map) class = l.map[class]; + int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; + if (l.map) class_id = l.map[class_id]; int class_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 9); - delta_gaussian_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat); + delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat); ++count; ++class_count; @@ -433,24 +435,27 @@ int get_gaussian_yolo_detections(layer l, int w, int h, int netw, int neth, floa for(n = 0; n < l.n; ++n){ int obj_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 8); float objectness = predictions[obj_index]; - if(objectness <= thresh) continue; - int box_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 0); - dets[count].bbox = get_gaussian_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); - dets[count].objectness = objectness; - dets[count].classes = l.classes; - - dets[count].uc[0] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 1)]; // tx uncertainty - dets[count].uc[1] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 3)]; // ty uncertainty - dets[count].uc[2] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 5)]; // tw uncertainty - dets[count].uc[3] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 7)]; // th uncertainty - - for(j = 0; j < l.classes; ++j){ - int class_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 9 + j); - float uc_aver = (dets[count].uc[0] + dets[count].uc[1] + dets[count].uc[2] + dets[count].uc[3]) / 4.0; - float prob = objectness*predictions[class_index]*(1.0-uc_aver); - dets[count].prob[j] = (prob > thresh) ? prob : 0; + if (objectness <= thresh) continue; // incorrect behavior for Nan values + + if (objectness > thresh) { + int box_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 0); + dets[count].bbox = get_gaussian_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h); + dets[count].objectness = objectness; + dets[count].classes = l.classes; + + dets[count].uc[0] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 1)]; // tx uncertainty + dets[count].uc[1] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 3)]; // ty uncertainty + dets[count].uc[2] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 5)]; // tw uncertainty + dets[count].uc[3] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 7)]; // th uncertainty + + for (j = 0; j < l.classes; ++j) { + int class_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 9 + j); + float uc_aver = (dets[count].uc[0] + dets[count].uc[1] + dets[count].uc[2] + dets[count].uc[3]) / 4.0; + float prob = objectness*predictions[class_index] * (1.0 - uc_aver); + dets[count].prob[j] = (prob > thresh) ? prob : 0; + } + ++count; } - ++count; } } correct_gaussian_yolo_boxes(dets, count, w, h, netw, neth, relative, letter); @@ -470,9 +475,11 @@ void forward_gaussian_yolo_layer_gpu(const layer l, network_state state) // x : mu, sigma int index = entry_gaussian_index(l, b, n*l.w*l.h, 0); activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + scal_add_ongpu(l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output_gpu + index, 1); // scale x // y : mu, sigma index = entry_gaussian_index(l, b, n*l.w*l.h, 2); activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); + scal_add_ongpu(l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output_gpu + index, 1); // scale y // w : sigma index = entry_gaussian_index(l, b, n*l.w*l.h, 5); activate_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC); diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index d4a69e6f..a9d3b560 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -1196,7 +1196,10 @@ image image_data_augmentation(mat_cv* mat, int w, int h, if (blur) { cv::Mat dst(sized.size(), sized.type()); - if(blur == 1) cv::GaussianBlur(sized, dst, cv::Size(31, 31), 0); + if (blur == 1) { + //cv::GaussianBlur(sized, dst, cv::Size(31, 31), 0); + cv::bilateralFilter(sized, dst, 31, 75, 75); + } else { int ksize = (blur / 2) * 2 + 1; cv::Size kernel_size = cv::Size(ksize, ksize); diff --git a/src/parser.c b/src/parser.c index 1367d35b..e0962f02 100644 --- a/src/parser.c +++ b/src/parser.c @@ -433,6 +433,7 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3 layer l = make_gaussian_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes); assert(l.outputs == params.inputs); + l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1); l.max_boxes = option_find_int_quiet(options, "max", 90); l.jitter = option_find_float(options, "jitter", .2);