From 4b59770886e4a3619eb90f4679623555d8b29281 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 23 Jan 2020 21:49:00 +0300 Subject: [PATCH] Added max_delta=10 or 1 to the [yolo] and [Gaussian_yolo] layers, to limit delta and reduce the probability of Nan. --- include/darknet.h | 1 + src/gaussian_yolo_layer.c | 41 +++++++++++++++++++++++++++++++++++---- src/parser.c | 2 ++ src/yolo_layer.c | 34 ++++++++++++++++++++++++-------- 4 files changed, 66 insertions(+), 12 deletions(-) diff --git a/include/darknet.h b/include/darknet.h index c83e7690..322b8535 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -369,6 +369,7 @@ struct layer { float *weight_updates; float scale_x_y; + float max_delta; float uc_normalizer; float iou_normalizer; float cls_normalizer; diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index 477b0b84..2d2121d1 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -153,8 +153,21 @@ box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int return b; } +static inline float fix_nan_inf(float val) +{ + if (isnan(val) || isinf(val)) val = 0; + return val; +} + +static inline float clip_value(float val, const float max_val) +{ + if (val > max_val) val = max_val; + else if (val < -max_val) val = -max_val; + return val; +} + float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, - float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, float uc_normalizer, int accumulate, YOLO_POINT yolo_point) + float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, float uc_normalizer, int accumulate, YOLO_POINT yolo_point, float max_delta) { box pred = get_gaussian_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride, yolo_point); @@ -297,6 +310,26 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind delta_uw *= uc_normalizer; delta_uh *= uc_normalizer; + delta_x = 
fix_nan_inf(delta_x); + delta_y = fix_nan_inf(delta_y); + delta_w = fix_nan_inf(delta_w); + delta_h = fix_nan_inf(delta_h); + + delta_ux = fix_nan_inf(delta_ux); + delta_uy = fix_nan_inf(delta_uy); + delta_uw = fix_nan_inf(delta_uw); + delta_uh = fix_nan_inf(delta_uh); + + delta_x = clip_value(delta_x, max_delta); + delta_y = clip_value(delta_y, max_delta); + delta_w = clip_value(delta_w, max_delta); + delta_h = clip_value(delta_h, max_delta); + + delta_ux = clip_value(delta_ux, max_delta); + delta_uy = clip_value(delta_uy, max_delta); + delta_uw = clip_value(delta_uw, max_delta); + delta_uh = clip_value(delta_uh, max_delta); + delta[index + 0 * stride] += delta_x; delta[index + 2 * stride] += delta_y; delta[index + 4 * stride] += delta_w; @@ -457,7 +490,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.label_smooth_eps, l.classes_multipliers); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; - delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); + delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta); } } } @@ -502,7 +535,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); const float class_multiplier = (l.classes_multipliers) ? 
l.classes_multipliers[class_id] : 1.0f; - float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); + float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta); int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); avg_obj += l.output[obj_index]; @@ -535,7 +568,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state) int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; - float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); + float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta); int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); avg_obj += l.output[obj_index]; diff --git a/src/parser.c b/src/parser.c index bb992985..216941ab 100644 --- a/src/parser.c +++ b/src/parser.c @@ -411,6 +411,7 @@ layer parse_yolo(list *options, size_params params) l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f); l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1); + l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX); // set 10 l.iou_normalizer = option_find_float_quiet(options, 
"iou_normalizer", 0.75); l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1); char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse"); // "iou"); @@ -506,6 +507,7 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3 l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f); l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1); + l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX); // set 10 l.uc_normalizer = option_find_float_quiet(options, "uc_normalizer", 1.0); l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75); l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1.0); diff --git a/src/yolo_layer.c b/src/yolo_layer.c index fb90c477..8f661a96 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -129,7 +129,20 @@ box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw return b; } -ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate) +static inline float fix_nan_inf(float val) +{ + if (isnan(val) || isinf(val)) val = 0; + return val; +} + +static inline float clip_value(float val, const float max_val) +{ + if (val > max_val) val = max_val; + else if (val < -max_val) val = -max_val; + return val; +} + +ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate, float max_delta) { ious all_ious = { 0 }; // i - step in layer width @@ -184,10 +197,15 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, dw *= iou_normalizer; dh *= iou_normalizer; - if (isnan(dx) || isinf(dx)) dx = 0; - if (isnan(dy) || isinf(dy)) dy = 0; - if (isnan(dw) || isinf(dw)) dw = 
0; - if (isnan(dh) || isinf(dh)) dh = 0; + dx = fix_nan_inf(dx); + dy = fix_nan_inf(dy); + dw = fix_nan_inf(dw); + dh = fix_nan_inf(dh); + + dx = clip_value(dx, max_delta); + dy = clip_value(dy, max_delta); + dw = clip_value(dw, max_delta); + dh = clip_value(dh, max_delta); if (!accumulate) { delta[index + 0 * stride] = 0; @@ -370,7 +388,7 @@ void forward_yolo_layer(const layer l, network_state state) delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.focal_loss, l.label_smooth_eps, l.classes_multipliers); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; - delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1); + delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta); } } } @@ -412,7 +430,7 @@ void forward_yolo_layer(const layer l, network_state state) int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); const float class_multiplier = (l.classes_multipliers) ? 
l.classes_multipliers[class_id] : 1.0f; - ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1); + ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta); // range is 0 <= 1 tot_iou += all_ious.iou; @@ -456,7 +474,7 @@ void forward_yolo_layer(const layer l, network_state state) int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; - ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1); + ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta); // range is 0 <= 1 tot_iou += all_ious.iou;