From 4b59770886e4a3619eb90f4679623555d8b29281 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Thu, 23 Jan 2020 21:49:00 +0300
Subject: [PATCH] Added max_delta=10 or 1 to the [yolo] and [Gaussian_yolo]
 layers, to limit delta and reduce the probability of NaN.

---
 include/darknet.h         |  1 +
 src/gaussian_yolo_layer.c | 41 +++++++++++++++++++++++++++++++++++----
 src/parser.c              |  2 ++
 src/yolo_layer.c          | 34 ++++++++++++++++++++++++--------
 4 files changed, 66 insertions(+), 12 deletions(-)

diff --git a/include/darknet.h b/include/darknet.h
index c83e7690..322b8535 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -369,6 +369,7 @@ struct layer {
     float *weight_updates;
 
     float scale_x_y;
+    float max_delta;
     float uc_normalizer;
     float iou_normalizer;
     float cls_normalizer;
diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c
index 477b0b84..2d2121d1 100644
--- a/src/gaussian_yolo_layer.c
+++ b/src/gaussian_yolo_layer.c
@@ -153,8 +153,21 @@ box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int
     return b;
 }
 
+static inline float fix_nan_inf(float val)   // returns val unchanged, or 0 when val is NaN or +/-Inf
+{
+    if (isnan(val) || isinf(val)) val = 0;   // non-finite input is replaced by zero
+    return val;
+}
+
+static inline float clip_value(float val, const float max_val)   // limits val to at most max_val and at least -max_val
+{
+    if (val > max_val) val = max_val;   // upper bound
+    else if (val < -max_val) val = -max_val;   // symmetric lower bound; NOTE(review): assumes max_val >= 0 - confirm
+    return val;   // a NaN val fails both comparisons and is returned as-is
+}
+
 float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta,
-    float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, float uc_normalizer, int accumulate, YOLO_POINT yolo_point)
+    float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, float uc_normalizer, int accumulate, YOLO_POINT yolo_point, float max_delta)
 {
     box pred = get_gaussian_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride, yolo_point);
 
@@ -297,6 +310,26 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind
     delta_uw *= uc_normalizer;
     delta_uh *= uc_normalizer;
 
+    delta_x = fix_nan_inf(delta_x);
+    delta_y = fix_nan_inf(delta_y);
+    delta_w = fix_nan_inf(delta_w);
+    delta_h = fix_nan_inf(delta_h);
+
+    delta_ux = fix_nan_inf(delta_ux);
+    delta_uy = fix_nan_inf(delta_uy);
+    delta_uw = fix_nan_inf(delta_uw);
+    delta_uh = fix_nan_inf(delta_uh);
+
+    delta_x = clip_value(delta_x, max_delta);
+    delta_y = clip_value(delta_y, max_delta);
+    delta_w = clip_value(delta_w, max_delta);
+    delta_h = clip_value(delta_h, max_delta);
+
+    delta_ux = clip_value(delta_ux, max_delta);
+    delta_uy = clip_value(delta_uy, max_delta);
+    delta_uw = clip_value(delta_uw, max_delta);
+    delta_uh = clip_value(delta_uh, max_delta);
+
     delta[index + 0 * stride] += delta_x;
     delta[index + 2 * stride] += delta_y;
     delta[index + 4 * stride] += delta_w;
@@ -457,7 +490,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
                         delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.label_smooth_eps, l.classes_multipliers);
                         box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1);
                         const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
-                        delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point);
+                        delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta);
                     }
                 }
             }
@@ -502,7 +535,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
 
                 int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
                 const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
-                float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point);
+                float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta);
 
                 int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8);
                 avg_obj += l.output[obj_index];
@@ -535,7 +568,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
 
                         int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
                         const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
-                        float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point);
+                        float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta);
 
                         int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8);
                         avg_obj += l.output[obj_index];
diff --git a/src/parser.c b/src/parser.c
index bb992985..216941ab 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -411,6 +411,7 @@ layer parse_yolo(list *options, size_params params)
 
     l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
     l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1);
+    l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX);   // set 10
     l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75);
     l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1);
     char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse");   //  "iou");
@@ -506,6 +507,7 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3
 
     l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
     l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1);
+    l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX);   // set 10
     l.uc_normalizer = option_find_float_quiet(options, "uc_normalizer", 1.0);
     l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75);
     l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1.0);
diff --git a/src/yolo_layer.c b/src/yolo_layer.c
index fb90c477..8f661a96 100644
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@@ -129,7 +129,20 @@ box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw
     return b;
 }
 
-ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate)
+static inline float fix_nan_inf(float val)   // returns val unchanged, or 0 when val is NaN or +/-Inf
+{
+    if (isnan(val) || isinf(val)) val = 0;   // non-finite input is replaced by zero
+    return val;
+}
+
+static inline float clip_value(float val, const float max_val)   // limits val to at most max_val and at least -max_val
+{
+    if (val > max_val) val = max_val;   // upper bound
+    else if (val < -max_val) val = -max_val;   // symmetric lower bound; NOTE(review): assumes max_val >= 0 - confirm
+    return val;   // a NaN val fails both comparisons and is returned as-is
+}
+
+ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate, float max_delta)   // max_delta must be float: callers pass l.max_delta, whose parser default is FLT_MAX (not representable as int)
 {
     ious all_ious = { 0 };
     // i - step in layer width
@@ -184,10 +197,15 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i,
         dw *= iou_normalizer;
         dh *= iou_normalizer;
 
-        if (isnan(dx) || isinf(dx)) dx = 0;
-        if (isnan(dy) || isinf(dy)) dy = 0;
-        if (isnan(dw) || isinf(dw)) dw = 0;
-        if (isnan(dh) || isinf(dh)) dh = 0;
+        dx = fix_nan_inf(dx);
+        dy = fix_nan_inf(dy);
+        dw = fix_nan_inf(dw);
+        dh = fix_nan_inf(dh);
+
+        dx = clip_value(dx, max_delta);
+        dy = clip_value(dy, max_delta);
+        dw = clip_value(dw, max_delta);
+        dh = clip_value(dh, max_delta);
 
         if (!accumulate) {
             delta[index + 0 * stride] = 0;
@@ -370,7 +388,7 @@ void forward_yolo_layer(const layer l, network_state state)
                         delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.focal_loss, l.label_smooth_eps, l.classes_multipliers);
                         box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1);
                         const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
-                        delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1);
+                        delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
                     }
                 }
             }
@@ -412,7 +430,7 @@ void forward_yolo_layer(const layer l, network_state state)
 
                 int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
                 const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
-                ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1);
+                ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
 
                 // range is 0 <= 1
                 tot_iou += all_ious.iou;
@@ -456,7 +474,7 @@ void forward_yolo_layer(const layer l, network_state state)
 
                         int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
                         const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
-                        ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1);
+                        ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
 
                         // range is 0 <= 1
                         tot_iou += all_ious.iou;