Added max_delta=10 or 1 to the [yolo] and [Gaussian_yolo] layers to limit delta and reduce the probability of NaN.

5 years ago · 4b59770886
parent f0fd61b829
commit 4b59770886
4 changed files with 66 additions and 12 deletions
--- a/include/darknet.h
+++ b/include/darknet.h
@ -369,6 +369,7 @@ struct layer {
    float *weight_updates;
    float scale_x_y;
    float max_delta;
    float uc_normalizer;
    float iou_normalizer;
    float cls_normalizer;
--- a/src/gaussian_yolo_layer.c
+++ b/src/gaussian_yolo_layer.c
@ -153,8 +153,21 @@ box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int
    return b;
 }
 /* Sanitize a value: NaN and +/-infinity become 0, anything else is returned unchanged. */
 static inline float fix_nan_inf(float val)
 {
    const int not_finite = isnan(val) || isinf(val);
    return not_finite ? 0.0f : val;
 }
 /* Clamp val to the symmetric range [-max_val, max_val].
  * The upper bound is tested first; a value inside the range is returned as is. */
 static inline float clip_value(float val, const float max_val)
 {
    const float lower = -max_val;
    if (val > max_val) return max_val;
    if (val < lower) return lower;
    return val;
 }
 float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta,
-    float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, float uc_normalizer, int accumulate, YOLO_POINT yolo_point)
+    float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, float uc_normalizer, int accumulate, YOLO_POINT yolo_point, float max_delta)
 {
    box pred = get_gaussian_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride, yolo_point);
@ -297,6 +310,26 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind
    delta_uw *= uc_normalizer;
    delta_uh *= uc_normalizer;
    delta_x = fix_nan_inf(delta_x);
    delta_y = fix_nan_inf(delta_y);
    delta_w = fix_nan_inf(delta_w);
    delta_h = fix_nan_inf(delta_h);
    delta_ux = fix_nan_inf(delta_ux);
    delta_uy = fix_nan_inf(delta_uy);
    delta_uw = fix_nan_inf(delta_uw);
    delta_uh = fix_nan_inf(delta_uh);
    delta_x = clip_value(delta_x, max_delta);
    delta_y = clip_value(delta_y, max_delta);
    delta_w = clip_value(delta_w, max_delta);
    delta_h = clip_value(delta_h, max_delta);
    delta_ux = clip_value(delta_ux, max_delta);
    delta_uy = clip_value(delta_uy, max_delta);
    delta_uw = clip_value(delta_uw, max_delta);
    delta_uh = clip_value(delta_uh, max_delta);
    delta[index + 0 * stride] += delta_x;
    delta[index + 2 * stride] += delta_y;
    delta[index + 4 * stride] += delta_w;
@ -457,7 +490,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
                        delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.label_smooth_eps, l.classes_multipliers);
                        box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1);
                        const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
-                        delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point);
+                        delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta);
                    }
                }
            }
@ -502,7 +535,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
                int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
                const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
-                float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point);
+                float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta);
                int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8);
                avg_obj += l.output[obj_index];
@ -535,7 +568,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
                        int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
                        const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
-                        float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point);
+                        float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta);
                        int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8);
                        avg_obj += l.output[obj_index];
--- a/src/parser.c
+++ b/src/parser.c
@ -411,6 +411,7 @@ layer parse_yolo(list *options, size_params params)
    l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
    l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1);
    l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX);   // set 10
    l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75);
    l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1);
    char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse");   //  "iou");
@ -506,6 +507,7 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3
    l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
    l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1);
    l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX);   // set 10
    l.uc_normalizer = option_find_float_quiet(options, "uc_normalizer", 1.0);
    l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75);
    l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1.0);
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@ -129,7 +129,20 @@ box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw
    return b;
 }
-ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate)
+static inline float fix_nan_inf(float val)
 {
    // Sanitize a value: NaN or +/-infinity is replaced with 0;
    // any other value is returned unchanged.
    if (isnan(val) || isinf(val)) val = 0;
    return val;
 }
 /* Clamp val to [-max_val, max_val]; the upper bound is checked before the lower one. */
 static inline float clip_value(float val, const float max_val)
 {
    return (val > max_val) ? max_val
         : (val < -max_val) ? -max_val
         : val;
 }
 // max_delta is the symmetric clipping bound handed to clip_value() for dx/dy/dw/dh.
 // It is a float, matching layer.max_delta and clip_value(): callers pass l.max_delta, whose
 // parser default is FLT_MAX, and an int parameter would force an out-of-range float-to-int
 // conversion (and would truncate fractional limits).
 ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate, float max_delta)
 {
    ious all_ious = { 0 };
    // i - step in layer width
@ -184,10 +197,15 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i,
        dw *= iou_normalizer;
        dh *= iou_normalizer;
-        if (isnan(dx) || isinf(dx)) dx = 0;
+        dx = fix_nan_inf(dx);
-        if (isnan(dy) || isinf(dy)) dy = 0;
+        dy = fix_nan_inf(dy);
-        if (isnan(dw) || isinf(dw)) dw = 0;
+        dw = fix_nan_inf(dw);
-        if (isnan(dh) || isinf(dh)) dh = 0;
+        dh = fix_nan_inf(dh);
        dx = clip_value(dx, max_delta);
        dy = clip_value(dy, max_delta);
        dw = clip_value(dw, max_delta);
        dh = clip_value(dh, max_delta);
        if (!accumulate) {
            delta[index + 0 * stride] = 0;
@ -370,7 +388,7 @@ void forward_yolo_layer(const layer l, network_state state)
                        delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.focal_loss, l.label_smooth_eps, l.classes_multipliers);
                        box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1);
                        const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
-                        delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1);
+                        delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
                    }
                }
            }
@ -412,7 +430,7 @@ void forward_yolo_layer(const layer l, network_state state)
                int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
                const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
-                ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1);
+                ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
                // range is 0 <= 1
                tot_iou += all_ious.iou;
@ -456,7 +474,7 @@ void forward_yolo_layer(const layer l, network_state state)
                        int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
                        const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
-                        ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1);
+                        ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
                        // range is 0 <= 1
                        tot_iou += all_ious.iou;