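Added an optional max_delta parameter (e.g. max_delta=10 or max_delta=1) to the [yolo] and [Gaussian_yolo] layers; it clips each box delta to the range [-max_delta, max_delta] to reduce the probability of NaN during training.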

pull/4755/head
AlexeyAB 5 years ago
parent f0fd61b829
commit 4b59770886
  1. 1
      include/darknet.h
  2. 41
      src/gaussian_yolo_layer.c
  3. 2
      src/parser.c
  4. 34
      src/yolo_layer.c

@ -369,6 +369,7 @@ struct layer {
float *weight_updates; float *weight_updates;
float scale_x_y; float scale_x_y;
float max_delta;
float uc_normalizer; float uc_normalizer;
float iou_normalizer; float iou_normalizer;
float cls_normalizer; float cls_normalizer;

@ -153,8 +153,21 @@ box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int
return b; return b;
} }
/*
 * Sanitize a single float: NaN and +/-infinity are replaced by 0,
 * every other value is returned unchanged.
 */
static inline float fix_nan_inf(float val)
{
    const int not_usable = isnan(val) || isinf(val);
    return not_usable ? 0 : val;
}
/*
 * Limit val by max_val: a value greater than max_val becomes max_val,
 * otherwise a value less than -max_val becomes -max_val, and anything
 * else is returned as-is.
 */
static inline float clip_value(float val, const float max_val)
{
    if (val > max_val) {
        return max_val;
    }
    if (val < -max_val) {
        return -max_val;
    }
    return val;
}
float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta,
float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, float uc_normalizer, int accumulate, YOLO_POINT yolo_point) float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, float uc_normalizer, int accumulate, YOLO_POINT yolo_point, float max_delta)
{ {
box pred = get_gaussian_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride, yolo_point); box pred = get_gaussian_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride, yolo_point);
@ -297,6 +310,26 @@ float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int ind
delta_uw *= uc_normalizer; delta_uw *= uc_normalizer;
delta_uh *= uc_normalizer; delta_uh *= uc_normalizer;
delta_x = fix_nan_inf(delta_x);
delta_y = fix_nan_inf(delta_y);
delta_w = fix_nan_inf(delta_w);
delta_h = fix_nan_inf(delta_h);
delta_ux = fix_nan_inf(delta_ux);
delta_uy = fix_nan_inf(delta_uy);
delta_uw = fix_nan_inf(delta_uw);
delta_uh = fix_nan_inf(delta_uh);
delta_x = clip_value(delta_x, max_delta);
delta_y = clip_value(delta_y, max_delta);
delta_w = clip_value(delta_w, max_delta);
delta_h = clip_value(delta_h, max_delta);
delta_ux = clip_value(delta_ux, max_delta);
delta_uy = clip_value(delta_uy, max_delta);
delta_uw = clip_value(delta_uw, max_delta);
delta_uh = clip_value(delta_uh, max_delta);
delta[index + 0 * stride] += delta_x; delta[index + 0 * stride] += delta_x;
delta[index + 2 * stride] += delta_y; delta[index + 2 * stride] += delta_y;
delta[index + 4 * stride] += delta_w; delta[index + 4 * stride] += delta_w;
@ -457,7 +490,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.label_smooth_eps, l.classes_multipliers); delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.label_smooth_eps, l.classes_multipliers);
box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1);
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta);
} }
} }
} }
@ -502,7 +535,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta);
int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8);
avg_obj += l.output[obj_index]; avg_obj += l.output[obj_index];
@ -535,7 +568,7 @@ void forward_gaussian_yolo_layer(const layer l, network_state state)
int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point); float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta);
int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8);
avg_obj += l.output[obj_index]; avg_obj += l.output[obj_index];

@ -411,6 +411,7 @@ layer parse_yolo(list *options, size_params params)
l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f); l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1); l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1);
l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX); // set 10
l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75); l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75);
l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1); l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1);
char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse"); // "iou"); char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse"); // "iou");
@ -506,6 +507,7 @@ layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3
l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f); l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1); l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1);
l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX); // set 10
l.uc_normalizer = option_find_float_quiet(options, "uc_normalizer", 1.0); l.uc_normalizer = option_find_float_quiet(options, "uc_normalizer", 1.0);
l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75); l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75);
l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1.0); l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1.0);

@ -129,7 +129,20 @@ box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw
return b; return b;
} }
ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate) static inline float fix_nan_inf(float val)
{
if (isnan(val) || isinf(val)) val = 0;
return val;
}
/*
 * Clip val using max_val as the bound: values above max_val are replaced
 * by max_val; failing that, values below -max_val are replaced by
 * -max_val; all other values pass through untouched.
 */
static inline float clip_value(float val, const float max_val)
{
    const float lower_bound = -max_val;

    return (val > max_val) ? max_val
         : (val < lower_bound) ? lower_bound
         : val;
}
ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, int accumulate, int max_delta)
{ {
ious all_ious = { 0 }; ious all_ious = { 0 };
// i - step in layer width // i - step in layer width
@ -184,10 +197,15 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i,
dw *= iou_normalizer; dw *= iou_normalizer;
dh *= iou_normalizer; dh *= iou_normalizer;
if (isnan(dx) || isinf(dx)) dx = 0; dx = fix_nan_inf(dx);
if (isnan(dy) || isinf(dy)) dy = 0; dy = fix_nan_inf(dy);
if (isnan(dw) || isinf(dw)) dw = 0; dw = fix_nan_inf(dw);
if (isnan(dh) || isinf(dh)) dh = 0; dh = fix_nan_inf(dh);
dx = clip_value(dx, max_delta);
dy = clip_value(dy, max_delta);
dw = clip_value(dw, max_delta);
dh = clip_value(dh, max_delta);
if (!accumulate) { if (!accumulate) {
delta[index + 0 * stride] = 0; delta[index + 0 * stride] = 0;
@ -370,7 +388,7 @@ void forward_yolo_layer(const layer l, network_state state)
delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.focal_loss, l.label_smooth_eps, l.classes_multipliers); delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.focal_loss, l.label_smooth_eps, l.classes_multipliers);
box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1);
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1); delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
} }
} }
} }
@ -412,7 +430,7 @@ void forward_yolo_layer(const layer l, network_state state)
int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1); ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
// range is 0 <= 1 // range is 0 <= 1
tot_iou += all_ious.iou; tot_iou += all_ious.iou;
@ -456,7 +474,7 @@ void forward_yolo_layer(const layer l, network_state state)
int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1); ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta);
// range is 0 <= 1 // range is 0 <= 1
tot_iou += all_ious.iou; tot_iou += all_ious.iou;

Loading…
Cancel
Save