From 991961200694fca38c95400736d513aa396593a8 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Thu, 26 Mar 2020 01:25:04 +0300 Subject: [PATCH] Added attention during training: [net] adversarial_lr=0.05 attention=1 --- include/darknet.h | 3 ++- src/detector.c | 3 +++ src/network_kernels.cu | 49 ++++++++++++++++++++++++++++++++++++++++-- src/parser.c | 2 +- 4 files changed, 53 insertions(+), 4 deletions(-) diff --git a/include/darknet.h b/include/darknet.h index 181af1c0..52d7ce5f 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -696,7 +696,8 @@ typedef struct network { int mixup; float label_smooth_eps; int resize_step; - int adversarial; + int attention; + int adversarial; float adversarial_lr; int letter_box; float angle; diff --git a/src/detector.c b/src/detector.c index d7e5e578..b2f8705c 100644 --- a/src/detector.c +++ b/src/detector.c @@ -1744,6 +1744,9 @@ void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename, float avg_loss = get_network_cost(net); draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, iteration, it_num, 0, 0, "mAP%", dont_show, 0, 0); + float inv_loss = 1.0 / max_val_cmp(0.01, avg_loss); + //net.learning_rate = *lr_set * inv_loss; + if (*boxonly) { int dw = truth_cpu[2] * sized.w, dh = truth_cpu[3] * sized.h; int dx = truth_cpu[0] * sized.w - dw / 2, dy = truth_cpu[1] * sized.h - dh / 2; diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 15219011..d42948bb 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -227,6 +227,49 @@ void backward_network_gpu(network net, network_state state) */ } + if (net.adversarial && net.attention) + { + int img_size = net.w * net.h * net.c; + float *original_input_cpu = (float *)xcalloc(img_size, sizeof(float)); + float *original_delta_cpu = (float *)xcalloc(img_size, sizeof(float)); + cuda_pull_array(original_input, original_input_cpu, img_size); + cuda_pull_array(original_delta, original_delta_cpu, img_size); + + image 
attention_img; + attention_img.w = net.w; + attention_img.h = net.h; + attention_img.c = net.c; + attention_img.data = original_delta_cpu; /* image header aliases the pulled delta buffer; the buffer itself is released by free(original_delta_cpu) below */ + int k; + float min_val = 999999, mean_val = 0, max_val = -999999; + for (k = 0; k < img_size; ++k) { + if (original_delta_cpu[k] < min_val) min_val = original_delta_cpu[k]; + if (original_delta_cpu[k] > max_val) max_val = original_delta_cpu[k]; + mean_val += original_delta_cpu[k]; + } + mean_val = mean_val / img_size; + float range = max_val - min_val; /* 0 when every delta value is identical */ + + for (k = 0; k < img_size; ++k) { + float val = original_delta_cpu[k]; + val = (range > 0) ? fabs(mean_val - val) / range : 0; /* distance from the mean as a fraction of the range; guarded so a constant delta gives 0 instead of 0/0 = NaN */ + original_delta_cpu[k] = val * 4; + } + + image resized = resize_image(attention_img, net.w/4, net.h/4); /* quarter-size copy; height must come from net.h, not net.w */ + attention_img = resize_image(resized, net.w, net.h); /* back to net.w x net.h so the img_size loop below stays inside the resized buffer for non-square networks */ + + for (k = 0; k < img_size; ++k) attention_img.data[k] += original_input_cpu[k]; + + //normalize_image(attention_img); + show_image(attention_img, "delta"); + + free_image(resized); + free_image(attention_img); + + free(original_input_cpu); + free(original_delta_cpu); + } if (net.adversarial) { int x_size = get_network_input_size(net)*net.batch; printf(" x_size = %d, original_delta = %p, original_input = %p, net.learning_rate = %d \n", @@ -333,12 +376,14 @@ void forward_backward_network_gpu(network net, float *x, float *y) float train_network_datum_gpu(network net, float *x, float *y) { *net.seen += net.batch; - if (net.adversarial_lr && rand_int(0, 1) == 1 && get_current_iteration(net) > net.burn_in) { + if (net.adversarial_lr && rand_int(0, 1) == 1) {// && get_current_iteration(net) > net.burn_in) { net.adversarial = 1; float lr_old = net.learning_rate; net.learning_rate = net.adversarial_lr; layer l = net.layers[net.n - 1]; - float *truth_cpu = (float *)xcalloc(l.truths * l.batch, sizeof(float)); + int y_size = get_network_output_size(net)*net.batch; + if (net.layers[net.n - 1].truths) y_size = net.layers[net.n - 1].truths*net.batch; + float *truth_cpu = (float *)xcalloc(y_size, sizeof(float)); printf("\n 
adversarial training, adversarial_lr = %f \n", net.adversarial_lr); diff --git a/src/parser.c b/src/parser.c index b997e56f..4afb28ea 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1086,7 +1086,7 @@ void parse_net_options(list *options, network *net) net->letter_box = option_find_int_quiet(options, "letter_box", 0); net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f); net->resize_step = option_find_float_quiet(options, "resize_step", 32); - //net->adversarial = option_find_int_quiet(options, "adversarial", 0); + net->attention = option_find_int_quiet(options, "attention", 0); net->adversarial_lr = option_find_float_quiet(options, "adversarial_lr", 0); net->angle = option_find_float_quiet(options, "angle", 0);