From c1f184249e996d7248e40cffee138a11cdd89ae0 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Thu, 8 Aug 2019 00:29:06 +0300
Subject: [PATCH] Added assisted_excitation_forward for training on CPU

---
 include/darknet.h         |   3 +
 src/convolutional_layer.c | 123 ++++++++++++++++++++++++++++++++++++++-
 src/convolutional_layer.h |   1 +
 src/detector.c            |   2 +
 src/parser.c              |   1 +
 5 files changed, 129 insertions(+), 1 deletion(-)

diff --git a/include/darknet.h b/include/darknet.h
index 72466ab1..5d87a832 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -278,6 +278,7 @@ struct layer {
     float focus;
     int classfix;
     int absolute;
+    int assisted_excitation;
 
     int onlyforward;
     int stopbackward;
@@ -580,6 +581,8 @@ typedef struct network {
     int time_steps;
     int step;
     int max_batches;
+    int num_boxes;
+    int train_images_num;
     float *seq_scales;
     float *scales;
     int *steps;
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index b8972f45..20fc45c6 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -1076,6 +1076,126 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
     else activate_array_cpu_custom(l.output, l.outputs*l.batch, l.activation);
 
     if(l.binary || l.xnor) swap_binary(&l);
+
+    //visualize_convolutional_layer(l, "conv_visual", NULL);
+    //wait_until_press_key_cv();
+
+    if(l.assisted_excitation && state.train) assisted_excitation_forward(l, state);
+}
+
+static box float_to_box_stride(float *f, int stride)
+{
+    box b = { 0 };
+    b.x = f[0];
+    b.y = f[1 * stride];
+    b.w = f[2 * stride];
+    b.h = f[3 * stride];
+    return b;
+}
+
+void assisted_excitation_forward(convolutional_layer l, network_state state)
+{
+    const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
+
+    // epoch
+    const float epoch = (float)(*state.net.seen) / state.net.train_images_num;
+
+    // calculate alpha: cosine schedule that decays from 1 at the start of training to 0 at max_batches
+    //const float alpha = (1 + cos(3.141592 * iteration_num)) / (2 * state.net.max_batches);
+    //const float alpha = (1 + cos(3.141592 * epoch)) / (2 * state.net.max_batches);
+    const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2;
+
+    //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n",
+    //    epoch, alpha, (*state.net.seen), state.net.max_batches, state.net.train_images_num);
+
+    float *a_avg = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float));
+    float *g = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float));
+
+    int b;
+    int w, h, c;
+
+    l.max_boxes = state.net.num_boxes;
+    l.truths = l.max_boxes*(4 + 1);
+
+    for (b = 0; b < l.batch; ++b)
+    {
+        // calculate G: binary mask that is 1 inside every ground-truth box
+        int t;
+        for (t = 0; t < state.net.num_boxes; ++t) {
+            box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1);
+            if (!truth.x) break;  // continue;
+
+            int left = floor((truth.x - truth.w / 2) * l.out_w);
+            int right = ceil((truth.x + truth.w / 2) * l.out_w);
+            int top = floor((truth.y - truth.h / 2) * l.out_h);
+            int bottom = ceil((truth.y + truth.h / 2) * l.out_h);
+            // clamp to the feature map, otherwise boxes that touch the image border write outside of g[]
+            if (left < 0) left = 0;
+            if (right > l.out_w) right = l.out_w;
+            if (top < 0) top = 0;
+            if (bottom > l.out_h) bottom = l.out_h;
+
+            for (w = left; w < right; w++) {    // right and bottom are exclusive upper bounds
+                for (h = top; h < bottom; h++) {
+                    g[w + l.out_w * h + l.out_w*l.out_h*b] = 1;
+                }
+            }
+        }
+    }
+
+    for (b = 0; b < l.batch; ++b)
+    {
+        // calculate average A: per-pixel mean of the activations over all channels
+        for (w = 0; w < l.out_w; w++) {
+            for (h = 0; h < l.out_h; h++) {
+                for (c = 0; c < l.out_c; c++) {
+                    a_avg[w + l.out_w*(h + l.out_h*b)] += l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))];
+                }
+                a_avg[w + l.out_w*(h + l.out_h*b)] /= l.out_c;  // divide by d (number of channels)
+            }
+        }
+    }
+
+    // change activation
+    for (b = 0; b < l.batch; ++b)
+    {
+        for (w = 0; w < l.out_w; w++) {
+            for (h = 0; h < l.out_h; h++) {
+                for (c = 0; c < l.out_c; c++)
+                {
+                    // a = a + alpha(t) * e(c,i,j),  where e(c,i,j) = g(i,j) * avg_a(i,j)
+                    l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))] +=
+                        alpha *
+                        g[w + l.out_w*(h + l.out_h*b)] *
+                        a_avg[w + l.out_w*(h + l.out_h*b)];
+
+                    //l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))] =
+                    //    alpha * g[w + l.out_w*(h + l.out_h*b)] * a_avg[w + l.out_w*(h + l.out_h*b)];
+                }
+            }
+        }
+    }
+
+    if(0)   // visualize ground truth
+    {
+        for (b = 0; b < l.batch; ++b)
+        {
+            image img = float_to_image(l.out_w, l.out_h, 1, &g[l.out_w*l.out_h*b]);
+            char buff[100];
+            sprintf(buff, "a_excitation_%d", b);
+            show_image_cv(img, buff);
+
+            image img2 = float_to_image(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]);
+            char buff2[100];
+            sprintf(buff2, "a_excitation_act_%d", b);
+            show_image_cv(img2, buff2);
+            wait_key_cv(5);
+        }
+        wait_until_press_key_cv();
+    }
+
+    free(g);
+    free(a_avg);
 }
 
@@ -1221,4 +1341,5 @@ image *visualize_convolutional_layer(convolutional_layer l, char *window, image
     //save_image(dc, buff);
     free_image(dc);
     return single_weights;
-}
\ No newline at end of file
+}
+
diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h
index e937b4c5..e62b155c 100644
--- a/src/convolutional_layer.h
+++ b/src/convolutional_layer.h
@@ -56,6 +56,7 @@ int convolutional_out_height(convolutional_layer layer);
 int convolutional_out_width(convolutional_layer layer);
 void rescale_weights(convolutional_layer l, float scale, float trans);
 void rgbgr_weights(convolutional_layer l);
+void assisted_excitation_forward(convolutional_layer l, network_state state);
 
 #ifdef __cplusplus
 }
diff --git a/src/detector.c b/src/detector.c
index 9f77ea5a..689ace6d 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -123,6 +123,8 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
     args.flip = net.flip;
     args.jitter = jitter;
     args.num_boxes = l.max_boxes;
+    net.num_boxes = args.num_boxes;
+    net.train_images_num = train_images_num;
     args.d = &buffer;
     args.type = DETECTION_DATA;
     args.threads = 64;    // 16 or 64
diff --git a/src/parser.c b/src/parser.c
index 1b79efe8..09e79d2d 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -185,6 +185,7 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo
     convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, share_layer);
     layer.flipped = option_find_int_quiet(options, "flipped", 0);
     layer.dot = option_find_float_quiet(options, "dot", 0);
+    layer.assisted_excitation = option_find_int_quiet(options, "assisted_excitation", 0);
 
     if(params.net.adam){
         layer.B1 = params.net.B1;
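
Usage note: with this patch applied, assisted excitation is switched on per layer
from the .cfg file by adding assisted_excitation=1 to a [convolutional] section.
During training the layer then raises every activation that lies inside a
ground-truth box by alpha(t) * g(i,j) * avg_a(i,j); since alpha follows the
cosine schedule above, the extra excitation fades to zero by the end of training,
and inference is unaffected (the call is guarded by state.train).

For experimenting with the update rule outside of darknet, below is a minimal
self-contained C sketch of the same computation for a single image. It is an
illustration under assumptions, not darknet code: the helper names ae_alpha and
assisted_excite are invented for this sketch; only the w-fastest tensor layout
matches l.output.

    /* Sketch of the assisted-excitation update; NOT darknet code.
       Layout: output[w + W*(h + H*c)], i.e. w varies fastest, as in l.output. */
    #include <math.h>
    #include <stdio.h>

    /* Cosine schedule from the patch: 1 at iteration 0, 0 at max_batches. */
    static float ae_alpha(int iteration_num, int max_batches)
    {
        return (1 + cosf(3.141592f * iteration_num / max_batches)) / 2;
    }

    /* output: W*H*C activations of one image; g: W*H mask, 1 inside truth boxes. */
    static void assisted_excite(float *output, const float *g,
                                int W, int H, int C, float alpha)
    {
        for (int h = 0; h < H; ++h) {
            for (int w = 0; w < W; ++w) {
                float a_avg = 0;    /* per-pixel mean over channels */
                for (int c = 0; c < C; ++c) a_avg += output[w + W*(h + H*c)];
                a_avg /= C;
                /* a(c,i,j) += alpha * g(i,j) * avg_a(i,j), same for every channel */
                for (int c = 0; c < C; ++c)
                    output[w + W*(h + H*c)] += alpha * g[w + W*h] * a_avg;
            }
        }
    }

    int main(void)
    {
        enum { W = 4, H = 4, C = 2 };
        float output[W*H*C], g[W*H] = { 0 };
        for (int i = 0; i < W*H*C; ++i) output[i] = 1.0f;  /* dummy activations */
        g[1 + W*1] = 1;                     /* one "object" cell at (1,1) */

        float alpha = ae_alpha(0, 500200);  /* early in training -> alpha = 1 */
        assisted_excite(output, g, W, H, C, alpha);

        printf("alpha=%.3f excited=%.3f background=%.3f\n",
               alpha, output[1 + W*1], output[0]);  /* 2.000 vs 1.000 */
        return 0;
    }

Compile with gcc and link -lm. At iteration 0 the excited cell doubles
(1.0 -> 2.0) while background cells are untouched; the same run with
iteration_num == max_batches leaves the activations essentially unchanged,
since alpha vanishes there.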