From c1f184249e996d7248e40cffee138a11cdd89ae0 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Thu, 8 Aug 2019 00:29:06 +0300
Subject: [PATCH] Added assisted_excitation_forward for training on CPU

---
 include/darknet.h         |   3 +
 src/convolutional_layer.c | 123 ++++++++++++++++++++++++++++++++++++++-
 src/convolutional_layer.h |   1 +
 src/detector.c            |   2 +
 src/parser.c              |   1 +
 5 files changed, 129 insertions(+), 1 deletion(-)

diff --git a/include/darknet.h b/include/darknet.h
index 72466ab1..5d87a832 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -278,6 +278,7 @@ struct layer {
     float focus;
     int classfix;
     int absolute;
+    int assisted_excitation;
 
     int onlyforward;
     int stopbackward;
@@ -580,6 +581,8 @@ typedef struct network {
     int time_steps;
     int step;
     int max_batches;
+    int num_boxes;
+    int train_images_num;
     float *seq_scales;
     float *scales;
     int *steps;
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index b8972f45..20fc45c6 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -1076,6 +1076,126 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
     else activate_array_cpu_custom(l.output, l.outputs*l.batch, l.activation);
 
     if(l.binary || l.xnor) swap_binary(&l);
+
+    //visualize_convolutional_layer(l, "conv_visual", NULL);
+    //wait_until_press_key_cv();
+
+    if(l.assisted_excitation && state.train) assisted_excitation_forward(l, state);
+}
+
+static box float_to_box_stride(float *f, int stride)
+{
+    box b = { 0 };
+    b.x = f[0];
+    b.y = f[1 * stride];
+    b.w = f[2 * stride];
+    b.h = f[3 * stride];
+    return b;
+}
+
+void assisted_excitation_forward(convolutional_layer l, network_state state)
+{
+    const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions);
+
+    // epoch
+    const float epoch = (float)(*state.net.seen) / state.net.train_images_num;
+
+    // calculate alpha: cosine schedule that decays from 1 at the start of training to 0 at max_batches
+    //const float alpha = (1 + cos(3.141592 * iteration_num)) / (2 * state.net.max_batches);
+    //const float alpha = (1 + cos(3.141592 * epoch)) / (2 * state.net.max_batches);
+    const float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2;
+
+    //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n",
+    //    epoch, alpha, (*state.net.seen), state.net.max_batches, state.net.train_images_num);
+
+    float *a_avg = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float));
+    float *g = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float));
+
+    int b;
+    int w, h, c;
+
+    l.max_boxes = state.net.num_boxes;
+    l.truths = l.max_boxes*(4 + 1);
+
+    for (b = 0; b < l.batch; ++b)
+    {
+        // calculate G: binary mask that is 1 inside every ground-truth box
+        int t;
+        for (t = 0; t < state.net.num_boxes; ++t) {
+            box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1);
+            if (!truth.x) break;  // continue;
+
+            int left = floor((truth.x - truth.w / 2) * l.out_w);
+            int right = ceil((truth.x + truth.w / 2) * l.out_w);
+            int top = floor((truth.y - truth.h / 2) * l.out_h);
+            int bottom = ceil((truth.y + truth.h / 2) * l.out_h);
+            // clamp to the feature map, otherwise boxes that touch the image border write outside of g[]
+            if (left < 0) left = 0;
+            if (right > l.out_w) right = l.out_w;
+            if (top < 0) top = 0;
+            if (bottom > l.out_h) bottom = l.out_h;
+
+            for (w = left; w < right; w++) {    // right and bottom are exclusive upper bounds
+                for (h = top; h < bottom; h++) {
+                    g[w + l.out_w * h + l.out_w*l.out_h*b] = 1;
+                }
+            }
+        }
+    }
+
+    for (b = 0; b < l.batch; ++b)
+    {
+        // calculate average A: per-pixel mean of the activations over all channels
+        for (w = 0; w < l.out_w; w++) {
+            for (h = 0; h < l.out_h; h++) {
+                for (c = 0; c < l.out_c; c++) {
+                    a_avg[w + l.out_w*(h + l.out_h*b)] += l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))];
+                }
+                a_avg[w + l.out_w*(h + l.out_h*b)] /= l.out_c;  // divide by d (number of channels)
+            }
+        }
+    }
+
+    // change activation
+    for (b = 0; b < l.batch; ++b)
+    {
+        for (w = 0; w < l.out_w; w++) {
+            for (h = 0; h < l.out_h; h++) {
+                for (c = 0; c < l.out_c; c++)
+                {
+                    // a = a + alpha(t) * e(c,i,j),  where e(c,i,j) = g(i,j) * avg_a(i,j)
+                    l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))] +=
+                        alpha *
+                        g[w + l.out_w*(h + l.out_h*b)] *
+                        a_avg[w + l.out_w*(h + l.out_h*b)];
+
+                    //l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))] =
+                    //    alpha * g[w + l.out_w*(h + l.out_h*b)] * a_avg[w + l.out_w*(h + l.out_h*b)];
+                }
+            }
+        }
+    }
+
+    if(0)   // visualize ground truth
+    {
+        for (b = 0; b < l.batch; ++b)
+        {
+            image img = float_to_image(l.out_w, l.out_h, 1, &g[l.out_w*l.out_h*b]);
+            char buff[100];
+            sprintf(buff, "a_excitation_%d", b);
+            show_image_cv(img, buff);
+
+            image img2 = float_to_image(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]);
+            char buff2[100];
+            sprintf(buff2, "a_excitation_act_%d", b);
+            show_image_cv(img2, buff2);
+            wait_key_cv(5);
+        }
+        wait_until_press_key_cv();
+    }
+
+    free(g);
+    free(a_avg);
 }
 
@@ -1221,4 +1341,5 @@ image *visualize_convolutional_layer(convolutional_layer l, char *window, image
     //save_image(dc, buff);
     free_image(dc);
     return single_weights;
-}
\ No newline at end of file
+}
+
diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h
index e937b4c5..e62b155c 100644
--- a/src/convolutional_layer.h
+++ b/src/convolutional_layer.h
@@ -56,6 +56,7 @@ int convolutional_out_height(convolutional_layer layer);
 int convolutional_out_width(convolutional_layer layer);
 void rescale_weights(convolutional_layer l, float scale, float trans);
 void rgbgr_weights(convolutional_layer l);
+void assisted_excitation_forward(convolutional_layer l, network_state state);
 
 #ifdef __cplusplus
 }
diff --git a/src/detector.c b/src/detector.c
index 9f77ea5a..689ace6d 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -123,6 +123,8 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
     args.flip = net.flip;
     args.jitter = jitter;
     args.num_boxes = l.max_boxes;
+    net.num_boxes = args.num_boxes;
+    net.train_images_num = train_images_num;
     args.d = &buffer;
     args.type = DETECTION_DATA;
     args.threads = 64;    // 16 or 64
diff --git a/src/parser.c b/src/parser.c
index 1b79efe8..09e79d2d 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -185,6 +185,7 @@ convolutional_layer parse_convolutional(list *options, size_params params, netwo
     convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, share_layer);
     layer.flipped = option_find_int_quiet(options, "flipped", 0);
     layer.dot = option_find_float_quiet(options, "dot", 0);
+    layer.assisted_excitation = option_find_int_quiet(options, "assisted_excitation", 0);
 
     if(params.net.adam){
         layer.B1 = params.net.B1;
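
Usage note: with this patch applied, assisted excitation is switched on per layer
from the .cfg file by adding assisted_excitation=1 to a [convolutional] section.
During training the layer then raises every activation that lies inside a
ground-truth box by alpha(t) * g(i,j) * avg_a(i,j); since alpha follows the
cosine schedule above, the extra excitation fades to zero by the end of training,
and inference is unaffected (the call is guarded by state.train).

For experimenting with the update rule outside of darknet, below is a minimal
self-contained C sketch of the same computation for a single image. It is an
illustration under assumptions, not darknet code: the helper names ae_alpha and
assisted_excite are invented for this sketch; only the w-fastest tensor layout
matches l.output.

    /* Sketch of the assisted-excitation update; NOT darknet code.
       Layout: output[w + W*(h + H*c)], i.e. w varies fastest, as in l.output. */
    #include <math.h>
    #include <stdio.h>

    /* Cosine schedule from the patch: 1 at iteration 0, 0 at max_batches. */
    static float ae_alpha(int iteration_num, int max_batches)
    {
        return (1 + cosf(3.141592f * iteration_num / max_batches)) / 2;
    }

    /* output: W*H*C activations of one image; g: W*H mask, 1 inside truth boxes. */
    static void assisted_excite(float *output, const float *g,
                                int W, int H, int C, float alpha)
    {
        for (int h = 0; h < H; ++h) {
            for (int w = 0; w < W; ++w) {
                float a_avg = 0;    /* per-pixel mean over channels */
                for (int c = 0; c < C; ++c) a_avg += output[w + W*(h + H*c)];
                a_avg /= C;
                /* a(c,i,j) += alpha * g(i,j) * avg_a(i,j), same for every channel */
                for (int c = 0; c < C; ++c)
                    output[w + W*(h + H*c)] += alpha * g[w + W*h] * a_avg;
            }
        }
    }

    int main(void)
    {
        enum { W = 4, H = 4, C = 2 };
        float output[W*H*C], g[W*H] = { 0 };
        for (int i = 0; i < W*H*C; ++i) output[i] = 1.0f;  /* dummy activations */
        g[1 + W*1] = 1;                     /* one "object" cell at (1,1) */

        float alpha = ae_alpha(0, 500200);  /* early in training -> alpha = 1 */
        assisted_excite(output, g, W, H, C, alpha);

        printf("alpha=%.3f excited=%.3f background=%.3f\n",
               alpha, output[1 + W*1], output[0]);  /* 2.000 vs 1.000 */
        return 0;
    }

Compile with gcc and link -lm. At iteration 0 the excited cell doubles
(1.0 -> 2.0) while background cells are untouched; the same run with
iteration_num == max_batches leaves the activations essentially unchanged,
since alpha vanishes there.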