From 298805cefebd0e71274bc12c068384703c9684b4 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Tue, 28 Jan 2020 03:44:24 +0300
Subject: [PATCH] Added new [convolutional] activation=normalize_channels_softmax_maxval for ASFF

---
 include/darknet.h            |  2 +-
 src/activation_kernels.cu    | 21 ++++++++++++++++-----
 src/activations.c            | 20 +++++++++++++++++---
 src/activations.h            |  4 ++--
 src/blas.c                   |  4 ++--
 src/blas_kernels.cu          |  5 +++--
 src/convolutional_kernels.cu |  8 +++++---
 src/convolutional_layer.c    |  8 +++++---
 8 files changed, 51 insertions(+), 21 deletions(-)

diff --git a/include/darknet.h b/include/darknet.h
index 322b8535..0b65a79a 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -102,7 +102,7 @@ typedef struct tree {
 
 // activations.h
 typedef enum {
-    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX
+    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX, NORM_CHAN_SOFTMAX_MAXVAL
 }ACTIVATION;
 
 // parser.h
diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu
index 46e957dd..08af147a 100644
--- a/src/activation_kernels.cu
+++ b/src/activation_kernels.cu
@@ -2,6 +2,7 @@
 #include 
 #include 
 #include 
+#include <float.h>
 
 #include "activations.h"
 #include "dark_cuda.h"
@@ -464,7 +465,7 @@ extern "C" void activate_array_normalize_channels_ongpu(float *x, int n, int bat
 
 
 
-__global__ void activate_array_normalize_channels_softmax_kernel(float *x, int size, int batch, int channels, int wh_step, float *output_gpu)
+__global__ void activate_array_normalize_channels_softmax_kernel(float *x, int size, int batch, int channels, int wh_step, float *output_gpu, int use_max_val)
 {
     int i = blockIdx.x * blockDim.x + threadIdx.x;
 
@@ -474,20 +475,30 @@ __global__ void activate_array_normalize_channels_softmax_kernel(float *x, int s
     const float eps = 0.0001;
     if (i < size) {
         float sum = eps;
+        float max_val = -FLT_MAX;
         int k;
+        if (use_max_val) {
+            for (k = 0; k < channels; ++k) {
+                float val = x[wh_i + k * wh_step + b*wh_step*channels];
+                if (val > max_val) max_val = val;
+            }
+        }
+        else
+            max_val = 0;
+
         for (k = 0; k < channels; ++k) {
             float val = x[wh_i + k * wh_step + b*wh_step*channels];
-            sum += expf(val);
+            sum += expf(val - max_val);
         }
         for (k = 0; k < channels; ++k) {
             float val = x[wh_i + k * wh_step + b*wh_step*channels];
-            val = expf(val) / sum;
+            val = expf(val - max_val) / sum;
             output_gpu[wh_i + k * wh_step + b*wh_step*channels] = val;
         }
     }
 }
 
-extern "C" void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu)
+extern "C" void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu, int use_max_val)
 {
     // n = w*h*c*batch
     // size = w*h*batch
@@ -495,7 +506,7 @@ extern "C" void activate_array_normalize_channels_softmax_ongpu(float *x, int n,
 
     const int num_blocks = get_number_of_blocks(size, BLOCK);
 
-    activate_array_normalize_channels_softmax_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (x, size, batch, channels, wh_step, output_gpu);
+    activate_array_normalize_channels_softmax_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (x, size, batch, channels, wh_step, output_gpu, use_max_val);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
diff --git a/src/activations.c b/src/activations.c
index dc450b2f..ba260132 100644
--- a/src/activations.c
+++ b/src/activations.c
@@ -4,6 +4,7 @@
 #include 
 #include 
 #include 
+#include <float.h>
 
 char *get_activation_string(ACTIVATION a)
 {
@@ -49,6 +50,7 @@ ACTIVATION get_activation(char *s)
     if (strcmp(s, "mish") == 0) return MISH;
     if (strcmp(s, "normalize_channels") == 0) return NORM_CHAN;
     if (strcmp(s, "normalize_channels_softmax") == 0) return NORM_CHAN_SOFTMAX;
+    if (strcmp(s, "normalize_channels_softmax_maxval") == 0) return NORM_CHAN_SOFTMAX_MAXVAL;
     if (strcmp(s, "loggy")==0) return LOGGY;
     if (strcmp(s, "relu")==0) return RELU;
     if (strcmp(s, "elu")==0) return ELU;
@@ -177,7 +179,7 @@ void activate_array_normalize_channels(float *x, const int n, int batch, int cha
     }
 }
 
-void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output)
+void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output, int use_max_val)
 {
     int size = n / channels;
 
@@ -190,14 +192,24 @@ void activate_array_normalize_channels_softmax(float *x, const int n, int batch,
         const float eps = 0.0001;
         if (i < size) {
             float sum = eps;
+            float max_val = -FLT_MAX;
             int k;
+            if (use_max_val) {
+                for (k = 0; k < channels; ++k) {
+                    float val = x[wh_i + k * wh_step + b*wh_step*channels];
+                    if (val > max_val) max_val = val;
+                }
+            }
+            else
+                max_val = 0;
+
             for (k = 0; k < channels; ++k) {
                 float val = x[wh_i + k * wh_step + b*wh_step*channels];
-                sum += expf(val);
+                sum += expf(val - max_val);
             }
             for (k = 0; k < channels; ++k) {
                 float val = x[wh_i + k * wh_step + b*wh_step*channels];
-                val = expf(val) / sum;
+                val = expf(val - max_val) / sum;
                 output[wh_i + k * wh_step + b*wh_step*channels] = val;
             }
         }
@@ -277,6 +289,8 @@ float gradient(float x, ACTIVATION a)
         return relu_gradient(x);
     case NORM_CHAN:
         //return relu_gradient(x);
+    case NORM_CHAN_SOFTMAX_MAXVAL:
+        //...
     case NORM_CHAN_SOFTMAX:
         printf(" Error: should be used custom NORM_CHAN or NORM_CHAN_SOFTMAX-function for gradient \n");
         exit(0);
diff --git a/src/activations.h b/src/activations.h
index 05631fe3..fdbc28ef 100644
--- a/src/activations.h
+++ b/src/activations.h
@@ -24,7 +24,7 @@ void activate_array_swish(float *x, const int n, float * output_sigmoid, float *
 void activate_array_mish(float *x, const int n, float * activation_input, float * output);
 void activate_array_normalize_channels(float *x, const int n, int batch, int channels, int wh_step, float *output);
 void gradient_array_normalize_channels(float *x, const int n, int batch, int channels, int wh_step, float *delta);
-void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output);
+void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output, int use_max_val);
 void gradient_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *delta);
 #ifdef GPU
 void activate_array_ongpu(float *x, int n, ACTIVATION a);
@@ -35,7 +35,7 @@ void gradient_array_swish_ongpu(float *x, int n, float *sigmoid_gpu, float *delt
 void gradient_array_mish_ongpu(int n, float *activation_input_gpu, float *delta);
 void activate_array_normalize_channels_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu);
 void gradient_array_normalize_channels_ongpu(float *output_gpu, int n, int batch, int channels, int wh_step, float *delta_gpu);
-void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu);
+void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu, int use_max_val);
 void gradient_array_normalize_channels_softmax_ongpu(float *output_gpu, int n, int batch, int channels, int wh_step, float *delta_gpu);
 #endif
 
diff --git a/src/blas.c b/src/blas.c
index 9badc55f..d6f59367 100644
--- a/src/blas.c
+++ b/src/blas.c
@@ -90,7 +90,7 @@ void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *o
         src_id /= src_outputs;
         int src_b = src_id;
 
-        float sum = 1, max_val = -INFINITY;
+        float sum = 1, max_val = -FLT_MAX;
         int i;
         if (weights && weights_normalizion) {
             if (weights_normalizion == SOFTMAX_NORMALIZATION) {
@@ -158,7 +158,7 @@ void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int
         src_id /= src_outputs;
         int src_b = src_id;
 
-        float grad = 1, sum = 1, max_val = -INFINITY;
+        float grad = 1, sum = 1, max_val = -FLT_MAX;
         int i;
         if (weights && weights_normalizion) {
             if (weights_normalizion == SOFTMAX_NORMALIZATION) {
diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu
index 7d4667a2..94538cd0 100644
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -2,6 +2,7 @@
 #include 
 #include 
 #include 
+#include <float.h>
 
 #include "blas.h"
 #include "dark_cuda.h"
@@ -716,7 +717,7 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
         src_id /= src_outputs;
         int src_b = src_id;
 
-        float sum = 1, max_val = -INFINITY;
+        float sum = 1, max_val = -FLT_MAX;
         if (weights_gpu && weights_normalizion) {
             if (weights_normalizion == SOFTMAX_NORMALIZATION) {
                 for (int i = 0; i < (n + 1); ++i) {
@@ -798,7 +799,7 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i
         src_id /= src_outputs;
         int src_b = src_id;
 
-        float grad = 1, sum = 1, max_val = -INFINITY;
+        float grad = 1, sum = 1, max_val = -FLT_MAX;
         int i;
         if (weights_gpu && weights_normalizion) {
             if (weights_normalizion == SOFTMAX_NORMALIZATION) {
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 868bb599..02f95881 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -395,7 +395,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
     if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
     else if (l.activation == MISH) activate_array_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
     else if (l.activation == NORM_CHAN) activate_array_normalize_channels_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu);
-    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu);
+    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 0);
+    else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 1);
     else if (l.activation != LINEAR && l.activation != LEAKY) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
     //if(l.activation != LINEAR && l.activation != LEAKY) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
     //if (l.binary || l.xnor) swap_binary(&l);
@@ -602,7 +603,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
     if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
     else if (l.activation == MISH) activate_array_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
     else if (l.activation == NORM_CHAN) activate_array_normalize_channels_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu);
-    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu);
+    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 0);
+    else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 1);
     else if (l.activation != LINEAR) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
     //if(l.dot > 0) dot_error_gpu(l);
     if(l.binary || l.xnor) swap_binary(&l);
@@ -647,7 +649,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
 
     if (l.activation == SWISH) gradient_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu);
     else if (l.activation == MISH) gradient_array_mish_ongpu(l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu);
-    else if (l.activation == NORM_CHAN_SOFTMAX) gradient_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
+    else if (l.activation == NORM_CHAN_SOFTMAX || l.activation == NORM_CHAN_SOFTMAX_MAXVAL) gradient_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
     else if (l.activation == NORM_CHAN) gradient_array_normalize_channels_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
     else gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
 
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 83e9f2ab..d967b178 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -1212,7 +1212,8 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
     if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.output);
     else if (l.activation == MISH) activate_array_mish(l.output, l.outputs*l.batch, l.activation_input, l.output);
     else if (l.activation == NORM_CHAN) activate_array_normalize_channels(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output);
-    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output);
+    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 0);
+    else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 1);
     else activate_array_cpu_custom(l.output, m*n*l.batch, l.activation);
 
     return;
@@ -1256,7 +1257,8 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
     if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.output);
     else if (l.activation == MISH) activate_array_mish(l.output, l.outputs*l.batch, l.activation_input, l.output);
     else if (l.activation == NORM_CHAN) activate_array_normalize_channels(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output);
-    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output);
+    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 0);
+    else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 1);
     else activate_array_cpu_custom(l.output, l.outputs*l.batch, l.activation);
 
     if(l.binary || l.xnor) swap_binary(&l);
@@ -1395,7 +1397,7 @@ void backward_convolutional_layer(convolutional_layer l, network_state state)
 
     if (l.activation == SWISH) gradient_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.delta);
     else if (l.activation == MISH) gradient_array_mish(l.outputs*l.batch, l.activation_input, l.delta);
-    else if (l.activation == NORM_CHAN_SOFTMAX) gradient_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
+    else if (l.activation == NORM_CHAN_SOFTMAX || l.activation == NORM_CHAN_SOFTMAX_MAXVAL) gradient_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
     else if (l.activation == NORM_CHAN) gradient_array_normalize_channels(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
     else gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
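
Note (not part of the patch): the new NORM_CHAN_SOFTMAX_MAXVAL path is the standard numerically stable softmax. Subtracting the per-position channel maximum before expf() keeps every exponent at or below zero, so large ASFF fusion logits no longer overflow to inf, while the normalized weights are (up to the eps term) unchanged. A minimal standalone C sketch of the same idea, mirroring the CPU path above; the helper name and the sample logits are illustrative only:

    #include <stdio.h>
    #include <math.h>
    #include <float.h>

    /* Channel-wise softmax for one spatial position, with the optional
     * max-subtraction used by NORM_CHAN_SOFTMAX_MAXVAL (sketch only). */
    static void softmax_channels(const float *in, float *out, int channels, int use_max_val)
    {
        const float eps = 0.0001f;
        float max_val = 0;
        if (use_max_val) {
            max_val = -FLT_MAX;
            for (int k = 0; k < channels; ++k)
                if (in[k] > max_val) max_val = in[k];
        }
        float sum = eps;
        for (int k = 0; k < channels; ++k) sum += expf(in[k] - max_val);
        for (int k = 0; k < channels; ++k) out[k] = expf(in[k] - max_val) / sum;
    }

    int main(void)
    {
        const float logits[3] = { 120.f, 100.f, 90.f };  /* large enough that expf() overflows float */
        float plain[3], stable[3];
        softmax_channels(logits, plain, 3, 0);   /* expf(120) -> inf, so the weights become nan */
        softmax_channels(logits, stable, 3, 1);  /* max subtracted, weights stay finite: ~1, ~2e-9, ~9e-14 */
        for (int k = 0; k < 3; ++k) printf("plain=%g stable=%g\n", plain[k], stable[k]);
        return 0;
    }

In a .cfg file the new behaviour is selected with activation=normalize_channels_softmax_maxval on the ASFF fusion [convolutional] layer, exactly as get_activation() parses it above; activation=normalize_channels_softmax keeps the previous unshifted behaviour (use_max_val = 0).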