From 298805cefebd0e71274bc12c068384703c9684b4 Mon Sep 17 00:00:00 2001
From: AlexeyAB
Date: Tue, 28 Jan 2020 03:44:24 +0300
Subject: [PATCH] Added new [convolutional] activation=normalize_channels_softmax_maxval for ASFF

---
 include/darknet.h            |  2 +-
 src/activation_kernels.cu    | 21 ++++++++++++++++-----
 src/activations.c            | 20 +++++++++++++++++---
 src/activations.h            |  4 ++--
 src/blas.c                   |  4 ++--
 src/blas_kernels.cu          |  5 +++--
 src/convolutional_kernels.cu |  8 +++++---
 src/convolutional_layer.c    |  8 +++++---
 8 files changed, 51 insertions(+), 21 deletions(-)

diff --git a/include/darknet.h b/include/darknet.h
index 322b8535..0b65a79a 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -102,7 +102,7 @@ typedef struct tree {
 
 // activations.h
 typedef enum {
-    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX
+    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX, NORM_CHAN_SOFTMAX_MAXVAL
 }ACTIVATION;
 
 // parser.h
diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu
index 46e957dd..08af147a 100644
--- a/src/activation_kernels.cu
+++ b/src/activation_kernels.cu
@@ -2,6 +2,7 @@
 #include 
 #include 
 #include 
+#include <float.h>
 
 #include "activations.h"
 #include "dark_cuda.h"
@@ -464,7 +465,7 @@ extern "C" void activate_array_normalize_channels_ongpu(float *x, int n, int bat
 
 
 
-__global__ void activate_array_normalize_channels_softmax_kernel(float *x, int size, int batch, int channels, int wh_step, float *output_gpu)
+__global__ void activate_array_normalize_channels_softmax_kernel(float *x, int size, int batch, int channels, int wh_step, float *output_gpu, int use_max_val)
 {
     int i = blockIdx.x * blockDim.x + threadIdx.x;
 
@@ -474,20 +475,30 @@ __global__ void activate_array_normalize_channels_softmax_kernel(float *x, int s
     const float eps = 0.0001;
     if (i < size) {
         float sum = eps;
+        float max_val = -FLT_MAX;
         int k;
+        if (use_max_val) {
+            for (k = 0; k < channels; ++k) {
+                float val = x[wh_i + k * wh_step + b*wh_step*channels];
+                if (val > max_val) max_val = val;
+            }
+        }
+        else
+            max_val = 0;
+
         for (k = 0; k < channels; ++k) {
             float val = x[wh_i + k * wh_step + b*wh_step*channels];
-            sum += expf(val);
+            sum += expf(val - max_val);
         }
         for (k = 0; k < channels; ++k) {
             float val = x[wh_i + k * wh_step + b*wh_step*channels];
-            val = expf(val) / sum;
+            val = expf(val - max_val) / sum;
             output_gpu[wh_i + k * wh_step + b*wh_step*channels] = val;
         }
     }
 }
 
-extern "C" void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu)
+extern "C" void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu, int use_max_val)
 {
     // n = w*h*c*batch
     // size = w*h*batch
@@ -495,7 +506,7 @@ extern "C" void activate_array_normalize_channels_softmax_ongpu(float *x, int n,
 
     const int num_blocks = get_number_of_blocks(size, BLOCK);
 
-    activate_array_normalize_channels_softmax_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (x, size, batch, channels, wh_step, output_gpu);
+    activate_array_normalize_channels_softmax_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (x, size, batch, channels, wh_step, output_gpu, use_max_val);
     CHECK_CUDA(cudaPeekAtLastError());
 }
 
diff --git a/src/activations.c b/src/activations.c
index dc450b2f..ba260132 100644
--- a/src/activations.c
+++ b/src/activations.c
@@ -4,6 +4,7 @@
 #include 
 #include 
 #include 
+#include <float.h>
 
 char *get_activation_string(ACTIVATION a)
 {
@@ -49,6 +50,7 @@ ACTIVATION get_activation(char *s)
     if (strcmp(s, "mish") == 0) return MISH;
     if (strcmp(s, "normalize_channels") == 0) return NORM_CHAN;
     if (strcmp(s, "normalize_channels_softmax") == 0) return NORM_CHAN_SOFTMAX;
+    if (strcmp(s, "normalize_channels_softmax_maxval") == 0) return NORM_CHAN_SOFTMAX_MAXVAL;
     if (strcmp(s, "loggy")==0) return LOGGY;
     if (strcmp(s, "relu")==0) return RELU;
     if (strcmp(s, "elu")==0) return ELU;
@@ -177,7 +179,7 @@ void activate_array_normalize_channels(float *x, const int n, int batch, int cha
     }
 }
 
-void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output)
+void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output, int use_max_val)
 {
     int size = n / channels;
 
@@ -190,14 +192,24 @@ void activate_array_normalize_channels_softmax(float *x, const int n, int batch,
         const float eps = 0.0001;
         if (i < size) {
             float sum = eps;
+            float max_val = -FLT_MAX;
             int k;
+            if (use_max_val) {
+                for (k = 0; k < channels; ++k) {
+                    float val = x[wh_i + k * wh_step + b*wh_step*channels];
+                    if (val > max_val) max_val = val;
+                }
+            }
+            else
+                max_val = 0;
+
             for (k = 0; k < channels; ++k) {
                 float val = x[wh_i + k * wh_step + b*wh_step*channels];
-                sum += expf(val);
+                sum += expf(val - max_val);
             }
             for (k = 0; k < channels; ++k) {
                 float val = x[wh_i + k * wh_step + b*wh_step*channels];
-                val = expf(val) / sum;
+                val = expf(val - max_val) / sum;
                 output[wh_i + k * wh_step + b*wh_step*channels] = val;
             }
         }
@@ -277,6 +289,8 @@ float gradient(float x, ACTIVATION a)
         return relu_gradient(x);
     case NORM_CHAN:
         //return relu_gradient(x);
+    case NORM_CHAN_SOFTMAX_MAXVAL:
+        //...
     case NORM_CHAN_SOFTMAX:
         printf(" Error: should be used custom NORM_CHAN or NORM_CHAN_SOFTMAX-function for gradient \n");
         exit(0);
diff --git a/src/activations.h b/src/activations.h
index 05631fe3..fdbc28ef 100644
--- a/src/activations.h
+++ b/src/activations.h
@@ -24,7 +24,7 @@ void activate_array_swish(float *x, const int n, float * output_sigmoid, float *
 void activate_array_mish(float *x, const int n, float * activation_input, float * output);
 void activate_array_normalize_channels(float *x, const int n, int batch, int channels, int wh_step, float *output);
 void gradient_array_normalize_channels(float *x, const int n, int batch, int channels, int wh_step, float *delta);
-void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output);
+void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output, int use_max_val);
 void gradient_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *delta);
 #ifdef GPU
 void activate_array_ongpu(float *x, int n, ACTIVATION a);
@@ -35,7 +35,7 @@ void gradient_array_swish_ongpu(float *x, int n, float *sigmoid_gpu, float *delt
 void gradient_array_mish_ongpu(int n, float *activation_input_gpu, float *delta);
 void activate_array_normalize_channels_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu);
 void gradient_array_normalize_channels_ongpu(float *output_gpu, int n, int batch, int channels, int wh_step, float *delta_gpu);
-void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu);
+void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu, int use_max_val);
 void gradient_array_normalize_channels_softmax_ongpu(float *output_gpu, int n, int batch, int channels, int wh_step, float *delta_gpu);
 #endif
 
diff --git a/src/blas.c b/src/blas.c
index 9badc55f..d6f59367 100644
--- a/src/blas.c
+++ b/src/blas.c
@@ -90,7 +90,7 @@ void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *o
         src_id /= src_outputs;
         int src_b = src_id;
 
-        float sum = 1, max_val = -INFINITY;
+        float sum = 1, max_val = -FLT_MAX;
         int i;
         if (weights && weights_normalizion) {
             if (weights_normalizion == SOFTMAX_NORMALIZATION) {
@@ -158,7 +158,7 @@ void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int
         src_id /= src_outputs;
         int src_b = src_id;
 
-        float grad = 1, sum = 1, max_val = -INFINITY;
+        float grad = 1, sum = 1, max_val = -FLT_MAX;
         int i;
         if (weights && weights_normalizion) {
             if (weights_normalizion == SOFTMAX_NORMALIZATION) {
diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu
index 7d4667a2..94538cd0 100644
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -2,6 +2,7 @@
 #include 
 #include 
 #include 
+#include <float.h>
 
 #include "blas.h"
 #include "dark_cuda.h"
@@ -716,7 +717,7 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
         src_id /= src_outputs;
         int src_b = src_id;
 
-        float sum = 1, max_val = -INFINITY;
+        float sum = 1, max_val = -FLT_MAX;
         if (weights_gpu && weights_normalizion) {
             if (weights_normalizion == SOFTMAX_NORMALIZATION) {
                 for (int i = 0; i < (n + 1); ++i) {
@@ -798,7 +799,7 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i
         src_id /= src_outputs;
         int src_b = src_id;
 
-        float grad = 1, sum = 1, max_val = -INFINITY;
+        float grad = 1, sum = 1, max_val = -FLT_MAX;
         int i;
         if (weights_gpu && weights_normalizion) {
             if (weights_normalizion == SOFTMAX_NORMALIZATION) {
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 868bb599..02f95881 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -395,7 +395,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
     if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
     else if (l.activation == MISH) activate_array_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
     else if (l.activation == NORM_CHAN) activate_array_normalize_channels_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu);
-    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu);
+    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 0);
+    else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 1);
     else if (l.activation != LINEAR && l.activation != LEAKY) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
     //if(l.activation != LINEAR && l.activation != LEAKY) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
     //if (l.binary || l.xnor) swap_binary(&l);
@@ -602,7 +603,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
     if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
     else if (l.activation == MISH) activate_array_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
     else if (l.activation == NORM_CHAN) activate_array_normalize_channels_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu);
-    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu);
+    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 0);
+    else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 1);
     else if (l.activation != LINEAR) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
     //if(l.dot > 0) dot_error_gpu(l);
     if(l.binary || l.xnor) swap_binary(&l);
@@ -647,7 +649,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
 
     if (l.activation == SWISH) gradient_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu);
     else if (l.activation == MISH) gradient_array_mish_ongpu(l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu);
-    else if (l.activation == NORM_CHAN_SOFTMAX) gradient_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
+    else if (l.activation == NORM_CHAN_SOFTMAX || l.activation == NORM_CHAN_SOFTMAX_MAXVAL) gradient_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
     else if (l.activation == NORM_CHAN) gradient_array_normalize_channels_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
     else gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
 
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 83e9f2ab..d967b178 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -1212,7 +1212,8 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
     if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.output);
     else if (l.activation == MISH) activate_array_mish(l.output, l.outputs*l.batch, l.activation_input, l.output);
     else if (l.activation == NORM_CHAN) activate_array_normalize_channels(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output);
-    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output);
+    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 0);
+    else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 1);
     else activate_array_cpu_custom(l.output, m*n*l.batch, l.activation);
 
     return;
@@ -1256,7 +1257,8 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
     if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.output);
     else if (l.activation == MISH) activate_array_mish(l.output, l.outputs*l.batch, l.activation_input, l.output);
     else if (l.activation == NORM_CHAN) activate_array_normalize_channels(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output);
-    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output);
+    else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 0);
+    else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 1);
     else activate_array_cpu_custom(l.output, l.outputs*l.batch, l.activation);
 
     if(l.binary || l.xnor) swap_binary(&l);
@@ -1395,7 +1397,7 @@ void backward_convolutional_layer(convolutional_layer l, network_state state)
 
     if (l.activation == SWISH) gradient_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.delta);
     else if (l.activation == MISH) gradient_array_mish(l.outputs*l.batch, l.activation_input, l.delta);
-    else if (l.activation == NORM_CHAN_SOFTMAX) gradient_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
+    else if (l.activation == NORM_CHAN_SOFTMAX || l.activation == NORM_CHAN_SOFTMAX_MAXVAL) gradient_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
     else if (l.activation == NORM_CHAN) gradient_array_normalize_channels(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
     else gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
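
Note (not part of the patch): the new NORM_CHAN_SOFTMAX_MAXVAL path is the standard numerically stable softmax. Subtracting the per-position channel maximum before expf() keeps every exponent at or below zero, so large ASFF fusion logits no longer overflow to inf, while the normalized weights are (up to the eps term) unchanged. A minimal standalone C sketch of the same idea, mirroring the CPU path above; the helper name and the sample logits are illustrative only:

    #include <stdio.h>
    #include <math.h>
    #include <float.h>

    /* Channel-wise softmax for one spatial position, with the optional
     * max-subtraction used by NORM_CHAN_SOFTMAX_MAXVAL (sketch only). */
    static void softmax_channels(const float *in, float *out, int channels, int use_max_val)
    {
        const float eps = 0.0001f;
        float max_val = 0;
        if (use_max_val) {
            max_val = -FLT_MAX;
            for (int k = 0; k < channels; ++k)
                if (in[k] > max_val) max_val = in[k];
        }
        float sum = eps;
        for (int k = 0; k < channels; ++k) sum += expf(in[k] - max_val);
        for (int k = 0; k < channels; ++k) out[k] = expf(in[k] - max_val) / sum;
    }

    int main(void)
    {
        const float logits[3] = { 120.f, 100.f, 90.f };  /* large enough that expf() overflows float */
        float plain[3], stable[3];
        softmax_channels(logits, plain, 3, 0);   /* expf(120) -> inf, so the weights become nan */
        softmax_channels(logits, stable, 3, 1);  /* max subtracted, weights stay finite: ~1, ~2e-9, ~9e-14 */
        for (int k = 0; k < 3; ++k) printf("plain=%g stable=%g\n", plain[k], stable[k]);
        return 0;
    }

In a .cfg file the new behaviour is selected with activation=normalize_channels_softmax_maxval on the ASFF fusion [convolutional] layer, exactly as get_activation() parses it above; activation=normalize_channels_softmax keeps the previous unshifted behaviour (use_max_val = 0).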