Added new [convolutional] activation=normalize_channels_softmax_maxval for ASFF

Branch: pull/4783/head
Author: AlexeyAB, 5 years ago
Parent: 4b59770886
Commit: 298805cefe
Changed files:
1. include/darknet.h (2 changed lines)
2. src/activation_kernels.cu (21 changed lines)
3. src/activations.c (20 changed lines)
4. src/activations.h (4 changed lines)
5. src/blas.c (4 changed lines)
6. src/blas_kernels.cu (5 changed lines)
7. src/convolutional_kernels.cu (8 changed lines)
8. src/convolutional_layer.c (8 changed lines)

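Usage note: the new activation is selected from the cfg file by name, just like the existing normalize_channels_softmax option (see the parser change in src/activations.c below). A minimal cfg sketch follows; the 1x1 layer shape and its role as an ASFF fusion-weight layer are illustrative assumptions, only the activation string itself comes from this commit:

# illustrative ASFF fusion-weight layer: a 1x1 convolution whose channels are
# normalized with the new max-subtracted (numerically stable) channel softmax
[convolutional]
filters=3
size=1
stride=1
activation=normalize_channels_softmax_maxval

Compared to activation=normalize_channels_softmax, the _maxval variant subtracts the per-position maximum across channels before exponentiating, which avoids expf() overflow when the fusion logits grow large.
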
include/darknet.h
@@ -102,7 +102,7 @@ typedef struct tree {
// activations.h
typedef enum {
LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX
LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH, MISH, NORM_CHAN, NORM_CHAN_SOFTMAX, NORM_CHAN_SOFTMAX_MAXVAL
}ACTIVATION;
// parser.h

src/activation_kernels.cu
@@ -2,6 +2,7 @@
#include <cuda_runtime.h>
#include <curand.h>
#include <cublas_v2.h>
#include <float.h>
#include "activations.h"
#include "dark_cuda.h"
@@ -464,7 +465,7 @@ extern "C" void activate_array_normalize_channels_ongpu(float *x, int n, int bat
__global__ void activate_array_normalize_channels_softmax_kernel(float *x, int size, int batch, int channels, int wh_step, float *output_gpu)
__global__ void activate_array_normalize_channels_softmax_kernel(float *x, int size, int batch, int channels, int wh_step, float *output_gpu, int use_max_val)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
@@ -474,20 +475,30 @@ __global__ void activate_array_normalize_channels_softmax_kernel(float *x, int s
const float eps = 0.0001;
if (i < size) {
float sum = eps;
float max_val = -FLT_MAX;
int k;
if (use_max_val) {
for (k = 0; k < channels; ++k) {
float val = x[wh_i + k * wh_step + b*wh_step*channels];
sum += expf(val);
if (val > max_val) max_val = val;
}
}
else
max_val = 0;
for (k = 0; k < channels; ++k) {
float val = x[wh_i + k * wh_step + b*wh_step*channels];
sum += expf(val - max_val);
}
for (k = 0; k < channels; ++k) {
float val = x[wh_i + k * wh_step + b*wh_step*channels];
val = expf(val) / sum;
val = expf(val - max_val) / sum;
output_gpu[wh_i + k * wh_step + b*wh_step*channels] = val;
}
}
}
extern "C" void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu)
extern "C" void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu, int use_max_val)
{
// n = w*h*c*batch
// size = w*h*batch
@@ -495,7 +506,7 @@ extern "C" void activate_array_normalize_channels_softmax_ongpu(float *x, int n,
const int num_blocks = get_number_of_blocks(size, BLOCK);
activate_array_normalize_channels_softmax_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (x, size, batch, channels, wh_step, output_gpu);
activate_array_normalize_channels_softmax_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> > (x, size, batch, channels, wh_step, output_gpu, use_max_val);
CHECK_CUDA(cudaPeekAtLastError());
}

src/activations.c
@@ -4,6 +4,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
char *get_activation_string(ACTIVATION a)
{
@@ -49,6 +50,7 @@ ACTIVATION get_activation(char *s)
if (strcmp(s, "mish") == 0) return MISH;
if (strcmp(s, "normalize_channels") == 0) return NORM_CHAN;
if (strcmp(s, "normalize_channels_softmax") == 0) return NORM_CHAN_SOFTMAX;
if (strcmp(s, "normalize_channels_softmax_maxval") == 0) return NORM_CHAN_SOFTMAX_MAXVAL;
if (strcmp(s, "loggy")==0) return LOGGY;
if (strcmp(s, "relu")==0) return RELU;
if (strcmp(s, "elu")==0) return ELU;
@@ -177,7 +179,7 @@ void activate_array_normalize_channels(float *x, const int n, int batch, int cha
}
}
void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output)
void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output, int use_max_val)
{
int size = n / channels;
@@ -190,14 +192,24 @@ void activate_array_normalize_channels_softmax(float *x, const int n, int batch,
const float eps = 0.0001;
if (i < size) {
float sum = eps;
float max_val = -FLT_MAX;
int k;
if (use_max_val) {
for (k = 0; k < channels; ++k) {
float val = x[wh_i + k * wh_step + b*wh_step*channels];
sum += expf(val);
if (val > max_val) max_val = val;
}
}
else
max_val = 0;
for (k = 0; k < channels; ++k) {
float val = x[wh_i + k * wh_step + b*wh_step*channels];
sum += expf(val - max_val);
}
for (k = 0; k < channels; ++k) {
float val = x[wh_i + k * wh_step + b*wh_step*channels];
val = expf(val) / sum;
val = expf(val - max_val) / sum;
output[wh_i + k * wh_step + b*wh_step*channels] = val;
}
}
@@ -277,6 +289,8 @@ float gradient(float x, ACTIVATION a)
return relu_gradient(x);
case NORM_CHAN:
//return relu_gradient(x);
case NORM_CHAN_SOFTMAX_MAXVAL:
//...
case NORM_CHAN_SOFTMAX:
printf(" Error: should be used custom NORM_CHAN or NORM_CHAN_SOFTMAX-function for gradient \n");
exit(0);

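For reference, a minimal self-contained C sketch (not part of this commit) of the max-subtraction trick that the CUDA kernel and the CPU function above both implement when use_max_val is set. Subtracting a constant from every logit leaves the softmax ratios essentially unchanged (up to the small eps term in the denominator) but keeps every exponent at or below zero, so expf() cannot overflow:

#include <stdio.h>
#include <math.h>
#include <float.h>

// Plain channel softmax for one spatial position (the NORM_CHAN_SOFTMAX path):
// overflows to inf for large logits.
static void softmax_plain(const float *x, float *out, int n)
{
    float sum = 0.0001f;                                  // same eps as the functions above
    for (int k = 0; k < n; ++k) sum += expf(x[k]);
    for (int k = 0; k < n; ++k) out[k] = expf(x[k]) / sum;
}

// Max-subtracted softmax (the NORM_CHAN_SOFTMAX_MAXVAL path): every exponent
// is <= 0, so expf() stays finite no matter how large the logits are.
static void softmax_maxval(const float *x, float *out, int n)
{
    float max_val = -FLT_MAX;
    for (int k = 0; k < n; ++k) if (x[k] > max_val) max_val = x[k];
    float sum = 0.0001f;
    for (int k = 0; k < n; ++k) sum += expf(x[k] - max_val);
    for (int k = 0; k < n; ++k) out[k] = expf(x[k] - max_val) / sum;
}

int main(void)
{
    const float logits[3] = { 1000.f, 1001.f, 999.f };    // large enough to overflow expf()
    float a[3], b[3];
    softmax_plain(logits, a, 3);
    softmax_maxval(logits, b, 3);
    for (int k = 0; k < 3; ++k)
        printf("plain: %f   maxval: %f\n", a[k], b[k]);   // plain -> nan, maxval -> valid weights
    return 0;
}

The gradient path is unchanged: as the dispatch changes further below show, NORM_CHAN_SOFTMAX_MAXVAL is routed through the same gradient_array_normalize_channels_softmax as NORM_CHAN_SOFTMAX, since the output of both is an ordinary channel softmax.
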
src/activations.h
@@ -24,7 +24,7 @@ void activate_array_swish(float *x, const int n, float * output_sigmoid, float *
void activate_array_mish(float *x, const int n, float * activation_input, float * output);
void activate_array_normalize_channels(float *x, const int n, int batch, int channels, int wh_step, float *output);
void gradient_array_normalize_channels(float *x, const int n, int batch, int channels, int wh_step, float *delta);
void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output);
void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output, int use_max_val);
void gradient_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *delta);
#ifdef GPU
void activate_array_ongpu(float *x, int n, ACTIVATION a);
@@ -35,7 +35,7 @@ void gradient_array_swish_ongpu(float *x, int n, float *sigmoid_gpu, float *delt
void gradient_array_mish_ongpu(int n, float *activation_input_gpu, float *delta);
void activate_array_normalize_channels_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu);
void gradient_array_normalize_channels_ongpu(float *output_gpu, int n, int batch, int channels, int wh_step, float *delta_gpu);
void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu);
void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu, int use_max_val);
void gradient_array_normalize_channels_softmax_ongpu(float *output_gpu, int n, int batch, int channels, int wh_step, float *delta_gpu);
#endif

src/blas.c
@@ -90,7 +90,7 @@ void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *o
src_id /= src_outputs;
int src_b = src_id;
float sum = 1, max_val = -INFINITY;
float sum = 1, max_val = -FLT_MAX;
int i;
if (weights && weights_normalizion) {
if (weights_normalizion == SOFTMAX_NORMALIZATION) {
@@ -158,7 +158,7 @@ void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int
src_id /= src_outputs;
int src_b = src_id;
float grad = 1, sum = 1, max_val = -INFINITY;
float grad = 1, sum = 1, max_val = -FLT_MAX;
int i;
if (weights && weights_normalizion) {
if (weights_normalizion == SOFTMAX_NORMALIZATION) {

src/blas_kernels.cu
@@ -2,6 +2,7 @@
#include <curand.h>
#include <cublas_v2.h>
#include <assert.h>
#include <float.h>
#include "blas.h"
#include "dark_cuda.h"
@@ -716,7 +717,7 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
src_id /= src_outputs;
int src_b = src_id;
float sum = 1, max_val = -INFINITY;
float sum = 1, max_val = -FLT_MAX;
if (weights_gpu && weights_normalizion) {
if (weights_normalizion == SOFTMAX_NORMALIZATION) {
for (int i = 0; i < (n + 1); ++i) {
@@ -798,7 +799,7 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i
src_id /= src_outputs;
int src_b = src_id;
float grad = 1, sum = 1, max_val = -INFINITY;
float grad = 1, sum = 1, max_val = -FLT_MAX;
int i;
if (weights_gpu && weights_normalizion) {
if (weights_normalizion == SOFTMAX_NORMALIZATION) {

src/convolutional_kernels.cu
@@ -395,7 +395,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
else if (l.activation == MISH) activate_array_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
else if (l.activation == NORM_CHAN) activate_array_normalize_channels_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu);
else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu);
else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 0);
else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 1);
else if (l.activation != LINEAR && l.activation != LEAKY) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
//if(l.activation != LINEAR && l.activation != LEAKY) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
//if (l.binary || l.xnor) swap_binary(&l);
@@ -602,7 +603,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state)
if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
else if (l.activation == MISH) activate_array_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
else if (l.activation == NORM_CHAN) activate_array_normalize_channels_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu);
else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu);
else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 0);
else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 1);
else if (l.activation != LINEAR) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
//if(l.dot > 0) dot_error_gpu(l);
if(l.binary || l.xnor) swap_binary(&l);
@@ -647,7 +649,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
if (l.activation == SWISH) gradient_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu);
else if (l.activation == MISH) gradient_array_mish_ongpu(l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu);
else if (l.activation == NORM_CHAN_SOFTMAX) gradient_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
else if (l.activation == NORM_CHAN_SOFTMAX || l.activation == NORM_CHAN_SOFTMAX_MAXVAL) gradient_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
else if (l.activation == NORM_CHAN) gradient_array_normalize_channels_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
else gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);

src/convolutional_layer.c
@@ -1212,7 +1212,8 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.output);
else if (l.activation == MISH) activate_array_mish(l.output, l.outputs*l.batch, l.activation_input, l.output);
else if (l.activation == NORM_CHAN) activate_array_normalize_channels(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output);
else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output);
else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 0);
else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 1);
else activate_array_cpu_custom(l.output, m*n*l.batch, l.activation);
return;
@@ -1256,7 +1257,8 @@ void forward_convolutional_layer(convolutional_layer l, network_state state)
if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.output);
else if (l.activation == MISH) activate_array_mish(l.output, l.outputs*l.batch, l.activation_input, l.output);
else if (l.activation == NORM_CHAN) activate_array_normalize_channels(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output);
else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output);
else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 0);
else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 1);
else activate_array_cpu_custom(l.output, l.outputs*l.batch, l.activation);
if(l.binary || l.xnor) swap_binary(&l);
@@ -1395,7 +1397,7 @@ void backward_convolutional_layer(convolutional_layer l, network_state state)
if (l.activation == SWISH) gradient_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.delta);
else if (l.activation == MISH) gradient_array_mish(l.outputs*l.batch, l.activation_input, l.delta);
else if (l.activation == NORM_CHAN_SOFTMAX) gradient_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
else if (l.activation == NORM_CHAN_SOFTMAX || l.activation == NORM_CHAN_SOFTMAX_MAXVAL) gradient_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
else if (l.activation == NORM_CHAN) gradient_array_normalize_channels(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
else gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
