Fixed bug with [shortcut] layer and SWISH / MISH activations

pull/6241/head
AlexeyAB 6 years ago
parent defcf7e3a5
commit 55cfc272fb
  1. src/parser.c (14 changes)
  2. src/shortcut_layer.c (7 changes)
  3. src/shortcut_layer.h (2 changes)
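The fix is an ordering change: the old parse_shortcut constructed the layer first and assigned s.activation afterwards, but make_shortcut_layer only allocates the activation_input buffer that SWISH and MISH need when it knows the activation at construction time, so for these activations the buffer was never allocated. A condensed before/after sketch of the control flow (not the literal diff, which follows):

    /* Before: activation assigned after construction, so inside
     * make_shortcut_layer l.activation was still unset and the
     * SWISH/MISH activation_input buffer was never allocated. */
    layer s = make_shortcut_layer(/* dims... */, assisted_excitation, params.train);
    s.activation = get_activation(option_find_str(options, "activation", "linear"));

    /* After: activation parsed up front and passed in, so the
     * constructor can allocate activation_input when needed. */
    ACTIVATION activation = get_activation(option_find_str(options, "activation", "logistic"));
    layer s = make_shortcut_layer(/* dims... */, assisted_excitation, activation, params.train);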

src/parser.c
@@ -725,6 +725,9 @@ layer parse_batchnorm(list *options, size_params params)
 layer parse_shortcut(list *options, size_params params, network net)
 {
+    char *activation_s = option_find_str(options, "activation", "logistic");
+    ACTIVATION activation = get_activation(activation_s);
     int assisted_excitation = option_find_float_quiet(options, "assisted_excitation", 0);
     char *l = option_find(options, "from");
     int index = atoi(l);
@@ -734,11 +737,8 @@ layer parse_shortcut(list *options, size_params params, network net)
     layer from = net.layers[index];
     if (from.antialiasing) from = *from.input_layer;
-    layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c, assisted_excitation, params.train);
-    char *activation_s = option_find_str(options, "activation", "linear");
-    ACTIVATION activation = get_activation(activation_s);
-    s.activation = activation;
+    layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c, assisted_excitation, activation, params.train);
     return s;
 }
@@ -758,6 +758,9 @@ layer parse_scale_channels(list *options, size_params params, network net)
     char *activation_s = option_find_str_quiet(options, "activation", "linear");
     ACTIVATION activation = get_activation(activation_s);
     s.activation = activation;
+    if (activation == SWISH || activation == MISH) {
+        printf(" [scale_channels] layer doesn't support SWISH or MISH activations \n");
+    }
     return s;
 }
@@ -775,6 +778,9 @@ layer parse_sam(list *options, size_params params, network net)
     char *activation_s = option_find_str_quiet(options, "activation", "linear");
     ACTIVATION activation = get_activation(activation_s);
     s.activation = activation;
+    if (activation == SWISH || activation == MISH) {
+        printf(" [sam] layer doesn't support SWISH or MISH activations \n");
+    }
     return s;
 }
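With the parser fixed, a [shortcut] section can request swish or mish directly in the cfg and the layer has its scratch buffer ready; scale_channels and sam, by contrast, now print a warning rather than supporting these activations. A minimal illustrative cfg fragment (the from offset is an example value, not from this commit):

    [shortcut]
    from=-3
    activation=mish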

src/shortcut_layer.c
@@ -6,7 +6,7 @@
 #include <stdio.h>
 #include <assert.h>
-layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation, int train)
+layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation, ACTIVATION activation, int train)
 {
     if(assisted_excitation) fprintf(stderr, "Shortcut Layer - AE: %d\n", index);
     else fprintf(stderr,"Shortcut Layer: %d\n", index);
@@ -14,6 +14,7 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int
     l.train = train;
     l.type = SHORTCUT;
     l.batch = batch;
+    l.activation = activation;
     l.w = w2;
     l.h = h2;
     l.c = c2;
@@ -34,8 +35,10 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int
     l.forward = forward_shortcut_layer;
     l.backward = backward_shortcut_layer;
+#ifndef GPU
     if (l.activation == SWISH || l.activation == MISH) l.activation_input = (float*)calloc(l.batch*l.outputs, sizeof(float));
+#endif // GPU
 #ifdef GPU
     if (l.activation == SWISH || l.activation == MISH) l.activation_input_gpu = cuda_make_array(l.activation_input, l.batch*l.outputs);
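The #ifndef/#ifdef split allocates the scratch buffer on the host for CPU builds and on the device for GPU builds. The buffer exists because the SWISH and MISH gradients cannot be computed from the layer output alone: the forward pass has to stash the pre-activation input (or an intermediate derived from it) for reuse in backward. A self-contained sketch of the idea, using SWISH; the helper names are illustrative, not darknet's actual functions:

    #include <math.h>

    /* Illustrative SWISH forward: stash the pre-activation input,
     * which is what activation_input is allocated for. */
    static void swish_forward(const float *x, int n, float *saved_input, float *out)
    {
        for (int i = 0; i < n; ++i) {
            saved_input[i] = x[i];                /* kept for backward */
            out[i] = x[i] / (1.f + expf(-x[i]));  /* x * sigmoid(x) */
        }
    }

    /* Illustrative backward: d/dx swish(x) = s + x*s*(1-s) with
     * s = sigmoid(x); it needs x, not just the layer output. */
    static void swish_backward(const float *saved_input, int n, float *delta)
    {
        for (int i = 0; i < n; ++i) {
            float s = 1.f / (1.f + expf(-saved_input[i]));
            delta[i] *= s + saved_input[i] * s * (1.f - s);
        }
    }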

src/shortcut_layer.h
@@ -7,7 +7,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation, int train);
+layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int assisted_excitation, ACTIVATION activation, int train);
 void forward_shortcut_layer(const layer l, network_state state);
 void backward_shortcut_layer(const layer l, network_state state);
 void resize_shortcut_layer(layer *l, int w, int h);
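Any out-of-tree caller of make_shortcut_layer must be updated for the extra parameter. A hypothetical call site against the new prototype (build_residual_join and its shapes are assumptions for illustration, not from this commit):

    #include "shortcut_layer.h"

    /* Hypothetical caller: joins two same-shape feature maps, passing the
     * activation at construction time so make_shortcut_layer can allocate
     * the SWISH/MISH buffer itself. */
    layer build_residual_join(int batch, int index, int w, int h, int c, int train)
    {
        return make_shortcut_layer(batch, index, w, h, c,
                                   w, h, c,  /* the 'from' layer's out_w/out_h/out_c */
                                   0,        /* assisted_excitation off */
                                   MISH,     /* known up front now */
                                   train);
    }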
