Fixed CRNN (convolution-based RNN) layer

pull/2352/head
AlexeyAB 6 years ago
parent bd91d0a908
commit c7309c1fdb
Changed files (changed-line counts):
  1. src/connected_layer.c (3)
  2. src/convolutional_layer.c (13)
  3. src/crnn_layer.c (24)
  4. src/crnn_layer.h (2)
  5. src/lstm_layer.c (3)
  6. src/parser.c (13)
  7. src/rnn_layer.c (5)

src/connected_layer.c

@@ -67,6 +67,8 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
  l.size = 1;
  l.stride = 1;
  l.pad = 0;
+ l.activation = activation;
+ l.learning_rate_scale = 1;
  l.output = calloc(total_batch*outputs, sizeof(float));
  l.delta = calloc(total_batch*outputs, sizeof(float));
@@ -145,7 +147,6 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
  l.workspace_size = get_connected_workspace_size(l);
  #endif // CUDNN
  #endif // GPU
- l.activation = activation;
  fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs);
  return l;
  }

src/convolutional_layer.c

@@ -333,6 +333,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
  l.size = size;
  l.pad = padding;
  l.batch_normalize = batch_normalize;
+ l.learning_rate_scale = 1;
  l.weights = calloc(c*n*size*size, sizeof(float));
  l.weight_updates = calloc(c*n*size*size, sizeof(float));
@@ -350,6 +351,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
  l.out_c = n;
  l.outputs = l.out_h * l.out_w * l.out_c;
  l.inputs = l.w * l.h * l.c;
+ l.activation = activation;
  l.output = calloc(total_batch*l.outputs, sizeof(float));
  l.delta = calloc(total_batch*l.outputs, sizeof(float));
@@ -417,17 +419,17 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
  }
  l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
+ l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
  #ifdef CUDNN_HALF
  l.weights_gpu16 = cuda_make_array(NULL, c*n*size*size / 2); //cuda_make_array(l.weights, c*n*size*size / 2);
  l.weight_updates_gpu16 = cuda_make_array(NULL, c*n*size*size / 2); //cuda_make_array(l.weight_updates, c*n*size*size / 2);
  #endif
- l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
  l.biases_gpu = cuda_make_array(l.biases, n);
  l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
- l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n);
  l.output_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
+ l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n);
  if(binary){
  l.binary_weights_gpu = cuda_make_array(l.weights, c*n*size*size);
@@ -439,6 +441,9 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
  }
  if(batch_normalize){
+ l.scales_gpu = cuda_make_array(l.scales, n);
+ l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
  l.mean_gpu = cuda_make_array(l.mean, n);
  l.variance_gpu = cuda_make_array(l.variance, n);
@@ -448,9 +453,6 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
  l.mean_delta_gpu = cuda_make_array(l.mean, n);
  l.variance_delta_gpu = cuda_make_array(l.variance, n);
- l.scales_gpu = cuda_make_array(l.scales, n);
- l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
  l.x_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
  l.x_norm_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
  }
@@ -463,7 +465,6 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
  l.workspace_size = get_workspace_size(l);
  size_t workspace_size16 = get_workspace_size16(l);
  if (workspace_size16 > l.workspace_size) l.workspace_size = workspace_size16;
- l.activation = activation;
  //fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
  l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;

src/crnn_layer.c

@@ -26,7 +26,7 @@ static void increment_layer(layer *l, int steps)
  #endif
  }
- layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize)
+ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize)
  {
  fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
  batch = batch / steps;
@@ -47,20 +47,20 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
  l.state = calloc(l.hidden*batch*(steps+1), sizeof(float));
  l.input_layer = malloc(sizeof(layer));
- fprintf(stderr, "\t\t");
- *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0, 0, 0);
+ fprintf(stderr, "");
+ *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
  l.input_layer->batch = batch;
  if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
  l.self_layer = malloc(sizeof(layer));
- fprintf(stderr, "\t\t");
- *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0, 0, 0);
+ fprintf(stderr, "");
+ *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
  l.self_layer->batch = batch;
  if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
  l.output_layer = malloc(sizeof(layer));
- fprintf(stderr, "\t\t");
- *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0, 0, 0);
+ fprintf(stderr, "");
+ *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
  l.output_layer->batch = batch;
  if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size;
@@ -75,8 +75,7 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
  l.forward_gpu = forward_crnn_layer_gpu;
  l.backward_gpu = backward_crnn_layer_gpu;
  l.update_gpu = update_crnn_layer_gpu;
- l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1));
+ l.state_gpu = cuda_make_array(l.state, batch*l.hidden*(steps + 1));
  l.output_gpu = l.output_layer->output_gpu;
  l.delta_gpu = l.output_layer->delta_gpu;
  #endif
@@ -263,8 +262,8 @@ void backward_crnn_layer_gpu(layer l, network_state state)
  increment_layer(&output_layer, l.steps - 1);
  l.state_gpu += l.hidden*l.batch*l.steps;
  for (i = l.steps-1; i >= 0; --i) {
- copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1);
- axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
+ //copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1); // commented in RNN
+ //axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); // commented in RNN
  s.input = l.state_gpu;
  s.delta = self_layer.delta_gpu;
@@ -272,12 +271,13 @@ void backward_crnn_layer_gpu(layer l, network_state state)
  l.state_gpu -= l.hidden*l.batch;
- copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
  s.input = l.state_gpu;
  s.delta = self_layer.delta_gpu - l.hidden*l.batch;
  if (i == 0) s.delta = 0;
  backward_convolutional_layer_gpu(self_layer, s);
+ copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
+ if (i > 0 && l.shortcut) axpy_ongpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1);
  s.input = state.input + i*l.inputs*l.batch;
  if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
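
The signature change above replaces the hard-coded 3x3 stride-1 pad-1 kernels with explicit size/stride/pad arguments, so every call site now passes the kernel geometry. A minimal sketch of a call under the new signature (the batch, step, and filter numbers are hypothetical, not taken from this commit):

    // Hypothetical call: batch 16 unrolled over 4 time steps (make_crnn_layer
    // divides batch by steps internally), a 224x224x3 input, 64 hidden and
    // 128 output filters, and the formerly hard-coded 3x3/1/1 convolution
    // now passed explicitly as size=3, stride=1, pad=1.
    layer l = make_crnn_layer(16, 224, 224, 3, 64, 128, 4, 3, 1, 1, LEAKY, 1);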

src/crnn_layer.h

@@ -6,7 +6,7 @@
  #include "layer.h"
  #include "network.h"
- layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize);
+ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize);
  void forward_crnn_layer(layer l, network_state state);
  void backward_crnn_layer(layer l, network_state state);

src/lstm_layer.c

@@ -35,6 +35,9 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
  l.type = LSTM;
  l.steps = steps;
  l.inputs = inputs;
+ l.out_w = 1;
+ l.out_h = 1;
+ l.out_c = outputs;
  l.uf = malloc(sizeof(layer));
  fprintf(stderr, "\t\t");
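
The new out_w/out_h/out_c fields describe the flat LSTM output as a 1 x 1 x outputs feature map, presumably so that layers which derive buffer sizes from these fields see consistent values; the same three fields are added to make_rnn_layer below. A sketch of the invariant (illustrative code, not from the commit):

    #include <assert.h>
    // For recurrent layers the output is a flat vector of length `outputs`,
    // exposed as a 1 x 1 x outputs feature map, so l.outputs stays equal
    // to l.out_w * l.out_h * l.out_c.
    void check_recurrent_dims(int outputs) {
        int out_w = 1, out_h = 1, out_c = outputs;  // values set by this commit
        assert(out_w * out_h * out_c == outputs);
    }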

src/parser.c

@@ -182,13 +182,17 @@ convolutional_layer parse_convolutional(list *options, size_params params)
  layer parse_crnn(list *options, size_params params)
  {
- int output_filters = option_find_int(options, "output_filters",1);
- int hidden_filters = option_find_int(options, "hidden_filters",1);
+ int size = option_find_int_quiet(options, "size", 3);
+ int stride = option_find_int_quiet(options, "stride", 1);
+ int pad = option_find_int_quiet(options, "pad", 1);
+ int output_filters = option_find_int(options, "output",1);
+ int hidden_filters = option_find_int(options, "hidden",1);
  char *activation_s = option_find_str(options, "activation", "logistic");
  ACTIVATION activation = get_activation(activation_s);
  int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
- layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize);
+ layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, size, stride, pad, activation, batch_normalize);
  l.shortcut = option_find_int_quiet(options, "shortcut", 0);
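
With these parser changes the kernel geometry becomes configurable per [crnn] section of a .cfg file; the quiet defaults (size=3, stride=1, pad=1) reproduce the previously hard-coded convolutions, and the number of steps still comes from time_steps in the [net] section (params.time_steps above). An illustrative section with made-up filter counts:

    [crnn]
    batch_normalize=1
    size=3
    stride=1
    pad=1
    output=256
    hidden=128
    activation=leaky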
@@ -866,7 +870,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
  check_error(cudaMalloc((void **)net.output16_gpu, *net.max_output16_size * sizeof(short))); //sizeof(half)
  }
  if (workspace_size) {
- printf(" Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000);
+ printf(" Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size/1000000);
  net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
  }
  else {
@@ -1167,6 +1171,7 @@ void load_convolutional_weights(layer l, FILE *fp)
  }
  int num = l.n*l.c*l.size*l.size;
  fread(l.biases, sizeof(float), l.n, fp);
+ //fread(l.weights, sizeof(float), num, fp); // as in connected layer
  if (l.batch_normalize && (!l.dontloadscales)){
  fread(l.scales, sizeof(float), l.n, fp);
  fread(l.rolling_mean, sizeof(float), l.n, fp);

src/rnn_layer.c

@@ -36,6 +36,9 @@ layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps,
  l.steps = steps;
  l.hidden = hidden;
  l.inputs = inputs;
+ l.out_w = 1;
+ l.out_h = 1;
+ l.out_c = outputs;
  l.state = calloc(batch*hidden*(steps+1), sizeof(float));
@@ -264,7 +267,7 @@ void backward_rnn_layer_gpu(layer l, network_state state)
  l.state_gpu -= l.hidden*l.batch;
- copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
+ copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); // the same delta for Input and Self layers
  s.input = l.state_gpu;
  s.delta = self_layer.delta_gpu - l.hidden*l.batch;
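
The comment added to copy_ongpu is worth unpacking: in the forward pass the recurrent state is the elementwise sum of the input and self sub-layers' outputs (the copy/axpy pair visible, commented out, in the CRNN hunk above), so by the chain rule both addends receive an identical gradient and one delta buffer can simply be copied into the other:

    dL/d(input_layer.output) = dL/d(self_layer.output) = dL/d(state)

(This restates the existing forward computation; it is not new math introduced by the commit.)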
