Fixed CRNN (convolution-based RNN) layer

pull/2352/head
AlexeyAB 6 years ago
parent bd91d0a908
commit c7309c1fdb
Changed files (changed-line counts):
  1. src/connected_layer.c (3)
  2. src/convolutional_layer.c (13)
  3. src/crnn_layer.c (24)
  4. src/crnn_layer.h (2)
  5. src/lstm_layer.c (3)
  6. src/parser.c (13)
  7. src/rnn_layer.c (5)

src/connected_layer.c

@@ -67,6 +67,8 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
  l.size = 1;
  l.stride = 1;
  l.pad = 0;
+ l.activation = activation;
+ l.learning_rate_scale = 1;
  l.output = calloc(total_batch*outputs, sizeof(float));
  l.delta = calloc(total_batch*outputs, sizeof(float));
@@ -145,7 +147,6 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
  l.workspace_size = get_connected_workspace_size(l);
  #endif // CUDNN
  #endif // GPU
- l.activation = activation;
  fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs);
  return l;
  }

src/convolutional_layer.c

@@ -333,6 +333,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
  l.size = size;
  l.pad = padding;
  l.batch_normalize = batch_normalize;
+ l.learning_rate_scale = 1;
  l.weights = calloc(c*n*size*size, sizeof(float));
  l.weight_updates = calloc(c*n*size*size, sizeof(float));
@@ -350,6 +351,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
  l.out_c = n;
  l.outputs = l.out_h * l.out_w * l.out_c;
  l.inputs = l.w * l.h * l.c;
+ l.activation = activation;
  l.output = calloc(total_batch*l.outputs, sizeof(float));
  l.delta = calloc(total_batch*l.outputs, sizeof(float));
@@ -417,17 +419,17 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
  }
  l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
+ l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
  #ifdef CUDNN_HALF
  l.weights_gpu16 = cuda_make_array(NULL, c*n*size*size / 2); //cuda_make_array(l.weights, c*n*size*size / 2);
  l.weight_updates_gpu16 = cuda_make_array(NULL, c*n*size*size / 2); //cuda_make_array(l.weight_updates, c*n*size*size / 2);
  #endif
- l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
  l.biases_gpu = cuda_make_array(l.biases, n);
  l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
- l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n);
  l.output_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
+ l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n);
  if(binary){
  l.binary_weights_gpu = cuda_make_array(l.weights, c*n*size*size);
@@ -439,6 +441,9 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
  }
  if(batch_normalize){
+ l.scales_gpu = cuda_make_array(l.scales, n);
+ l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
  l.mean_gpu = cuda_make_array(l.mean, n);
  l.variance_gpu = cuda_make_array(l.variance, n);
@@ -448,9 +453,6 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
  l.mean_delta_gpu = cuda_make_array(l.mean, n);
  l.variance_delta_gpu = cuda_make_array(l.variance, n);
- l.scales_gpu = cuda_make_array(l.scales, n);
- l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
  l.x_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
  l.x_norm_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
  }
@@ -463,7 +465,6 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
  l.workspace_size = get_workspace_size(l);
  size_t workspace_size16 = get_workspace_size16(l);
  if (workspace_size16 > l.workspace_size) l.workspace_size = workspace_size16;
- l.activation = activation;
  //fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
  l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;

src/crnn_layer.c

@@ -26,7 +26,7 @@ static void increment_layer(layer *l, int steps)
  #endif
  }
- layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize)
+ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize)
  {
  fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
  batch = batch / steps;
@@ -47,20 +47,20 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
  l.state = calloc(l.hidden*batch*(steps+1), sizeof(float));
  l.input_layer = malloc(sizeof(layer));
- fprintf(stderr, "\t\t");
- *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0, 0, 0);
+ fprintf(stderr, "");
+ *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
  l.input_layer->batch = batch;
  if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
  l.self_layer = malloc(sizeof(layer));
- fprintf(stderr, "\t\t");
- *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0, 0, 0);
+ fprintf(stderr, "");
+ *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
  l.self_layer->batch = batch;
  if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
  l.output_layer = malloc(sizeof(layer));
- fprintf(stderr, "\t\t");
- *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0, 0, 0);
+ fprintf(stderr, "");
+ *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
  l.output_layer->batch = batch;
  if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size;
@@ -75,8 +75,7 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
  l.forward_gpu = forward_crnn_layer_gpu;
  l.backward_gpu = backward_crnn_layer_gpu;
  l.update_gpu = update_crnn_layer_gpu;
- l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1));
+ l.state_gpu = cuda_make_array(l.state, batch*l.hidden*(steps + 1));
  l.output_gpu = l.output_layer->output_gpu;
  l.delta_gpu = l.output_layer->delta_gpu;
  #endif
@@ -263,8 +262,8 @@ void backward_crnn_layer_gpu(layer l, network_state state)
  increment_layer(&output_layer, l.steps - 1);
  l.state_gpu += l.hidden*l.batch*l.steps;
  for (i = l.steps-1; i >= 0; --i) {
- copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1);
- axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
+ //copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1); // commented in RNN
+ //axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); // commented in RNN
  s.input = l.state_gpu;
  s.delta = self_layer.delta_gpu;
@@ -272,12 +271,13 @@ void backward_crnn_layer_gpu(layer l, network_state state)
  l.state_gpu -= l.hidden*l.batch;
- copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
  s.input = l.state_gpu;
  s.delta = self_layer.delta_gpu - l.hidden*l.batch;
  if (i == 0) s.delta = 0;
  backward_convolutional_layer_gpu(self_layer, s);
+ copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
+ if (i > 0 && l.shortcut) axpy_ongpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1);
  s.input = state.input + i*l.inputs*l.batch;
  if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
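
The signature change above replaces the hard-coded 3x3 stride-1 pad-1 kernels with explicit size/stride/pad arguments, so every call site now passes the kernel geometry. A minimal sketch of a call under the new signature (the batch, step, and filter numbers are hypothetical, not taken from this commit):

    // Hypothetical call: batch 16 unrolled over 4 time steps (make_crnn_layer
    // divides batch by steps internally), a 224x224x3 input, 64 hidden and
    // 128 output filters, and the formerly hard-coded 3x3/1/1 convolution
    // now passed explicitly as size=3, stride=1, pad=1.
    layer l = make_crnn_layer(16, 224, 224, 3, 64, 128, 4, 3, 1, 1, LEAKY, 1);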

src/crnn_layer.h

@@ -6,7 +6,7 @@
  #include "layer.h"
  #include "network.h"
- layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize);
+ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize);
  void forward_crnn_layer(layer l, network_state state);
  void backward_crnn_layer(layer l, network_state state);

src/lstm_layer.c

@@ -35,6 +35,9 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
  l.type = LSTM;
  l.steps = steps;
  l.inputs = inputs;
+ l.out_w = 1;
+ l.out_h = 1;
+ l.out_c = outputs;
  l.uf = malloc(sizeof(layer));
  fprintf(stderr, "\t\t");
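
The new out_w/out_h/out_c fields describe the flat LSTM output as a 1 x 1 x outputs feature map, presumably so that layers which derive buffer sizes from these fields see consistent values; the same three fields are added to make_rnn_layer below. A sketch of the invariant (illustrative code, not from the commit):

    #include <assert.h>
    // For recurrent layers the output is a flat vector of length `outputs`,
    // exposed as a 1 x 1 x outputs feature map, so l.outputs stays equal
    // to l.out_w * l.out_h * l.out_c.
    void check_recurrent_dims(int outputs) {
        int out_w = 1, out_h = 1, out_c = outputs;  // values set by this commit
        assert(out_w * out_h * out_c == outputs);
    }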

src/parser.c

@@ -182,13 +182,17 @@ convolutional_layer parse_convolutional(list *options, size_params params)
  layer parse_crnn(list *options, size_params params)
  {
- int output_filters = option_find_int(options, "output_filters",1);
- int hidden_filters = option_find_int(options, "hidden_filters",1);
+ int size = option_find_int_quiet(options, "size", 3);
+ int stride = option_find_int_quiet(options, "stride", 1);
+ int pad = option_find_int_quiet(options, "pad", 1);
+ int output_filters = option_find_int(options, "output",1);
+ int hidden_filters = option_find_int(options, "hidden",1);
  char *activation_s = option_find_str(options, "activation", "logistic");
  ACTIVATION activation = get_activation(activation_s);
  int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
- layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize);
+ layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, size, stride, pad, activation, batch_normalize);
  l.shortcut = option_find_int_quiet(options, "shortcut", 0);
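
With these parser changes the kernel geometry becomes configurable per [crnn] section of a .cfg file; the quiet defaults (size=3, stride=1, pad=1) reproduce the previously hard-coded convolutions, and the number of steps still comes from time_steps in the [net] section (params.time_steps above). An illustrative section with made-up filter counts:

    [crnn]
    batch_normalize=1
    size=3
    stride=1
    pad=1
    output=256
    hidden=128
    activation=leaky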
@@ -866,7 +870,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
  check_error(cudaMalloc((void **)net.output16_gpu, *net.max_output16_size * sizeof(short))); //sizeof(half)
  }
  if (workspace_size) {
- printf(" Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000);
+ printf(" Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size/1000000);
  net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
  }
  else {
@@ -1167,6 +1171,7 @@ void load_convolutional_weights(layer l, FILE *fp)
  }
  int num = l.n*l.c*l.size*l.size;
  fread(l.biases, sizeof(float), l.n, fp);
+ //fread(l.weights, sizeof(float), num, fp); // as in connected layer
  if (l.batch_normalize && (!l.dontloadscales)){
  fread(l.scales, sizeof(float), l.n, fp);
  fread(l.rolling_mean, sizeof(float), l.n, fp);

src/rnn_layer.c

@@ -36,6 +36,9 @@ layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps,
  l.steps = steps;
  l.hidden = hidden;
  l.inputs = inputs;
+ l.out_w = 1;
+ l.out_h = 1;
+ l.out_c = outputs;
  l.state = calloc(batch*hidden*(steps+1), sizeof(float));
@@ -264,7 +267,7 @@ void backward_rnn_layer_gpu(layer l, network_state state)
  l.state_gpu -= l.hidden*l.batch;
- copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
+ copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); // the same delta for Input and Self layers
  s.input = l.state_gpu;
  s.delta = self_layer.delta_gpu - l.hidden*l.batch;
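
The comment added to copy_ongpu is worth unpacking: in the forward pass the recurrent state is the elementwise sum of the input and self sub-layers' outputs (the copy/axpy pair visible, commented out, in the CRNN hunk above), so by the chain rule both addends receive an identical gradient and one delta buffer can simply be copied into the other:

    dL/d(input_layer.output) = dL/d(self_layer.output) = dL/d(state)

(This restates the existing forward computation; it is not new math introduced by the commit.)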
