diff --git a/src/connected_layer.c b/src/connected_layer.c
index 2637f946..0f4a61a1 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -67,6 +67,8 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
     l.size = 1;
     l.stride = 1;
     l.pad = 0;
+    l.activation = activation;
+    l.learning_rate_scale = 1;
 
     l.output = calloc(total_batch*outputs, sizeof(float));
     l.delta = calloc(total_batch*outputs, sizeof(float));
@@ -145,7 +147,6 @@ connected_layer make_connected_layer(int batch, int steps, int inputs, int outpu
     l.workspace_size = get_connected_workspace_size(l);
 #endif // CUDNN
 #endif // GPU
-    l.activation = activation;
     fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs);
     return l;
 }
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index 39a72d3b..f4f5bc67 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -333,6 +333,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
     l.size = size;
     l.pad = padding;
     l.batch_normalize = batch_normalize;
+    l.learning_rate_scale = 1;
 
     l.weights = calloc(c*n*size*size, sizeof(float));
     l.weight_updates = calloc(c*n*size*size, sizeof(float));
@@ -350,6 +351,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
     l.out_c = n;
     l.outputs = l.out_h * l.out_w * l.out_c;
     l.inputs = l.w * l.h * l.c;
+    l.activation = activation;
 
     l.output = calloc(total_batch*l.outputs, sizeof(float));
     l.delta = calloc(total_batch*l.outputs, sizeof(float));
@@ -417,17 +419,17 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
     }
 
     l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
+    l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
 #ifdef CUDNN_HALF
     l.weights_gpu16 = cuda_make_array(NULL, c*n*size*size / 2); //cuda_make_array(l.weights, c*n*size*size / 2);
     l.weight_updates_gpu16 = cuda_make_array(NULL, c*n*size*size / 2); //cuda_make_array(l.weight_updates, c*n*size*size / 2);
 #endif
-    l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
 
     l.biases_gpu = cuda_make_array(l.biases, n);
     l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
 
-    l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n);
     l.output_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
+    l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n);
 
     if(binary){
         l.binary_weights_gpu = cuda_make_array(l.weights, c*n*size*size);
@@ -439,6 +441,9 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
     }
 
     if(batch_normalize){
+        l.scales_gpu = cuda_make_array(l.scales, n);
+        l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
+
         l.mean_gpu = cuda_make_array(l.mean, n);
         l.variance_gpu = cuda_make_array(l.variance, n);
 
@@ -448,9 +453,6 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
         l.mean_delta_gpu = cuda_make_array(l.mean, n);
         l.variance_delta_gpu = cuda_make_array(l.variance, n);
 
-        l.scales_gpu = cuda_make_array(l.scales, n);
-        l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
-
         l.x_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
         l.x_norm_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n);
     }
@@ -463,7 +465,6 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
     l.workspace_size = get_workspace_size(l);
     size_t workspace_size16 = get_workspace_size16(l);
     if (workspace_size16 > l.workspace_size) l.workspace_size = workspace_size16;
-    l.activation = activation;
     //fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
     l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
 
diff --git a/src/crnn_layer.c b/src/crnn_layer.c
index d76c5e22..56ead3af 100644
--- a/src/crnn_layer.c
+++ b/src/crnn_layer.c
@@ -26,7 +26,7 @@ static void increment_layer(layer *l, int steps)
 #endif
 }
 
-layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize)
+layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize)
 {
     fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
     batch = batch / steps;
@@ -47,20 +47,20 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
     l.state = calloc(l.hidden*batch*(steps+1), sizeof(float));
 
     l.input_layer = malloc(sizeof(layer));
-    fprintf(stderr, "\t\t");
-    *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0, 0, 0);
+    fprintf(stderr, "");
+    *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
     l.input_layer->batch = batch;
     if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size;
 
     l.self_layer = malloc(sizeof(layer));
-    fprintf(stderr, "\t\t");
-    *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0, 0, 0);
+    fprintf(stderr, "");
+    *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
     l.self_layer->batch = batch;
     if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size;
 
     l.output_layer = malloc(sizeof(layer));
-    fprintf(stderr, "\t\t");
-    *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0, 0, 0);
+    fprintf(stderr, "");
+    *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, size, stride, pad, activation, batch_normalize, 0, 0, 0, 0, 0);
     l.output_layer->batch = batch;
     if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size;
 
@@ -75,8 +75,7 @@ layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int ou
     l.forward_gpu = forward_crnn_layer_gpu;
     l.backward_gpu = backward_crnn_layer_gpu;
     l.update_gpu = update_crnn_layer_gpu;
-
-    l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1));
+    l.state_gpu = cuda_make_array(l.state, batch*l.hidden*(steps + 1));
     l.output_gpu = l.output_layer->output_gpu;
     l.delta_gpu = l.output_layer->delta_gpu;
 #endif
@@ -263,8 +262,8 @@ void backward_crnn_layer_gpu(layer l, network_state state)
     increment_layer(&output_layer, l.steps - 1);
     l.state_gpu += l.hidden*l.batch*l.steps;
     for (i = l.steps-1; i >= 0; --i) {
-        copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1);
-        axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
+        //copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1);   // commented in RNN
+        //axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); // commented in RNN
 
         s.input = l.state_gpu;
         s.delta = self_layer.delta_gpu;
@@ -272,12 +271,13 @@ void backward_crnn_layer_gpu(layer l, network_state state)
         l.state_gpu -= l.hidden*l.batch;
 
+        copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
+
         s.input = l.state_gpu;
         s.delta = self_layer.delta_gpu - l.hidden*l.batch;
         if (i == 0) s.delta = 0;
         backward_convolutional_layer_gpu(self_layer, s);
 
-        copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
         if (i > 0 && l.shortcut) axpy_ongpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1);
 
         s.input = state.input + i*l.inputs*l.batch;
         if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
diff --git a/src/crnn_layer.h b/src/crnn_layer.h
index 0da942ee..867d3ed6 100644
--- a/src/crnn_layer.h
+++ b/src/crnn_layer.h
@@ -6,7 +6,7 @@
 #include "layer.h"
 #include "network.h"
 
-layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize);
+layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, int size, int stride, int pad, ACTIVATION activation, int batch_normalize);
 
 void forward_crnn_layer(layer l, network_state state);
 void backward_crnn_layer(layer l, network_state state);
diff --git a/src/lstm_layer.c b/src/lstm_layer.c
index cf3411e6..657d500c 100644
--- a/src/lstm_layer.c
+++ b/src/lstm_layer.c
@@ -35,6 +35,9 @@ layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_n
     l.type = LSTM;
     l.steps = steps;
     l.inputs = inputs;
+    l.out_w = 1;
+    l.out_h = 1;
+    l.out_c = outputs;
 
     l.uf = malloc(sizeof(layer));
     fprintf(stderr, "\t\t");
diff --git a/src/parser.c b/src/parser.c
index 9ca9a942..b2101703 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -182,13 +182,17 @@ convolutional_layer parse_convolutional(list *options, size_params params)
 
 layer parse_crnn(list *options, size_params params)
 {
-    int output_filters = option_find_int(options, "output_filters",1);
-    int hidden_filters = option_find_int(options, "hidden_filters",1);
+    int size = option_find_int_quiet(options, "size", 3);
+    int stride = option_find_int_quiet(options, "stride", 1);
+    int pad = option_find_int_quiet(options, "pad", 1);
+
+    int output_filters = option_find_int(options, "output",1);
+    int hidden_filters = option_find_int(options, "hidden",1);
     char *activation_s = option_find_str(options, "activation", "logistic");
     ACTIVATION activation = get_activation(activation_s);
     int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
 
-    layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize);
+    layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, size, stride, pad, activation, batch_normalize);
 
     l.shortcut = option_find_int_quiet(options, "shortcut", 0);
 
@@ -866,7 +870,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
             check_error(cudaMalloc((void **)net.output16_gpu, *net.max_output16_size * sizeof(short))); //sizeof(half)
         }
         if (workspace_size) {
-            printf(" Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000);
+            printf(" Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size/1000000);
             net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
         }
         else {
@@ -1167,6 +1171,7 @@ void load_convolutional_weights(layer l, FILE *fp)
     }
     int num = l.n*l.c*l.size*l.size;
     fread(l.biases, sizeof(float), l.n, fp);
+    //fread(l.weights, sizeof(float), num, fp); // as in connected layer
     if (l.batch_normalize && (!l.dontloadscales)){
         fread(l.scales, sizeof(float), l.n, fp);
         fread(l.rolling_mean, sizeof(float), l.n, fp);
diff --git a/src/rnn_layer.c b/src/rnn_layer.c
index c1843f06..c5669adb 100644
--- a/src/rnn_layer.c
+++ b/src/rnn_layer.c
@@ -36,6 +36,9 @@ layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps,
     l.steps = steps;
    l.hidden = hidden;
     l.inputs = inputs;
+    l.out_w = 1;
+    l.out_h = 1;
+    l.out_c = outputs;
 
     l.state = calloc(batch*hidden*(steps+1), sizeof(float));
 
@@ -264,7 +267,7 @@ void backward_rnn_layer_gpu(layer l, network_state state)
 
         l.state_gpu -= l.hidden*l.batch;
 
-        copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
+        copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); // the same delta for Input and Self layers
 
         s.input = l.state_gpu;
         s.delta = self_layer.delta_gpu - l.hidden*l.batch;
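
Usage note: with this patch, parse_crnn() reads the convolution geometry of a [crnn] section from the cfg file (size, stride, pad, defaulting to the previously hard-coded 3/1/1) and takes the filter counts from the keys "output" and "hidden" instead of "output_filters" and "hidden_filters". A minimal sketch of a [crnn] section under the new keys follows; the filter counts and activation are illustrative placeholders, not values from any shipped cfg:

[crnn]
batch_normalize=1
size=3
stride=1
pad=1
output=256
hidden=128
activation=leaky

Because the make_crnn_layer() signature gained three parameters, any C caller must now pass them explicitly; the old behavior corresponds to make_crnn_layer(batch, h, w, c, hidden_filters, output_filters, steps, 3, 1, 1, activation, batch_normalize).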