# Patch summary: per-layer learning-rate scale and extra Adam state for
# convolutional layers.
#
# src/convolutional_kernels.cu
#   * backward_convolutional_layer_gpu: backward_bias_gpu is now called only
#     when l.batch_normalize is zero.
#   * Adds update_convolutional_layer_gpu(layer l, int batch,
#     float learning_rate_init, float momentum, float decay) and wraps the
#     previous definition of that function in a block comment.
#       - effective rate = learning_rate_init * l.learning_rate_scale
#       - weight element count = l.size*l.size*l.c*l.n
#       - l.adam set:   adam_update_gpu is called for weights, biases and,
#                       when l.scales_gpu is non-NULL, scales. decay is passed
#                       to all three calls; B1, B2, eps and t are read from
#                       the layer.
#       - l.adam unset: axpy_ongpu / scal_ongpu are used. The decay term is
#                       applied to the weights only; momentum is passed to
#                       scal_ongpu for each update buffer; biases and scales
#                       use learning_rate / batch.
# src/convolutional_layer.c
#   * make_convolutional_layer: in the adam branches, allocates bias_m, bias_v,
#     scale_m, scale_v (n floats each, calloc) and matching *_gpu arrays via
#     cuda_make_array, next to the existing m / v buffers.
# src/layer.h
#   * struct layer gains learning_rate_scale, t, bias_m, bias_v, scale_m,
#     scale_v; m_gpu / v_gpu move next to the other *_gpu members together with
#     the new bias_/scale_ *_gpu pointers; the two closing #endif lines get
#     "// CUDNN" and "// GPU" labels.
# src/parser.c
#   * parse_network_cfg_custom: reads the per-layer option "learning_rate"
#     (default 1) into l.learning_rate_scale.
#
# NOTE(review): this patch was recovered from a copy whose line breaks had been
# collapsed. Indentation and the positions of blank context lines below were
# reconstructed from the hunk line counts and are unverified; check against the
# target tree or apply with whitespace-insensitive matching
# (e.g. git apply --ignore-whitespace).
# NOTE(review): nothing in this patch assigns l.t, l.B1, l.B2 or l.eps; confirm
# they are set elsewhere before the Adam path is exercised.
#
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 44f62ed3..d498effd 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -457,7 +457,8 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state
 {
     gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
 
-    backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
+    if (!l.batch_normalize)
+        backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h);
 
 //#ifndef CUDNN_HALF
     //if(l.batch_normalize){
@@ -703,6 +704,45 @@ void push_convolutional_layer(convolutional_layer layer)
     }
 }
 
+void update_convolutional_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay)
+{
+    float learning_rate = learning_rate_init*l.learning_rate_scale;
+    //float momentum = a.momentum;
+    //float decay = a.decay;
+    //int batch = a.batch;
+    int size = l.size*l.size*l.c*l.n; // old
+
+    if (l.adam) {
+        //adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t);
+        adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, size, batch, l.t);
+
+        adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.n, batch, l.t);
+        if (l.scales_gpu) {
+            adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.n, batch, l.t);
+        }
+    }
+    else {
+        //axpy_ongpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
+        //axpy_ongpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
+        //scal_ongpu(l.nweights, momentum, l.weight_updates_gpu, 1);
+        axpy_ongpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
+        axpy_ongpu(size, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
+        scal_ongpu(size, momentum, l.weight_updates_gpu, 1);
+
+        axpy_ongpu(l.n, learning_rate / batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
+        scal_ongpu(l.n, momentum, l.bias_updates_gpu, 1);
+
+        if (l.scales_gpu) {
+            axpy_ongpu(l.n, learning_rate / batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
+            scal_ongpu(l.n, momentum, l.scale_updates_gpu, 1);
+        }
+    }
+    //if (l.clip) {
+    //    constrain_gpu(l.nweights, l.clip, l.weights_gpu, 1);
+    //}
+}
+
+/*
 void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay)
 {
     int size = layer.size*layer.size*layer.c*layer.n;
@@ -753,5 +793,5 @@ void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float
         //-----------------------------------
     }
 }
-
+*/
 
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index b05b3d00..4794364e 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -390,6 +390,10 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
         l.adam = 1;
         l.m = calloc(c*n*size*size, sizeof(float));
         l.v = calloc(c*n*size*size, sizeof(float));
+        l.bias_m = calloc(n, sizeof(float));
+        l.scale_m = calloc(n, sizeof(float));
+        l.bias_v = calloc(n, sizeof(float));
+        l.scale_v = calloc(n, sizeof(float));
     }
 
 #ifdef GPU
@@ -401,6 +405,10 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
         if (adam) {
             l.m_gpu = cuda_make_array(l.m, c*n*size*size);
             l.v_gpu = cuda_make_array(l.v, c*n*size*size);
+            l.bias_m_gpu = cuda_make_array(l.bias_m, n);
+            l.bias_v_gpu = cuda_make_array(l.bias_v, n);
+            l.scale_m_gpu = cuda_make_array(l.scale_m, n);
+            l.scale_v_gpu = cuda_make_array(l.scale_v, n);
         }
 
         l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
diff --git a/src/layer.h b/src/layer.h
index a4ebbfbc..cc78de0d 100644
--- a/src/layer.h
+++ b/src/layer.h
@@ -100,6 +100,7 @@ struct layer{
     float exposure;
     float shift;
     float ratio;
+    float learning_rate_scale;
     int focal_loss;
     int noloss;
     int softmax;
@@ -122,11 +123,14 @@ struct layer{
     float B1;
     float B2;
     float eps;
-    float *m_gpu;
-    float *v_gpu;
 
+    int t;
     float *m;
     float *v;
+    float * bias_m;
+    float * bias_v;
+    float * scale_m;
+    float * scale_v;
 
     tree *softmax_tree;
     int *map;
@@ -245,7 +249,7 @@ struct layer{
 
     size_t workspace_size;
 
-    #ifdef GPU
+#ifdef GPU
     float *z_gpu;
     float *r_gpu;
     float *h_gpu;
@@ -263,6 +267,14 @@ struct layer{
     float * concat_gpu;
     float * concat_delta_gpu;
 
+    // adam
+    float *m_gpu;
+    float *v_gpu;
+    float *bias_m_gpu;
+    float *scale_m_gpu;
+    float *bias_v_gpu;
+    float *scale_v_gpu;
+
     float *binary_input_gpu;
     float *binary_weights_gpu;
 
@@ -310,8 +322,8 @@ struct layer{
     cudnnConvolutionBwdDataAlgo_t bd_algo, bd_algo16;
     cudnnConvolutionBwdFilterAlgo_t bf_algo, bf_algo16;
     cudnnPoolingDescriptor_t poolingDesc;
-    #endif
-    #endif
+    #endif  // CUDNN
+#endif  // GPU
 };
 
 void free_layer(layer);
diff --git a/src/parser.c b/src/parser.c
index b01c5458..f3efc27c 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -805,6 +805,7 @@ network parse_network_cfg_custom(char *filename, int batch)
         l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
         l.dontload = option_find_int_quiet(options, "dontload", 0);
         l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
+        l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
         option_unused(options);
         net.layers[count] = l;
         if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;