diff --git a/build/darknet/x64/partial.cmd b/build/darknet/x64/partial.cmd index 3d2eeb7d..1248d1b7 100644 --- a/build/darknet/x64/partial.cmd +++ b/build/darknet/x64/partial.cmd @@ -6,6 +6,9 @@ rem Download Yolo9000: http://pjreddie.com/media/files/yolo9000.weights rem darknet.exe partial cfg/tiny-yolo-voc.cfg tiny-yolo-voc.weights tiny-yolo-voc.conv.13 13 +darknet.exe partial cfg/csdarknet53-omega.cfg csdarknet53-omega_final.weights csdarknet53-omega.conv.105 105 + + darknet.exe partial cfg/cd53paspp-omega.cfg cd53paspp-omega_final.weights cd53paspp-omega.conv.137 137 diff --git a/include/darknet.h b/include/darknet.h index 0fc9734d..92ad96a3 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -324,6 +324,7 @@ struct layer { int onlyforward; int stopbackward; + int train_only_bn; int dont_update; int burnin_update; int dontload; diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 896189be..c63e0aaf 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -744,7 +744,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state assert((l.nweights) > 0); cuda_convert_f32_to_f16(l.weight_updates_gpu, l.nweights, l.weight_updates_gpu16); - if (!state.net.adversarial) { + if (!state.net.adversarial && !l.train_only_bn) { CHECK_CUDNN(cudnnConvolutionBackwardFilter(cudnn_handle(), &one, l.srcTensorDesc16, @@ -796,7 +796,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state backward_batchnorm_layer_gpu(l, state); } - if (!state.net.adversarial) { + if (!state.net.adversarial && !l.train_only_bn) { // calculate conv weight updates // if used: beta=1 then loss decreases faster CHECK_CUDNN(cudnnConvolutionBackwardFilter(cudnn_handle(), @@ -857,17 +857,19 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state float *im = state.input + (i*l.groups + j)*l.c / l.groups*l.h*l.w; - //im2col_ongpu(im, l.c / l.groups, l.h, l.w, l.size, l.stride, l.pad, state.workspace); - im2col_gpu_ext(im, // input - l.c / l.groups, // input channels - l.h, l.w, // input size (h, w) - l.size, l.size, // kernel size (h, w) - l.pad * l.dilation, l.pad * l.dilation, // padding (h, w) - l.stride_y, l.stride_x, // stride (h, w) - l.dilation, l.dilation, // dilation (h, w) - state.workspace); // output - //gemm_ongpu(0, 1, m, n, k, 1, a + i*m*k, k, b, k, 1, c, n); - gemm_ongpu(0, 1, m, n, k, 1, a, k, b, k, 1, c, n); + if (!state.net.adversarial && !l.train_only_bn) { + //im2col_ongpu(im, l.c / l.groups, l.h, l.w, l.size, l.stride, l.pad, state.workspace); + im2col_gpu_ext(im, // input + l.c / l.groups, // input channels + l.h, l.w, // input size (h, w) + l.size, l.size, // kernel size (h, w) + l.pad * l.dilation, l.pad * l.dilation, // padding (h, w) + l.stride_y, l.stride_x, // stride (h, w) + l.dilation, l.dilation, // dilation (h, w) + state.workspace); // output + //gemm_ongpu(0, 1, m, n, k, 1, a + i*m*k, k, b, k, 1, c, n); + gemm_ongpu(0, 1, m, n, k, 1, a, k, b, k, 1, c, n); + } if (state.delta) { if (l.binary || l.xnor) swap_binary(&l); diff --git a/src/network_kernels.cu b/src/network_kernels.cu index 68c2a208..7d12f077 100644 --- a/src/network_kernels.cu +++ b/src/network_kernels.cu @@ -273,6 +273,8 @@ void update_network_gpu(network net) l.t = get_current_batch(net); if (iteration_num > (net.max_batches * 1 / 2)) l.deform = 0; if (l.burnin_update && (l.burnin_update*net.burn_in > iteration_num)) continue; + if (l.train_only_bn) continue; + if(l.update_gpu && l.dont_update < iteration_num){ l.update_gpu(l, update_batch, rate, net.momentum, net.decay, net.loss_scale); } diff --git a/src/parser.c b/src/parser.c index 28dc9f3d..13378363 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1190,6 +1190,39 @@ int is_network(section *s) || strcmp(s->type, "[network]")==0); } +void set_train_only_bn(network net) +{ + int train_only_bn = 0; + int i; + for (i = net.n - 1; i >= 0; --i) { + if (net.layers[i].train_only_bn) train_only_bn = net.layers[i].train_only_bn; // set l.train_only_bn for all previous layers + if (train_only_bn) { + net.layers[i].train_only_bn = train_only_bn; + + if (net.layers[i].type == CONV_LSTM) { + net.layers[i].wf->train_only_bn = train_only_bn; + net.layers[i].wi->train_only_bn = train_only_bn; + net.layers[i].wg->train_only_bn = train_only_bn; + net.layers[i].wo->train_only_bn = train_only_bn; + net.layers[i].uf->train_only_bn = train_only_bn; + net.layers[i].ui->train_only_bn = train_only_bn; + net.layers[i].ug->train_only_bn = train_only_bn; + net.layers[i].uo->train_only_bn = train_only_bn; + if (net.layers[i].peephole) { + net.layers[i].vf->train_only_bn = train_only_bn; + net.layers[i].vi->train_only_bn = train_only_bn; + net.layers[i].vo->train_only_bn = train_only_bn; + } + } + else if (net.layers[i].type == CRNN) { + net.layers[i].input_layer->train_only_bn = train_only_bn; + net.layers[i].self_layer->train_only_bn = train_only_bn; + net.layers[i].output_layer->train_only_bn = train_only_bn; + } + } + } +} + network parse_network_cfg(char *filename) { return parse_network_cfg_custom(filename, 0, 0); @@ -1450,6 +1483,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) l.dont_update = option_find_int_quiet(options, "dont_update", 0); l.burnin_update = option_find_int_quiet(options, "burnin_update", 0); l.stopbackward = option_find_int_quiet(options, "stopbackward", 0); + l.train_only_bn = option_find_int_quiet(options, "train_only_bn", 0); l.dontload = option_find_int_quiet(options, "dontload", 0); l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); @@ -1522,6 +1556,8 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) } #endif + set_train_only_bn(net); // set l.train_only_bn for all required layers + net.outputs = get_network_output_size(net); net.output = get_network_output(net); avg_outputs = avg_outputs / avg_counter;