Fixed CPU-RAM + GPU-processing bug for the Dropout layer

pull/6241/head
AlexeyAB 6 years ago
parent 213b82a1bd
commit 5d0352f961
  1. 10
      src/network_kernels.cu
  2. 7
      src/parser.c

@ -112,10 +112,12 @@ void backward_network_gpu(network net, network_state state)
if (l.onlyforward) continue;
l.backward_gpu(l, state);
layer prev = net.layers[i - 1];
if (net.optimized_memory && state.delta && !prev.keep_delta_gpu) {
simple_copy_ongpu(prev.outputs*prev.batch, state.delta, prev.delta_gpu);
fill_ongpu(prev.outputs*prev.batch, 0, net.state_delta_gpu, 1);
if (i != 0) {
layer prev = net.layers[i - 1];
if (net.optimized_memory && state.delta && !prev.keep_delta_gpu && prev.delta_gpu != l.delta_gpu) {
simple_copy_ongpu(prev.outputs*prev.batch, state.delta, prev.delta_gpu);
fill_ongpu(prev.outputs*prev.batch, 0, net.state_delta_gpu, 1);
}
}
/*

@ -1148,6 +1148,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
#ifdef GPU
l.output_gpu = net.layers[count-1].output_gpu;
l.delta_gpu = net.layers[count-1].delta_gpu;
l.keep_delta_gpu = 1;
#endif
}
else if (lt == EMPTY) {
@ -1168,7 +1169,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
#ifdef GPU
// further GPU-memory optimization: net.optimized_memory == 2
if (net.optimized_memory >= 2 && params.train)
if (net.optimized_memory >= 2 && params.train && l.type != DROPOUT)
{
l.optimized_memory = net.optimized_memory;
if (l.output_gpu) {
@ -1187,7 +1188,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
}
// maximum optimization
if (net.optimized_memory >= 3) {
if (net.optimized_memory >= 3 && l.type != DROPOUT) {
if (l.delta_gpu) {
cuda_free(l.delta_gpu);
//l.delta_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps
@ -1257,7 +1258,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
}
// maximum optimization
if (net.optimized_memory >= 3) {
if (net.optimized_memory >= 3 && l.type != DROPOUT) {
if (l.delta_gpu && l.keep_delta_gpu) {
//cuda_free(l.delta_gpu); // already called above
l.delta_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps

Loading…
Cancel
Save