diff --git a/src/connected_layer.c b/src/connected_layer.c index 66f7c91c..e54837ac 100644 --- a/src/connected_layer.c +++ b/src/connected_layer.c @@ -11,8 +11,11 @@ #include #include -static size_t get_connected_workspace_size(layer l) { +size_t get_connected_workspace_size(layer l) +{ #ifdef CUDNN + return get_convolutional_workspace_size(l); + /* if (gpu_index >= 0) { size_t most = 0; size_t s = 0; @@ -42,6 +45,7 @@ static size_t get_connected_workspace_size(layer l) { if (s > most) most = s; return most; } + */ #endif return 0; } diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 6c08c077..80884f7f 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -100,7 +100,7 @@ image get_convolutional_delta(convolutional_layer l) return float_to_image(w,h,c,l.delta); } -size_t get_workspace_size(layer l){ +size_t get_workspace_size32(layer l){ #ifdef CUDNN if(gpu_index >= 0){ size_t most = 0; @@ -173,6 +173,12 @@ size_t get_workspace_size16(layer l) { //return (size_t)l.out_h*l.out_w*l.size*l.size*l.c * sizeof(float); } +size_t get_convolutional_workspace_size(layer l) { + size_t workspace_size = get_workspace_size32(l); + size_t workspace_size16 = get_workspace_size16(l); + if (workspace_size16 > workspace_size) workspace_size = workspace_size16; + return workspace_size; +} #ifdef GPU #ifdef CUDNN void create_convolutional_cudnn_tensors(layer *l) @@ -462,9 +468,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, #endif } #endif - l.workspace_size = get_workspace_size(l); - size_t workspace_size16 = get_workspace_size16(l); - if (workspace_size16 > l.workspace_size) l.workspace_size = workspace_size16; + l.workspace_size = get_convolutional_workspace_size(l); //fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; @@ -566,9 +570,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h) cudnn_convolutional_setup(l, cudnn_fastest); #endif #endif - l->workspace_size = get_workspace_size(*l); - size_t workspace_size16 = get_workspace_size16(*l); - if (workspace_size16 > l->workspace_size) l->workspace_size = workspace_size16; + l->workspace_size = get_convolutional_workspace_size(*l); #ifdef CUDNN // check for excessive memory consumption @@ -578,9 +580,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h) if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) { printf(" used slow CUDNN algo without Workspace! Need memory: %zu, available: %zu\n", l->workspace_size, (free_byte < total_byte/2) ? free_byte : total_byte/2); cudnn_convolutional_setup(l, cudnn_smallest); - l->workspace_size = get_workspace_size(*l); - size_t workspace_size16 = get_workspace_size16(*l); - if (workspace_size16 > l->workspace_size) l->workspace_size = workspace_size16; + l->workspace_size = get_convolutional_workspace_size(*l); } #endif } diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index 19a06251..d6ec9551 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -22,6 +22,7 @@ void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int #ifdef CUDNN void cudnn_convolutional_setup(layer *l, int cudnn_preference); void create_convolutional_cudnn_tensors(layer *l); +size_t get_convolutional_workspace_size(layer l); void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16); #endif #endif