get_connected_workspace_size() and get_convolutional_workspace_size()

pull/4477/head
AlexeyAB 6 years ago
parent 6832290eee
commit 9e07605bc5
  1. 6
      src/connected_layer.c
  2. 20
      src/convolutional_layer.c
  3. 1
      src/convolutional_layer.h

@ -11,8 +11,11 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
static size_t get_connected_workspace_size(layer l) { size_t get_connected_workspace_size(layer l)
{
#ifdef CUDNN #ifdef CUDNN
return get_convolutional_workspace_size(l);
/*
if (gpu_index >= 0) { if (gpu_index >= 0) {
size_t most = 0; size_t most = 0;
size_t s = 0; size_t s = 0;
@ -42,6 +45,7 @@ static size_t get_connected_workspace_size(layer l) {
if (s > most) most = s; if (s > most) most = s;
return most; return most;
} }
*/
#endif #endif
return 0; return 0;
} }

@ -100,7 +100,7 @@ image get_convolutional_delta(convolutional_layer l)
return float_to_image(w,h,c,l.delta); return float_to_image(w,h,c,l.delta);
} }
size_t get_workspace_size(layer l){ size_t get_workspace_size32(layer l){
#ifdef CUDNN #ifdef CUDNN
if(gpu_index >= 0){ if(gpu_index >= 0){
size_t most = 0; size_t most = 0;
@ -173,6 +173,12 @@ size_t get_workspace_size16(layer l) {
//return (size_t)l.out_h*l.out_w*l.size*l.size*l.c * sizeof(float); //return (size_t)l.out_h*l.out_w*l.size*l.size*l.c * sizeof(float);
} }
/*
 * Returns the larger of the two values reported for layer `l` by
 * get_workspace_size32() and get_workspace_size16().
 *
 * Both helpers are always called, 32 first and then 16; when the two
 * results are equal the get_workspace_size32() value is returned.
 */
size_t get_convolutional_workspace_size(layer l) {
    const size_t size32 = get_workspace_size32(l);
    const size_t size16 = get_workspace_size16(l);
    return (size16 > size32) ? size16 : size32;
}
#ifdef GPU #ifdef GPU
#ifdef CUDNN #ifdef CUDNN
void create_convolutional_cudnn_tensors(layer *l) void create_convolutional_cudnn_tensors(layer *l)
@ -462,9 +468,7 @@ convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w,
#endif #endif
} }
#endif #endif
l.workspace_size = get_workspace_size(l); l.workspace_size = get_convolutional_workspace_size(l);
size_t workspace_size16 = get_workspace_size16(l);
if (workspace_size16 > l.workspace_size) l.workspace_size = workspace_size16;
//fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); //fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; l.bflops = (2.0 * l.n * l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.;
@ -566,9 +570,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
cudnn_convolutional_setup(l, cudnn_fastest); cudnn_convolutional_setup(l, cudnn_fastest);
#endif #endif
#endif #endif
l->workspace_size = get_workspace_size(*l); l->workspace_size = get_convolutional_workspace_size(*l);
size_t workspace_size16 = get_workspace_size16(*l);
if (workspace_size16 > l->workspace_size) l->workspace_size = workspace_size16;
#ifdef CUDNN #ifdef CUDNN
// check for excessive memory consumption // check for excessive memory consumption
@ -578,9 +580,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h)
if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) { if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) {
printf(" used slow CUDNN algo without Workspace! Need memory: %zu, available: %zu\n", l->workspace_size, (free_byte < total_byte/2) ? free_byte : total_byte/2); printf(" used slow CUDNN algo without Workspace! Need memory: %zu, available: %zu\n", l->workspace_size, (free_byte < total_byte/2) ? free_byte : total_byte/2);
cudnn_convolutional_setup(l, cudnn_smallest); cudnn_convolutional_setup(l, cudnn_smallest);
l->workspace_size = get_workspace_size(*l); l->workspace_size = get_convolutional_workspace_size(*l);
size_t workspace_size16 = get_workspace_size16(*l);
if (workspace_size16 > l->workspace_size) l->workspace_size = workspace_size16;
} }
#endif #endif
} }

@ -22,6 +22,7 @@ void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int
#ifdef CUDNN #ifdef CUDNN
void cudnn_convolutional_setup(layer *l, int cudnn_preference); void cudnn_convolutional_setup(layer *l, int cudnn_preference);
void create_convolutional_cudnn_tensors(layer *l); void create_convolutional_cudnn_tensors(layer *l);
size_t get_convolutional_workspace_size(layer l);
void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16); void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16);
#endif #endif
#endif #endif

Loading…
Cancel
Save