From 6832290eee1b421226013ac28177614f3e1e4903 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Fri, 8 Feb 2019 00:49:51 +0300
Subject: [PATCH] Fixed set_batch_network(), when workspace larger for smaller
 batch

---
 src/network.c | 55 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 42 insertions(+), 13 deletions(-)

diff --git a/src/network.c b/src/network.c
index ba8a1721..115edeb0 100644
--- a/src/network.c
+++ b/src/network.c
@@ -360,34 +360,63 @@ float train_network_batch(network net, data d, int n)
     return (float)sum/(n*batch);
 }
 
+int recalculate_workspace_size(network *net)
+{
+#ifdef GPU
+    cuda_set_device(net->gpu_index);
+    if (gpu_index >= 0) cuda_free(net->workspace);
+#endif
+    int i;
+    size_t workspace_size = 0;
+    for (i = 0; i < net->n; ++i) {
+        layer l = net->layers[i];
+        //printf(" %d: layer = %d,", i, l.type);
+        if (l.type == CONVOLUTIONAL) {
+            l.workspace_size = get_convolutional_workspace_size(l);
+        }
+        else if (l.type == CONNECTED) {
+            l.workspace_size = get_connected_workspace_size(l);
+        }
+        if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
+        net->layers[i] = l;
+    }
+
+#ifdef GPU
+    if (gpu_index >= 0) {
+        printf("\n try to allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000);
+        net->workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
+        printf(" CUDA allocate done! \n");
+    }
+    else {
+        free(net->workspace);
+        net->workspace = calloc(1, workspace_size);
+    }
+#else
+    free(net->workspace);
+    net->workspace = calloc(1, workspace_size);
+#endif
+    //fprintf(stderr, " Done!\n");
+    return 0;
+}
+
 void set_batch_network(network *net, int b)
 {
     net->batch = b;
     int i;
     for(i = 0; i < net->n; ++i){
         net->layers[i].batch = b;
+
 #ifdef CUDNN
         if(net->layers[i].type == CONVOLUTIONAL){
             cudnn_convolutional_setup(net->layers + i, cudnn_fastest);
-            /*
-            layer *l = net->layers + i;
-            cudnn_convolutional_setup(l, cudnn_fastest);
-            // check for excessive memory consumption
-            size_t free_byte;
-            size_t total_byte;
-            check_error(cudaMemGetInfo(&free_byte, &total_byte));
-            if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) {
-                printf(" used slow CUDNN algo without Workspace! \n");
-                cudnn_convolutional_setup(l, cudnn_smallest);
-                l->workspace_size = get_workspace_size(*l);
-            }
-            */
         }
         else if (net->layers[i].type == MAXPOOL) {
             cudnn_maxpool_setup(net->layers + i);
         }
 #endif
+
     }
+    recalculate_workspace_size(net); // recalculate workspace size
 }
 
 int resize_network(network *net, int w, int h)