diff --git a/src/detector.c b/src/detector.c index feb86d55..86612b69 100644 --- a/src/detector.c +++ b/src/detector.c @@ -55,6 +55,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i name_list, names_size, net_classes, cfgfile); if (net_classes > names_size) getchar(); } + free_ptrs((void**)names, names_size); } srand(time(0)); diff --git a/src/layer.c b/src/layer.c index fcdb33fa..4ef496fc 100644 --- a/src/layer.c +++ b/src/layer.c @@ -203,5 +203,32 @@ void free_layer(layer l) if (l.last_prev_state_gpu) cuda_free(l.last_prev_state_gpu); if (l.last_prev_cell_gpu) cuda_free(l.last_prev_cell_gpu); if (l.cell_gpu) cuda_free(l.cell_gpu); -#endif +#ifdef CUDNN_DISABLED // shouldn't be used for -map + if (l.srcTensorDesc) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.srcTensorDesc)); + if (l.dstTensorDesc) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.dstTensorDesc)); + if (l.srcTensorDesc16) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.srcTensorDesc16)); + if (l.dstTensorDesc16) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.dstTensorDesc16)); + if (l.dsrcTensorDesc) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.dsrcTensorDesc)); + if (l.ddstTensorDesc) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.ddstTensorDesc)); + if (l.dsrcTensorDesc16) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.dsrcTensorDesc16)); + if (l.ddstTensorDesc16) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.ddstTensorDesc16)); + if (l.normTensorDesc) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.normTensorDesc)); + if (l.normDstTensorDesc) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.normDstTensorDesc)); + if (l.normDstTensorDescF16) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.normDstTensorDescF16)); + + if (l.weightDesc) CHECK_CUDNN(cudnnDestroyFilterDescriptor(l.weightDesc)); + if (l.weightDesc16) CHECK_CUDNN(cudnnDestroyFilterDescriptor(l.weightDesc16)); + if (l.dweightDesc) CHECK_CUDNN(cudnnDestroyFilterDescriptor(l.dweightDesc)); + if (l.dweightDesc16) 
CHECK_CUDNN(cudnnDestroyFilterDescriptor(l.dweightDesc16)); + + if (l.convDesc) CHECK_CUDNN(cudnnDestroyConvolutionDescriptor(l.convDesc)); + + if (l.poolingDesc) CHECK_CUDNN(cudnnDestroyPoolingDescriptor(l.poolingDesc)); + + //cudnnConvolutionFwdAlgo_t fw_algo, fw_algo16; + //cudnnConvolutionBwdDataAlgo_t bd_algo, bd_algo16; + //cudnnConvolutionBwdFilterAlgo_t bf_algo, bf_algo16; +#endif // CUDNN + +#endif // GPU } diff --git a/src/list.c b/src/list.c index 6ba09302..624ecf65 100644 --- a/src/list.c +++ b/src/list.c @@ -65,6 +65,17 @@ void free_node(node *n) } } +void free_list_val(list *l) +{ + node *n = l->front; + node *next; + while (n) { + next = n->next; + free(n->val); + n = next; + } +} + void free_list(list *l) { free_node(l->front); diff --git a/src/list.h b/src/list.h index 2623174a..182648f7 100644 --- a/src/list.h +++ b/src/list.h @@ -23,6 +23,7 @@ void list_insert(list *, void *); void **list_to_array(list *l); +void free_list_val(list *l); void free_list(list *l); void free_list_contents(list *l); void free_list_contents_kvp(list *l); diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index 54aa9c0e..6c483ce3 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -20,14 +20,19 @@ image get_maxpool_delta(maxpool_layer l) return float_to_image(w,h,c,l.delta); } +void create_maxpool_cudnn_tensors(layer *l) +{ +#ifdef CUDNN + CHECK_CUDNN(cudnnCreatePoolingDescriptor(&l->poolingDesc)); + CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->srcTensorDesc)); + CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->dstTensorDesc)); +#endif // CUDNN +} void cudnn_maxpool_setup(layer *l) { #ifdef CUDNN - cudnnStatus_t maxpool_status; - maxpool_status = cudnnCreatePoolingDescriptor(&l->poolingDesc); - - maxpool_status = cudnnSetPooling2dDescriptor( + CHECK_CUDNN(cudnnSetPooling2dDescriptor( l->poolingDesc, CUDNN_POOLING_MAX, CUDNN_NOT_PROPAGATE_NAN, // CUDNN_PROPAGATE_NAN, CUDNN_NOT_PROPAGATE_NAN @@ -36,12 +41,10 @@ void cudnn_maxpool_setup(layer *l) l->pad/2, //0, 
//l.pad, l->pad/2, //0, //l.pad, l->stride_x, - l->stride_y); + l->stride_y)); - cudnnCreateTensorDescriptor(&l->srcTensorDesc); - cudnnCreateTensorDescriptor(&l->dstTensorDesc); - cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); - cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); + CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w)); + CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w)); #endif // CUDNN } @@ -99,6 +102,7 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s l.delta_gpu = cuda_make_array(l.delta, output_size); } l.output_gpu = cuda_make_array(l.output, output_size); + create_maxpool_cudnn_tensors(&l); cudnn_maxpool_setup(&l); #endif // GPU diff --git a/src/utils.c b/src/utils.c index af7cb9ea..b445e5ce 100644 --- a/src/utils.c +++ b/src/utils.c @@ -180,7 +180,7 @@ void find_replace(const char* str, char* orig, char* rep, char* output) sprintf(buffer, "%s", str); if(!(p = strstr(buffer, orig))){ // Is 'orig' even in 'str'? - sprintf(output, "%s", str); + sprintf(output, "%s", buffer); free(buffer); return; }