From 7ee4135910624f11e80de36b236208b223f58eb4 Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Wed, 10 Oct 2018 13:51:36 +0300 Subject: [PATCH] Fix OpenCV compilation. maxpool_layer on GPU uses cuDNN. --- src/darknet.c | 2 ++ src/data.c | 4 ++-- src/image.c | 2 ++ src/layer.h | 1 + src/maxpool_layer.c | 22 +++++++++++++++++++++- src/maxpool_layer_kernels.cu | 27 +++++++++++++++++++++++++++ 6 files changed, 55 insertions(+), 3 deletions(-) diff --git a/src/darknet.c b/src/darknet.c index 1dc073bc..41878041 100644 --- a/src/darknet.c +++ b/src/darknet.c @@ -370,6 +370,8 @@ int main(int argc, char **argv) gpu_index = find_int_arg(argc, argv, "-i", 0); if(find_arg(argc, argv, "-nogpu")) { gpu_index = -1; + printf("\n Currently Darknet doesn't support -nogpu flag. If you want to use CPU - please compile Darknet with GPU=0 in the Makefile, or compile darknet_no_gpu.sln on Windows.\n"); + exit(-1); } #ifndef GPU diff --git a/src/data.c b/src/data.c index 4168e657..d152fde4 100644 --- a/src/data.c +++ b/src/data.c @@ -336,8 +336,8 @@ void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, // if truth (box for object) is smaller than 1x1 pix char buff[256]; if (id >= classes) { - printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d] \n", id, classes); - sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, classes); + printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d] \n", id, (classes-1)); + sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, (classes-1)); system(buff); getchar(); ++sub; diff --git a/src/image.c b/src/image.c index e79fc098..b564f5b4 100644 --- a/src/image.c +++ b/src/image.c @@ -21,6 +21,8 @@ #include "http_stream.h" #endif #include "http_stream.h" + +#define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 ) #endif extern int check_mistakes; diff --git a/src/layer.h b/src/layer.h index a4fd312e..2cfa0871 100644 --- a/src/layer.h +++ b/src/layer.h @@ -307,6 +307,7 @@ struct layer{ cudnnConvolutionFwdAlgo_t fw_algo; cudnnConvolutionBwdDataAlgo_t bd_algo; cudnnConvolutionBwdFilterAlgo_t bf_algo; + cudnnPoolingDescriptor_t poolingDesc; #endif #endif }; diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index 5ff7e9d4..b45eb44f 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -47,7 +47,27 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s l.indexes_gpu = cuda_make_int_array(output_size); l.output_gpu = cuda_make_array(l.output, output_size); l.delta_gpu = cuda_make_array(l.delta, output_size); - #endif +#ifdef CUDNN + cudnnStatus_t maxpool_status; + maxpool_status = cudnnCreatePoolingDescriptor(&l.poolingDesc); + + maxpool_status = cudnnSetPooling2dDescriptor( + l.poolingDesc, + CUDNN_POOLING_MAX, + CUDNN_PROPAGATE_NAN, // CUDNN_PROPAGATE_NAN, CUDNN_NOT_PROPAGATE_NAN + l.size, + l.size, + 0, //l.pad, + 0, //l.pad, + l.stride, + l.stride); + + cudnnCreateTensorDescriptor(&l.srcTensorDesc); + cudnnCreateTensorDescriptor(&l.dstTensorDesc); + cudnnSetTensor4dDescriptor(l.srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.c, l.h, l.w); + cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); +#endif // CUDNN + #endif // GPU l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); return l; diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu index 78b7f397..05d5b4ff 100644 --- a/src/maxpool_layer_kernels.cu +++ b/src/maxpool_layer_kernels.cu @@ -86,6 +86,33 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state) { + +#ifdef CUDNN + if (!state.train) {// && layer.stride == layer.size) { + // cudnnPoolingBackward + cudnnStatus_t maxpool_status; + + float alpha = 1, beta = 0; + maxpool_status = cudnnPoolingForward( + cudnn_handle(), + layer.poolingDesc, + &alpha, + layer.srcTensorDesc, + state.input, + &beta, + layer.dstTensorDesc, + layer.output_gpu); + + //maxpool_status = cudnnDestroyPoolingDescriptor(poolingDesc); + //cudnnDestroyTensorDescriptor(layer.srcTensorDesc); + //cudnnDestroyTensorDescriptor(layer.dstTensorDesc); + + return; + } +#endif + + + int h = layer.out_h; int w = layer.out_w; int c = layer.c;