From 3ff5084590bce621b195196edd32ac7e3b04bfbd Mon Sep 17 00:00:00 2001 From: AlexeyAB Date: Sun, 6 Jan 2019 23:51:38 +0300 Subject: [PATCH] Added include/darknet.h --- Makefile | 18 +- build/darknet/darknet.vcxproj | 5 +- build/darknet/darknet_no_gpu.vcxproj | 9 +- build/darknet/x64/darknet.py | 4 +- build/darknet/x64/darknet_video.cmd | 5 + build/darknet/x64/darknet_video.py | 320 +++++++++ build/darknet/yolo_console_dll.vcxproj | 9 +- build/darknet/yolo_cpp_dll.vcxproj | 17 +- build/darknet/yolo_cpp_dll_no_gpu.vcxproj | 26 +- darknet.py | 4 +- darknet_video.py | 320 +++++++++ include/darknet.h | 793 ++++++++++++++++++++++ {src => include}/yolo_v2_class.hpp | 58 +- src/activation_kernels.cu | 94 ++- src/activations.h | 7 +- src/box.h | 40 +- src/cuda.h | 21 +- src/data.h | 27 +- src/http_stream.h | 1 + src/image.c | 32 +- src/image.h | 20 +- src/layer.h | 108 +-- src/matrix.h | 10 +- src/network.c | 32 +- src/network.h | 37 +- src/option_list.h | 25 +- src/parser.c | 25 + src/tree.h | 27 +- src/utils.h | 15 +- src/yolo_v2_class.cpp | 36 +- 30 files changed, 1830 insertions(+), 315 deletions(-) create mode 100644 build/darknet/x64/darknet_video.cmd create mode 100644 build/darknet/x64/darknet_video.py create mode 100644 darknet_video.py create mode 100644 include/darknet.h rename {src => include}/yolo_v2_class.hpp (93%) diff --git a/Makefile b/Makefile index 99781ea9..46811ddc 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ EXEC=darknet OBJDIR=./obj/ ifeq ($(LIBSO), 1) -LIBNAMESO=darknet.so +LIBNAMESO=libdarknet.so APPNAMESO=uselib endif @@ -56,7 +56,7 @@ CPP=g++ NVCC=nvcc OPTS=-Ofast LDFLAGS= -lm -pthread -COMMON= +COMMON= -Iinclude/ CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas ifeq ($(DEBUG), 1) @@ -115,18 +115,18 @@ OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernel endif OBJS = $(addprefix $(OBJDIR), $(OBJ)) -DEPS = $(wildcard src/*.h) Makefile +DEPS = $(wildcard src/*.h) Makefile include/darknet.h 
-all: obj backup results $(EXEC) $(LIBNAMESO) $(APPNAMESO) +all: obj backup results setchmod $(EXEC) $(LIBNAMESO) $(APPNAMESO) ifeq ($(LIBSO), 1) CFLAGS+= -fPIC -$(LIBNAMESO): $(OBJS) src/yolo_v2_class.hpp src/yolo_v2_class.cpp - $(CPP) -shared -std=c++11 -fvisibility=hidden -DYOLODLL_EXPORTS $(COMMON) $(CFLAGS) $(OBJS) src/yolo_v2_class.cpp -o $@ $(LDFLAGS) +$(LIBNAMESO): $(OBJS) include/yolo_v2_class.hpp src/yolo_v2_class.cpp + $(CPP) -shared -std=c++11 -fvisibility=hidden -DLIB_EXPORTS $(COMMON) $(CFLAGS) $(OBJS) src/yolo_v2_class.cpp -o $@ $(LDFLAGS) -$(APPNAMESO): $(LIBNAMESO) src/yolo_v2_class.hpp src/yolo_console_dll.cpp - $(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ $(LIBNAMESO) +$(APPNAMESO): $(LIBNAMESO) include/yolo_v2_class.hpp src/yolo_console_dll.cpp + $(CPP) -std=c++11 $(COMMON) $(CFLAGS) -o $@ src/yolo_console_dll.cpp $(LDFLAGS) -L ./ -l:$(LIBNAMESO) endif $(EXEC): $(OBJS) @@ -147,6 +147,8 @@ backup: mkdir -p backup results: mkdir -p results +setchmod: + chmod +x *.sh .PHONY: clean diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj index b8e4c08b..f1500eed 100644 --- a/build/darknet/darknet.vcxproj +++ b/build/darknet/darknet.vcxproj @@ -88,7 +88,7 @@ Level3 Disabled true - C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include + C:\opencv_3.0\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include CUDNN;_CRTDBG_MAP_ALLOC;_MBCS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) OPENCV; true @@ -132,7 +132,7 @@ true true true - C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include + 
C:\opencv_3.0\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include OPENCV;CUDNN;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) c11 c++1y @@ -236,6 +236,7 @@ + diff --git a/build/darknet/darknet_no_gpu.vcxproj b/build/darknet/darknet_no_gpu.vcxproj index 8b173015..785b2f02 100644 --- a/build/darknet/darknet_no_gpu.vcxproj +++ b/build/darknet/darknet_no_gpu.vcxproj @@ -78,7 +78,7 @@ Level3 Disabled true - C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories); + C:\opencv_3.0\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories); _CRTDBG_MAP_ALLOC;_MBCS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) OPENCV; stdlib.h;crtdbg.h;%(ForcedIncludeFiles) @@ -94,7 +94,7 @@ Level3 Disabled true - C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories) + C:\opencv_3.0\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories) _MBCS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) OPENCV;CUDNN true @@ -118,7 +118,7 @@ true true true - C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories); + C:\opencv_3.0\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories); _TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) @@ -138,7 +138,7 @@ true true true - C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories) + C:\opencv_3.0\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories) 
OPENCV;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;_CRT_RAND_S;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) c11 c++1y @@ -241,6 +241,7 @@ + diff --git a/build/darknet/x64/darknet.py b/build/darknet/x64/darknet.py index 4bca5f5a..784399ee 100644 --- a/build/darknet/x64/darknet.py +++ b/build/darknet/x64/darknet.py @@ -75,7 +75,7 @@ class METADATA(Structure): #lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL) -#lib = CDLL("darknet.so", RTLD_GLOBAL) +#lib = CDLL("libdarknet.so", RTLD_GLOBAL) hasGPU = True if os.name == "nt": cwd = os.path.dirname(__file__) @@ -119,7 +119,7 @@ if os.name == "nt": lib = CDLL(winGPUdll, RTLD_GLOBAL) print("Environment variables indicated a CPU run, but we didn't find `"+winNoGPUdll+"`. Trying a GPU run anyway.") else: - lib = CDLL("./darknet.so", RTLD_GLOBAL) + lib = CDLL("./libdarknet.so", RTLD_GLOBAL) lib.network_width.argtypes = [c_void_p] lib.network_width.restype = c_int lib.network_height.argtypes = [c_void_p] diff --git a/build/darknet/x64/darknet_video.cmd b/build/darknet/x64/darknet_video.cmd new file mode 100644 index 00000000..8311dc0c --- /dev/null +++ b/build/darknet/x64/darknet_video.cmd @@ -0,0 +1,5 @@ +rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\python.exe darknet_video.py + +C:\Python27\python.exe darknet_video.py + +pause \ No newline at end of file diff --git a/build/darknet/x64/darknet_video.py b/build/darknet/x64/darknet_video.py new file mode 100644 index 00000000..1a486ced --- /dev/null +++ b/build/darknet/x64/darknet_video.py @@ -0,0 +1,320 @@ +from ctypes import * +import math +import random +import os +import cv2 +import numpy as np +import time + + +def sample(probs): + s = sum(probs) + probs = [a/s for a in probs] + r = random.uniform(0, 1) + for i in range(len(probs)): + r = r - probs[i] + if r <= 0: + return i + return len(probs)-1 + + +def c_array(ctype, values): + arr = (ctype*len(values))() + arr[:] = values + return arr + + +class 
BOX(Structure): + _fields_ = [("x", c_float), + ("y", c_float), + ("w", c_float), + ("h", c_float)] + + +class DETECTION(Structure): + _fields_ = [("bbox", BOX), + ("classes", c_int), + ("prob", POINTER(c_float)), + ("mask", POINTER(c_float)), + ("objectness", c_float), + ("sort_class", c_int)] + + +class IMAGE(Structure): + _fields_ = [("w", c_int), + ("h", c_int), + ("c", c_int), + ("data", POINTER(c_float))] + + +class METADATA(Structure): + _fields_ = [("classes", c_int), + ("names", POINTER(c_char_p))] + + +hasGPU = True + +lib = CDLL("yolo_cpp_dll.dll", RTLD_GLOBAL) +lib.network_width.argtypes = [c_void_p] +lib.network_width.restype = c_int +lib.network_height.argtypes = [c_void_p] +lib.network_height.restype = c_int + +predict = lib.network_predict +predict.argtypes = [c_void_p, POINTER(c_float)] +predict.restype = POINTER(c_float) + +if hasGPU: + set_gpu = lib.cuda_set_device + set_gpu.argtypes = [c_int] + +make_image = lib.make_image +make_image.argtypes = [c_int, c_int, c_int] +make_image.restype = IMAGE + +get_network_boxes = lib.get_network_boxes +get_network_boxes.argtypes = \ + [c_void_p, c_int, c_int, c_float, c_float, POINTER( + c_int), c_int, POINTER(c_int), c_int] +get_network_boxes.restype = POINTER(DETECTION) + +make_network_boxes = lib.make_network_boxes +make_network_boxes.argtypes = [c_void_p] +make_network_boxes.restype = POINTER(DETECTION) + +free_detections = lib.free_detections +free_detections.argtypes = [POINTER(DETECTION), c_int] + +free_ptrs = lib.free_ptrs +free_ptrs.argtypes = [POINTER(c_void_p), c_int] + +network_predict = lib.network_predict +network_predict.argtypes = [c_void_p, POINTER(c_float)] + +reset_rnn = lib.reset_rnn +reset_rnn.argtypes = [c_void_p] + +load_net = lib.load_network +load_net.argtypes = [c_char_p, c_char_p, c_int] +load_net.restype = c_void_p + +load_net_custom = lib.load_network_custom +load_net_custom.argtypes = [c_char_p, c_char_p, c_int, c_int] +load_net_custom.restype = c_void_p + +do_nms_obj = 
lib.do_nms_obj +do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +do_nms_sort = lib.do_nms_sort +do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +free_image = lib.free_image +free_image.argtypes = [IMAGE] + +letterbox_image = lib.letterbox_image +letterbox_image.argtypes = [IMAGE, c_int, c_int] +letterbox_image.restype = IMAGE + +load_meta = lib.get_metadata +lib.get_metadata.argtypes = [c_char_p] +lib.get_metadata.restype = METADATA + +load_image = lib.load_image_color +load_image.argtypes = [c_char_p, c_int, c_int] +load_image.restype = IMAGE + +rgbgr_image = lib.rgbgr_image +rgbgr_image.argtypes = [IMAGE] + +predict_image = lib.network_predict_image +predict_image.argtypes = [c_void_p, IMAGE] +predict_image.restype = POINTER(c_float) + + +def array_to_image(arr): + import numpy as np + arr = arr.transpose(2, 0, 1) + c = arr.shape[0] + h = arr.shape[1] + w = arr.shape[2] + arr = np.ascontiguousarray(arr.flat, dtype=np.float32) / 255.0 + data = arr.ctypes.data_as(POINTER(c_float)) + im = IMAGE(w, h, c, data) + return im, arr + + +def classify(net, meta, im): + out = predict_image(net, im) + res = [] + for i in range(meta.classes): + if altNames is None: + nameTag = meta.names[i] + else: + nameTag = altNames[i] + res.append((nameTag, out[i])) + res = sorted(res, key=lambda x: -x[1]) + return res + + +def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45, debug=False): + im, arr = array_to_image(image) + if debug: + print("Loaded image") + num = c_int(0) + if debug: + print("Assigned num") + pnum = pointer(num) + if debug: + print("Assigned pnum") + predict_image(net, im) + if debug: + print("did prediction") + # dets = get_network_boxes( + # net, image.shape[1], image.shape[0], + # thresh, hier_thresh, + # None, 0, pnum, 0) # OpenCV + dets = get_network_boxes(net, im.w, im.h, + thresh, hier_thresh, None, 0, pnum, 0) + if debug: + print("Got dets") + num = pnum[0] + if debug: + print("got zeroth index of pnum") + if 
nms: + do_nms_sort(dets, num, meta.classes, nms) + if debug: + print("did sort") + res = [] + if debug: + print("about to range") + for j in range(num): + if debug: + print("Ranging on "+str(j)+" of "+str(num)) + if debug: + print("Classes: "+str(meta), meta.classes, meta.names) + for i in range(meta.classes): + if debug: + print("Class-ranging on "+str(i)+" of " + + str(meta.classes)+"= "+str(dets[j].prob[i])) + if dets[j].prob[i] > 0: + b = dets[j].bbox + if altNames is None: + nameTag = meta.names[i] + else: + nameTag = altNames[i] + if debug: + print("Got bbox", b) + print(nameTag) + print(dets[j].prob[i]) + print((b.x, b.y, b.w, b.h)) + res.append((nameTag, dets[j].prob[i], (b.x, b.y, b.w, b.h))) + if debug: + print("did range") + res = sorted(res, key=lambda x: -x[1]) + if debug: + print("did sort") + # free_image(im) + if debug: + print("freed image") + free_detections(dets, num) + if debug: + print("freed detections") + return res + + +def convertBack(x, y, w, h): + xmin = int(round(x - (w / 2))) + xmax = int(round(x + (w / 2))) + ymin = int(round(y - (h / 2))) + ymax = int(round(y + (h / 2))) + return xmin, ymin, xmax, ymax + + +def cvDrawBoxes(detections, img): + for detection in detections: + x, y, w, h = detection[2][0],\ + detection[2][1],\ + detection[2][2],\ + detection[2][3] + xmin, ymin, xmax, ymax = convertBack( + float(x), float(y), float(w), float(h)) + pt1 = (xmin, ymin) + pt2 = (xmax, ymax) + cv2.rectangle(img, pt1, pt2, (0, 255, 0), 2) + cv2.putText(img, + detection[0].decode() + + " [" + str(round(detection[1] * 100, 2)) + "]", + (pt1[0], pt1[1] + 20), cv2.FONT_HERSHEY_SIMPLEX, 1, + [0, 255, 0], 4) + return img + + +netMain = None +metaMain = None +altNames = None + + +def YOLO(): + global metaMain, netMain, altNames + configPath = "./cfg/yolov3.cfg" + weightPath = "./yolov3.weights" + metaPath = "./cfg/coco.data" + if not os.path.exists(configPath): + raise ValueError("Invalid config path `" + + os.path.abspath(configPath)+"`") + if not 
os.path.exists(weightPath): + raise ValueError("Invalid weight path `" + + os.path.abspath(weightPath)+"`") + if not os.path.exists(metaPath): + raise ValueError("Invalid data file path `" + + os.path.abspath(metaPath)+"`") + if netMain is None: + netMain = load_net_custom(configPath.encode( + "ascii"), weightPath.encode("ascii"), 0, 1) # batch size = 1 + if metaMain is None: + metaMain = load_meta(metaPath.encode("ascii")) + if altNames is None: + try: + with open(metaPath) as metaFH: + metaContents = metaFH.read() + import re + match = re.search("names *= *(.*)$", metaContents, + re.IGNORECASE | re.MULTILINE) + if match: + result = match.group(1) + else: + result = None + try: + if os.path.exists(result): + with open(result) as namesFH: + namesList = namesFH.read().strip().split("\n") + altNames = [x.strip() for x in namesList] + except TypeError: + pass + except Exception: + pass + #cap = cv2.VideoCapture(0) + cap = cv2.VideoCapture("test.mp4") + cap.set(3, 1280) + cap.set(4, 720) + out = cv2.VideoWriter( + "output.avi", cv2.VideoWriter_fourcc(*"MJPG"), 10.0, + (lib.network_width(netMain), lib.network_height(netMain))) + print("Starting the YOLO loop...") + while True: + prev_time = time.time() + ret, frame_read = cap.read() + frame_rgb = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB) + frame_resized = cv2.resize(frame_rgb, + (lib.network_width(netMain), + lib.network_height(netMain)), + interpolation=cv2.INTER_LINEAR) + detections = detect(netMain, metaMain, frame_resized, thresh=0.25) + image = cvDrawBoxes(detections, frame_resized) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + print(1/(time.time()-prev_time)) + cap.release() + out.release() + +if __name__ == "__main__": + YOLO() diff --git a/build/darknet/yolo_console_dll.vcxproj b/build/darknet/yolo_console_dll.vcxproj index 064c94a1..763914b0 100644 --- a/build/darknet/yolo_console_dll.vcxproj +++ b/build/darknet/yolo_console_dll.vcxproj @@ -80,6 +80,7 @@ Level3 Disabled true + ..\..\include; @@ -87,7 
+88,7 @@ Level3 Disabled true - C:\opencv_3.0\opencv\build\include + ..\..\include;C:\opencv_3.0\opencv\build\include MultiThreadedDLL _CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) @@ -102,6 +103,7 @@ true true true + ..\..\include; true @@ -115,7 +117,7 @@ true true true - C:\opencv_source\opencv\bin\install\include + ..\..\include;C:\opencv_source\opencv\bin\install\include _CRT_SECURE_NO_WARNINGS;_MBCS;%(PreprocessorDefinitions) Async true @@ -132,6 +134,9 @@ + + + diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj index bb8cace6..bde77841 100644 --- a/build/darknet/yolo_cpp_dll.vcxproj +++ b/build/darknet/yolo_cpp_dll.vcxproj @@ -80,6 +80,8 @@ Level3 Disabled true + ..\..\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) + LIB_EXPORTS;_MBCS;%(PreprocessorDefinitions) true @@ -90,8 +92,8 @@ Level3 Disabled true - ..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include - _MBCS;YOLODLL_EXPORTS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + ..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include + _MBCS;LIB_EXPORTS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) CUDNN true @@ -114,8 +116,8 @@ true true true - C:\opencv_2.4.9\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include - YOLODLL_EXPORTS;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + C:\opencv_2.4.9\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include + LIB_EXPORTS;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true @@ -133,8 +135,8 @@ true true true - 
C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include - CUDNN;GPU;YOLODLL_EXPORTS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + C:\opencv_3.0\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include + CUDNN;GPU;LIB_EXPORTS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) c11 c++1y CompileAsCpp @@ -236,9 +238,10 @@ - + + diff --git a/build/darknet/yolo_cpp_dll_no_gpu.vcxproj b/build/darknet/yolo_cpp_dll_no_gpu.vcxproj index ca5610fd..65399935 100644 --- a/build/darknet/yolo_cpp_dll_no_gpu.vcxproj +++ b/build/darknet/yolo_cpp_dll_no_gpu.vcxproj @@ -77,6 +77,8 @@ Level3 Disabled true + ..\..\include; + LIB_EXPORTS;_MBCS;%(PreprocessorDefinitions) true @@ -87,16 +89,17 @@ Level3 Disabled true - ..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include - _MBCS;YOLODLL_EXPORTS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + ..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include + _MBCS;LIB_EXPORTS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) CUDNN true + stdlib.h;crtdbg.h;%(ForcedIncludeFiles) true - $(CUDA_PATH)lib\$(PlatformName);$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) + %(AdditionalLibraryDirectories) $(OutDir)\$(TargetName)$(TargetExt) - ..\..\3rdparty\lib\x64\pthreadVC2.lib;cublas.lib;curand.lib;cudart.lib;%(AdditionalDependencies) + ..\..\3rdparty\lib\x64\pthreadVC2.lib;%(AdditionalDependencies) true @@ -111,16 +114,16 @@ true true true - 
C:\opencv_2.4.9\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include - YOLODLL_EXPORTS;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + C:\opencv_2.4.9\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir);$(cudnn)\include + LIB_EXPORTS;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;GPU;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true true true true - C:\opencv_2.4.9\opencv\build\x86\vc14\lib;C:\opencv_2.4.9\opencv\build\x86\vc12\lib;$(CUDA_PATH)lib\$(PlatformName);$(cudnn)\lib\x64;%(AdditionalLibraryDirectories) - ..\..\3rdparty\lib\x86\pthreadVC2.lib;cudart.lib;cublas.lib;curand.lib;%(AdditionalDependencies) + C:\opencv_2.4.9\opencv\build\x86\vc14\lib;C:\opencv_2.4.9\opencv\build\x86\vc12\lib;%(AdditionalLibraryDirectories) + ..\..\3rdparty\lib\x86\pthreadVC2.lib;%(AdditionalDependencies) @@ -130,8 +133,8 @@ true true true - C:\opencv_3.0\opencv\build\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories) - YOLODLL_EXPORTS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + C:\opencv_3.0\opencv\build\include;..\..\include;..\..\3rdparty\include;%(AdditionalIncludeDirectories) + LIB_EXPORTS;_TIMESPEC_DEFINED;_SCL_SECURE_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) c11 c++1y CompileAsCpp @@ -221,9 +224,10 @@ - + + diff --git a/darknet.py b/darknet.py index 84bbfc42..3d885d20 100644 --- a/darknet.py +++ b/darknet.py @@ -75,7 +75,7 @@ class METADATA(Structure): #lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL) -#lib = CDLL("darknet.so", RTLD_GLOBAL) +#lib = CDLL("libdarknet.so", RTLD_GLOBAL) hasGPU = True if os.name == "nt": cwd = os.path.dirname(__file__) @@ -119,7 +119,7 @@ if os.name == "nt": lib = CDLL(winGPUdll, RTLD_GLOBAL) 
print("Environment variables indicated a CPU run, but we didn't find `"+winNoGPUdll+"`. Trying a GPU run anyway.") else: - lib = CDLL("./darknet.so", RTLD_GLOBAL) + lib = CDLL("./libdarknet.so", RTLD_GLOBAL) lib.network_width.argtypes = [c_void_p] lib.network_width.restype = c_int lib.network_height.argtypes = [c_void_p] diff --git a/darknet_video.py b/darknet_video.py new file mode 100644 index 00000000..7a8e8f00 --- /dev/null +++ b/darknet_video.py @@ -0,0 +1,320 @@ +from ctypes import * +import math +import random +import os +import cv2 +import numpy as np +import time + + +def sample(probs): + s = sum(probs) + probs = [a/s for a in probs] + r = random.uniform(0, 1) + for i in range(len(probs)): + r = r - probs[i] + if r <= 0: + return i + return len(probs)-1 + + +def c_array(ctype, values): + arr = (ctype*len(values))() + arr[:] = values + return arr + + +class BOX(Structure): + _fields_ = [("x", c_float), + ("y", c_float), + ("w", c_float), + ("h", c_float)] + + +class DETECTION(Structure): + _fields_ = [("bbox", BOX), + ("classes", c_int), + ("prob", POINTER(c_float)), + ("mask", POINTER(c_float)), + ("objectness", c_float), + ("sort_class", c_int)] + + +class IMAGE(Structure): + _fields_ = [("w", c_int), + ("h", c_int), + ("c", c_int), + ("data", POINTER(c_float))] + + +class METADATA(Structure): + _fields_ = [("classes", c_int), + ("names", POINTER(c_char_p))] + + +hasGPU = True + +lib = CDLL("./libdarknet.so", RTLD_GLOBAL) +lib.network_width.argtypes = [c_void_p] +lib.network_width.restype = c_int +lib.network_height.argtypes = [c_void_p] +lib.network_height.restype = c_int + +predict = lib.network_predict +predict.argtypes = [c_void_p, POINTER(c_float)] +predict.restype = POINTER(c_float) + +if hasGPU: + set_gpu = lib.cuda_set_device + set_gpu.argtypes = [c_int] + +make_image = lib.make_image +make_image.argtypes = [c_int, c_int, c_int] +make_image.restype = IMAGE + +get_network_boxes = lib.get_network_boxes +get_network_boxes.argtypes = \ + [c_void_p, 
c_int, c_int, c_float, c_float, POINTER( + c_int), c_int, POINTER(c_int), c_int] +get_network_boxes.restype = POINTER(DETECTION) + +make_network_boxes = lib.make_network_boxes +make_network_boxes.argtypes = [c_void_p] +make_network_boxes.restype = POINTER(DETECTION) + +free_detections = lib.free_detections +free_detections.argtypes = [POINTER(DETECTION), c_int] + +free_ptrs = lib.free_ptrs +free_ptrs.argtypes = [POINTER(c_void_p), c_int] + +network_predict = lib.network_predict +network_predict.argtypes = [c_void_p, POINTER(c_float)] + +reset_rnn = lib.reset_rnn +reset_rnn.argtypes = [c_void_p] + +load_net = lib.load_network +load_net.argtypes = [c_char_p, c_char_p, c_int] +load_net.restype = c_void_p + +load_net_custom = lib.load_network_custom +load_net_custom.argtypes = [c_char_p, c_char_p, c_int, c_int] +load_net_custom.restype = c_void_p + +do_nms_obj = lib.do_nms_obj +do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +do_nms_sort = lib.do_nms_sort +do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +free_image = lib.free_image +free_image.argtypes = [IMAGE] + +letterbox_image = lib.letterbox_image +letterbox_image.argtypes = [IMAGE, c_int, c_int] +letterbox_image.restype = IMAGE + +load_meta = lib.get_metadata +lib.get_metadata.argtypes = [c_char_p] +lib.get_metadata.restype = METADATA + +load_image = lib.load_image_color +load_image.argtypes = [c_char_p, c_int, c_int] +load_image.restype = IMAGE + +rgbgr_image = lib.rgbgr_image +rgbgr_image.argtypes = [IMAGE] + +predict_image = lib.network_predict_image +predict_image.argtypes = [c_void_p, IMAGE] +predict_image.restype = POINTER(c_float) + + +def array_to_image(arr): + import numpy as np + arr = arr.transpose(2, 0, 1) + c = arr.shape[0] + h = arr.shape[1] + w = arr.shape[2] + arr = np.ascontiguousarray(arr.flat, dtype=np.float32) / 255.0 + data = arr.ctypes.data_as(POINTER(c_float)) + im = IMAGE(w, h, c, data) + return im, arr + + +def classify(net, meta, im): + out = 
predict_image(net, im) + res = [] + for i in range(meta.classes): + if altNames is None: + nameTag = meta.names[i] + else: + nameTag = altNames[i] + res.append((nameTag, out[i])) + res = sorted(res, key=lambda x: -x[1]) + return res + + +def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45, debug=False): + im, arr = array_to_image(image) + if debug: + print("Loaded image") + num = c_int(0) + if debug: + print("Assigned num") + pnum = pointer(num) + if debug: + print("Assigned pnum") + predict_image(net, im) + if debug: + print("did prediction") + # dets = get_network_boxes( + # net, image.shape[1], image.shape[0], + # thresh, hier_thresh, + # None, 0, pnum, 0) # OpenCV + dets = get_network_boxes(net, im.w, im.h, + thresh, hier_thresh, None, 0, pnum, 0) + if debug: + print("Got dets") + num = pnum[0] + if debug: + print("got zeroth index of pnum") + if nms: + do_nms_sort(dets, num, meta.classes, nms) + if debug: + print("did sort") + res = [] + if debug: + print("about to range") + for j in range(num): + if debug: + print("Ranging on "+str(j)+" of "+str(num)) + if debug: + print("Classes: "+str(meta), meta.classes, meta.names) + for i in range(meta.classes): + if debug: + print("Class-ranging on "+str(i)+" of " + + str(meta.classes)+"= "+str(dets[j].prob[i])) + if dets[j].prob[i] > 0: + b = dets[j].bbox + if altNames is None: + nameTag = meta.names[i] + else: + nameTag = altNames[i] + if debug: + print("Got bbox", b) + print(nameTag) + print(dets[j].prob[i]) + print((b.x, b.y, b.w, b.h)) + res.append((nameTag, dets[j].prob[i], (b.x, b.y, b.w, b.h))) + if debug: + print("did range") + res = sorted(res, key=lambda x: -x[1]) + if debug: + print("did sort") + # free_image(im) + if debug: + print("freed image") + free_detections(dets, num) + if debug: + print("freed detections") + return res + + +def convertBack(x, y, w, h): + xmin = int(round(x - (w / 2))) + xmax = int(round(x + (w / 2))) + ymin = int(round(y - (h / 2))) + ymax = int(round(y + (h / 2))) + 
return xmin, ymin, xmax, ymax + + +def cvDrawBoxes(detections, img): + for detection in detections: + x, y, w, h = detection[2][0],\ + detection[2][1],\ + detection[2][2],\ + detection[2][3] + xmin, ymin, xmax, ymax = convertBack( + float(x), float(y), float(w), float(h)) + pt1 = (xmin, ymin) + pt2 = (xmax, ymax) + cv2.rectangle(img, pt1, pt2, (0, 255, 0), 2) + cv2.putText(img, + detection[0].decode() + + " [" + str(round(detection[1] * 100, 2)) + "]", + (pt1[0], pt1[1] + 20), cv2.FONT_HERSHEY_SIMPLEX, 1, + [0, 255, 0], 4) + return img + + +netMain = None +metaMain = None +altNames = None + + +def YOLO(): + global metaMain, netMain, altNames + configPath = "./cfg/yolov3.cfg" + weightPath = "./yolov3.weights" + metaPath = "./cfg/coco.data" + if not os.path.exists(configPath): + raise ValueError("Invalid config path `" + + os.path.abspath(configPath)+"`") + if not os.path.exists(weightPath): + raise ValueError("Invalid weight path `" + + os.path.abspath(weightPath)+"`") + if not os.path.exists(metaPath): + raise ValueError("Invalid data file path `" + + os.path.abspath(metaPath)+"`") + if netMain is None: + netMain = load_net_custom(configPath.encode( + "ascii"), weightPath.encode("ascii"), 0, 1) # batch size = 1 + if metaMain is None: + metaMain = load_meta(metaPath.encode("ascii")) + if altNames is None: + try: + with open(metaPath) as metaFH: + metaContents = metaFH.read() + import re + match = re.search("names *= *(.*)$", metaContents, + re.IGNORECASE | re.MULTILINE) + if match: + result = match.group(1) + else: + result = None + try: + if os.path.exists(result): + with open(result) as namesFH: + namesList = namesFH.read().strip().split("\n") + altNames = [x.strip() for x in namesList] + except TypeError: + pass + except Exception: + pass + #cap = cv2.VideoCapture(0) + cap = cv2.VideoCapture("test.mp4") + cap.set(3, 1280) + cap.set(4, 720) + out = cv2.VideoWriter( + "output.avi", cv2.VideoWriter_fourcc(*"MJPG"), 10.0, + (lib.network_width(netMain), 
lib.network_height(netMain))) + print("Starting the YOLO loop...") + while True: + prev_time = time.time() + ret, frame_read = cap.read() + frame_rgb = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB) + frame_resized = cv2.resize(frame_rgb, + (lib.network_width(netMain), + lib.network_height(netMain)), + interpolation=cv2.INTER_LINEAR) + detections = detect(netMain, metaMain, frame_resized, thresh=0.25) + image = cvDrawBoxes(detections, frame_resized) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + print(1/(time.time()-prev_time)) + cap.release() + out.release() + +if __name__ == "__main__": + YOLO() diff --git a/include/darknet.h b/include/darknet.h new file mode 100644 index 00000000..80dc28cd --- /dev/null +++ b/include/darknet.h @@ -0,0 +1,793 @@ +#ifndef DARKNET_API +#define DARKNET_API + +#if defined(_MSC_VER) && _MSC_VER < 1900 +#define inline __inline +#endif + +#include +#include +#include +#include +#include + +#ifdef LIB_EXPORTS +#if defined(_MSC_VER) +#define LIB_API __declspec(dllexport) +#else +#define LIB_API __attribute__((visibility("default"))) +#endif +#else +#if defined(_MSC_VER) +#define LIB_API +#else +#define LIB_API +#endif +#endif + +#ifdef GPU +#define BLOCK 512 + +#include "cuda_runtime.h" +#include "curand.h" +#include "cublas_v2.h" + +#ifdef CUDNN +#include "cudnn.h" +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct network; +typedef struct network network; + +struct network_state; +typedef struct network_state; + +struct layer; +typedef struct layer layer; + +struct image; +typedef struct image image; + +struct detection; +typedef struct detection detection; + +struct load_args; +typedef struct load_args load_args; + +struct data; +typedef struct data data; + +struct metadata; +typedef struct metadata metadata; + +struct tree; +typedef struct tree tree; + + +#define SECRET_NUM -1234 +extern int gpu_index; + +// option_list.h +typedef struct metadata { + int classes; + char **names; +} metadata; + + +// tree.h +typedef 
struct tree { + int *leaf; + int n; + int *parent; + int *child; + int *group; + char **name; + + int groups; + int *group_size; + int *group_offset; +} tree; + + +// activations.h +typedef enum { + LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU +}ACTIVATION; + +// image.h +typedef enum{ + PNG, BMP, TGA, JPG +} IMTYPE; + +// activations.h +typedef enum{ + MULT, ADD, SUB, DIV +} BINARY_ACTIVATION; + +// layer.h +typedef enum { + CONVOLUTIONAL, + DECONVOLUTIONAL, + CONNECTED, + MAXPOOL, + SOFTMAX, + DETECTION, + DROPOUT, + CROP, + ROUTE, + COST, + NORMALIZATION, + AVGPOOL, + LOCAL, + SHORTCUT, + ACTIVE, + RNN, + GRU, + LSTM, + CRNN, + BATCHNORM, + NETWORK, + XNOR, + REGION, + YOLO, + ISEG, + REORG, + REORG_OLD, + UPSAMPLE, + LOGXENT, + L2NORM, + BLANK +} LAYER_TYPE; + +// layer.h +typedef enum{ + SSE, MASKED, L1, SEG, SMOOTH,WGAN +} COST_TYPE; + +// layer.h +typedef struct update_args { + int batch; + float learning_rate; + float momentum; + float decay; + int adam; + float B1; + float B2; + float eps; + int t; +} update_args; + +// layer.h +struct layer { + LAYER_TYPE type; + ACTIVATION activation; + COST_TYPE cost_type; + void(*forward) (struct layer, struct network_state); + void(*backward) (struct layer, struct network_state); + void(*update) (struct layer, int, float, float, float); + void(*forward_gpu) (struct layer, struct network_state); + void(*backward_gpu) (struct layer, struct network_state); + void(*update_gpu) (struct layer, int, float, float, float); + int batch_normalize; + int shortcut; + int batch; + int forced; + int flipped; + int inputs; + int outputs; + int nweights; + int nbiases; + int extra; + int truths; + int h, w, c; + int out_h, out_w, out_c; + int n; + int max_boxes; + int groups; + int size; + int side; + int stride; + int reverse; + int flatten; + int spatial; + int pad; + int sqrt; + int flip; + int index; + int binary; + int xnor; + int use_bin_output; + int steps; + int hidden; + int 
truth; + float smooth; + float dot; + float angle; + float jitter; + float saturation; + float exposure; + float shift; + float ratio; + float learning_rate_scale; + float clip; + int focal_loss; + int noloss; + int softmax; + int classes; + int coords; + int background; + int rescore; + int objectness; + int does_cost; + int joint; + int noadjust; + int reorg; + int log; + int tanh; + int *mask; + int total; + float bflops; + + int adam; + float B1; + float B2; + float eps; + + int t; + + float alpha; + float beta; + float kappa; + + float coord_scale; + float object_scale; + float noobject_scale; + float mask_scale; + float class_scale; + int bias_match; + int random; + float ignore_thresh; + float truth_thresh; + float thresh; + float focus; + int classfix; + int absolute; + + int onlyforward; + int stopbackward; + int dontload; + int dontsave; + int dontloadscales; + int numload; + + float temperature; + float probability; + float scale; + + char * cweights; + int * indexes; + int * input_layers; + int * input_sizes; + int * map; + int * counts; + float ** sums; + float * rand; + float * cost; + float * state; + float * prev_state; + float * forgot_state; + float * forgot_delta; + float * state_delta; + float * combine_cpu; + float * combine_delta_cpu; + + float *concat; + float *concat_delta; + + float *binary_weights; + + float *biases; + float *bias_updates; + + float *scales; + float *scale_updates; + + float *weights; + float *weight_updates; + + char *align_bit_weights_gpu; + float *mean_arr_gpu; + float *align_workspace_gpu; + float *transposed_align_workspace_gpu; + int align_workspace_size; + + char *align_bit_weights; + float *mean_arr; + int align_bit_weights_size; + int lda_align; + int new_lda; + int bit_align; + + float *col_image; + float * delta; + float * output; + float * loss; + float * squared; + float * norms; + + float * spatial_mean; + float * mean; + float * variance; + + float * mean_delta; + float * variance_delta; + + float * 
rolling_mean; + float * rolling_variance; + + float * x; + float * x_norm; + + float * m; + float * v; + + float * bias_m; + float * bias_v; + float * scale_m; + float * scale_v; + + + float *z_cpu; + float *r_cpu; + float *h_cpu; + float * prev_state_cpu; + + float *temp_cpu; + float *temp2_cpu; + float *temp3_cpu; + + float *dh_cpu; + float *hh_cpu; + float *prev_cell_cpu; + float *cell_cpu; + float *f_cpu; + float *i_cpu; + float *g_cpu; + float *o_cpu; + float *c_cpu; + float *dc_cpu; + + float * binary_input; + + struct layer *input_layer; + struct layer *self_layer; + struct layer *output_layer; + + struct layer *reset_layer; + struct layer *update_layer; + struct layer *state_layer; + + struct layer *input_gate_layer; + struct layer *state_gate_layer; + struct layer *input_save_layer; + struct layer *state_save_layer; + struct layer *input_state_layer; + struct layer *state_state_layer; + + struct layer *input_z_layer; + struct layer *state_z_layer; + + struct layer *input_r_layer; + struct layer *state_r_layer; + + struct layer *input_h_layer; + struct layer *state_h_layer; + + struct layer *wz; + struct layer *uz; + struct layer *wr; + struct layer *ur; + struct layer *wh; + struct layer *uh; + struct layer *uo; + struct layer *wo; + struct layer *uf; + struct layer *wf; + struct layer *ui; + struct layer *wi; + struct layer *ug; + struct layer *wg; + + tree *softmax_tree; + + size_t workspace_size; + +#ifdef GPU + int *indexes_gpu; + + float *z_gpu; + float *r_gpu; + float *h_gpu; + + float *temp_gpu; + float *temp2_gpu; + float *temp3_gpu; + + float *dh_gpu; + float *hh_gpu; + float *prev_cell_gpu; + float *cell_gpu; + float *f_gpu; + float *i_gpu; + float *g_gpu; + float *o_gpu; + float *c_gpu; + float *dc_gpu; + + // adam + float *m_gpu; + float *v_gpu; + float *bias_m_gpu; + float *scale_m_gpu; + float *bias_v_gpu; + float *scale_v_gpu; + + float * combine_gpu; + float * combine_delta_gpu; + + float * prev_state_gpu; + float * forgot_state_gpu; + 
float * forgot_delta_gpu; + float * state_gpu; + float * state_delta_gpu; + float * gate_gpu; + float * gate_delta_gpu; + float * save_gpu; + float * save_delta_gpu; + float * concat_gpu; + float * concat_delta_gpu; + + float *binary_input_gpu; + float *binary_weights_gpu; + + float * mean_gpu; + float * variance_gpu; + + float * rolling_mean_gpu; + float * rolling_variance_gpu; + + float * variance_delta_gpu; + float * mean_delta_gpu; + + float * col_image_gpu; + + float * x_gpu; + float * x_norm_gpu; + float * weights_gpu; + float * weight_updates_gpu; + float * weight_change_gpu; + + float * weights_gpu16; + float * weight_updates_gpu16; + + float * biases_gpu; + float * bias_updates_gpu; + float * bias_change_gpu; + + float * scales_gpu; + float * scale_updates_gpu; + float * scale_change_gpu; + + float * output_gpu; + float * loss_gpu; + float * delta_gpu; + float * rand_gpu; + float * squared_gpu; + float * norms_gpu; +#ifdef CUDNN + cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; + cudnnTensorDescriptor_t srcTensorDesc16, dstTensorDesc16; + cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; + cudnnTensorDescriptor_t dsrcTensorDesc16, ddstTensorDesc16; + cudnnTensorDescriptor_t normTensorDesc, normDstTensorDesc, normDstTensorDescF16; + cudnnFilterDescriptor_t weightDesc, weightDesc16; + cudnnFilterDescriptor_t dweightDesc, dweightDesc16; + cudnnConvolutionDescriptor_t convDesc; + cudnnConvolutionFwdAlgo_t fw_algo, fw_algo16; + cudnnConvolutionBwdDataAlgo_t bd_algo, bd_algo16; + cudnnConvolutionBwdFilterAlgo_t bf_algo, bf_algo16; + cudnnPoolingDescriptor_t poolingDesc; +#endif // CUDNN +#endif // GPU +}; + + +// network.h +typedef enum { + CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM +} learning_rate_policy; + +// network.h +typedef struct network { + int n; + int batch; + uint64_t *seen; + int *t; + float epoch; + int subdivisions; + layer *layers; + float *output; + learning_rate_policy policy; + + float learning_rate; + float momentum; + float 
decay; + float gamma; + float scale; + float power; + int time_steps; + int step; + int max_batches; + float *scales; + int *steps; + int num_steps; + int burn_in; + int cudnn_half; + + int adam; + float B1; + float B2; + float eps; + + int inputs; + int outputs; + int truths; + int notruth; + int h, w, c; + int max_crop; + int min_crop; + float max_ratio; + float min_ratio; + int center; + int flip; // horizontal flip 50% probability augmentation for classifier training (default = 1) + float angle; + float aspect; + float exposure; + float saturation; + float hue; + int random; + int small_object; + + int gpu_index; + tree *hierarchy; + + float *input; + float *truth; + float *delta; + float *workspace; + int train; + int index; + float *cost; + float clip; + +#ifdef GPU + //float *input_gpu; + //float *truth_gpu; + float *delta_gpu; + float *output_gpu; + + float *input_state_gpu; + + float **input_gpu; + float **truth_gpu; + float **input16_gpu; + float **output16_gpu; + size_t *max_input16_size; + size_t *max_output16_size; + int wait_stream; +#endif +} network; + +// network.h +typedef struct network_state { + float *truth; + float *input; + float *delta; + float *workspace; + int train; + int index; + network net; +} network_state; + +//typedef struct { +// int w; +// int h; +// float scale; +// float rad; +// float dx; +// float dy; +// float aspect; +//} augment_args; + +// image.h +typedef struct image { + int w; + int h; + int c; + float *data; +} image; + +//typedef struct { +// int w; +// int h; +// int c; +// float *data; +//} image; + +// box.h +typedef struct box { + float x, y, w, h; +} box; + +// box.h +typedef struct detection{ + box bbox; + int classes; + float *prob; + float *mask; + float objectness; + int sort_class; +} detection; + +// matrix.h +typedef struct matrix { + int rows, cols; + float **vals; +} matrix; + +// data.h +typedef struct data { + int w, h; + matrix X; + matrix y; + int shallow; + int *num_boxes; + box **boxes; +} data; +
+// data.h +typedef enum { + CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA +} data_type; + +// data.h +typedef struct load_args { + int threads; + char **paths; + char *path; + int n; + int m; + char **labels; + int h; + int w; + int c; // color depth + int out_w; + int out_h; + int nh; + int nw; + int num_boxes; + int min, max, size; + int classes; + int background; + int scale; + int center; + int coords; + int small_object; + float jitter; + int flip; + float angle; + float aspect; + float saturation; + float exposure; + float hue; + data *d; + image *im; + image *resized; + data_type type; + tree *hierarchy; +} load_args; + +// data.h +typedef struct box_label { + int id; + float x, y, w, h; + float left, right, top, bottom; +} box_label; + +// list.h +//typedef struct node { +// void *val; +// struct node *next; +// struct node *prev; +//} node; + +// list.h +//typedef struct list { +// int size; +// node *front; +// node *back; +//} list; + +// ----------------------------------------------------- + + +// parser.c +LIB_API network *load_network(char *cfg, char *weights, int clear); +LIB_API network *load_network_custom(char *cfg, char *weights, int clear, int batch); +LIB_API network *load_network(char *cfg, char *weights, int clear); + +// network.c +LIB_API load_args get_base_args(network *net); + +// box.h +LIB_API void do_nms_sort(detection *dets, int total, int classes, float thresh); +LIB_API void do_nms_obj(detection *dets, int total, int classes, float thresh); + +// network.h +LIB_API float *network_predict(network net, float *input); +LIB_API detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter); +LIB_API void free_detections(detection *dets, int n); +LIB_API void 
fuse_conv_batchnorm(network net); +LIB_API void calculate_binary_weights(network net); + +LIB_API layer* get_network_layer(network* net, int i); +LIB_API detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter); +LIB_API detection *make_network_boxes(network *net, float thresh, int *num); +LIB_API void reset_rnn(network *net); +LIB_API float *network_predict_image(network *net, image im); +LIB_API float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float thresh_calc_avg_iou, const float iou_thresh, network *existing_net); +LIB_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map); +LIB_API int network_width(network *net); +LIB_API int network_height(network *net); +LIB_API void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm); + +// image.h +LIB_API image resize_image(image im, int w, int h); +LIB_API image letterbox_image(image im, int w, int h); +LIB_API void rgbgr_image(image im); +LIB_API image make_image(int w, int h, int c); +LIB_API image load_image_color(char *filename, int w, int h); +LIB_API void free_image(image m); + +// layer.h +LIB_API void free_layer(layer); + +// data.c +LIB_API void free_data(data d); +LIB_API pthread_t load_data(load_args args); +LIB_API pthread_t load_data_in_thread(load_args args); + +// cuda.h +LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n); +LIB_API void cuda_set_device(int n); + +// utils.h +LIB_API void free_ptrs(void **ptrs, int n); +LIB_API void top_k(float *a, int n, int k, int *index); + +// tree.h +LIB_API tree *read_tree(char *filename); + +// option_list.h +LIB_API metadata get_metadata(char *file); + + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // DARKNET_API diff --git a/src/yolo_v2_class.hpp b/include/yolo_v2_class.hpp similarity index 93% rename from 
src/yolo_v2_class.hpp rename to include/yolo_v2_class.hpp index 199b1c97..74e1c116 100644 --- a/src/yolo_v2_class.hpp +++ b/include/yolo_v2_class.hpp @@ -1,15 +1,15 @@ #pragma once -#ifdef YOLODLL_EXPORTS +#ifdef LIB_EXPORTS #if defined(_MSC_VER) -#define YOLODLL_API __declspec(dllexport) +#define LIB_EXPORTS __declspec(dllexport) #else -#define YOLODLL_API __attribute__((visibility("default"))) +#define LIB_EXPORTS __attribute__((visibility("default"))) #endif #else #if defined(_MSC_VER) -#define YOLODLL_API __declspec(dllimport) +#define LIB_EXPORTS __declspec(dllimport) #else -#define YOLODLL_API +#define LIB_EXPORTS #endif #endif @@ -45,12 +45,12 @@ struct bbox_t_container { #include "opencv2/imgproc/imgproc_c.h" // C #endif // OPENCV -extern "C" YOLODLL_API int init(const char *configurationFilename, const char *weightsFilename, int gpu); -extern "C" YOLODLL_API int detect_image(const char *filename, bbox_t_container &container); -extern "C" YOLODLL_API int detect_mat(const uint8_t* data, const size_t data_length, bbox_t_container &container); -extern "C" YOLODLL_API int dispose(); -extern "C" YOLODLL_API int get_device_count(); -extern "C" YOLODLL_API int get_device_name(int gpu, char* deviceName); +extern "C" LIB_EXPORTS int init(const char *configurationFilename, const char *weightsFilename, int gpu); +extern "C" LIB_EXPORTS int detect_image(const char *filename, bbox_t_container &container); +extern "C" LIB_EXPORTS int detect_mat(const uint8_t* data, const size_t data_length, bbox_t_container &container); +extern "C" LIB_EXPORTS int dispose(); +extern "C" LIB_EXPORTS int get_device_count(); +extern "C" LIB_EXPORTS int get_device_name(int gpu, char* deviceName); class Detector { std::shared_ptr detector_gpu_ptr; @@ -60,18 +60,18 @@ public: float nms = .4; bool wait_stream; - YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0); - YOLODLL_API ~Detector(); + LIB_EXPORTS Detector(std::string cfg_filename, std::string 
weight_filename, int gpu_id = 0); + LIB_EXPORTS ~Detector(); - YOLODLL_API std::vector detect(std::string image_filename, float thresh = 0.2, bool use_mean = false); - YOLODLL_API std::vector detect(image_t img, float thresh = 0.2, bool use_mean = false); - static YOLODLL_API image_t load_image(std::string image_filename); - static YOLODLL_API void free_image(image_t m); - YOLODLL_API int get_net_width() const; - YOLODLL_API int get_net_height() const; - YOLODLL_API int get_net_color_depth() const; + LIB_EXPORTS std::vector detect(std::string image_filename, float thresh = 0.2, bool use_mean = false); + LIB_EXPORTS std::vector detect(image_t img, float thresh = 0.2, bool use_mean = false); + static LIB_EXPORTS image_t load_image(std::string image_filename); + static LIB_EXPORTS void free_image(image_t m); + LIB_EXPORTS int get_net_width() const; + LIB_EXPORTS int get_net_height() const; + LIB_EXPORTS int get_net_color_depth() const; - YOLODLL_API std::vector tracking_id(std::vector cur_bbox_vec, bool const change_history = true, + LIB_EXPORTS std::vector tracking_id(std::vector cur_bbox_vec, bool const change_history = true, int const frames_story = 10, int const max_dist = 150); std::vector detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false) @@ -613,13 +613,13 @@ public: /* // C - wrappers - YOLODLL_API void create_detector(char const* cfg_filename, char const* weight_filename, int gpu_id); - YOLODLL_API void delete_detector(); - YOLODLL_API bbox_t* detect_custom(image_t img, float thresh, bool use_mean, int *result_size); - YOLODLL_API bbox_t* detect_resized(image_t img, int init_w, int init_h, float thresh, bool use_mean, int *result_size); - YOLODLL_API bbox_t* detect(image_t img, int *result_size); - YOLODLL_API image_t load_img(char *image_filename); - YOLODLL_API void free_img(image_t m); + LIB_EXPORTS void create_detector(char const* cfg_filename, char const* weight_filename, int gpu_id); + LIB_EXPORTS void 
delete_detector(); + LIB_EXPORTS bbox_t* detect_custom(image_t img, float thresh, bool use_mean, int *result_size); + LIB_EXPORTS bbox_t* detect_resized(image_t img, int init_w, int init_h, float thresh, bool use_mean, int *result_size); + LIB_EXPORTS bbox_t* detect(image_t img, int *result_size); + LIB_EXPORTS image_t load_img(char *image_filename); + LIB_EXPORTS void free_img(image_t m); #ifdef __cplusplus } // extern "C" @@ -628,7 +628,7 @@ static std::shared_ptr c_detector_ptr; static std::vector c_result_vec; void create_detector(char const* cfg_filename, char const* weight_filename, int gpu_id) { - c_detector_ptr = std::make_shared(cfg_filename, weight_filename, gpu_id); + c_detector_ptr = std::make_shared(cfg_filename, weight_filename, gpu_id); } void delete_detector() { c_detector_ptr.reset(); } diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu index 8e2a5479..4285b5f3 100644 --- a/src/activation_kernels.cu +++ b/src/activation_kernels.cu @@ -1,3 +1,4 @@ +#include "darknet.h" #include "cuda_runtime.h" #include "curand.h" #include "cublas_v2.h" @@ -113,39 +114,74 @@ __device__ float activate_kernel(float x, ACTIVATION a) __device__ float gradient_kernel(float x, ACTIVATION a) { - switch(a){ - case LINEAR: - return linear_gradient_kernel(x); - case LOGISTIC: - return logistic_gradient_kernel(x); - case LOGGY: - return loggy_gradient_kernel(x); - case RELU: - return relu_gradient_kernel(x); - case ELU: - return elu_gradient_kernel(x); - case SELU: - return selu_gradient_kernel(x); - case RELIE: - return relie_gradient_kernel(x); - case RAMP: - return ramp_gradient_kernel(x); - case LEAKY: - return leaky_gradient_kernel(x); - case TANH: - return tanh_gradient_kernel(x); - case PLSE: - return plse_gradient_kernel(x); - case STAIR: - return stair_gradient_kernel(x); - case HARDTAN: - return hardtan_gradient_kernel(x); - case LHTAN: - return lhtan_gradient_kernel(x); + switch (a) { + case LINEAR: + return linear_gradient_kernel(x); + case 
LOGISTIC: + return logistic_gradient_kernel(x); + case LOGGY: + return loggy_gradient_kernel(x); + case RELU: + return relu_gradient_kernel(x); + case ELU: + return elu_gradient_kernel(x); + case SELU: + return selu_gradient_kernel(x); + case RELIE: + return relie_gradient_kernel(x); + case RAMP: + return ramp_gradient_kernel(x); + case LEAKY: + return leaky_gradient_kernel(x); + case TANH: + return tanh_gradient_kernel(x); + case PLSE: + return plse_gradient_kernel(x); + case STAIR: + return stair_gradient_kernel(x); + case HARDTAN: + return hardtan_gradient_kernel(x); + case LHTAN: + return lhtan_gradient_kernel(x); } return 0; } +__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s / 2 + i]; + if (id < n) { + float de = dy[id]; + dx[b*s + i] = x2*de; + dx[b*s + s / 2 + i] = x1*de; + } +} + +extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y) +{ + binary_gradient_array_kernel << > >(x, dx, n / 2, size, a, y); + check_error(cudaPeekAtLastError()); +} +__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + int i = id % s; + int b = id / s; + float x1 = x[b*s + i]; + float x2 = x[b*s + s / 2 + i]; + if (id < n) y[id] = x1*x2; +} + +extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y) +{ + binary_activate_array_kernel << > >(x, n / 2, size, a, y); + check_error(cudaPeekAtLastError()); +} + __global__ void activate_array_kernel(float *x, int n, ACTIVATION a) { int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; diff --git a/src/activations.h b/src/activations.h index 94c7f37a..90930d44 100644 --- 
a/src/activations.h +++ b/src/activations.h @@ -1,11 +1,12 @@ #ifndef ACTIVATIONS_H #define ACTIVATIONS_H +#include "darknet.h" #include "cuda.h" #include "math.h" -typedef enum{ - LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU -}ACTIVATION; +//typedef enum{ +// LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU +//}ACTIVATION; ACTIVATION get_activation(char *s); diff --git a/src/box.h b/src/box.h index 6f5e7383..0e6fd918 100644 --- a/src/box.h +++ b/src/box.h @@ -1,36 +1,24 @@ #ifndef BOX_H #define BOX_H -#ifdef YOLODLL_EXPORTS -#if defined(_MSC_VER) -#define YOLODLL_API __declspec(dllexport) -#else -#define YOLODLL_API __attribute__((visibility("default"))) -#endif -#else -#if defined(_MSC_VER) -#define YOLODLL_API -#else -#define YOLODLL_API -#endif -#endif +#include "darknet.h" -typedef struct{ - float x, y, w, h; -} box; +//typedef struct{ +// float x, y, w, h; +//} box; typedef struct{ float dx, dy, dw, dh; } dbox; -typedef struct detection { - box bbox; - int classes; - float *prob; - float *mask; - float objectness; - int sort_class; -} detection; +//typedef struct detection { +// box bbox; +// int classes; +// float *prob; +// float *mask; +// float objectness; +// int sort_class; +//} detection; typedef struct detection_with_class { detection det; @@ -45,8 +33,8 @@ float box_rmse(box a, box b); dbox diou(box a, box b); void do_nms(box *boxes, float **probs, int total, int classes, float thresh); void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thresh); -YOLODLL_API void do_nms_sort(detection *dets, int total, int classes, float thresh); -YOLODLL_API void do_nms_obj(detection *dets, int total, int classes, float thresh); +//LIB_API void do_nms_sort(detection *dets, int total, int classes, float thresh); +//LIB_API void do_nms_obj(detection *dets, int total, int classes, float thresh); box decode_box(box b, box anchor); box 
encode_box(box b, box anchor); diff --git a/src/cuda.h b/src/cuda.h index 289ee5b4..6aa83550 100644 --- a/src/cuda.h +++ b/src/cuda.h @@ -1,24 +1,11 @@ #ifndef CUDA_H #define CUDA_H +#include "darknet.h" #if defined(_MSC_VER) && _MSC_VER < 1900 #define inline __inline #endif -#ifdef YOLODLL_EXPORTS -#if defined(_MSC_VER) -#define YOLODLL_API __declspec(dllexport) -#else -#define YOLODLL_API __attribute__((visibility("default"))) -#endif -#else -#if defined(_MSC_VER) -#define YOLODLL_API -#else -#define YOLODLL_API -#endif -#endif - extern int gpu_index; #ifdef GPU @@ -42,8 +29,8 @@ extern "C" { int *cuda_make_int_array(size_t n); int *cuda_make_int_array_new_api(int *x, size_t n); void cuda_push_array(float *x_gpu, float *x, size_t n); - YOLODLL_API void cuda_pull_array(float *x_gpu, float *x, size_t n); - YOLODLL_API void cuda_set_device(int n); + //LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n); + //LIB_API void cuda_set_device(int n); int cuda_get_device(); void cuda_free(float *x_gpu); void cuda_random(float *x_gpu, size_t n); @@ -60,6 +47,6 @@ enum {cudnn_fastest, cudnn_smallest}; #endif #else // GPU -YOLODLL_API void cuda_set_device(int n); +//LIB_API void cuda_set_device(int n); #endif // GPU #endif // CUDA_H diff --git a/src/data.h b/src/data.h index f7ab585c..844680dc 100644 --- a/src/data.h +++ b/src/data.h @@ -6,6 +6,7 @@ #define inline __inline #endif +#include "darknet.h" #include "matrix.h" #include "list.h" #include "image.h" @@ -22,19 +23,19 @@ static inline float distance_from_edge(int x, int max) return dist; } -typedef struct{ - int w, h; - matrix X; - matrix y; - int shallow; - int *num_boxes; - box **boxes; -} data; - -typedef enum { - CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, LETTERBOX_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA -} data_type; +//typedef struct{ +// int w, h; +// matrix X; +// matrix y; +// int shallow; +// int 
*num_boxes; +// box **boxes; +//} data; +//typedef enum { +// CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, LETTERBOX_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA +//} data_type; +/* typedef struct load_args{ int threads; char **paths; @@ -80,7 +81,7 @@ void free_data(data d); pthread_t load_data(load_args args); pthread_t load_data_in_thread(load_args args); - +*/ void print_letters(float *pred, int n); data load_data_captcha(char **paths, int n, int m, int k, int w, int h); data load_data_captcha_encode(char **paths, int n, int m, int w, int h); diff --git a/src/http_stream.h b/src/http_stream.h index b7702250..04c7516b 100644 --- a/src/http_stream.h +++ b/src/http_stream.h @@ -1,6 +1,7 @@ #pragma once #ifndef HTTP_STREAM_H #define HTTP_STREAM_H +#include "darknet.h" #ifdef OPENCV #include "opencv2/highgui/highgui_c.h" diff --git a/src/image.c b/src/image.c index 1fcdd5c0..0a97b83e 100644 --- a/src/image.c +++ b/src/image.c @@ -1,3 +1,4 @@ +#include "darknet.h" #include "image.h" #include "utils.h" #include "blas.h" @@ -1246,13 +1247,34 @@ void save_image_png(image im, const char *name) if(!success) fprintf(stderr, "Failed to write image %s\n", buff); } +void save_image_options(image im, const char *name, IMTYPE f, int quality) +{ + char buff[256]; + //sprintf(buff, "%s (%d)", name, windows); + if (f == PNG) sprintf(buff, "%s.png", name); + else if (f == BMP) sprintf(buff, "%s.bmp", name); + else if (f == TGA) sprintf(buff, "%s.tga", name); + else if (f == JPG) sprintf(buff, "%s.jpg", name); + else sprintf(buff, "%s.png", name); + unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char)); + int i, k; + for (k = 0; k < im.c; ++k) { + for (i = 0; i < im.w*im.h; ++i) { + data[i*im.c + k] = (unsigned char)(255 * im.data[i + k*im.w*im.h]); + } + } + int success = 0; + if (f == PNG) success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c); + else if (f == BMP) 
success = stbi_write_bmp(buff, im.w, im.h, im.c, data); + else if (f == TGA) success = stbi_write_tga(buff, im.w, im.h, im.c, data); + else if (f == JPG) success = stbi_write_jpg(buff, im.w, im.h, im.c, data, quality); + free(data); + if (!success) fprintf(stderr, "Failed to write image %s\n", buff); +} + void save_image(image im, const char *name) { -#ifdef OPENCV - save_image_jpg(im, name); -#else - save_image_png(im, name); -#endif + save_image_options(im, name, JPG, 80); } diff --git a/src/image.h b/src/image.h index 84b12249..15731333 100644 --- a/src/image.h +++ b/src/image.h @@ -1,5 +1,6 @@ #ifndef IMAGE_H #define IMAGE_H +#include "darknet.h" #include #include @@ -8,13 +9,14 @@ #include #include "box.h" +/* typedef struct { - int w; - int h; + int w; + int h; int c; float *data; } image; - +*/ float get_color(int c, int x, int max); void flip_image(image a); void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); @@ -30,10 +32,10 @@ image crop_image(image im, int dx, int dy, int w, int h); image random_crop_image(image im, int w, int h); image random_augment_image(image im, float angle, float aspect, int low, int high, int size); void random_distort_image(image im, float hue, float saturation, float exposure); -YOLODLL_API image resize_image(image im, int w, int h); +//LIB_EXPORTS image resize_image(image im, int w, int h); void fill_image(image m, float s); void letterbox_image_into(image im, int w, int h, image boxed); -YOLODLL_API image letterbox_image(image im, int w, int h); +//LIB_EXPORTS image letterbox_image(image im, int w, int h); image resize_min(image im, int min); image resize_max(image im, int max); void translate_image(image m, float s); @@ -46,7 +48,7 @@ void exposure_image(image im, float sat); void distort_image(image im, float hue, float sat, float val); void saturate_exposure_image(image im, float sat, float exposure); void hsv_to_rgb(image im); -YOLODLL_API void rgbgr_image(image im); +//LIB_EXPORTS void 
rgbgr_image(image im); void constrain_image(image im); void composite_3d(char *f1, char *f2, char *out, int delta); int best_3d_shift_r(image a, image b, int min, int max); @@ -68,13 +70,13 @@ void show_image_collapsed(image p, char *name); void print_image(image m); -YOLODLL_API image make_image(int w, int h, int c); +//LIB_EXPORTS image make_image(int w, int h, int c); image make_random_image(int w, int h, int c); image make_empty_image(int w, int h, int c); image float_to_image(int w, int h, int c, float *data); image copy_image(image p); image load_image(char *filename, int w, int h, int c); -YOLODLL_API image load_image_color(char *filename, int w, int h); +//LIB_EXPORTS image load_image_color(char *filename, int w, int h); image **load_alphabet(); //float get_pixel(image m, int x, int y, int c); @@ -85,7 +87,7 @@ float bilinear_interpolate(image im, float x, float y, int c); image get_image_layer(image m, int l); -YOLODLL_API void free_image(image m); +//LIB_EXPORTS void free_image(image m); void test_resize(char *filename); #endif diff --git a/src/layer.h b/src/layer.h index cc78de0d..692df88c 100644 --- a/src/layer.h +++ b/src/layer.h @@ -1,61 +1,63 @@ #ifndef BASE_LAYER_H #define BASE_LAYER_H +#include "darknet.h" #include "activations.h" #include "stddef.h" #include "tree.h" -struct network_state; - -struct layer; -typedef struct layer layer; - -typedef enum { - CONVOLUTIONAL, - DECONVOLUTIONAL, - CONNECTED, - MAXPOOL, - SOFTMAX, - DETECTION, - DROPOUT, - CROP, - ROUTE, - COST, - NORMALIZATION, - AVGPOOL, - LOCAL, - SHORTCUT, - ACTIVE, - RNN, - GRU, - CRNN, - BATCHNORM, - NETWORK, - XNOR, - REGION, - YOLO, - REORG, - UPSAMPLE, - REORG_OLD, - BLANK -} LAYER_TYPE; - -typedef enum{ - SSE, MASKED, SMOOTH -} COST_TYPE; - -typedef struct { - int batch; - float learning_rate; - float momentum; - float decay; - int adam; - float B1; - float B2; - float eps; - int t; -} update_args; - +//struct network_state; + +//struct layer; +//typedef struct layer layer; + 
+//typedef enum { +// CONVOLUTIONAL, +// DECONVOLUTIONAL, +// CONNECTED, +// MAXPOOL, +// SOFTMAX, +// DETECTION, +// DROPOUT, +// CROP, +// ROUTE, +// COST, +// NORMALIZATION, +// AVGPOOL, +// LOCAL, +// SHORTCUT, +// ACTIVE, +// RNN, +// GRU, +// CRNN, +// BATCHNORM, +// NETWORK, +// XNOR, +// REGION, +// YOLO, +// REORG, +// UPSAMPLE, +// REORG_OLD, +// BLANK +//} LAYER_TYPE; + +//typedef enum{ +// SSE, MASKED, SMOOTH +//} COST_TYPE; + +//typedef struct { +// int batch; +// float learning_rate; +// float momentum; +// float decay; +// int adam; +// float B1; +// float B2; +// float eps; +// int t; +//} update_args; + +/* struct layer{ LAYER_TYPE type; ACTIVATION activation; @@ -325,7 +327,7 @@ struct layer{ #endif // CUDNN #endif // GPU }; - -void free_layer(layer); +*/ +//void free_layer(layer); #endif diff --git a/src/matrix.h b/src/matrix.h index 1226de2b..a56ae8e0 100644 --- a/src/matrix.h +++ b/src/matrix.h @@ -1,9 +1,11 @@ #ifndef MATRIX_H #define MATRIX_H -typedef struct matrix{ - int rows, cols; - float **vals; -} matrix; +#include "darknet.h" + +//typedef struct matrix{ +// int rows, cols; +// float **vals; +//} matrix; typedef struct { int *assignments; diff --git a/src/network.c b/src/network.c index cf0d9351..306918bb 100644 --- a/src/network.c +++ b/src/network.c @@ -1,6 +1,7 @@ #include #include #include +#include "darknet.h" #include "network.h" #include "image.h" #include "data.h" @@ -31,21 +32,22 @@ #include "upsample_layer.h" #include "parser.h" -network *load_network_custom(char *cfg, char *weights, int clear, int batch) -{ - printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear); - network *net = calloc(1, sizeof(network)); - *net = parse_network_cfg_custom(cfg, batch); - if (weights && weights[0] != 0) { - load_weights(net, weights); - } - if (clear) (*net->seen) = 0; - return net; -} - -network *load_network(char *cfg, char *weights, int clear) -{ - return load_network_custom(cfg, weights, clear, 0); +load_args 
get_base_args(network *net) +{ + load_args args = { 0 }; + args.w = net->w; + args.h = net->h; + args.size = net->w; + + args.min = net->min_crop; + args.max = net->max_crop; + args.angle = net->angle; + args.aspect = net->aspect; + args.exposure = net->exposure; + args.center = net->center; + args.saturation = net->saturation; + args.hue = net->hue; + return args; } int get_current_batch(network net) diff --git a/src/network.h b/src/network.h index 0cb381fb..a825a047 100644 --- a/src/network.h +++ b/src/network.h @@ -1,6 +1,7 @@ // Oh boy, why am I about to do this.... #ifndef NETWORK_H #define NETWORK_H +#include "darknet.h" #include #include "layer.h" @@ -13,6 +14,7 @@ extern "C" { #include "data.h" #include "tree.h" +/* typedef enum { CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM } learning_rate_policy; @@ -77,6 +79,7 @@ typedef struct network{ #endif } network; + typedef struct network_state { float *truth; float *input; @@ -86,6 +89,7 @@ typedef struct network_state { int index; network net; } network_state; +*/ #ifdef GPU float train_networks(network *nets, int n, data d, int interval); @@ -117,7 +121,7 @@ float train_network_sgd(network net, data d, int n); float train_network_datum(network net, float *x, float *y); matrix network_predict_data(network net, data test); -YOLODLL_API float *network_predict(network net, float *input); +//LIB_API float *network_predict(network net, float *input); float network_accuracy(network net, data d); float *network_accuracies(network net, data d, int n); float network_accuracy_multi(network net, data d, int n); @@ -137,25 +141,24 @@ int resize_network(network *net, int w, int h); void set_batch_network(network *net, int b); int get_network_input_size(network net); float get_network_cost(network net); -YOLODLL_API layer* get_network_layer(network* net, int i); -YOLODLL_API detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter); -YOLODLL_API detection 
*make_network_boxes(network *net, float thresh, int *num); -YOLODLL_API void free_detections(detection *dets, int n); -YOLODLL_API void reset_rnn(network *net); -YOLODLL_API network *load_network_custom(char *cfg, char *weights, int clear, int batch); -YOLODLL_API network *load_network(char *cfg, char *weights, int clear); -YOLODLL_API float *network_predict_image(network *net, image im); -YOLODLL_API float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float thresh_calc_avg_iou, const float iou_thresh, network *existing_net); -YOLODLL_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map); -YOLODLL_API int network_width(network *net); -YOLODLL_API int network_height(network *net); - -YOLODLL_API void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm); +//LIB_API layer* get_network_layer(network* net, int i); +//LIB_API detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter); +//LIB_API detection *make_network_boxes(network *net, float thresh, int *num); +//LIB_API void free_detections(detection *dets, int n); +//LIB_API void reset_rnn(network *net); +//LIB_API network *load_network_custom(char *cfg, char *weights, int clear, int batch); +//LIB_API network *load_network(char *cfg, char *weights, int clear); +//LIB_API float *network_predict_image(network *net, image im); +//LIB_API float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float thresh_calc_avg_iou, const float iou_thresh, network *existing_net); +//LIB_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map); +//LIB_API int network_width(network *net); +//LIB_API int network_height(network *net); +//LIB_API void optimize_picture(network *net, image orig, int max_layer, float 
scale, float rate, float thresh, int norm); int get_network_nuisance(network net); int get_network_background(network net); -YOLODLL_API void fuse_conv_batchnorm(network net); -YOLODLL_API void calculate_binary_weights(network net); +//LIB_API void fuse_conv_batchnorm(network net); +//LIB_API void calculate_binary_weights(network net); network combine_train_valid_networks(network net_train, network net_map); #ifdef __cplusplus diff --git a/src/option_list.h b/src/option_list.h index 29ac9431..cb860671 100644 --- a/src/option_list.h +++ b/src/option_list.h @@ -1,20 +1,7 @@ #ifndef OPTION_LIST_H #define OPTION_LIST_H #include "list.h" - -#ifdef YOLODLL_EXPORTS -#if defined(_MSC_VER) -#define YOLODLL_API __declspec(dllexport) -#else -#define YOLODLL_API __attribute__((visibility("default"))) -#endif -#else -#if defined(_MSC_VER) -#define YOLODLL_API -#else -#define YOLODLL_API -#endif -#endif +#include "darknet.h" typedef struct{ char *key; @@ -34,11 +21,11 @@ float option_find_float(list *l, char *key, float def); float option_find_float_quiet(list *l, char *key, float def); void option_unused(list *l); -typedef struct { - int classes; - char **names; -} metadata; +//typedef struct { +// int classes; +// char **names; +//} metadata; -YOLODLL_API metadata get_metadata(char *file); +//LIB_API metadata get_metadata(char *file); #endif diff --git a/src/parser.c b/src/parser.c index e381edc1..12b4bd41 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1249,3 +1249,28 @@ void load_weights(network *net, char *filename) load_weights_upto(net, filename, net->n); } +// load network & force - set batch size +network *load_network_custom(char *cfg, char *weights, int clear, int batch) +{ + printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear); + network *net = calloc(1, sizeof(network)); + *net = parse_network_cfg_custom(cfg, batch); + if (weights && weights[0] != 0) { + load_weights(net, weights); + } + if (clear) (*net->seen) = 0; + return net; +} + +// 
load network & get batch size from cfg-file +network *load_network(char *cfg, char *weights, int clear) +{ + printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear); + network *net = calloc(1, sizeof(network)); + *net = parse_network_cfg(cfg); + if (weights && weights[0] != 0) { + load_weights(net, weights); + } + if (clear) (*net->seen) = 0; + return net; +} \ No newline at end of file diff --git a/src/tree.h b/src/tree.h index 6983adf5..b44d3f4a 100644 --- a/src/tree.h +++ b/src/tree.h @@ -1,20 +1,21 @@ #ifndef TREE_H #define TREE_H +#include "darknet.h" -typedef struct{ - int *leaf; - int n; - int *parent; - int *child; - int *group; - char **name; +//typedef struct{ +// int *leaf; +// int n; +// int *parent; +// int *child; +// int *group; +// char **name; +// +// int groups; +// int *group_size; +// int *group_offset; +//} tree; - int groups; - int *group_size; - int *group_offset; -} tree; - -tree *read_tree(char *filename); +//tree *read_tree(char *filename); int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride); void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves); void change_leaves(tree *t, char *leaf_list); diff --git a/src/utils.h b/src/utils.h index ba61d344..5c87dda5 100644 --- a/src/utils.h +++ b/src/utils.h @@ -11,25 +11,27 @@ #define SECRET_NUM -1234 #define TWO_PI 6.2831853071795864769252866 -#ifdef YOLODLL_EXPORTS +#ifdef LIB_EXPORTS #if defined(_MSC_VER) -#define YOLODLL_API __declspec(dllexport) +#define LIB_API __declspec(dllexport) #else -#define YOLODLL_API __attribute__((visibility("default"))) +#define LIB_API __attribute__((visibility("default"))) #endif #else #if defined(_MSC_VER) -#define YOLODLL_API +#define LIB_API #else -#define YOLODLL_API +#define LIB_API #endif #endif +LIB_API void free_ptrs(void **ptrs, int n); +LIB_API void top_k(float *a, int n, int k, int *index); + double what_time_is_it_now(); int *read_map(char *filename); void 
shuffle(void *arr, size_t n, size_t size); void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); -YOLODLL_API void free_ptrs(void **ptrs, int n); char *basecfg(char *cfgfile); int alphanum_to_int(char c); char int_to_alphanum(int i); @@ -47,7 +49,6 @@ void file_error(char *s); void strip(char *s); void strip_args(char *s); void strip_char(char *s, char bad); -YOLODLL_API void top_k(float *a, int n, int k, int *index); list *split_str(char *s, char delim); char *fgetl(FILE *fp); list *parse_csv_line(char *line); diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp index 8fcf9358..a8710600 100644 --- a/src/yolo_v2_class.cpp +++ b/src/yolo_v2_class.cpp @@ -25,13 +25,13 @@ extern "C" { //static Detector* detector = NULL; static std::unique_ptr detector; -int init(const char *configurationFilename, const char *weightsFilename, int gpu) +int init(const char *configurationFilename, const char *weightsFilename, int gpu) { detector.reset(new Detector(configurationFilename, weightsFilename, gpu)); return 1; } -int detect_image(const char *filename, bbox_t_container &container) +int detect_image(const char *filename, bbox_t_container &container) { std::vector detection = detector->detect(filename); for (size_t i = 0; i < detection.size() && i < C_SHARP_MAX_OBJECTS; ++i) @@ -100,7 +100,7 @@ struct detector_gpu_t { unsigned int *track_id; }; -YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id) : cur_gpu_id(gpu_id) +LIB_EXPORTS Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id) : cur_gpu_id(gpu_id) { wait_stream = 0; int old_gpu_index; @@ -119,7 +119,7 @@ YOLODLL_API Detector::Detector(std::string cfg_filename, std::string weight_file network &net = detector_gpu.net; net.gpu_index = cur_gpu_id; //gpu_index = i; - + char *cfgfile = const_cast(cfg_filename.data()); char *weightfile = const_cast(weight_filename.data()); @@ -147,7 +147,7 @@ YOLODLL_API 
Detector::Detector(std::string cfg_filename, std::string weight_file } -YOLODLL_API Detector::~Detector() +LIB_EXPORTS Detector::~Detector() { detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); layer l = detector_gpu.net.layers[detector_gpu.net.n - 1]; @@ -171,21 +171,21 @@ YOLODLL_API Detector::~Detector() #endif } -YOLODLL_API int Detector::get_net_width() const { +LIB_EXPORTS int Detector::get_net_width() const { detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); return detector_gpu.net.w; } -YOLODLL_API int Detector::get_net_height() const { +LIB_EXPORTS int Detector::get_net_height() const { detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); return detector_gpu.net.h; } -YOLODLL_API int Detector::get_net_color_depth() const { +LIB_EXPORTS int Detector::get_net_color_depth() const { detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); return detector_gpu.net.c; } -YOLODLL_API std::vector Detector::detect(std::string image_filename, float thresh, bool use_mean) +LIB_EXPORTS std::vector Detector::detect(std::string image_filename, float thresh, bool use_mean) { std::shared_ptr image_ptr(new image_t, [](image_t *img) { if (img->data) free(img->data); delete img; }); *image_ptr = load_image(image_filename); @@ -196,7 +196,7 @@ static image load_image_stb(char *filename, int channels) { int w, h, c; unsigned char *data = stbi_load(filename, &w, &h, &c, channels); - if (!data) + if (!data) throw std::runtime_error("file not found"); if (channels) c = channels; int i, j, k; @@ -214,7 +214,7 @@ static image load_image_stb(char *filename, int channels) return im; } -YOLODLL_API image_t Detector::load_image(std::string image_filename) +LIB_EXPORTS image_t Detector::load_image(std::string image_filename) { char *input = const_cast(image_filename.data()); image im = load_image_stb(input, 3); @@ -229,14 +229,14 @@ YOLODLL_API image_t Detector::load_image(std::string image_filename) } -YOLODLL_API void 
Detector::free_image(image_t m) +LIB_EXPORTS void Detector::free_image(image_t m) { if (m.data) { free(m.data); } } -YOLODLL_API std::vector Detector::detect(image_t img, float thresh, bool use_mean) +LIB_EXPORTS std::vector Detector::detect(image_t img, float thresh, bool use_mean) { detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); network &net = detector_gpu.net; @@ -259,7 +259,7 @@ YOLODLL_API std::vector Detector::detect(image_t img, float thresh, bool im.w = img.w; image sized; - + if (net.w == im.w && net.h == im.h) { sized = make_image(im.w, im.h, im.c); memcpy(sized.data, im.data, im.w*im.h*im.c * sizeof(float)); @@ -294,8 +294,8 @@ YOLODLL_API std::vector Detector::detect(image_t img, float thresh, bool box b = dets[i].bbox; int const obj_id = max_index(dets[i].prob, l.classes); float const prob = dets[i].prob[obj_id]; - - if (prob > thresh) + + if (prob > thresh) { bbox_t bbox; bbox.x = std::max((double)0, (b.x - b.w / 2.)*im.w); @@ -322,7 +322,7 @@ YOLODLL_API std::vector Detector::detect(image_t img, float thresh, bool return bbox_vec; } -YOLODLL_API std::vector Detector::tracking_id(std::vector cur_bbox_vec, bool const change_history, +LIB_EXPORTS std::vector Detector::tracking_id(std::vector cur_bbox_vec, bool const change_history, int const frames_story, int const max_dist) { detector_gpu_t &det_gpu = *static_cast(detector_gpu_ptr.get()); @@ -357,7 +357,7 @@ YOLODLL_API std::vector Detector::tracking_id(std::vector cur_bb } } - bool track_id_absent = !std::any_of(cur_bbox_vec.begin(), cur_bbox_vec.end(), + bool track_id_absent = !std::any_of(cur_bbox_vec.begin(), cur_bbox_vec.end(), [&i](bbox_t const& b) { return b.track_id == i.track_id && b.obj_id == i.obj_id; }); if (cur_index >= 0 && track_id_absent){