mirror of https://github.com/AlexeyAB/darknet.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
320 lines
9.3 KiB
320 lines
9.3 KiB
from ctypes import * |
|
import math |
|
import random |
|
import os |
|
import cv2 |
|
import numpy as np |
|
import time |
|
|
|
|
|
def sample(probs):
    """Pick a random index, weighted by the entries of ``probs``.

    Args:
        probs: Sequence of numeric weights. They are divided by their sum
            before use, so they do not have to add up to 1.

    Returns:
        The first index at which a uniform draw from [0, 1], reduced by
        each normalised weight in turn, reaches zero or below. If that
        never happens the last index, ``len(probs) - 1``, is returned.
    """
    total = sum(probs)
    shares = [weight / total for weight in probs]
    remainder = random.uniform(0, 1)
    for index, share in enumerate(shares):
        remainder -= share
        if remainder <= 0:
            return index
    # Reached only when the draw was never used up by the loop above.
    return len(shares) - 1
|
|
|
|
|
def c_array(ctype, values):
    """Build a ctypes array of ``ctype`` holding a copy of ``values``.

    Args:
        ctype: ctypes element type (for example ``c_float``).
        values: Sized sequence whose items are assigned into the array.

    Returns:
        A new ``ctype * len(values)`` array instance.
    """
    array_type = ctype * len(values)
    buffer = array_type()
    # Slice assignment copies every element in one step.
    buffer[:] = values
    return buffer
|
|
|
|
|
class BOX(Structure):
    """ctypes structure holding four C floats: ``x``, ``y``, ``w``, ``h``.

    NOTE(review): presumably mirrors the native library's box struct, so
    field order and types must stay in sync with it -- confirm against the
    C header.
    """

    _fields_ = [(field, c_float) for field in ("x", "y", "w", "h")]
|
|
|
|
|
class DETECTION(Structure):
    """ctypes structure describing one detection record.

    NOTE(review): presumably mirrors the native library's detection struct;
    the field order and types below must match the build of the library
    that is actually loaded -- confirm against the C header.
    """

    _fields_ = [
        ("bbox", BOX),                   # embedded box (x, y, w, h)
        ("classes", c_int),
        ("prob", POINTER(c_float)),      # indexed by class id in detect()
        ("mask", POINTER(c_float)),
        ("objectness", c_float),
        ("sort_class", c_int),
    ]
|
|
|
|
|
class IMAGE(Structure):
    """ctypes structure for an image: three C ints and a float pointer.

    ``w``, ``h`` and ``c`` are filled with width, height and channel count
    by ``array_to_image``; ``data`` points at the float pixel buffer.
    """

    _fields_ = [
        ("w", c_int),
        ("h", c_int),
        ("c", c_int),
        ("data", POINTER(c_float)),
    ]
|
|
|
|
|
class METADATA(Structure):
    """ctypes structure pairing a class count with an array of C strings.

    ``names`` is indexed by class id; each entry is read back as ``bytes``.
    """

    _fields_ = [
        ("classes", c_int),
        ("names", POINTER(c_char_p)),
    ]
|
|
|
|
|
# When true, ``cuda_set_device`` is looked up on the library and exposed as
# ``set_gpu``.
hasGPU = True

# The path is relative, so it is resolved against the current working
# directory at import time.
lib = CDLL("./libdarknet.so", RTLD_GLOBAL)

# Marker meaning "do not touch the function's restype".
_KEEP_RESTYPE = object()


def _bind(symbol, argtypes, restype=_KEEP_RESTYPE):
    """Look up ``symbol`` on ``lib``, declare its signature and return it.

    ``argtypes`` is always assigned. ``restype`` is assigned only when it is
    passed explicitly; otherwise whatever the function object currently has
    is left in place.
    """
    func = getattr(lib, symbol)
    func.argtypes = argtypes
    if restype is not _KEEP_RESTYPE:
        func.restype = restype
    return func


# Network geometry queries; callers reach these through ``lib`` directly.
_bind("network_width", [c_void_p], c_int)
_bind("network_height", [c_void_p], c_int)

predict = _bind("network_predict",
                [c_void_p, POINTER(c_float)], POINTER(c_float))

if hasGPU:
    set_gpu = _bind("cuda_set_device", [c_int])

make_image = _bind("make_image", [c_int, c_int, c_int], IMAGE)

get_network_boxes = _bind(
    "get_network_boxes",
    [c_void_p, c_int, c_int, c_float, c_float,
     POINTER(c_int), c_int, POINTER(c_int), c_int],
    POINTER(DETECTION))

make_network_boxes = _bind("make_network_boxes",
                           [c_void_p], POINTER(DETECTION))

free_detections = _bind("free_detections", [POINTER(DETECTION), c_int])

free_ptrs = _bind("free_ptrs", [POINTER(c_void_p), c_int])

# Same symbol as ``predict``. ctypes caches attribute lookups on the library
# object, so this is the same function object and keeps the restype set above.
network_predict = _bind("network_predict", [c_void_p, POINTER(c_float)])

reset_rnn = _bind("reset_rnn", [c_void_p])

load_net = _bind("load_network", [c_char_p, c_char_p, c_int], c_void_p)

load_net_custom = _bind("load_network_custom",
                        [c_char_p, c_char_p, c_int, c_int], c_void_p)

do_nms_obj = _bind("do_nms_obj",
                   [POINTER(DETECTION), c_int, c_int, c_float])

do_nms_sort = _bind("do_nms_sort",
                    [POINTER(DETECTION), c_int, c_int, c_float])

free_image = _bind("free_image", [IMAGE])

letterbox_image = _bind("letterbox_image", [IMAGE, c_int, c_int], IMAGE)

load_meta = _bind("get_metadata", [c_char_p], METADATA)

load_image = _bind("load_image_color", [c_char_p, c_int, c_int], IMAGE)

rgbgr_image = _bind("rgbgr_image", [IMAGE])

predict_image = _bind("network_predict_image",
                      [c_void_p, IMAGE], POINTER(c_float))
|
|
|
|
|
def array_to_image(arr):
    """Wrap a 3-axis array in an ``IMAGE`` structure.

    The array is transposed with ``(2, 0, 1)``, flattened, converted to
    float32 and divided by 255.

    Args:
        arr: NumPy array with three axes (assumed height x width x channels
            with values in 0..255 -- TODO confirm with callers).

    Returns:
        A tuple ``(im, pixels)``: the ``IMAGE`` whose ``data`` pointer
        refers to ``pixels``, and the float32 buffer itself. The buffer is
        returned presumably so the caller can keep it alive while ``im`` is
        in use.
    """
    planar = arr.transpose(2, 0, 1)
    channels, height, width = planar.shape
    pixels = np.ascontiguousarray(planar.flat, dtype=np.float32) / 255.0
    pixel_ptr = pixels.ctypes.data_as(POINTER(c_float))
    return IMAGE(width, height, channels, pixel_ptr), pixels
|
|
|
|
|
def classify(net, meta, im):
    """Score an image against every class and rank the results.

    Args:
        net: Network handle passed through to ``predict_image``.
        meta: Object exposing ``classes`` (count) and ``names`` (indexable).
        im: ``IMAGE`` passed through to ``predict_image``.

    Returns:
        A list of ``(name, score)`` tuples, highest score first. Names come
        from the module-level ``altNames`` when it is set, otherwise from
        ``meta.names``.
    """
    scores = predict_image(net, im)
    ranked = [
        (meta.names[idx] if altNames is None else altNames[idx], scores[idx])
        for idx in range(meta.classes)
    ]
    ranked.sort(key=lambda pair: -pair[1])
    return ranked
|
|
|
|
|
def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45, debug=False):
    """Run the network on one frame and collect its detections.

    Args:
        net: Network handle passed to the native prediction functions.
        meta: Object exposing ``classes`` (count) and ``names`` (indexable).
        image: Array accepted by ``array_to_image``.
        thresh: Threshold forwarded to ``get_network_boxes``.
        hier_thresh: Hierarchical threshold forwarded to
            ``get_network_boxes``.
        nms: Threshold forwarded to ``do_nms_sort``; a falsy value skips
            that call.
        debug: When true, print a trace of every step.

    Returns:
        A list of ``(name, probability, (x, y, w, h))`` tuples, one per
        (detection, class) pair whose probability is greater than zero,
        ordered by descending probability. Names come from the module-level
        ``altNames`` when it is set, otherwise from ``meta.names``.
    """
    # `arr` is the float buffer `im.data` points into; keeping the local
    # reference presumably keeps that memory valid during the native calls.
    im, arr = array_to_image(image)
    if debug:
        print("Loaded image")
    num = c_int(0)
    if debug:
        print("Assigned num")
    pnum = pointer(num)
    if debug:
        print("Assigned pnum")
    predict_image(net, im)
    if debug:
        print("did prediction")
    dets = get_network_boxes(net, im.w, im.h,
                             thresh, hier_thresh, None, 0, pnum, 0)
    if debug:
        print("Got dets")
    # The native call wrote the number of detections through `pnum`.
    num = pnum[0]
    if debug:
        print("got zeroth index of pnum")
    # From here on `dets` is natively allocated; the finally-clause releases
    # it even if building the result raises (e.g. a short `altNames` list).
    try:
        if nms:
            do_nms_sort(dets, num, meta.classes, nms)
        if debug:
            print("did sort")
        res = []
        if debug:
            print("about to range")
        for j in range(num):
            if debug:
                print("Ranging on "+str(j)+" of "+str(num))
            if debug:
                print("Classes: "+str(meta), meta.classes, meta.names)
            det = dets[j]
            for i in range(meta.classes):
                prob = det.prob[i]
                if debug:
                    print("Class-ranging on "+str(i)+" of " +
                          str(meta.classes)+"= "+str(prob))
                if prob > 0:
                    b = det.bbox
                    if altNames is None:
                        nameTag = meta.names[i]
                    else:
                        nameTag = altNames[i]
                    if debug:
                        print("Got bbox", b)
                        print(nameTag)
                        print(prob)
                        print((b.x, b.y, b.w, b.h))
                    res.append((nameTag, prob, (b.x, b.y, b.w, b.h)))
        if debug:
            print("did range")
        res = sorted(res, key=lambda x: -x[1])
        if debug:
            print("did sort")
        # `im` is deliberately not passed to free_image: its data pointer
        # refers to the NumPy buffer `arr`, not to natively allocated memory.
        # The trace message is kept unchanged for log compatibility.
        if debug:
            print("freed image")
    finally:
        free_detections(dets, num)
    if debug:
        print("freed detections")
    return res
|
|
|
|
|
def convertBack(x, y, w, h):
    """Turn a centre/size box into rounded integer corner coordinates.

    Args:
        x: Horizontal centre of the box.
        y: Vertical centre of the box.
        w: Box width.
        h: Box height.

    Returns:
        ``(xmin, ymin, xmax, ymax)`` as ints, each obtained with the
        built-in ``round``.
    """
    half_w = w / 2
    half_h = h / 2
    left, right = int(round(x - half_w)), int(round(x + half_w))
    top, bottom = int(round(y - half_h)), int(round(y + half_h))
    return left, top, right, bottom
|
|
|
|
|
def cvDrawBoxes(detections, img):
    """Draw a rectangle and a text label on ``img`` for every detection.

    Args:
        detections: Iterable of ``(name, probability, (x, y, w, h))`` items
            as produced by ``detect``. ``name`` may be ``bytes`` or ``str``.
        img: Image array accepted by ``cv2.rectangle`` / ``cv2.putText``;
            it is drawn on in place.

    Returns:
        The same ``img`` object that was passed in.
    """
    green = (0, 255, 0)
    for detection in detections:
        label, confidence, box = detection[0], detection[1], detection[2]
        xmin, ymin, xmax, ymax = convertBack(
            float(box[0]), float(box[1]), float(box[2]), float(box[3]))
        # Names read through ctypes are bytes, but names taken from the
        # module-level `altNames` list are str and have no .decode().
        if isinstance(label, bytes):
            label = label.decode()
        caption = label + " [" + str(round(confidence * 100, 2)) + "]"
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), green, 2)
        # The caption is anchored 20 pixels below the top-left corner.
        cv2.putText(img, caption, (xmin, ymin + 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, green, 4)
    return img
|
|
|
|
|
# Lazily initialised by YOLO():
#   netMain  - loaded network handle
#   metaMain - metadata returned by load_meta
#   altNames - class names read from the names file, when available
netMain = metaMain = altNames = None
|
|
|
|
|
def _load_alt_names(meta_path):
    """Read the class names referenced by the ``names`` entry of a data file.

    Returns a list of stripped lines from the names file, or ``None`` when
    the data file has no ``names`` entry, the referenced file does not
    exist, or either file cannot be read. Loading is best-effort: the
    caller falls back to the names stored in the metadata.
    """
    import re

    try:
        with open(meta_path) as meta_fh:
            meta_contents = meta_fh.read()
        match = re.search("names *= *(.*)$", meta_contents,
                          re.IGNORECASE | re.MULTILINE)
        if match is None:
            return None
        names_path = match.group(1)
        if not os.path.exists(names_path):
            return None
        with open(names_path) as names_fh:
            names = names_fh.read().strip().split("\n")
        return [name.strip() for name in names]
    except (OSError, ValueError):
        # Unreadable or undecodable file: alternative names are optional.
        return None


def YOLO():
    """Run detection over ``test.mp4`` frame by frame and print the FPS.

    The network, its metadata and the alternative class names are loaded
    once and cached in the module globals ``netMain``, ``metaMain`` and
    ``altNames``. Processing stops when the capture yields no more frames;
    the capture and the writer are released on every exit path.

    Raises:
        ValueError: If the config, weight or data file does not exist.
    """
    global metaMain, netMain, altNames
    configPath = "./cfg/yolov3.cfg"
    weightPath = "./yolov3.weights"
    metaPath = "./cfg/coco.data"
    for label, path in (("config", configPath),
                        ("weight", weightPath),
                        ("data file", metaPath)):
        if not os.path.exists(path):
            raise ValueError("Invalid " + label + " path `" +
                             os.path.abspath(path) + "`")
    if netMain is None:
        netMain = load_net_custom(configPath.encode(
            "ascii"), weightPath.encode("ascii"), 0, 1)  # batch size = 1
    if metaMain is None:
        metaMain = load_meta(metaPath.encode("ascii"))
    if altNames is None:
        altNames = _load_alt_names(metaPath)

    # Pass 0 instead of a file name to read from the default camera.
    cap = cv2.VideoCapture("test.mp4")
    out = None
    try:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        # The network input size does not change between frames.
        net_size = (lib.network_width(netMain), lib.network_height(netMain))
        out = cv2.VideoWriter(
            "output.avi", cv2.VideoWriter_fourcc(*"MJPG"), 10.0, net_size)
        print("Starting the YOLO loop...")
        while True:
            prev_time = time.time()
            ret, frame_read = cap.read()
            if not ret:
                # End of stream or read failure: there is no frame to process.
                break
            frame_rgb = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, net_size,
                                       interpolation=cv2.INTER_LINEAR)
            detections = detect(netMain, metaMain, frame_resized, thresh=0.25)
            image = cvDrawBoxes(detections, frame_resized)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # NOTE(review): `image` is never written to `out`, so output.avi
            # stays empty -- confirm whether out.write(image) is intended.
            print(1/(time.time()-prev_time))
    finally:
        cap.release()
        if out is not None:
            out.release()
|
|
|
# Start the video demo only when this file is executed as a script; importing
# it as a module must not kick off processing.
if __name__ == "__main__":
    YOLO()
|
|
|