diff --git a/build/darknet/x64/calc_mAP_voc_py.cmd b/build/darknet/x64/calc_mAP_voc_py.cmd
index 0267600c..8c5ba3cf 100644
--- a/build/darknet/x64/calc_mAP_voc_py.cmd
+++ b/build/darknet/x64/calc_mAP_voc_py.cmd
@@ -3,9 +3,9 @@ rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install cPi
 rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install _pickle
 
 
-darknet.exe detector valid data/voc.data tiny-yolo-voc.cfg tiny-yolo-voc.weights
+rem darknet.exe detector valid data/voc.data tiny-yolo-voc.cfg tiny-yolo-voc.weights
 
-rem darknet.exe detector valid data/voc.data yolo-voc.cfg yolo-voc.weights
+darknet.exe detector valid data/voc.data yolo-voc.cfg yolo-voc.weights
 
 
 reval_voc_py3.py --year 2007 --classes data\voc.names --image_set test --voc_dir E:\VOC2007_2012\VOCtrainval_11-May-2012\VOCdevkit results
diff --git a/build/darknet/x64/data/voc.data b/build/darknet/x64/data/voc.data
index 0a8524c2..63d1a91d 100644
--- a/build/darknet/x64/data/voc.data
+++ b/build/darknet/x64/data/voc.data
@@ -1,6 +1,7 @@
 classes= 20
 train = data/voc/train.txt
 valid = data/voc/2007_test.txt
+#difficult = data/voc/difficult_2007_test.txt
 names = data/voc.names
 backup = backup/
 
diff --git a/scripts/voc_label_difficult.py b/scripts/voc_label_difficult.py
new file mode 100644
index 00000000..93e57fe7
--- /dev/null
+++ b/scripts/voc_label_difficult.py
@@ -0,0 +1,56 @@
+import xml.etree.ElementTree as ET
+import pickle
+import os
+from os import listdir, getcwd
+from os.path import join
+
+sets=[('2012', 'val'),('2007', 'test')]
+
+classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
+
+
+def convert(size, box):  # size=(width, height); box=(xmin, xmax, ymin, ymax) -> (x_center, y_center, w, h) divided by image size
+    dw = 1./size[0]
+    dh = 1./size[1]
+    x = (box[0] + box[1])/2.0
+    y = (box[2] + box[3])/2.0
+    w = box[1] - box[0]
+    h = box[3] - box[2]
+    x = x*dw
+    w = w*dw
+    y = y*dh
+    h = h*dh
+    return (x,y,w,h)
+
+def convert_annotation(year, image_id):  # writes labels/difficult_<image_id>.txt holding only the objects flagged difficult
+    with open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)) as in_file:
+        tree=ET.parse(in_file)
+    root = tree.getroot()
+    size = root.find('size')
+    w = int(size.find('width').text)
+    h = int(size.find('height').text)
+    # `with` closes (and flushes) the label file even when a malformed <object> raises
+    with open('VOCdevkit/VOC%s/labels/difficult_%s.txt'%(year, image_id), 'w') as out_file:
+        for obj in root.iter('object'):
+            difficult = obj.find('difficult').text
+            cls = obj.find('name').text
+            if cls not in classes or int(difficult) == 0:
+                continue
+            cls_id = classes.index(cls)
+            xmlbox = obj.find('bndbox')
+            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
+            bb = convert((w,h), b)
+            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
+
+wd = getcwd()
+# per (year, split): list one JPEGImages/difficult_<id>.jpg path per image and write that image's difficult-only labels
+for year, image_set in sets:
+    if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
+        os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
+    with open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)) as ids_file:
+        image_ids = ids_file.read().strip().split()
+    with open('difficult_%s_%s.txt'%(year, image_set), 'w') as list_file:
+        for image_id in image_ids:
+            list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/difficult_%s.jpg\n'%(wd, year, image_id))
+            convert_annotation(year, image_id)
+
diff --git a/src/blas.c b/src/blas.c
index 31bd86b2..cb6501fd 100644
--- a/src/blas.c
+++ b/src/blas.c
@@ -1,5 +1,6 @@
 #include "blas.h"
-#include "math.h"
+
+#include <math.h>
 #include <assert.h>
 #include <float.h>
 #include <stdio.h>
@@ -54,6 +55,16 @@ void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c)
     }
 }
 
+void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        if(da) da[i] += dc[i] * s[i];
+        if(db) db[i] += dc[i] * (1-s[i]);
+        ds[i] += dc[i] * (a[i] - b[i]);
+    }
+}
+
 void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
 {
     int stride = w1/w2;
@@ -161,12 +172,48 @@ void fill_cpu(int N, float ALPHA, float *X, int INCX)
     for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
 }
 
+void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
+{
+    int i, j;
+    int index = 0;
+    for(j = 0; j < B; ++j) {
+        for(i = 0; i < NX; ++i){
+            if(X) X[j*NX + i] += OUT[index];
+            ++index;
+        }
+        for(i = 0; i < NY; ++i){
+            if(Y) Y[j*NY + i] += OUT[index];
+            ++index;
+        }
+    }
+}
+
+void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
+{
+    int i, j;
+    int index = 0;
+    for(j = 0; j < B; ++j) {
+        for(i = 0; i < NX; ++i){
+            OUT[index++] = X[j*NX + i];
+        }
+        for(i = 0; i < NY; ++i){
+            OUT[index++] = Y[j*NY + i];
+        }
+    }
+}
+
 void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
 {
     int i;
     for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX];
 }
 
+void mult_add_into_cpu(int N, float *X, float *Y, float *Z)
+{
+    int i;
+    for(i = 0; i < N; ++i) Z[i] += X[i]*Y[i];
+}
+
 void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
 {
     int i;
@@ -179,11 +226,21 @@ void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
         }
         else {
             error[i] = 2*abs_val - 1;
-            delta[i] = (diff < 0) ? -1 : 1;
+            delta[i] = (diff < 0) ? 1 : -1;
         }
     }
 }
 
+void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        float diff = truth[i] - pred[i];
+        error[i] = fabs(diff);
+        delta[i] = diff > 0 ? 1 : -1;
+    }
+}
+
 void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
 {
     int i;
@@ -202,21 +259,32 @@ float dot_cpu(int N, float *X, int INCX, float *Y, int INCY)
     return dot;
 }
 
-void softmax(float *input, int n, float temp, float *output)
+void softmax(float *input, int n, float temp, float *output, int stride)
 {
     int i;
     float sum = 0;
     float largest = -FLT_MAX;
     for(i = 0; i < n; ++i){
-        if(input[i] > largest) largest = input[i];
+        if(input[i*stride] > largest) largest = input[i*stride];
     }
     for(i = 0; i < n; ++i){
-        float e = exp(input[i]/temp - largest/temp);
+        float e = exp(input[i*stride]/temp - largest/temp);
         sum += e;
-        output[i] = e;
+        output[i*stride] = e;
     }
     for(i = 0; i < n; ++i){
-        output[i] /= sum;
+        output[i*stride] /= sum;
+    }
+}
+
+
+void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
+{
+    int g, b;
+    for(b = 0; b < batch; ++b){
+        for(g = 0; g < groups; ++g){
+            softmax(input + b*batch_offset + g*group_offset, n, temp, output + b*batch_offset + g*group_offset, stride);
+        }
     }
 }
 
diff --git a/src/blas.h b/src/blas.h
index 3d6ee7d3..a5b82eca 100644
--- a/src/blas.h
+++ b/src/blas.h
@@ -35,7 +35,7 @@ void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
 void l2_cpu(int n, float *pred, float *truth, float *delta, float *error);
 void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
 
-void softmax(float *input, int n, float temp, float *output);
+void softmax(float *input, int n, float temp, float *output, int stride);
 
 #ifdef GPU
 #include "cuda.h"
diff --git a/src/detection_layer.c b/src/detection_layer.c
index cd98b4b4..fd5a4198 100644
--- a/src/detection_layer.c
+++ b/src/detection_layer.c
@@ -59,7 +59,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
             for (i = 0; i < locations; ++i) {
                 int offset = i*l.classes;
                 softmax(l.output + index + offset, l.classes, 1,
-                        l.output + index + offset);
+                        l.output + index + offset, 1);
             }
         }
     }
diff --git a/src/detector.c b/src/detector.c
index 3111c193..ce259fd6 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -499,9 +499,9 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
 {
     int j;
     list *options = read_data_cfg(datacfg);
-    char *valid_images = option_find_str(options, "valid", "data/train.list");
+    char *valid_images = option_find_str(options, "valid", "data/train.txt");
+    char *difficult_valid_images = option_find_str(options, "difficult", NULL);
     char *name_list = option_find_str(options, "names", "data/names.list");
-    //char *prefix = option_find_str(options, "results", "results");
     char **names = get_labels(name_list);
     char *mapf = option_find_str(options, "map", 0);
     int *map = 0;
@@ -515,10 +515,16 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
     fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     srand(time(0));
 
-    char *base = "comp4_det_test_";
     list *plist = get_paths(valid_images);
     char **paths = (char **)list_to_array(plist);
 
+    char **paths_dif = NULL;
+    if (difficult_valid_images) {
+        list *plist_dif = get_paths(difficult_valid_images);
+        paths_dif = (char **)list_to_array(plist_dif);
+    }
+
+
     layer l = net.layers[net.n - 1];
     int classes = l.classes;
 
@@ -574,7 +580,7 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
         }
         for (t = 0; t < nthreads && i + t - nthreads < m; ++t) {
             const int image_index = i + t - nthreads;
-            char *path = paths[i + t - nthreads];
+            char *path = paths[image_index];
             char *id = basecfg(path);
             float *X = val_resized[t].data;
             network_predict(net, X);
@@ -594,6 +600,22 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
                 truth_classes_count[truth[j].id]++;
             }
 
+            // difficult
+            box_label *truth_dif = NULL;
+            int num_labels_dif = 0;
+            if (paths_dif)
+            {
+                char *path_dif = paths_dif[image_index];
+
+                char labelpath_dif[4096];
+                find_replace(path_dif, "images", "labels", labelpath_dif);
+                find_replace(labelpath_dif, "JPEGImages", "labels", labelpath_dif);
+                find_replace(labelpath_dif, ".jpg", ".txt", labelpath_dif);
+                find_replace(labelpath_dif, ".JPEG", ".txt", labelpath_dif);
+                find_replace(labelpath_dif, ".png", ".txt", labelpath_dif);
+                truth_dif = read_boxes(labelpath_dif, &num_labels_dif);
+            }
+
             for (i = 0; i < (l.w*l.h*l.n); ++i) {
 
                 int class_id;
@@ -606,6 +628,8 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
                         detections[detections_count - 1].p = prob;
                         detections[detections_count - 1].image_index = image_index;
                         detections[detections_count - 1].class_id = class_id;
+                        detections[detections_count - 1].truth_flag = 0;
+                        detections[detections_count - 1].unique_truth_index = -1;
 
                         int truth_index = -1;
                         float max_iou = 0;
@@ -617,16 +641,28 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
                             float current_iou = box_iou(boxes[i], t);
                             if (current_iou > iou_thresh && class_id == truth[j].id) {
                                 if (current_iou > max_iou) {
-                                    current_iou = max_iou;
+                                    max_iou = current_iou;
                                     truth_index = unique_truth_index + j;
                                 }
                             }
                         }
 
+                        // best IoU
                         if (truth_index > -1) {
                             detections[detections_count - 1].truth_flag = 1;
                             detections[detections_count - 1].unique_truth_index = truth_index;
                         }
+                        else {
+                            // if object is difficult then remove detection
+                            for (j = 0; j < num_labels_dif; ++j) {
+                                box t = { truth_dif[j].x, truth_dif[j].y, truth_dif[j].w, truth_dif[j].h };
+                                float current_iou = box_iou(boxes[i], t);
+                                if (current_iou > iou_thresh && class_id == truth_dif[j].id) {
+                                    --detections_count;
+                                    break; // drop only this detection: a second matching difficult box must not discard an earlier one
+                                }
+                            }
+                        }
                     }
                 }
             }
@@ -685,7 +721,6 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
             pr[d.class_id][rank].fp++; // false-positive
         }
 
-
         for (i = 0; i < classes; ++i)
         {
             const int tp = pr[i][rank].tp;
diff --git a/src/region_layer.c b/src/region_layer.c
index f7eaef61..d48e8d06 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -170,7 +170,7 @@ void forward_region_layer(const region_layer l, network_state state)
         for (b = 0; b < l.batch; ++b){
             for(i = 0; i < l.h*l.w*l.n; ++i){
                 int index = size*i + b*l.outputs;
-                softmax(l.output + index + 5, l.classes, 1, l.output + index + 5);
+                softmax(l.output + index + 5, l.classes, 1, l.output + index + 5, 1);
             }
         }
     }
diff --git a/src/softmax_layer.c b/src/softmax_layer.c
index 5d153148..27f73fdd 100644
--- a/src/softmax_layer.c
+++ b/src/softmax_layer.c
@@ -40,7 +40,7 @@ void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarc
         int count = 0;
         for(i = 0; i < hierarchy->groups; ++i){
             int group_size = hierarchy->group_size[i];
-            softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count);
+            softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count, 1);
             count += group_size;
         }
     }
@@ -55,7 +55,7 @@ void forward_softmax_layer(const softmax_layer l, network_state state)
         softmax_tree(state.input, batch, inputs, l.temperature, l.softmax_tree, l.output);
     } else {
         for(b = 0; b < batch; ++b){
-            softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs);
+            softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs, 1);
         }
     }
 }