Take "difficult" objects into account when calculating mAP for Pascal VOC

pull/398/head
AlexeyAB 7 years ago
parent a1af57d8d6
commit 65bff2683b
  1. build/darknet/x64/calc_mAP_voc_py.cmd (4 lines changed)
  2. build/darknet/x64/data/voc.data (1 line changed)
  3. scripts/voc_label_difficult.py (56 lines changed)
  4. src/blas.c (82 lines changed)
  5. src/blas.h (2 lines changed)
  6. src/detection_layer.c (2 lines changed)
  7. src/detector.c (46 lines changed)
  8. src/region_layer.c (2 lines changed)
  9. src/softmax_layer.c (4 lines changed)

build/darknet/x64/calc_mAP_voc_py.cmd
@@ -3,9 +3,9 @@ rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install cPickle
rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install _pickle
darknet.exe detector valid data/voc.data tiny-yolo-voc.cfg tiny-yolo-voc.weights
rem darknet.exe detector valid data/voc.data tiny-yolo-voc.cfg tiny-yolo-voc.weights
rem darknet.exe detector valid data/voc.data yolo-voc.cfg yolo-voc.weights
darknet.exe detector valid data/voc.data yolo-voc.cfg yolo-voc.weights
reval_voc_py3.py --year 2007 --classes data\voc.names --image_set test --voc_dir E:\VOC2007_2012\VOCtrainval_11-May-2012\VOCdevkit results

build/darknet/x64/data/voc.data
@@ -1,6 +1,7 @@
classes= 20
train = data/voc/train.txt
valid = data/voc/2007_test.txt
#difficult = data/voc/difficult_2007_test.txt
names = data/voc.names
backup = backup/
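
The new difficult key ships commented out; to have mAP validation ignore detections that fall on difficult objects, you would uncomment it and point it at the list written by scripts/voc_label_difficult.py (added below). A sketch of the enabled file, assuming the default paths used elsewhere in this commit:

classes = 20
train = data/voc/train.txt
valid = data/voc/2007_test.txt
difficult = data/voc/difficult_2007_test.txt
names = data/voc.names
backup = backup/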

scripts/voc_label_difficult.py
@@ -0,0 +1,56 @@
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

sets=[('2012', 'val'),('2007', 'test')]

classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

def convert(size, box):
    dw = 1./size[0]
    dh = 1./size[1]
    x = (box[0] + box[1])/2.0
    y = (box[2] + box[3])/2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x,y,w,h)

def convert_annotation(year, image_id):
    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
    out_file = open('VOCdevkit/VOC%s/labels/difficult_%s.txt'%(year, image_id), 'w')
    tree=ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 0:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w,h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

wd = getcwd()

for year, image_set in sets:
    if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
        os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
    list_file = open('difficult_%s_%s.txt'%(year, image_set), 'w')
    for image_id in image_ids:
        list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/difficult_%s.jpg\n'%(wd, year, image_id))
        convert_annotation(year, image_id)
    list_file.close()
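
Each labels/difficult_<image_id>.txt file written by convert_annotation() holds one line per difficult object: the class index followed by the box center and size returned by convert(), normalized by the image width and height, while difficult_2007_test.txt collects the corresponding image paths. A hypothetical label line (made-up values) for a difficult "person" (class index 14):

14 0.5113 0.3420 0.0875 0.2106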

src/blas.c
@@ -1,5 +1,6 @@
#include "blas.h"
#include "math.h"
#include <math.h>
#include <assert.h>
#include <float.h>
#include <stdio.h>
@@ -54,6 +55,16 @@ void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c)
    }
}

void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc)
{
    int i;
    for(i = 0; i < n; ++i){
        if(da) da[i] += dc[i] * s[i];
        if(db) db[i] += dc[i] * (1-s[i]);
        ds[i] += dc[i] * (a[i] - b[i]);
    }
}

void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
{
    int stride = w1/w2;
@@ -161,12 +172,48 @@ void fill_cpu(int N, float ALPHA, float *X, int INCX)
    for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
}

void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    int i, j;
    int index = 0;
    for(j = 0; j < B; ++j) {
        for(i = 0; i < NX; ++i){
            if(X) X[j*NX + i] += OUT[index];
            ++index;
        }
        for(i = 0; i < NY; ++i){
            if(Y) Y[j*NY + i] += OUT[index];
            ++index;
        }
    }
}

void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
{
    int i, j;
    int index = 0;
    for(j = 0; j < B; ++j) {
        for(i = 0; i < NX; ++i){
            OUT[index++] = X[j*NX + i];
        }
        for(i = 0; i < NY; ++i){
            OUT[index++] = Y[j*NY + i];
        }
    }
}

void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
{
    int i;
    for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX];
}

void mult_add_into_cpu(int N, float *X, float *Y, float *Z)
{
    int i;
    for(i = 0; i < N; ++i) Z[i] += X[i]*Y[i];
}

void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
    int i;
@@ -179,8 +226,18 @@ void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
        }
        else {
            error[i] = 2*abs_val - 1;
            delta[i] = (diff < 0) ? -1 : 1;
            delta[i] = (diff < 0) ? 1 : -1;
        }
    }
}

void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
    int i;
    for(i = 0; i < n; ++i){
        float diff = truth[i] - pred[i];
        error[i] = fabs(diff);
        delta[i] = diff > 0 ? 1 : -1;
    }
}
@@ -202,21 +259,32 @@ float dot_cpu(int N, float *X, int INCX, float *Y, int INCY)
    return dot;
}

void softmax(float *input, int n, float temp, float *output)
void softmax(float *input, int n, float temp, float *output, int stride)
{
    int i;
    float sum = 0;
    float largest = -FLT_MAX;
    for(i = 0; i < n; ++i){
        if(input[i] > largest) largest = input[i];
        if(input[i*stride] > largest) largest = input[i*stride];
    }
    for(i = 0; i < n; ++i){
        float e = exp(input[i]/temp - largest/temp);
        float e = exp(input[i*stride]/temp - largest/temp);
        sum += e;
        output[i] = e;
        output[i*stride] = e;
    }
    for(i = 0; i < n; ++i){
        output[i] /= sum;
        output[i*stride] /= sum;
    }
}

void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
{
    int g, b;
    for(b = 0; b < batch; ++b){
        for(g = 0; g < groups; ++g){
            softmax(input + b*batch_offset + g*group_offset, n, temp, output + b*batch_offset + g*group_offset, stride);
        }
    }
}
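
As an aside, the new stride argument lets softmax() walk non-contiguous elements; every caller updated in this commit passes stride 1, while softmax_cpu() simply forwards whatever stride it is given. A minimal standalone sketch (made-up buffer layout and values, assuming it is compiled and linked together with blas.c) of applying it across a channel-major score buffer:

#include <stdio.h>

/* declared in blas.h after this commit */
void softmax(float *input, int n, float temp, float *output, int stride);

int main()
{
    /* 3 class scores for each of 4 cells, stored channel-major:
       the score of class c at cell p lives at index c*4 + p, so stride = 4. */
    float scores[12] = { 1, 2, 0, 1,   2, 0, 1, 3,   0, 1, 2, 0 };
    float probs[12];
    int p;
    for (p = 0; p < 4; ++p) {
        /* normalize the 3 class scores of cell p along the strided axis */
        softmax(scores + p, 3, 1, probs + p, 4);
    }
    for (p = 0; p < 12; ++p) printf("%.3f%c", probs[p], (p % 4 == 3) ? '\n' : ' ');
    return 0;
}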

src/blas.h
@@ -35,7 +35,7 @@ void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error);
void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
void softmax(float *input, int n, float temp, float *output);
void softmax(float *input, int n, float temp, float *output, int stride);
#ifdef GPU
#include "cuda.h"

src/detection_layer.c
@@ -59,7 +59,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
            for (i = 0; i < locations; ++i) {
                int offset = i*l.classes;
                softmax(l.output + index + offset, l.classes, 1,
                        l.output + index + offset);
                        l.output + index + offset, 1);
            }
        }
    }

src/detector.c
@@ -499,9 +499,9 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
{
    int j;
    list *options = read_data_cfg(datacfg);
    char *valid_images = option_find_str(options, "valid", "data/train.list");
    char *valid_images = option_find_str(options, "valid", "data/train.txt");
    char *difficult_valid_images = option_find_str(options, "difficult", NULL);
    char *name_list = option_find_str(options, "names", "data/names.list");
    //char *prefix = option_find_str(options, "results", "results");
    char **names = get_labels(name_list);
    char *mapf = option_find_str(options, "map", 0);
    int *map = 0;
@@ -515,10 +515,16 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    srand(time(0));

    char *base = "comp4_det_test_";
    list *plist = get_paths(valid_images);
    char **paths = (char **)list_to_array(plist);

    char **paths_dif = NULL;
    if (difficult_valid_images) {
        list *plist_dif = get_paths(difficult_valid_images);
        paths_dif = (char **)list_to_array(plist_dif);
    }

    layer l = net.layers[net.n - 1];
    int classes = l.classes;
@@ -574,7 +580,7 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
        }
        for (t = 0; t < nthreads && i + t - nthreads < m; ++t) {
            const int image_index = i + t - nthreads;
            char *path = paths[i + t - nthreads];
            char *path = paths[image_index];
            char *id = basecfg(path);
            float *X = val_resized[t].data;
            network_predict(net, X);
@@ -594,6 +600,22 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
                truth_classes_count[truth[j].id]++;
            }

            // difficult
            box_label *truth_dif = NULL;
            int num_labels_dif = 0;
            if (paths_dif)
            {
                char *path_dif = paths_dif[image_index];
                char labelpath_dif[4096];
                find_replace(path_dif, "images", "labels", labelpath_dif);
                find_replace(labelpath_dif, "JPEGImages", "labels", labelpath_dif);
                find_replace(labelpath_dif, ".jpg", ".txt", labelpath_dif);
                find_replace(labelpath_dif, ".JPEG", ".txt", labelpath_dif);
                find_replace(labelpath_dif, ".png", ".txt", labelpath_dif);
                truth_dif = read_boxes(labelpath_dif, &num_labels_dif);
            }

            for (i = 0; i < (l.w*l.h*l.n); ++i) {
                int class_id;
@@ -606,6 +628,8 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
                        detections[detections_count - 1].p = prob;
                        detections[detections_count - 1].image_index = image_index;
                        detections[detections_count - 1].class_id = class_id;
                        detections[detections_count - 1].truth_flag = 0;
                        detections[detections_count - 1].unique_truth_index = -1;

                        int truth_index = -1;
                        float max_iou = 0;
@@ -617,16 +641,27 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
                            float current_iou = box_iou(boxes[i], t);
                            if (current_iou > iou_thresh && class_id == truth[j].id) {
                                if (current_iou > max_iou) {
                                    current_iou = max_iou;
                                    max_iou = current_iou;
                                    truth_index = unique_truth_index + j;
                                }
                            }
                        }

                        // best IoU
                        if (truth_index > -1) {
                            detections[detections_count - 1].truth_flag = 1;
                            detections[detections_count - 1].unique_truth_index = truth_index;
                        }
                        else {
                            // if object is difficult then remove detection
                            for (j = 0; j < num_labels_dif; ++j) {
                                box t = { truth_dif[j].x, truth_dif[j].y, truth_dif[j].w, truth_dif[j].h };
                                float current_iou = box_iou(boxes[i], t);
                                if (current_iou > iou_thresh && class_id == truth_dif[j].id) {
                                    --detections_count;
                                }
                            }
                        }
                    }
                }
            }
@@ -685,7 +720,6 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
            pr[d.class_id][rank].fp++; // false-positive
        }

        for (i = 0; i < classes; ++i)
        {
            const int tp = pr[i][rank].tp;

src/region_layer.c
@@ -170,7 +170,7 @@ void forward_region_layer(const region_layer l, network_state state)
        for (b = 0; b < l.batch; ++b){
            for(i = 0; i < l.h*l.w*l.n; ++i){
                int index = size*i + b*l.outputs;
                softmax(l.output + index + 5, l.classes, 1, l.output + index + 5);
                softmax(l.output + index + 5, l.classes, 1, l.output + index + 5, 1);
            }
        }
    }

src/softmax_layer.c
@@ -40,7 +40,7 @@ void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output)
        int count = 0;
        for(i = 0; i < hierarchy->groups; ++i){
            int group_size = hierarchy->group_size[i];
            softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count);
            softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count, 1);
            count += group_size;
        }
    }
@@ -55,7 +55,7 @@ void forward_softmax_layer(const softmax_layer l, network_state state)
        softmax_tree(state.input, batch, inputs, l.temperature, l.softmax_tree, l.output);
    } else {
        for(b = 0; b < batch; ++b){
            softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs);
            softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs, 1);
        }
    }
}
