diff --git a/build/darknet/x64/calc_mAP_voc_py.cmd b/build/darknet/x64/calc_mAP_voc_py.cmd
index 0267600c..8c5ba3cf 100644
--- a/build/darknet/x64/calc_mAP_voc_py.cmd
+++ b/build/darknet/x64/calc_mAP_voc_py.cmd
@@ -3,9 +3,9 @@ rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install cPi
 rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install _pickle
 
 
-darknet.exe detector valid data/voc.data tiny-yolo-voc.cfg tiny-yolo-voc.weights
+rem darknet.exe detector valid data/voc.data tiny-yolo-voc.cfg tiny-yolo-voc.weights
 
-rem darknet.exe detector valid data/voc.data yolo-voc.cfg yolo-voc.weights
+darknet.exe detector valid data/voc.data yolo-voc.cfg yolo-voc.weights
 
 
 reval_voc_py3.py --year 2007 --classes data\voc.names --image_set test --voc_dir E:\VOC2007_2012\VOCtrainval_11-May-2012\VOCdevkit results
diff --git a/build/darknet/x64/data/voc.data b/build/darknet/x64/data/voc.data
index 0a8524c2..63d1a91d 100644
--- a/build/darknet/x64/data/voc.data
+++ b/build/darknet/x64/data/voc.data
@@ -1,6 +1,7 @@
 classes= 20
 train  = data/voc/train.txt
 valid  = data/voc/2007_test.txt
+#difficult = data/voc/difficult_2007_test.txt
 names = data/voc.names
 backup = backup/
 
diff --git a/scripts/voc_label_difficult.py b/scripts/voc_label_difficult.py
new file mode 100644
index 00000000..93e57fe7
--- /dev/null
+++ b/scripts/voc_label_difficult.py
@@ -0,0 +1,56 @@
+import xml.etree.ElementTree as ET
+import pickle
+import os
+from os import listdir, getcwd
+from os.path import join
+
+sets=[('2012', 'val'),('2007', 'test')]
+
+classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
+
+
+def convert(size, box):
+    """Map a pixel box (xmin, xmax, ymin, ymax) to (x, y, w, h) scaled by 1/size."""
+    inv_w = 1./size[0]
+    inv_h = 1./size[1]
+    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]
+    center_x = (x_lo + x_hi)/2.0
+    center_y = (y_lo + y_hi)/2.0
+    span_x = x_hi - x_lo
+    span_y = y_hi - y_lo
+    # x-quantities are scaled by 1/size[0], y-quantities by 1/size[1].
+    return (center_x*inv_w, center_y*inv_h,
+            span_x*inv_w, span_y*inv_h)
+
+def convert_annotation(year, image_id):
+    """Write the 'difficult' objects of one VOC annotation as 'cls x y w h' label lines."""
+    xml_path = 'VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)
+    txt_path = 'VOCdevkit/VOC%s/labels/difficult_%s.txt'%(year, image_id)
+    # 'with' closes both files even when parsing or conversion raises.
+    with open(xml_path) as in_file, open(txt_path, 'w') as out_file:
+        root = ET.parse(in_file).getroot()
+        size = root.find('size')
+        w = int(size.find('width').text)
+        h = int(size.find('height').text)
+        for obj in root.iter('object'):
+            cls = obj.find('name').text
+            # An object without a <difficult> tag counts as not difficult.
+            if cls not in classes or int(obj.findtext('difficult', '0')) == 0:
+                continue
+            xmlbox = obj.find('bndbox')
+            b = tuple(float(xmlbox.find(tag).text) for tag in ('xmin', 'xmax', 'ymin', 'ymax'))
+            bb = convert((w,h), b)
+            out_file.write(str(classes.index(cls)) + " " + " ".join([str(a) for a in bb]) + '\n')
+
+wd = getcwd()
+# Listed difficult_<id>.jpg paths mirror the label names: JPEGImages->labels, .jpg->.txt gives the label file.
+for year, image_set in sets:
+    if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
+        os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
+    with open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)) as ids_file:
+        image_ids = ids_file.read().strip().split()
+    with open('difficult_%s_%s.txt'%(year, image_set), 'w') as list_file:
+        for image_id in image_ids:
+            list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/difficult_%s.jpg\n'%(wd, year, image_id))
+            convert_annotation(year, image_id)
+
diff --git a/src/blas.c b/src/blas.c
index 31bd86b2..cb6501fd 100644
--- a/src/blas.c
+++ b/src/blas.c
@@ -1,5 +1,6 @@
 #include "blas.h"
-#include "math.h"
+
+#include <math.h>
 #include <assert.h>
 #include <float.h>
 #include <stdio.h>
@@ -54,6 +55,16 @@ void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c)
     }
 }
 
+void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc)
+{
+    int i;    /* accumulates (+=) into da, db and ds for indices [0, n) */
+    for(i = 0; i < n; ++i){
+        if(da) da[i] += dc[i] * s[i];        /* skipped when da is NULL */
+        if(db) db[i] += dc[i] * (1-s[i]);    /* skipped when db is NULL */
+        ds[i] += dc[i] * (a[i] - b[i]);      /* ds is not NULL-checked, unlike da/db */
+    }
+}
+
 void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
 {
     int stride = w1/w2;
@@ -161,12 +172,48 @@ void fill_cpu(int N, float ALPHA, float *X, int INCX)
     for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
 }
 
+/* Adds interleaved OUT back into X and Y (+=): per batch, NX values belong
+ * to X and the next NY to Y. A NULL X or Y is skipped but still consumed. */
+void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
+{
+    int batch, k;
+    int pos = 0;
+    for(batch = 0; batch < B; ++batch) {
+        for(k = 0; k < NX; ++k, ++pos){
+            if(X) X[batch*NX + k] += OUT[pos];
+        }
+        for(k = 0; k < NY; ++k, ++pos){
+            if(Y) Y[batch*NY + k] += OUT[pos];
+        }
+    }
+}
+
+/*
+ * Interleaves X and Y into OUT: for each of the B batches, NX values of X
+ * followed by NY values of Y, written back to back starting at OUT[0].
+ */
+void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
+{
+    int batch, k;
+    float *dst = OUT;
+    for(batch = 0; batch < B; ++batch) {
+        for(k = 0; k < NX; ++k) *dst++ = X[batch*NX + k];
+        for(k = 0; k < NY; ++k) *dst++ = Y[batch*NY + k];
+    }
+}
+
 void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
 {
     int i;
     for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX];
 }
 
+/* Element-wise multiply-accumulate: Z[i] += X[i]*Y[i] for every i in [0, N). */
+void mult_add_into_cpu(int N, float *X, float *Y, float *Z)
+{
+    while(N-- > 0) *Z++ += (*X++) * (*Y++);
+}
+
 void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
 {
     int i;
@@ -179,11 +226,21 @@ void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
         }
         else {
             error[i] = 2*abs_val - 1;
-            delta[i] = (diff < 0) ? -1 : 1;
+            delta[i] = (diff < 0) ? 1 : -1; /* NOTE(review): sign is inverted vs. the line this replaces and vs. l1_cpu (diff > 0 ? 1 : -1), assuming diff = truth - pred here too - confirm intended */
         }
     }
 }
 
+void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
+{
+    int i;    /* fills error[] and delta[] for indices [0, n) */
+    for(i = 0; i < n; ++i){
+        float diff = truth[i] - pred[i];
+        error[i] = fabs(diff);           /* absolute difference |truth - pred| */
+        delta[i] = diff > 0 ? 1 : -1;    /* +1 when truth > pred, otherwise -1 (also when diff == 0) */
+    }
+}
+
 void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
 {
     int i;
@@ -202,21 +259,43 @@ float dot_cpu(int N, float *X, int INCX, float *Y, int INCY)
     return dot;
 }
 
+/*
+ * Softmax with temperature over n floats spaced `stride` apart:
+ * output[i*stride] = exp(input[i*stride]/temp - largest/temp) / sum.
+ * input and output may be the same buffer: each element is read before
+ * the same index is written.
+ */
-void softmax(float *input, int n, float temp, float *output)
+void softmax(float *input, int n, float temp, float *output, int stride)
 {
     int i;
     float sum = 0;
     float largest = -FLT_MAX;
     for(i = 0; i < n; ++i){
-        if(input[i] > largest) largest = input[i];
+        if(input[i*stride] > largest) largest = input[i*stride];
     }
     for(i = 0; i < n; ++i){
-        float e = exp(input[i]/temp - largest/temp);
+        float e = exp(input[i*stride]/temp - largest/temp);
         sum += e;
-        output[i] = e;
+        output[i*stride] = e;
     }
     for(i = 0; i < n; ++i){
-        output[i] /= sum;
+        output[i*stride] /= sum;
+    }
+}
+
+
+/*
+ * Applies softmax() to batch*groups slices of n strided elements each;
+ * slice (b, g) starts b*batch_offset + g*group_offset floats into both
+ * input and output.
+ */
+void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
+{
+    int g, b;
+    for(b = 0; b < batch; ++b){
+        for(g = 0; g < groups; ++g){
+            softmax(input + b*batch_offset + g*group_offset, n, temp, output + b*batch_offset + g*group_offset, stride);
+        }
     }
 }
 
diff --git a/src/blas.h b/src/blas.h
index 3d6ee7d3..a5b82eca 100644
--- a/src/blas.h
+++ b/src/blas.h
@@ -35,7 +35,7 @@ void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
 void l2_cpu(int n, float *pred, float *truth, float *delta, float *error);
 void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
 
-void softmax(float *input, int n, float temp, float *output);
+void softmax(float *input, int n, float temp, float *output, int stride);
 
 #ifdef GPU
 #include "cuda.h"
diff --git a/src/detection_layer.c b/src/detection_layer.c
index cd98b4b4..fd5a4198 100644
--- a/src/detection_layer.c
+++ b/src/detection_layer.c
@@ -59,7 +59,7 @@ void forward_detection_layer(const detection_layer l, network_state state)
             for (i = 0; i < locations; ++i) {
                 int offset = i*l.classes;
                 softmax(l.output + index + offset, l.classes, 1,
-                        l.output + index + offset);
+                        l.output + index + offset, 1);
             }
         }
     }
diff --git a/src/detector.c b/src/detector.c
index 3111c193..ce259fd6 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -499,9 +499,9 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
 {
 	int j;
 	list *options = read_data_cfg(datacfg);
-	char *valid_images = option_find_str(options, "valid", "data/train.list");
+	char *valid_images = option_find_str(options, "valid", "data/train.txt");
+	char *difficult_valid_images = option_find_str(options, "difficult", NULL);
 	char *name_list = option_find_str(options, "names", "data/names.list");
-	//char *prefix = option_find_str(options, "results", "results");
 	char **names = get_labels(name_list);
 	char *mapf = option_find_str(options, "map", 0);
 	int *map = 0;
@@ -515,10 +515,16 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
 	fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
 	srand(time(0));
 
-	char *base = "comp4_det_test_";
 	list *plist = get_paths(valid_images);
 	char **paths = (char **)list_to_array(plist);
 
+	char **paths_dif = NULL;
+	if (difficult_valid_images) {
+		list *plist_dif = get_paths(difficult_valid_images);
+		paths_dif = (char **)list_to_array(plist_dif);
+	}
+	
+
 	layer l = net.layers[net.n - 1];
 	int classes = l.classes;
 
@@ -574,7 +580,7 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
 		}
 		for (t = 0; t < nthreads && i + t - nthreads < m; ++t) {
 			const int image_index = i + t - nthreads;
-			char *path = paths[i + t - nthreads];
+			char *path = paths[image_index];
 			char *id = basecfg(path);
 			float *X = val_resized[t].data;
 			network_predict(net, X);
@@ -594,6 +600,22 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
 				truth_classes_count[truth[j].id]++;
 			}
 
+			// difficult
+			box_label *truth_dif = NULL;
+			int num_labels_dif = 0;
+			if (paths_dif)
+			{
+				char *path_dif = paths_dif[image_index];
+
+				char labelpath_dif[4096];
+				find_replace(path_dif, "images", "labels", labelpath_dif);
+				find_replace(labelpath_dif, "JPEGImages", "labels", labelpath_dif);
+				find_replace(labelpath_dif, ".jpg", ".txt", labelpath_dif);
+				find_replace(labelpath_dif, ".JPEG", ".txt", labelpath_dif);
+				find_replace(labelpath_dif, ".png", ".txt", labelpath_dif);				
+				truth_dif = read_boxes(labelpath_dif, &num_labels_dif);
+			}
+
 			for (i = 0; i < (l.w*l.h*l.n); ++i) {
 
 				int class_id;
@@ -606,6 +628,8 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
 						detections[detections_count - 1].p = prob;
 						detections[detections_count - 1].image_index = image_index;
 						detections[detections_count - 1].class_id = class_id;
+						detections[detections_count - 1].truth_flag = 0;
+						detections[detections_count - 1].unique_truth_index = -1;
 
 						int truth_index = -1;
 						float max_iou = 0;
@@ -617,16 +641,27 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
 							float current_iou = box_iou(boxes[i], t);
 							if (current_iou > iou_thresh && class_id == truth[j].id) {
 								if (current_iou > max_iou) {
-									current_iou = max_iou;
+									max_iou = current_iou;
 									truth_index = unique_truth_index + j;
 								}
 							}
 						}
 						// best IoU
 						if (truth_index > -1) {
 							detections[detections_count - 1].truth_flag = 1;
 							detections[detections_count - 1].unique_truth_index = truth_index;
 						}
+						else {
+							// if object is difficult then remove this detection (once: stop at the first matching box)
+							for (j = 0; j < num_labels_dif; ++j) {
+								box t = { truth_dif[j].x, truth_dif[j].y, truth_dif[j].w, truth_dif[j].h };
+								float current_iou = box_iou(boxes[i], t);
+								if (current_iou > iou_thresh && class_id == truth_dif[j].id) {
+									--detections_count;
+									break;	// a second match would also drop an earlier, unrelated detection
+								}
+							}
+						}
 					}
 				}
 			}
@@ -685,7 +720,6 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile)
 			pr[d.class_id][rank].fp++;	// false-positive
 		}
 
-
 		for (i = 0; i < classes; ++i) 
 		{
 			const int tp = pr[i][rank].tp;
diff --git a/src/region_layer.c b/src/region_layer.c
index f7eaef61..d48e8d06 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -170,7 +170,7 @@ void forward_region_layer(const region_layer l, network_state state)
         for (b = 0; b < l.batch; ++b){
             for(i = 0; i < l.h*l.w*l.n; ++i){
                 int index = size*i + b*l.outputs;
-                softmax(l.output + index + 5, l.classes, 1, l.output + index + 5);
+                softmax(l.output + index + 5, l.classes, 1, l.output + index + 5, 1);
             }
         }
     }
diff --git a/src/softmax_layer.c b/src/softmax_layer.c
index 5d153148..27f73fdd 100644
--- a/src/softmax_layer.c
+++ b/src/softmax_layer.c
@@ -40,7 +40,7 @@ void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarc
         int count = 0;
         for(i = 0; i < hierarchy->groups; ++i){
             int group_size = hierarchy->group_size[i];
-            softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count);
+            softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count, 1);
             count += group_size;
         }
     }
@@ -55,7 +55,7 @@ void forward_softmax_layer(const softmax_layer l, network_state state)
         softmax_tree(state.input, batch, inputs, l.temperature, l.softmax_tree, l.output);
     } else {
         for(b = 0; b < batch; ++b){
-            softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs);
+            softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs, 1);
         }
     }
 }