diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj
index 3557200a..b8e4c08b 100644
--- a/build/darknet/darknet.vcxproj
+++ b/build/darknet/darknet.vcxproj
@@ -52,7 +52,7 @@
-
+
@@ -288,6 +288,6 @@
-
+
\ No newline at end of file
diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj
index 5d6576ce..bb8cace6 100644
--- a/build/darknet/yolo_cpp_dll.vcxproj
+++ b/build/darknet/yolo_cpp_dll.vcxproj
@@ -52,7 +52,7 @@
-
+
@@ -291,6 +291,6 @@
-
+
\ No newline at end of file
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index feaffa0c..d7220010 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -695,17 +695,29 @@ void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float
adam_gpu(size, layer.weights_gpu, layer.m_gpu, layer.v_gpu, layer.B1, layer.B2, learning_rate/batch, layer.eps, layer.t+1);
fill_ongpu(size, 0, layer.weight_updates_gpu, 1);
}else{
- // update weights:
- // weights_gpu = weights_gpu*(1 - decay*lr) + weight_updates_gpu*lr / (batch*subdivision) =
- // weights_gpu*(1 - 0.0005*0.001) + weight_updates_gpu*0.001/(64*8) =
- // weights_gpu * 0.999 999 5 + weight_updates_gpu * 0.000 001 953125
- //
- // weight_updates_gpu = (weight_updates_gpu - weights_gpu*decay*batch*subdivision)*momentum =
- // (weight_updates_gpu - weights_gpu * 0.0005 * 64 * 8) * 0.9 =
- // weight_updates_gpu*0.9 - weights_gpu*0.2304
- axpy_ongpu(size, -decay*batch, layer.weights_gpu, 1, layer.weight_updates_gpu, 1);
- axpy_ongpu(size, learning_rate/batch, layer.weight_updates_gpu, 1, layer.weights_gpu, 1);
- scal_ongpu(size, momentum, layer.weight_updates_gpu, 1);
+ axpy_ongpu(size, -decay*batch, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); // wu = wu - w*decay*batch
+ axpy_ongpu(size, learning_rate/batch, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); // w = w + wu*lr/batch
+ scal_ongpu(size, momentum, layer.weight_updates_gpu, 1); // wu = wu*momentum // wu = (wu - w*decay*batch)*momentum
+ // net weight step of the three calls above: w = w + (wu - w*decay*batch)*lr/batch = w + wu*lr/batch - w*decay*lr = w*(1-decay*lr) + wu*lr/batch
+ // value left in weight_updates_gpu afterwards: wu_prev = (wu_old - w_old*decay*batch)*momentum
+
+
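+ // on the following update (presumably new gradients are accumulated onto the leftover wu_prev - TODO confirm): weights_update = weights_update_new + (weights_update_old - weights_old*decay*batch)*momentum - weights_new*decay*batch =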
+ // = weights_update_new + weights_update_old*momentum - weights_old*decay*batch*momentum - weights_new*decay*batch
+ // = weights_update_new + weights_update_old*momentum - (weights_old*momentum + weights_new)*decay*batch
+
+ //------------- RESULT --------------
+ // weights_update = weights_update_new + weights_update_old*momentum - (weights_old*momentum + weights_new)*decay*batch
+ //-----------------------------------
+
+ // weights_newest = weights_new + (weights_update_new + weights_update_old*momentum - (weights_old*momentum + weights_new)*decay*batch)*lr/batch
+ // = weights_new + weights_update_new*lr/batch + weights_update_old*momentum*lr/batch - weights_old*momentum*decay*batch*lr/batch - weights_new*decay*batch*lr/batch
+ // = weights_new + weights_update_new*lr/batch + weights_update_old*momentum*lr/batch - weights_old*momentum*decay*lr - weights_new*decay*lr
+ // = weights_new*(1 - decay*lr) - weights_old*momentum*decay*lr + (weights_update_new + weights_update_old*momentum)*lr/batch
+
+ //------------- RESULT --------------
+ // weights_newest = weights_new*(1 - decay*lr) - weights_old*momentum*(decay*lr) + (weights_update_new + weights_update_old*momentum)*lr/batch
+ //-----------------------------------
}
}
diff --git a/src/image.c b/src/image.c
index 8566a4f8..155afba9 100644
--- a/src/image.c
+++ b/src/image.c
@@ -705,6 +705,9 @@ IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_li
}
}
cvPutText(img, "Iteration number", cvPoint(draw_size / 2, img_size - 10), &font, CV_RGB(0, 0, 0));
+ char max_batches_buff[100];
+ sprintf(max_batches_buff, "in cfg max_batches=%d", max_batches);
+ cvPutText(img, max_batches_buff, cvPoint(draw_size - 195, img_size - 10), &font, CV_RGB(0, 0, 0));
cvPutText(img, "Press 's' to save: chart.jpg", cvPoint(5, img_size - 10), &font, CV_RGB(0, 0, 0));
printf(" If error occurs - run training with flag: -dont_show \n");
cvNamedWindow("average loss", CV_WINDOW_NORMAL);
@@ -728,15 +731,20 @@ void draw_train_loss(IplImage* img, int img_size, float avg_loss, float max_img_
if (pt1.y < 0) pt1.y = 1;
cvCircle(img, pt1, 1, CV_RGB(0, 0, 255), CV_FILLED, 8, 0);
- sprintf(char_buff, "current avg loss = %2.4f", avg_loss);
+ sprintf(char_buff, "current avg loss = %2.4f iteration = %d", avg_loss, current_batch);
pt1.x = img_size / 2, pt1.y = 30;
- pt2.x = pt1.x + 250, pt2.y = pt1.y + 20;
+ pt2.x = pt1.x + 460, pt2.y = pt1.y + 20;
cvRectangle(img, pt1, pt2, CV_RGB(255, 255, 255), CV_FILLED, 8, 0);
pt1.y += 15;
cvPutText(img, char_buff, pt1, &font, CV_RGB(0, 0, 0));
cvShowImage("average loss", img);
int k = cvWaitKey(20);
- if (k == 's' || current_batch == (max_batches-1)) cvSaveImage("chart.jpg", img, 0);
+ if (k == 's' || current_batch == (max_batches - 1)) {
+ cvSaveImage("chart.jpg", img, 0);
+ cvPutText(img, "- Saved", cvPoint(250, img_size - 10), &font, CV_RGB(255, 0, 0));
+ }
+ else
+ cvPutText(img, "- Saved", cvPoint(250, img_size - 10), &font, CV_RGB(255, 255, 255));
}
#endif // OPENCV