diff --git a/build/darknet/darknet.vcxproj b/build/darknet/darknet.vcxproj index 3557200a..b8e4c08b 100644 --- a/build/darknet/darknet.vcxproj +++ b/build/darknet/darknet.vcxproj @@ -52,7 +52,7 @@ - + @@ -288,6 +288,6 @@ - + \ No newline at end of file diff --git a/build/darknet/yolo_cpp_dll.vcxproj b/build/darknet/yolo_cpp_dll.vcxproj index 5d6576ce..bb8cace6 100644 --- a/build/darknet/yolo_cpp_dll.vcxproj +++ b/build/darknet/yolo_cpp_dll.vcxproj @@ -52,7 +52,7 @@ - + @@ -291,6 +291,6 @@ - + \ No newline at end of file diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index feaffa0c..d7220010 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -695,17 +695,29 @@ void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float adam_gpu(size, layer.weights_gpu, layer.m_gpu, layer.v_gpu, layer.B1, layer.B2, learning_rate/batch, layer.eps, layer.t+1); fill_ongpu(size, 0, layer.weight_updates_gpu, 1); }else{ - // update weights: - // weights_gpu = weights_gpu*(1 - decay*lr) + weight_updates_gpu*lr / (batch*subdivision) = - // weights_gpu*(1 - 0.0005*0.001) + weight_updates_gpu*0.001/(64*8) = - // weights_gpu * 0.999 999 5 + weight_updates_gpu * 0.000 001 953125 - // - // weight_updates_gpu = (weight_updates_gpu - weights_gpu*decay*batch*subdivision)*momentum = - // (weight_updates_gpu - weights_gpu * 0.0005 * 64 * 8) * 0.9 = - // weight_updates_gpu*0.9 - weights_gpu*0.2304 - axpy_ongpu(size, -decay*batch, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); - axpy_ongpu(size, learning_rate/batch, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); - scal_ongpu(size, momentum, layer.weight_updates_gpu, 1); + axpy_ongpu(size, -decay*batch, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); // wu = wu - w*decay*batch + axpy_ongpu(size, learning_rate/batch, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); // w = w + wu*lr/batch + scal_ongpu(size, momentum, layer.weight_updates_gpu, 1); // wu = wu*momentum // wu = (wu - w*decay*batch)*momentum + // w = w + (wu - w*decay*batch)*lr/batch = w + wu*lr/batch - w*decay*lr = w*(1-decay*lr) + wu*lr/batch + //wu_prev = (wu_old - w_old*decay*batch)*momentum + + + //weights_update = weights_update_new + (weights_update_old - weights_old*decay*batch)*momentum - weights_new*decay*batch = + // = weights_update_new + weights_update_old*momentum - weights_old*decay*batch*momentum - weights_new*decay*batch + // = weights_update_new + weights_update_old*momentum - (weights_old*momentum + weights_new)*decay*batch + + //------------- RESULT -------------- + // weights_update = weights_update_new + weights_update_old*momentum - (weights_old*momentum + weights_new)*decay*batch + //----------------------------------- + + // weights_newest = weights_new + (weights_update_new + weights_update_old*momentum - (weights_old*momentum + weights_new)*decay*batch)*lr/batch + // = weights_new + weights_update_new*lr/batch + weights_update_old*momentum*lr/batch - weights_old*momentum*decay*batch*lr/batch - weights_new*decay*batch*lr/batch + // = weights_new + weights_update_new*lr/batch + weights_update_old*momentum*lr/batch - weights_old*momentum*decay*lr - weights_new*decay*lr + // = weights_new*(1 - decay*lr) - weights_old*momentum*decay*lr + (weights_update_new + weights_update_old*momentum)*lr/batch + + //------------- RESULT -------------- + // weights_newest = weights_new*(1 - decay*lr) - weights_old*momentum*(decay*lr) + (weights_update_new + weights_update_old*momentum)*lr/batch + //----------------------------------- } } diff --git a/src/image.c b/src/image.c index 8566a4f8..155afba9 100644 --- a/src/image.c +++ b/src/image.c @@ -705,6 +705,9 @@ IplImage* draw_train_chart(float max_img_loss, int max_batches, int number_of_li } } cvPutText(img, "Iteration number", cvPoint(draw_size / 2, img_size - 10), &font, CV_RGB(0, 0, 0)); + char max_batches_buff[100]; + sprintf(max_batches_buff, "in cfg max_batches=%d", max_batches); + cvPutText(img, max_batches_buff, cvPoint(draw_size - 195, img_size - 10), &font, CV_RGB(0, 0, 0)); cvPutText(img, "Press 's' to save: chart.jpg", cvPoint(5, img_size - 10), &font, CV_RGB(0, 0, 0)); printf(" If error occurs - run training with flag: -dont_show \n"); cvNamedWindow("average loss", CV_WINDOW_NORMAL); @@ -728,15 +731,20 @@ void draw_train_loss(IplImage* img, int img_size, float avg_loss, float max_img_ if (pt1.y < 0) pt1.y = 1; cvCircle(img, pt1, 1, CV_RGB(0, 0, 255), CV_FILLED, 8, 0); - sprintf(char_buff, "current avg loss = %2.4f", avg_loss); + sprintf(char_buff, "current avg loss = %2.4f iteration = %d", avg_loss, current_batch); pt1.x = img_size / 2, pt1.y = 30; - pt2.x = pt1.x + 250, pt2.y = pt1.y + 20; + pt2.x = pt1.x + 460, pt2.y = pt1.y + 20; cvRectangle(img, pt1, pt2, CV_RGB(255, 255, 255), CV_FILLED, 8, 0); pt1.y += 15; cvPutText(img, char_buff, pt1, &font, CV_RGB(0, 0, 0)); cvShowImage("average loss", img); int k = cvWaitKey(20); - if (k == 's' || current_batch == (max_batches-1)) cvSaveImage("chart.jpg", img, 0); + if (k == 's' || current_batch == (max_batches - 1)) { + cvSaveImage("chart.jpg", img, 0); + cvPutText(img, "- Saved", cvPoint(250, img_size - 10), &font, CV_RGB(255, 0, 0)); + } + else + cvPutText(img, "- Saved", cvPoint(250, img_size - 10), &font, CV_RGB(255, 255, 255)); } #endif // OPENCV