Merge pull request #4976 from mmaaz60/monitor_training

Monitor training
pull/5172/head
Alexey 5 years ago committed by GitHub
commit 17dd701618
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. src/classifier.c (13 lines changed)
  2. src/detector.c (11 lines changed)
  3. src/image_opencv.cpp (7 lines changed)
  4. src/image_opencv.h (2 lines changed)

@ -135,6 +135,10 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
int iter_topk = get_current_batch(net); int iter_topk = get_current_batch(net);
float topk = 0; float topk = 0;
int count = 0;
double start, time_remaining, avg_time = -1, alpha_time = 0.01;
start = what_time_is_it_now();
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
time=clock(); time=clock();
@ -183,9 +187,14 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
draw_precision = 1; draw_precision = 1;
} }
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); time_remaining = (net.max_batches - i)*(what_time_is_it_now() - start) / 60 / 60;
// set initial value, even if resume training from 10000 iteration
if (avg_time < 0) avg_time = time_remaining;
else avg_time = alpha_time * time_remaining + (1 - alpha_time) * avg_time;
start = what_time_is_it_now();
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images, %f time left\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen, avg_time);
#ifdef OPENCV #ifdef OPENCV
if (!dontuse_opencv) draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, topk_buff, dont_show, mjpeg_port); if (!dontuse_opencv) draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, topk_buff, dont_show, mjpeg_port, avg_time);
#endif // OPENCV #endif // OPENCV
if (i >= (iter_save + 1000)) { if (i >= (iter_save + 1000)) {

@ -168,7 +168,10 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
//printf(" imgs = %d \n", imgs); //printf(" imgs = %d \n", imgs);
pthread_t load_thread = load_data(args); pthread_t load_thread = load_data(args);
int count = 0; int count = 0;
double time_remaining, avg_time = -1, alpha_time = 0.01;
//while(i*imgs < N*120){ //while(i*imgs < N*120){
while (get_current_iteration(net) < net.max_batches) { while (get_current_iteration(net) < net.max_batches) {
if (l.random && count++ % 10 == 0) { if (l.random && count++ % 10 == 0) {
@ -290,7 +293,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
if (iteration < net.burn_in * 3) fprintf(stderr, "\n Tensor Cores are disabled until the first %d iterations are reached.", 3 * net.burn_in); if (iteration < net.burn_in * 3) fprintf(stderr, "\n Tensor Cores are disabled until the first %d iterations are reached.", 3 * net.burn_in);
else fprintf(stderr, "\n Tensor Cores are used."); else fprintf(stderr, "\n Tensor Cores are used.");
} }
printf("\n %d: %f, %f avg loss, %f rate, %lf seconds, %d images\n", iteration, loss, avg_loss, get_current_rate(net), (what_time_is_it_now() - time), iteration*imgs); printf("\n %d: %f, %f avg loss, %f rate, %lf seconds, %d images, %f time left\n", iteration, loss, avg_loss, get_current_rate(net), (what_time_is_it_now() - time), iteration*imgs, avg_time);
int draw_precision = 0; int draw_precision = 0;
if (calc_map && (iteration >= next_map_calc || iteration == net.max_batches)) { if (calc_map && (iteration >= next_map_calc || iteration == net.max_batches)) {
@ -341,8 +344,12 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
draw_precision = 1; draw_precision = 1;
} }
time_remaining = (net.max_batches - iteration)*(what_time_is_it_now() - time + load_time) / 60 / 60;
// set initial value, even if resume training from 10000 iteration
if (avg_time < 0) avg_time = time_remaining;
else avg_time = alpha_time * time_remaining + (1 - alpha_time) * avg_time;
#ifdef OPENCV #ifdef OPENCV
draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, iteration, net.max_batches, mean_average_precision, draw_precision, "mAP%", dont_show, mjpeg_port); draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, iteration, net.max_batches, mean_average_precision, draw_precision, "mAP%", dont_show, mjpeg_port, avg_time);
#endif // OPENCV #endif // OPENCV
//if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) { //if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) {

@ -1063,7 +1063,7 @@ extern "C" mat_cv* draw_train_chart(char *windows_name, float max_img_loss, int
// ---------------------------------------- // ----------------------------------------
extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches, extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches,
float precision, int draw_precision, char *accuracy_name, int dont_show, int mjpeg_port) float precision, int draw_precision, char *accuracy_name, int dont_show, int mjpeg_port, double time_remaining)
{ {
try { try {
cv::Mat &img = *(cv::Mat*)img_src; cv::Mat &img = *(cv::Mat*)img_src;
@ -1104,10 +1104,9 @@ extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_siz
old_precision = precision; old_precision = precision;
iteration_old = current_batch; iteration_old = current_batch;
} }
sprintf(char_buff, "current avg loss = %2.4f iteration = %d approx. time left = %2.2f hours", avg_loss, current_batch, time_remaining);
sprintf(char_buff, "current avg loss = %2.4f iteration = %d", avg_loss, current_batch);
pt1.x = 15, pt1.y = draw_size + 18; pt1.x = 15, pt1.y = draw_size + 18;
pt2.x = pt1.x + 460, pt2.y = pt1.y + 20; pt2.x = pt1.x + 800, pt2.y = pt1.y + 20;
cv::rectangle(img, pt1, pt2, CV_RGB(255, 255, 255), CV_FILLED, 8, 0); cv::rectangle(img, pt1, pt2, CV_RGB(255, 255, 255), CV_FILLED, 8, 0);
pt1.y += 15; pt1.y += 15;
cv::putText(img, char_buff, pt1, cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(0, 0, 100), 1, CV_AA); cv::putText(img, char_buff, pt1, cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(0, 0, 100), 1, CV_AA);

@ -94,7 +94,7 @@ void draw_detections_cv_v3(mat_cv* show_img, detection *dets, int num, float thr
// Draw Loss & Accuracy chart // Draw Loss & Accuracy chart
mat_cv* draw_train_chart(char *windows_name, float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show); mat_cv* draw_train_chart(char *windows_name, float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show);
void draw_train_loss(char *windows_name, mat_cv* img, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches, void draw_train_loss(char *windows_name, mat_cv* img, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches,
float precision, int draw_precision, char *accuracy_name, int dont_show, int mjpeg_port); float precision, int draw_precision, char *accuracy_name, int dont_show, int mjpeg_port, double time_remaining);
// Data augmentation // Data augmentation
image image_data_augmentation(mat_cv* mat, int w, int h, image image_data_augmentation(mat_cv* mat, int w, int h,

Loading…
Cancel
Save