Added the command-line parameter -dontuse_opencv for training the Classifier. Also use GaussianBlur instead of bilateralFilter for blur=1 when training the Classifier and the Detector.

pull/1557/head
AlexeyAB 5 years ago
parent b8605bda1e
commit 114a7f942b
  1. 1
      include/darknet.h
  2. 2
      scripts/README.md
  3. 10
      src/classifier.c
  4. 21
      src/data.c
  5. 4
      src/data.h
  6. 12
      src/image.c
  7. 1
      src/image.h
  8. 12
      src/image_opencv.cpp

@ -822,6 +822,7 @@ typedef struct load_args {
int augment_speed; int augment_speed;
int letter_box; int letter_box;
int show_imgs; int show_imgs;
int dontuse_opencv;
float jitter; float jitter;
int flip; int flip;
int blur; int blur;

@ -2,6 +2,8 @@
### Datasets: ### Datasets:
25 thousand datasets on Kaggle: https://www.kaggle.com/datasets
BDD100K - Diverse Driving Video: https://bair.berkeley.edu/blog/2018/05/30/bdd/ BDD100K - Diverse Driving Video: https://bair.berkeley.edu/blog/2018/05/30/bdd/
Pascal VOC: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html Pascal VOC: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html

@ -27,7 +27,7 @@ float *get_regression_values(char **labels, int n)
return v; return v;
} }
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int mjpeg_port, int calc_topk, int show_imgs) void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dontuse_opencv, int dont_show, int mjpeg_port, int calc_topk, int show_imgs)
{ {
int i; int i;
@ -81,6 +81,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
args.threads = 32; args.threads = 32;
args.hierarchy = net.hierarchy; args.hierarchy = net.hierarchy;
args.dontuse_opencv = dontuse_opencv;
args.min = net.min_crop; args.min = net.min_crop;
args.max = net.max_crop; args.max = net.max_crop;
args.flip = net.flip; args.flip = net.flip;
@ -112,7 +113,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
int img_size = 1000; int img_size = 1000;
char windows_name[100]; char windows_name[100];
sprintf(windows_name, "average loss (id:%d)", random_gen()); sprintf(windows_name, "average loss (id:%d)", random_gen());
img = draw_train_chart(windows_name, max_img_loss, net.max_batches, number_of_lines, img_size, dont_show); if (!dontuse_opencv) img = draw_train_chart(windows_name, max_img_loss, net.max_batches, number_of_lines, img_size, dont_show);
#endif //OPENCV #endif //OPENCV
data train; data train;
@ -176,7 +177,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
#ifdef OPENCV #ifdef OPENCV
draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, topk_buff, dont_show, mjpeg_port); if (!dontuse_opencv) draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, topk_buff, dont_show, mjpeg_port);
#endif // OPENCV #endif // OPENCV
if (i >= (iter_save + 1000)) { if (i >= (iter_save + 1000)) {
@ -1298,6 +1299,7 @@ void run_classifier(int argc, char **argv)
} }
int dont_show = find_arg(argc, argv, "-dont_show"); int dont_show = find_arg(argc, argv, "-dont_show");
int dontuse_opencv = find_arg(argc, argv, "-dontuse_opencv");
int show_imgs = find_arg(argc, argv, "-show_imgs"); int show_imgs = find_arg(argc, argv, "-show_imgs");
int calc_topk = find_arg(argc, argv, "-topk"); int calc_topk = find_arg(argc, argv, "-topk");
int cam_index = find_int_arg(argc, argv, "-c", 0); int cam_index = find_int_arg(argc, argv, "-c", 0);
@ -1311,7 +1313,7 @@ void run_classifier(int argc, char **argv)
int layer = layer_s ? atoi(layer_s) : -1; int layer = layer_s ? atoi(layer_s) : -1;
if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top); if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top);
else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s)); else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s));
else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear, dont_show, mjpeg_port, calc_topk, show_imgs); else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear, dontuse_opencv, dont_show, mjpeg_port, calc_topk, show_imgs);
else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename); else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename);
else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename); else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename);
else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename); else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);

@ -142,7 +142,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
return X; return X;
} }
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure) matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv)
{ {
int i; int i;
matrix X; matrix X;
@ -152,7 +152,10 @@ matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int
for(i = 0; i < n; ++i){ for(i = 0; i < n; ++i){
int size = w > h ? w : h; int size = w > h ? w : h;
image im = load_image_color(paths[i], 0, 0); image im;
if(dontuse_opencv) im = load_image_stb_resize(paths[i], 0, 0, 3);
else im = load_image_color(paths[i], 0, 0);
image crop = random_augment_image(im, angle, aspect, min, max, size); image crop = random_augment_image(im, angle, aspect, min, max, size);
int flip = use_flip ? random_gen() % 2 : 0; int flip = use_flip ? random_gen() % 2 : 0;
if (flip) if (flip)
@ -1362,7 +1365,7 @@ void *load_thread(void *ptr)
if (a.type == OLD_CLASSIFICATION_DATA){ if (a.type == OLD_CLASSIFICATION_DATA){
*a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
} else if (a.type == CLASSIFICATION_DATA){ } else if (a.type == CLASSIFICATION_DATA){
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.mixup, a.blur, a.show_imgs, a.label_smooth_eps); *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.mixup, a.blur, a.show_imgs, a.label_smooth_eps, a.dontuse_opencv);
} else if (a.type == SUPER_DATA){ } else if (a.type == SUPER_DATA){
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
} else if (a.type == WRITING_DATA){ } else if (a.type == WRITING_DATA){
@ -1507,20 +1510,20 @@ data load_data_super(char **paths, int n, int m, int w, int h, int scale)
return d; return d;
} }
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps) data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv)
{ {
char **paths_stored = paths; char **paths_stored = paths;
if(m) paths = get_random_paths(paths, n, m); if(m) paths = get_random_paths(paths, n, m);
data d = {0}; data d = {0};
d.shallow = 0; d.shallow = 0;
d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure); d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv);
d.y = load_labels_paths(paths, n, labels, k, hierarchy, label_smooth_eps); d.y = load_labels_paths(paths, n, labels, k, hierarchy, label_smooth_eps);
if (use_mixup && rand_int(0, 1)) { if (use_mixup && rand_int(0, 1)) {
char **paths_mix = get_random_paths(paths_stored, n, m); char **paths_mix = get_random_paths(paths_stored, n, m);
data d2 = { 0 }; data d2 = { 0 };
d2.shallow = 0; d2.shallow = 0;
d2.X = load_image_augment_paths(paths_mix, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure); d2.X = load_image_augment_paths(paths_mix, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv);
d2.y = load_labels_paths(paths_mix, n, labels, k, hierarchy, label_smooth_eps); d2.y = load_labels_paths(paths_mix, n, labels, k, hierarchy, label_smooth_eps);
free(paths_mix); free(paths_mix);
@ -1530,12 +1533,12 @@ data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *h
d4.shallow = 0; d4.shallow = 0;
if (use_mixup >= 3) { if (use_mixup >= 3) {
char **paths_mix3 = get_random_paths(paths_stored, n, m); char **paths_mix3 = get_random_paths(paths_stored, n, m);
d3.X = load_image_augment_paths(paths_mix3, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure); d3.X = load_image_augment_paths(paths_mix3, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv);
d3.y = load_labels_paths(paths_mix3, n, labels, k, hierarchy, label_smooth_eps); d3.y = load_labels_paths(paths_mix3, n, labels, k, hierarchy, label_smooth_eps);
free(paths_mix3); free(paths_mix3);
char **paths_mix4 = get_random_paths(paths_stored, n, m); char **paths_mix4 = get_random_paths(paths_stored, n, m);
d4.X = load_image_augment_paths(paths_mix4, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure); d4.X = load_image_augment_paths(paths_mix4, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv);
d4.y = load_labels_paths(paths_mix4, n, labels, k, hierarchy, label_smooth_eps); d4.y = load_labels_paths(paths_mix4, n, labels, k, hierarchy, label_smooth_eps);
free(paths_mix4); free(paths_mix4);
} }
@ -1706,7 +1709,7 @@ data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int
d.w = w; d.w = w;
d.h = h; d.h = h;
d.shallow = 0; d.shallow = 0;
d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure); d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, 0);
d.y = load_tags_paths(paths, n, k); d.y = load_tags_paths(paths, n, k);
if(m) free(paths); if(m) free(paths);
return d; return d;

@ -89,9 +89,9 @@ data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int
data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, int use_mixup, data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, int use_blur, int use_mixup,
float jitter, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs); float jitter, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int show_imgs);
data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure); data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure);
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure); matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv);
data load_data_super(char **paths, int n, int m, int w, int h, int scale); data load_data_super(char **paths, int n, int m, int w, int h, int scale);
data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps); data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv);
data load_go(char *filename); data load_go(char *filename);
box_label *read_boxes(char *filename, int *n); box_label *read_boxes(char *filename, int *n);

@ -1419,6 +1419,18 @@ image load_image_stb(char *filename, int channels)
return im; return im;
} }
/*
 * Load an image via load_image_stb (the loader that does not go through
 * OpenCV) and optionally resize it.
 *
 * filename - path of the image file, forwarded to load_image_stb.
 * w, h     - requested size; if either is 0 the image is returned at the
 *            size it was loaded with.
 * c        - channel count forwarded to load_image_stb.
 *
 * Returns the loaded image, or a resized copy when both w and h are
 * non-zero and differ from the loaded dimensions (the intermediate image
 * is freed in that case).
 */
image load_image_stb_resize(char *filename, int w, int h, int c)
{
    image loaded = load_image_stb(filename, c);

    // A zero in either dimension means "keep the size as loaded".
    if (!h || !w) return loaded;

    // Already the requested size: nothing to resize.
    if (h == loaded.h && w == loaded.w) return loaded;

    image scaled = resize_image(loaded, w, h);
    free_image(loaded);   // the original buffer is no longer needed
    return scaled;
}
image load_image(char *filename, int w, int h, int c) image load_image(char *filename, int w, int h, int c)
{ {
#ifdef OPENCV #ifdef OPENCV

@ -83,6 +83,7 @@ image float_to_image_scaled(int w, int h, int c, float *data);
image float_to_image(int w, int h, int c, float *data); image float_to_image(int w, int h, int c, float *data);
image copy_image(image p); image copy_image(image p);
image load_image(char *filename, int w, int h, int c); image load_image(char *filename, int w, int h, int c);
image load_image_stb_resize(char *filename, int w, int h, int c);
//LIB_API image load_image_color(char *filename, int w, int h); //LIB_API image load_image_color(char *filename, int w, int h);
image **load_alphabet(); image **load_alphabet();

@ -1210,15 +1210,15 @@ extern "C" image image_data_augmentation(mat_cv* mat, int w, int h,
if (blur) { if (blur) {
cv::Mat dst(sized.size(), sized.type()); cv::Mat dst(sized.size(), sized.type());
if (blur == 1) { if (blur == 1) {
//cv::GaussianBlur(sized, dst, cv::Size(31, 31), 0); cv::GaussianBlur(sized, dst, cv::Size(17, 17), 0);
cv::bilateralFilter(sized, dst, 17, 75, 75); //cv::bilateralFilter(sized, dst, 17, 75, 75);
} }
else { else {
int ksize = (blur / 2) * 2 + 1; int ksize = (blur / 2) * 2 + 1;
cv::Size kernel_size = cv::Size(ksize, ksize); cv::Size kernel_size = cv::Size(ksize, ksize);
//cv::GaussianBlur(sized, dst, kernel_size, 0); cv::GaussianBlur(sized, dst, kernel_size, 0);
//cv::medianBlur(sized, dst, ksize); //cv::medianBlur(sized, dst, ksize);
cv::bilateralFilter(sized, dst, ksize, 75, 75); //cv::bilateralFilter(sized, dst, ksize, 75, 75);
// sharpen // sharpen
//cv::Mat img_tmp; //cv::Mat img_tmp;
@ -1274,7 +1274,9 @@ extern "C" image blur_image(image src_img, int ksize)
{ {
cv::Mat src = image_to_mat(src_img); cv::Mat src = image_to_mat(src_img);
cv::Mat dst; cv::Mat dst;
cv::bilateralFilter(src, dst, ksize, 75, 75); cv::Size kernel_size = cv::Size(ksize, ksize);
cv::GaussianBlur(src, dst, kernel_size, 0);
//cv::bilateralFilter(src, dst, ksize, 75, 75);
image dst_img = mat_to_image(dst); image dst_img = mat_to_image(dst);
return dst_img; return dst_img;
} }

Loading…
Cancel
Save