Box-only mode for the draw_object demo

What this patch does:
* cv_draw_object() takes an extra `int *boxonly` argument and binds it to a
  new 0..1 "box-only" trackbar in the drawing window.
* draw_object() keeps an untouched copy of the resized input (src_sized).
  When *boxonly is non-zero, after each optimisation step it crops the region
  described by truth_cpu[0..3] out of `sized`, restores `sized` from
  src_sized, and embeds the crop back at the same position.
* New helper copy_image_inplace(src, dst) memcpy's src's pixels into dst.
* draw_object() no longer frees/re-parses the network and re-runs
  network_predict() after the loop; detections are read from the current net,
  drawn on `sized`, and saved/shown as "pre_predictions".
* Debug printf calls in yolo_layer.c and dead commented-out code are removed
  or commented out.

Review notes:
* Added free_image(crop) in the box-only branch: crop was created on every
  iteration and never released (assumes crop_image() returns an image that
  owns its buffer; confirm in image.c).
* Not changed here: the context printf in backward_network_gpu() has four
  conversions but five arguments, so x_size is printed under the
  "net.learning_rate" label.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of the patch; the post-image blob ids on the index lines predate the
  review edits to detector.c and image.c.

diff --git a/src/detector.c b/src/detector.c
index ef220e0b..de4c6e39 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -1709,6 +1709,8 @@ void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename,
         if (letter_box) sized = letterbox_image(im, net.w, net.h);
         else sized = resize_image(im, net.w, net.h);
 
+        image src_sized = copy_image(sized);
+
         layer l = net.layers[net.n - 1];
         net.num_boxes = l.max_boxes;
         int num_truth = l.truths;
@@ -1716,8 +1718,9 @@ void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename,
 
         int *it_num_set = (int *)xcalloc(1, sizeof(int));
         float *lr_set = (float *)xcalloc(1, sizeof(float));
+        int *boxonly = (int *)xcalloc(1, sizeof(int));
 
-        cv_draw_object(sized, truth_cpu, net.num_boxes, num_truth, it_num_set, lr_set, l.classes, names);
+        cv_draw_object(sized, truth_cpu, net.num_boxes, num_truth, it_num_set, lr_set, boxonly, l.classes, names);
 
         net.learning_rate = *lr_set;
         it_num = *it_num_set;
@@ -1736,17 +1739,6 @@ void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename,
         int iteration;
         for (iteration = 0; iteration < it_num; ++iteration)
         {
-            /*
-            free_network(net);
-            net = parse_network_cfg(cfgfile);// parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1
-            net.learning_rate = *lr_set;
-            net.adversarial = 1;
-            set_batch_network(&net, 1);
-            if (weightfile) {
-                load_weights(&net, weightfile);
-            }
-            */
-
             if (iteration == it_num - 1) {
                 net.train = 0;
                 quantize_image(sized);
@@ -1758,55 +1750,41 @@ void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename,
             draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, iteration, it_num, 0, 0, "mAP%", dont_show, 0, 0);
             //quantize_image(sized);
 
+            // box-only: keep the optimized pixels only inside the box given by truth_cpu[0..3]
+            // (used here as relative center x, center y, width, height); restore the rest from src_sized
+            if (*boxonly) {
+                int dw = truth_cpu[2] * sized.w, dh = truth_cpu[3] * sized.h;
+                int dx = truth_cpu[0] * sized.w - dw / 2, dy = truth_cpu[1] * sized.h - dh / 2;
+                image crop = crop_image(sized, dx, dy, dw, dh);
+                copy_image_inplace(src_sized, sized);
+                embed_image(crop, sized, dx, dy);
+                free_image(crop);   // crop is only needed for this pass; do not leak one image per iteration
+            }
+
             show_image_cv(sized, "image_optimization");
             wait_key_cv(20);
         }
 
-        {
-            int nboxes = 0;
-            detection *dets = get_network_boxes(&net, im.w, im.h, thresh, 0, 0, 1, &nboxes, letter_box);
-            if (nms) {
-                if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms);
-                else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms);
-            }
-            draw_detections_v3(im, dets, nboxes, thresh, names, alphabet, l.classes, 1);
-            save_image(im, "pre_predictions");
-            if (!dont_show) {
-                show_image(im, "pre_predictions");
-            }
-        }
-
         quantize_image(sized);
         save_image_png(sized, "drawn");
         //sized = load_image("drawn.png", 0, 0, net.c);
 
-        free_network(net);
-        net = parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1
-        if (weightfile) {
-            load_weights(&net, weightfile);
-        }
-        net.benchmark_layers = benchmark_layers;
-
-
-        double time = get_time_point();
-        network_predict(net, X);
-        printf("%s: Predicted in %lf milli-seconds.\n", input, ((double)get_time_point() - time) / 1000);
-
         int nboxes = 0;
-        detection *dets = get_network_boxes(&net, im.w, im.h, thresh, 0, 0, 1, &nboxes, letter_box);
+        detection *dets = get_network_boxes(&net, sized.w, sized.h, thresh, 0, 0, 1, &nboxes, letter_box);
         if (nms) {
             if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms);
             else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms);
         }
-        draw_detections_v3(im, dets, nboxes, thresh, names, alphabet, l.classes, 1);
-        save_image(im, "predictions");
+        draw_detections_v3(sized, dets, nboxes, thresh, names, alphabet, l.classes, 1);
+        save_image(sized, "pre_predictions");
         if (!dont_show) {
-            show_image(im, "predictions");
+            show_image(sized, "pre_predictions");
         }
 
         free_detections(dets, nboxes);
         free_image(im);
         free_image(sized);
+        free_image(src_sized);
 
         if (!dont_show) {
             wait_until_press_key_cv();
diff --git a/src/image.c b/src/image.c
index fc5264df..855a4f2f 100644
--- a/src/image.c
+++ b/src/image.c
@@ -654,6 +654,13 @@ void normalize_image2(image p)
     free(max);
 }
 
+// Copies src.h*src.w*src.c floats from src.data into dst.data. dst is not
+// resized, so its buffer must already hold at least that many floats.
+void copy_image_inplace(image src, image dst)
+{
+    memcpy(dst.data, src.data, src.h*src.w*src.c * sizeof(float));
+}
+
 image copy_image(image p)
 {
     image copy = p;
diff --git a/src/image.h b/src/image.h
index 904b87a8..70d5f98d 100644
--- a/src/image.h
+++ b/src/image.h
@@ -82,6 +82,7 @@ image make_empty_image(int w, int h, int c);
 image float_to_image_scaled(int w, int h, int c, float *data);
 image float_to_image(int w, int h, int c, float *data);
 image copy_image(image p);
+void copy_image_inplace(image src, image dst);
 image load_image(char *filename, int w, int h, int c);
 image load_image_stb_resize(char *filename, int w, int h, int c);
 //LIB_API image load_image_color(char *filename, int w, int h);
diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp
index 030113ed..c65eef61 100644
--- a/src/image_opencv.cpp
+++ b/src/image_opencv.cpp
@@ -1347,7 +1347,7 @@ void callback_mouse_click(int event, int x, int y, int flags, void* user_data)
     }
 }
 
-extern "C" void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int num_truth, int *it_num_set, float *lr_set, int classes, char **names)
+extern "C" void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int num_truth, int *it_num_set, float *lr_set, int *boxonly, int classes, char **names)
 {
     cv::Mat frame = image_to_mat(sized);
     if(frame.channels() == 3) cv::cvtColor(frame, frame, cv::COLOR_RGB2BGR);
@@ -1374,6 +1374,9 @@ extern "C" void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int
     std::string const cl_trackbar_name = "class_id";
     int cl_tb_res = cv::createTrackbar(cl_trackbar_name, window_name, &cl_trackbar_value, classes-1);
 
+    std::string const bo_trackbar_name = "box-only";
+    int bo_tb_res = cv::createTrackbar(bo_trackbar_name, window_name, boxonly, 1);
+
     int i = 0;
 
     while (!selected) {
diff --git a/src/image_opencv.h b/src/image_opencv.h
index b8563a3b..b2dc7c82 100644
--- a/src/image_opencv.h
+++ b/src/image_opencv.h
@@ -109,7 +109,7 @@ void blend_images_cv(image new_img, float alpha, image old_img, float beta);
 image blur_image(image src_img, int ksize);
 
 // draw objects for Adversarial attacks
-void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int num_truth, int *it_num_set, float *lr_set, int classes, char **names);
+void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int num_truth, int *it_num_set, float *lr_set, int *boxonly, int classes, char **names);
 
 // Show Anchors
 void show_acnhors(int number_of_boxes, int num_of_clusters, float *rel_width_height_array, model anchors_data, int width, int height);
diff --git a/src/network_kernels.cu b/src/network_kernels.cu
index 45f86a2f..ae1a5c71 100644
--- a/src/network_kernels.cu
+++ b/src/network_kernels.cu
@@ -232,7 +232,6 @@ void backward_network_gpu(network net, network_state state)
             printf(" x_size = %d, original_delta = %p, original_input = %p, net.learning_rate = %d \n",
                 x_size, original_delta, original_input, x_size, net.learning_rate);
             axpy_ongpu(x_size, net.learning_rate, original_delta, 1, original_input, 1);
-            //axpy_ongpu(x_size, 0.1, original_delta, 1, original_input, 1);
             constrain_min_max_ongpu(x_size, 0, 1, original_input, 1);
         }
 
diff --git a/src/yolo_layer.c b/src/yolo_layer.c
index 94a3cfe3..41ae96c7 100644
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@@ -171,8 +171,8 @@ ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i,
         float tw = log(truth.w*w / biases[2 * n]);
         float th = log(truth.h*h / biases[2 * n + 1]);
 
-        printf(" tx = %f, ty = %f, tw = %f, th = %f \n", tx, ty, tw, th);
-        printf(" x = %f, y = %f, w = %f, h = %f \n", x[index + 0 * stride], x[index + 1 * stride], x[index + 2 * stride], x[index + 3 * stride]);
+        //printf(" tx = %f, ty = %f, tw = %f, th = %f \n", tx, ty, tw, th);
+        //printf(" x = %f, y = %f, w = %f, h = %f \n", x[index + 0 * stride], x[index + 1 * stride], x[index + 2 * stride], x[index + 3 * stride]);
 
         // accumulate delta
         delta[index + 0 * stride] += scale * (tx - x[index + 0 * stride]) * iou_normalizer;
@@ -477,9 +477,8 @@ void forward_yolo_layer(const layer l, network_state state)
                     int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1);
                     delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat, l.focal_loss, l.label_smooth_eps, l.classes_multipliers);
 
-                    printf(" label: class_id = %d, truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f \n", class_id, truth.x, truth.y, truth.w, truth.h);
-                    printf(" mask_n = %d, l.output[obj_index] = %f, l.output[class_index + class_id] = %f \n\n", mask_n, l.output[obj_index], l.output[class_index + class_id]);
-
+                    //printf(" label: class_id = %d, truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f \n", class_id, truth.x, truth.y, truth.w, truth.h);
+                    //printf(" mask_n = %d, l.output[obj_index] = %f, l.output[class_index + class_id] = %f \n\n", mask_n, l.output[obj_index], l.output[class_index + class_id]);
                     ++count;
                     ++class_count;
 