Added supervised and unsupervised contrastive loss for the classifier. Usage in the cfg file: [net] contrastive=1 unsupervised=1, [contrastive] classes=1000

5 years ago · a553899505
parent 09991d0488
commit a553899505
7 changed files with 52 additions and 31 deletions
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -727,6 +727,7 @@ typedef struct network {
    int letter_box;
    int mosaic_bound;
    int contrastive;
+    int unsupervised;
    float angle;
    float aspect;
    float exposure;
--- a/src/blas.c
+++ b/src/blas.c
@@ -590,9 +590,10 @@ void grad_contrastive_loss_positive(int i, int *labels, int num_of_samples, floa
        fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f \n", N, temperature, vec_len);
        getchar();
    }
-    const float mult = 1 / ((2 * N - 1) * temperature * vec_len);
+    const float mult = 1 / ((N - 1) * temperature * vec_len);

    for (j = 0; j < num_of_samples; ++j) {
+        //if (i != j && (i/2) == (j/2)) {
        if (i != j && labels[i] == labels[j]) {
            const float sim = cos_sim[i*num_of_samples + j];        // cosine_similarity(z[i], z[j], feature_size);
            const float P = p_constrastive[i*num_of_samples + j];   // P_constrastive(i, j, labels, num_of_samples, z, feature_size, temperature, cos_sim);
@@ -600,8 +601,8 @@ void grad_contrastive_loss_positive(int i, int *labels, int num_of_samples, floa

            int m;
            for (m = 0; m < feature_size; ++m) {
-                //const float d = mult*(sim * z[i][m] - z[j][m]) * (1 - P); // bad
-                const float d = mult*(sim * z[j][m] - z[j][m]) * (1 - P); // good
+                const float d = mult*(sim * z[i][m] - z[j][m]) * (1 - P); // good
+                //const float d = mult*(sim * z[j][m] - z[j][m]) * (1 - P); // bad
               // printf(" pos: z[j][m] = %f, z[i][m] = %f, d = %f, sim = %f \n", z[j][m], z[i][m], d, sim);
                delta[m] -= d;
            }
@@ -626,9 +627,10 @@ void grad_contrastive_loss_negative(int i, int *labels, int num_of_samples, floa
        fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f \n", N, temperature, vec_len);
        getchar();
    }
-    const float mult = 1 / ((2 * N - 1) * temperature * vec_len);
+    const float mult = 1 / ((N - 1) * temperature * vec_len);

    for (j = 0; j < num_of_samples; ++j) {
+        //if (i != j && (i/2) == (j/2)) {
        if (i != j && labels[i] == labels[j]) {

            int k;
@@ -641,8 +643,8 @@ void grad_contrastive_loss_negative(int i, int *labels, int num_of_samples, floa

                    int m;
                    for (m = 0; m < feature_size; ++m) {
-                        //const float d = mult*(z[k][m] - sim * z[i][m]) * P;   // bad
-                        const float d = mult*(z[k][m] - sim * z[k][m]) * P; // good
+                        const float d = mult*(z[k][m] - sim * z[i][m]) * P;   // good
+                        //const float d = mult*(z[k][m] - sim * z[k][m]) * P; // bad
                        //printf(" neg: z[k][m] = %f, z[i][m] = %f, d = %f, sim = %f \n", z[k][m], z[i][m], d, sim);
                        delta[m] -= d;
                    }
--- a/src/classifier.c
+++ b/src/classifier.c
@@ -69,13 +69,18 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    int topk_data = option_find_int(options, "top", 5);
    char topk_buff[10];
    sprintf(topk_buff, "top%d", topk_data);
-    if (classes != net.layers[net.n - 1].inputs) {
+    layer l = net.layers[net.n - 1];
+    if (classes != l.outputs && (l.type == SOFTMAX || l.type == COST)) {
        printf("\n Error: num of filters = %d in the last conv-layer in cfg-file doesn't match to classes = %d in data-file \n",
-            net.layers[net.n - 1].inputs, classes);
+            l.outputs, classes);
        getchar();
    }

    char **labels = get_labels(label_list);
+    if (net.unsupervised) {
+        free(labels);
+        labels = NULL;
+    }
    list *plist = get_paths(train_list);
    char **paths = (char **)list_to_array(plist);
    printf("%d\n", plist->size);
@@ -184,8 +189,16 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
        int draw_precision = 0;
        if (calc_topk && (i >= calc_topk_for_each || i == net.max_batches)) {
            iter_topk = i;
-            topk = validate_classifier_single(datacfg, cfgfile, weightfile, &net, topk_data); // calc TOP-n
-            printf("\n accuracy %s = %f \n", topk_buff, topk);
+            if (net.contrastive && l.type != SOFTMAX && l.type != COST) {
+                int k;
+                for (k = 0; k < net.n; ++k) if (net.layers[k].type == CONTRASTIVE) break;
+                topk = *(net.layers[k].loss) / 100;
+                sprintf(topk_buff, "Contr");
+            }
+            else {
+                topk = validate_classifier_single(datacfg, cfgfile, weightfile, &net, topk_data); // calc TOP-n
+                printf("\n accuracy %s = %f \n", topk_buff, topk);
+            }
            draw_precision = 1;
        }

@@ -240,7 +253,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
    free(nets);

    //free_ptrs((void**)labels, classes);
-    free(labels);
+    if(labels) free(labels);
    free_ptrs((void**)paths, plist->size);
    free_list(plist);
    free(nets);
@@ -820,9 +833,10 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
    if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
    int classes = option_find_int(options, "classes", 2);
    printf(" classes = %d, output in cfg = %d \n", classes, net.layers[net.n - 1].c);
-    if (classes != net.layers[net.n - 1].inputs) {
+    layer l = net.layers[net.n - 1];
+    if (classes != l.outputs && (l.type == SOFTMAX || l.type == COST)) {
        printf("\n Error: num of filters = %d in the last conv-layer in cfg-file doesn't match to classes = %d in data-file \n",
-            net.layers[net.n - 1].inputs, classes);
+            l.outputs, classes);
        getchar();
    }
    if (top == 0) top = option_find_int(options, "top", 1);
--- a/src/data.c
+++ b/src/data.c
@@ -613,7 +613,9 @@ matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierar
    } else {
        // unsupervised learning
        for (i = 0; i < n; ++i) {
-            const int class_id = i / 2;
+            const int img_index = (contrastive) ? (i / 2) : i;
+            const uintptr_t path_p = (uintptr_t)paths[img_index];// abs(random_gen());
+            const int class_id = path_p % k;
            int l;
            for (l = 0; l < k; ++l) y.vals[i][l] = 0;
            y.vals[i][class_id] = 1;
--- a/src/parser.c
+++ b/src/parser.c
@@ -1130,6 +1130,7 @@ void parse_net_options(list *options, network *net)
    net->letter_box = option_find_int_quiet(options, "letter_box", 0);
    net->mosaic_bound = option_find_int_quiet(options, "mosaic_bound", 0);
    net->contrastive = option_find_int_quiet(options, "contrastive", 0);
+    net->unsupervised = option_find_int_quiet(options, "unsupervised", 0);
    net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
    net->resize_step = option_find_float_quiet(options, "resize_step", 32);
    net->attention = option_find_int_quiet(options, "attention", 0);
--- a/src/softmax_layer.c
+++ b/src/softmax_layer.c
@@ -141,7 +141,7 @@ contrastive_layer make_contrastive_layer(int batch, int w, int h, int n, int cla
    l.n = n;
    l.classes = classes;
    l.temperature = 1;
-    //l.loss = (float*)xcalloc(inputs * batch, sizeof(float));
+    l.loss = (float*)xcalloc(1, sizeof(float));
    l.output = (float*)xcalloc(inputs * batch, sizeof(float));
    l.delta = (float*)xcalloc(inputs * batch, sizeof(float));
    l.cost = (float*)xcalloc(1, sizeof(float));
@@ -156,7 +156,6 @@ contrastive_layer make_contrastive_layer(int batch, int w, int h, int n, int cla
    l.backward_gpu = backward_contrastive_layer_gpu;

    l.output_gpu = cuda_make_array(l.output, inputs*batch);
-    //l.loss_gpu = cuda_make_array(l.loss, inputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
    //l.cos_sim_gpu = cuda_make_array(l.cos_sim, l.batch*l.batch);
 #endif
@@ -164,10 +163,10 @@ contrastive_layer make_contrastive_layer(int batch, int w, int h, int n, int cla
 }


-void forward_contrastive_layer(const contrastive_layer l, network_state state)
+void forward_contrastive_layer(contrastive_layer l, network_state state)
 {
    if (!state.train) return;
-    const float truth_thresh = 0.2;
+    const float truth_thresh = state.net.label_smooth_eps;

    memset(l.delta, 0, l.batch*l.inputs * sizeof(float));

@@ -183,7 +182,8 @@ void forward_contrastive_layer(const contrastive_layer l, network_state state)
                for (n = 0; n < l.classes; ++n) {
                    const float truth_prob = state.truth[b*l.classes + n];
                    //printf(" truth_prob = %f, ", truth_prob);
-                    if (truth_prob > max_truth)
+                    //if (truth_prob > max_truth)
+                    if (truth_prob > truth_thresh)
                    {
                        max_truth = truth_prob;
                        l.labels[b] = n;
@@ -228,7 +228,8 @@ void forward_contrastive_layer(const contrastive_layer l, network_state state)
        //printf(" l.labels[b] = %d, l.labels[b+1] = %d, l.labels[b+2] = %d, b = %d \n", l.labels[b], l.labels[b + 1], l.labels[b + 2], b);
        //printf(" same = %f, aug = %f, diff = %f, (aug > diff) = %d \n", same, aug, diff, (aug > diff));
    }
-    printf("good contrast = %f %% \n", 100 * good_contrast / (l.batch/2));
+    *l.loss = 100 * good_contrast / (l.batch / 2);
+    printf(" Contrast accuracy = %f %% \n", *l.loss);

    // precalculate P_contrastive
    for (b = 0; b < l.batch; ++b) {
@@ -251,10 +252,10 @@ void forward_contrastive_layer(const contrastive_layer l, network_state state)
            {
                //printf(" b = %d, ", b);
                // positive
-                grad_contrastive_loss_positive(b, l.labels, l.batch, z, l.n, l.temperature, l.cos_sim, l.p_constrastive, l.delta);
+                grad_contrastive_loss_positive(b, l.labels, l.batch, z, l.n, l.temperature, l.cos_sim, l.p_constrastive, l.delta + b*l.inputs);

                // negative
-                grad_contrastive_loss_negative(b, l.labels, l.batch, z, l.n, l.temperature, l.cos_sim, l.p_constrastive, l.delta);
+                grad_contrastive_loss_negative(b, l.labels, l.batch, z, l.n, l.temperature, l.cos_sim, l.p_constrastive, l.delta + b*l.inputs);
            }
        }
    }
@@ -264,9 +265,9 @@ void forward_contrastive_layer(const contrastive_layer l, network_state state)
    free(z);
 }

-void backward_contrastive_layer(const contrastive_layer l, network_state net)
+void backward_contrastive_layer(contrastive_layer l, network_state state)
 {
-    axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1);
+    axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, state.delta, 1);
 }


@@ -283,10 +284,10 @@ void push_contrastive_layer_output(const contrastive_layer l)
 }


-void forward_contrastive_layer_gpu(const contrastive_layer l, network_state state)
+void forward_contrastive_layer_gpu(contrastive_layer l, network_state state)
 {
-    if (!state.train) return;
    simple_copy_ongpu(l.batch*l.inputs, state.input, l.output_gpu);
+    if (!state.train) return;

    float *in_cpu = (float *)xcalloc(l.batch*l.inputs, sizeof(float));
    cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs);
@@ -311,7 +312,7 @@ void forward_contrastive_layer_gpu(const contrastive_layer l, network_state stat
    if (cpu_state.truth) free(cpu_state.truth);
 }

-void backward_contrastive_layer_gpu(const contrastive_layer layer, network_state state)
+void backward_contrastive_layer_gpu(contrastive_layer layer, network_state state)
 {
    axpy_ongpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, state.delta, 1);
 }
--- a/src/softmax_layer.h
+++ b/src/softmax_layer.h
@@ -23,14 +23,14 @@ void backward_softmax_layer_gpu(const softmax_layer l, network_state state);
 //-----------------------

 contrastive_layer make_contrastive_layer(int batch, int w, int h, int n, int classes, int inputs);
-void forward_contrastive_layer(const contrastive_layer l, network_state state);
-void backward_contrastive_layer(const contrastive_layer l, network_state net);
+void forward_contrastive_layer(contrastive_layer l, network_state state);
+void backward_contrastive_layer(contrastive_layer l, network_state net);

 #ifdef GPU
 void pull_contrastive_layer_output(const contrastive_layer l);
 void push_contrastive_layer_output(const contrastive_layer l);
-void forward_contrastive_layer_gpu(const contrastive_layer l, network_state state);
-void backward_contrastive_layer_gpu(const contrastive_layer layer, network_state state);
+void forward_contrastive_layer_gpu(contrastive_layer l, network_state state);
+void backward_contrastive_layer_gpu(contrastive_layer layer, network_state state);
 #endif

 #ifdef __cplusplus