|
|
|
@@ -65,65 +65,65 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i
     l.peephole = peephole;
 
     // U
-    l.uf = (layer*)calloc(1, sizeof(layer));
+    l.uf = (layer*)xcalloc(1, sizeof(layer));
     *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.uf->batch = batch;
     if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size;
 
-    l.ui = (layer*)calloc(1, sizeof(layer));
+    l.ui = (layer*)xcalloc(1, sizeof(layer));
     *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.ui->batch = batch;
     if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size;
 
-    l.ug = (layer*)calloc(1, sizeof(layer));
+    l.ug = (layer*)xcalloc(1, sizeof(layer));
     *(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.ug->batch = batch;
     if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size;
 
-    l.uo = (layer*)calloc(1, sizeof(layer));
+    l.uo = (layer*)xcalloc(1, sizeof(layer));
     *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.uo->batch = batch;
     if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size;
 
 
     // W
-    l.wf = (layer*)calloc(1, sizeof(layer));
+    l.wf = (layer*)xcalloc(1, sizeof(layer));
     *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.wf->batch = batch;
     if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size;
 
-    l.wi = (layer*)calloc(1, sizeof(layer));
+    l.wi = (layer*)xcalloc(1, sizeof(layer));
     *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.wi->batch = batch;
     if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size;
 
-    l.wg = (layer*)calloc(1, sizeof(layer));
+    l.wg = (layer*)xcalloc(1, sizeof(layer));
     *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.wg->batch = batch;
     if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size;
 
-    l.wo = (layer*)calloc(1, sizeof(layer));
+    l.wo = (layer*)xcalloc(1, sizeof(layer));
     *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
     l.wo->batch = batch;
     if (l.workspace_size < l.wo->workspace_size) l.workspace_size = l.wo->workspace_size;
 
 
     // V
-    l.vf = (layer*)calloc(1, sizeof(layer));
+    l.vf = (layer*)xcalloc(1, sizeof(layer));
     if (l.peephole) {
         *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
         l.vf->batch = batch;
         if (l.workspace_size < l.vf->workspace_size) l.workspace_size = l.vf->workspace_size;
     }
 
-    l.vi = (layer*)calloc(1, sizeof(layer));
+    l.vi = (layer*)xcalloc(1, sizeof(layer));
     if (l.peephole) {
         *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
         l.vi->batch = batch;
         if (l.workspace_size < l.vi->workspace_size) l.workspace_size = l.vi->workspace_size;
     }
 
-    l.vo = (layer*)calloc(1, sizeof(layer));
+    l.vo = (layer*)xcalloc(1, sizeof(layer));
     if (l.peephole) {
         *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, NULL);
         l.vo->batch = batch;
@@ -141,30 +141,30 @@ layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, i
 
     assert(l.wo->outputs == l.uo->outputs);
 
-    l.output = (float*)calloc(outputs * batch * steps, sizeof(float));
-    //l.state = (float*)calloc(outputs * batch, sizeof(float));
+    l.output = (float*)xcalloc(outputs * batch * steps, sizeof(float));
+    //l.state = (float*)xcalloc(outputs * batch, sizeof(float));
 
     l.forward = forward_conv_lstm_layer;
     l.update = update_conv_lstm_layer;
     l.backward = backward_conv_lstm_layer;
 
-    l.prev_state_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.prev_cell_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.cell_cpu = (float*)calloc(batch*outputs*steps, sizeof(float));
-
-    l.f_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.i_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.g_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.o_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.c_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.stored_c_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.h_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.stored_h_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.temp_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.temp2_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.temp3_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.dc_cpu = (float*)calloc(batch*outputs, sizeof(float));
-    l.dh_cpu = (float*)calloc(batch*outputs, sizeof(float));
+    l.prev_state_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.prev_cell_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.cell_cpu = (float*)xcalloc(batch*outputs*steps, sizeof(float));
+
+    l.f_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.i_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.g_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.o_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.c_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.stored_c_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.h_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.stored_h_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.temp_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.temp2_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.temp3_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.dc_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
+    l.dh_cpu = (float*)xcalloc(batch*outputs, sizeof(float));
 
 #ifdef GPU
     l.forward_gpu = forward_conv_lstm_layer_gpu;
@@ -275,26 +275,26 @@ void resize_conv_lstm_layer(layer *l, int w, int h)
 
     assert(l->wo->outputs == l->uo->outputs);
 
-    l->output = (float*)realloc(l->output, outputs * batch * steps * sizeof(float));
-    //l->state = (float*)realloc(l->state, outputs * batch * sizeof(float));
-
-    l->prev_state_cpu = (float*)realloc(l->prev_state_cpu, batch*outputs * sizeof(float));
-    l->prev_cell_cpu = (float*)realloc(l->prev_cell_cpu, batch*outputs * sizeof(float));
-    l->cell_cpu = (float*)realloc(l->cell_cpu, batch*outputs*steps * sizeof(float));
-
-    l->f_cpu = (float*)realloc(l->f_cpu, batch*outputs * sizeof(float));
-    l->i_cpu = (float*)realloc(l->i_cpu, batch*outputs * sizeof(float));
-    l->g_cpu = (float*)realloc(l->g_cpu, batch*outputs * sizeof(float));
-    l->o_cpu = (float*)realloc(l->o_cpu, batch*outputs * sizeof(float));
-    l->c_cpu = (float*)realloc(l->c_cpu, batch*outputs * sizeof(float));
-    l->h_cpu = (float*)realloc(l->h_cpu, batch*outputs * sizeof(float));
-    l->temp_cpu = (float*)realloc(l->temp_cpu, batch*outputs * sizeof(float));
-    l->temp2_cpu = (float*)realloc(l->temp2_cpu, batch*outputs * sizeof(float));
-    l->temp3_cpu = (float*)realloc(l->temp3_cpu, batch*outputs * sizeof(float));
-    l->dc_cpu = (float*)realloc(l->dc_cpu, batch*outputs * sizeof(float));
-    l->dh_cpu = (float*)realloc(l->dh_cpu, batch*outputs * sizeof(float));
-    l->stored_c_cpu = (float*)realloc(l->stored_c_cpu, batch*outputs * sizeof(float));
-    l->stored_h_cpu = (float*)realloc(l->stored_h_cpu, batch*outputs * sizeof(float));
+    l->output = (float*)xrealloc(l->output, outputs * batch * steps * sizeof(float));
+    //l->state = (float*)xrealloc(l->state, outputs * batch * sizeof(float));
+
+    l->prev_state_cpu = (float*)xrealloc(l->prev_state_cpu, batch*outputs * sizeof(float));
+    l->prev_cell_cpu = (float*)xrealloc(l->prev_cell_cpu, batch*outputs * sizeof(float));
+    l->cell_cpu = (float*)xrealloc(l->cell_cpu, batch*outputs*steps * sizeof(float));
+
+    l->f_cpu = (float*)xrealloc(l->f_cpu, batch*outputs * sizeof(float));
+    l->i_cpu = (float*)xrealloc(l->i_cpu, batch*outputs * sizeof(float));
+    l->g_cpu = (float*)xrealloc(l->g_cpu, batch*outputs * sizeof(float));
+    l->o_cpu = (float*)xrealloc(l->o_cpu, batch*outputs * sizeof(float));
+    l->c_cpu = (float*)xrealloc(l->c_cpu, batch*outputs * sizeof(float));
+    l->h_cpu = (float*)xrealloc(l->h_cpu, batch*outputs * sizeof(float));
+    l->temp_cpu = (float*)xrealloc(l->temp_cpu, batch*outputs * sizeof(float));
+    l->temp2_cpu = (float*)xrealloc(l->temp2_cpu, batch*outputs * sizeof(float));
+    l->temp3_cpu = (float*)xrealloc(l->temp3_cpu, batch*outputs * sizeof(float));
+    l->dc_cpu = (float*)xrealloc(l->dc_cpu, batch*outputs * sizeof(float));
+    l->dh_cpu = (float*)xrealloc(l->dh_cpu, batch*outputs * sizeof(float));
+    l->stored_c_cpu = (float*)xrealloc(l->stored_c_cpu, batch*outputs * sizeof(float));
+    l->stored_h_cpu = (float*)xrealloc(l->stored_h_cpu, batch*outputs * sizeof(float));
 
 #ifdef GPU
     //if (l->state_gpu) cudaFree(l->state_gpu);
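/*
 * Context for the diff above (illustrative sketch only, not the actual darknet
 * implementation): xcalloc and xrealloc are assumed to be thin wrappers around
 * calloc/realloc that abort with a message on allocation failure instead of
 * returning NULL, so the layer setup and resize code no longer has to check
 * every allocation. The real definitions live elsewhere in the repository and
 * may report errors differently.
 */
#include <stdio.h>
#include <stdlib.h>

static void *xcalloc(size_t nmemb, size_t size)
{
    void *ptr = calloc(nmemb, size);
    if (!ptr) {
        fprintf(stderr, "xcalloc: failed to allocate %zu x %zu bytes\n", nmemb, size);
        exit(EXIT_FAILURE);
    }
    return ptr;
}

static void *xrealloc(void *old_ptr, size_t size)
{
    void *ptr = realloc(old_ptr, size);
    if (!ptr) {
        fprintf(stderr, "xrealloc: failed to reallocate %zu bytes\n", size);
        exit(EXIT_FAILURE);
    }
    return ptr;
}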