move as many defs as possible away from darknet.h

pull/2394/head
Stefano Sinigardi 6 years ago
parent 4ae778defc
commit 6e7c7976d7
55 changed files (lines changed in parentheses):

 1. .gitignore (9)
 2. include/darknet.h (34)
 3. include/yolo_v2_class.hpp (2)
 4. src/activations.c (1)
 5. src/art.c (3)
 6. src/avgpool_layer.c (1)
 7. src/avgpool_layer_kernels.cu (1)
 8. src/batchnorm_layer.c (2)
 9. src/blas.c (2)
10. src/blas_kernels.cu (2)
11. src/box.c (8)
12. src/captcha.c (1)
13. src/cifar.c (2)
14. src/classifier.c (2)
15. src/col2im.c (3)
16. src/col2im_kernels.cu (1)
17. src/compare.c (4)
18. src/cost_layer.c (1)
19. src/cpu_gemm.c (20)
20. src/crop_layer.c (5)
21. src/crop_layer_kernels.cu (1)
22. src/cuda.h (4)
23. src/darknet.c (1)
24. src/data.c (3)
25. src/deconvolutional_kernels.cu (3)
26. src/deconvolutional_layer.c (5)
27. src/detection_layer.c (2)
28. src/detector.c (8)
29. src/dice.c (1)
30. src/dropout_layer.c (3)
31. src/gemm.c (9)
32. src/gettimeofday.c (6)
33. src/gettimeofday.h (3)
34. src/http_stream.h (2)
35. src/im2col.c (3)
36. src/im2col_kernels.cu (1)
37. src/image.c (2)
38. src/list.c (2)
39. src/local_layer.c (18)
40. src/lstm_layer.h (4)
41. src/maxpool_layer.c (1)
42. src/maxpool_layer_kernels.cu (1)
43. src/network.h (1)
44. src/network_kernels.cu (1)
45. src/nightmare.c (5)
46. src/normalization_layer.c (6)
47. src/region_layer.c (3)
48. src/rnn_vid.c (1)
49. src/softmax_layer.c (6)
50. src/stb_image_write.h (54)
51. src/tag.c (1)
52. src/tree.c (4)
53. src/upsample_layer.c (2)
54. src/writing.c (1)
55. src/yolo_layer.c (1)

.gitignore (vendored)

@@ -24,12 +24,3 @@ ehthumbs.db
Icon?
Thumbs.db
*.swp
# CMake #
cmake-build-debug/
CMakeLists.txt
build_*/
build.*
cmake/
*.patch
.gitignore

include/darknet.h
@@ -26,40 +26,10 @@
#endif
#endif
#ifdef _WIN32
#define PORT unsigned long
#define ADDRPOINTER int*
#else
#define PORT unsigned short
#define SOCKET int
#define HOSTENT struct hostent
#define SOCKADDR struct sockaddr
#define SOCKADDR_IN struct sockaddr_in
#define ADDRPOINTER unsigned int*
#define INVALID_SOCKET -1
#define SOCKET_ERROR -1
#endif
#define FULL_MASK 0xffffffff
#define WARP_SIZE 32
#define BLOCK 512
#define NUMCHARS 37
#define NFRAMES 3
#define BLOCK_TRANSPOSE32 256
#define DOABS 1
#define SECRET_NUM -1234
#define C_SHARP_MAX_OBJECTS 1000
#define TILE_M 4 // 4 ops
#define TILE_N 16 // AVX2 = 2 ops * 8 floats
#define TILE_K 16 // loop
#ifndef __COMPAR_FN_T
#define __COMPAR_FN_T
typedef int (*__compar_fn_t)(const void*, const void*);
#ifdef __USE_GNU
typedef __compar_fn_t comparison_fn_t;
#endif
#endif
#ifdef GPU
#define BLOCK 512
#include "cuda_runtime.h"
#include "curand.h"
@@ -101,8 +71,6 @@ typedef struct metadata metadata;
struct tree;
typedef struct tree tree;
#define SECRET_NUM -1234
extern int gpu_index;
// option_list.h
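The pattern throughout this commit: implementation macros (BLOCK, FULL_MASK, WARP_SIZE, NUMCHARS, TILE_*, DOABS, the __compar_fn_t fallback) are deleted from the public include/darknet.h and re-declared in the translation units that actually use them. A minimal sketch of the before/after for one macro, illustrative only and not the literal diff:

/* before: include/darknet.h, seen by every consumer of the library */
#define BLOCK 512   /* CUDA launch detail leaking into the public API */

/* after: src/cuda.h, seen only by the GPU translation units */
#ifdef GPU
#define BLOCK 512   /* threads per block for kernel launches */
#endif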

include/yolo_v2_class.hpp
@@ -3,6 +3,8 @@
#include "darknet.h"
#define C_SHARP_MAX_OBJECTS 1000
struct bbox_t {
unsigned int x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box
float prob; // confidence - probability that the object was found correctly

src/activations.c
@@ -162,4 +162,3 @@ void gradient_array(const float *x, const int n, const ACTIVATION a, float *delt
delta[i] *= gradient(x[i], a);
}
}

src/art.c
@@ -38,7 +38,7 @@ void demo_art(char *cfgfile, char *weightfile, int cam_index)
char *window = "ArtJudgementBot9000!!!";
if(!cap) error("Couldn't connect to webcam.\n");
cvNamedWindow(window, CV_WINDOW_NORMAL);
cvNamedWindow(window, CV_WINDOW_NORMAL);
cvResizeWindow(window, 512, 512);
int i;
int idx[] = {37, 401, 434};
@@ -84,4 +84,3 @@ void run_art(int argc, char **argv)
char *weights = argv[3];
demo_art(cfg, weights, cam_index);
}

src/avgpool_layer.c
@@ -68,4 +68,3 @@ void backward_avgpool_layer(const avgpool_layer l, network_state state)
}
}
}

src/avgpool_layer_kernels.cu
@@ -56,4 +56,3 @@ extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network_state st
backward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.w, layer.h, layer.c, state.delta, layer.delta_gpu);
CHECK_CUDA(cudaPeekAtLastError());
}

src/batchnorm_layer.c
@@ -273,4 +273,4 @@ void backward_batchnorm_layer_gpu(layer l, network_state state)
if (l.type == BATCHNORM) simple_copy_ongpu(l.outputs*l.batch, l.delta_gpu, state.delta);
//copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1);
}
#endif
#endif

src/blas.c
@@ -333,4 +333,4 @@ void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int for
}
}
}
}
}

src/blas_kernels.cu
@@ -974,4 +974,4 @@ extern "C" void softmax_tree_gpu(float *input, int spatial, int batch, int strid
CHECK_CUDA(cudaPeekAtLastError());
cuda_free((float *)tree_groups_size);
cuda_free((float *)tree_groups_offset);
}
}

src/box.c
@@ -98,9 +98,9 @@ float box_iou(box a, box b)
float box_rmse(box a, box b)
{
return sqrt(pow(a.x-b.x, 2) +
pow(a.y-b.y, 2) +
pow(a.w-b.w, 2) +
return sqrt(pow(a.x-b.x, 2) +
pow(a.y-b.y, 2) +
pow(a.w-b.w, 2) +
pow(a.h-b.h, 2));
}
@@ -252,7 +252,7 @@ void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thr
sortable_bbox* s = (sortable_bbox*)calloc(total, sizeof(sortable_bbox));
for(i = 0; i < total; ++i){
s[i].index = i;
s[i].index = i;
s[i].class_id = 0;
s[i].probs = probs;
}

src/captcha.c
@@ -361,4 +361,3 @@ void run_captcha(int argc, char **argv)
//else if(0==strcmp(argv[2], "decode")) decode_captcha(cfg, weights);
//else if(0==strcmp(argv[2], "valid")) validate_captcha(cfg, weights);
}

src/cifar.c
@@ -273,5 +273,3 @@ void run_cifar(int argc, char **argv)
else if(0==strcmp(argv[2], "csvtrain")) test_cifar_csvtrain(cfg, weights);
else if(0==strcmp(argv[2], "eval")) eval_cifar_csv();
}

src/classifier.c
@@ -1295,5 +1295,3 @@ void run_classifier(int argc, char **argv)
else if(0==strcmp(argv[2], "validcrop")) validate_classifier_crop(data, cfg, weights);
else if(0==strcmp(argv[2], "validfull")) validate_classifier_full(data, cfg, weights);
}

src/col2im.c
@@ -14,7 +14,7 @@ void col2im_add_pixel(float *im, int height, int width, int channels,
//This one might be too, can't remember.
void col2im_cpu(float* data_col,
int channels, int height, int width,
int ksize, int stride, int pad, float* data_im)
int ksize, int stride, int pad, float* data_im)
{
int c,h,w;
int height_col = (height + 2*pad - ksize) / stride + 1;
@@ -37,4 +37,3 @@ void col2im_cpu(float* data_col,
}
}
}

src/col2im_kernels.cu
@@ -55,4 +55,3 @@ void col2im_ongpu(float *data_col,
CHECK_CUDA(cudaPeekAtLastError());
}

src/compare.c
@@ -180,7 +180,7 @@ int bbox_comparator(const void *a, const void *b)
memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float));
memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
float *predictions = network_predict(net, X);
free_image(im1);
free_image(im2);
free(X);
@@ -218,7 +218,7 @@ void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, in
bbox_update(a, b, i, result);
}
}
free_image(im1);
free_image(im2);
free(X);

src/cost_layer.c
@@ -145,4 +145,3 @@ void backward_cost_layer_gpu(const cost_layer l, network_state state)
axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1);
}
#endif

src/cpu_gemm.c
@@ -1,7 +1,7 @@
//#include "mini_blas.h"
void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA,
float *A, int lda,
void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA,
float *A, int lda,
float *B, int ldb,
float BETA,
float *C, int ldc)
@@ -17,8 +17,8 @@ void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA,
}
}
void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA,
float *A, int lda,
void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA,
float *A, int lda,
float *B, int ldb,
float BETA,
float *C, int ldc)
@@ -35,8 +35,8 @@ void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA,
}
}
void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA,
float *A, int lda,
void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA,
float *A, int lda,
float *B, int ldb,
float BETA,
float *C, int ldc)
@@ -51,8 +51,8 @@ void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA,
}
}
}
void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA,
float *A, int lda,
void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA,
float *A, int lda,
float *B, int ldb,
float BETA,
float *C, int ldc)
@@ -68,8 +68,8 @@ void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA,
}
void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA,
float *A, int lda,
void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA,
float *A, int lda,
float *B, int ldb,
float BETA,
float *C, int ldc)
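These signatures follow the BLAS GEMM convention, C = ALPHA*op(A)*op(B) + BETA*C, with TA/TB selecting transposition and lda/ldb/ldc the row-major leading dimensions. A hedged usage sketch: only the cpu_gemm signature comes from the file above, the matrices and values are made up for illustration, and the program links against src/cpu_gemm.c.

#include <stdio.h>

/* Declaration as it appears in src/cpu_gemm.c; compile together with that file. */
void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA,
        float *A, int lda,
        float *B, int ldb,
        float BETA,
        float *C, int ldc);

int main(void)
{
    /* Row-major: A is 2x3 (lda = 3), B is 3x2 (ldb = 2), C is 2x2 (ldc = 2). */
    float A[] = {1, 2, 3,
                 4, 5, 6};
    float B[] = {7,  8,
                 9, 10,
                11, 12};
    float C[4] = {0};

    /* TA = TB = 0: neither matrix transposed; C = 1.0*A*B + 0.0*C. */
    cpu_gemm(0, 0, 2, 2, 3, 1.0f, A, 3, B, 2, 0.0f, C, 2);

    printf("%g %g\n%g %g\n", C[0], C[1], C[2], C[3]);  /* 58 64 / 139 154 */
    return 0;
}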

src/crop_layer.c
@@ -88,16 +88,15 @@ void forward_crop_layer(const crop_layer l, network_state state)
for(i = 0; i < l.out_h; ++i){
for(j = 0; j < l.out_w; ++j){
if(flip){
col = l.w - dw - j - 1;
col = l.w - dw - j - 1;
}else{
col = j + dw;
}
row = i + dh;
index = col+l.w*(row+l.h*(c + l.c*b));
index = col+l.w*(row+l.h*(c + l.c*b));
l.output[count++] = state.input[index]*scale + trans;
}
}
}
}
}

src/crop_layer_kernels.cu
@@ -220,4 +220,3 @@ extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state)
cvWaitKey(0);
*/
}

src/cuda.h
@@ -13,6 +13,10 @@ extern int gpu_index;
#ifdef GPU
#define BLOCK 512
#define FULL_MASK 0xffffffff
#define WARP_SIZE 32
#define BLOCK_TRANSPOSE32 256
#include <cuda_runtime.h>
#include <curand.h>

src/darknet.c
@@ -540,4 +540,3 @@ int main(int argc, char **argv)
}
return 0;
}

src/data.c
@@ -7,6 +7,8 @@
#include <stdlib.h>
#include <string.h>
#define NUMCHARS 37
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
list *get_paths(char *filename)
@@ -1347,4 +1349,3 @@ data *split_data(data d, int part, int total)
split[1] = test;
return split;
}

src/deconvolutional_kernels.cu
@@ -59,7 +59,7 @@ extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer,
float *b = layer.col_image_gpu;
float *c = layer.weight_updates_gpu;
im2col_ongpu(layer.delta_gpu + i*layer.n*size, layer.n, out_h, out_w,
im2col_ongpu(layer.delta_gpu + i*layer.n*size, layer.n, out_h, out_w,
layer.size, layer.stride, 0, b);
gemm_ongpu(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
@@ -104,4 +104,3 @@ extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, in
axpy_ongpu(size, learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1);
scal_ongpu(size, momentum, layer.weight_updates_gpu, 1);
}

src/deconvolutional_layer.c
@@ -173,7 +173,7 @@ void backward_deconvolutional_layer(deconvolutional_layer l, network_state state
float *b = l.col_image;
float *c = l.weight_updates;
im2col_cpu(l.delta + i*l.n*size, l.n, out_h, out_w,
im2col_cpu(l.delta + i*l.n*size, l.n, out_h, out_w,
l.size, l.stride, 0, b);
gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n);
@@ -201,6 +201,3 @@ void update_deconvolutional_layer(deconvolutional_layer l, int skip, float learn
axpy_cpu(size, learning_rate, l.weight_updates, 1, l.weights, 1);
scal_cpu(size, momentum, l.weight_updates, 1);
}

src/detection_layer.c
@@ -312,4 +312,4 @@ void get_detection_detections(layer l, int w, int h, float thresh, detection *de
}
}
}
}
}

src/detector.c
@@ -8,6 +8,14 @@
#include "demo.h"
#include "option_list.h"
#ifndef __COMPAR_FN_T
#define __COMPAR_FN_T
typedef int (*__compar_fn_t)(const void*, const void*);
#ifdef __USE_GNU
typedef __compar_fn_t comparison_fn_t;
#endif
#endif
#ifdef OPENCV
#include <opencv2/highgui/highgui_c.h>
#include <opencv2/core/core_c.h>
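The block moved in here is the portability fallback for __compar_fn_t, the comparator type glibc uses with qsort; compilers that do not define it get the typedef locally in detector.c. A minimal sketch of how the typedef is exercised; the comparator below is a hypothetical example, not code from the diff:

#include <stdio.h>
#include <stdlib.h>

#ifndef __COMPAR_FN_T
#define __COMPAR_FN_T
typedef int (*__compar_fn_t)(const void*, const void*);
#endif

/* Hypothetical comparator: sort floats in descending order. */
static int cmp_float_desc(const void *pa, const void *pb)
{
    float a = *(const float *)pa, b = *(const float *)pb;
    return (a < b) - (a > b);
}

int main(void)
{
    float conf[] = {0.31f, 0.92f, 0.55f};
    __compar_fn_t fn = cmp_float_desc;   /* the typedef names this function shape */
    qsort(conf, 3, sizeof(float), fn);
    printf("%.2f %.2f %.2f\n", conf[0], conf[1], conf[2]);  /* 0.92 0.55 0.31 */
    return 0;
}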

src/dice.c
@@ -115,4 +115,3 @@ void run_dice(int argc, char **argv)
else if(0==strcmp(argv[2], "train")) train_dice(cfg, weights);
else if(0==strcmp(argv[2], "valid")) validate_dice(cfg, weights);
}

src/dropout_layer.c
@@ -23,7 +23,7 @@ dropout_layer make_dropout_layer(int batch, int inputs, float probability)
#endif
fprintf(stderr, "dropout p = %.2f %4d -> %4d\n", probability, inputs, inputs);
return l;
}
}
void resize_dropout_layer(dropout_layer *l, int inputs)
{
@@ -57,4 +57,3 @@ void backward_dropout_layer(dropout_layer l, network_state state)
else state.delta[i] *= l.scale;
}
}

src/gemm.c
@@ -15,6 +15,10 @@
#include <omp.h>
#endif
#define TILE_M 4 // 4 ops
#define TILE_N 16 // AVX2 = 2 ops * 8 floats
#define TILE_K 16 // loop
void gemm_bin(int M, int N, int K, float ALPHA,
char *A, int lda,
float *B, int ldb,
@@ -1160,10 +1164,10 @@ static inline void xnor_avx2_popcnt(__m256i a_bit256, __m256i b_bit256, __m256i
__m256i xor256 = _mm256_xor_si256(a_bit256, b_bit256); // xnor = not(xor(a,b))
c_bit256 = _mm256_andnot_si256(xor256, c_bit256); // can be optimized - we could apply the NOT to the weights once instead of doing this NOT here
*count_sum = _mm256_add_epi64(count256(c_bit256), *count_sum); // 1st part - popcnt Mulas algorithm
*count_sum = _mm256_add_epi64(count256(c_bit256), *count_sum); // 1st part - popcnt Mula's algorithm
}
// 2nd part - popcnt Mulas algorithm
// 2nd part - popcnt Mula's algorithm
static inline int get_count_mula(__m256i count_sum) {
return _mm256_extract_epi64(count_sum, 0)
+ _mm256_extract_epi64(count_sum, 1)
@@ -2827,4 +2831,3 @@ int test_gpu_blas()
return 0;
}
#endif
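The two comment fixes above refer to Wojciech Muła's AVX2 population-count algorithm, which the XNOR path uses to count matching bits after the xnor of activations and weights. For context, a reconstruction of the technique under the assumption of AVX2: the helper names count256 and get_count_mula appear in the hunks above, but the bodies here are sketches, not the file's exact code.

#include <immintrin.h>
#include <stdint.h>

/* "1st part" of Muła's algorithm: per-byte popcount via a 4-bit lookup table,
 * then _mm256_sad_epu8 sums the bytes into four 64-bit lanes. */
static inline __m256i count256(__m256i v)
{
    const __m256i lookup = _mm256_setr_epi8(
        0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
        0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
    const __m256i low_mask = _mm256_set1_epi8(0x0f);
    __m256i lo  = _mm256_and_si256(v, low_mask);
    __m256i hi  = _mm256_and_si256(_mm256_srli_epi16(v, 4), low_mask);
    __m256i cnt = _mm256_add_epi8(_mm256_shuffle_epi8(lookup, lo),
                                  _mm256_shuffle_epi8(lookup, hi));
    return _mm256_sad_epu8(cnt, _mm256_setzero_si256());
}

/* "2nd part": reduce the four 64-bit lane sums to a scalar,
 * as get_count_mula() does above. */
static inline int64_t get_count_mula(__m256i count_sum)
{
    return _mm256_extract_epi64(count_sum, 0)
         + _mm256_extract_epi64(count_sum, 1)
         + _mm256_extract_epi64(count_sum, 2)
         + _mm256_extract_epi64(count_sum, 3);
}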

src/gettimeofday.c
@@ -8,7 +8,7 @@ LIB_API int gettimeofday(struct timeval* tp, struct timezone* tzp)
FILETIME file_time;
uint64_t time;
GetSystemTime(&system_time);
SystemTimeToFileTime(&system_time, &file_time);
time = ((uint64_t)file_time.dwLowDateTime);
@@ -18,7 +18,7 @@ LIB_API int gettimeofday(struct timeval* tp, struct timezone* tzp)
tp->tv_usec = (long)(system_time.wMilliseconds * 1000);
return 0;
}
LIB_API int clock_gettime(int dummy, struct timespec* ct)
{
LARGE_INTEGER count;
@@ -30,7 +30,7 @@ LIB_API int clock_gettime(int dummy, struct timespec* ct)
g_counts_per_sec.QuadPart = 0;
}
}
if ((NULL == ct) || (g_counts_per_sec.QuadPart <= 0) || (0 == QueryPerformanceCounter(&count))) {
return -1;
}
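For context on the file being touched: src/gettimeofday.c shims POSIX gettimeofday() onto Win32 by converting a FILETIME (100-nanosecond ticks since 1601-01-01) to the Unix epoch. A hedged sketch of that conversion; the API calls and fields match the hunks above, the epoch constant is the standard Win32-to-Unix offset, and the function name is mine:

#ifdef _WIN32
#include <winsock2.h>   /* struct timeval */
#include <windows.h>
#include <stdint.h>

/* 1601-01-01 to 1970-01-01, in 100-nanosecond ticks. */
#define EPOCH_DIFF 116444736000000000ULL

int gettimeofday_sketch(struct timeval *tp)
{
    SYSTEMTIME system_time;
    FILETIME file_time;

    GetSystemTime(&system_time);
    SystemTimeToFileTime(&system_time, &file_time);

    /* Join the two 32-bit halves into one 64-bit tick count. */
    uint64_t ticks = (uint64_t)file_time.dwLowDateTime
                   | ((uint64_t)file_time.dwHighDateTime << 32);

    tp->tv_sec  = (long)((ticks - EPOCH_DIFF) / 10000000ULL);
    tp->tv_usec = (long)(system_time.wMilliseconds * 1000);
    return 0;
}
#endif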

src/gettimeofday.h
@@ -24,7 +24,7 @@
#ifdef __cplusplus
extern "C" {
#endif
static unsigned char g_first_time = 1;
static LARGE_INTEGER g_counts_per_sec;
@@ -36,4 +36,3 @@ LIB_API int clock_gettime(int, struct timespec*);
#endif
#endif

src/http_stream.h
@@ -36,4 +36,4 @@ image load_image_resize(char *filename, int w, int h, int c, image *im);
}
#endif
#endif // HTTP_STREAM_H
#endif // HTTP_STREAM_H

src/im2col.c
@@ -15,7 +15,7 @@ float im2col_get_pixel(float *im, int height, int width, int channels,
//https://github.com/BVLC/caffe/blob/master/LICENSE
void im2col_cpu(float* data_im,
int channels, int height, int width,
int ksize, int stride, int pad, float* data_col)
int ksize, int stride, int pad, float* data_col)
{
int c,h,w;
int height_col = (height + 2*pad - ksize) / stride + 1;
@@ -37,4 +37,3 @@ void im2col_cpu(float* data_im,
}
}
}
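im2col_cpu unrolls every convolution window into a column of a matrix so that the convolution reduces to one GEMM call; the hunk above shows the output-size formula height_col = (height + 2*pad - ksize)/stride + 1. A compact reconstruction of the idea, assuming darknet's CHW layout; this is a sketch of the technique, not the exact body of the file:

/* Each of channels*ksize*ksize rows of the output matrix walks the
 * height_col x width_col output positions and copies the matching pixel. */
void im2col_sketch(const float *im, int channels, int height, int width,
                   int ksize, int stride, int pad, float *col)
{
    int height_col = (height + 2*pad - ksize) / stride + 1;
    int width_col  = (width  + 2*pad - ksize) / stride + 1;
    int channels_col = channels * ksize * ksize;

    for (int c = 0; c < channels_col; ++c) {
        int w_off = c % ksize;
        int h_off = (c / ksize) % ksize;
        int c_im  = c / ksize / ksize;
        for (int h = 0; h < height_col; ++h) {
            for (int w = 0; w < width_col; ++w) {
                int im_row = h_off + h * stride - pad;
                int im_col = w_off + w * stride - pad;
                float val = 0;  /* zero padding outside the image */
                if (im_row >= 0 && im_col >= 0 && im_row < height && im_col < width)
                    val = im[im_col + width * (im_row + height * c_im)];
                col[(c * height_col + h) * width_col + w] = val;
            }
        }
    }
}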

src/im2col_kernels.cu
@@ -2214,4 +2214,3 @@ void convolve_bin_gpu(float *input, float *weights, float *output, int in_w, int
}
// --------------------------------

src/image.c
@@ -2159,4 +2159,4 @@ LIB_API void copy_image_from_bytes(image im, char *pdata)
}
}
}
}
}

src/list.c
@@ -34,7 +34,7 @@ void *list_pop(list *l){
if(l->back) l->back->next = 0;
free(b);
--l->size;
return val;
}

src/local_layer.c
@@ -60,7 +60,7 @@ local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, in
l.col_image = (float*)calloc(out_h * out_w * size * size * c, sizeof(float));
l.output = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
l.delta = (float*)calloc(l.batch * out_h * out_w * n, sizeof(float));
l.forward = forward_local_layer;
l.backward = backward_local_layer;
l.update = update_local_layer;
@@ -101,7 +101,7 @@ void forward_local_layer(const local_layer l, network_state state)
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
im2col_cpu(input, l.c, l.h, l.w,
im2col_cpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, l.col_image);
float *output = l.output + i*l.outputs;
for(j = 0; j < locations; ++j){
@@ -132,10 +132,10 @@ void backward_local_layer(local_layer l, network_state state)
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
im2col_cpu(input, l.c, l.h, l.w,
im2col_cpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, l.col_image);
for(j = 0; j < locations; ++j){
for(j = 0; j < locations; ++j){
float *a = l.delta + i*l.outputs + j;
float *b = l.col_image + j;
float *c = l.weight_updates + j*l.size*l.size*l.c*l.n;
@@ -147,7 +147,7 @@ void backward_local_layer(local_layer l, network_state state)
}
if(state.delta){
for(j = 0; j < locations; ++j){
for(j = 0; j < locations; ++j){
float *a = l.weights + j*l.size*l.size*l.c*l.n;
float *b = l.delta + i*l.outputs + j;
float *c = l.col_image + j;
@@ -191,7 +191,7 @@ void forward_local_layer_gpu(const local_layer l, network_state state)
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
im2col_ongpu(input, l.c, l.h, l.w,
im2col_ongpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, l.col_image_gpu);
float *output = l.output_gpu + i*l.outputs;
for(j = 0; j < locations; ++j){
@@ -221,10 +221,10 @@ void backward_local_layer_gpu(local_layer l, network_state state)
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
im2col_ongpu(input, l.c, l.h, l.w,
im2col_ongpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, l.col_image_gpu);
for(j = 0; j < locations; ++j){
for(j = 0; j < locations; ++j){
float *a = l.delta_gpu + i*l.outputs + j;
float *b = l.col_image_gpu + j;
float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n;
@@ -236,7 +236,7 @@ void backward_local_layer_gpu(local_layer l, network_state state)
}
if(state.delta){
for(j = 0; j < locations; ++j){
for(j = 0; j < locations; ++j){
float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n;
float *b = l.delta_gpu + i*l.outputs + j;
float *c = l.col_image_gpu + j;

src/lstm_layer.h
@@ -11,13 +11,13 @@ extern "C" {
#endif
LIB_API layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
void forward_lstm_layer(layer l, network_state state);
void forward_lstm_layer(layer l, network_state state);
void update_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay);
#ifdef GPU
void forward_lstm_layer_gpu(layer l, network_state state);
void backward_lstm_layer_gpu(layer l, network_state state);
void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
#endif
#ifdef __cplusplus

src/maxpool_layer.c
@@ -162,4 +162,3 @@ void backward_maxpool_layer(const maxpool_layer l, network_state state)
state.delta[index] += l.delta[i];
}
}

src/maxpool_layer_kernels.cu
@@ -126,4 +126,3 @@ extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state st
backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);
CHECK_CUDA(cudaPeekAtLastError());
}

src/network.h
@@ -167,4 +167,3 @@ network combine_train_valid_networks(network net_train, network net_map);
#endif
#endif

src/network_kernels.cu
@@ -459,4 +459,3 @@ float *network_predict_gpu(network net, float *input)
//cuda_free(state.input); // will be freed in the free_network()
return out;
}

src/nightmare.c
@@ -23,7 +23,7 @@ float abs_mean(float *x, int n)
void calculate_loss(float *output, float *delta, int n, float thresh)
{
int i;
float mean = mean_array(output, n);
float mean = mean_array(output, n);
float var = variance_array(output, n);
for(i = 0; i < n; ++i){
if(delta[i] > mean + thresh*sqrt(var)) delta[i] = output[i];
@@ -260,7 +260,7 @@ void run_nightmare(int argc, char **argv)
for(e = 0; e < rounds; ++e){
fprintf(stderr, "Iteration: ");
fflush(stderr);
for(n = 0; n < iters; ++n){
for(n = 0; n < iters; ++n){
fprintf(stderr, "%d, ", n);
fflush(stderr);
if(reconstruct){
@@ -305,4 +305,3 @@ void run_nightmare(int argc, char **argv)
im = resized;
}
}

src/normalization_layer.c
@@ -52,9 +52,9 @@ void resize_normalization_layer(layer *layer, int w, int h)
layer->norms = (float*)realloc(layer->norms, h * w * c * batch * sizeof(float));
#ifdef GPU
cuda_free(layer->output_gpu);
cuda_free(layer->delta_gpu);
cuda_free(layer->squared_gpu);
cuda_free(layer->norms_gpu);
cuda_free(layer->delta_gpu);
cuda_free(layer->squared_gpu);
cuda_free(layer->norms_gpu);
layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch);
layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch);
layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch);

src/region_layer.c
@@ -9,6 +9,7 @@
#include <string.h>
#include <stdlib.h>
#define DOABS 1
region_layer make_region_layer(int batch, int w, int h, int n, int classes, int coords, int max_boxes)
{
@@ -588,4 +589,4 @@ void zero_objectness(layer l)
l.output[obj_index] = 0;
}
}
}
}

src/rnn_vid.c
@@ -213,4 +213,3 @@ void run_vid_rnn(int argc, char **argv)
#else
void run_vid_rnn(int argc, char **argv){}
#endif

src/softmax_layer.c
@@ -47,9 +47,9 @@ softmax_layer make_softmax_layer(int batch, int inputs, int groups)
l.forward_gpu = forward_softmax_layer_gpu;
l.backward_gpu = backward_softmax_layer_gpu;
l.output_gpu = cuda_make_array(l.output, inputs*batch);
l.loss_gpu = cuda_make_array(l.loss, inputs*batch);
l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
l.output_gpu = cuda_make_array(l.output, inputs*batch);
l.loss_gpu = cuda_make_array(l.loss, inputs*batch);
l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
#endif
return l;
}

src/stb_image_write.h
@@ -81,7 +81,7 @@ USAGE:
TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed
data, set the global variable 'stbi_write_tga_with_rle' to 0.
JPEG does ignore alpha channels in input data; quality is between 1 and 100.
Higher quality looks better but results in a bigger image.
JPEG baseline (no JPEG progressive).
@@ -114,7 +114,7 @@ CREDITS:
Thatcher Ulrich
github:poppolopoppo
Patrick Boettcher
LICENSE
See end of file for license information.
@@ -1250,7 +1250,7 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, in
37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99};
static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99,
99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99};
static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f,
static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f,
1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f };
int row, col, i, k;
@@ -1421,38 +1421,38 @@ This software is available under 2 licenses -- choose whichever you prefer.
------------------------------------------------------------------------------
ALTERNATIVE A - MIT License
Copyright (c) 2017 Sean Barrett
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
------------------------------------------------------------------------------
ALTERNATIVE B - Public Domain (www.unlicense.org)
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
software, either in source code form or as a compiled binary, for any purpose,
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
software, either in source code form or as a compiled binary, for any purpose,
commercial or non-commercial, and by any means.
In jurisdictions that recognize copyright laws, the author or authors of this
software dedicate any and all copyright interest in the software to the public
domain. We make this dedication for the benefit of the public at large and to
the detriment of our heirs and successors. We intend this dedication to be an
overt act of relinquishment in perpetuity of all present and future rights to
In jurisdictions that recognize copyright laws, the author or authors of this
software dedicate any and all copyright interest in the software to the public
domain. We make this dedication for the benefit of the public at large and to
the detriment of our heirs and successors. We intend this dedication to be an
overt act of relinquishment in perpetuity of all present and future rights to
this software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
------------------------------------------------------------------------------
*/

src/tag.c
@@ -150,4 +150,3 @@ void run_tag(int argc, char **argv)
if(0==strcmp(argv[2], "train")) train_tag(cfg, weights, clear);
else if(0==strcmp(argv[2], "test")) test_tag(cfg, weights, filename);
}

src/tree.c
@@ -40,7 +40,7 @@ void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leave
for(j = 0; j < n; ++j){
int parent = hier->parent[j];
if(parent >= 0){
predictions[j] *= predictions[parent];
predictions[j] *= predictions[parent];
}
}
if(only_leaves){
@@ -86,7 +86,7 @@ tree *read_tree(char *filename)
{
tree t = {0};
FILE *fp = fopen(filename, "r");
char *line;
int last_parent = -1;
int group_size = 0;

src/upsample_layer.c
@@ -62,7 +62,7 @@ void resize_upsample_layer(layer *l, int w, int h)
l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch);
l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch);
#endif
}
void forward_upsample_layer(const layer l, network_state net)

src/writing.c
@@ -147,4 +147,3 @@ void run_writing(int argc, char **argv)
if(0==strcmp(argv[2], "train")) train_writing(cfg, weights);
else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename);
}

src/yolo_layer.c
@@ -485,4 +485,3 @@ void backward_yolo_layer_gpu(const layer l, network_state state)
axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1);
}
#endif
