diff --git a/Makefile b/Makefile index 4c1bb148..a02d7ef7 100644 --- a/Makefile +++ b/Makefile @@ -4,9 +4,9 @@ UNAME = $(shell uname) ifeq ($(UNAME), Darwin) COMMON += -isystem /usr/local/Cellar/opencv/2.4.6.1/include/opencv -isystem /usr/local/Cellar/opencv/2.4.6.1/include else -COMMON += -march=native +COMMON += -march=native -flto endif -CFLAGS= $(COMMON) -Ofast -flto +CFLAGS= $(COMMON) -Ofast #CFLAGS= $(COMMON) -O0 -g LDFLAGS=`pkg-config --libs opencv` -lm VPATH=./src/ diff --git a/dog.jpg b/dog.jpg deleted file mode 100644 index 3b9f7abd..00000000 Binary files a/dog.jpg and /dev/null differ diff --git a/src/connected_layer.c b/src/connected_layer.c index 07fad695..16a39be9 100644 --- a/src/connected_layer.c +++ b/src/connected_layer.c @@ -7,16 +7,17 @@ #include #include -connected_layer *make_connected_layer(int inputs, int outputs, ACTIVATION activation) +connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation) { fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs); int i; connected_layer *layer = calloc(1, sizeof(connected_layer)); layer->inputs = inputs; layer->outputs = outputs; + layer->batch=batch; - layer->output = calloc(outputs, sizeof(float*)); - layer->delta = calloc(outputs, sizeof(float*)); + layer->output = calloc(batch*outputs, sizeof(float*)); + layer->delta = calloc(batch*outputs, sizeof(float*)); layer->weight_updates = calloc(inputs*outputs, sizeof(float)); layer->weight_adapt = calloc(inputs*outputs, sizeof(float)); @@ -78,14 +79,14 @@ void forward_connected_layer(connected_layer layer, float *input) { int i; memcpy(layer.output, layer.biases, layer.outputs*sizeof(float)); - int m = 1; + int m = layer.batch; int k = layer.inputs; int n = layer.outputs; float *a = input; float *b = layer.weights; float *c = layer.output; gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); - for(i = 0; i < layer.outputs; ++i){ + for(i = 0; i < layer.outputs*layer.batch; ++i){ layer.output[i] = activate(layer.output[i], layer.activation); } //for(i = 0; i < layer.outputs; ++i) if(i%(layer.outputs/10+1)==0) printf("%f, ", layer.output[i]); printf("\n"); @@ -94,12 +95,12 @@ void forward_connected_layer(connected_layer layer, float *input) void learn_connected_layer(connected_layer layer, float *input) { int i; - for(i = 0; i < layer.outputs; ++i){ + for(i = 0; i < layer.outputs*layer.batch; ++i){ layer.delta[i] *= gradient(layer.output[i], layer.activation); - layer.bias_updates[i] += layer.delta[i]; + layer.bias_updates[i%layer.batch] += layer.delta[i]/layer.batch; } int m = layer.inputs; - int k = 1; + int k = layer.batch; int n = layer.outputs; float *a = input; float *b = layer.delta; @@ -113,7 +114,7 @@ void backward_connected_layer(connected_layer layer, float *input, float *delta) int m = layer.inputs; int k = layer.outputs; - int n = 1; + int n = layer.batch; float *a = layer.weights; float *b = layer.delta; diff --git a/src/connected_layer.h b/src/connected_layer.h index 4b17c59b..83ae914f 100644 --- a/src/connected_layer.h +++ b/src/connected_layer.h @@ -4,6 +4,7 @@ #include "activations.h" typedef struct{ + int batch; int inputs; int outputs; float *weights; @@ -25,7 +26,7 @@ typedef struct{ } connected_layer; -connected_layer *make_connected_layer(int inputs, int outputs, ACTIVATION activation); +connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation); void forward_connected_layer(connected_layer layer, float *input); void backward_connected_layer(connected_layer layer, float *input, float *delta); diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 8d8efc11..f7c9c102 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -31,7 +31,7 @@ image get_convolutional_delta(convolutional_layer layer) return float_to_image(h,w,c,layer.delta); } -convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int size, int stride, ACTIVATION activation) +convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation) { int i; size = 2*(size/2)+1; //HA! And you thought you'd use an even sized filter... @@ -40,6 +40,7 @@ convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int si layer->w = w; layer->c = c; layer->n = n; + layer->batch = batch; layer->stride = stride; layer->size = size; @@ -56,12 +57,12 @@ convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int si //layer->biases[i] = rand_normal()*scale + scale; layer->biases[i] = 0; } - int out_h = (h-size)/stride + 1; - int out_w = (w-size)/stride + 1; + int out_h = convolutional_out_height(*layer); + int out_w = convolutional_out_width(*layer); - layer->col_image = calloc(out_h*out_w*size*size*c, sizeof(float)); - layer->output = calloc(out_h * out_w * n, sizeof(float)); - layer->delta = calloc(out_h * out_w * n, sizeof(float)); + layer->col_image = calloc(layer->batch*out_h*out_w*size*size*c, sizeof(float)); + layer->output = calloc(layer->batch*out_h * out_w * n, sizeof(float)); + layer->delta = calloc(layer->batch*out_h * out_w * n, sizeof(float)); layer->activation = activation; fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); @@ -70,21 +71,39 @@ convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int si return layer; } +void resize_convolutional_layer(convolutional_layer *layer, int h, int w, int c) +{ + layer->h = h; + layer->w = w; + layer->c = c; + int out_h = convolutional_out_height(*layer); + int out_w = convolutional_out_width(*layer); + + layer->col_image = realloc(layer->col_image, + layer->batch*out_h*out_w*layer->size*layer->size*layer->c*sizeof(float)); + layer->output = realloc(layer->output, + layer->batch*out_h * out_w * layer->n*sizeof(float)); + layer->delta = realloc(layer->delta, + layer->batch*out_h * out_w * layer->n*sizeof(float)); +} + void forward_convolutional_layer(const convolutional_layer layer, float *in) { int i; int m = layer.n; int k = layer.size*layer.size*layer.c; - int n = ((layer.h-layer.size)/layer.stride + 1)* - ((layer.w-layer.size)/layer.stride + 1); + int n = convolutional_out_height(layer)* + convolutional_out_width(layer)* + layer.batch; memset(layer.output, 0, m*n*sizeof(float)); float *a = layer.filters; float *b = layer.col_image; float *c = layer.output; - - im2col_cpu(in, layer.c, layer.h, layer.w, layer.size, layer.stride, b); + for(i = 0; i < layer.batch; ++i){ + im2col_cpu(in+i*(n/layer.batch), layer.c, layer.h, layer.w, layer.size, layer.stride, b+i*(n/layer.batch)); + } gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); for(i = 0; i < m*n; ++i){ @@ -97,9 +116,10 @@ void forward_convolutional_layer(const convolutional_layer layer, float *in) void gradient_delta_convolutional_layer(convolutional_layer layer) { int i; - int size = convolutional_out_height(layer) - *convolutional_out_width(layer) - *layer.n; + int size = convolutional_out_height(layer)* + convolutional_out_width(layer)* + layer.n* + layer.batch; for(i = 0; i < size; ++i){ layer.delta[i] *= gradient(layer.output[i], layer.activation); } @@ -107,15 +127,17 @@ void gradient_delta_convolutional_layer(convolutional_layer layer) void learn_bias_convolutional_layer(convolutional_layer layer) { - int i,j; + int i,j,b; int size = convolutional_out_height(layer) *convolutional_out_width(layer); - for(i = 0; i < layer.n; ++i){ - float sum = 0; - for(j = 0; j < size; ++j){ - sum += layer.delta[j+i*size]; + for(b = 0; b < layer.batch; ++b){ + for(i = 0; i < layer.n; ++i){ + float sum = 0; + for(j = 0; j < size; ++j){ + sum += layer.delta[j+size*(i+b*layer.n)]; + } + layer.bias_updates[i] += sum/size; } - layer.bias_updates[i] += sum/size; } } @@ -125,8 +147,9 @@ void learn_convolutional_layer(convolutional_layer layer) learn_bias_convolutional_layer(layer); int m = layer.n; int n = layer.size*layer.size*layer.c; - int k = ((layer.h-layer.size)/layer.stride + 1)* - ((layer.w-layer.size)/layer.stride + 1); + int k = convolutional_out_height(layer)* + convolutional_out_width(layer)* + layer.batch; float *a = layer.delta; float *b = layer.col_image; @@ -137,10 +160,12 @@ void learn_convolutional_layer(convolutional_layer layer) void backward_convolutional_layer(convolutional_layer layer, float *delta) { + int i; int m = layer.size*layer.size*layer.c; int k = layer.n; - int n = ((layer.h-layer.size)/layer.stride + 1)* - ((layer.w-layer.size)/layer.stride + 1); + int n = convolutional_out_height(layer)* + convolutional_out_width(layer)* + layer.batch; float *a = layer.filters; float *b = layer.delta; @@ -150,8 +175,10 @@ void backward_convolutional_layer(convolutional_layer layer, float *delta) memset(c, 0, m*n*sizeof(float)); gemm(1,0,m,n,k,1,a,m,b,n,1,c,n); - memset(delta, 0, layer.h*layer.w*layer.c*sizeof(float)); - col2im_cpu(c, layer.c, layer.h, layer.w, layer.size, layer.stride, delta); + memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); + for(i = 0; i < layer.batch; ++i){ + col2im_cpu(c+i*n/layer.batch, layer.c, layer.h, layer.w, layer.size, layer.stride, delta+i*n/layer.batch); + } } void update_convolutional_layer(convolutional_layer layer, float step, float momentum, float decay) @@ -225,7 +252,7 @@ void update_convolutional_layer(convolutional_layer layer, float step, float mom void test_convolutional_layer() { - convolutional_layer l = *make_convolutional_layer(4,4,1,1,3,1,LINEAR); + convolutional_layer l = *make_convolutional_layer(1,4,4,1,1,3,1,LINEAR); float input[] = {1,2,3,4, 5,6,7,8, 9,10,11,12, diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h index 8ca69b1b..4e69dcfd 100644 --- a/src/convolutional_layer.h +++ b/src/convolutional_layer.h @@ -5,6 +5,7 @@ #include "activations.h" typedef struct { + int batch; int h,w,c; int n; int size; @@ -24,7 +25,8 @@ typedef struct { ACTIVATION activation; } convolutional_layer; -convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int size, int stride, ACTIVATION activation); +convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation); +void resize_convolutional_layer(convolutional_layer *layer, int h, int w, int c); void forward_convolutional_layer(const convolutional_layer layer, float *in); void learn_convolutional_layer(convolutional_layer layer); void update_convolutional_layer(convolutional_layer layer, float step, float momentum, float decay); diff --git a/src/data.c b/src/data.c index f44f5daf..39ece116 100644 --- a/src/data.c +++ b/src/data.c @@ -119,6 +119,30 @@ data load_categorical_data_csv(char *filename, int target, int k) return d; } +data load_cifar10_data(char *filename) +{ + data d; + d.shallow = 0; + unsigned long i,j; + matrix X = make_matrix(10000, 3072); + matrix y = make_matrix(10000, 10); + d.X = X; + d.y = y; + + FILE *fp = fopen(filename, "rb"); + for(i = 0; i < 10000; ++i){ + unsigned char bytes[3073]; + fread(bytes, 1, 3073, fp); + int class = bytes[0]; + y.vals[i][class] = 1; + for(j = 0; j < X.cols; ++j){ + X.vals[i][j] = (double)bytes[j+1]; + } + } + fclose(fp); + return d; +} + void randomize_data(data d) { int i; diff --git a/src/data.h b/src/data.h index 4df0c687..dfbbf72f 100644 --- a/src/data.h +++ b/src/data.h @@ -17,6 +17,7 @@ data load_data_image_pathfile_part(char *filename, int part, int total, char **labels, int k, int h, int w); data load_data_image_pathfile_random(char *filename, int n, char **labels, int k, int h, int w); +data load_cifar10_data(char *filename); list *get_paths(char *filename); data load_categorical_data_csv(char *filename, int target, int k); void normalize_data_rows(data d); diff --git a/src/image.c b/src/image.c index 16679776..24e32922 100644 --- a/src/image.c +++ b/src/image.c @@ -136,7 +136,7 @@ void show_image(image p, char *name) } } free_image(copy); - if(disp->height < 500 || disp->width < 500){ + if(disp->height < 500 || disp->width < 500 || disp->height > 1000){ int w = 1500; int h = w*p.h/p.w; if(h > 1000){ diff --git a/src/maxpool_layer.c b/src/maxpool_layer.c index 8c409b94..413816a6 100644 --- a/src/maxpool_layer.c +++ b/src/maxpool_layer.c @@ -17,10 +17,12 @@ image get_maxpool_delta(maxpool_layer layer) return float_to_image(h,w,c,layer.delta); } -maxpool_layer *make_maxpool_layer(int h, int w, int c, int stride) +maxpool_layer *make_maxpool_layer(int batch, int h, int w, int c, int stride) { + c = c*batch; fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d stride\n", h,w,c,stride); maxpool_layer *layer = calloc(1, sizeof(maxpool_layer)); + layer->batch = batch; layer->h = h; layer->w = w; layer->c = c; @@ -30,6 +32,15 @@ maxpool_layer *make_maxpool_layer(int h, int w, int c, int stride) return layer; } +void resize_maxpool_layer(maxpool_layer *layer, int h, int w, int c) +{ + layer->h = h; + layer->w = w; + layer->c = c; + layer->output = realloc(layer->output, ((h-1)/layer->stride+1) * ((w-1)/layer->stride+1) * c * sizeof(float)); + layer->delta = realloc(layer->delta, ((h-1)/layer->stride+1) * ((w-1)/layer->stride+1) * c * sizeof(float)); +} + void forward_maxpool_layer(const maxpool_layer layer, float *in) { image input = float_to_image(layer.h, layer.w, layer.c, in); diff --git a/src/maxpool_layer.h b/src/maxpool_layer.h index 27d6f55a..92d41e66 100644 --- a/src/maxpool_layer.h +++ b/src/maxpool_layer.h @@ -4,6 +4,7 @@ #include "image.h" typedef struct { + int batch; int h,w,c; int stride; float *delta; @@ -11,7 +12,8 @@ typedef struct { } maxpool_layer; image get_maxpool_image(maxpool_layer layer); -maxpool_layer *make_maxpool_layer(int h, int w, int c, int stride); +maxpool_layer *make_maxpool_layer(int batch, int h, int w, int c, int stride); +void resize_maxpool_layer(maxpool_layer *layer, int h, int w, int c); void forward_maxpool_layer(const maxpool_layer layer, float *in); void backward_maxpool_layer(const maxpool_layer layer, float *in, float *delta); diff --git a/src/network.c b/src/network.c index b2fc9225..e2c44b05 100644 --- a/src/network.c +++ b/src/network.c @@ -10,10 +10,11 @@ #include "maxpool_layer.h" #include "softmax_layer.h" -network make_network(int n) +network make_network(int n, int batch) { network net; net.n = n; + net.batch = batch; net.layers = calloc(net.n, sizeof(void *)); net.types = calloc(net.n, sizeof(LAYER_TYPE)); net.outputs = 0; @@ -25,10 +26,11 @@ void print_convolutional_cfg(FILE *fp, convolutional_layer *l, int first) { int i; fprintf(fp, "[convolutional]\n"); - if(first) fprintf(fp, "height=%d\n" + if(first) fprintf(fp, "batch=%d\n" + "height=%d\n" "width=%d\n" "channels=%d\n", - l->h, l->w, l->c); + l->batch,l->h, l->w, l->c); fprintf(fp, "filters=%d\n" "size=%d\n" "stride=%d\n" @@ -44,7 +46,7 @@ void print_connected_cfg(FILE *fp, connected_layer *l, int first) { int i; fprintf(fp, "[connected]\n"); - if(first) fprintf(fp, "input=%d\n", l->inputs); + if(first) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs); fprintf(fp, "output=%d\n" "activation=%s\n", l->outputs, @@ -58,17 +60,18 @@ void print_connected_cfg(FILE *fp, connected_layer *l, int first) void print_maxpool_cfg(FILE *fp, maxpool_layer *l, int first) { fprintf(fp, "[maxpool]\n"); - if(first) fprintf(fp, "height=%d\n" + if(first) fprintf(fp, "batch=%d\n" + "height=%d\n" "width=%d\n" "channels=%d\n", - l->h, l->w, l->c); + l->batch,l->h, l->w, l->c); fprintf(fp, "stride=%d\n\n", l->stride); } void print_softmax_cfg(FILE *fp, softmax_layer *l, int first) { fprintf(fp, "[softmax]\n"); - if(first) fprintf(fp, "input=%d\n", l->inputs); + if(first) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs); fprintf(fp, "\n"); } @@ -191,11 +194,11 @@ float calculate_error_network(network net, float *truth) float *out = get_network_output(net); int i, k = get_network_output_size(net); for(i = 0; i < k; ++i){ - printf("%f, ", out[i]); + //printf("%f, ", out[i]); delta[i] = truth[i] - out[i]; sum += delta[i]*delta[i]; } - printf("\n"); + //printf("\n"); return sum; } @@ -258,19 +261,26 @@ float train_network_sgd(network net, data d, int n, float step, float momentum,f int i; float error = 0; int correct = 0; + int pos = 0; for(i = 0; i < n; ++i){ int index = rand()%d.X.rows; - error += train_network_datum(net, d.X.vals[index], d.y.vals[index], step, momentum, decay); + float err = train_network_datum(net, d.X.vals[index], d.y.vals[index], step, momentum, decay); float *y = d.y.vals[index]; int class = get_predicted_class_network(net); correct += (y[class]?1:0); + if(y[1]){ + error += err; + ++pos; + } + + //printf("%d %f %f\n", i,net.output[0], d.y.vals[index][0]); //if((i+1)%10 == 0){ // printf("%d: %f\n", (i+1), (float)correct/(i+1)); //} } - printf("Accuracy: %f\n",(float) correct/n); - return error/n; + //printf("Accuracy: %f\n",(float) correct/n); + return error/pos; } float train_network_batch(network net, data d, int n, float step, float momentum,float decay) { @@ -304,7 +314,7 @@ void train_network(network net, data d, float step, float momentum, float decay) } visualize_network(net); cvWaitKey(100); - printf("Accuracy: %f\n", (float)correct/d.X.rows); + fprintf(stderr, "Accuracy: %f\n", (float)correct/d.X.rows); } int get_network_output_size_layer(network net, int i) @@ -330,7 +340,8 @@ int get_network_output_size_layer(network net, int i) return 0; } -int reset_network_size(network net, int h, int w, int c) +/* +int resize_network(network net, int h, int w, int c) { int i; for (i = 0; i < net.n; ++i){ @@ -357,6 +368,34 @@ int reset_network_size(network net, int h, int w, int c) } return 0; } +*/ + +int resize_network(network net, int h, int w, int c) +{ + int i; + for (i = 0; i < net.n; ++i){ + if(net.types[i] == CONVOLUTIONAL){ + convolutional_layer *layer = (convolutional_layer *)net.layers[i]; + resize_convolutional_layer(layer, h, w, c); + image output = get_convolutional_image(*layer); + h = output.h; + w = output.w; + c = output.c; + } + else if(net.types[i] == MAXPOOL){ + maxpool_layer *layer = (maxpool_layer *)net.layers[i]; + resize_maxpool_layer(layer, h, w, c); + image output = get_maxpool_image(*layer); + h = output.h; + w = output.w; + c = output.c; + } + else{ + error("Cannot resize this type of layer"); + } + } + return 0; +} int get_network_output_size(network net) { diff --git a/src/network.h b/src/network.h index c75804d3..5acee61b 100644 --- a/src/network.h +++ b/src/network.h @@ -14,13 +14,14 @@ typedef enum { typedef struct { int n; + int batch; void **layers; LAYER_TYPE *types; int outputs; float *output; } network; -network make_network(int n); +network make_network(int n, int batch); void forward_network(network net, float *input); float backward_network(network net, float *input, float *truth); void update_network(network net, float step, float momentum, float decay); @@ -41,7 +42,7 @@ int get_predicted_class_network(network net); void print_network(network net); void visualize_network(network net); void save_network(network net, char *filename); -int reset_network_size(network net, int h, int w, int c); +int resize_network(network net, int h, int w, int c); #endif diff --git a/src/parser.c b/src/parser.c index cf35a94a..cf64b553 100644 --- a/src/parser.c +++ b/src/parser.c @@ -52,6 +52,7 @@ convolutional_layer *parse_convolutional(list *options, network net, int count) h = option_find_int(options, "height",1); w = option_find_int(options, "width",1); c = option_find_int(options, "channels",1); + net.batch = option_find_int(options, "batch",1); }else{ image m = get_network_image_layer(net, count-1); h = m.h; @@ -59,7 +60,7 @@ convolutional_layer *parse_convolutional(list *options, network net, int count) c = m.c; if(h == 0) error("Layer before convolutional layer must output image."); } - convolutional_layer *layer = make_convolutional_layer(h,w,c,n,size,stride, activation); + convolutional_layer *layer = make_convolutional_layer(net.batch,h,w,c,n,size,stride, activation); char *data = option_find_str(options, "data", 0); if(data){ char *curr = data; @@ -90,10 +91,11 @@ connected_layer *parse_connected(list *options, network net, int count) ACTIVATION activation = get_activation(activation_s); if(count == 0){ input = option_find_int(options, "input",1); + net.batch = option_find_int(options, "batch",1); }else{ input = get_network_output_size_layer(net, count-1); } - connected_layer *layer = make_connected_layer(input, output, activation); + connected_layer *layer = make_connected_layer(net.batch, input, output, activation); char *data = option_find_str(options, "data", 0); if(data){ char *curr = data; @@ -120,10 +122,11 @@ softmax_layer *parse_softmax(list *options, network net, int count) int input; if(count == 0){ input = option_find_int(options, "input",1); + net.batch = option_find_int(options, "batch",1); }else{ input = get_network_output_size_layer(net, count-1); } - softmax_layer *layer = make_softmax_layer(input); + softmax_layer *layer = make_softmax_layer(net.batch, input); option_unused(options); return layer; } @@ -136,6 +139,7 @@ maxpool_layer *parse_maxpool(list *options, network net, int count) h = option_find_int(options, "height",1); w = option_find_int(options, "width",1); c = option_find_int(options, "channels",1); + net.batch = option_find_int(options, "batch",1); }else{ image m = get_network_image_layer(net, count-1); h = m.h; @@ -143,7 +147,7 @@ maxpool_layer *parse_maxpool(list *options, network net, int count) c = m.c; if(h == 0) error("Layer before convolutional layer must output image."); } - maxpool_layer *layer = make_maxpool_layer(h,w,c,stride); + maxpool_layer *layer = make_maxpool_layer(net.batch,h,w,c,stride); option_unused(options); return layer; } @@ -151,7 +155,7 @@ maxpool_layer *parse_maxpool(list *options, network net, int count) network parse_network_cfg(char *filename) { list *sections = read_cfg(filename); - network net = make_network(sections->size); + network net = make_network(sections->size, 0); node *n = sections->front; int count = 0; @@ -162,18 +166,22 @@ network parse_network_cfg(char *filename) convolutional_layer *layer = parse_convolutional(options, net, count); net.types[count] = CONVOLUTIONAL; net.layers[count] = layer; + net.batch = layer->batch; }else if(is_connected(s)){ connected_layer *layer = parse_connected(options, net, count); net.types[count] = CONNECTED; net.layers[count] = layer; + net.batch = layer->batch; }else if(is_softmax(s)){ softmax_layer *layer = parse_softmax(options, net, count); net.types[count] = SOFTMAX; net.layers[count] = layer; + net.batch = layer->batch; }else if(is_maxpool(s)){ maxpool_layer *layer = parse_maxpool(options, net, count); net.types[count] = MAXPOOL; net.layers[count] = layer; + net.batch = layer->batch; }else{ fprintf(stderr, "Type not recognized: %s\n", s->type); } diff --git a/src/softmax_layer.c b/src/softmax_layer.c index b6b7ff35..12684238 100644 --- a/src/softmax_layer.c +++ b/src/softmax_layer.c @@ -3,13 +3,14 @@ #include #include -softmax_layer *make_softmax_layer(int inputs) +softmax_layer *make_softmax_layer(int batch, int inputs) { fprintf(stderr, "Softmax Layer: %d inputs\n", inputs); softmax_layer *layer = calloc(1, sizeof(softmax_layer)); + layer->batch = batch; layer->inputs = inputs; - layer->output = calloc(inputs, sizeof(float)); - layer->delta = calloc(inputs, sizeof(float)); + layer->output = calloc(inputs*batch, sizeof(float)); + layer->delta = calloc(inputs*batch, sizeof(float)); return layer; } @@ -28,28 +29,30 @@ void forward_softmax_layer(const softmax_layer layer, float *input) */ void forward_softmax_layer(const softmax_layer layer, float *input) { - int i; - float sum = 0; - float largest = 0; - for(i = 0; i < layer.inputs; ++i){ - if(input[i] > largest) largest = input[i]; - } - for(i = 0; i < layer.inputs; ++i){ - sum += exp(input[i]-largest); - //printf("%f, ", input[i]); - } - //printf("\n"); - if(sum) sum = largest+log(sum); - else sum = largest-100; - for(i = 0; i < layer.inputs; ++i){ - layer.output[i] = exp(input[i]-sum); + int i,b; + for(b = 0; b < layer.batch; ++b){ + float sum = 0; + float largest = 0; + for(i = 0; i < layer.inputs; ++i){ + if(input[i+b*layer.inputs] > largest) largest = input[i+b*layer.inputs]; + } + for(i = 0; i < layer.inputs; ++i){ + sum += exp(input[i+b*layer.inputs]-largest); + //printf("%f, ", input[i]); + } + //printf("\n"); + if(sum) sum = largest+log(sum); + else sum = largest-100; + for(i = 0; i < layer.inputs; ++i){ + layer.output[i+b*layer.inputs] = exp(input[i+b*layer.inputs]-sum); + } } } void backward_softmax_layer(const softmax_layer layer, float *input, float *delta) { int i; - for(i = 0; i < layer.inputs; ++i){ + for(i = 0; i < layer.inputs*layer.batch; ++i){ delta[i] = layer.delta[i]; } } diff --git a/src/softmax_layer.h b/src/softmax_layer.h index bfcd390f..414030c6 100644 --- a/src/softmax_layer.h +++ b/src/softmax_layer.h @@ -3,11 +3,12 @@ typedef struct { int inputs; + int batch; float *delta; float *output; } softmax_layer; -softmax_layer *make_softmax_layer(int inputs); +softmax_layer *make_softmax_layer(int batch, int inputs); void forward_softmax_layer(const softmax_layer layer, float *input); void backward_softmax_layer(const softmax_layer layer, float *input, float *delta); diff --git a/src/tests.c b/src/tests.c index 557f0fbf..91217d42 100644 --- a/src/tests.c +++ b/src/tests.c @@ -77,7 +77,7 @@ void verify_convolutional_layer() int size = 3; float eps = .00000001; image test = make_random_image(5,5, 1); - convolutional_layer layer = *make_convolutional_layer(test.h,test.w,test.c, n, size, stride, RELU); + convolutional_layer layer = *make_convolutional_layer(1,test.h,test.w,test.c, n, size, stride, RELU); image out = get_convolutional_image(layer); float **jacobian = calloc(test.h*test.w*test.c, sizeof(float)); @@ -200,7 +200,7 @@ void train_full() while(1){ i += 1000; data train = load_data_image_pathfile_random("images/assira/train.list", 1000, labels, 2, 256, 256); - image im = float_to_image(256, 256, 3,train.X.vals[0]); + //image im = float_to_image(256, 256, 3,train.X.vals[0]); //visualize_network(net); //cvWaitKey(100); //show_image(im, "input"); @@ -247,30 +247,75 @@ void test_full() fclose(fp); } +void test_cifar10() +{ + data test = load_cifar10_data("images/cifar10/test_batch.bin"); + scale_data_rows(test, 1./255); + network net = parse_network_cfg("cfg/cifar10.cfg"); + int count = 0; + float lr = .000005; + float momentum = .99; + float decay = 0.001; + decay = 0; + int batch = 10000; + while(++count <= 10000){ + char buff[256]; + sprintf(buff, "images/cifar10/data_batch_%d.bin", rand()%5+1); + data train = load_cifar10_data(buff); + scale_data_rows(train, 1./255); + train_network_sgd(net, train, batch, lr, momentum, decay); + //printf("%5f %5f\n",(double)count*batch/train.X.rows, loss); + + float test_acc = network_accuracy(net, test); + printf("%5f %5f\n",(double)count*batch/train.X.rows/5, 1-test_acc); + free_data(train); + } + +} + +void test_vince() +{ + network net = parse_network_cfg("cfg/vince.cfg"); + data train = load_categorical_data_csv("images/vince.txt", 144, 2); + normalize_data_rows(train); + + int count = 0; + float lr = .00005; + float momentum = .9; + float decay = 0.0001; + decay = 0; + int batch = 10000; + while(++count <= 10000){ + float loss = train_network_sgd(net, train, batch, lr, momentum, decay); + printf("%5f %5f\n",(double)count*batch/train.X.rows, loss); + } +} + void test_nist() { srand(444444); srand(888888); - network net = parse_network_cfg("nist.cfg"); + network net = parse_network_cfg("cfg/nist_basic.cfg"); data train = load_categorical_data_csv("mnist/mnist_train.csv", 0, 10); data test = load_categorical_data_csv("mnist/mnist_test.csv",0,10); normalize_data_rows(train); normalize_data_rows(test); //randomize_data(train); int count = 0; - float lr = .0005; + float lr = .00005; float momentum = .9; - float decay = 0.001; - clock_t start = clock(), end; - while(++count <= 100){ - //visualize_network(net); - float loss = train_network_sgd(net, train, 1000, lr, momentum, decay); - printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*100, loss, lr, momentum, decay); - end = clock(); - printf("Time: %lf seconds\n", (float)(end-start)/CLOCKS_PER_SEC); - start=end; - //cvWaitKey(100); - //lr /= 2; + float decay = 0.0001; + decay = 0; + //clock_t start = clock(), end; + int batch = 10000; + while(++count <= 10000){ + float loss = train_network_sgd(net, train, batch, lr, momentum, decay); + printf("%5f %5f\n",(double)count*batch/train.X.rows, loss); + //printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay); + //end = clock(); + //printf("Time: %lf seconds\n", (float)(end-start)/CLOCKS_PER_SEC); + //start=end; + /* if(count%5 == 0){ float train_acc = network_accuracy(net, train); fprintf(stderr, "\nTRAIN: %f\n", train_acc); @@ -279,6 +324,7 @@ void test_nist() printf("%d, %f, %f\n", count, train_acc, test_acc); //lr *= .5; } + */ } } @@ -439,91 +485,35 @@ image features_output_size(network net, IplImage *src, int outh, int outw) { int h = voc_size(outh); int w = voc_size(outw); - printf("%d %d\n", h, w); + fprintf(stderr, "%d %d\n", h, w); IplImage *sized = cvCreateImage(cvSize(w,h), src->depth, src->nChannels); cvResize(src, sized, CV_INTER_LINEAR); image im = ipl_to_image(sized); - reset_network_size(net, im.h, im.w, im.c); + resize_network(net, im.h, im.w, im.c); forward_network(net, im.data); image out = get_network_image_layer(net, 6); - //printf("%d %d\n%d %d\n", outh, out.h, outw, out.w); free_image(im); cvReleaseImage(&sized); return copy_image(out); } -void features_VOC(int part, int total) +void features_VOC_image_size(char *image_path, int h, int w) { - int i,j, count = 0; + int j; network net = parse_network_cfg("cfg/voc_imagenet.cfg"); - char *path_file = "images/VOC2012/all_paths.txt"; - char *out_dir = "voc_features/"; - list *paths = get_paths(path_file); - node *n = paths->front; - int size = paths->size; - for(count = 0; count < part*size/total; ++count) n = n->next; - while(n && count++ < (part+1)*size/total){ - char *path = (char *)n->val; - char buff[1024]; - sprintf(buff, "%s%s.txt",out_dir, path); - printf("%s\n", path); - FILE *fp = fopen(buff, "w"); - if(fp == 0) file_error(buff); - - IplImage* src = 0; - if( (src = cvLoadImage(path,-1)) == 0 ) - { - printf("Cannot load file image %s\n", path); - exit(0); - } - int w = src->width; - int h = src->height; - int sbin = 8; - int interval = 10; - double scale = pow(2., 1./interval); - int m = (wnext; + IplImage* src = 0; + if( (src = cvLoadImage(image_path,-1)) == 0 ) file_error(image_path); + image out = features_output_size(net, src, h, w); + for(j = 0; j < out.c*out.h*out.w; ++j){ + if(j != 0) printf(","); + printf("%g", out.data[j]); } + printf("\n"); + free_image(out); + cvReleaseImage(&src); } void features_VOC_image(char *image_file, char *image_dir, char *out_dir) @@ -531,9 +521,9 @@ void features_VOC_image(char *image_file, char *image_dir, char *out_dir) int i,j; network net = parse_network_cfg("cfg/voc_imagenet.cfg"); char image_path[1024]; - sprintf(image_path, "%s%s",image_dir, image_file); + sprintf(image_path, "%s/%s",image_dir, image_file); char out_path[1024]; - sprintf(out_path, "%s%s.txt",out_dir, image_file); + sprintf(out_path, "%s/%s.txt",out_dir, image_file); printf("%s\n", image_file); FILE *fp = fopen(out_path, "w"); if(fp == 0) file_error(out_path); @@ -543,10 +533,11 @@ void features_VOC_image(char *image_file, char *image_dir, char *out_dir) int w = src->width; int h = src->height; int sbin = 8; - int interval = 10; + int interval = 4; double scale = pow(2., 1./interval); int m = (w= interval"); image *ims = calloc(max_scale+interval, sizeof(image)); for(i = 0; i < interval; ++i){ @@ -642,10 +633,13 @@ int main(int argc, char *argv[]) //test_split(); //test_ensemble(); //test_nist(); + //test_cifar10(); + //test_vince(); //test_full(); //train_VOC(); - features_VOC_image(argv[1], argv[2], argv[3]); - printf("Success!\n"); + //features_VOC_image(argv[1], argv[2], argv[3]); + features_VOC_image_size(argv[1], atoi(argv[2]), atoi(argv[3])); + fprintf(stderr, "Success!\n"); //test_random_preprocess(); //test_random_classify(); //test_parser(); diff --git a/test.jpg b/test.jpg deleted file mode 100644 index f7b6cb8d..00000000 Binary files a/test.jpg and /dev/null differ diff --git a/test_color.png b/test_color.png deleted file mode 100644 index 1a1836e8..00000000 Binary files a/test_color.png and /dev/null differ diff --git a/test_dog.jpg b/test_dog.jpg deleted file mode 100644 index aa98311a..00000000 Binary files a/test_dog.jpg and /dev/null differ diff --git a/test_hinton.jpg b/test_hinton.jpg deleted file mode 100644 index 25b38210..00000000 Binary files a/test_hinton.jpg and /dev/null differ