mirror of https://github.com/AlexeyAB/darknet.git
parent 1578ec70d7
commit 913d355ec1
35 changed files with 913 additions and 86 deletions
@@ -1 +0,0 @@
@@ -0,0 +1,147 @@
#include "network.h"
#include "cost_layer.h"
#include "utils.h"
#include "parser.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
#endif

typedef struct {
    float *x;
    float *y;
} float_pair;

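/*
 * Builds one training batch of one-hot byte vectors: each of the `batch`
 * streams starts at a random offset into the text, x holds the byte at each
 * step and y the following byte (the prediction target). The index
 * (j*batch + i)*256 lays the buffers out time-major: all streams for step 0
 * first, then all streams for step 1, and so on.
 */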
float_pair get_rnn_data(char *text, int len, int batch, int steps)
{
    float *x = calloc(batch * steps * 256, sizeof(float));
    float *y = calloc(batch * steps * 256, sizeof(float));
    int i, j;
    for(i = 0; i < batch; ++i){
        int index = rand() % (len - steps - 1);
        for(j = 0; j < steps; ++j){
            /* cast to unsigned char so bytes >= 128 don't index negatively */
            x[(j*batch + i)*256 + (unsigned char)text[index + j]] = 1;
            y[(j*batch + i)*256 + (unsigned char)text[index + j + 1]] = 1;
        }
    }
    float_pair p;
    p.x = x;
    p.y = y;
    return p;
}

void train_char_rnn(char *cfgfile, char *weightfile, char *filename)
{
    FILE *fp = fopen(filename, "r");
    //FILE *fp = fopen("data/ab.txt", "r");
    //FILE *fp = fopen("data/grrm/asoiaf.txt", "r");

    fseek(fp, 0, SEEK_END);
    size_t size = ftell(fp);
    fseek(fp, 0, SEEK_SET);

    char *text = calloc(size, sizeof(char));
    fread(text, 1, size, fp);
    fclose(fp);

    char *backup_directory = "/home/pjreddie/backup/";
    srand(time(0));
    data_seed = time(0);
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    float avg_loss = -1;
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    int batch = net.batch;
    int steps = net.time_steps;
    int i = (*net.seen)/net.batch;

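    /* net.batch counts rows across all timesteps, so the loader below is
       asked for batch/steps independent character streams of steps bytes
       each, batch rows in total. */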
    clock_t time;
    while(get_current_batch(net) < net.max_batches){
        i += 1;
        time = clock();
        float_pair p = get_rnn_data(text, size, batch/steps, steps);

        float loss = train_network_datum(net, p.x, p.y) / (batch);
        free(p.x);
        free(p.y);
        if (avg_loss < 0) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;

        printf("%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time));
        if(i%100==0){
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
        }
        if(i%10==0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
        }
    }
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(net, buff);
}

void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float temp, int rseed)
{
    srand(rseed);
    char *base = basecfg(cfgfile);
    printf("%s\n", base);

    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }

    int i, j;
    for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
    char c;
    int len = strlen(seed);
    float *input = calloc(256, sizeof(float));
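    /* Prime the network on the seed string: each byte is fed through to
       build up the hidden state; nothing is sampled yet. The unsigned char
       casts below keep bytes >= 128 from indexing negatively. */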
    for(i = 0; i < len-1; ++i){
        c = seed[i];
        input[(unsigned char)c] = 1;
        network_predict(net, input);
        input[(unsigned char)c] = 0;
        printf("%c", c);
    }
    c = seed[len-1];
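    /* Generate num characters. The final layer (a softmax in the usual
       char-rnn cfgs, scaled by the temperature set above) yields a
       distribution over the 256 byte values; drawing r ~ U(0,1) and walking
       the cumulative sum is inverse-CDF sampling from that distribution. */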
    for(i = 0; i < num; ++i){
        printf("%c", c);
        float r = rand_uniform(0, 1);
        float sum = 0;
        input[(unsigned char)c] = 1;
        float *out = network_predict(net, input);
        input[(unsigned char)c] = 0;
        for(j = 0; j < 256; ++j){
            sum += out[j];
            if(sum > r) break;
        }
        c = j;
    }
    printf("\n");
}

void run_char_rnn(int argc, char **argv)
{
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }
    char *filename = find_char_arg(argc, argv, "-file", "data/shakespeare.txt");
    char *seed = find_char_arg(argc, argv, "-seed", "\n");
    int len = find_int_arg(argc, argv, "-len", 100);
    float temp = find_float_arg(argc, argv, "-temp", 1);
    int rseed = find_int_arg(argc, argv, "-srand", time(0));

    char *cfg = argv[3];
    char *weights = (argc > 4) ? argv[4] : 0;
    if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename);
    else if(0==strcmp(argv[2], "test")) test_char_rnn(cfg, weights, len, seed, temp, rseed);
}
@@ -0,0 +1,275 @@
#include "rnn_layer.h"
#include "connected_layer.h"
#include "utils.h"
#include "cuda.h"
#include "blas.h"
#include "gemm.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

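/*
 * A recurrent layer built from three fully connected sub-layers:
 *   input_layer:  input  -> hidden
 *   self_layer:   hidden -> hidden (the recurrent connection)
 *   output_layer: hidden -> output
 * Each sub-layer is sized batch*steps so the whole unrolled sequence shares
 * one weight matrix per sub-layer.
 */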
layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize)
{
    printf("%d %d\n", batch, steps);
    batch = batch / steps;
    layer l = {0};
    l.batch = batch;
    l.type = RNN;
    l.steps = steps;
    l.hidden = hidden;
    l.inputs = inputs;

    l.state = calloc(batch*hidden, sizeof(float));

    l.input_layer = malloc(sizeof(layer));
    *(l.input_layer) = make_connected_layer(batch*steps, inputs, hidden, activation, batch_normalize);
    l.input_layer->batch = batch;

    l.self_layer = malloc(sizeof(layer));
    *(l.self_layer) = make_connected_layer(batch*steps, hidden, hidden, activation, batch_normalize);
    l.self_layer->batch = batch;

    l.output_layer = malloc(sizeof(layer));
    *(l.output_layer) = make_connected_layer(batch*steps, hidden, outputs, activation, batch_normalize);
    l.output_layer->batch = batch;

    l.outputs = outputs;
    l.output = l.output_layer->output;
    l.delta = l.output_layer->delta;

#ifdef GPU
    l.state_gpu = cuda_make_array(l.state, batch*hidden);
    l.output_gpu = l.output_layer->output_gpu;
    l.delta_gpu = l.output_layer->delta_gpu;
#endif

    fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs);
    return l;
}

void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay)
{
    update_connected_layer(*(l.input_layer), batch, learning_rate, momentum, decay);
    update_connected_layer(*(l.self_layer), batch, learning_rate, momentum, decay);
    update_connected_layer(*(l.output_layer), batch, learning_rate, momentum, decay);
}

void forward_rnn_layer(layer l, network_state state)
{
    network_state s = {0};
    s.train = state.train;
    int i;
    layer input_layer = *(l.input_layer);
    layer self_layer = *(l.self_layer);
    layer output_layer = *(l.output_layer);

    fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1);
    fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1);
    fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1);
    if(state.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1);

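    /* One iteration per timestep. With f the sub-layer activation, each step
     * computes
     *     state = f(W*x) + f(U*state_prev)    (input_layer + self_layer)
     *     out   = f(V*state)                  (output_layer)
     * i.e. the activation is applied per branch before the sum. The local
     * layer copies' output pointers are then bumped so each step writes its
     * own slice of the batch*steps buffers. */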
    for (i = 0; i < l.steps; ++i) {
        s.input = state.input;
        forward_connected_layer(input_layer, s);

        s.input = l.state;
        forward_connected_layer(self_layer, s);

        copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1);
        axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);

        s.input = l.state;
        forward_connected_layer(output_layer, s);

        state.input += l.inputs*l.batch;
        input_layer.output += l.hidden*l.batch;
        self_layer.output += l.hidden*l.batch;
        output_layer.output += l.outputs*l.batch;
    }
}

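/*
 * Backpropagation through time: start at the slice for the last timestep and
 * walk backwards, recomputing each step's hidden state from the stored
 * sub-layer outputs. self_layer.delta doubles as the carrier of the gradient
 * flowing into the previous step's hidden state.
 */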
void backward_rnn_layer(layer l, network_state state)
{
    network_state s = {0};
    s.train = state.train;
    int i;
    layer input_layer = *(l.input_layer);
    layer self_layer = *(l.self_layer);
    layer output_layer = *(l.output_layer);
    input_layer.output += l.hidden*l.batch*(l.steps-1);
    input_layer.delta += l.hidden*l.batch*(l.steps-1);

    self_layer.output += l.hidden*l.batch*(l.steps-1);
    self_layer.delta += l.hidden*l.batch*(l.steps-1);

    output_layer.output += l.outputs*l.batch*(l.steps-1);
    output_layer.delta += l.outputs*l.batch*(l.steps-1);
    for (i = l.steps-1; i >= 0; --i) {
        /* recompute the hidden state for step i */
        copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1);
        axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);

        s.input = l.state;
        s.delta = self_layer.delta;
        backward_connected_layer(output_layer, s);

        /* recompute the hidden state for step i-1 (zero at step 0) */
        if(i > 0){
            copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1);
            axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1);
        }else{
            fill_cpu(l.hidden * l.batch, 0, l.state, 1);
        }

        s.input = l.state;
        s.delta = self_layer.delta - l.hidden*l.batch;
        if (i == 0) s.delta = 0;
        backward_connected_layer(self_layer, s);

        copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1);
        s.input = state.input + i*l.inputs*l.batch;
        if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
        else s.delta = 0;
        backward_connected_layer(input_layer, s);

        input_layer.output -= l.hidden*l.batch;
        input_layer.delta -= l.hidden*l.batch;

        self_layer.output -= l.hidden*l.batch;
        self_layer.delta -= l.hidden*l.batch;

        output_layer.output -= l.outputs*l.batch;
        output_layer.delta -= l.outputs*l.batch;
    }
}

#ifdef GPU

void pull_rnn_layer(layer l)
{
    pull_connected_layer(*(l.input_layer));
    pull_connected_layer(*(l.self_layer));
    pull_connected_layer(*(l.output_layer));
}

void push_rnn_layer(layer l)
{
    push_connected_layer(*(l.input_layer));
    push_connected_layer(*(l.self_layer));
    push_connected_layer(*(l.output_layer));
}

void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay)
{
    update_connected_layer_gpu(*(l.input_layer), batch, learning_rate, momentum, decay);
    update_connected_layer_gpu(*(l.self_layer), batch, learning_rate, momentum, decay);
    update_connected_layer_gpu(*(l.output_layer), batch, learning_rate, momentum, decay);
}

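/*
 * The GPU forward/backward passes mirror the CPU ones; the extra x_gpu and
 * x_norm_gpu pointer bumps keep the per-step slices of the batch-norm
 * buffers in step with the outputs.
 */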
void forward_rnn_layer_gpu(layer l, network_state state)
{
    network_state s = {0};
    s.train = state.train;
    int i;
    layer input_layer = *(l.input_layer);
    layer self_layer = *(l.self_layer);
    layer output_layer = *(l.output_layer);

    fill_ongpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1);
    fill_ongpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1);
    fill_ongpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1);
    if(state.train) fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1);

    for (i = 0; i < l.steps; ++i) {
        s.input = state.input;
        forward_connected_layer_gpu(input_layer, s);

        s.input = l.state_gpu;
        forward_connected_layer_gpu(self_layer, s);

        copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1);
        axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);

        /* s.input still points at l.state_gpu, which now holds the summed state */
        forward_connected_layer_gpu(output_layer, s);

        state.input += l.inputs*l.batch;
        input_layer.output_gpu += l.hidden*l.batch;
        input_layer.x_gpu += l.hidden*l.batch;
        input_layer.x_norm_gpu += l.hidden*l.batch;

        self_layer.output_gpu += l.hidden*l.batch;
        self_layer.x_gpu += l.hidden*l.batch;
        self_layer.x_norm_gpu += l.hidden*l.batch;

        output_layer.output_gpu += l.outputs*l.batch;
        output_layer.x_gpu += l.outputs*l.batch;
        output_layer.x_norm_gpu += l.outputs*l.batch;
    }
}

void backward_rnn_layer_gpu(layer l, network_state state)
{
    network_state s = {0};
    s.train = state.train;
    int i;
    layer input_layer = *(l.input_layer);
    layer self_layer = *(l.self_layer);
    layer output_layer = *(l.output_layer);
    input_layer.output_gpu += l.hidden*l.batch*(l.steps-1);
    input_layer.delta_gpu += l.hidden*l.batch*(l.steps-1);
    input_layer.x_gpu += l.hidden*l.batch*(l.steps-1);
    input_layer.x_norm_gpu += l.hidden*l.batch*(l.steps-1);

    self_layer.output_gpu += l.hidden*l.batch*(l.steps-1);
    self_layer.delta_gpu += l.hidden*l.batch*(l.steps-1);
    self_layer.x_gpu += l.hidden*l.batch*(l.steps-1);
    self_layer.x_norm_gpu += l.hidden*l.batch*(l.steps-1);

    output_layer.output_gpu += l.outputs*l.batch*(l.steps-1);
    output_layer.delta_gpu += l.outputs*l.batch*(l.steps-1);
    output_layer.x_gpu += l.outputs*l.batch*(l.steps-1);
    output_layer.x_norm_gpu += l.outputs*l.batch*(l.steps-1);
    for (i = l.steps-1; i >= 0; --i) {
        copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1);
        axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);

        s.input = l.state_gpu;
        s.delta = self_layer.delta_gpu;
        backward_connected_layer_gpu(output_layer, s);

        if(i > 0){
            copy_ongpu(l.hidden * l.batch, input_layer.output_gpu - l.hidden*l.batch, 1, l.state_gpu, 1);
            axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu - l.hidden*l.batch, 1, l.state_gpu, 1);
        }else{
            fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1);
        }

        s.input = l.state_gpu;
        s.delta = self_layer.delta_gpu - l.hidden*l.batch;
        if (i == 0) s.delta = 0;
        backward_connected_layer_gpu(self_layer, s);

        copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
        s.input = state.input + i*l.inputs*l.batch;
        if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
        else s.delta = 0;
        backward_connected_layer_gpu(input_layer, s);

        input_layer.output_gpu -= l.hidden*l.batch;
        input_layer.delta_gpu -= l.hidden*l.batch;
        input_layer.x_gpu -= l.hidden*l.batch;
        input_layer.x_norm_gpu -= l.hidden*l.batch;

        self_layer.output_gpu -= l.hidden*l.batch;
        self_layer.delta_gpu -= l.hidden*l.batch;
        self_layer.x_gpu -= l.hidden*l.batch;
        self_layer.x_norm_gpu -= l.hidden*l.batch;

        output_layer.output_gpu -= l.outputs*l.batch;
        output_layer.delta_gpu -= l.outputs*l.batch;
        output_layer.x_gpu -= l.outputs*l.batch;
        output_layer.x_norm_gpu -= l.outputs*l.batch;
    }
}
#endif
@@ -0,0 +1,24 @@
#ifndef RNN_LAYER_H
#define RNN_LAYER_H

#include "activations.h"
#include "layer.h"
#include "network.h"

layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize);

void forward_rnn_layer(layer l, network_state state);
void backward_rnn_layer(layer l, network_state state);
void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay);

#ifdef GPU
void forward_rnn_layer_gpu(layer l, network_state state);
void backward_rnn_layer_gpu(layer l, network_state state);
void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
void push_rnn_layer(layer l);
void pull_rnn_layer(layer l);
#endif

#endif