Fixed typo weights_normalizion -> weights_normalization

pull/5550/head
AlexeyAB 5 years ago
parent ccafff912b
commit 424632f336
  1. 2
      include/darknet.h
  2. 40
      src/blas.c
  3. 8
      src/blas.h
  4. 60
      src/blas_kernels.cu
  5. 5
      src/demo.c
  6. 14
      src/network.c
  7. 12
      src/parser.c
  8. 14
      src/shortcut_layer.c
  9. 2
      src/shortcut_layer.h

@ -352,7 +352,7 @@ struct layer {
float **layers_output; float **layers_output;
float **layers_delta; float **layers_delta;
WEIGHTS_TYPE_T weights_type; WEIGHTS_TYPE_T weights_type;
WEIGHTS_NORMALIZATION_T weights_normalizion; WEIGHTS_NORMALIZATION_T weights_normalization;
int * map; int * map;
int * counts; int * counts;
float ** sums; float ** sums;

@ -74,7 +74,7 @@ static float relu(float src) {
return 0; return 0;
} }
void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers, float **layers_output, float *out, float *in, float *weights, int nweights, WEIGHTS_NORMALIZATION_T weights_normalizion) void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers, float **layers_output, float *out, float *in, float *weights, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization)
{ {
// nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w) // nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w)
const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w) const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w)
@ -92,8 +92,8 @@ void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *o
float sum = 1, max_val = -FLT_MAX; float sum = 1, max_val = -FLT_MAX;
int i; int i;
if (weights && weights_normalizion) { if (weights && weights_normalization) {
if (weights_normalizion == SOFTMAX_NORMALIZATION) { if (weights_normalization == SOFTMAX_NORMALIZATION) {
for (i = 0; i < (n + 1); ++i) { for (i = 0; i < (n + 1); ++i) {
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
float w = weights[weights_index]; float w = weights[weights_index];
@ -105,15 +105,15 @@ void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *o
for (i = 0; i < (n + 1); ++i) { for (i = 0; i < (n + 1); ++i) {
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
const float w = weights[weights_index]; const float w = weights[weights_index];
if (weights_normalizion == RELU_NORMALIZATION) sum += relu(w); if (weights_normalization == RELU_NORMALIZATION) sum += relu(w);
else if (weights_normalizion == SOFTMAX_NORMALIZATION) sum += expf(w - max_val); else if (weights_normalization == SOFTMAX_NORMALIZATION) sum += expf(w - max_val);
} }
} }
if (weights) { if (weights) {
float w = weights[src_i / step]; float w = weights[src_i / step];
if (weights_normalizion == RELU_NORMALIZATION) w = relu(w) / sum; if (weights_normalization == RELU_NORMALIZATION) w = relu(w) / sum;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
out[id] = in[id] * w; // [0 or c or (c, h ,w)] out[id] = in[id] * w; // [0 or c or (c, h ,w)]
} }
@ -131,8 +131,8 @@ void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *o
if (weights) { if (weights) {
const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)]
float w = weights[weights_index]; float w = weights[weights_index];
if (weights_normalizion == RELU_NORMALIZATION) w = relu(w) / sum; if (weights_normalization == RELU_NORMALIZATION) w = relu(w) / sum;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
out[out_index] += add[add_index] * w; // [0 or c or (c, h ,w)] out[out_index] += add[add_index] * w; // [0 or c or (c, h ,w)]
} }
@ -143,7 +143,7 @@ void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *o
} }
void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers, void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers,
float **layers_delta, float *delta_out, float *delta_in, float *weights, float *weight_updates, int nweights, float *in, float **layers_output, WEIGHTS_NORMALIZATION_T weights_normalizion) float **layers_delta, float *delta_out, float *delta_in, float *weights, float *weight_updates, int nweights, float *in, float **layers_output, WEIGHTS_NORMALIZATION_T weights_normalization)
{ {
// nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w) // nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w)
const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w) const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w)
@ -160,8 +160,8 @@ void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int
float grad = 1, sum = 1, max_val = -FLT_MAX;; float grad = 1, sum = 1, max_val = -FLT_MAX;;
int i; int i;
if (weights && weights_normalizion) { if (weights && weights_normalization) {
if (weights_normalizion == SOFTMAX_NORMALIZATION) { if (weights_normalization == SOFTMAX_NORMALIZATION) {
for (i = 0; i < (n + 1); ++i) { for (i = 0; i < (n + 1); ++i) {
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
float w = weights[weights_index]; float w = weights[weights_index];
@ -173,8 +173,8 @@ void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int
for (i = 0; i < (n + 1); ++i) { for (i = 0; i < (n + 1); ++i) {
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
const float w = weights[weights_index]; const float w = weights[weights_index];
if (weights_normalizion == RELU_NORMALIZATION) sum += relu(w); if (weights_normalization == RELU_NORMALIZATION) sum += relu(w);
else if (weights_normalizion == SOFTMAX_NORMALIZATION) sum += expf(w - max_val); else if (weights_normalization == SOFTMAX_NORMALIZATION) sum += expf(w - max_val);
} }
/* /*
@ -183,16 +183,16 @@ void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
const float delta_w = delta_in[id] * in[id]; const float delta_w = delta_in[id] * in[id];
const float w = weights[weights_index]; const float w = weights[weights_index];
if (weights_normalizion == RELU_NORMALIZATION) grad += delta_w * relu(w) / sum; if (weights_normalization == RELU_NORMALIZATION) grad += delta_w * relu(w) / sum;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) grad += delta_w * expf(w - max_val) / sum; else if (weights_normalization == SOFTMAX_NORMALIZATION) grad += delta_w * expf(w - max_val) / sum;
} }
*/ */
} }
if (weights) { if (weights) {
float w = weights[src_i / step]; float w = weights[src_i / step];
if (weights_normalizion == RELU_NORMALIZATION) w = relu(w) / sum; if (weights_normalization == RELU_NORMALIZATION) w = relu(w) / sum;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
delta_out[id] += delta_in[id] * w; // [0 or c or (c, h ,w)] delta_out[id] += delta_in[id] * w; // [0 or c or (c, h ,w)]
weight_updates[src_i / step] += delta_in[id] * in[id] * grad; weight_updates[src_i / step] += delta_in[id] * in[id] * grad;
@ -212,8 +212,8 @@ void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int
const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)]
float w = weights[weights_index]; float w = weights[weights_index];
if (weights_normalizion == RELU_NORMALIZATION) w = relu(w) / sum; if (weights_normalization == RELU_NORMALIZATION) w = relu(w) / sum;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
layer_delta[add_index] += delta_in[id] * w; // [0 or c or (c, h ,w)] layer_delta[add_index] += delta_in[id] * w; // [0 or c or (c, h ,w)]
weight_updates[weights_index] += delta_in[id] * add[add_index] * grad; weight_updates[weights_index] += delta_in[id] * add[add_index] * grad;

@ -33,9 +33,9 @@ void fill_cpu(int N, float ALPHA, float * X, int INCX);
float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); float dot_cpu(int N, float *X, int INCX, float *Y, int INCY);
void test_gpu_blas(); void test_gpu_blas();
void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out);
void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers, float **layers_output, float *out, float *in, float *weights, int nweights, WEIGHTS_NORMALIZATION_T weights_normalizion); void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers, float **layers_output, float *out, float *in, float *weights, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization);
void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers, void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers,
float **layers_delta, float *delta_out, float *delta_in, float *weights, float *weight_updates, int nweights, float *in, float **layers_output, WEIGHTS_NORMALIZATION_T weights_normalizion); float **layers_delta, float *delta_out, float *delta_in, float *weights, float *weight_updates, int nweights, float *in, float **layers_output, WEIGHTS_NORMALIZATION_T weights_normalization);
void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); void mean_cpu(float *x, int batch, int filters, int spatial, float *mean);
void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
@ -95,9 +95,9 @@ void inverse_variance_ongpu(int size, float *src, float *dst, float epsilon);
void normalize_scale_bias_gpu(float *x, float *mean, float *variance, float *scales, float *biases, int batch, int filters, int spatial, int inverse_variance, float epsilon); void normalize_scale_bias_gpu(float *x, float *mean, float *variance, float *scales, float *biases, int batch, int filters, int spatial, int inverse_variance, float epsilon);
void compare_2_arrays_gpu(float *one, float *two, int size); void compare_2_arrays_gpu(float *one, float *two, int size);
void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out);
void shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalizion); void shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization);
void backward_shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_delta_gpu, float *delta_out, float *delta_in, void backward_shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_delta_gpu, float *delta_out, float *delta_in,
float *weights, float *weight_updates, int nweights, float *in, float **layers_output, WEIGHTS_NORMALIZATION_T weights_normalizion); float *weights, float *weight_updates, int nweights, float *in, float **layers_output, WEIGHTS_NORMALIZATION_T weights_normalization);
void input_shortcut_gpu(float *in, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); void input_shortcut_gpu(float *in, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out);
void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates);
void mean_array_gpu(float *src, int size, float alpha, float *avg); void mean_array_gpu(float *src, int size, float alpha, float *avg);

@ -879,7 +879,7 @@ __device__ float grad_lrelu(float src) {
return (src > eps); return (src > eps);
} }
__global__ void shortcut_singlelayer_simple_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalizion) __global__ void shortcut_singlelayer_simple_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization)
{ {
const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if (id >= size) return; if (id >= size) return;
@ -901,7 +901,7 @@ __global__ void shortcut_singlelayer_simple_kernel(int size, int src_outputs, in
out[id] = out_val; out[id] = out_val;
} }
__global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalizion) __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization)
{ {
const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if (id >= size) return; if (id >= size) return;
@ -917,8 +917,8 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
int src_b = src_id; int src_b = src_id;
float sum = 1, max_val = -FLT_MAX; float sum = 1, max_val = -FLT_MAX;
if (weights_gpu && weights_normalizion) { if (weights_gpu && weights_normalization) {
if (weights_normalizion == SOFTMAX_NORMALIZATION) { if (weights_normalization == SOFTMAX_NORMALIZATION) {
for (int i = 0; i < (n + 1); ++i) { for (int i = 0; i < (n + 1); ++i) {
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
const float w = weights_gpu[weights_index]; const float w = weights_gpu[weights_index];
@ -930,8 +930,8 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
for (int i = 0; i < (n + 1); ++i) { for (int i = 0; i < (n + 1); ++i) {
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
const float w = weights_gpu[weights_index]; const float w = weights_gpu[weights_index];
if (weights_normalizion == RELU_NORMALIZATION) sum += lrelu(w); if (weights_normalization == RELU_NORMALIZATION) sum += lrelu(w);
else if (weights_normalizion == SOFTMAX_NORMALIZATION) sum += expf(w - max_val); else if (weights_normalization == SOFTMAX_NORMALIZATION) sum += expf(w - max_val);
} }
} }
@ -939,8 +939,8 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
if (weights_gpu) { if (weights_gpu) {
float w = weights_gpu[src_i / step]; float w = weights_gpu[src_i / step];
if (weights_normalizion == RELU_NORMALIZATION) w = lrelu(w) / sum; if (weights_normalization == RELU_NORMALIZATION) w = lrelu(w) / sum;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
out_val = in[id] * w; // [0 or c or (c, h ,w)] out_val = in[id] * w; // [0 or c or (c, h ,w)]
} }
@ -957,8 +957,8 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
if (weights_gpu) { if (weights_gpu) {
const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)]
float w = weights_gpu[weights_index]; float w = weights_gpu[weights_index];
if (weights_normalizion == RELU_NORMALIZATION) w = lrelu(w) / sum; if (weights_normalization == RELU_NORMALIZATION) w = lrelu(w) / sum;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
out_val += add[add_index] * w; // [0 or c or (c, h ,w)] out_val += add[add_index] * w; // [0 or c or (c, h ,w)]
} }
@ -968,22 +968,22 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
out[id] = out_val; out[id] = out_val;
} }
extern "C" void shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalizion) extern "C" void shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization)
{ {
//printf(" src_outputs = %d, batch = %d, n = %d \n", src_outputs, batch, n); //printf(" src_outputs = %d, batch = %d, n = %d \n", src_outputs, batch, n);
int size = batch * src_outputs; int size = batch * src_outputs;
if (nweights == 0 && n == 1) { if (nweights == 0 && n == 1) {
shortcut_singlelayer_simple_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalizion); shortcut_singlelayer_simple_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalization);
} }
else { else {
shortcut_multilayer_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalizion); shortcut_multilayer_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalization);
} }
CHECK_CUDA(cudaPeekAtLastError()); CHECK_CUDA(cudaPeekAtLastError());
} }
__global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu,
float **layers_delta_gpu, float *delta_out, float *delta_in, float *weights_gpu, float *weight_updates_gpu, int nweights, float *in, float **layers_output_gpu, WEIGHTS_NORMALIZATION_T weights_normalizion) float **layers_delta_gpu, float *delta_out, float *delta_in, float *weights_gpu, float *weight_updates_gpu, int nweights, float *in, float **layers_output_gpu, WEIGHTS_NORMALIZATION_T weights_normalization)
{ {
const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if (id >= size) return; if (id >= size) return;
@ -1000,8 +1000,8 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i
float grad = 1, sum = 1, max_val = -FLT_MAX; float grad = 1, sum = 1, max_val = -FLT_MAX;
int i; int i;
if (weights_gpu && weights_normalizion) { if (weights_gpu && weights_normalization) {
if (weights_normalizion == SOFTMAX_NORMALIZATION) { if (weights_normalization == SOFTMAX_NORMALIZATION) {
for (int i = 0; i < (n + 1); ++i) { for (int i = 0; i < (n + 1); ++i) {
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
float w = weights_gpu[weights_index]; float w = weights_gpu[weights_index];
@ -1013,19 +1013,19 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i
for (i = 0; i < (n + 1); ++i) { for (i = 0; i < (n + 1); ++i) {
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
const float w = weights_gpu[weights_index]; const float w = weights_gpu[weights_index];
if (weights_normalizion == RELU_NORMALIZATION) sum += lrelu(w); if (weights_normalization == RELU_NORMALIZATION) sum += lrelu(w);
else if (weights_normalizion == SOFTMAX_NORMALIZATION) sum += expf(w - max_val); else if (weights_normalization == SOFTMAX_NORMALIZATION) sum += expf(w - max_val);
} }
} }
if (weights_gpu) { if (weights_gpu) {
float w = weights_gpu[src_i / step]; float w = weights_gpu[src_i / step];
if (weights_normalizion == RELU_NORMALIZATION) w = lrelu(w) / sum; if (weights_normalization == RELU_NORMALIZATION) w = lrelu(w) / sum;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
if (weights_normalizion == RELU_NORMALIZATION) grad = w; if (weights_normalization == RELU_NORMALIZATION) grad = w;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) grad = w*(1-w); else if (weights_normalization == SOFTMAX_NORMALIZATION) grad = w*(1-w);
delta_out[id] += delta_in[id] * w; // [0 or c or (c, h ,w)] delta_out[id] += delta_in[id] * w; // [0 or c or (c, h ,w)]
float weights_update_tmp = delta_in[id] * in[id] * grad;// / step; float weights_update_tmp = delta_in[id] * in[id] * grad;// / step;
@ -1061,11 +1061,11 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i
const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)] const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)]
float w = weights_gpu[weights_index]; float w = weights_gpu[weights_index];
if (weights_normalizion == RELU_NORMALIZATION) w = lrelu(w) / sum; if (weights_normalization == RELU_NORMALIZATION) w = lrelu(w) / sum;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
if (weights_normalizion == RELU_NORMALIZATION) grad = w; if (weights_normalization == RELU_NORMALIZATION) grad = w;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) grad = w*(1 - w); else if (weights_normalization == SOFTMAX_NORMALIZATION) grad = w*(1 - w);
layer_delta[add_index] += delta_in[id] * w; layer_delta[add_index] += delta_in[id] * w;
float weights_update_tmp = delta_in[id] * add[add_index] * grad;// / step; float weights_update_tmp = delta_in[id] * add[add_index] * grad;// / step;
@ -1078,8 +1078,8 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i
if (threadIdx.x % 32 == 0) { if (threadIdx.x % 32 == 0) {
if (!isnan(wu) && !isinf(wu)) if (!isnan(wu) && !isinf(wu))
atomicAdd(&weight_updates_gpu[weights_index], wu); atomicAdd(&weight_updates_gpu[weights_index], wu);
//if(weights_gpu[weights_index] != 1) printf(" wu = %f, weights_update_tmp = %f, w = %f, weights_gpu[weights_index] = %f, grad = %f, weights_normalizion = %d ", //if(weights_gpu[weights_index] != 1) printf(" wu = %f, weights_update_tmp = %f, w = %f, weights_gpu[weights_index] = %f, grad = %f, weights_normalization = %d ",
// wu, weights_update_tmp, w, weights_gpu[weights_index], grad, weights_normalizion); // wu, weights_update_tmp, w, weights_gpu[weights_index], grad, weights_normalization);
} }
} }
else { else {
@ -1094,7 +1094,7 @@ __global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, i
} }
extern "C" void backward_shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, extern "C" void backward_shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu,
float **layers_delta_gpu, float *delta_out, float *delta_in, float *weights_gpu, float *weight_updates_gpu, int nweights, float *in, float **layers_output_gpu, WEIGHTS_NORMALIZATION_T weights_normalizion) float **layers_delta_gpu, float *delta_out, float *delta_in, float *weights_gpu, float *weight_updates_gpu, int nweights, float *in, float **layers_output_gpu, WEIGHTS_NORMALIZATION_T weights_normalization)
{ {
const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w) const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w)
int step = 0; int step = 0;
@ -1104,7 +1104,7 @@ extern "C" void backward_shortcut_multilayer_gpu(int src_outputs, int batch, int
//printf(" src_outputs = %d, batch = %d, n = %d \n", src_outputs, batch, n); //printf(" src_outputs = %d, batch = %d, n = %d \n", src_outputs, batch, n);
int size = batch * src_outputs; int size = batch * src_outputs;
backward_shortcut_multilayer_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (size, src_outputs, batch, n, outputs_of_layers_gpu, backward_shortcut_multilayer_kernel << <cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >> > (size, src_outputs, batch, n, outputs_of_layers_gpu,
layers_delta_gpu, delta_out, delta_in, weights_gpu, weight_updates_gpu, nweights, in, layers_output_gpu, weights_normalizion); layers_delta_gpu, delta_out, delta_in, weights_gpu, weight_updates_gpu, nweights, in, layers_output_gpu, weights_normalization);
CHECK_CUDA(cudaPeekAtLastError()); CHECK_CUDA(cudaPeekAtLastError());
} }

@ -244,6 +244,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
double start_time = get_time_point(); double start_time = get_time_point();
float avg_fps = 0; float avg_fps = 0;
int frame_counter = 0; int frame_counter = 0;
int global_frame_counter = 0;
while(1){ while(1){
++count; ++count;
@ -291,7 +292,8 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
if(!prefix){ if(!prefix){
if (!dont_show) { if (!dont_show) {
show_image_mat(show_img, "Demo"); const int each_frame = max_val_cmp(1, avg_fps / 100);
if(global_frame_counter % each_frame == 0) show_image_mat(show_img, "Demo");
int c = wait_key_cv(1); int c = wait_key_cv(1);
if (c == 10) { if (c == 10) {
if (frame_skip == 0) frame_skip = 60; if (frame_skip == 0) frame_skip = 60;
@ -363,6 +365,7 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
float spent_time = (get_time_point() - start_time) / 1000000; float spent_time = (get_time_point() - start_time) / 1000000;
frame_counter++; frame_counter++;
global_frame_counter++;
if (spent_time >= 3.0f) { if (spent_time >= 3.0f) {
//printf(" spent_time = %f \n", spent_time); //printf(" spent_time = %f \n", spent_time);
avg_fps = frame_counter / spent_time; avg_fps = frame_counter / spent_time;

@ -1217,7 +1217,7 @@ void fuse_conv_batchnorm(network net)
#endif #endif
} }
} }
else if (l->type == SHORTCUT && l->weights && l->weights_normalizion) else if (l->type == SHORTCUT && l->weights && l->weights_normalization)
{ {
if (l->nweights > 0) { if (l->nweights > 0) {
//cuda_pull_array(l.weights_gpu, l.weights, l.nweights); //cuda_pull_array(l.weights_gpu, l.weights, l.nweights);
@ -1234,7 +1234,7 @@ void fuse_conv_batchnorm(network net)
{ {
float sum = 1, max_val = -FLT_MAX; float sum = 1, max_val = -FLT_MAX;
if (l->weights_normalizion == SOFTMAX_NORMALIZATION) { if (l->weights_normalization == SOFTMAX_NORMALIZATION) {
for (i = 0; i < (l->n + 1); ++i) { for (i = 0; i < (l->n + 1); ++i) {
int w_index = chan + i * layer_step; int w_index = chan + i * layer_step;
float w = l->weights[w_index]; float w = l->weights[w_index];
@ -1248,20 +1248,20 @@ void fuse_conv_batchnorm(network net)
for (i = 0; i < (l->n + 1); ++i) { for (i = 0; i < (l->n + 1); ++i) {
int w_index = chan + i * layer_step; int w_index = chan + i * layer_step;
float w = l->weights[w_index]; float w = l->weights[w_index];
if (l->weights_normalizion == RELU_NORMALIZATION) sum += lrelu(w); if (l->weights_normalization == RELU_NORMALIZATION) sum += lrelu(w);
else if (l->weights_normalizion == SOFTMAX_NORMALIZATION) sum += expf(w - max_val); else if (l->weights_normalization == SOFTMAX_NORMALIZATION) sum += expf(w - max_val);
} }
for (i = 0; i < (l->n + 1); ++i) { for (i = 0; i < (l->n + 1); ++i) {
int w_index = chan + i * layer_step; int w_index = chan + i * layer_step;
float w = l->weights[w_index]; float w = l->weights[w_index];
if (l->weights_normalizion == RELU_NORMALIZATION) w = lrelu(w) / sum; if (l->weights_normalization == RELU_NORMALIZATION) w = lrelu(w) / sum;
else if (l->weights_normalizion == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; else if (l->weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
l->weights[w_index] = w; l->weights[w_index] = w;
} }
} }
l->weights_normalizion = NO_NORMALIZATION; l->weights_normalization = NO_NORMALIZATION;
#ifdef GPU #ifdef GPU
if (gpu_index >= 0) { if (gpu_index >= 0) {

@ -852,12 +852,12 @@ layer parse_shortcut(list *options, size_params params, network net)
exit(0); exit(0);
} }
char *weights_normalizion_str = option_find_str_quiet(options, "weights_normalizion", "none"); char *weights_normalization_str = option_find_str_quiet(options, "weights_normalization", "none");
WEIGHTS_NORMALIZATION_T weights_normalizion = NO_NORMALIZATION; WEIGHTS_NORMALIZATION_T weights_normalization = NO_NORMALIZATION;
if (strcmp(weights_normalizion_str, "relu") == 0 || strcmp(weights_normalizion_str, "avg_relu") == 0) weights_normalizion = RELU_NORMALIZATION; if (strcmp(weights_normalization_str, "relu") == 0 || strcmp(weights_normalization_str, "avg_relu") == 0) weights_normalization = RELU_NORMALIZATION;
else if (strcmp(weights_normalizion_str, "softmax") == 0) weights_normalizion = SOFTMAX_NORMALIZATION; else if (strcmp(weights_normalization_str, "softmax") == 0) weights_normalization = SOFTMAX_NORMALIZATION;
else if (strcmp(weights_type_str, "none") != 0) { else if (strcmp(weights_type_str, "none") != 0) {
printf("Error: Incorrect weights_normalizion = %s \n Use one of: none, relu, softmax \n", weights_normalizion_str); printf("Error: Incorrect weights_normalization = %s \n Use one of: none, relu, softmax \n", weights_normalization_str);
getchar(); getchar();
exit(0); exit(0);
} }
@ -896,7 +896,7 @@ layer parse_shortcut(list *options, size_params params, network net)
#endif// GPU #endif// GPU
layer s = make_shortcut_layer(params.batch, n, layers, sizes, params.w, params.h, params.c, layers_output, layers_delta, layer s = make_shortcut_layer(params.batch, n, layers, sizes, params.w, params.h, params.c, layers_output, layers_delta,
layers_output_gpu, layers_delta_gpu, weights_type, weights_normalizion, activation, params.train); layers_output_gpu, layers_delta_gpu, weights_type, weights_normalization, activation, params.train);
free(layers_output_gpu); free(layers_output_gpu);
free(layers_delta_gpu); free(layers_delta_gpu);

@ -8,7 +8,7 @@
#include <assert.h> #include <assert.h>
layer make_shortcut_layer(int batch, int n, int *input_layers, int* input_sizes, int w, int h, int c, layer make_shortcut_layer(int batch, int n, int *input_layers, int* input_sizes, int w, int h, int c,
float **layers_output, float **layers_delta, float **layers_output_gpu, float **layers_delta_gpu, WEIGHTS_TYPE_T weights_type, WEIGHTS_NORMALIZATION_T weights_normalizion, float **layers_output, float **layers_delta, float **layers_output_gpu, float **layers_delta_gpu, WEIGHTS_TYPE_T weights_type, WEIGHTS_NORMALIZATION_T weights_normalization,
ACTIVATION activation, int train) ACTIVATION activation, int train)
{ {
fprintf(stderr, "Shortcut Layer: "); fprintf(stderr, "Shortcut Layer: ");
@ -26,7 +26,7 @@ layer make_shortcut_layer(int batch, int n, int *input_layers, int* input_sizes,
l.layers_output = layers_output; l.layers_output = layers_output;
l.layers_delta = layers_delta; l.layers_delta = layers_delta;
l.weights_type = weights_type; l.weights_type = weights_type;
l.weights_normalizion = weights_normalizion; l.weights_normalization = weights_normalization;
l.learning_rate_scale = 1; // not necessary l.learning_rate_scale = 1; // not necessary
//l.w = w2; //l.w = w2;
@ -87,7 +87,7 @@ layer make_shortcut_layer(int batch, int n, int *input_layers, int* input_sizes,
l.bflops = l.out_w * l.out_h * l.out_c * l.n / 1000000000.; l.bflops = l.out_w * l.out_h * l.out_c * l.n / 1000000000.;
if (l.weights_type) l.bflops *= 2; if (l.weights_type) l.bflops *= 2;
fprintf(stderr, " wt = %d, wn = %d, outputs:%4d x%4d x%4d %5.3f BF\n", l.weights_type, l.weights_normalizion, l.out_w, l.out_h, l.out_c, l.bflops); fprintf(stderr, " wt = %d, wn = %d, outputs:%4d x%4d x%4d %5.3f BF\n", l.weights_type, l.weights_normalization, l.out_w, l.out_h, l.out_c, l.bflops);
return l; return l;
} }
@ -161,7 +161,7 @@ void forward_shortcut_layer(const layer l, network_state state)
l.output[i] = state.input[i] + state.net.layers[l.index].output[i]; l.output[i] = state.input[i] + state.net.layers[l.index].output[i];
} }
else { else {
shortcut_multilayer_cpu(l.outputs * l.batch, l.outputs, l.batch, l.n, l.input_sizes, l.layers_output, l.output, state.input, l.weights, l.nweights, l.weights_normalizion); shortcut_multilayer_cpu(l.outputs * l.batch, l.outputs, l.batch, l.n, l.input_sizes, l.layers_output, l.output, state.input, l.weights, l.nweights, l.weights_normalization);
} }
//copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1); //copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
@ -180,7 +180,7 @@ void backward_shortcut_layer(const layer l, network_state state)
else gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); else gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
backward_shortcut_multilayer_cpu(l.outputs * l.batch, l.outputs, l.batch, l.n, l.input_sizes, backward_shortcut_multilayer_cpu(l.outputs * l.batch, l.outputs, l.batch, l.n, l.input_sizes,
l.layers_delta, state.delta, l.delta, l.weights, l.weight_updates, l.nweights, state.input, l.layers_output, l.weights_normalizion); l.layers_delta, state.delta, l.delta, l.weights, l.weight_updates, l.nweights, state.input, l.layers_output, l.weights_normalization);
//axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1); //axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1);
//shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta); //shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta);
@ -218,7 +218,7 @@ void forward_shortcut_layer_gpu(const layer l, network_state state)
//} //}
//else //else
{ {
shortcut_multilayer_gpu(l.outputs, l.batch, l.n, l.input_sizes_gpu, l.layers_output_gpu, l.output_gpu, state.input, l.weights_gpu, l.nweights, l.weights_normalizion); shortcut_multilayer_gpu(l.outputs, l.batch, l.n, l.input_sizes_gpu, l.layers_output_gpu, l.output_gpu, state.input, l.weights_gpu, l.nweights, l.weights_normalization);
} }
if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu);
@ -234,7 +234,7 @@ void backward_shortcut_layer_gpu(const layer l, network_state state)
else gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); else gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
backward_shortcut_multilayer_gpu(l.outputs, l.batch, l.n, l.input_sizes_gpu, l.layers_delta_gpu, state.delta, l.delta_gpu, backward_shortcut_multilayer_gpu(l.outputs, l.batch, l.n, l.input_sizes_gpu, l.layers_delta_gpu, state.delta, l.delta_gpu,
l.weights_gpu, l.weight_updates_gpu, l.nweights, state.input, l.layers_output_gpu, l.weights_normalizion); l.weights_gpu, l.weight_updates_gpu, l.nweights, state.input, l.layers_output_gpu, l.weights_normalization);
//axpy_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1, state.delta, 1); //axpy_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1, state.delta, 1);
//shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, state.net.layers[l.index].delta_gpu); //shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, state.net.layers[l.index].delta_gpu);

@ -8,7 +8,7 @@
extern "C" { extern "C" {
#endif #endif
layer make_shortcut_layer(int batch, int n, int *input_layers, int* input_sizes, int w, int h, int c, layer make_shortcut_layer(int batch, int n, int *input_layers, int* input_sizes, int w, int h, int c,
float **layers_output, float **layers_delta, float **layers_output_gpu, float **layers_delta_gpu, WEIGHTS_TYPE_T weights_type, WEIGHTS_NORMALIZATION_T weights_normalizion, float **layers_output, float **layers_delta, float **layers_output_gpu, float **layers_delta_gpu, WEIGHTS_TYPE_T weights_type, WEIGHTS_NORMALIZATION_T weights_normalization,
ACTIVATION activation, int train); ACTIVATION activation, int train);
void forward_shortcut_layer(const layer l, network_state state); void forward_shortcut_layer(const layer l, network_state state);
void backward_shortcut_layer(const layer l, network_state state); void backward_shortcut_layer(const layer l, network_state state);

Loading…
Cancel
Save