Added maxpool_depth= and out_channels= params to [maxpool]

pull/5011/head
AlexeyAB 6 years ago
parent 7d4143332d
commit d4402d29c2
  1. 2
      include/darknet.h
  2. 53
      src/maxpool_layer.c
  3. 2
      src/maxpool_layer.h
  4. 71
      src/maxpool_layer_kernels.cu
  5. 4
      src/parser.c

@ -208,6 +208,8 @@ struct layer {
int side; int side;
int stride; int stride;
int dilation; int dilation;
int maxpool_depth;
int out_channels;
int reverse; int reverse;
int flatten; int flatten;
int spatial; int spatial;

@ -45,7 +45,7 @@ void cudnn_maxpool_setup(layer *l)
} }
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding) maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding, int maxpool_depth, int out_channels)
{ {
maxpool_layer l = { (LAYER_TYPE)0 }; maxpool_layer l = { (LAYER_TYPE)0 };
l.type = MAXPOOL; l.type = MAXPOOL;
@ -54,9 +54,18 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s
l.w = w; l.w = w;
l.c = c; l.c = c;
l.pad = padding; l.pad = padding;
l.out_w = (w + padding - size) / stride + 1; l.maxpool_depth = maxpool_depth;
l.out_h = (h + padding - size) / stride + 1; l.out_channels = out_channels;
l.out_c = c; if (maxpool_depth) {
l.out_c = out_channels;
l.out_w = l.w;
l.out_h = l.h;
}
else {
l.out_w = (w + padding - size) / stride + 1;
l.out_h = (h + padding - size) / stride + 1;
l.out_c = c;
}
l.outputs = l.out_h * l.out_w * l.out_c; l.outputs = l.out_h * l.out_w * l.out_c;
l.inputs = h*w*c; l.inputs = h*w*c;
l.size = size; l.size = size;
@ -90,7 +99,7 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
l->out_w = (w + l->pad - l->size) / l->stride + 1; l->out_w = (w + l->pad - l->size) / l->stride + 1;
l->out_h = (h + l->pad - l->size) / l->stride + 1; l->out_h = (h + l->pad - l->size) / l->stride + 1;
l->outputs = l->out_w * l->out_h * l->c; l->outputs = l->out_w * l->out_h * l->out_c;
int output_size = l->outputs * l->batch; int output_size = l->outputs * l->batch;
l->indexes = (int*)realloc(l->indexes, output_size * sizeof(int)); l->indexes = (int*)realloc(l->indexes, output_size * sizeof(int));
@ -111,6 +120,37 @@ void resize_maxpool_layer(maxpool_layer *l, int w, int h)
void forward_maxpool_layer(const maxpool_layer l, network_state state) void forward_maxpool_layer(const maxpool_layer l, network_state state)
{ {
if (l.maxpool_depth)
{
int b, i, j, k, g;
for (b = 0; b < l.batch; ++b) {
#pragma omp parallel for
for (i = 0; i < l.h; ++i) {
for (j = 0; j < l.w; ++j) {
for (g = 0; g < l.out_c; ++g)
{
int out_index = j + l.w*(i + l.h*(g + l.out_c*b));
float max = -FLT_MAX;
int max_i = -1;
for (k = g; k < l.c; k += l.out_c)
{
int in_index = j + l.w*(i + l.h*(k + l.c*b));
float val = state.input[in_index];
max_i = (val > max) ? in_index : max_i;
max = (val > max) ? val : max;
}
l.output[out_index] = max;
l.indexes[out_index] = max_i;
}
}
}
}
return;
}
if (!state.train) { if (!state.train) {
forward_maxpool_layer_avx(state.input, l.output, l.indexes, l.size, l.w, l.h, l.out_w, l.out_h, l.c, l.pad, l.stride, l.batch); forward_maxpool_layer_avx(state.input, l.output, l.indexes, l.size, l.w, l.h, l.out_w, l.out_h, l.c, l.pad, l.stride, l.batch);
return; return;
@ -156,7 +196,8 @@ void backward_maxpool_layer(const maxpool_layer l, network_state state)
int i; int i;
int h = l.out_h; int h = l.out_h;
int w = l.out_w; int w = l.out_w;
int c = l.c; int c = l.out_c;
#pragma omp parallel for
for(i = 0; i < h*w*c*l.batch; ++i){ for(i = 0; i < h*w*c*l.batch; ++i){
int index = l.indexes[i]; int index = l.indexes[i];
state.delta[index] += l.delta[i]; state.delta[index] += l.delta[i];

@ -12,7 +12,7 @@ typedef layer maxpool_layer;
extern "C" { extern "C" {
#endif #endif
image get_maxpool_image(maxpool_layer l); image get_maxpool_image(maxpool_layer l);
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding, int maxpool_depth, int out_channels);
void resize_maxpool_layer(maxpool_layer *l, int w, int h); void resize_maxpool_layer(maxpool_layer *l, int w, int h);
void forward_maxpool_layer(const maxpool_layer l, network_state state); void forward_maxpool_layer(const maxpool_layer l, network_state state);
void backward_maxpool_layer(const maxpool_layer l, network_state state); void backward_maxpool_layer(const maxpool_layer l, network_state state);

@ -5,6 +5,50 @@
#include "maxpool_layer.h" #include "maxpool_layer.h"
#include "dark_cuda.h" #include "dark_cuda.h"
__global__ void forward_maxpool_depth_layer_kernel(int n, int w, int h, int c, int out_c, int batch, float *input, float *output, int *indexes)
{
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if (id >= n) return;
int j = id % w;
id = id / w;
int i = id % h;
id = id / h;
//int g = id % out_c;
//id = id / out_c;
int b = id % batch;
int k;
for (int g = 0; g < out_c; ++g)
{
int out_index = j + w*(i + h*(g + out_c*b));
float max = -FLT_MAX;
int max_i = -1;
for (k = g; k < c; k += out_c)
{
int in_index = j + w*(i + h*(k + c*b));
float val = input[in_index];
max_i = (val > max) ? in_index : max_i;
max = (val > max) ? val : max;
}
output[out_index] = max;
indexes[out_index] = max_i;
}
}
__global__ void backward_maxpool_depth_layer_kernel(int n, int w, int h, int c, int batch, float *delta, float *prev_delta, int *indexes)
{
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if (id >= n) return;
int index = indexes[id];
prev_delta[index] += delta[id];
}
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
{ {
int h = (in_h + pad - size) / stride + 1; int h = (in_h + pad - size) / stride + 1;
@ -84,6 +128,19 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state) extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
{ {
if (layer.maxpool_depth) {
int h = layer.out_h;
int w = layer.out_w;
int c = 1;// layer.out_c;
size_t n = h*w*c*layer.batch;
forward_maxpool_depth_layer_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> >(
n, layer.w, layer.h, layer.c, layer.out_c, layer.batch, state.input, layer.output_gpu, layer.indexes_gpu);
CHECK_CUDA(cudaPeekAtLastError());
return;
}
#ifdef CUDNN_DISABLED #ifdef CUDNN_DISABLED
if (!state.train && layer.stride == layer.size) { if (!state.train && layer.stride == layer.size) {
@ -111,7 +168,7 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta
int h = layer.out_h; int h = layer.out_h;
int w = layer.out_w; int w = layer.out_w;
int c = layer.c; int c = layer.out_c;
size_t n = h*w*c*layer.batch; size_t n = h*w*c*layer.batch;
@ -121,6 +178,18 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta
extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state) extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
{ {
if (layer.maxpool_depth) {
int h = layer.out_h;
int w = layer.out_w;
int c = layer.out_c;
size_t n = h * w * c * layer.batch;
backward_maxpool_depth_layer_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> >(n, layer.w, layer.h, layer.c, layer.batch, layer.delta_gpu, state.delta, layer.indexes_gpu);
CHECK_CUDA(cudaPeekAtLastError());
return;
}
size_t n = layer.h*layer.w*layer.c*layer.batch; size_t n = layer.h*layer.w*layer.c*layer.batch;
backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu); backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);

@ -534,6 +534,8 @@ maxpool_layer parse_maxpool(list *options, size_params params)
int stride = option_find_int(options, "stride",1); int stride = option_find_int(options, "stride",1);
int size = option_find_int(options, "size",stride); int size = option_find_int(options, "size",stride);
int padding = option_find_int_quiet(options, "padding", size-1); int padding = option_find_int_quiet(options, "padding", size-1);
int maxpool_depth = option_find_int_quiet(options, "maxpool_depth", 0);
int out_channels = option_find_int_quiet(options, "out_channels", 1);
int batch,h,w,c; int batch,h,w,c;
h = params.h; h = params.h;
@ -542,7 +544,7 @@ maxpool_layer parse_maxpool(list *options, size_params params)
batch=params.batch; batch=params.batch;
if(!(h && w && c)) error("Layer before maxpool layer must output image."); if(!(h && w && c)) error("Layer before maxpool layer must output image.");
maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding); maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride, padding, maxpool_depth, out_channels);
return layer; return layer;
} }

Loading…
Cancel
Save