/*
 * Forward pass of max pooling across the channel (depth) axis.
 *
 * The flat thread id is built from blockIdx.x, blockIdx.y, gridDim.x,
 * blockDim.x and threadIdx.x; threads whose id is >= n do nothing.
 * Each remaining thread decodes its id into a column (id % w), a row
 * ((id / w) % h) and a batch item (((id / w) / h) % batch), then walks
 * every output channel for that position.
 * NOTE(review): n is presumably w * h * batch -- confirm against the launcher.
 *
 * For output channel g the thread scans input channels g, g + out_c,
 * g + 2*out_c, ... (while < c) and keeps the largest value.
 *
 *   input   read at  col + w*(row + h*(channel + c*item))
 *   output  written at col + w*(row + h*(g + out_c*item))
 *   indexes same offset as output; receives the flat input offset of the
 *           winning element, or -1 when no scanned value was greater than
 *           -FLT_MAX (including when no channel was scanned at all).
 */
__global__ void forward_maxpool_depth_layer_kernel(int n, int w, int h, int c, int out_c, int batch, float *input, float *output, int *indexes)
{
    const int tid = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;

    if (tid < n) {
        // Peel the spatial position and batch item off the flat id.
        const int col  = tid % w;
        const int rest = tid / w;
        const int row  = rest % h;
        const int item = (rest / h) % batch;

        int group = 0;
        while (group < out_c) {
            const int dst = col + w*(row + h*(group + out_c*item));

            float best = -FLT_MAX;
            int best_src = -1;

            // Visit every out_c-th input channel starting at this group.
            for (int ch = group; ch < c; ch += out_c) {
                const int src = col + w*(row + h*(ch + c*item));
                const float v = input[src];
                if (v > best) {
                    best_src = src;
                    best = v;
                }
            }

            output[dst] = best;
            indexes[dst] = best_src;
            ++group;
        }
    }
}
/*
 * Backward pass of max pooling across the channel (depth) axis.
 *
 * The flat thread id is built from blockIdx.x, blockIdx.y, gridDim.x,
 * blockDim.x and threadIdx.x; threads whose id is >= n do nothing.
 * Thread `id` adds delta[id] to prev_delta[indexes[id]].
 *
 *   n          number of entries of delta / indexes to process
 *   w,h,c,batch  unused here; kept so the signature stays unchanged
 *   delta      values to route, read at [id]
 *   prev_delta accumulator, updated at [indexes[id]]
 *   indexes    flat offsets into prev_delta, one per entry of delta
 *
 * A negative entry in indexes is skipped: forward_maxpool_depth_layer_kernel
 * stores -1 when it finds no value greater than -FLT_MAX, and using that as
 * an offset would write before the start of prev_delta.
 *
 * NOTE(review): the accumulation is a plain (non-atomic) +=, so it assumes
 * no two entries of indexes[0..n) name the same offset -- confirm for any
 * producer of indexes other than the depth forward kernel.
 */
__global__ void backward_maxpool_depth_layer_kernel(int n, int w, int h, int c, int batch, float *delta, float *prev_delta, int *indexes)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (id >= n) return;

    int index = indexes[id];

    // -1 marks "no maximum was selected"; there is nowhere to route the gradient.
    if (index < 0) return;

    prev_delta[index] += delta[id];
}
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
{
{
int h = (in_h + pad - size) / stride + 1;
int h = (in_h + pad - size) / stride + 1;
@ -84,6 +128,19 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_