__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
{
{
int h = (in_h + 2 * pad - size) / stride + 1;
int h = (in_h + pad - size) / stride + 1;
int w = (in_w + 2 * pad - size) / stride + 1;
int w = (in_w + pad - size) / stride + 1;
int c = in_c;
int c = in_c;
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@ -24,8 +24,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
id /= c;
id /= c;
int b = id;
int b = id;
int w_offset = -pad;
int w_offset = -pad / 2;
int h_offset = -pad;
int h_offset = -pad / 2;
int out_index = j + w*(i + h*(k + c*b));
int out_index = j + w*(i + h*(k + c*b));
float max = -INFINITY;
float max = -INFINITY;
@ -49,8 +49,8 @@ __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c
__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
{
{
int h = (in_h + 2 * pad - size) / stride + 1;
int h = (in_h + pad - size) / stride + 1;
int w = (in_w + 2 * pad - size) / stride + 1;
int w = (in_w + pad - size) / stride + 1;
int c = in_c;
int c = in_c;
int area = (size-1)/stride;
int area = (size-1)/stride;
@ -66,8 +66,8 @@ __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_