|
|
|
@ -62,19 +62,20 @@ void im2col_cpu_ext(const float* data_im, const int channels, |
|
|
|
|
const int output_w = (width + 2 * pad_w - |
|
|
|
|
(dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; |
|
|
|
|
const int channel_size = height * width; |
|
|
|
|
for (int channel = channels; channel--; data_im += channel_size) { |
|
|
|
|
for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) { |
|
|
|
|
for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) { |
|
|
|
|
int channel, kernel_row, kernel_col, output_rows, output_col; |
|
|
|
|
for (channel = channels; channel--; data_im += channel_size) { |
|
|
|
|
for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { |
|
|
|
|
for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { |
|
|
|
|
int input_row = -pad_h + kernel_row * dilation_h; |
|
|
|
|
for (int output_rows = output_h; output_rows; output_rows--) { |
|
|
|
|
for (output_rows = output_h; output_rows; output_rows--) { |
|
|
|
|
if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { |
|
|
|
|
for (int output_cols = output_w; output_cols; output_cols--) { |
|
|
|
|
for (output_col = output_w; output_col; output_col--) { |
|
|
|
|
*(data_col++) = 0; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else { |
|
|
|
|
int input_col = -pad_w + kernel_col * dilation_w; |
|
|
|
|
for (int output_col = output_w; output_col; output_col--) { |
|
|
|
|
for (output_col = output_w; output_col; output_col--) { |
|
|
|
|
if (is_a_ge_zero_and_a_lt_b(input_col, width)) { |
|
|
|
|
*(data_col++) = data_im[input_row * width + input_col]; |
|
|
|
|
} |
|
|
|
|