Fix sam layer

pull/3738/head
AlexeyAB 6 years ago
parent 6467dc576a
commit 4bf9b223a2
  1. 7
      src/blas.h
  2. 45
      src/blas_kernels.cu
  3. 2
      src/parser.c
  4. 4
      src/sam_layer.c
  5. 2
      src/shortcut_layer.c

@ -122,6 +122,13 @@ void backward_scale_channels_gpu(float *in_w_h_c_delta, int size, int channel_si
float *in_scales_c, float *out_from_delta, float *in_scales_c, float *out_from_delta,
float *in_from_output, float *out_state_delta); float *in_from_output, float *out_state_delta);
void backward_sam_gpu(float *in_w_h_c_delta, int size, int channel_size,
float *in_scales_c, float *out_from_delta,
float *in_from_output, float *out_state_delta);
void sam_gpu(float *in_w_h_c, int size, int channel_size, float *scales_c, float *out);
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
} }

@ -1173,3 +1173,48 @@ extern "C" void backward_scale_channels_gpu(float *in_w_h_c_delta, int size, int
CHECK_CUDA(cudaPeekAtLastError()); CHECK_CUDA(cudaPeekAtLastError());
} }
__global__ void sam_kernel(float *in_w_h_c, int size, int channel_size, float *scales_c, float *out)
{
    // SAM (Spatial Attention Module) forward pass: elementwise product of the
    // input feature map and the per-element attention scales.
    // One thread per element; `size` = batch * c * h * w of the output.
    // `channel_size` is unused here — kept for signature parity with the
    // scale_channels kernels this module mirrors.
    const int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i >= size) return;
    out[i] = scales_c[i] * in_w_h_c[i];
}
extern "C" void sam_gpu(float *in_w_h_c, int size, int channel_size, float *scales_c, float *out)
{
    // Host-side launcher for sam_kernel: covers `size` elements with one
    // thread each on the library's shared CUDA stream, then checks for
    // launch-configuration errors without clearing the sticky error state.
    const int threads = BLOCK;
    const int blocks = get_number_of_blocks(size, threads);
    sam_kernel <<<blocks, threads, 0, get_cuda_stream() >>> (in_w_h_c, size, channel_size, scales_c, out);
    CHECK_CUDA(cudaPeekAtLastError());
}
__global__ void backward_sam_kernel(float *in_w_h_c_delta, int size, int channel_size,
    float *in_scales_c, float *out_from_delta,
    float *in_from_output, float *out_state_delta)
{
    // SAM backward pass — product rule for out = input * scales:
    //   d(scales) accumulates delta * from-layer output,
    //   d(input)  accumulates scales * delta.
    // Both writes are `+=` accumulations into existing gradient buffers.
    // `channel_size` is unused; kept for signature parity with
    // backward_scale_channels. NOTE(review): the original author questioned
    // whether the state-delta term should be divided by channel_size — confirm.
    const int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i >= size) return;
    out_state_delta[i] += in_from_output[i] * in_w_h_c_delta[i];
    out_from_delta[i] += in_w_h_c_delta[i] * in_scales_c[i];
}
extern "C" void backward_sam_gpu(float *in_w_h_c_delta, int size, int channel_size,
    float *in_scales_c, float *out_from_delta,
    float *in_from_output, float *out_state_delta)
{
    // Host-side launcher for backward_sam_kernel: one thread per gradient
    // element, launched on the library's shared CUDA stream, followed by a
    // non-clearing launch-error check.
    const int threads = BLOCK;
    const int blocks = get_number_of_blocks(size, threads);
    backward_sam_kernel <<<blocks, threads, 0, get_cuda_stream() >>> (
        in_w_h_c_delta, size, channel_size,
        in_scales_c, out_from_delta,
        in_from_output, out_state_delta);
    CHECK_CUDA(cudaPeekAtLastError());
}

@ -633,7 +633,7 @@ layer parse_sam(list *options, size_params params, network net)
int batch = params.batch; int batch = params.batch;
layer from = net.layers[index]; layer from = net.layers[index];
layer s = make_scale_channels_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c); layer s = make_sam_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c);
char *activation_s = option_find_str_quiet(options, "activation", "linear"); char *activation_s = option_find_str_quiet(options, "activation", "linear");
ACTIVATION activation = get_activation(activation_s); ACTIVATION activation = get_activation(activation_s);

@ -98,7 +98,7 @@ void forward_sam_layer_gpu(const layer l, network_state state)
int size = l.batch * l.out_c * l.out_w * l.out_h; int size = l.batch * l.out_c * l.out_w * l.out_h;
int channel_size = 1; int channel_size = 1;
scale_channels_gpu(state.net.layers[l.index].output_gpu, size, channel_size, state.input, l.output_gpu); sam_gpu(state.net.layers[l.index].output_gpu, size, channel_size, state.input, l.output_gpu);
activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
} }
@ -113,6 +113,6 @@ void backward_sam_layer_gpu(const layer l, network_state state)
float *from_delta = state.net.layers[l.index].delta_gpu; float *from_delta = state.net.layers[l.index].delta_gpu;
backward_scale_channels_gpu(l.delta_gpu, size, channel_size, state.input, from_delta, from_output, state.delta); backward_sam_gpu(l.delta_gpu, size, channel_size, state.input, from_delta, from_output, state.delta);
} }
#endif #endif

@ -19,7 +19,7 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int
l.outputs = w*h*c; l.outputs = w*h*c;
l.inputs = l.outputs; l.inputs = l.outputs;
if(w != w2 || h != h2 || c != c) fprintf(stderr, " w = %d, w2 = %d, h = %d, h2 = %d, c = %d, c2 = %d \n", w, w2, h, h2, c, c2); if(w != w2 || h != h2 || c != c2) fprintf(stderr, " w = %d, w2 = %d, h = %d, h2 = %d, c = %d, c2 = %d \n", w, w2, h, h2, c, c2);
l.index = index; l.index = index;

Loading…
Cancel
Save