// NOTE(review): this span reads like interleaved old/new lines of a diff: every
// line occurs twice, and the "@ -901,7 +901,7 @@" line is a hunk header standing
// in for code that is not shown here. The two signature lines differ only in the
// spelling of the last parameter (weights_normalizion vs. weights_normalization).
//
// Kernel in which each thread owns one index `id` and stores one value to out[id].
// How the stored value is derived from the other parameters is not visible in
// this span.
__global__ void shortcut_singlelayer_simple_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalizion)
__global__ void shortcut_singlelayer_simple_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization)
{
{
// Flat thread index: blocks are numbered across gridDim.x and gridDim.y, threads
// within a block along blockDim.x only.
const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
// Threads whose index is not below `size` exit without touching memory.
if (id >= size) return;
if (id >= size) return;
@ -901,7 +901,7 @@ __global__ void shortcut_singlelayer_simple_kernel(int size, int src_outputs, in
// out_val is produced by code elided by the hunk header above; only the final
// store to this thread's slot is visible.
out[id] = out_val;
out[id] = out_val;
}
}
// NOTE(review): this span reads like interleaved old/new lines of a diff: every
// line occurs twice, and the lines starting with "@ -" are hunk headers standing
// in for code that is not shown here. Each pair differs only in the spelling
// weights_normalizion vs. weights_normalization.
//
// Kernel in which each thread owns one index `id`, builds out_val from in[id] and
// from values read through `add`, optionally scaled by weights from weights_gpu,
// and stores the result to out[id]. src_id, src_i, step, layer_step, add and
// add_index are defined in elided code, so their meaning is not documented here.
__global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalizion)
__global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization)
{
{
// Flat thread index: blocks are numbered across gridDim.x and gridDim.y, threads
// within a block along blockDim.x only.
const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
// Threads whose index is not below `size` exit without touching memory.
if (id >= size) return;
if (id >= size) return;
@ -917,8 +917,8 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
int src_b = src_id;
int src_b = src_id;
// Normalization state: `sum` is the divisor applied to weights further down and
// starts at 1; max_val starts at the lowest float and is subtracted inside expf
// in the softmax branches.
float sum = 1, max_val = -FLT_MAX;
float sum = 1, max_val = -FLT_MAX;
// The normalization passes run only when a weights buffer is given and the
// normalization mode is non-zero.
if (weights_gpu && weights_normalizion) {
if (weights_gpu && weights_normalization) {
// Softmax-only pass over the n + 1 weights of this element. The rest of the loop
// body is cut off by the next hunk header; presumably it records the largest
// weight in max_val. TODO confirm against the full file.
if (weights_normalizion == SOFTMAX_NORMALIZATION) {
if (weights_normalization == SOFTMAX_NORMALIZATION) {
for (int i = 0; i < (n + 1); ++i) {
for (int i = 0; i < (n + 1); ++i) {
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
const float w = weights_gpu[weights_index];
const float w = weights_gpu[weights_index];
@ -930,8 +930,8 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
// Accumulation pass over the same n + 1 weights: adds lrelu(w) to `sum` in RELU
// mode, or expf(w - max_val) in SOFTMAX mode.
for (int i = 0; i < (n + 1); ++i) {
for (int i = 0; i < (n + 1); ++i) {
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)]
const float w = weights_gpu[weights_index];
const float w = weights_gpu[weights_index];
if (weights_normalizion == RELU_NORMALIZATION) sum += lrelu(w);
if (weights_normalization == RELU_NORMALIZATION) sum += lrelu(w);
else if (weights_normalizion == SOFTMAX_NORMALIZATION) sum += expf(w - max_val);
else if (weights_normalization == SOFTMAX_NORMALIZATION) sum += expf(w - max_val);
}
}
}
}
@ -939,8 +939,8 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
// Contribution of the direct input: the weight at offset src_i / step (no layer
// offset) is normalized by `sum` according to the mode, then multiplied with
// in[id]. In any other mode the raw weight is used unchanged.
if (weights_gpu) {
if (weights_gpu) {
float w = weights_gpu[src_i / step];
float w = weights_gpu[src_i / step];
if (weights_normalizion == RELU_NORMALIZATION) w = lrelu(w) / sum;
if (weights_normalization == RELU_NORMALIZATION) w = lrelu(w) / sum;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
out_val = in[id] * w; // [0 or c or (c, h ,w)]
out_val = in[id] * w; // [0 or c or (c, h ,w)]
}
}
@ -957,8 +957,8 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
// Contribution of an additional source indexed by `i` (the enclosing loop is
// elided): its weight sits (i + 1) * layer_step past the input weight, is
// normalized the same way, and scales add[add_index] before it is accumulated.
if (weights_gpu) {
if (weights_gpu) {
const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)]
const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)]
float w = weights_gpu[weights_index];
float w = weights_gpu[weights_index];
if (weights_normalizion == RELU_NORMALIZATION) w = lrelu(w) / sum;
if (weights_normalization == RELU_NORMALIZATION) w = lrelu(w) / sum;
else if (weights_normalizion == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum;
out_val += add[add_index] * w; // [0 or c or (c, h ,w)]
out_val += add[add_index] * w; // [0 or c or (c, h ,w)]
}
}
@ -968,22 +968,22 @@ __global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch,
// Store the accumulated value to this thread's output slot.
out[id] = out_val;
out[id] = out_val;
}
}
extern "C" void shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalizion)
extern "C" void shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization)