|
|
|
@ -186,6 +186,19 @@ __global__ void activate_array_kernel(float *x, int n, ACTIVATION a) |
|
|
|
|
if(i < n) x[i] = activate_kernel(x[i], a); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Swish forward pass: output = x * sigmoid(x), one thread per element.
// Assumes a flattened (possibly 2D) grid as produced by cuda_gridsize().
// sigmoid(x) is additionally cached in output_sigmoid_gpu so the backward
// kernel can reuse it instead of recomputing the logistic function.
__global__ void activate_array_swish_kernel(float *x, int n, float *output_sigmoid_gpu, float *output_gpu)
{
    const int idx = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
    if (idx >= n) return;   // guard the grid tail

    const float val = x[idx];
    const float sig = logistic_activate_kernel(val);
    output_sigmoid_gpu[idx] = sig;      // cache for gradient_array_swish_kernel
    output_gpu[idx] = val * sig;        // swish(x) = x * sigmoid(x)
}
|
|
|
|
|
|
|
|
|
__global__ void activate_array_leaky_kernel(float *x, int n) |
|
|
|
|
{ |
|
|
|
|
int index = blockIdx.x*blockDim.x + threadIdx.x; |
|
|
|
@ -240,6 +253,16 @@ __global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delt |
|
|
|
|
if(i < n) delta[i] *= gradient_kernel(x[i], a); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// https://github.com/BVLC/caffe/blob/04ab089db018a292ae48d51732dd6c66766b36b6/src/caffe/layers/swish_layer.cu#L28-L30 |
|
|
|
|
// Swish backward pass. Here x holds the FORWARD swish output f(x) and
// sigmoid_gpu holds sigmoid(x) cached by the forward kernel, so the
// derivative simplifies to f'(x) = f(x) + sigmoid(x) * (1 - f(x)),
// matching the Caffe swish layer implementation referenced above.
__global__ void gradient_array_swish_kernel(float *x, int n, float *sigmoid_gpu, float *delta)
{
    const int idx = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
    if (idx >= n) return;   // guard the grid tail

    const float fwd = x[idx];   // forward swish output, not the raw input
    delta[idx] *= fwd + sigmoid_gpu[idx] * (1.0f - fwd);
}
|
|
|
|
|
|
|
|
|
__global__ void gradient_array_leaky_kernel(float *x, int n, float *delta) |
|
|
|
|
{ |
|
|
|
|
int index = blockIdx.x*blockDim.x + threadIdx.x; |
|
|
|
@ -303,6 +326,13 @@ extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a) |
|
|
|
|
CHECK_CUDA(cudaPeekAtLastError()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Host wrapper: launches the Swish forward kernel on the darknet CUDA stream.
//   x                  - device input array of n floats
//   output_sigmoid_gpu - device buffer receiving sigmoid(x) (reused by backward)
//   output_gpu         - device buffer receiving x * sigmoid(x)
// cuda_gridsize(n) builds the (possibly 2D) grid that the kernel flattens,
// so the previously computed 1D block count was dead code and is removed.
extern "C" void activate_array_swish_ongpu(float *x, int n, float *output_sigmoid_gpu, float *output_gpu)
{
    activate_array_swish_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> >(x, n, output_sigmoid_gpu, output_gpu);
    CHECK_CUDA(cudaPeekAtLastError());   // catch launch-configuration errors without syncing
}
|
|
|
|
|
|
|
|
|
extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta) |
|
|
|
|
{ |
|
|
|
|
const int num_blocks = get_number_of_blocks(n, BLOCK); |
|
|
|
@ -317,3 +347,11 @@ extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta |
|
|
|
|
gradient_array_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> > (x, n, a, delta); |
|
|
|
|
CHECK_CUDA(cudaPeekAtLastError()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Host wrapper: launches the Swish backward kernel on the darknet CUDA stream.
//   x           - device array holding the forward swish output f(x)
//   sigmoid_gpu - device array with sigmoid(x) cached by the forward pass
//   delta       - device gradient buffer, scaled in place by f'(x)
// cuda_gridsize(n) builds the (possibly 2D) grid that the kernel flattens,
// so the previously computed 1D block count was dead code and is removed.
extern "C" void gradient_array_swish_ongpu(float *x, int n, float *sigmoid_gpu, float *delta)
{
    gradient_array_swish_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> > (x, n, sigmoid_gpu, delta);
    CHECK_CUDA(cudaPeekAtLastError());   // catch launch-configuration errors without syncing
}