@@ -36,6 +36,7 @@ __device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;}
 __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;}
 __device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;}
 __device__ float tanh_activate_kernel(float x){return (2/(1 + expf(-2*x)) - 1);}
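+// GELU via the tanh approximation 0.5*x*(1 + tanh(sqrt(2/pi)*(x + 0.044715*x^3))),
+// with the constants folded in: 0.797885 ~ sqrt(2/pi), 0.035677 ~ 0.044715*sqrt(2/pi).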
+__device__ float gelu_activate_kernel(float x){return (0.5*x*(1 + tanhf(0.797885*x + 0.035677*powf(x, 3))));}
 __device__ float softplus_kernel(float x, float threshold = 20) {
     if (x > threshold) return x;    // too large
     else if (x < -threshold) return expf(x);    // too small
@@ -75,6 +76,11 @@ __device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;}
 __device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;}
 __device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;}
 __device__ float tanh_gradient_kernel(float x){return 1-x*x;}
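+// sech(x) = 2/(exp(x) + exp(-x)); sech^2 is the derivative of tanh.
+// gelu_gradient_kernel is the analytic derivative of the tanh-approximated GELU above:
+// d/dx[0.5*x*(1 + tanh(u))] = 0.5 + 0.5*tanh(u) + 0.5*x*u'*sech(u)^2 with u = 0.0356774*x^3 + 0.797885*x,
+// hence the coefficients 0.0535161 = 1.5*0.0356774 and 0.398942 ~ 0.5*0.797885.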
+__device__ float sech_gpu(float x) { return 2 / (expf(x) + expf(-x)); }
+__device__ float gelu_gradient_kernel(float x) {
+    const float x3 = powf(x, 3);
+    return 0.5*tanhf(0.0356774*x3 + 0.797885*x) + (0.0535161*x3 + 0.398942*x) * powf(sech_gpu(0.0356774*x3 + 0.797885*x), 2) + 0.5;
+}
 __device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01f : .125f;}
 __device__ float stair_gradient_kernel(float x)
 {
@@ -99,6 +105,8 @@ __device__ float activate_kernel(float x, ACTIVATION a)
         return elu_activate_kernel(x);
     case SELU:
         return selu_activate_kernel(x);
+    case GELU:
+        return gelu_activate_kernel(x);
     case RELIE:
         return relie_activate_kernel(x);
     case RAMP:
@@ -138,6 +146,8 @@ __device__ float gradient_kernel(float x, ACTIVATION a)
         return elu_gradient_kernel(x);
     case SELU:
         return selu_gradient_kernel(x);
+    case GELU:
+        return gelu_gradient_kernel(x);
     case RELIE:
         return relie_gradient_kernel(x);
     case RAMP:
@@ -245,6 +255,14 @@ __global__ void activate_array_selu_kernel(float *x, int n)
     }
 }
 
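+// forward pass: apply GELU elementwise to x[0..n), one thread per element with a bounds check.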
+__global__ void activate_array_gelu_kernel(float *x, int n)
+{
+    int index = blockIdx.x*blockDim.x + threadIdx.x;
+    if (index < n) {
+        x[index] = gelu_activate_kernel(x[index]);
+    }
+}
+
 __global__ void activate_array_logistic_kernel(float *x, int n)
 {
     int index = blockIdx.x*blockDim.x + threadIdx.x;
@@ -343,6 +361,14 @@ __global__ void gradient_array_selu_kernel(float *x, int n, float *delta)
     }
 }
 
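+// backward pass: scale the incoming gradient delta[i] by the GELU derivative evaluated at x[i].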
+__global__ void gradient_array_gelu_kernel(float *x, int n, float *delta)
+{
+    int index = blockIdx.x*blockDim.x + threadIdx.x;
+    if (index < n) {
+        delta[index] *= gelu_gradient_kernel(x[index]);
+    }
+}
+
 __global__ void gradient_array_logistic_kernel(float *x, int n, float *delta)
 {
     int index = blockIdx.x*blockDim.x + threadIdx.x;
@@ -394,6 +420,7 @@ extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a)
     else if (a == RELU) activate_array_relu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
     else if (a == RELU6) activate_array_relu6_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
     else if (a == SELU) activate_array_selu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
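+    // GELU gets a dedicated kernel like RELU/SELU above; other activations fall through to the generic activate_array_kernel.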
+    else if (a == GELU) activate_array_gelu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n);
     else
         activate_array_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream()>>>(x, n, a);
     CHECK_CUDA(cudaPeekAtLastError());
@@ -429,6 +456,7 @@ extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta)
         exit(0);
     }
     else if (a == SELU) gradient_array_selu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
+    else if (a == GELU) gradient_array_gelu_kernel << <num_blocks, BLOCK, 0, get_cuda_stream() >> >(x, n, delta);
     else
         gradient_array_kernel << <cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >> > (x, n, a, delta);
     CHECK_CUDA(cudaPeekAtLastError());