mirror of https://github.com/AlexeyAB/darknet.git
parent
9b3c7136f3
commit
158bb1bee9
17 changed files with 440 additions and 97 deletions
@ -0,0 +1,73 @@ |
||||
|
||||
/*
 * Max-pooling forward pass.
 *
 * Each work-item produces one element of the pooled output: it decodes its
 * global id into (batch, channel, row, column) of the output, scans the
 * size x size input window belonging to that position, and stores the
 * largest value together with the flat input index it was read from.
 *
 * in_h, in_w, in_c  input height, width and channel count
 * stride            step between neighbouring windows
 * size              edge length of the pooling window
 * input             source values, addressed as
 *                   col + in_w*(row + in_h*(channel + batch*in_c))
 * output            receives one maximum per work-item
 * indexes           receives the flat input index of that maximum
 *                   (-1 when no tap exceeded -INFINITY)
 */
__kernel void forward(int in_h, int in_w, int in_c, int stride, int size, __global float *input, __global float *output, __global int *indexes)
{
    const int out_rows = (in_h - 1)/stride + 1;
    const int out_cols = (in_w - 1)/stride + 1;

    /* Unpack the flat work-item id, fastest-varying dimension first. */
    int rest = get_global_id(0);
    const int col = rest % out_cols;
    rest /= out_cols;
    const int row = rest % out_rows;
    rest /= out_rows;
    const int chan = rest % in_c;
    const int batch = rest / in_c;

    /* Same shift is applied to both axes of the window origin. */
    const int pad = (-size - 1)/2 + 1;

    const int dst = col + out_cols*(row + out_rows*(chan + in_c*batch));
    const int plane = chan + batch*in_c;

    float best = -INFINITY;
    int best_at = -1;

    for (int dy = 0; dy < size; ++dy) {
        const int y = pad + row*stride + dy;
        for (int dx = 0; dx < size; ++dx) {
            const int x = pad + col*stride + dx;

            /* Taps outside the image count as -INFINITY and can never win. */
            if (y < 0 || y >= in_h || x < 0 || x >= in_w) {
                continue;
            }

            const int src = x + in_w*(y + in_h*plane);
            const float v = input[src];
            if (v > best) {
                best = v;
                best_at = src;
            }
        }
    }

    output[dst] = best;
    indexes[dst] = best_at;
}
||||
|
||||
/*
 * Max-pooling backward pass.
 *
 * Each work-item owns one input element (its global id is the flat input
 * index). It visits the pooled outputs in a (2*area+1) x (2*area+1)
 * neighbourhood around its own position and accumulates delta from every
 * output whose recorded arg-max index equals this element.
 *
 * in_h, in_w, in_c  input height, width and channel count
 * stride            step between neighbouring windows
 * size              edge length of the pooling window
 * delta             gradient with respect to the pooled output
 * prev_delta        receives the gradient with respect to the input
 * indexes           per-output flat input index, compared against this
 *                   work-item's own id
 */
__kernel void backward(int in_h, int in_w, int in_c, int stride, int size, __global float *delta, __global float *prev_delta, __global int *indexes)
{
    const int out_rows = (in_h - 1)/stride + 1;
    const int out_cols = (in_w - 1)/stride + 1;
    const int reach = (size - 1)/stride;

    /* Unpack the flat input index, fastest-varying dimension first. */
    const int self = get_global_id(0);
    int rest = self;
    const int col = rest % in_w;
    rest /= in_w;
    const int row = rest % in_h;
    rest /= in_h;
    const int chan = rest % in_c;
    const int batch = rest / in_c;

    /* Same shift is applied to both axes. */
    const int pad = (-size - 1)/2 + 1;

    /* Output position at the centre of the search neighbourhood. */
    const int centre_row = (row - pad)/stride;
    const int centre_col = (col - pad)/stride;
    const int plane = chan + in_c*batch;

    float grad = 0;

    for (int dy = -reach; dy <= reach; ++dy) {
        const int oy = centre_row + dy;
        for (int dx = -reach; dx <= reach; ++dx) {
            const int ox = centre_col + dx;

            /* Skip positions outside the pooled output; nothing is read for them. */
            if (ox < 0 || ox >= out_cols || oy < 0 || oy >= out_rows) {
                continue;
            }

            const int o = ox + out_cols*(oy + out_rows*plane);
            if (indexes[o] == self) {
                grad += delta[o];
            }
        }
    }

    prev_delta[self] = grad;
}
@ -0,0 +1,21 @@ |
||||
|
||||
/*
 * Softmax forward pass.
 *
 * Each work-item normalises one run of n consecutive values starting at
 * offset get_global_id(0)*n: output[i] = exp(input[i] - logsumexp(input)).
 *
 * n       number of values handled by each work-item
 * input   source values
 * output  receives the normalised values, same layout as input
 */
__kernel void forward(int n, __global float *input, __global float *output)
{
    int b = get_global_id(0);

    int i;
    float sum = 0;
    float largest = -INFINITY;

    /* Find the maximum so the exponentials below cannot overflow.
     * The element is kept as a float: storing it in an int would truncate
     * fractional inputs and is out of range for very large or infinite
     * values. */
    for(i = 0; i < n; ++i){
        float val = input[i+b*n];
        largest = (val>largest) ? val : largest;
    }

    for(i = 0; i < n; ++i){
        sum += exp(input[i+b*n]-largest);
    }

    /* Turn the shifted sum into log-sum-exp; if the sum is zero, fall back
     * to largest-100 instead of taking log(0). */
    sum = (sum != 0) ? largest+log(sum) : largest-100;

    for(i = 0; i < n; ++i){
        output[i+b*n] = exp(input[i+b*n]-sum);
    }
}
||||
|
@ -1,16 +1,27 @@ |
||||
#ifndef SOFTMAX_LAYER_H
#define SOFTMAX_LAYER_H

#include "opencl.h"

/*
 * State of a softmax layer.
 * NOTE(review): field meanings below are inferred from names only; confirm
 * against the implementation file.
 */
typedef struct {
    int inputs;         /* presumably the number of values per sample */
    int batch;          /* presumably the number of samples per batch */
    float *delta;
    float *output;
    float *jacobian;
#ifdef GPU
    /* OpenCL buffer handles, present only in GPU builds. */
    cl_mem delta_cl;
    cl_mem output_cl;
#endif
} softmax_layer;

softmax_layer *make_softmax_layer(int batch, int inputs);
void forward_softmax_layer(const softmax_layer layer, float *input);
/* Declared once, with the (layer, delta) parameter list that parallels
 * backward_softmax_layer_gpu; a second declaration with a different
 * parameter list is a conflicting-types error in C. */
void backward_softmax_layer(const softmax_layer layer, float *delta);

#ifdef GPU
void forward_softmax_layer_gpu(const softmax_layer layer, cl_mem input);
void backward_softmax_layer_gpu(const softmax_layer layer, cl_mem delta);
#endif

#endif
||||
|
Loading…
Reference in new issue