mirror of https://github.com/AlexeyAB/darknet.git
parent
ae53edc6a4
commit
d8adaf8ea6
17 changed files with 287 additions and 127 deletions
@ -1,70 +0,0 @@ |
||||
#include "cuda_runtime.h" |
||||
#include "curand.h" |
||||
#include "cublas_v2.h" |
||||
|
||||
extern "C" { |
||||
#include "softmax_layer.h" |
||||
#include "cuda.h" |
||||
#include "blas.h" |
||||
} |
||||
|
||||
/*
 * Temperature-scaled softmax, one thread per row.
 *
 * Launch layout: the flat row index is built from a 2D grid of 1D blocks
 * ((blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x); threads
 * whose index is >= batch exit immediately, so the grid may over-cover.
 * No shared memory is used.
 *
 *   n      - number of elements in each row
 *   batch  - number of rows to process
 *   input  - batch*n floats, row b starts at input + b*n
 *   temp   - temperature every input is divided by before exponentiation
 *   output - batch*n floats, same layout as input; fully overwritten
 */
__global__ void forward_softmax_layer_kernel(int n, int batch, float *input, float temp, float *output)
{
    int b = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(b >= batch) return;

    const float *in = input + b*n;
    float *out = output + b*n;

    int i;
    float sum = 0;
    float largest = -INFINITY;

    /* Row maximum. The element must stay a float here: storing it in an int
     * truncates towards zero and yields a wrong maximum for fractional or
     * negative inputs. */
    for(i = 0; i < n; ++i){
        float val = in[i];
        largest = (val>largest) ? val : largest;
    }

    /* Sum of exponentials, shifted by the maximum so the exponent is <= 0. */
    for(i = 0; i < n; ++i){
        sum += expf(in[i]/temp-largest/temp);
    }

    /* Reuse `sum` as the log of the normaliser: largest/temp + log(sum).
     * If the accumulated sum is exactly zero, fall back to largest-100. */
    sum = (sum != 0) ? largest/temp+logf(sum) : largest-100;

    /* exp(x/temp - log_normaliser) is the normalised probability. */
    for(i = 0; i < n; ++i){
        out[i] = expf(in[i]/temp-sum);
    }
}
||||
|
||||
/*
 * Hands the layer's GPU output buffer and host output buffer to
 * cuda_pull_array together with the element count inputs*batch.
 * (Presumably a device-to-host copy; see cuda_pull_array for the exact
 * semantics.)
 */
extern "C" void pull_softmax_layer_output(const softmax_layer layer)
{
    const int count = layer.inputs*layer.batch;
    cuda_pull_array(layer.output_gpu, layer.output, count);
}
||||
|
||||
/*
 * Launches forward_softmax_layer_kernel over the layer's input.
 *
 * The layer's `inputs` values per batch item are split into `groups`
 * equally sized rows, so the kernel sees batch*groups rows of
 * inputs/groups elements each. It reads state.input and writes
 * layer.output_gpu, using layer.temperature as the divisor.
 * A launch error is reported through check_error.
 */
extern "C" void forward_softmax_layer_gpu(const softmax_layer layer, network_state state)
{
    const int row_len  = layer.inputs / layer.groups;
    const int num_rows = layer.batch * layer.groups;

    forward_softmax_layer_kernel<<<cuda_gridsize(num_rows), BLOCK>>>(
        row_len, num_rows, state.input, layer.temperature, layer.output_gpu);

    check_error(cudaPeekAtLastError());
}
||||
|
||||
/*
 * Backward pass: calls axpy_ongpu over batch*inputs elements with a scale
 * of 1 and unit strides, passing layer.delta_gpu as the source and
 * state.delta as the destination.
 * NOTE(review): presumably this accumulates the layer delta into
 * state.delta (y += 1*x) - confirm against axpy_ongpu in blas.h.
 */
extern "C" void backward_softmax_layer_gpu(const softmax_layer layer, network_state state)
{
    const int count = layer.batch*layer.inputs;
    axpy_ongpu(count, 1, layer.delta_gpu, 1, state.delta, 1);
}
||||
|
||||
/* This is if you want softmax w/o log-loss classification. You probably don't. |
||||
int i,j,b; |
||||
for(b = 0; b < layer.batch; ++b){ |
||||
for(i = 0; i < layer.inputs; ++i){ |
||||
for(j = 0; j < layer.inputs; ++j){ |
||||
int d = (i==j); |
||||
layer.jacobian[b*layer.inputs*layer.inputs + i*layer.inputs + j] = |
||||
layer.output[b*layer.inputs + i] * (d - layer.output[b*layer.inputs + j]); |
||||
} |
||||
} |
||||
} |
||||
for(b = 0; b < layer.batch; ++b){ |
||||
int M = layer.inputs; |
||||
int N = 1; |
||||
int K = layer.inputs; |
||||
float *A = layer.jacobian + b*layer.inputs*layer.inputs; |
||||
float *B = layer.delta + b*layer.inputs; |
||||
float *C = delta + b*layer.inputs; |
||||
gemm(0,0,M,N,K,1,A,K,B,N,0,C,N); |
||||
} |
||||
*/ |
@ -0,0 +1,49 @@ |
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
#include "tree.h" |
||||
#include "utils.h" |
||||
|
||||
tree *read_tree(char *filename) |
||||
{ |
||||
tree t = {0}; |
||||
FILE *fp = fopen(filename, "r"); |
||||
|
||||
char *line; |
||||
int last_parent = -1; |
||||
int group_size = 0; |
||||
int groups = 0; |
||||
int n = 0; |
||||
while((line=fgetl(fp)) != 0){ |
||||
char *id = calloc(256, sizeof(char)); |
||||
int parent = -1; |
||||
sscanf(line, "%s %d", id, &parent); |
||||
t.parent = realloc(t.parent, (n+1)*sizeof(int)); |
||||
t.parent[n] = parent; |
||||
t.name = realloc(t.name, (n+1)*sizeof(char *)); |
||||
t.name[n] = id; |
||||
if(parent != last_parent){ |
||||
++groups; |
||||
t.group_size = realloc(t.group_size, groups * sizeof(int)); |
||||
t.group_size[groups - 1] = group_size; |
||||
group_size = 0; |
||||
last_parent = parent; |
||||
} |
||||
++n; |
||||
++group_size; |
||||
} |
||||
++groups; |
||||
t.group_size = realloc(t.group_size, groups * sizeof(int)); |
||||
t.group_size[groups - 1] = group_size; |
||||
t.n = n; |
||||
t.groups = groups; |
||||
t.leaf = calloc(n, sizeof(int)); |
||||
int i; |
||||
for(i = 0; i < n; ++i) t.leaf[i] = 1; |
||||
for(i = 0; i < n; ++i) if(t.parent[i] >= 0) t.leaf[t.parent[i]] = 0; |
||||
|
||||
fclose(fp); |
||||
tree *tree_ptr = calloc(1, sizeof(tree)); |
||||
*tree_ptr = t; |
||||
//error(0);
|
||||
return tree_ptr; |
||||
} |
@ -0,0 +1,16 @@ |
||||
#ifndef TREE_H |
||||
#define TREE_H |
||||
|
||||
/*
 * Hierarchy of named nodes as produced by read_tree.
 * All arrays are heap-allocated; the per-node arrays hold `n` entries.
 */
typedef struct{
    int *leaf;          /* per node: 1 unless another node lists it as parent */
    int n;              /* number of nodes (one per line read) */
    int *parent;        /* per node: index of the parent, -1 when none was given */
    char **name;        /* per node: NUL-terminated identifier string */

    int groups;         /* number of entries in group_size */
    int *group_size;    /* node counts recorded by read_tree each time the
                           parent value changes, plus one trailing entry */
} tree;
||||
|
||||
tree *read_tree(char *filename); |
||||
|
||||
#endif |
Loading…
Reference in new issue