|
|
@ -732,6 +732,8 @@ network parse_network_cfg_custom(char *filename, int batch) |
|
|
|
|
|
|
|
|
|
|
|
float bflops = 0; |
|
|
|
float bflops = 0; |
|
|
|
size_t workspace_size = 0; |
|
|
|
size_t workspace_size = 0; |
|
|
|
|
|
|
|
size_t max_inputs = 0; |
|
|
|
|
|
|
|
size_t max_outputs = 0; |
|
|
|
n = n->next; |
|
|
|
n = n->next; |
|
|
|
int count = 0; |
|
|
|
int count = 0; |
|
|
|
free_section(s); |
|
|
|
free_section(s); |
|
|
@ -806,6 +808,8 @@ network parse_network_cfg_custom(char *filename, int batch) |
|
|
|
option_unused(options); |
|
|
|
option_unused(options); |
|
|
|
net.layers[count] = l; |
|
|
|
net.layers[count] = l; |
|
|
|
if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; |
|
|
|
if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; |
|
|
|
|
|
|
|
if (l.inputs > max_inputs) max_inputs = l.inputs; |
|
|
|
|
|
|
|
if (l.outputs > max_outputs) max_outputs = l.outputs; |
|
|
|
free_section(s); |
|
|
|
free_section(s); |
|
|
|
n = n->next; |
|
|
|
n = n->next; |
|
|
|
++count; |
|
|
|
++count; |
|
|
@ -828,6 +832,14 @@ network parse_network_cfg_custom(char *filename, int batch) |
|
|
|
net.workspace = cuda_make_array(0, workspace_size/sizeof(float) + 1); |
|
|
|
net.workspace = cuda_make_array(0, workspace_size/sizeof(float) + 1); |
|
|
|
int size = get_network_input_size(net) * net.batch; |
|
|
|
int size = get_network_input_size(net) * net.batch; |
|
|
|
net.input_state_gpu = cuda_make_array(0, size); |
|
|
|
net.input_state_gpu = cuda_make_array(0, size); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// pre-allocate memory for inference on Tensor Cores (fp16)
|
|
|
|
|
|
|
|
if (net.cudnn_half) { |
|
|
|
|
|
|
|
*net.max_input16_size = max_inputs; |
|
|
|
|
|
|
|
check_error(cudaMalloc((void **)net.input16_gpu, *net.max_input16_size * sizeof(short))); //sizeof(half)
|
|
|
|
|
|
|
|
*net.max_output16_size = max_outputs; |
|
|
|
|
|
|
|
check_error(cudaMalloc((void **)net.output16_gpu, *net.max_output16_size * sizeof(short))); //sizeof(half)
|
|
|
|
|
|
|
|
} |
|
|
|
}else { |
|
|
|
}else { |
|
|
|
net.workspace = calloc(1, workspace_size); |
|
|
|
net.workspace = calloc(1, workspace_size); |
|
|
|
} |
|
|
|
} |
|
|
|