Added cudaGetLastError() after failed cudaHostAlloc() calls to reset the last CUDA error

pull/2352/head
AlexeyAB 6 years ago
parent 381f90ebb8
commit c71354ab2e
  1. 3
      src/network.c
  2. 7
      src/parser.c
  3. 12
      src/yolo_layer.c

@@ -459,12 +459,13 @@ int resize_network(network *net, int w, int h)
const int size = get_network_input_size(*net) * net->batch; const int size = get_network_input_size(*net) * net->batch;
#ifdef GPU #ifdef GPU
if(gpu_index >= 0){ if(gpu_index >= 0){
printf(" try to allocate workspace = %zu * sizeof(float), ", workspace_size / sizeof(float) + 1); printf(" try to allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000);
net->workspace = cuda_make_array(0, workspace_size/sizeof(float) + 1); net->workspace = cuda_make_array(0, workspace_size/sizeof(float) + 1);
net->input_state_gpu = cuda_make_array(0, size); net->input_state_gpu = cuda_make_array(0, size);
if (cudaSuccess == cudaHostAlloc(&net->input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) if (cudaSuccess == cudaHostAlloc(&net->input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped))
net->input_pinned_cpu_flag = 1; net->input_pinned_cpu_flag = 1;
else { else {
cudaGetLastError(); // reset CUDA-error
net->input_pinned_cpu = calloc(size, sizeof(float)); net->input_pinned_cpu = calloc(size, sizeof(float));
net->input_pinned_cpu_flag = 0; net->input_pinned_cpu_flag = 0;
} }

@@ -853,7 +853,10 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
int size = get_network_input_size(net) * net.batch; int size = get_network_input_size(net) * net.batch;
net.input_state_gpu = cuda_make_array(0, size); net.input_state_gpu = cuda_make_array(0, size);
if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1; if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1;
else net.input_pinned_cpu = calloc(size, sizeof(float)); else {
cudaGetLastError(); // reset CUDA-error
net.input_pinned_cpu = calloc(size, sizeof(float));
}
// pre-allocate memory for inference on Tensor Cores (fp16) // pre-allocate memory for inference on Tensor Cores (fp16)
if (net.cudnn_half) { if (net.cudnn_half) {
@@ -863,7 +866,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps)
check_error(cudaMalloc((void **)net.output16_gpu, *net.max_output16_size * sizeof(short))); //sizeof(half) check_error(cudaMalloc((void **)net.output16_gpu, *net.max_output16_size * sizeof(short))); //sizeof(half)
} }
if (workspace_size) { if (workspace_size) {
printf(" Allocate workspace_size = %zu \n", workspace_size); printf(" Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000);
net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1); net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
} }
else { else {

@@ -56,11 +56,17 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int
free(l.output); free(l.output);
if (cudaSuccess == cudaHostAlloc(&l.output, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.output_pinned = 1; if (cudaSuccess == cudaHostAlloc(&l.output, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.output_pinned = 1;
else l.output = calloc(batch*l.outputs, sizeof(float)); else {
cudaGetLastError(); // reset CUDA-error
l.output = calloc(batch*l.outputs, sizeof(float));
}
free(l.delta); free(l.delta);
if (cudaSuccess == cudaHostAlloc(&l.delta, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.delta_pinned = 1; if (cudaSuccess == cudaHostAlloc(&l.delta, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.delta_pinned = 1;
else l.delta = calloc(batch*l.outputs, sizeof(float)); else {
cudaGetLastError(); // reset CUDA-error
l.delta = calloc(batch*l.outputs, sizeof(float));
}
#endif #endif
fprintf(stderr, "yolo\n"); fprintf(stderr, "yolo\n");
@@ -84,6 +90,7 @@ void resize_yolo_layer(layer *l, int w, int h)
if (l->output_pinned) { if (l->output_pinned) {
cudaFreeHost(l->output); cudaFreeHost(l->output);
if (cudaSuccess != cudaHostAlloc(&l->output, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { if (cudaSuccess != cudaHostAlloc(&l->output, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) {
cudaGetLastError(); // reset CUDA-error
l->output = realloc(l->output, l->batch*l->outputs * sizeof(float)); l->output = realloc(l->output, l->batch*l->outputs * sizeof(float));
l->output_pinned = 0; l->output_pinned = 0;
} }
@@ -92,6 +99,7 @@ void resize_yolo_layer(layer *l, int w, int h)
if (l->delta_pinned) { if (l->delta_pinned) {
cudaFreeHost(l->delta); cudaFreeHost(l->delta);
if (cudaSuccess != cudaHostAlloc(&l->delta, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { if (cudaSuccess != cudaHostAlloc(&l->delta, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) {
cudaGetLastError(); // reset CUDA-error
l->delta = realloc(l->delta, l->batch*l->outputs * sizeof(float)); l->delta = realloc(l->delta, l->batch*l->outputs * sizeof(float));
l->delta_pinned = 0; l->delta_pinned = 0;
} }

Loading…
Cancel
Save