@@ -975,3 +975,50 @@ extern "C" void softmax_tree_gpu(float *input, int spatial, int batch, int strid
    cuda_free((float *)tree_groups_size);
    cuda_free((float *)tree_groups_offset);
}
|
__global__ void fix_nan_and_inf_kernel(float *input, size_t size)
{
    const int index = blockIdx.x*blockDim.x + threadIdx.x;
    if (index < size) {
        float val = input[index];
        if (isnan(val) || isinf(val))
            input[index] = index; // deterministic index used as a cheap pseudo-random replacement
    }
}
|
extern "C" void fix_nan_and_inf(float *input, size_t size) |
|
|
|
|
{ |
|
|
|
|
const int block_size = BLOCK; |
|
|
|
|
const int num_blocks = get_number_of_blocks(size, block_size); |
|
|
|
|
fix_nan_and_inf_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> >(input, size); |
|
|
|
|
CHECK_CUDA(cudaPeekAtLastError()); |
|
|
|
|
//CHECK_CUDA(cudaDeviceSynchronize()); |
|
|
|
|
} |
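
/* Usage sketch, not part of this patch: one way a caller might scrub a layer's
   GPU output after the forward pass. The fields l.output_gpu, l.outputs and
   l.batch are assumptions based on Darknet's layer struct.

       fix_nan_and_inf(l.output_gpu, (size_t)l.outputs * l.batch);

   The launch is asynchronous on get_cuda_stream(); later kernels issued on the
   same stream are ordered after it, so no extra synchronization is required. */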
|
__global__ void is_nan_or_inf_kernel(float *input, size_t size, int *pinned_return)
{
    const int index = blockIdx.x*blockDim.x + threadIdx.x;
    if (index < size) {
        float val = input[index];
        if (isnan(val) || isinf(val))
            *pinned_return = 1; // racy across threads, but benign: every writer stores 1
    }
}
|
extern "C" int is_nan_or_inf(float *input, size_t size) |
|
|
|
|
{ |
|
|
|
|
int *pinned_return; |
|
|
|
|
CHECK_CUDA(cudaHostAlloc(&pinned_return, sizeof(int), cudaHostRegisterMapped)); |
|
|
|
|
*pinned_return = 0; |
|
|
|
|
|
|
|
|
|
const int block_size = BLOCK; |
|
|
|
|
const int num_blocks = get_number_of_blocks(size, block_size); |
|
|
|
|
is_nan_or_inf_kernel << <num_blocks, block_size, 0, get_cuda_stream() >> >(input, size, pinned_return); |
|
|
|
|
CHECK_CUDA(cudaDeviceSynchronize()); |
|
|
|
|
int ret_val = *pinned_return; |
|
|
|
|
|
|
|
|
|
CHECK_CUDA(cudaFreeHost(pinned_return)); |
|
|
|
|
return ret_val; |
|
|
|
|
} |
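
/* Usage sketch, not part of this patch: a hypothetical debugging guard that
   checks the gradient buffer before a weight update. l.weight_updates_gpu and
   l.nweights are assumptions based on Darknet's layer struct.

       if (is_nan_or_inf(l.weight_updates_gpu, l.nweights)) {
           printf("NaN/Inf detected in weight updates - skipping update\n");
       }

   Because is_nan_or_inf() calls cudaDeviceSynchronize(), it stalls the entire
   device; it suits debugging runs rather than the steady-state training path. */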