fixes for modern clang on linux

pull/3685/head
Stefano Sinigardi 6 years ago
parent 6e7c3a53c7
commit 769f76ef62
  1. .travis.yml (4 lines changed)
  2. src/gemm.c (21 lines changed)

@@ -115,7 +115,7 @@ matrix:
- additional_defines="" - additional_defines=""
- USE_VCPKG=true - USE_VCPKG=true
- VCPKG_DEFINES="-DCMAKE_TOOLCHAIN_FILE=$HOME/vcpkg/scripts/buildsystems/vcpkg.cmake" - VCPKG_DEFINES="-DCMAKE_TOOLCHAIN_FILE=$HOME/vcpkg/scripts/buildsystems/vcpkg.cmake"
- MATRIX_EVAL="export CC=gcc-8 && export CXX=g++-8" - MATRIX_EVAL=""
- os: linux - os: linux
compiler: clang compiler: clang
@@ -189,7 +189,7 @@ matrix:
- CUDA_PATH=/usr/local/cuda-10.0 - CUDA_PATH=/usr/local/cuda-10.0
- CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.0 - CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.0
- LD_LIBRARY_PATH="/usr/local/cuda-10.0/lib64:/usr/local/cuda-10.0/lib64/stubs:${LD_LIBRARY_PATH}" - LD_LIBRARY_PATH="/usr/local/cuda-10.0/lib64:/usr/local/cuda-10.0/lib64/stubs:${LD_LIBRARY_PATH}"
- MATRIX_EVAL="export CC=gcc-8 && export CXX=g++-8 && wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_10.0.130-1_amd64.deb && sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/7fa2af80.pub && sudo dpkg -i cuda-repo-ubuntu1404_10.0.130-1_amd64.deb && wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb && sudo dpkg -i nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb && sudo apt-get -y update && sudo apt-get install -y --no-install-recommends cuda-compiler-10-0 cuda-libraries-dev-10-0 cuda-driver-dev-10-0 cuda-cudart-dev-10-0 cuda-cublas-dev-10-0 cuda-curand-dev-10-0 && sudo apt-get install -y --no-install-recommends libcudnn7-dev && sudo ln -s /usr/local/cuda-10.0/lib64/stubs/libcuda.so /usr/local/cuda-10.0/lib64/stubs/libcuda.so.1" - MATRIX_EVAL="wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_10.0.130-1_amd64.deb && sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/7fa2af80.pub && sudo dpkg -i cuda-repo-ubuntu1404_10.0.130-1_amd64.deb && wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb && sudo dpkg -i nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb && sudo apt-get -y update && sudo apt-get install -y --no-install-recommends cuda-compiler-10-0 cuda-libraries-dev-10-0 cuda-driver-dev-10-0 cuda-cudart-dev-10-0 cuda-cublas-dev-10-0 cuda-curand-dev-10-0 && sudo apt-get install -y --no-install-recommends libcudnn7-dev && sudo ln -s /usr/local/cuda-10.0/lib64/stubs/libcuda.so /usr/local/cuda-10.0/lib64/stubs/libcuda.so.1"
# allow_failures: # allow_failures:
# - name: macOS - vcpkg # - name: macOS - vcpkg

@@ -550,7 +550,26 @@ static inline float _castu32_f32(uint32_t a) {
} }
static inline float _mm256_extract_float32(__m256 a, const int index) { static inline float _mm256_extract_float32(__m256 a, const int index) {
return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), index)); switch(index) {
case 0:
return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 0));
case 1:
return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 1));
case 2:
return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 2));
case 3:
return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 3));
case 4:
return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 4));
case 5:
return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 5));
case 6:
return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 6));
case 7:
return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 7));
default:
return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 0));
}
} }
void asm_cpuid(uint32_t* abcd, uint32_t eax) void asm_cpuid(uint32_t* abcd, uint32_t eax)

Loading…
Cancel
Save