|
|
|
@ -324,7 +324,7 @@ void transpose_32x32_bits_my(uint32_t *A, uint32_t *B, int lda, int ldb) |
|
|
|
|
unsigned int x, y; |
|
|
|
|
for (y = 0; y < 32; ++y) { |
|
|
|
|
for (x = 0; x < 32; ++x) { |
|
|
|
|
if (A[y * lda] & (1 << x)) B[x * ldb] |= (uint32_t)1 << y; |
|
|
|
|
if (A[y * lda] & ((uint32_t)1 << x)) B[x * ldb] |= (uint32_t)1 << y; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -636,48 +636,48 @@ void check_cpu_features(void) { |
|
|
|
|
// Detect Features
|
|
|
|
|
if (nIds >= 0x00000001) { |
|
|
|
|
cpuid(info, 0x00000001); |
|
|
|
|
HW_MMX = (info[3] & ((int)1 << 23)) != 0; |
|
|
|
|
HW_SSE = (info[3] & ((int)1 << 25)) != 0; |
|
|
|
|
HW_SSE2 = (info[3] & ((int)1 << 26)) != 0; |
|
|
|
|
HW_SSE3 = (info[2] & ((int)1 << 0)) != 0; |
|
|
|
|
HW_MMX = (info[3] & ((uint32_t)1 << 23)) != 0; |
|
|
|
|
HW_SSE = (info[3] & ((uint32_t)1 << 25)) != 0; |
|
|
|
|
HW_SSE2 = (info[3] & ((uint32_t)1 << 26)) != 0; |
|
|
|
|
HW_SSE3 = (info[2] & ((uint32_t)1 << 0)) != 0; |
|
|
|
|
|
|
|
|
|
HW_SSSE3 = (info[2] & ((int)1 << 9)) != 0; |
|
|
|
|
HW_SSE41 = (info[2] & ((int)1 << 19)) != 0; |
|
|
|
|
HW_SSE42 = (info[2] & ((int)1 << 20)) != 0; |
|
|
|
|
HW_AES = (info[2] & ((int)1 << 25)) != 0; |
|
|
|
|
HW_SSSE3 = (info[2] & ((uint32_t)1 << 9)) != 0; |
|
|
|
|
HW_SSE41 = (info[2] & ((uint32_t)1 << 19)) != 0; |
|
|
|
|
HW_SSE42 = (info[2] & ((uint32_t)1 << 20)) != 0; |
|
|
|
|
HW_AES = (info[2] & ((uint32_t)1 << 25)) != 0; |
|
|
|
|
|
|
|
|
|
HW_AVX = (info[2] & ((int)1 << 28)) != 0; |
|
|
|
|
HW_FMA3 = (info[2] & ((int)1 << 12)) != 0; |
|
|
|
|
HW_AVX = (info[2] & ((uint32_t)1 << 28)) != 0; |
|
|
|
|
HW_FMA3 = (info[2] & ((uint32_t)1 << 12)) != 0; |
|
|
|
|
|
|
|
|
|
HW_RDRAND = (info[2] & ((int)1 << 30)) != 0; |
|
|
|
|
HW_RDRAND = (info[2] & ((uint32_t)1 << 30)) != 0; |
|
|
|
|
} |
|
|
|
|
if (nIds >= 0x00000007) { |
|
|
|
|
cpuid(info, 0x00000007); |
|
|
|
|
HW_AVX2 = (info[1] & ((int)1 << 5)) != 0; |
|
|
|
|
|
|
|
|
|
HW_BMI1 = (info[1] & ((int)1 << 3)) != 0; |
|
|
|
|
HW_BMI2 = (info[1] & ((int)1 << 8)) != 0; |
|
|
|
|
HW_ADX = (info[1] & ((int)1 << 19)) != 0; |
|
|
|
|
HW_SHA = (info[1] & ((int)1 << 29)) != 0; |
|
|
|
|
HW_PREFETCHWT1 = (info[2] & ((int)1 << 0)) != 0; |
|
|
|
|
|
|
|
|
|
HW_AVX512F = (info[1] & ((int)1 << 16)) != 0; |
|
|
|
|
HW_AVX512CD = (info[1] & ((int)1 << 28)) != 0; |
|
|
|
|
HW_AVX512PF = (info[1] & ((int)1 << 26)) != 0; |
|
|
|
|
HW_AVX512ER = (info[1] & ((int)1 << 27)) != 0; |
|
|
|
|
HW_AVX512VL = (info[1] & ((int)1 << 31)) != 0; |
|
|
|
|
HW_AVX512BW = (info[1] & ((int)1 << 30)) != 0; |
|
|
|
|
HW_AVX512DQ = (info[1] & ((int)1 << 17)) != 0; |
|
|
|
|
HW_AVX512IFMA = (info[1] & ((int)1 << 21)) != 0; |
|
|
|
|
HW_AVX512VBMI = (info[2] & ((int)1 << 1)) != 0; |
|
|
|
|
HW_AVX2 = (info[1] & ((uint32_t)1 << 5)) != 0; |
|
|
|
|
|
|
|
|
|
HW_BMI1 = (info[1] & ((uint32_t)1 << 3)) != 0; |
|
|
|
|
HW_BMI2 = (info[1] & ((uint32_t)1 << 8)) != 0; |
|
|
|
|
HW_ADX = (info[1] & ((uint32_t)1 << 19)) != 0; |
|
|
|
|
HW_SHA = (info[1] & ((uint32_t)1 << 29)) != 0; |
|
|
|
|
HW_PREFETCHWT1 = (info[2] & ((uint32_t)1 << 0)) != 0; |
|
|
|
|
|
|
|
|
|
HW_AVX512F = (info[1] & ((uint32_t)1 << 16)) != 0; |
|
|
|
|
HW_AVX512CD = (info[1] & ((uint32_t)1 << 28)) != 0; |
|
|
|
|
HW_AVX512PF = (info[1] & ((uint32_t)1 << 26)) != 0; |
|
|
|
|
HW_AVX512ER = (info[1] & ((uint32_t)1 << 27)) != 0; |
|
|
|
|
HW_AVX512VL = (info[1] & ((uint32_t)1 << 31)) != 0; |
|
|
|
|
HW_AVX512BW = (info[1] & ((uint32_t)1 << 30)) != 0; |
|
|
|
|
HW_AVX512DQ = (info[1] & ((uint32_t)1 << 17)) != 0; |
|
|
|
|
HW_AVX512IFMA = (info[1] & ((uint32_t)1 << 21)) != 0; |
|
|
|
|
HW_AVX512VBMI = (info[2] & ((uint32_t)1 << 1)) != 0; |
|
|
|
|
} |
|
|
|
|
if (nExIds >= 0x80000001) { |
|
|
|
|
cpuid(info, 0x80000001); |
|
|
|
|
HW_x64 = (info[3] & ((int)1 << 29)) != 0; |
|
|
|
|
HW_ABM = (info[2] & ((int)1 << 5)) != 0; |
|
|
|
|
HW_SSE4a = (info[2] & ((int)1 << 6)) != 0; |
|
|
|
|
HW_FMA4 = (info[2] & ((int)1 << 16)) != 0; |
|
|
|
|
HW_XOP = (info[2] & ((int)1 << 11)) != 0; |
|
|
|
|
HW_x64 = (info[3] & ((uint32_t)1 << 29)) != 0; |
|
|
|
|
HW_ABM = (info[2] & ((uint32_t)1 << 5)) != 0; |
|
|
|
|
HW_SSE4a = (info[2] & ((uint32_t)1 << 6)) != 0; |
|
|
|
|
HW_FMA4 = (info[2] & ((uint32_t)1 << 16)) != 0; |
|
|
|
|
HW_XOP = (info[2] & ((uint32_t)1 << 11)) != 0; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|