Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
openSUSE:Leap:16.0:Staging:B
xsimd
0002-Fix-detection-of-SSE-AVX-AVX512-when-they-...
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 0002-Fix-detection-of-SSE-AVX-AVX512-when-they-are-explic.patch of Package xsimd
From c2974c874e14557490eab76d2eebf9f8b9eb88f1 Mon Sep 17 00:00:00 2001 From: Dmitry Kazakov <dimula73@gmail.com> Date: Tue, 28 May 2024 22:21:08 +0200 Subject: [PATCH 2/2] Fix detection of SSE/AVX/AVX512 when they are explicitly disabled by OS Some CPU vulnerability mitigations may disable AVX functionality on the hardware level via the XCR0 register. We should check that manually to verify that OS actually allows us to use this feature. See https://bugs.kde.org/show_bug.cgi?id=484622 Fix #1025 --- include/xsimd/config/xsimd_cpuid.hpp | 91 ++++++++++++++++++++++------ 1 file changed, 72 insertions(+), 19 deletions(-) diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 30a9da2..8021fce 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -122,6 +122,35 @@ namespace xsimd #endif #elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86) + + auto get_xcr0_low = []() noexcept + { + uint32_t xcr0; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + xcr0 = (uint32_t)_xgetbv(0); + +#elif defined(__GNUC__) + + __asm__( + "xorl %%ecx, %%ecx\n" + "xgetbv\n" + : "=a"(xcr0) + : +#if defined(__i386__) + : "ecx", "edx" +#else + : "rcx", "rdx" +#endif + ); + +#else /* _MSC_VER < 1400 */ +#error "_MSC_VER < 1400 is not supported" +#endif /* _MSC_VER && _MSC_VER >= 1400 */ + return xcr0; + }; + auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept { @@ -156,19 +185,43 @@ namespace xsimd get_cpuid(regs1, 0x1); - sse2 = regs1[3] >> 26 & 1; - sse3 = regs1[2] >> 0 & 1; - ssse3 = regs1[2] >> 9 & 1; - sse4_1 = regs1[2] >> 19 & 1; - sse4_2 = regs1[2] >> 20 & 1; - fma3_sse42 = regs1[2] >> 12 & 1; + // OS can explicitly disable the usage of SSE/AVX extensions + // by setting an appropriate flag in CR0 register + // + // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html + + unsigned sse_state_os_enabled = 1; + unsigned avx_state_os_enabled = 1; + unsigned avx512_state_os_enabled = 1; + + // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit + // 18] to enable XSETBV/XGETBV instructions to access XCR0 and + // to support processor extended state management using + // XSAVE/XRSTOR. + bool osxsave = regs1[2] >> 27 & 1; + if (osxsave) + { + + uint32_t xcr0 = get_xcr0_low(); + + sse_state_os_enabled = xcr0 >> 1 & 1; + avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled; + avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled; + } + + sse2 = regs1[3] >> 26 & sse_state_os_enabled; + sse3 = regs1[2] >> 0 & sse_state_os_enabled; + ssse3 = regs1[2] >> 9 & sse_state_os_enabled; + sse4_1 = regs1[2] >> 19 & sse_state_os_enabled; + sse4_2 = regs1[2] >> 20 & sse_state_os_enabled; + fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled; - avx = regs1[2] >> 28 & 1; + avx = regs1[2] >> 28 & avx_state_os_enabled; fma3_avx = avx && fma3_sse42; int regs8[4]; get_cpuid(regs8, 0x80000001); - fma4 = regs8[2] >> 16 & 1; + fma4 = regs8[2] >> 16 & avx_state_os_enabled; // sse4a = regs[2] >> 6 & 1; @@ -176,23 +229,23 @@ namespace xsimd int regs7[4]; get_cpuid(regs7, 0x7); - avx2 = regs7[1] >> 5 & 1; + avx2 = regs7[1] >> 5 & avx_state_os_enabled; int regs7a[4]; get_cpuid(regs7a, 0x7, 0x1); - avxvnni = regs7a[0] >> 4 & 1; + avxvnni = regs7a[0] >> 4 & avx_state_os_enabled; fma3_avx2 = avx2 && fma3_sse42; - avx512f = regs7[1] >> 16 & 1; - avx512cd = regs7[1] >> 28 & 1; - avx512dq = regs7[1] >> 17 & 1; - avx512bw = regs7[1] >> 30 & 1; - avx512er = regs7[1] >> 27 & 1; - avx512pf = regs7[1] >> 26 & 1; - avx512ifma = regs7[1] >> 21 & 1; - avx512vbmi = regs7[2] >> 1 & 1; - avx512vnni_bw = regs7[2] >> 11 & 1; + avx512f = regs7[1] >> 16 & avx512_state_os_enabled; + avx512cd = regs7[1] >> 28 & avx512_state_os_enabled; + avx512dq = regs7[1] >> 17 & avx512_state_os_enabled; + avx512bw = regs7[1] >> 30 & avx512_state_os_enabled; + avx512er = regs7[1] >> 27 & avx512_state_os_enabled; + avx512pf = regs7[1] >> 26 & avx512_state_os_enabled; + avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled; + avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled; + avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled; avx512vnni_vbmi = avx512vbmi && avx512vnni_bw; #endif } -- 2.45.2
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor