Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-12-SP1:GA
xen.10697
5c7e716d-x86-pv-restrict-cr4-fsgsbase.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File 5c7e716d-x86-pv-restrict-cr4-fsgsbase.patch of Package xen.10697
# Commit eccc170053e46b4ab1d9e7485c09e210be15bbd7 # Date 2019-03-05 13:54:05 +0100 # Author Andrew Cooper <andrew.cooper3@citrix.com> # Committer Jan Beulich <jbeulich@suse.com> x86/pv: Don't have %cr4.fsgsbase active behind a guest kernels back Currently, a 64bit PV guest can appear to set and clear FSGSBASE in %cr4, but the bit remains set in hardware. Therefore, the {RD,WR}{FS,GS}BASE are usable even when the guest kernel believes that they are disabled. The FSGSBASE feature isn't currently supported in Linux, and its context switch path has some optimisations which rely on userspace being unable to use the WR{FS,GS}BASE instructions. Xen's current behaviour undermines this expectation. In 64bit PV guest context, always load the guest kernels setting of FSGSBASE into %cr4. This requires adjusting how Xen uses the {RD,WR}{FS,GS}BASE instructions. * Delete the cpu_has_fsgsbase helper. It is no longer safe, as users need to check %cr4 directly. * The raw __rd{fs,gs}base() helpers are only safe to use when %cr4.fsgsbase is set. Comment this property. * The {rd,wr}{fs,gs}{base,shadow}() and read_msr() helpers are updated to use the current %cr4 value to determine which mechanism to use. * toggle_guest_mode() and save_segments() are update to avoid reading fs/gsbase if the values in hardware cannot be stale WRT struct vcpu. A consequence of this is that the write_cr() path needs to cache the current bases, as subsequent context switches will skip saving the values. * write_cr4() is updated to ensure that the shadow %cr4.fsgsbase value is observed in a safe way WRT the hardware setting, if an interrupt happens to hit in the middle. * load_segments() is updated to use the VMLOAD optimisation if FSGSBASE is unavailable, even if only gs_shadow needs updating. As a minor perf improvement, check cpu_has_svm first to short circuit a context-dependent conditional on Intel hardware. * pv_make_cr4() is updated for 64bit PV guests to use the guest kernels choice of FSGSBASE. 
This is part of XSA-293. Reported-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -440,6 +440,16 @@ unsigned long pv_make_cr4(const struct v if ( d->arch.vtsc || (v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_TSD) ) cr4 |= X86_CR4_TSD; + /* + * The {RD,WR}{FS,GS}BASE are only useable in 64bit code segments. While + * we must not have CR4.FSGSBASE set behind the back of a 64bit PV kernel, + * we do leave it set in 32bit PV context to speed up Xen's context switch + * path. + */ + if ( !is_pv_32bit_domain(d) && + !(v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_FSGSBASE) ) + cr4 &= ~X86_CR4_FSGSBASE; + return cr4; } @@ -1463,7 +1473,8 @@ static void save_segments(struct vcpu *v regs->fs = read_segment_register(fs); regs->gs = read_segment_register(gs); - if ( cpu_has_fsgsbase && !is_pv_32bit_vcpu(v) ) + /* %fs/%gs bases can only be stale if WR{FS,GS}BASE are usable. */ + if ( (read_cr4() & X86_CR4_FSGSBASE) && !is_pv_32bit_vcpu(v) ) { v->arch.pv_vcpu.fs_base = __rdfsbase(); if ( v->arch.flags & TF_kernel_mode ) --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -1375,7 +1375,7 @@ void __init __start_xen(unsigned long mb if ( cpu_has_smep ) set_in_cr4(X86_CR4_SMEP); - if ( cpu_has_fsgsbase ) + if ( boot_cpu_has(X86_FEATURE_FSGSBASE) ) set_in_cr4(X86_CR4_FSGSBASE); if ( opt_invpcid && cpu_has_invpcid ) --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -2444,6 +2444,17 @@ static int emulate_privileged_op(struct } case 4: /* Write CR4 */ + /* + * If this write will disable FSGSBASE, refresh Xen's idea of the + * guest bases now that they can no longer change. 
+ */ + if ( (v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_FSGSBASE) && + !(*reg & X86_CR4_FSGSBASE) ) + { + v->arch.pv_vcpu.fs_base = __rdfsbase(); + v->arch.pv_vcpu.gs_base_kernel = __rdgsbase(); + } + v->arch.pv_vcpu.ctrlreg[4] = pv_fixup_guest_cr4(v, *reg); write_cr4(pv_make_cr4(v)); break; @@ -2667,13 +2678,14 @@ static int emulate_privileged_op(struct case MSR_FS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; - val = cpu_has_fsgsbase ? __rdfsbase() : v->arch.pv_vcpu.fs_base; + val = (read_cr4() & X86_CR4_FSGSBASE) ? __rdfsbase() + : v->arch.pv_vcpu.fs_base; goto rdmsr_writeback; case MSR_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; - val = cpu_has_fsgsbase ? __rdgsbase() - : v->arch.pv_vcpu.gs_base_kernel; + val = (read_cr4() & X86_CR4_FSGSBASE) ? __rdgsbase() + : v->arch.pv_vcpu.gs_base_kernel; goto rdmsr_writeback; case MSR_SHADOW_GS_BASE: if ( is_pv_32on64_vcpu(v) ) --- a/xen/arch/x86/x86_64/traps.c +++ b/xen/arch/x86/x86_64/traps.c @@ -266,7 +266,9 @@ void toggle_guest_mode(struct vcpu *v) { if ( is_pv_32bit_vcpu(v) ) return; - if ( cpu_has_fsgsbase ) + + /* %fs/%gs bases can only be stale if WR{FS,GS}BASE are usable. 
*/ + if ( read_cr4() & X86_CR4_FSGSBASE ) { if ( v->arch.flags & TF_kernel_mode ) v->arch.pv_vcpu.gs_base_kernel = __rdgsbase(); --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -196,7 +196,6 @@ #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) #define cpu_has_page1gb boot_cpu_has(X86_FEATURE_PAGE1GB) #define cpu_has_efer 1 -#define cpu_has_fsgsbase boot_cpu_has(X86_FEATURE_FSGSBASE) #define cpu_has_smep boot_cpu_has(X86_FEATURE_SMEP) #define cpu_has_invpcid boot_cpu_has(X86_FEATURE_INVPCID) --- a/xen/include/asm-x86/msr.h +++ b/xen/include/asm-x86/msr.h @@ -121,6 +121,14 @@ static inline uint64_t rdtsc_ordered(voi : "=a" (low), "=d" (high) \ : "c" (counter)) +/* + * On hardware supporting FSGSBASE, the value loaded into hardware is the + * guest kernel's choice for 64bit PV guests (Xen's choice for Idle, HVM and + * 32bit PV). + * + * Therefore, the {RD,WR}{FS,GS}BASE instructions are only safe to use if + * %cr4.fsgsbase is set. + */ static inline unsigned long __rdfsbase(void) { unsigned long base; @@ -151,7 +159,7 @@ static inline unsigned long rdfsbase(voi { unsigned long base; - if ( cpu_has_fsgsbase ) + if ( read_cr4() & X86_CR4_FSGSBASE ) return __rdfsbase(); rdmsrl(MSR_FS_BASE, base); @@ -163,7 +171,7 @@ static inline unsigned long rdgsbase(voi { unsigned long base; - if ( cpu_has_fsgsbase ) + if ( read_cr4() & X86_CR4_FSGSBASE ) return __rdgsbase(); rdmsrl(MSR_GS_BASE, base); @@ -173,7 +181,7 @@ static inline unsigned long rdgsbase(voi static inline void wrfsbase(unsigned long base) { - if ( cpu_has_fsgsbase ) + if ( read_cr4() & X86_CR4_FSGSBASE ) #ifdef HAVE_GAS_FSGSBASE asm volatile ( "wrfsbase %0" :: "r" (base) ); #else @@ -185,7 +193,7 @@ static inline void wrfsbase(unsigned lon static inline void wrgsbase(unsigned long base) { - if ( cpu_has_fsgsbase ) + if ( read_cr4() & X86_CR4_FSGSBASE ) #ifdef HAVE_GAS_FSGSBASE asm volatile ( "wrgsbase %0" :: "r" (base) ); #else --- a/xen/include/asm-x86/processor.h +++ 
b/xen/include/asm-x86/processor.h @@ -343,8 +343,26 @@ static inline void write_cr4(unsigned lo /* No global pages in case of PCIDs enabled! */ ASSERT(!(val & X86_CR4_PGE) || !(val & X86_CR4_PCIDE)); + /* + * On hardware supporting FSGSBASE, the value in %cr4 is the kernel's + * choice for 64bit PV guests, which impacts whether Xen can use the + * instructions. + * + * The {rd,wr}{fs,gs}base() helpers use this_cpu(cr4) to work out whether it + * is safe to execute the {RD,WR}{FS,GS}BASE instruction, falling back to + * the MSR path if not. Some users require interrupt safety. + * + * If FSGSBASE is currently or about to become clear, reflect this in + * this_cpu(cr4) before updating %cr4, so an interrupt which hits in the + * middle won't observe FSGSBASE set in this_cpu(cr4) but clear in %cr4. + */ + this_cpu(cr4) = val & (this_cpu(cr4) | ~X86_CR4_FSGSBASE); + + asm volatile ( "mov %[val], %%cr4" + : "+m" (this_cpu(cr4)) /* Force ordering without a barrier. */ + : [val] "r" (val) ); + this_cpu(cr4) = val; - asm volatile ( "mov %0,%%cr4" : : "r" (val) ); } /* Clear and set 'TS' bit respectively */
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an openSUSE project.
Sign Up
Log In
Places
Places
All Projects
Status Monitor