aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorGravatar Ingo Molnar <mingo@kernel.org> 2024-02-27 10:09:49 +0100
committerGravatar Ingo Molnar <mingo@kernel.org> 2024-02-27 10:09:49 +0100
commit9b9c280b9af2aa851d83e7d0b79f36a3d869d745 (patch)
treeb61cf8566669d9615234e87696a833ee487e2655 /arch/x86
parentx86/apic: Build the x86 topology enumeration functions on UP APIC builds too (diff)
parentx86/bugs: Use fixed addressing for VERW operand (diff)
downloadlinux-9b9c280b9af2aa851d83e7d0b79f36a3d869d745.tar.gz
linux-9b9c280b9af2aa851d83e7d0b79f36a3d869d745.tar.bz2
linux-9b9c280b9af2aa851d83e7d0b79f36a3d869d745.zip
Merge branch 'x86/urgent' into x86/apic, to resolve conflicts
Conflicts: arch/x86/kernel/cpu/common.c arch/x86/kernel/cpu/intel.c Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/entry/entry.S23
-rw-r--r--arch/x86/entry/entry_32.S3
-rw-r--r--arch/x86/entry/entry_64.S11
-rw-r--r--arch/x86/entry/entry_64_compat.S1
-rw-r--r--arch/x86/include/asm/cpufeatures.h2
-rw-r--r--arch/x86/include/asm/entry-common.h1
-rw-r--r--arch/x86/include/asm/nospec-branch.h25
-rw-r--r--arch/x86/kernel/cpu/bugs.c15
-rw-r--r--arch/x86/kernel/cpu/common.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c178
-rw-r--r--arch/x86/kernel/nmi.c3
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c2
-rw-r--r--arch/x86/kvm/vmx/run_flags.h7
-rw-r--r--arch/x86/kvm/vmx/vmenter.S9
-rw-r--r--arch/x86/kvm/vmx/vmx.c20
-rw-r--r--arch/x86/kvm/x86.c20
-rw-r--r--arch/x86/mm/ident_map.c23
-rw-r--r--arch/x86/xen/smp.c12
19 files changed, 221 insertions, 146 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2264db14a25d..da8f3caf2781 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -112,13 +112,13 @@ ifeq ($(CONFIG_X86_32),y)
# temporary until string.h is fixed
KBUILD_CFLAGS += -ffreestanding
- ifeq ($(CONFIG_STACKPROTECTOR),y)
- ifeq ($(CONFIG_SMP),y)
+ ifeq ($(CONFIG_STACKPROTECTOR),y)
+ ifeq ($(CONFIG_SMP),y)
KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
- else
+ else
KBUILD_CFLAGS += -mstack-protector-guard=global
- endif
endif
+ endif
else
BITS := 64
UTS_MACHINE := x86_64
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 8c8d38f0cb1d..003379049924 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -6,6 +6,9 @@
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
.pushsection .noinstr.text, "ax"
@@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
EXPORT_SYMBOL_GPL(entry_ibpb);
.popsection
+
+/*
+ * Define the VERW operand that is disguised as entry code so that
+ * it can be referenced with KPTI enabled. This ensure VERW can be
+ * used late in exit-to-user path after page tables are switched.
+ */
+.pushsection .entry.text, "ax"
+
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_START_NOALIGN(mds_verw_sel)
+ UNWIND_HINT_UNDEFINED
+ ANNOTATE_NOENDBR
+ .word __KERNEL_DS
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_END(mds_verw_sel);
+/* For KVM */
+EXPORT_SYMBOL_GPL(mds_verw_sel);
+
+.popsection
+
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index c73047bf9f4b..fba427646805 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -885,6 +885,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
BUG_IF_WRONG_CR3 no_user_check=1
popfl
popl %eax
+ CLEAR_CPU_BUFFERS
/*
* Return back to the vDSO, which will pop ecx and edx.
@@ -954,6 +955,7 @@ restore_all_switch_stack:
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
+ CLEAR_CPU_BUFFERS
.Lirq_return:
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -1146,6 +1148,7 @@ SYM_CODE_START(asm_exc_nmi)
/* Not on SYSENTER stack. */
call exc_nmi
+ CLEAR_CPU_BUFFERS
jmp .Lnmi_return
.Lnmi_from_sysenter_stack:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c40f89ab1b4c..9bb485977629 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -161,6 +161,7 @@ syscall_return_via_sysret:
SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
swapgs
+ CLEAR_CPU_BUFFERS
sysretq
SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
@@ -573,6 +574,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
.Lswapgs_and_iret:
swapgs
+ CLEAR_CPU_BUFFERS
/* Assert that the IRET frame indicates user mode. */
testb $3, 8(%rsp)
jnz .Lnative_iret
@@ -723,6 +725,8 @@ native_irq_return_ldt:
*/
popq %rax /* Restore user RAX */
+ CLEAR_CPU_BUFFERS
+
/*
* RSP now points to an ordinary IRET frame, except that the page
* is read-only and RSP[31:16] are preloaded with the userspace
@@ -1450,6 +1454,12 @@ nmi_restore:
movq $0, 5*8(%rsp) /* clear "NMI executing" */
/*
+ * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
+ * NMI in kernel after user state is restored. For an unprivileged user
+ * these conditions are hard to meet.
+ */
+
+ /*
* iretq reads the "iret" frame and exits the NMI stack in a
* single instruction. We are returning to kernel mode, so this
* cannot result in a fault. Similarly, we don't need to worry
@@ -1466,6 +1476,7 @@ SYM_CODE_START(entry_SYSCALL32_ignore)
UNWIND_HINT_END_OF_STACK
ENDBR
mov $-ENOSYS, %eax
+ CLEAR_CPU_BUFFERS
sysretl
SYM_CODE_END(entry_SYSCALL32_ignore)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index de94e2e84ecc..eabf48c4d4b4 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -270,6 +270,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
xorl %r9d, %r9d
xorl %r10d, %r10d
swapgs
+ CLEAR_CPU_BUFFERS
sysretl
SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index fdf723b6f6d0..2b62cdd8dd12 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -95,7 +95,7 @@
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index ce8f50192ae3..7e523bb3d2d3 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
static __always_inline void arch_exit_to_user_mode(void)
{
- mds_user_clear_cpu_buffers();
amd_clear_divider();
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 262e65539f83..ab19c7f1167b 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -315,6 +315,17 @@
#endif
.endm
+/*
+ * Macro to execute VERW instruction that mitigate transient data sampling
+ * attacks such as MDS. On affected systems a microcode update overloaded VERW
+ * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
+ *
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+.macro CLEAR_CPU_BUFFERS
+ ALTERNATIVE "", __stringify(verw mds_verw_sel), X86_FEATURE_CLEAR_CPU_BUF
+.endm
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@@ -529,13 +540,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-DECLARE_STATIC_KEY_FALSE(mds_user_clear);
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+extern u16 mds_verw_sel;
+
#include <asm/segment.h>
/**
@@ -562,17 +574,6 @@ static __always_inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
- *
- * Clear CPU buffers if the corresponding static key is enabled
- */
-static __always_inline void mds_user_clear_cpu_buffers(void)
-{
- if (static_branch_likely(&mds_user_clear))
- mds_clear_cpu_buffers();
-}
-
-/**
* mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
*
* Clear CPU buffers if the corresponding static key is enabled
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bb0ab8466b91..48d049cd74e7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -111,9 +111,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
/* Control unconditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-/* Control MDS CPU buffer clear before returning to user space */
-DEFINE_STATIC_KEY_FALSE(mds_user_clear);
-EXPORT_SYMBOL_GPL(mds_user_clear);
/* Control MDS CPU buffer clear before idling (halt, mwait) */
DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
EXPORT_SYMBOL_GPL(mds_idle_clear);
@@ -252,7 +249,7 @@ static void __init mds_select_mitigation(void)
if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
mds_mitigation = MDS_MITIGATION_VMWERV;
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
(mds_nosmt || cpu_mitigations_auto_nosmt()))
@@ -356,7 +353,7 @@ static void __init taa_select_mitigation(void)
* For guests that can't determine whether the correct microcode is
* present on host, enable the mitigation for UCODE_NEEDED as well.
*/
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
@@ -424,7 +421,7 @@ static void __init mmio_select_mitigation(void)
*/
if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
boot_cpu_has(X86_FEATURE_RTM)))
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
else
static_branch_enable(&mmio_stale_data_clear);
@@ -484,12 +481,12 @@ static void __init md_clear_update_mitigation(void)
if (cpu_mitigations_off())
return;
- if (!static_key_enabled(&mds_user_clear))
+ if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
goto out;
/*
- * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
- * mitigation, if necessary.
+ * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
+ * Stale Data mitigation, if necessary.
*/
if (mds_mitigation == MDS_MITIGATION_OFF &&
boot_cpu_has_bug(X86_BUG_MDS)) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 05e0b31f75e9..087d21734286 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1543,6 +1543,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
get_cpu_vendor(c);
get_cpu_cap(c);
setup_force_cpu_cap(X86_FEATURE_CPUID);
+ get_cpu_address_sizes(c);
cpu_parse_early_param();
cpu_init_topology(c);
@@ -1557,11 +1558,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
this_cpu->c_bsp_init(c);
} else {
setup_clear_cpu_cap(X86_FEATURE_CPUID);
+ get_cpu_address_sizes(c);
cpu_init_topology(c);
}
- get_cpu_address_sizes(c);
-
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
cpu_set_bug_bits(c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1bb37a69d837..be30d7fa2e66 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -184,6 +184,90 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
return false;
}
+#define MSR_IA32_TME_ACTIVATE 0x982
+
+/* Helpers to access TME_ACTIVATE MSR */
+#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
+#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
+
+#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
+#define TME_ACTIVATE_POLICY_AES_XTS_128 0
+
+#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
+
+#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
+#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
+
+/* Values for mktme_status (SW only construct) */
+#define MKTME_ENABLED 0
+#define MKTME_DISABLED 1
+#define MKTME_UNINITIALIZED 2
+static int mktme_status = MKTME_UNINITIALIZED;
+
+static void detect_tme_early(struct cpuinfo_x86 *c)
+{
+ u64 tme_activate, tme_policy, tme_crypto_algs;
+ int keyid_bits = 0, nr_keyids = 0;
+ static u64 tme_activate_cpu0 = 0;
+
+ rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
+
+ if (mktme_status != MKTME_UNINITIALIZED) {
+ if (tme_activate != tme_activate_cpu0) {
+ /* Broken BIOS? */
+ pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
+ pr_err_once("x86/tme: MKTME is not usable\n");
+ mktme_status = MKTME_DISABLED;
+
+ /* Proceed. We may need to exclude bits from x86_phys_bits. */
+ }
+ } else {
+ tme_activate_cpu0 = tme_activate;
+ }
+
+ if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
+ pr_info_once("x86/tme: not enabled by BIOS\n");
+ mktme_status = MKTME_DISABLED;
+ return;
+ }
+
+ if (mktme_status != MKTME_UNINITIALIZED)
+ goto detect_keyid_bits;
+
+ pr_info("x86/tme: enabled by BIOS\n");
+
+ tme_policy = TME_ACTIVATE_POLICY(tme_activate);
+ if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
+ pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
+
+ tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
+ if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
+ pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
+ tme_crypto_algs);
+ mktme_status = MKTME_DISABLED;
+ }
+detect_keyid_bits:
+ keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
+ nr_keyids = (1UL << keyid_bits) - 1;
+ if (nr_keyids) {
+ pr_info_once("x86/mktme: enabled by BIOS\n");
+ pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
+ } else {
+ pr_info_once("x86/mktme: disabled by BIOS\n");
+ }
+
+ if (mktme_status == MKTME_UNINITIALIZED) {
+ /* MKTME is usable */
+ mktme_status = MKTME_ENABLED;
+ }
+
+ /*
+ * KeyID bits effectively lower the number of physical address
+ * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
+ */
+ c->x86_phys_bits -= keyid_bits;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -315,6 +399,13 @@ static void early_init_intel(struct cpuinfo_x86 *c)
}
check_memory_type_self_snoop_errata(c);
+
+ /*
+ * Adjust the number of physical bits early because it affects the
+ * valid bits of the MTRR mask registers.
+ */
+ if (cpu_has(c, X86_FEATURE_TME))
+ detect_tme_early(c);
}
static void bsp_init_intel(struct cpuinfo_x86 *c)
@@ -475,90 +566,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
-#define MSR_IA32_TME_ACTIVATE 0x982
-
-/* Helpers to access TME_ACTIVATE MSR */
-#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
-#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
-
-#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
-#define TME_ACTIVATE_POLICY_AES_XTS_128 0
-
-#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
-
-#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
-#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
-
-/* Values for mktme_status (SW only construct) */
-#define MKTME_ENABLED 0
-#define MKTME_DISABLED 1
-#define MKTME_UNINITIALIZED 2
-static int mktme_status = MKTME_UNINITIALIZED;
-
-static void detect_tme(struct cpuinfo_x86 *c)
-{
- u64 tme_activate, tme_policy, tme_crypto_algs;
- int keyid_bits = 0, nr_keyids = 0;
- static u64 tme_activate_cpu0 = 0;
-
- rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
-
- if (mktme_status != MKTME_UNINITIALIZED) {
- if (tme_activate != tme_activate_cpu0) {
- /* Broken BIOS? */
- pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
- pr_err_once("x86/tme: MKTME is not usable\n");
- mktme_status = MKTME_DISABLED;
-
- /* Proceed. We may need to exclude bits from x86_phys_bits. */
- }
- } else {
- tme_activate_cpu0 = tme_activate;
- }
-
- if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
- pr_info_once("x86/tme: not enabled by BIOS\n");
- mktme_status = MKTME_DISABLED;
- return;
- }
-
- if (mktme_status != MKTME_UNINITIALIZED)
- goto detect_keyid_bits;
-
- pr_info("x86/tme: enabled by BIOS\n");
-
- tme_policy = TME_ACTIVATE_POLICY(tme_activate);
- if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
- pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
-
- tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
- if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
- pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
- tme_crypto_algs);
- mktme_status = MKTME_DISABLED;
- }
-detect_keyid_bits:
- keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
- nr_keyids = (1UL << keyid_bits) - 1;
- if (nr_keyids) {
- pr_info_once("x86/mktme: enabled by BIOS\n");
- pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
- } else {
- pr_info_once("x86/mktme: disabled by BIOS\n");
- }
-
- if (mktme_status == MKTME_UNINITIALIZED) {
- /* MKTME is usable */
- mktme_status = MKTME_ENABLED;
- }
-
- /*
- * KeyID bits effectively lower the number of physical address
- * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
- */
- c->x86_phys_bits -= keyid_bits;
-}
-
static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
u64 msr;
@@ -677,9 +684,6 @@ static void init_intel(struct cpuinfo_x86 *c)
init_ia32_feat_ctl(c);
- if (cpu_has(c, X86_FEATURE_TME))
- detect_tme(c);
-
init_intel_misc_features(c);
split_lock_init();
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 17e955ab69fe..3082cf24b69e 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -563,9 +563,6 @@ nmi_restart:
}
if (this_cpu_dec_return(nmi_state))
goto nmi_restart;
-
- if (user_mode(regs))
- mds_user_clear_cpu_buffers();
}
#if IS_ENABLED(CONFIG_KVM_INTEL)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index a6216c874729..315c7c2ba89b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -71,7 +71,7 @@ static int fixed_pmc_events[] = {
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
struct kvm_pmc *pmc;
- u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
+ u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
int i;
pmu->fixed_ctr_ctrl = data;
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
index edc3f16cc189..6a9bfdfbb6e5 100644
--- a/arch/x86/kvm/vmx/run_flags.h
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -2,7 +2,10 @@
#ifndef __KVM_X86_VMX_RUN_FLAGS_H
#define __KVM_X86_VMX_RUN_FLAGS_H
-#define VMX_RUN_VMRESUME (1 << 0)
-#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
+#define VMX_RUN_VMRESUME_SHIFT 0
+#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
+
+#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
+#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 906ecd001511..2bfbf758d061 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -139,7 +139,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
mov (%_ASM_SP), %_ASM_AX
/* Check if vmlaunch or vmresume is needed */
- test $VMX_RUN_VMRESUME, %ebx
+ bt $VMX_RUN_VMRESUME_SHIFT, %ebx
/* Load guest registers. Don't clobber flags. */
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
@@ -161,8 +161,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
- /* Check EFLAGS.ZF from 'test VMX_RUN_VMRESUME' above */
- jz .Lvmlaunch
+ /* Clobbers EFLAGS.ZF */
+ CLEAR_CPU_BUFFERS
+
+ /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
+ jnc .Lvmlaunch
/*
* After a successful VMRESUME/VMLAUNCH, control flow "magically"
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1111d9d08903..88a4ff200d04 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -388,7 +388,16 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
- vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+ /*
+ * Disable VERW's behavior of clearing CPU buffers for the guest if the
+ * CPU isn't affected by MDS/TAA, and the host hasn't forcefully enabled
+ * the mitigation. Disabling the clearing behavior provides a
+ * performance boost for guests that aren't aware that manually clearing
+ * CPU buffers is unnecessary, at the cost of MSR accesses on VM-Entry
+ * and VM-Exit.
+ */
+ vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
+ (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
!boot_cpu_has_bug(X86_BUG_MDS) &&
!boot_cpu_has_bug(X86_BUG_TAA);
@@ -7224,11 +7233,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
guest_state_enter_irqoff();
- /* L1D Flush includes CPU buffer clear to mitigate MDS */
+ /*
+ * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
+ * mitigation for MDS is done late in VMentry and is still
+ * executed in spite of L1D Flush. This is because an extra VERW
+ * should not matter much after the big hammer L1D Flush.
+ */
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
- else if (static_branch_unlikely(&mds_user_clear))
- mds_clear_cpu_buffers();
else if (static_branch_unlikely(&mmio_stale_data_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
mds_clear_cpu_buffers();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bf10a9073a09..48a61d283406 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1704,22 +1704,17 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
struct kvm_msr_entry msr;
int r;
+ /* Unconditionally clear the output for simplicity */
+ msr.data = 0;
msr.index = index;
r = kvm_get_msr_feature(&msr);
- if (r == KVM_MSR_RET_INVALID) {
- /* Unconditionally clear the output for simplicity */
- *data = 0;
- if (kvm_msr_ignored_check(index, 0, false))
- r = 0;
- }
-
- if (r)
- return r;
+ if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false))
+ r = 0;
*data = msr.data;
- return 0;
+ return r;
}
static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -2511,7 +2506,7 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
}
#ifdef CONFIG_X86_64
-static inline int gtod_is_based_on_tsc(int mode)
+static inline bool gtod_is_based_on_tsc(int mode)
{
return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
}
@@ -5458,7 +5453,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) {
vcpu->arch.nmi_pending = 0;
atomic_set(&vcpu->arch.nmi_queued, events->nmi.pending);
- kvm_make_request(KVM_REQ_NMI, vcpu);
+ if (events->nmi.pending)
+ kvm_make_request(KVM_REQ_NMI, vcpu);
}
static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 968d7005f4a7..f50cc210a981 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
+ bool use_gbpage;
next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;
- if (info->direct_gbpages) {
- pud_t pudval;
+ /* if this is already a gbpage, this portion is already mapped */
+ if (pud_large(*pud))
+ continue;
+
+ /* Is using a gbpage allowed? */
+ use_gbpage = info->direct_gbpages;
- if (pud_present(*pud))
- continue;
+ /* Don't use gbpage if it maps more than the requested region. */
+ /* at the begining: */
+ use_gbpage &= ((addr & ~PUD_MASK) == 0);
+ /* ... or at the end: */
+ use_gbpage &= ((next & ~PUD_MASK) == 0);
+
+ /* Never overwrite existing mappings */
+ use_gbpage &= !pud_present(*pud);
+
+ if (use_gbpage) {
+ pud_t pudval;
- addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 114b362cb7af..935771726f9c 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -65,6 +65,8 @@ int xen_smp_intr_init(unsigned int cpu)
char *resched_name, *callfunc_name, *debug_name;
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
+ if (!resched_name)
+ goto fail_mem;
per_cpu(xen_resched_irq, cpu).name = resched_name;
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
cpu,
@@ -77,6 +79,8 @@ int xen_smp_intr_init(unsigned int cpu)
per_cpu(xen_resched_irq, cpu).irq = rc;
callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
+ if (!callfunc_name)
+ goto fail_mem;
per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
cpu,
@@ -90,6 +94,9 @@ int xen_smp_intr_init(unsigned int cpu)
if (!xen_fifo_events) {
debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
+ if (!debug_name)
+ goto fail_mem;
+
per_cpu(xen_debug_irq, cpu).name = debug_name;
rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu,
xen_debug_interrupt,
@@ -101,6 +108,9 @@ int xen_smp_intr_init(unsigned int cpu)
}
callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+ if (!callfunc_name)
+ goto fail_mem;
+
per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
cpu,
@@ -114,6 +124,8 @@ int xen_smp_intr_init(unsigned int cpu)
return 0;
+ fail_mem:
+ rc = -ENOMEM;
fail:
xen_smp_intr_free(cpu);
return rc;