aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig19
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c28
-rw-r--r--arch/x86/include/asm/coco.h1
-rw-r--r--arch/x86/include/asm/e820/api.h1
-rw-r--r--arch/x86/include/asm/pgtable_types.h3
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/include/asm/sev.h2
-rw-r--r--arch/x86/include/asm/sparsemem.h2
-rw-r--r--arch/x86/kernel/apic/apic.c16
-rw-r--r--arch/x86/kernel/cpu/amd.c3
-rw-r--r--arch/x86/kernel/e820.c7
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/sev-shared.c6
-rw-r--r--arch/x86/mm/fault.c33
-rw-r--r--arch/x86/mm/mem_encrypt.c7
-rw-r--r--arch/x86/mm/numa.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c63
-rw-r--r--arch/x86/virt/svm/sev.c36
-rw-r--r--arch/x86/xen/enlighten_pv.c11
-rw-r--r--arch/x86/xen/smp_pv.c4
20 files changed, 130 insertions, 119 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4474bf32d0a4..928820e61cb5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -62,6 +62,7 @@ config X86
select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU
select ARCH_32BIT_OFF_T if X86_32
select ARCH_CLOCKSOURCE_INIT
+ select ARCH_CONFIGURES_CPU_MITIGATIONS
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION
select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64
@@ -2488,17 +2489,21 @@ config PREFIX_SYMBOLS
def_bool y
depends on CALL_PADDING && !CFI_CLANG
-menuconfig SPECULATION_MITIGATIONS
- bool "Mitigations for speculative execution vulnerabilities"
+menuconfig CPU_MITIGATIONS
+ bool "Mitigations for CPU vulnerabilities"
default y
help
- Say Y here to enable options which enable mitigations for
- speculative execution hardware vulnerabilities.
+ Say Y here to enable options which enable mitigations for hardware
+ vulnerabilities (usually related to speculative execution).
+ Mitigations can be disabled or restricted to SMT systems at runtime
+ via the "mitigations" kernel parameter.
- If you say N, all mitigations will be disabled. You really
- should know what you are doing to say so.
+ If you say N, all mitigations will be disabled. This CANNOT be
+ overridden at runtime.
-if SPECULATION_MITIGATIONS
+ Say 'Y', unless you really know what you are doing.
+
+if CPU_MITIGATIONS
config MITIGATION_PAGE_TABLE_ISOLATION
bool "Remove the kernel mapping in user mode"
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index a3c0df11d0e6..2fb7d53cf333 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -98,11 +98,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
- /*
- * XXX: if access_ok, get_user, and put_user handled
- * sig_on_uaccess_err, this could go away.
- */
-
if (!access_ok((void __user *)ptr, size)) {
struct thread_struct *thread = &current->thread;
@@ -120,10 +115,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
bool emulate_vsyscall(unsigned long error_code,
struct pt_regs *regs, unsigned long address)
{
- struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr, syscall_nr, tmp;
- int prev_sig_on_uaccess_err;
long ret;
unsigned long orig_dx;
@@ -172,8 +165,6 @@ bool emulate_vsyscall(unsigned long error_code,
goto sigsegv;
}
- tsk = current;
-
/*
* Check for access_ok violations and find the syscall nr.
*
@@ -234,12 +225,8 @@ bool emulate_vsyscall(unsigned long error_code,
goto do_ret; /* skip requested */
/*
- * With a real vsyscall, page faults cause SIGSEGV. We want to
- * preserve that behavior to make writing exploits harder.
+ * With a real vsyscall, page faults cause SIGSEGV.
*/
- prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err;
- current->thread.sig_on_uaccess_err = 1;
-
ret = -EFAULT;
switch (vsyscall_nr) {
case 0:
@@ -262,23 +249,12 @@ bool emulate_vsyscall(unsigned long error_code,
break;
}
- current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err;
-
check_fault:
if (ret == -EFAULT) {
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall fault (exploit attempt?)");
-
- /*
- * If we failed to generate a signal for any reason,
- * generate one here. (This should be impossible.)
- */
- if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
- !sigismember(&tsk->pending.signal, SIGSEGV)))
- goto sigsegv;
-
- return true; /* Don't emulate the ret. */
+ goto sigsegv;
}
regs->ax = ret;
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index c086699b0d0c..aa6c8f8ca958 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -25,6 +25,7 @@ u64 cc_mkdec(u64 val);
void cc_random_init(void);
#else
#define cc_vendor (CC_VENDOR_NONE)
+static const u64 cc_mask = 0;
static inline u64 cc_mkenc(u64 val)
{
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index e8f58ddd06d9..2e74a7f0e935 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -17,6 +17,7 @@ extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);
extern void e820__range_add (u64 start, u64 size, enum e820_type type);
extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type);
+extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern void e820__print_table(char *who);
extern int e820__update_table(struct e820_table *table);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0b748ee16b3d..9abb8cc4cd47 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -148,7 +148,7 @@
#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | \
_PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \
- _PAGE_DEVMAP | _PAGE_ENC | _PAGE_UFFD_WP)
+ _PAGE_DEVMAP | _PAGE_CC | _PAGE_UFFD_WP)
#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT)
#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE)
@@ -173,6 +173,7 @@ enum page_cache_mode {
};
#endif
+#define _PAGE_CC (_AT(pteval_t, cc_mask))
#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 811548f131f4..78e51b0d6433 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -472,7 +472,6 @@ struct thread_struct {
unsigned long iopl_emul;
unsigned int iopl_warn:1;
- unsigned int sig_on_uaccess_err:1;
/*
* Protection Keys Register for Userspace. Loaded immediately on
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 7f57382afee4..93ed60080cfe 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -269,6 +269,7 @@ int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immut
int rmp_make_shared(u64 pfn, enum pg_level level);
void snp_leak_pages(u64 pfn, unsigned int npages);
void kdump_sev_callback(void);
+void snp_fixup_e820_tables(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
@@ -282,6 +283,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
static inline void kdump_sev_callback(void) { }
+static inline void snp_fixup_e820_tables(void) {}
#endif
#endif
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 1be13b2dfe8b..64df897c0ee3 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -37,8 +37,6 @@ extern int phys_to_target_node(phys_addr_t start);
#define phys_to_target_node phys_to_target_node
extern int memory_add_physaddr_to_nid(u64 start);
#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
-extern int numa_fill_memblks(u64 start, u64 end);
-#define numa_fill_memblks numa_fill_memblks
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c342c4aa9c68..803dcfb0e346 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1771,7 +1771,7 @@ void x2apic_setup(void)
__x2apic_enable();
}
-static __init void apic_set_fixmap(void);
+static __init void apic_set_fixmap(bool read_apic);
static __init void x2apic_disable(void)
{
@@ -1793,7 +1793,12 @@ static __init void x2apic_disable(void)
}
__x2apic_disable();
- apic_set_fixmap();
+ /*
+ * Don't reread the APIC ID as it was already done from
+ * check_x2apic() and the APIC driver still is a x2APIC variant,
+ * which fails to do the read after x2APIC was disabled.
+ */
+ apic_set_fixmap(false);
}
static __init void x2apic_enable(void)
@@ -2057,13 +2062,14 @@ void __init init_apic_mappings(void)
}
}
-static __init void apic_set_fixmap(void)
+static __init void apic_set_fixmap(bool read_apic)
{
set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
apic_mmio_base = APIC_BASE;
apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
apic_mmio_base, mp_lapic_addr);
- apic_read_boot_cpu_id(false);
+ if (read_apic)
+ apic_read_boot_cpu_id(false);
}
void __init register_lapic_address(unsigned long address)
@@ -2073,7 +2079,7 @@ void __init register_lapic_address(unsigned long address)
mp_lapic_addr = address;
if (!x2apic_mode)
- apic_set_fixmap();
+ apic_set_fixmap(true);
}
/*
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index cb9eece55904..307302af0aee 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -459,8 +459,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
case 0x1a:
switch (c->x86_model) {
- case 0x00 ... 0x0f:
- case 0x20 ... 0x2f:
+ case 0x00 ... 0x2f:
case 0x40 ... 0x4f:
case 0x70 ... 0x7f:
setup_force_cpu_cap(X86_FEATURE_ZEN5);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 6f1b379e3b38..68b09f718f10 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -532,9 +532,10 @@ u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum
return __e820__range_update(e820_table, start, size, old_type, new_type);
}
-static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
+u64 __init e820__range_update_table(struct e820_table *t, u64 start, u64 size,
+ enum e820_type old_type, enum e820_type new_type)
{
- return __e820__range_update(e820_table_kexec, start, size, old_type, new_type);
+ return __e820__range_update(t, start, size, old_type, new_type);
}
/* Remove a range of memory from the E820 table: */
@@ -806,7 +807,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
addr = memblock_phys_alloc(size, align);
if (addr) {
- e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__range_update_table(e820_table_kexec, addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
e820__update_table_kexec();
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 7062b84dd467..6d3d20e3e43a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -139,7 +139,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
log_lvl, d3, d6, d7);
}
- if (cpu_feature_enabled(X86_FEATURE_OSPKE))
+ if (cr4 & X86_CR4_PKE)
printk("%sPKRU: %08x\n", log_lvl, read_pkru());
}
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 8b04958da5e7..b4f8fa0f722c 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -1203,12 +1203,14 @@ static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt,
break;
case SVM_EXIT_MONITOR:
- if (opcode == 0x010f && modrm == 0xc8)
+ /* MONITOR and MONITORX instructions generate the same error code */
+ if (opcode == 0x010f && (modrm == 0xc8 || modrm == 0xfa))
return ES_OK;
break;
case SVM_EXIT_MWAIT:
- if (opcode == 0x010f && modrm == 0xc9)
+ /* MWAIT and MWAITX instructions generate the same error code */
+ if (opcode == 0x010f && (modrm == 0xc9 || modrm == 0xfb))
return ES_OK;
break;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 622d12ec7f08..bba4e020dd64 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -723,39 +723,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code,
WARN_ON_ONCE(user_mode(regs));
/* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) {
- /*
- * Any interrupt that takes a fault gets the fixup. This makes
- * the below recursive fault logic only apply to a faults from
- * task context.
- */
- if (in_interrupt())
- return;
-
- /*
- * Per the above we're !in_interrupt(), aka. task context.
- *
- * In this case we need to make sure we're not recursively
- * faulting through the emulate_vsyscall() logic.
- */
- if (current->thread.sig_on_uaccess_err && signal) {
- sanitize_error_code(address, &error_code);
-
- set_signal_archinfo(address, error_code);
-
- if (si_code == SEGV_PKUERR) {
- force_sig_pkuerr((void __user *)address, pkey);
- } else {
- /* XXX: hwpoison faults will set the wrong code. */
- force_sig_fault(signal, si_code, (void __user *)address);
- }
- }
-
- /*
- * Barring that, we can do the fixup and be happy.
- */
+ if (fixup_exception(regs, X86_TRAP_PF, error_code, address))
return;
- }
/*
* AMD erratum #91 manifests as a spurious page fault on a PREFETCH
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 6f3b3e028718..0a120d85d7bb 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -102,6 +102,13 @@ void __init mem_encrypt_setup_arch(void)
phys_addr_t total_mem = memblock_phys_mem_size();
unsigned long size;
+ /*
+ * Do RMP table fixups after the e820 tables have been setup by
+ * e820__memory_setup().
+ */
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ snp_fixup_e820_tables();
+
if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return;
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 65e9a6e391c0..ce84ba86e69e 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -929,6 +929,8 @@ int memory_add_physaddr_to_nid(u64 start)
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif
+
static int __init cmp_memblk(const void *a, const void *b)
{
const struct numa_memblk *ma = *(const struct numa_memblk **)a;
@@ -1001,5 +1003,3 @@ int __init numa_fill_memblks(u64 start, u64 end)
}
return 0;
}
-
-#endif
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index df5fac428408..59cbc94b6e69 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1807,36 +1807,41 @@ populate_extable:
if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
/* Conservatively check that src_reg + insn->off is a kernel address:
- * src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE
- * src_reg is used as scratch for src_reg += insn->off and restored
- * after emit_ldx if necessary
+ * src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE
+ * and
+ * src_reg + insn->off < VSYSCALL_ADDR
*/
- u64 limit = TASK_SIZE_MAX + PAGE_SIZE;
+ u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;
u8 *end_of_jmp;
- /* At end of these emitted checks, insn->off will have been added
- * to src_reg, so no need to do relative load with insn->off offset
- */
- insn_off = 0;
+ /* movabsq r10, VSYSCALL_ADDR */
+ emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32,
+ (u32)(long)VSYSCALL_ADDR);
- /* movabsq r11, limit */
- EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
- EMIT((u32)limit, 4);
- EMIT(limit >> 32, 4);
+ /* mov src_reg, r11 */
+ EMIT_mov(AUX_REG, src_reg);
if (insn->off) {
- /* add src_reg, insn->off */
- maybe_emit_1mod(&prog, src_reg, true);
- EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off);
+ /* add r11, insn->off */
+ maybe_emit_1mod(&prog, AUX_REG, true);
+ EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
}
- /* cmp src_reg, r11 */
- maybe_emit_mod(&prog, src_reg, AUX_REG, true);
- EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
+ /* sub r11, r10 */
+ maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+ EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
+
+ /* movabsq r10, limit */
+ emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32,
+ (u32)(long)limit);
+
+ /* cmp r10, r11 */
+ maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+ EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
- /* if unsigned '>=', goto load */
- EMIT2(X86_JAE, 0);
+ /* if unsigned '>', goto load */
+ EMIT2(X86_JA, 0);
end_of_jmp = prog;
/* xor dst_reg, dst_reg */
@@ -1862,18 +1867,6 @@ populate_extable:
/* populate jmp_offset for JMP above */
start_of_ldx[-1] = prog - start_of_ldx;
- if (insn->off && src_reg != dst_reg) {
- /* sub src_reg, insn->off
- * Restore src_reg after "add src_reg, insn->off" in prev
- * if statement. But if src_reg == dst_reg, emit_ldx
- * above already clobbered src_reg, so no need to restore.
- * If add src_reg, insn->off was unnecessary, no need to
- * restore either.
- */
- maybe_emit_1mod(&prog, src_reg, true);
- EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off);
- }
-
if (!bpf_prog->aux->extable)
break;
@@ -3473,3 +3466,9 @@ bool bpf_jit_supports_ptr_xchg(void)
{
return true;
}
+
+/* x86-64 JIT emits its own code to filter user addresses so return 0 here */
+u64 bpf_arch_uaddress_limit(void)
+{
+ return 0;
+}
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index ab0e8448bb6e..0ae10535c699 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -163,6 +163,42 @@ bool snp_probe_rmptable_info(void)
return true;
}
+static void __init __snp_fixup_e820_tables(u64 pa)
+{
+ if (IS_ALIGNED(pa, PMD_SIZE))
+ return;
+
+ /*
+ * Handle cases where the RMP table placement by the BIOS is not
+ * 2M aligned and the kexec kernel could try to allocate
+ * from within that chunk which then causes a fatal RMP fault.
+ *
+ * The e820_table needs to be updated as it is converted to
+ * kernel memory resources and used by KEXEC_FILE_LOAD syscall
+ * to load kexec segments.
+ *
+ * The e820_table_firmware needs to be updated as it is exposed
+ * to sysfs and used by the KEXEC_LOAD syscall to load kexec
+ * segments.
+ *
+ * The e820_table_kexec needs to be updated as it passed to
+ * the kexec-ed kernel.
+ */
+ pa = ALIGN_DOWN(pa, PMD_SIZE);
+ if (e820__mapped_any(pa, pa + PMD_SIZE, E820_TYPE_RAM)) {
+ pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa);
+ e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ }
+}
+
+void __init snp_fixup_e820_tables(void)
+{
+ __snp_fixup_e820_tables(probed_rmp_base);
+ __snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
+}
+
/*
* Do the necessary preparations which are verified by the firmware as
* described in the SNP_INIT_EX firmware command description in the SNP
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ace2eb054053..9ba53814ed6a 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -219,13 +219,21 @@ static __read_mostly unsigned int cpuid_leaf5_edx_val;
static void xen_cpuid(unsigned int *ax, unsigned int *bx,
unsigned int *cx, unsigned int *dx)
{
- unsigned maskebx = ~0;
+ unsigned int maskebx = ~0;
+ unsigned int or_ebx = 0;
/*
* Mask out inconvenient features, to try and disable as many
* unsupported kernel subsystems as possible.
*/
switch (*ax) {
+ case 0x1:
+ /* Replace initial APIC ID in bits 24-31 of EBX. */
+ /* See xen_pv_smp_config() for related topology preparations. */
+ maskebx = 0x00ffffff;
+ or_ebx = smp_processor_id() << 24;
+ break;
+
case CPUID_MWAIT_LEAF:
/* Synthesize the values.. */
*ax = 0;
@@ -248,6 +256,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
: "0" (*ax), "2" (*cx));
*bx &= maskebx;
+ *bx |= or_ebx;
}
static bool __init xen_check_mwait(void)
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 27d1a5b7f571..ac41d83b38d3 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -154,9 +154,9 @@ static void __init xen_pv_smp_config(void)
u32 apicid = 0;
int i;
- topology_register_boot_apic(apicid++);
+ topology_register_boot_apic(apicid);
- for (i = 1; i < nr_cpu_ids; i++)
+ for (i = 0; i < nr_cpu_ids; i++)
topology_register_apic(apicid++, CPU_ACPIID_INVALID, true);
/* Pretend to be a proper enumerated system */