From 6efa20e49b9cb1db1ab66870cc37323474a75a13 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 19 Jul 2013 11:51:31 -0400 Subject: xen: Support 64-bit PV guest receiving NMIs This is based on a patch that Zhenzhong Duan had sent - which was missing some of the remaining pieces. The kernel has the logic to handle Xen-type-exceptions using the paravirt interface in the assembler code (see PARAVIRT_ADJUST_EXCEPTION_FRAME - pv_irq_ops.adjust_exception_frame and and INTERRUPT_RETURN - pv_cpu_ops.iret). That means the nmi handler (and other exception handlers) use the hypervisor iret. The other changes that would be neccessary for this would be to translate the NMI_VECTOR to one of the entries on the ipi_vector and make xen_send_IPI_mask_allbutself use different events. Fortunately for us commit 1db01b4903639fcfaec213701a494fe3fb2c490b (xen: Clean up apic ipi interface) implemented this and we piggyback on the cleanup such that the apic IPI interface will pass the right vector value for NMI. With this patch we can trigger NMIs within a PV guest (only tested x86_64). For this to work with normal PV guests (not initial domain) we need the domain to be able to use the APIC ops - they are already implemented to use the Xen event channels. For that to be turned on in a PV domU we need to remove the masking of X86_FEATURE_APIC. Incidentally that means kgdb will also now work within a PV guest without using the 'nokgdbroundup' workaround. Note that the 32-bit version is different and this patch does not enable that. CC: Lisa Nguyen CC: Ben Guthro CC: Zhenzhong Duan Signed-off-by: Konrad Rzeszutek Wilk [v1: Fixed up per David Vrabel comments] Reviewed-by: Ben Guthro Reviewed-by: David Vrabel --- arch/x86/include/asm/xen/events.h | 1 + arch/x86/xen/enlighten.c | 13 ++++++++----- arch/x86/xen/setup.c | 13 +++++++++++-- arch/x86/xen/smp.c | 6 ++++++ 4 files changed, 26 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index ca842f2769ef..608a79d5a466 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -7,6 +7,7 @@ enum ipi_vector { XEN_CALL_FUNCTION_SINGLE_VECTOR, XEN_SPIN_UNLOCK_VECTOR, XEN_IRQ_WORK_VECTOR, + XEN_NMI_VECTOR, XEN_NR_IPIS, }; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 193097ef3d7d..b5a22fa7e249 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -427,8 +427,7 @@ static void __init xen_init_cpuid_mask(void) if (!xen_initial_domain()) cpuid_leaf1_edx_mask &= - ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ - (1 << X86_FEATURE_ACPI)); /* disable ACPI */ + ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32)); @@ -735,8 +734,7 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, addr = (unsigned long)xen_int3; else if (addr == (unsigned long)stack_segment) addr = (unsigned long)xen_stack_segment; - else if (addr == (unsigned long)double_fault || - addr == (unsigned long)nmi) { + else if (addr == (unsigned long)double_fault) { /* Don't need to handle these */ return 0; #ifdef CONFIG_X86_MCE @@ -747,7 +745,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, */ ; #endif - } else { + } else if (addr == (unsigned long)nmi) + /* + * Use the native version as well. + */ + ; + else { /* Some other trap using IST? */ if (WARN_ON(val->ist != 0)) return 0; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 056d11faef21..403f5cc5415b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -33,6 +33,9 @@ /* These are code, but not functions. Defined in entry.S */ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +#ifdef CONFIG_X86_64 +extern const char nmi[]; +#endif extern void xen_sysenter_target(void); extern void xen_syscall_target(void); extern void xen_syscall32_target(void); @@ -525,7 +528,13 @@ void xen_enable_syscall(void) } #endif /* CONFIG_X86_64 */ } - +void __cpuinit xen_enable_nmi(void) +{ +#ifdef CONFIG_X86_64 + if (register_callback(CALLBACKTYPE_nmi, nmi)) + BUG(); +#endif +} void __init xen_arch_setup(void) { xen_panic_handler_init(); @@ -543,7 +552,7 @@ void __init xen_arch_setup(void) xen_enable_sysenter(); xen_enable_syscall(); - + xen_enable_nmi(); #ifdef CONFIG_ACPI if (!(xen_start_info->flags & SIF_INITDOMAIN)) { printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index ca92754eb846..22759c6d309f 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -572,6 +572,12 @@ static inline int xen_map_vector(int vector) case IRQ_WORK_VECTOR: xen_vector = XEN_IRQ_WORK_VECTOR; break; +#ifdef CONFIG_X86_64 + case NMI_VECTOR: + case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */ + xen_vector = XEN_NMI_VECTOR; + break; +#endif default: xen_vector = -1; printk(KERN_ERR "xen: vector 0x%x is not implemented\n", -- cgit v1.2.3 From 65a45fa2f640f72757ef00f2bc83f9654d65a62b Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 19 Jul 2013 15:51:59 +0100 Subject: xen/p2m: avoid unneccesary TLB flush in m2p_remove_override() In m2p_remove_override() when removing the grant map from the kernel mapping and replacing with a mapping to the original page, the grant unmap will already have flushed the TLB and it is not necessary to do it again after updating the mapping. Signed-off-by: David Vrabel Cc: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Stefano Stabellini --- arch/x86/xen/p2m.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 95fb2aa5927e..74672eeac881 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -1003,7 +1003,6 @@ int m2p_remove_override(struct page *page, set_pte_at(&init_mm, address, ptep, pfn_pte(pfn, PAGE_KERNEL)); - __flush_tlb_single(address); kmap_op->host_addr = 0; } } -- cgit v1.2.3 From fb58e30091c74967f6b8e98b3c1f292782f92b41 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 15 Aug 2013 13:21:04 +0100 Subject: x86/xen: disable premption when enabling local irqs If CONFIG_PREEMPT is enabled then xen_enable_irq() (and xen_restore_fl()) could be preempted and rescheduled on a different VCPU in between the clear of the mask and the check for pending events. This may result in events being lost as the upcall will check for pending events on the wrong VCPU. Fix this by disabling preemption around the unmask and check for events. Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/irq.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 01a4dc015ae1..0da7f863056f 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -47,23 +47,18 @@ static void xen_restore_fl(unsigned long flags) /* convert from IF type flag */ flags = !(flags & X86_EFLAGS_IF); - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ + /* See xen_irq_enable() for why preemption must be disabled. */ preempt_disable(); vcpu = this_cpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = flags; - preempt_enable_no_resched(); - - /* Doesn't matter if we get preempted here, because any - pending event will get dealt with anyway. */ if (flags == 0) { - preempt_check_resched(); barrier(); /* unmask then check (avoid races) */ if (unlikely(vcpu->evtchn_upcall_pending)) xen_force_evtchn_callback(); - } + preempt_enable(); + } else + preempt_enable_no_resched(); } PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); @@ -82,10 +77,12 @@ static void xen_irq_enable(void) { struct vcpu_info *vcpu; - /* We don't need to worry about being preempted here, since - either a) interrupts are disabled, so no preemption, or b) - the caller is confused and is trying to re-enable interrupts - on an indeterminate processor. */ + /* + * We may be preempted as soon as vcpu->evtchn_upcall_mask is + * cleared, so disable preemption to ensure we check for + * events on the VCPU we are still running on. + */ + preempt_disable(); vcpu = this_cpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = 0; @@ -96,6 +93,8 @@ static void xen_irq_enable(void) barrier(); /* unmask then check (avoid races) */ if (unlikely(vcpu->evtchn_upcall_pending)) xen_force_evtchn_callback(); + + preempt_enable(); } PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); -- cgit v1.2.3 From e201bfcc5c4fe6d58ce2b21fb4f4c2a2f0ab7ab8 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 22 Jul 2013 15:29:00 +0100 Subject: x86/xen: during early setup, only 1:1 map the ISA region During early setup, when the reserved regions and MMIO holes are being setup as 1:1 in the p2m, clear any mappings instead of making them 1:1 (execept for the ISA region which is expected to be mapped). This fixes a regression introduced in 3.5 by 83d51ab473dd (xen/setup: update VA mapping when releasing memory during setup) which caused hosts with tboot to fail to boot. tboot marks a region in the e820 map as unusable and the dom0 kernel would attempt to map this region and Xen does not permit unusable regions to be mapped by guests. (XEN) 0000000000000000 - 0000000000060000 (usable) (XEN) 0000000000060000 - 0000000000068000 (reserved) (XEN) 0000000000068000 - 000000000009e000 (usable) (XEN) 0000000000100000 - 0000000000800000 (usable) (XEN) 0000000000800000 - 0000000000972000 (unusable) tboot marked this region as unusable. (XEN) 0000000000972000 - 00000000cf200000 (usable) (XEN) 00000000cf200000 - 00000000cf38f000 (reserved) (XEN) 00000000cf38f000 - 00000000cf3ce000 (ACPI data) (XEN) 00000000cf3ce000 - 00000000d0000000 (reserved) (XEN) 00000000e0000000 - 00000000f0000000 (reserved) (XEN) 00000000fe000000 - 0000000100000000 (reserved) (XEN) 0000000100000000 - 0000000630000000 (usable) Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/setup.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 403f5cc5415b..6243651623d5 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -218,13 +218,19 @@ static void __init xen_set_identity_and_release_chunk( unsigned long pfn; /* - * If the PFNs are currently mapped, the VA mapping also needs - * to be updated to be 1:1. + * If the PFNs are currently mapped, clear the mappings + * (except for the ISA region which must be 1:1 mapped) to + * release the refcounts (in Xen) on the original frames. */ - for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) + for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { + pte_t pte = __pte_ma(0); + + if (pfn < PFN_UP(ISA_END_ADDRESS)) + pte = mfn_pte(pfn, PAGE_KERNEL_IO); + (void)HYPERVISOR_update_va_mapping( - (unsigned long)__va(pfn << PAGE_SHIFT), - mfn_pte(pfn, PAGE_KERNEL_IO), 0); + (unsigned long)__va(pfn << PAGE_SHIFT), pte, 0); + } if (start_pfn < nr_pages) *released += xen_release_chunk( -- cgit v1.2.3 From ee0726407feaf504dff304fb603652fb2d778b42 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 23 Jul 2013 17:23:54 +0000 Subject: xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original mapping GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0 mapping instead of reinstating the original mapping. Doing so separately would be racy. To unmap a grant and reinstate the original mapping atomically we use GNTTABOP_unmap_and_replace. GNTTABOP_unmap_and_replace doesn't work with GNTMAP_contains_pte, so don't use it for kmaps. GNTTABOP_unmap_and_replace zeroes the mapping passed in new_addr so we have to reinstate it, however that is a per-cpu mapping only used for balloon scratch pages, so we can be sure that it's not going to be accessed while the mapping is not valid. Signed-off-by: Stefano Stabellini Reviewed-by: David Vrabel Acked-by: Konrad Rzeszutek Wilk CC: alex@alex.org.uk CC: dcrisan@flexiant.com [v1: Konrad fixed up the conflicts] Conflicts: arch/x86/xen/p2m.c --- arch/x86/xen/p2m.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 74672eeac881..0d4ec35895d4 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -161,6 +161,7 @@ #include #include #include +#include #include #include "multicalls.h" @@ -967,7 +968,10 @@ int m2p_remove_override(struct page *page, if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; - struct gnttab_unmap_grant_ref *unmap_op; + struct gnttab_unmap_and_replace *unmap_op; + struct page *scratch_page = get_balloon_scratch_page(); + unsigned long scratch_page_address = (unsigned long) + __va(page_to_pfn(scratch_page) << PAGE_SHIFT); /* * It might be that we queued all the m2p grant table @@ -990,20 +994,25 @@ int m2p_remove_override(struct page *page, } mcs = xen_mc_entry( - sizeof(struct gnttab_unmap_grant_ref)); + sizeof(struct gnttab_unmap_and_replace)); unmap_op = mcs.args; unmap_op->host_addr = kmap_op->host_addr; + unmap_op->new_addr = scratch_page_address; unmap_op->handle = kmap_op->handle; - unmap_op->dev_bus_addr = 0; MULTI_grant_table_op(mcs.mc, - GNTTABOP_unmap_grant_ref, unmap_op, 1); + GNTTABOP_unmap_and_replace, unmap_op, 1); xen_mc_issue(PARAVIRT_LAZY_MMU); - set_pte_at(&init_mm, address, ptep, - pfn_pte(pfn, PAGE_KERNEL)); + mcs = __xen_mc_entry(0); + MULTI_update_va_mapping(mcs.mc, scratch_page_address, + pfn_pte(page_to_pfn(get_balloon_scratch_page()), + PAGE_KERNEL_RO), 0); + xen_mc_issue(PARAVIRT_LAZY_MMU); + kmap_op->host_addr = 0; + put_balloon_scratch_page(); } } -- cgit v1.2.3 From 669b0ae961e87c824233475e987b2d39996d4849 Mon Sep 17 00:00:00 2001 From: Vaughan Cao Date: Fri, 16 Aug 2013 16:10:56 +0800 Subject: xen/pvhvm: Initialize xen panic handler for PVHVM guests kernel use callback linked in panic_notifier_list to notice others when panic happens. NORET_TYPE void panic(const char * fmt, ...){ ... atomic_notifier_call_chain(&panic_notifier_list, 0, buf); } When Xen becomes aware of this, it will call xen_reboot(SHUTDOWN_crash) to send out an event with reason code - SHUTDOWN_crash. xen_panic_handler_init() is defined to register on panic_notifier_list but we only call it in xen_arch_setup which only be called by PV, this patch is necessary for PVHVM. Without this patch, setting 'on_crash=coredump-restart' in PVHVM guest config file won't lead a vmcore to be generate when the guest panics. It can be reproduced with 'echo c > /proc/sysrq-trigger'. Signed-off-by: Vaughan Cao Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Joe Jin --- arch/x86/xen/enlighten.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b5a22fa7e249..15939e872db2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1713,6 +1713,8 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_shared_info(); + xen_panic_handler_init(); + if (xen_feature(XENFEAT_hvm_callback_vector)) xen_have_vector_callback = 1; xen_hvm_smp_init(); -- cgit v1.2.3