From adfaf18334cbf16c563e4ebc67e968ea1b17ec51 Mon Sep 17 00:00:00 2001 From: raymond pang Date: Tue, 22 Aug 2017 23:44:47 +0800 Subject: x86/ioapic: Print the IRTE's index field correctly when enabling INTR When enabling interrupt remap, IOAPIC's RTE contains the interrupt_index field of IRTE. This field is composed of the ->index and the ->index2 members of 'struct IR_IO_APIC_route_entry' - but what we print out currently only uses ->index. Fix it. Signed-off-by: Raymond Pang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: joro@8bytes.org Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/CAHG4imNDzpDyOVi7MByVrLQ%3DQFuOVqpzJ5F-Xs5z6OZphubj-Q@mail.gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 237e9c2341c7..70e48aa6af98 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1243,7 +1243,7 @@ static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries) entry.vector, entry.irr, entry.delivery_status); if (ir_entry->format) printk(KERN_DEBUG "%s, remapped, I(%04X), Z(%X)\n", - buf, (ir_entry->index << 15) | ir_entry->index, + buf, (ir_entry->index2 << 15) | ir_entry->index, ir_entry->zero); else printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n", -- cgit v1.2.3 From 69de72ec6db950c436e36b94cf05eeb9e11ee144 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:16 +0200 Subject: x86/irq: Remove vector_used_by_percpu_irq() Last user (lguest) is gone. Remove it. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064956.201432430@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq.h | 1 - arch/x86/kernel/irq.c | 2 -- arch/x86/kernel/irqinit.c | 12 ------------ 3 files changed, 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 668cca540025..ce991689843f 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -44,7 +44,6 @@ extern __visible unsigned int do_IRQ(struct pt_regs *regs); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); -extern int vector_used_by_percpu_irq(unsigned int vector); extern void init_ISA_irqs(void); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 4ed0aba8dbc8..e6073a0ce77e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -346,8 +346,6 @@ __visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs) set_irq_regs(old_regs); } -EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); - #ifdef CONFIG_HOTPLUG_CPU /* These two declarations are only used in check_irq_vectors_for_cpu_disable() diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index c7fd18526c3e..6537cfe2cb1d 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -55,18 +55,6 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [0 ... 
NR_VECTORS - 1] = VECTOR_UNUSED, }; -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (!IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) - return 1; - } - - return 0; -} - void __init init_ISA_irqs(void) { struct irq_chip *chip = legacy_pic->chip; -- cgit v1.2.3 From fa4ab5774dfe58fd5e99462f625253659d41df09 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:17 +0200 Subject: x86/irq: Unexport used_vectors[] No modular users. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064956.278375986@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index bf54309b85da..556f8f53965d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -83,7 +83,6 @@ gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss; gate_desc idt_table[NR_VECTORS] __page_aligned_bss; DECLARE_BITMAP(used_vectors, NR_VECTORS); -EXPORT_SYMBOL_GPL(used_vectors); static inline void cond_local_irq_enable(struct pt_regs *regs) { -- cgit v1.2.3 From 05161b9cbe553c41cf775ac41bb5120d94347e5c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:18 +0200 Subject: x86/irq: Get rid of the 'first_system_vector' indirection bogosity This variable is beyond pointless. Nothing allocates a vector via alloc_gate() below FIRST_SYSTEM_VECTOR. So nothing can change first_system_vector. If there is a need for a gate below FIRST_SYSTEM_VECTOR then it can be added to the vector defines and FIRST_SYSTEM_VECTOR can be adjusted accordingly. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064956.357109735@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 5 ++--- arch/x86/kernel/apic/apic.c | 2 -- arch/x86/kernel/apic/vector.c | 2 +- arch/x86/kernel/irq.c | 2 +- arch/x86/kernel/irqinit.c | 5 +---- 5 files changed, 5 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index d0a21b12dd58..a7f36ab1c07d 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -482,16 +483,14 @@ static inline void _set_gate(int gate, unsigned type, void *addr, 0, 0, __KERNEL_CS); \ } while (0) -extern int first_system_vector; /* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ extern unsigned long used_vectors[]; static inline void alloc_system_vector(int vector) { + BUG_ON(vector < FIRST_SYSTEM_VECTOR); if (!test_bit(vector, used_vectors)) { set_bit(vector, used_vectors); - if (first_system_vector > vector) - first_system_vector = vector; } else { BUG(); } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 98b3dd8cf2bf..8996ef1eb39f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -177,8 +177,6 @@ static int disable_apic_timer __initdata; int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); -int first_system_vector = FIRST_SYSTEM_VECTOR; - /* * Debug level, exported for io_apic.c */ diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index b3af457ed667..88c214e75a6b 100644 --- 
a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -166,7 +166,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, offset = current_offset; next: vector += 16; - if (vector >= first_system_vector) { + if (vector >= FIRST_SYSTEM_VECTOR) { offset = (offset + 1) % 16; vector = FIRST_EXTERNAL_VECTOR + offset; } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e6073a0ce77e..019d0ac8ed3a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -429,7 +429,7 @@ int check_irq_vectors_for_cpu_disable(void) * this w/o holding vector_lock. */ for (vector = FIRST_EXTERNAL_VECTOR; - vector < first_system_vector; vector++) { + vector < FIRST_SYSTEM_VECTOR; vector++) { if (!test_bit(vector, used_vectors) && IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) { if (++count == this_count) diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 6537cfe2cb1d..4e5f8c022fdd 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -169,10 +169,7 @@ void __init native_init_IRQ(void) * 'special' SMP interrupts) */ i = FIRST_EXTERNAL_VECTOR; -#ifndef CONFIG_X86_LOCAL_APIC -#define first_system_vector NR_VECTORS -#endif - for_each_clear_bit_from(i, used_vectors, first_system_vector) { + for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) { /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ set_intr_gate(i, irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR)); -- cgit v1.2.3 From 9aec458ff07323f6593fd718cc33b1bca2f64597 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:19 +0200 Subject: x86/irq: Remove duplicated used_vectors definition Also remove the unparseable comment in the other place while at it. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064956.436711634@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 1 - arch/x86/include/asm/irq.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index a7f36ab1c07d..71094f208673 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -483,7 +483,6 @@ static inline void _set_gate(int gate, unsigned type, void *addr, 0, 0, __KERNEL_CS); \ } while (0) -/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ extern unsigned long used_vectors[]; static inline void alloc_system_vector(int vector) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ce991689843f..9958ceea2fa3 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -42,9 +42,6 @@ extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs); extern __visible unsigned int do_IRQ(struct pt_regs *regs); -/* Interrupt vector management */ -extern DECLARE_BITMAP(used_vectors, NR_VECTORS); - extern void init_ISA_irqs(void); #ifdef CONFIG_X86_LOCAL_APIC -- cgit v1.2.3 From f7eaf6e00fd581043bb540dfe865f1d81769b189 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:20 +0200 Subject: x86/boot: Move EISA setup to a separate file EISA has absolutely nothing to do with traps, so move it out of traps.c into its own eisa.c file. Furthermore, the EISA bus detection does not need to run during very early boot, it's good enough to run it before the EISA bus and drivers are initialized. I.e. 
instead of calling it from the very early trap_init() code, make it a subsys_initcall(). Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064956.515322409@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/eisa.c | 18 ++++++++++++++++++ arch/x86/kernel/traps.c | 13 ------------- 3 files changed, 19 insertions(+), 13 deletions(-) create mode 100644 arch/x86/kernel/eisa.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 287eac7d207f..6ab5fbfa71b1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -111,6 +111,7 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o +obj-$(CONFIG_EISA) += eisa.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c new file mode 100644 index 000000000000..881f9236ebff --- /dev/null +++ b/arch/x86/kernel/eisa.c @@ -0,0 +1,18 @@ +/* + * EISA specific code + * + * This file is licensed under the GPL V2 + */ +#include +#include + +static __init int eisa_bus_probe(void) +{ + void __iomem *p = ioremap(0x0FFFD9, 4); + + if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) + EISA_bus = 1; + iounmap(p); + return 0; +} +subsys_initcall(eisa_bus_probe); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 556f8f53965d..309532451d94 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -38,11 +38,6 @@ #include #include -#ifdef CONFIG_EISA -#include -#include -#endif - #if defined(CONFIG_EDAC) #include #endif @@ -969,14 +964,6 @@ void __init trap_init(void) { int i; -#ifdef CONFIG_EISA - void __iomem *p = early_ioremap(0x0FFFD9, 4); - - if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) - EISA_bus = 1; - early_iounmap(p, 4); -#endif - set_intr_gate(X86_TRAP_DE, divide_error); set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK); /* int4 can be called from all */ -- cgit v1.2.3 From 2feb1b316d48004d905278c02a55902cab0be8be Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:21 +0200 Subject: x86/tracing: Introduce a static key for exception tracing Switching the IDT just for avoiding tracepoints creates a completely impenetrable macro/inline/ifdef mess. There is no point in avoiding tracepoints for most of the traps/exceptions. For the more expensive tracepoints, like pagefaults, this can be handled with an explicit static key. Preparatory patch to remove the tracing IDT. 
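As a rough sketch of the pattern this enables (trace_irqvectors_key and the trace_irqvectors_enabled() helper are the ones added in the patch below; the handler, tracepoint and vector names here are made up purely for illustration), a handler can guard its tracepoints with the static key instead of switching to a separate IDT:

	/*
	 * Sketch only: some_vector_handler(), trace_some_vector_entry/exit()
	 * and SOME_VECTOR are hypothetical; the key and the helper come from
	 * the hunks below.
	 */
	static void some_vector_handler(void)
	{
		/* A disabled static branch is a NOP: no cost while unregistered */
		if (trace_irqvectors_enabled())
			trace_some_vector_entry(SOME_VECTOR);

		/* ... the real work of the handler ... */

		if (trace_irqvectors_enabled())
			trace_some_vector_exit(SOME_VECTOR);
	}

The key is flipped by the tracepoint registration hooks (static_branch_inc()/static_branch_dec() in trace_irq_vector_regfunc()/trace_irq_vector_unregfunc() below), so the branch is only taken while at least one of the affected tracepoints is actually registered.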
Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064956.593094539@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/trace/common.h | 15 +++++++++++++++ arch/x86/include/asm/trace/exceptions.h | 4 +--- arch/x86/include/asm/trace/irq_vectors.h | 4 +--- arch/x86/kernel/tracepoint.c | 9 ++++++++- 4 files changed, 25 insertions(+), 7 deletions(-) create mode 100644 arch/x86/include/asm/trace/common.h (limited to 'arch') diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h new file mode 100644 index 000000000000..b1eb7b18ee8a --- /dev/null +++ b/arch/x86/include/asm/trace/common.h @@ -0,0 +1,15 @@ +#ifndef _ASM_TRACE_COMMON_H +#define _ASM_TRACE_COMMON_H + +extern int trace_irq_vector_regfunc(void); +extern void trace_irq_vector_unregfunc(void); + +#ifdef CONFIG_TRACING +DECLARE_STATIC_KEY_FALSE(trace_irqvectors_key); +#define trace_irqvectors_enabled() \ + static_branch_unlikely(&trace_irqvectors_key) +#else +static inline bool trace_irqvectors_enabled(void) { return false; } +#endif + +#endif diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h index 2422b14c50a7..960a5b50ac3b 100644 --- a/arch/x86/include/asm/trace/exceptions.h +++ b/arch/x86/include/asm/trace/exceptions.h @@ -5,9 +5,7 @@ #define _TRACE_PAGE_FAULT_H #include - -extern int trace_irq_vector_regfunc(void); -extern void trace_irq_vector_unregfunc(void); +#include DECLARE_EVENT_CLASS(x86_exceptions, diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 32dd6a9e343c..7825b4426e7e 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -5,9 +5,7 @@ #define _TRACE_IRQ_VECTORS_H #include - -extern int trace_irq_vector_regfunc(void); -extern void trace_irq_vector_unregfunc(void); +#include DECLARE_EVENT_CLASS(x86_irq_vector, diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c index 15515132bf0d..dd4aa04bb95c 100644 --- a/arch/x86/kernel/tracepoint.c +++ b/arch/x86/kernel/tracepoint.c @@ -4,9 +4,11 @@ * Copyright (C) 2013 Seiji Aguchi * */ +#include +#include + #include #include -#include atomic_t trace_idt_ctr = ATOMIC_INIT(0); struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1, @@ -15,6 +17,7 @@ struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1, /* No need to be aligned, but done to keep all IDTs defined the same way. */ gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss; +DEFINE_STATIC_KEY_FALSE(trace_irqvectors_key); static int trace_irq_vector_refcount; static DEFINE_MUTEX(irq_vector_mutex); @@ -36,6 +39,8 @@ static void switch_idt(void *arg) int trace_irq_vector_regfunc(void) { + static_branch_inc(&trace_irqvectors_key); + mutex_lock(&irq_vector_mutex); if (!trace_irq_vector_refcount) { set_trace_idt_ctr(1); @@ -49,6 +54,8 @@ int trace_irq_vector_regfunc(void) void trace_irq_vector_unregfunc(void) { + static_branch_dec(&trace_irqvectors_key); + mutex_lock(&irq_vector_mutex); trace_irq_vector_refcount--; if (!trace_irq_vector_refcount) { -- cgit v1.2.3 From 11a7ffb01703c3bbb1e9b968893f4487a1b0b5a8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:22 +0200 Subject: x86/traps: Simplify pagefault tracing logic Make use of the new irqvector tracing static key and remove the duplicated trace_do_pagefault() implementation. 
If irq vector tracing is disabled, then the overhead of this is a single NOP5, which is a reasonable tradeoff to avoid duplicated code and the unholy macro mess. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064956.672965407@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 8 -------- arch/x86/entry/entry_64.S | 13 +----------- arch/x86/include/asm/traps.h | 10 +-------- arch/x86/kernel/kvm.c | 2 +- arch/x86/mm/fault.c | 49 ++++++++++++-------------------------------- 5 files changed, 16 insertions(+), 66 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 48ef7bb32c42..0092da1c056f 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -891,14 +891,6 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR, #endif /* CONFIG_HYPERV */ -#ifdef CONFIG_TRACING -ENTRY(trace_page_fault) - ASM_CLAC - pushl $trace_do_page_fault - jmp common_exception -END(trace_page_fault) -#endif - ENTRY(page_fault) ASM_CLAC pushl $do_page_fault diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4dbb336a1fdd..2731b9471770 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -918,17 +918,6 @@ ENTRY(\sym) END(\sym) .endm -#ifdef CONFIG_TRACING -.macro trace_idtentry sym do_sym has_error_code:req -idtentry trace(\sym) trace(\do_sym) has_error_code=\has_error_code -idtentry \sym \do_sym has_error_code=\has_error_code -.endm -#else -.macro trace_idtentry sym do_sym has_error_code:req -idtentry \sym \do_sym has_error_code=\has_error_code -.endm -#endif - idtentry divide_error do_divide_error has_error_code=0 idtentry overflow do_overflow has_error_code=0 idtentry bounds do_bounds has_error_code=0 @@ -1096,7 +1085,7 @@ idtentry xen_stack_segment do_stack_segment has_error_code=1 #endif idtentry general_protection do_general_protection has_error_code=1 -trace_idtentry page_fault do_page_fault has_error_code=1 +idtentry page_fault do_page_fault has_error_code=1 #ifdef CONFIG_KVM_GUEST idtentry async_page_fault do_async_page_fault has_error_code=1 diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 01fd0a7f48cd..b4f322d6c95f 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -39,7 +39,6 @@ asmlinkage void machine_check(void); asmlinkage void simd_coprocessor_error(void); #ifdef CONFIG_TRACING -asmlinkage void trace_page_fault(void); #define trace_stack_segment stack_segment #define trace_divide_error divide_error #define trace_bounds bounds @@ -54,6 +53,7 @@ asmlinkage void trace_page_fault(void); #define trace_alignment_check alignment_check #define trace_simd_coprocessor_error simd_coprocessor_error #define trace_async_page_fault async_page_fault +#define trace_page_fault page_fault #endif dotraplinkage void do_divide_error(struct pt_regs *, long); @@ -74,14 +74,6 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *); #endif dotraplinkage void do_general_protection(struct pt_regs *, long); dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); -#ifdef CONFIG_TRACING -dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long); -#else -static inline void trace_do_page_fault(struct pt_regs *regs, unsigned long error) -{ - do_page_fault(regs, error); -} -#endif dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long); dotraplinkage void 
do_coprocessor_error(struct pt_regs *, long); dotraplinkage void do_alignment_check(struct pt_regs *, long); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d04e30e3c0ff..6ed9242b5fa7 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -263,7 +263,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) switch (kvm_read_and_reset_pf_reason()) { default: - trace_do_page_fault(regs, error_code); + do_page_fault(regs, error_code); break; case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2a1fa10c6a98..58d7b3a4ec2e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1254,10 +1254,6 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. - * - * This function must have noinline because both callers - * {,trace_}do_page_fault() have notrace on. Having this an actual function - * guarantees there's a function trace entry. */ static noinline void __do_page_fault(struct pt_regs *regs, unsigned long error_code, @@ -1490,27 +1486,6 @@ good_area: } NOKPROBE_SYMBOL(__do_page_fault); -dotraplinkage void notrace -do_page_fault(struct pt_regs *regs, unsigned long error_code) -{ - unsigned long address = read_cr2(); /* Get the faulting address */ - enum ctx_state prev_state; - - /* - * We must have this function tagged with __kprobes, notrace and call - * read_cr2() before calling anything else. To avoid calling any kind - * of tracing machinery before we've observed the CR2 value. - * - * exception_{enter,exit}() contain all sorts of tracepoints. - */ - - prev_state = exception_enter(); - __do_page_fault(regs, error_code, address); - exception_exit(prev_state); -} -NOKPROBE_SYMBOL(do_page_fault); - -#ifdef CONFIG_TRACING static nokprobe_inline void trace_page_fault_entries(unsigned long address, struct pt_regs *regs, unsigned long error_code) @@ -1521,22 +1496,24 @@ trace_page_fault_entries(unsigned long address, struct pt_regs *regs, trace_page_fault_kernel(address, regs, error_code); } +/* + * We must have this function blacklisted from kprobes, tagged with notrace + * and call read_cr2() before calling anything else. To avoid calling any + * kind of tracing machinery before we've observed the CR2 value. + * + * exception_{enter,exit}() contains all sorts of tracepoints. + */ dotraplinkage void notrace -trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) +do_page_fault(struct pt_regs *regs, unsigned long error_code) { - /* - * The exception_enter and tracepoint processing could - * trigger another page faults (user space callchain - * reading) and destroy the original cr2 value, so read - * the faulting address now. 
- */ - unsigned long address = read_cr2(); + unsigned long address = read_cr2(); /* Get the faulting address */ enum ctx_state prev_state; prev_state = exception_enter(); - trace_page_fault_entries(address, regs, error_code); + if (trace_irqvectors_enabled()) + trace_page_fault_entries(address, regs, error_code); + __do_page_fault(regs, error_code, address); exception_exit(prev_state); } -NOKPROBE_SYMBOL(trace_do_page_fault); -#endif /* CONFIG_TRACING */ +NOKPROBE_SYMBOL(do_page_fault); -- cgit v1.2.3 From 302a98f896bbd2feb1393d98e8b9febeb101db6e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:23 +0200 Subject: x86/apic: Remove the duplicated tracing version of local_timer_interrupt() The two NOP5s are noise in the rest of the work which is done by the timer interrupt and modern CPUs are pretty good in optimizing NOPs anyway. Get rid of the interrupt handler duplication and move the tracepoints into the regular handler. Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170828064956.751247330@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 2 +- arch/x86/kernel/apic/apic.c | 19 ------------------- 2 files changed, 1 insertion(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index d6dbafbd4207..44137bb12136 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -48,7 +48,6 @@ extern asmlinkage void call_function_single_interrupt(void); #ifdef CONFIG_TRACING /* Interrupt handlers registered during init_IRQ */ -extern void trace_apic_timer_interrupt(void); extern void trace_x86_platform_ipi(void); extern void trace_error_interrupt(void); extern void trace_irq_work_interrupt(void); @@ -59,6 +58,7 @@ extern void trace_threshold_interrupt(void); extern void trace_deferred_error_interrupt(void); extern void trace_call_function_interrupt(void); extern void trace_call_function_single_interrupt(void); +#define trace_apic_timer_interrupt apic_timer_interrupt #define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt #define trace_reboot_interrupt reboot_interrupt #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 8996ef1eb39f..7a57b5418f6d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1029,25 +1029,6 @@ __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - * - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ - entering_ack_irq(); - local_apic_timer_interrupt(); - exiting_irq(); - - set_irq_regs(old_regs); -} - -__visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. -- cgit v1.2.3 From 3bec6def39e32609e01a68b43476ee1f1c512eaa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:24 +0200 Subject: x86/apic: Use this_cpu_ptr() in local_timer_interrupt() Accessing the per cpu data via per_cpu(, smp_processor_id()) is pointless. 
Use this_cpu_ptr() instead. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064956.829552757@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 7a57b5418f6d..a33fa4442d14 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -988,8 +988,7 @@ void setup_secondary_APIC_clock(void) */ static void local_apic_timer_interrupt(void) { - int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + struct clock_event_device *evt = this_cpu_ptr(&lapic_events); /* * Normally we should not be here till LAPIC has been initialized but @@ -1003,7 +1002,8 @@ static void local_apic_timer_interrupt(void) * spurious. */ if (!evt->event_handler) { - pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu); + pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", + smp_processor_id()); /* Switch it off */ lapic_timer_shutdown(evt); return; -- cgit v1.2.3 From 8a17116b1fddc1f414cd4dd5e86fa239fcdb5208 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:25 +0200 Subject: x86/irq: Get rid of duplicated trace_x86_platform_ipi() code Two NOP5s are really a good tradeoff vs. the unholy IDT switching mess, which duplicates code all over the place. Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170828064956.907209383@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 2 +- arch/x86/kernel/irq.c | 25 +++++-------------------- 2 files changed, 6 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 44137bb12136..a7e45d1707b7 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -48,7 +48,6 @@ extern asmlinkage void call_function_single_interrupt(void); #ifdef CONFIG_TRACING /* Interrupt handlers registered during init_IRQ */ -extern void trace_x86_platform_ipi(void); extern void trace_error_interrupt(void); extern void trace_irq_work_interrupt(void); extern void trace_spurious_interrupt(void); @@ -58,6 +57,7 @@ extern void trace_threshold_interrupt(void); extern void trace_deferred_error_interrupt(void); extern void trace_call_function_interrupt(void); extern void trace_call_function_single_interrupt(void); +#define trace_x86_platform_ipi x86_platform_ipi #define trace_apic_timer_interrupt apic_timer_interrupt #define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt #define trace_reboot_interrupt reboot_interrupt diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 019d0ac8ed3a..befdd4a54ecc 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -262,20 +262,16 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) /* * Handler for X86_PLATFORM_IPI_VECTOR. 
*/ -void __smp_x86_platform_ipi(void) -{ - inc_irq_stat(x86_platform_ipis); - - if (x86_platform_ipi_callback) - x86_platform_ipi_callback(); -} - __visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); entering_ack_irq(); - __smp_x86_platform_ipi(); + trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); + inc_irq_stat(x86_platform_ipis); + if (x86_platform_ipi_callback) + x86_platform_ipi_callback(); + trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); exiting_irq(); set_irq_regs(old_regs); } @@ -334,17 +330,6 @@ __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs) } #endif -__visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - entering_ack_irq(); - trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); - __smp_x86_platform_ipi(); - trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); - exiting_irq(); - set_irq_regs(old_regs); -} #ifdef CONFIG_HOTPLUG_CPU -- cgit v1.2.3 From 61069de7a3252be0b1f567fe9e0b4723f1d2814f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:26 +0200 Subject: x86/apic: Remove the duplicated tracing versions of interrupts The error and the spurious interrupt are really rare events and not at all performance sensitive: two NOP5s can be tolerated when tracing is disabled. Remove the complication. Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170828064956.986009402@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 4 ++-- arch/x86/kernel/apic/apic.c | 43 ++++++++++--------------------------------- 2 files changed, 12 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index a7e45d1707b7..b094b877b294 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -48,15 +48,15 @@ extern asmlinkage void call_function_single_interrupt(void); #ifdef CONFIG_TRACING /* Interrupt handlers registered during init_IRQ */ -extern void trace_error_interrupt(void); extern void trace_irq_work_interrupt(void); -extern void trace_spurious_interrupt(void); extern void trace_thermal_interrupt(void); extern void trace_reschedule_interrupt(void); extern void trace_threshold_interrupt(void); extern void trace_deferred_error_interrupt(void); extern void trace_call_function_interrupt(void); extern void trace_call_function_single_interrupt(void); +#define trace_error_interrupt error_interrupt +#define trace_spurious_interrupt spurious_interrupt #define trace_x86_platform_ipi x86_platform_ipi #define trace_apic_timer_interrupt apic_timer_interrupt #define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a33fa4442d14..eebee4cbc14b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1899,10 +1899,14 @@ void __init register_lapic_address(unsigned long address) /* * This interrupt should _never_ happen with our APIC/SMP architecture */ -static void __smp_spurious_interrupt(u8 vector) +__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) { + u8 vector = ~regs->orig_ax; u32 v; + entering_irq(); + trace_spurious_apic_entry(vector); + /* * Check if this really is a spurious interrupt and ACK it * if it is a vectored one. Just in case... 
@@ -1917,22 +1921,7 @@ static void __smp_spurious_interrupt(u8 vector) /* see sw-dev-man vol 3, chapter 7.4.13.5 */ pr_info("spurious APIC interrupt through vector %02x on CPU#%d, " "should never happen.\n", vector, smp_processor_id()); -} -__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) -{ - entering_irq(); - __smp_spurious_interrupt(~regs->orig_ax); - exiting_irq(); -} - -__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs) -{ - u8 vector = ~regs->orig_ax; - - entering_irq(); - trace_spurious_apic_entry(vector); - __smp_spurious_interrupt(vector); trace_spurious_apic_exit(vector); exiting_irq(); } @@ -1940,10 +1929,8 @@ __visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs) /* * This interrupt should never happen with our APIC/SMP architecture */ -static void __smp_error_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) { - u32 v; - u32 i = 0; static const char * const error_interrupt_reason[] = { "Send CS error", /* APIC Error Bit 0 */ "Receive CS error", /* APIC Error Bit 1 */ @@ -1954,6 +1941,10 @@ static void __smp_error_interrupt(struct pt_regs *regs) "Received illegal vector", /* APIC Error Bit 6 */ "Illegal register address", /* APIC Error Bit 7 */ }; + u32 v, i = 0; + + entering_irq(); + trace_error_apic_entry(ERROR_APIC_VECTOR); /* First tickle the hardware, only then report what went on. -- REW */ if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */ @@ -1975,20 +1966,6 @@ static void __smp_error_interrupt(struct pt_regs *regs) apic_printk(APIC_DEBUG, KERN_CONT "\n"); -} - -__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) -{ - entering_irq(); - __smp_error_interrupt(regs); - exiting_irq(); -} - -__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs) -{ - entering_irq(); - trace_error_apic_entry(ERROR_APIC_VECTOR); - __smp_error_interrupt(regs); trace_error_apic_exit(ERROR_APIC_VECTOR); exiting_irq(); } -- cgit v1.2.3 From daabb8eb9a55af90d0a55d93547a51cc6921389f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:27 +0200 Subject: x86/irqwork: Get rid of duplicated tracing interrupt code Two NOP5s are a reasonable tradeoff to avoid duplicated code and the requirement to switch the IDT. 
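Condensed from the irq_work.c hunk below, the merged handler ends up shaped like this (sketch, reformatted for readability only):

	__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
	{
		ipi_entering_ack_irq();
		trace_irq_work_entry(IRQ_WORK_VECTOR);
		inc_irq_stat(apic_irq_work_irqs);
		irq_work_run();
		trace_irq_work_exit(IRQ_WORK_VECTOR);
		exiting_irq();
	}

With the tracepoints disabled, the two trace_irq_work_*() calls are the two NOP5s mentioned above.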
Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064957.064746737@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 2 +- arch/x86/kernel/irq_work.c | 16 ++-------------- 2 files changed, 3 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b094b877b294..fd7e7e67ff79 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -48,13 +48,13 @@ extern asmlinkage void call_function_single_interrupt(void); #ifdef CONFIG_TRACING /* Interrupt handlers registered during init_IRQ */ -extern void trace_irq_work_interrupt(void); extern void trace_thermal_interrupt(void); extern void trace_reschedule_interrupt(void); extern void trace_threshold_interrupt(void); extern void trace_deferred_error_interrupt(void); extern void trace_call_function_interrupt(void); extern void trace_call_function_single_interrupt(void); +#define trace_irq_work_interrupt irq_work_interrupt #define trace_error_interrupt error_interrupt #define trace_spurious_interrupt spurious_interrupt #define trace_x86_platform_ipi x86_platform_ipi diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 275487872be2..8054caee401f 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -11,24 +11,12 @@ #include #include -static inline void __smp_irq_work_interrupt(void) -{ - inc_irq_stat(apic_irq_work_irqs); - irq_work_run(); -} - __visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs) -{ - ipi_entering_ack_irq(); - __smp_irq_work_interrupt(); - exiting_irq(); -} - -__visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_irq_work_entry(IRQ_WORK_VECTOR); - __smp_irq_work_interrupt(); + inc_irq_stat(apic_irq_work_irqs); + irq_work_run(); trace_irq_work_exit(IRQ_WORK_VECTOR); exiting_irq(); } -- cgit v1.2.3 From 0f42ae283c9b3ebfa34cac6d86c335aa1ebe8ac1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:28 +0200 Subject: x86/mce: Remove duplicated tracing interrupt code Machine checks are not really high frequency events. The extra two NOP5s for the disabled tracepoints are noise vs. the heavy lifting which needs to be done in the MCE handler. 
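Like the other conversions in this series, the hw_irq.h hunk below also turns the extern trace_*_interrupt() declarations into plain aliases of the regular entry points, e.g.:

	#define trace_thermal_interrupt		thermal_interrupt
	#define trace_threshold_interrupt	threshold_interrupt
	#define trace_deferred_error_interrupt	deferred_error_interrupt

The apparent intent is that the not-yet-removed tracing-IDT setup code, which still installs gates for the trace_##name entry points, keeps pointing at the single remaining handler until the tracing IDT itself is deleted later in the series.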
Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064957.144301907@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 6 +++--- arch/x86/kernel/cpu/mcheck/mce_amd.c | 16 ++-------------- arch/x86/kernel/cpu/mcheck/therm_throt.c | 20 +++----------------- arch/x86/kernel/cpu/mcheck/threshold.c | 16 ++-------------- 4 files changed, 10 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index fd7e7e67ff79..5f042ec1b4ee 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -48,12 +48,12 @@ extern asmlinkage void call_function_single_interrupt(void); #ifdef CONFIG_TRACING /* Interrupt handlers registered during init_IRQ */ -extern void trace_thermal_interrupt(void); extern void trace_reschedule_interrupt(void); -extern void trace_threshold_interrupt(void); -extern void trace_deferred_error_interrupt(void); extern void trace_call_function_interrupt(void); extern void trace_call_function_single_interrupt(void); +#define trace_thermal_interrupt thermal_interrupt +#define trace_threshold_interrupt threshold_interrupt +#define trace_deferred_error_interrupt deferred_error_interrupt #define trace_irq_work_interrupt irq_work_interrupt #define trace_error_interrupt error_interrupt #define trace_spurious_interrupt spurious_interrupt diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 9e314bcf67cc..172924d57d24 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -776,24 +776,12 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) mce_log(&m); } -static inline void __smp_deferred_error_interrupt(void) -{ - inc_irq_stat(irq_deferred_error_count); - deferred_error_int_vector(); -} - asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void) -{ - entering_irq(); - __smp_deferred_error_interrupt(); - exiting_ack_irq(); -} - -asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void) { entering_irq(); trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); - __smp_deferred_error_interrupt(); + inc_irq_stat(irq_deferred_error_count); + deferred_error_int_vector(); trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR); exiting_ack_irq(); } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index f7370abd33c6..2da67b70ba98 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -390,26 +390,12 @@ static void unexpected_thermal_interrupt(void) static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; -static inline void __smp_thermal_interrupt(void) -{ - inc_irq_stat(irq_thermal_count); - smp_thermal_vector(); -} - -asmlinkage __visible void __irq_entry -smp_thermal_interrupt(struct pt_regs *regs) -{ - entering_irq(); - __smp_thermal_interrupt(); - exiting_ack_irq(); -} - -asmlinkage __visible void __irq_entry -smp_trace_thermal_interrupt(struct pt_regs *regs) +asmlinkage __visible void __irq_entry smp_thermal_interrupt(struct pt_regs *r) { entering_irq(); trace_thermal_apic_entry(THERMAL_APIC_VECTOR); - __smp_thermal_interrupt(); + inc_irq_stat(irq_thermal_count); + smp_thermal_vector(); trace_thermal_apic_exit(THERMAL_APIC_VECTOR); exiting_ack_irq(); } diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c 
b/arch/x86/kernel/cpu/mcheck/threshold.c index bb0e75eed10a..5e7249e42f8f 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -17,24 +17,12 @@ static void default_threshold_interrupt(void) void (*mce_threshold_vector)(void) = default_threshold_interrupt; -static inline void __smp_threshold_interrupt(void) -{ - inc_irq_stat(irq_threshold_count); - mce_threshold_vector(); -} - asmlinkage __visible void __irq_entry smp_threshold_interrupt(void) -{ - entering_irq(); - __smp_threshold_interrupt(); - exiting_ack_irq(); -} - -asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void) { entering_irq(); trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); - __smp_threshold_interrupt(); + inc_irq_stat(irq_threshold_count); + mce_threshold_vector(); trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR); exiting_ack_irq(); } -- cgit v1.2.3 From 85b77cdd8fbd163d65f340e3f6578c50031af960 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:29 +0200 Subject: x86/smp: Remove pointless duplicated interrupt code Two NOP5s are really a good tradeoff vs. the unholy IDT switching mess, which duplicates code all over the place. The rescheduling interrupt gets optimized in a later step. Make the ordering of function call and statistics increment the same as in other places. Calculate stats first, then do the function call. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064957.222101344@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 4 ++-- arch/x86/kernel/smp.c | 43 +++++++------------------------------------ 2 files changed, 9 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 5f042ec1b4ee..8fb6c228438f 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -49,8 +49,8 @@ extern asmlinkage void call_function_single_interrupt(void); #ifdef CONFIG_TRACING /* Interrupt handlers registered during init_IRQ */ extern void trace_reschedule_interrupt(void); -extern void trace_call_function_interrupt(void); -extern void trace_call_function_single_interrupt(void); +#define trace_call_function_interrupt call_function_interrupt +#define trace_call_function_single_interrupt call_function_single_interrupt #define trace_thermal_interrupt thermal_interrupt #define trace_threshold_interrupt threshold_interrupt #define trace_deferred_error_interrupt deferred_error_interrupt diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index d798c0da451c..fb49e10cc30a 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -281,57 +281,28 @@ __visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs) */ ipi_entering_ack_irq(); trace_reschedule_entry(RESCHEDULE_VECTOR); - __smp_reschedule_interrupt(); + inc_irq_stat(irq_resched_count); + scheduler_ipi(); trace_reschedule_exit(RESCHEDULE_VECTOR); exiting_irq(); - /* - * KVM uses this interrupt to force a cpu out of guest mode - */ -} - -static inline void __smp_call_function_interrupt(void) -{ - generic_smp_call_function_interrupt(); - inc_irq_stat(irq_call_count); } __visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs) -{ - ipi_entering_ack_irq(); - __smp_call_function_interrupt(); - exiting_irq(); -} - -__visible void __irq_entry -smp_trace_call_function_interrupt(struct pt_regs *regs) { 
ipi_entering_ack_irq(); trace_call_function_entry(CALL_FUNCTION_VECTOR); - __smp_call_function_interrupt(); - trace_call_function_exit(CALL_FUNCTION_VECTOR); - exiting_irq(); -} - -static inline void __smp_call_function_single_interrupt(void) -{ - generic_smp_call_function_single_interrupt(); inc_irq_stat(irq_call_count); -} - -__visible void __irq_entry -smp_call_function_single_interrupt(struct pt_regs *regs) -{ - ipi_entering_ack_irq(); - __smp_call_function_single_interrupt(); + generic_smp_call_function_interrupt(); + trace_call_function_exit(CALL_FUNCTION_VECTOR); exiting_irq(); } -__visible void __irq_entry -smp_trace_call_function_single_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_call_function_single_interrupt(struct pt_regs *r) { ipi_entering_ack_irq(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); - __smp_call_function_single_interrupt(); + inc_irq_stat(irq_call_count); + generic_smp_call_function_single_interrupt(); trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR); exiting_irq(); } -- cgit v1.2.3 From 3cd788c1eec4b3659671aa13d335a15102ac4d06 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:30 +0200 Subject: x86/smp: Use static key for reschedule interrupt tracing It's worth to avoid the extra irq_enter()/irq_exit() pair in the case that the reschedule interrupt tracepoints are disabled. Use the static key which indicates that exception tracing is enabled. For now this key is global. It will be optimized in a later step. Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170828064957.299808677@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 2 +- arch/x86/kernel/smp.c | 40 +++++++++++++++------------------------- 2 files changed, 16 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 8fb6c228438f..4626f8790ce7 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -48,7 +48,7 @@ extern asmlinkage void call_function_single_interrupt(void); #ifdef CONFIG_TRACING /* Interrupt handlers registered during init_IRQ */ -extern void trace_reschedule_interrupt(void); +#define trace_reschedule_interrupt reschedule_interrupt #define trace_call_function_interrupt call_function_interrupt #define trace_call_function_single_interrupt call_function_single_interrupt #define trace_thermal_interrupt thermal_interrupt diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index fb49e10cc30a..cfe865b85bc5 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -254,37 +254,27 @@ finish: } /* - * Reschedule call back. + * Reschedule call back. KVM uses this interrupt to force a cpu out of + * guest mode */ -static inline void __smp_reschedule_interrupt(void) -{ - inc_irq_stat(irq_resched_count); - scheduler_ipi(); -} - __visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); - __smp_reschedule_interrupt(); - /* - * KVM uses this interrupt to force a cpu out of guest mode - */ -} - -__visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs) -{ - /* - * Need to call irq_enter() before calling the trace point. - * __smp_reschedule_interrupt() calls irq_enter/exit() too (in - * scheduler_ipi(). This is OK, since those functions are allowed - * to nest. 
- */ - ipi_entering_ack_irq(); - trace_reschedule_entry(RESCHEDULE_VECTOR); inc_irq_stat(irq_resched_count); + + if (trace_irqvectors_enabled()) { + /* + * scheduler_ipi() might call irq_enter() as well, but + * nested calls are fine. + */ + irq_enter(); + trace_reschedule_entry(RESCHEDULE_VECTOR); + scheduler_ipi(); + trace_reschedule_exit(RESCHEDULE_VECTOR); + irq_exit(); + return; + } scheduler_ipi(); - trace_reschedule_exit(RESCHEDULE_VECTOR); - exiting_irq(); } __visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs) -- cgit v1.2.3 From 4b9a8dca0e58b6fee229795f77e902a4dfd116fe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:31 +0200 Subject: x86/idt: Remove the tracing IDT completely No more users of the tracing IDT. All exception tracepoints have been moved into the regular handlers. Get rid of the mess which shouldn't have been created in the first place. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064957.378851687@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 12 ++----- arch/x86/entry/entry_64.S | 13 ------- arch/x86/include/asm/desc.h | 82 ++++-------------------------------------- arch/x86/include/asm/hw_irq.h | 20 ----------- arch/x86/include/asm/segment.h | 3 -- arch/x86/kernel/tracepoint.c | 43 ---------------------- arch/x86/kernel/traps.c | 6 +--- 7 files changed, 9 insertions(+), 170 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 0092da1c056f..8a13d468635a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -673,16 +673,8 @@ ENTRY(name) \ jmp ret_from_intr; \ ENDPROC(name) - -#ifdef CONFIG_TRACING -# define TRACE_BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name) -#else -# define TRACE_BUILD_INTERRUPT(name, nr) -#endif - #define BUILD_INTERRUPT(name, nr) \ BUILD_INTERRUPT3(name, nr, smp_##name); \ - TRACE_BUILD_INTERRUPT(name, nr) /* The include is where all of the SMP etc. 
interrupts come from */ #include @@ -880,14 +872,14 @@ ENTRY(xen_failsafe_callback) ENDPROC(xen_failsafe_callback) BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, - xen_evtchn_do_upcall) + xen_evtchn_do_upcall) #endif /* CONFIG_XEN */ #if IS_ENABLED(CONFIG_HYPERV) BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR, - hyperv_vector_handler) + hyperv_vector_handler) #endif /* CONFIG_HYPERV */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2731b9471770..7a1d383c2192 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -748,18 +748,6 @@ ENTRY(\sym) END(\sym) .endm -#ifdef CONFIG_TRACING -#define trace(sym) trace_##sym -#define smp_trace(sym) smp_trace_##sym - -.macro trace_apicinterrupt num sym -apicinterrupt3 \num trace(\sym) smp_trace(\sym) -.endm -#else -.macro trace_apicinterrupt num sym do_sym -.endm -#endif - /* Make sure APIC interrupt handlers end up in the irqentry section: */ #if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) # define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax" @@ -772,7 +760,6 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym) .macro apicinterrupt num sym do_sym PUSH_SECTION_IRQENTRY apicinterrupt3 \num \sym \do_sym -trace_apicinterrupt \num \sym POP_SECTION_IRQENTRY .endm diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 71094f208673..d18a604a0941 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -421,35 +421,7 @@ static inline void set_nmi_gate(int gate, void *addr) } #endif -#ifdef CONFIG_TRACING -extern struct desc_ptr trace_idt_descr; -extern gate_desc trace_idt_table[]; -static inline void write_trace_idt_entry(int entry, const gate_desc *gate) -{ - write_idt_entry(trace_idt_table, entry, gate); -} - -static inline void _trace_set_gate(int gate, unsigned type, void *addr, - unsigned dpl, unsigned ist, unsigned seg) -{ - gate_desc s; - - pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg); - /* - * does not need to be atomic because it is only done once at - * setup time - */ - write_trace_idt_entry(gate, &s); -} -#else -static inline void write_trace_idt_entry(int entry, const gate_desc *gate) -{ -} - -#define _trace_set_gate(gate, type, addr, dpl, ist, seg) -#endif - -static inline void _set_gate(int gate, unsigned type, void *addr, +static inline void _set_gate(int gate, unsigned type, const void *addr, unsigned dpl, unsigned ist, unsigned seg) { gate_desc s; @@ -460,28 +432,13 @@ static inline void _set_gate(int gate, unsigned type, void *addr, * setup time */ write_idt_entry(idt_table, gate, &s); - write_trace_idt_entry(gate, &s); } -/* - * This needs to use 'idt_table' rather than 'idt', and - * thus use the _nonmapped_ version of the IDT, as the - * Pentium F0 0F bugfix can have resulted in the mapped - * IDT being write-protected. 
- */ -#define set_intr_gate_notrace(n, addr) \ - do { \ - BUG_ON((unsigned)n > 0xFF); \ - _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \ - __KERNEL_CS); \ - } while (0) - -#define set_intr_gate(n, addr) \ - do { \ - set_intr_gate_notrace(n, addr); \ - _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\ - 0, 0, __KERNEL_CS); \ - } while (0) +static inline void set_intr_gate(unsigned int n, const void *addr) +{ + BUG_ON(n > 0xFF); + _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); +} extern unsigned long used_vectors[]; @@ -565,31 +522,6 @@ static inline void load_debug_idt(void) } #endif -#ifdef CONFIG_TRACING -extern atomic_t trace_idt_ctr; -static inline bool is_trace_idt_enabled(void) -{ - if (atomic_read(&trace_idt_ctr)) - return true; - - return false; -} - -static inline void load_trace_idt(void) -{ - load_idt((const struct desc_ptr *)&trace_idt_descr); -} -#else -static inline bool is_trace_idt_enabled(void) -{ - return false; -} - -static inline void load_trace_idt(void) -{ -} -#endif - /* * The load_current_idt() must be called with interrupts disabled * to avoid races. That way the IDT will always be set back to the expected @@ -601,8 +533,6 @@ static inline void load_current_idt(void) { if (is_debug_idt_enabled()) load_debug_idt(); - else if (is_trace_idt_enabled()) - load_trace_idt(); else load_idt((const struct desc_ptr *)&idt_descr); } diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 4626f8790ce7..6dfe366a8804 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -46,26 +46,6 @@ extern asmlinkage void deferred_error_interrupt(void); extern asmlinkage void call_function_interrupt(void); extern asmlinkage void call_function_single_interrupt(void); -#ifdef CONFIG_TRACING -/* Interrupt handlers registered during init_IRQ */ -#define trace_reschedule_interrupt reschedule_interrupt -#define trace_call_function_interrupt call_function_interrupt -#define trace_call_function_single_interrupt call_function_single_interrupt -#define trace_thermal_interrupt thermal_interrupt -#define trace_threshold_interrupt threshold_interrupt -#define trace_deferred_error_interrupt deferred_error_interrupt -#define trace_irq_work_interrupt irq_work_interrupt -#define trace_error_interrupt error_interrupt -#define trace_spurious_interrupt spurious_interrupt -#define trace_x86_platform_ipi x86_platform_ipi -#define trace_apic_timer_interrupt apic_timer_interrupt -#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt -#define trace_reboot_interrupt reboot_interrupt -#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi -#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi -#define trace_kvm_posted_intr_nested_ipi kvm_posted_intr_nested_ipi -#endif /* CONFIG_TRACING */ - #ifdef CONFIG_X86_LOCAL_APIC struct irq_data; struct pci_dev; diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 1549caa098f0..5a602d6e874d 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -238,9 +238,6 @@ #ifndef __ASSEMBLY__ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; -#ifdef CONFIG_TRACING -# define trace_early_idt_handler_array early_idt_handler_array -#endif /* * Load a segment. 
Fall back on loading the zero segment if something goes diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c index dd4aa04bb95c..4cae92f15495 100644 --- a/arch/x86/kernel/tracepoint.c +++ b/arch/x86/kernel/tracepoint.c @@ -10,58 +10,15 @@ #include #include -atomic_t trace_idt_ctr = ATOMIC_INIT(0); -struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1, - (unsigned long) trace_idt_table }; - -/* No need to be aligned, but done to keep all IDTs defined the same way. */ -gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss; - DEFINE_STATIC_KEY_FALSE(trace_irqvectors_key); -static int trace_irq_vector_refcount; -static DEFINE_MUTEX(irq_vector_mutex); - -static void set_trace_idt_ctr(int val) -{ - atomic_set(&trace_idt_ctr, val); - /* Ensure the trace_idt_ctr is set before sending IPI */ - wmb(); -} - -static void switch_idt(void *arg) -{ - unsigned long flags; - - local_irq_save(flags); - load_current_idt(); - local_irq_restore(flags); -} int trace_irq_vector_regfunc(void) { static_branch_inc(&trace_irqvectors_key); - - mutex_lock(&irq_vector_mutex); - if (!trace_irq_vector_refcount) { - set_trace_idt_ctr(1); - smp_call_function(switch_idt, NULL, 0); - switch_idt(NULL); - } - trace_irq_vector_refcount++; - mutex_unlock(&irq_vector_mutex); return 0; } void trace_irq_vector_unregfunc(void) { static_branch_dec(&trace_irqvectors_key); - - mutex_lock(&irq_vector_mutex); - trace_irq_vector_refcount--; - if (!trace_irq_vector_refcount) { - set_trace_idt_ctr(0); - smp_call_function(switch_idt, NULL, 0); - switch_idt(NULL); - } - mutex_unlock(&irq_vector_mutex); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 309532451d94..36c583625328 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -939,12 +939,8 @@ void __init early_trap_init(void) * stack. Using the original stack works well enough at this * early stage. DEBUG_STACK will be equipped after cpu_init() in * trap_init(). - * - * We don't need to set trace_idt_table like set_intr_gate(), - * since we don't have trace_debug and it will be reset to - * 'debug' in trap_init() by set_intr_gate_ist(). */ - set_intr_gate_notrace(X86_TRAP_DB, debug); + set_intr_gate(X86_TRAP_DB, debug); /* int3 can be called from all */ set_system_intr_gate(X86_TRAP_BP, &int3); #ifdef CONFIG_X86_32 -- cgit v1.2.3 From 6f54f3ec6c5d866c0517e9cf444bed762c2a05b7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:32 +0200 Subject: x86/idt: Clean up the i386 low level entry macros Some of the entry function defines for i386 were explictely using the BUILD_INTERRUPT3() macro to prevent that the extra trace entry got added via BUILD_INTERRUPT(). No that the trace cruft is gone, the file can be cleaned up and converted to use BUILD_INTERRUPT() which avoids the ugly line breaks. 
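The conversion is equivalent because, with the TRACE_BUILD_INTERRUPT() variant removed in the previous patch, BUILD_INTERRUPT() in entry_32.S reduces to:

	#define BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(name, nr, smp_##name)

so, for example, the new

	BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR)

expands to the previously explicit

	BUILD_INTERRUPT3(reboot_interrupt, REBOOT_VECTOR, smp_reboot_interrupt)

(both taken from the entry_32.S and entry_arch.h hunks in this series).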
Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064957.456815006@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/entry_arch.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 07b06955a05d..c91165098d2b 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -13,20 +13,16 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) -BUILD_INTERRUPT3(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR, - smp_irq_move_cleanup_interrupt) -BUILD_INTERRUPT3(reboot_interrupt, REBOOT_VECTOR, smp_reboot_interrupt) +BUILD_INTERRUPT(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR) +BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR) #endif BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) #ifdef CONFIG_HAVE_KVM -BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR, - smp_kvm_posted_intr_ipi) -BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR, - smp_kvm_posted_intr_wakeup_ipi) -BUILD_INTERRUPT3(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR, - smp_kvm_posted_intr_nested_ipi) +BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) +BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR) +BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR) #endif /* -- cgit v1.2.3 From 809547472edae0bc68f2b5abc37b92c8a988bc8a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:33 +0200 Subject: x86/tracing: Disentangle pagefault and resched IPI tracing key The pagefault and the resched IPI handler are the only ones where it is worth to optimize the code further in case tracepoints are disabled. But it makes no sense to have a single static key for both. Seperate the static keys so the facilities are handled seperately. 
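With separate keys each hot path only tests the key it cares about; schematically (a sketch of the pattern used in the hunks below):

	/* page fault path */
	if (trace_pagefault_enabled())
		trace_page_fault_entries(address, regs, error_code);

	/* reschedule IPI path */
	if (trace_resched_ipi_enabled())
		/* emit the reschedule entry/exit tracepoints */

so enabling the frequent page fault tracepoints no longer patches the reschedule IPI path, and vice versa.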
Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064957.536699116@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/trace/common.h | 15 ++++++++------- arch/x86/include/asm/trace/exceptions.h | 6 ++++-- arch/x86/include/asm/trace/irq_vectors.h | 29 +++++++++++++++++++++++------ arch/x86/kernel/smp.c | 2 +- arch/x86/kernel/tracepoint.c | 27 ++++++++++++++++++++++----- arch/x86/mm/fault.c | 2 +- 6 files changed, 59 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h index b1eb7b18ee8a..57c8da027d99 100644 --- a/arch/x86/include/asm/trace/common.h +++ b/arch/x86/include/asm/trace/common.h @@ -1,15 +1,16 @@ #ifndef _ASM_TRACE_COMMON_H #define _ASM_TRACE_COMMON_H -extern int trace_irq_vector_regfunc(void); -extern void trace_irq_vector_unregfunc(void); - #ifdef CONFIG_TRACING -DECLARE_STATIC_KEY_FALSE(trace_irqvectors_key); -#define trace_irqvectors_enabled() \ - static_branch_unlikely(&trace_irqvectors_key) +DECLARE_STATIC_KEY_FALSE(trace_pagefault_key); +#define trace_pagefault_enabled() \ + static_branch_unlikely(&trace_pagefault_key) +DECLARE_STATIC_KEY_FALSE(trace_resched_ipi_key); +#define trace_resched_ipi_enabled() \ + static_branch_unlikely(&trace_resched_ipi_key) #else -static inline bool trace_irqvectors_enabled(void) { return false; } +static inline bool trace_pagefault_enabled(void) { return false; } +static inline bool trace_resched_ipi_enabled(void) { return false; } #endif #endif diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h index 960a5b50ac3b..5665bf205b8d 100644 --- a/arch/x86/include/asm/trace/exceptions.h +++ b/arch/x86/include/asm/trace/exceptions.h @@ -7,6 +7,9 @@ #include #include +extern int trace_pagefault_reg(void); +extern void trace_pagefault_unreg(void); + DECLARE_EVENT_CLASS(x86_exceptions, TP_PROTO(unsigned long address, struct pt_regs *regs, @@ -35,8 +38,7 @@ DEFINE_EVENT_FN(x86_exceptions, name, \ TP_PROTO(unsigned long address, struct pt_regs *regs, \ unsigned long error_code), \ TP_ARGS(address, regs, error_code), \ - trace_irq_vector_regfunc, \ - trace_irq_vector_unregfunc); + trace_pagefault_reg, trace_pagefault_unreg); DEFINE_PAGE_FAULT_EVENT(page_fault_user); DEFINE_PAGE_FAULT_EVENT(page_fault_kernel); diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 7825b4426e7e..a1bdc25b6507 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -7,6 +7,9 @@ #include #include +extern int trace_resched_ipi_reg(void); +extern void trace_resched_ipi_unreg(void); + DECLARE_EVENT_CLASS(x86_irq_vector, TP_PROTO(int vector), @@ -24,17 +27,24 @@ DECLARE_EVENT_CLASS(x86_irq_vector, TP_printk("vector=%d", __entry->vector) ); #define DEFINE_IRQ_VECTOR_EVENT(name) \ +DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \ + TP_PROTO(int vector), \ + TP_ARGS(vector), NULL, NULL); \ +DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ + TP_PROTO(int vector), \ + TP_ARGS(vector), NULL, NULL); + +#define DEFINE_RESCHED_IPI_EVENT(name) \ DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \ TP_PROTO(int vector), \ TP_ARGS(vector), \ - trace_irq_vector_regfunc, \ - trace_irq_vector_unregfunc); \ + trace_resched_ipi_reg, \ + trace_resched_ipi_unreg); \ DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ TP_PROTO(int vector), \ TP_ARGS(vector), \ - 
trace_irq_vector_regfunc, \ - trace_irq_vector_unregfunc); - + trace_resched_ipi_reg, \ + trace_resched_ipi_unreg); /* * local_timer - called when entering/exiting a local timer interrupt @@ -42,10 +52,17 @@ DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ */ DEFINE_IRQ_VECTOR_EVENT(local_timer); +/* + * The ifdef is required because that tracepoint macro hell emits tracepoint + * code in files which include this header even if the tracepoint is not + * enabled. Brilliant stuff that. + */ +#ifdef CONFIG_SMP /* * reschedule - called when entering/exiting a reschedule vector handler */ -DEFINE_IRQ_VECTOR_EVENT(reschedule); +DEFINE_RESCHED_IPI_EVENT(reschedule); +#endif /* * spurious_apic - called when entering/exiting a spurious apic vector handler diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index cfe865b85bc5..5c574dff4c1a 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -262,7 +262,7 @@ __visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs) ack_APIC_irq(); inc_irq_stat(irq_resched_count); - if (trace_irqvectors_enabled()) { + if (trace_resched_ipi_enabled()) { /* * scheduler_ipi() might call irq_enter() as well, but * nested calls are fine. diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c index 4cae92f15495..c6636d1f60b9 100644 --- a/arch/x86/kernel/tracepoint.c +++ b/arch/x86/kernel/tracepoint.c @@ -10,15 +10,32 @@ #include #include -DEFINE_STATIC_KEY_FALSE(trace_irqvectors_key); +DEFINE_STATIC_KEY_FALSE(trace_pagefault_key); -int trace_irq_vector_regfunc(void) +int trace_pagefault_reg(void) { - static_branch_inc(&trace_irqvectors_key); + static_branch_inc(&trace_pagefault_key); return 0; } -void trace_irq_vector_unregfunc(void) +void trace_pagefault_unreg(void) { - static_branch_dec(&trace_irqvectors_key); + static_branch_dec(&trace_pagefault_key); } + +#ifdef CONFIG_SMP + +DEFINE_STATIC_KEY_FALSE(trace_resched_ipi_key); + +int trace_resched_ipi_reg(void) +{ + static_branch_inc(&trace_resched_ipi_key); + return 0; +} + +void trace_resched_ipi_unreg(void) +{ + static_branch_dec(&trace_resched_ipi_key); +} + +#endif diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 58d7b3a4ec2e..f9bb6608f6f1 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1510,7 +1510,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) enum ctx_state prev_state; prev_state = exception_enter(); - if (trace_irqvectors_enabled()) + if (trace_pagefault_enabled()) trace_page_fault_entries(address, regs, error_code); __do_page_fault(regs, error_code, address); -- cgit v1.2.3 From 0428e01a2f13a6b7dae8289fb10030dbea336dee Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:34 +0200 Subject: x86/ipi: Make platform IPI depend on APIC The platform IPI vector is only installed when the local APIC is enabled. All users of it depend on the local APIC anyway. Make the related code conditional on CONFIG_X86_LOCAL_APIC=y. 
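A platform IPI user cannot work without the local APIC in the first place; the usage pattern is essentially (sketch, handler name made up for illustration):

	/* a hypothetical user registers its handler ... */
	x86_platform_ipi_callback = my_platform_handler;
	/* ... and the vector is only ever delivered through the local APIC,
	 * e.g. via apic->send_IPI_self(X86_PLATFORM_IPI_VECTOR) */

so moving the handler and the /proc/interrupts accounting under the #ifdef loses nothing.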
Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064957.615286163@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/entry_arch.h | 3 +-- arch/x86/kernel/irq.c | 11 ++++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index c91165098d2b..aa15d1f7e530 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -17,8 +17,6 @@ BUILD_INTERRUPT(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR) BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR) #endif -BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) - #ifdef CONFIG_HAVE_KVM BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR) @@ -37,6 +35,7 @@ BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR) BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) #ifdef CONFIG_IRQ_WORK BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index befdd4a54ecc..52089c043160 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -29,9 +29,6 @@ EXPORT_PER_CPU_SYMBOL(irq_regs); atomic_t irq_err_count; -/* Function pointer for generic interrupt vector handling */ -void (*x86_platform_ipi_callback)(void) = NULL; - /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. @@ -87,13 +84,13 @@ int arch_show_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count); seq_puts(p, " APIC ICR read retries\n"); -#endif if (x86_platform_ipi_callback) { seq_printf(p, "%*s: ", prec, "PLT"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); seq_puts(p, " Platform interrupts\n"); } +#endif #ifdef CONFIG_SMP seq_printf(p, "%*s: ", prec, "RES"); for_each_online_cpu(j) @@ -183,9 +180,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->apic_perf_irqs; sum += irq_stats(cpu)->apic_irq_work_irqs; sum += irq_stats(cpu)->icr_read_retry_count; -#endif if (x86_platform_ipi_callback) sum += irq_stats(cpu)->x86_platform_ipis; +#endif #ifdef CONFIG_SMP sum += irq_stats(cpu)->irq_resched_count; sum += irq_stats(cpu)->irq_call_count; @@ -259,6 +256,9 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) return 1; } +#ifdef CONFIG_X86_LOCAL_APIC +/* Function pointer for generic interrupt vector handling */ +void (*x86_platform_ipi_callback)(void) = NULL; /* * Handler for X86_PLATFORM_IPI_VECTOR. */ @@ -275,6 +275,7 @@ __visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs) exiting_irq(); set_irq_regs(old_regs); } +#endif #ifdef CONFIG_HAVE_KVM static void dummy_handler(void) {} -- cgit v1.2.3 From a45525b5b47c10c0446eda21227792b39af233dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:35 +0200 Subject: x86/irq_work: Make it depend on APIC The irq work interrupt vector is only installed when CONFIG_X86_LOCAL_APIC is enabled, but the interrupt handler is compiled in unconditionally. Compile the cruft out when the APIC is disabled. 
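Nothing is lost for the !APIC case: the generic code falls back to running queued work from the timer tick. Roughly (simplified from kernel/irq_work.c, for illustration):

	if (arch_irq_work_has_interrupt())
		arch_irq_work_raise();	/* self-IPI, work runs right away */
	else
		/* no IPI available: irq_work_tick() picks the work up later */

so the !CONFIG_X86_LOCAL_APIC stub returning false is all that is needed.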
Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064957.691909010@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_work.h | 8 ++++++++ arch/x86/kernel/irq_work.c | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index f70604125286..ddbb8ea0f5a9 100644 --- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -3,9 +3,17 @@ #include +#ifdef CONFIG_X86_LOCAL_APIC static inline bool arch_irq_work_has_interrupt(void) { return boot_cpu_has(X86_FEATURE_APIC); } +extern void arch_irq_work_raise(void); +#else +static inline bool arch_irq_work_has_interrupt(void) +{ + return false; +} +#endif #endif /* _ASM_IRQ_WORK_H */ diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 8054caee401f..70dee056f92b 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -11,6 +11,7 @@ #include #include +#ifdef CONFIG_X86_LOCAL_APIC __visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); @@ -23,11 +24,10 @@ __visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs) void arch_irq_work_raise(void) { -#ifdef CONFIG_X86_LOCAL_APIC if (!arch_irq_work_has_interrupt()) return; apic->send_IPI_self(IRQ_WORK_VECTOR); apic_wait_icr_idle(); -#endif } +#endif -- cgit v1.2.3 From 73285527804402befe5d5140aeede21c16544b4c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:36 +0200 Subject: x86/tracing: Build tracepoints only when they are used The tracepoint macro magic emits code for all tracepoints in a event header file. That code stays around even if the tracepoint is not used at all. The linker does not discard it. Build the various irq_vector tracepoints dependent on the appropriate CONFIG switches. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064957.770651777@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/trace/irq_vectors.h | 36 +++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index a1bdc25b6507..1599d394c8c1 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -7,6 +7,8 @@ #include #include +#ifdef CONFIG_X86_LOCAL_APIC + extern int trace_resched_ipi_reg(void); extern void trace_resched_ipi_unreg(void); @@ -52,18 +54,6 @@ DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ */ DEFINE_IRQ_VECTOR_EVENT(local_timer); -/* - * The ifdef is required because that tracepoint macro hell emits tracepoint - * code in files which include this header even if the tracepoint is not - * enabled. Brilliant stuff that. 
- */ -#ifdef CONFIG_SMP -/* - * reschedule - called when entering/exiting a reschedule vector handler - */ -DEFINE_RESCHED_IPI_EVENT(reschedule); -#endif - /* * spurious_apic - called when entering/exiting a spurious apic vector handler */ @@ -80,6 +70,7 @@ DEFINE_IRQ_VECTOR_EVENT(error_apic); */ DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi); +#ifdef CONFIG_IRQ_WORK /* * irq_work - called when entering/exiting a irq work interrupt * vector handler @@ -96,6 +87,18 @@ DEFINE_IRQ_VECTOR_EVENT(irq_work); * 4) goto 1 */ TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0); +#endif + +/* + * The ifdef is required because that tracepoint macro hell emits tracepoint + * code in files which include this header even if the tracepoint is not + * enabled. Brilliant stuff that. + */ +#ifdef CONFIG_SMP +/* + * reschedule - called when entering/exiting a reschedule vector handler + */ +DEFINE_RESCHED_IPI_EVENT(reschedule); /* * call_function - called when entering/exiting a call function interrupt @@ -108,24 +111,33 @@ DEFINE_IRQ_VECTOR_EVENT(call_function); * single interrupt vector handler */ DEFINE_IRQ_VECTOR_EVENT(call_function_single); +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD /* * threshold_apic - called when entering/exiting a threshold apic interrupt * vector handler */ DEFINE_IRQ_VECTOR_EVENT(threshold_apic); +#endif +#ifdef CONFIG_X86_MCE_AMD /* * deferred_error_apic - called when entering/exiting a deferred apic interrupt * vector handler */ DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic); +#endif +#ifdef CONFIG_X86_THERMAL_VECTOR /* * thermal_apic - called when entering/exiting a thermal apic interrupt * vector handler */ DEFINE_IRQ_VECTOR_EVENT(thermal_apic); +#endif + +#endif /* CONFIG_X86_LOCAL_APIC */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . -- cgit v1.2.3 From 64b163fab684e3de47aa8db6cc08ae7d2e194373 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:37 +0200 Subject: x86/idt: Unify gate_struct handling for 32/64-bit kernels The first 32 bits of gate struct are the same for 32 and 64 bit kernels. The 32-bit version uses desc_struct and no designated data structure, so we need different accessors for 32 and 64 bit kernels. Aside of that the macros which are necessary to build the 32-bit gate descriptor are horrible to read. Unify the gate structs and switch all code fiddling with it over. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064957.861974317@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 8 +++--- arch/x86/include/asm/desc.h | 45 ++++++++++++++----------------- arch/x86/include/asm/desc_defs.h | 57 ++++++++++++++++++++++++++-------------- arch/x86/kvm/vmx.c | 2 +- arch/x86/xen/enlighten_pv.c | 12 ++++----- 5 files changed, 67 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c3e869eaef0c..65f0b24f60db 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1058,7 +1058,7 @@ struct boot_params *efi_main(struct efi_config *c, desc->s = DESC_TYPE_CODE_DATA; desc->dpl = 0; desc->p = 1; - desc->limit = 0xf; + desc->limit1 = 0xf; desc->avl = 0; desc->l = 0; desc->d = SEG_OP_SIZE_32BIT; @@ -1078,7 +1078,7 @@ struct boot_params *efi_main(struct efi_config *c, desc->s = DESC_TYPE_CODE_DATA; desc->dpl = 0; desc->p = 1; - desc->limit = 0xf; + desc->limit1 = 0xf; desc->avl = 0; if (IS_ENABLED(CONFIG_X86_64)) { desc->l = 1; @@ -1099,7 +1099,7 @@ struct boot_params *efi_main(struct efi_config *c, desc->s = DESC_TYPE_CODE_DATA; desc->dpl = 0; desc->p = 1; - desc->limit = 0xf; + desc->limit1 = 0xf; desc->avl = 0; desc->l = 0; desc->d = SEG_OP_SIZE_32BIT; @@ -1116,7 +1116,7 @@ struct boot_params *efi_main(struct efi_config *c, desc->s = 0; desc->dpl = 0; desc->p = 1; - desc->limit = 0x0; + desc->limit1 = 0x0; desc->avl = 0; desc->l = 0; desc->d = 0; diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index d18a604a0941..0731064c633b 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -84,33 +84,25 @@ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu) return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu)); } -#ifdef CONFIG_X86_64 - static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, unsigned dpl, unsigned ist, unsigned seg) { - gate->offset_low = PTR_LOW(func); + gate->offset_low = (u16) func; + gate->bits.p = 1; + gate->bits.dpl = dpl; + gate->bits.zero = 0; + gate->bits.type = type; + gate->offset_middle = (u16) (func >> 16); +#ifdef CONFIG_X86_64 gate->segment = __KERNEL_CS; - gate->ist = ist; - gate->p = 1; - gate->dpl = dpl; - gate->zero0 = 0; - gate->zero1 = 0; - gate->type = type; - gate->offset_middle = PTR_MIDDLE(func); - gate->offset_high = PTR_HIGH(func); -} - + gate->bits.ist = ist; + gate->reserved = 0; + gate->offset_high = (u32) (func >> 32); #else -static inline void pack_gate(gate_desc *gate, unsigned char type, - unsigned long base, unsigned dpl, unsigned flags, - unsigned short seg) -{ - gate->a = (seg << 16) | (base & 0xffff); - gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8); -} - + gate->segment = seg; + gate->bits.ist = 0; #endif +} static inline int desc_empty(const void *ptr) { @@ -186,7 +178,8 @@ static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, } -static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size) +static inline void set_tssldt_descriptor(void *d, unsigned long addr, + unsigned type, unsigned size) { #ifdef CONFIG_X86_64 struct ldttss_desc64 *desc = d; @@ -194,13 +187,13 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t memset(desc, 0, sizeof(*desc)); desc->limit0 = size & 0xFFFF; - desc->base0 = PTR_LOW(addr); 
- desc->base1 = PTR_MIDDLE(addr) & 0xFF; + desc->base0 = (u16) addr; + desc->base1 = (addr >> 16) & 0xFF; desc->type = type; desc->p = 1; desc->limit1 = (size >> 16) & 0xF; - desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF; - desc->base3 = PTR_HIGH(addr); + desc->base2 = (addr >> 24) & 0xFF; + desc->base3 = (u32) (addr >> 32); #else pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0); #endif diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 49265345d4d2..d684bee8a59a 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -47,20 +47,6 @@ enum { GATE_TASK = 0x5, }; -/* 16byte gate */ -struct gate_struct64 { - u16 offset_low; - u16 segment; - unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; - u16 offset_middle; - u32 offset_high; - u32 zero1; -} __attribute__((packed)); - -#define PTR_LOW(x) ((unsigned long long)(x) & 0xFFFF) -#define PTR_MIDDLE(x) (((unsigned long long)(x) >> 16) & 0xFFFF) -#define PTR_HIGH(x) ((unsigned long long)(x) >> 32) - enum { DESC_TSS = 0x9, DESC_LDT = 0x2, @@ -77,20 +63,51 @@ struct ldttss_desc64 { u32 zero1; } __attribute__((packed)); + #ifdef CONFIG_X86_64 -typedef struct gate_struct64 gate_desc; typedef struct ldttss_desc64 ldt_desc; typedef struct ldttss_desc64 tss_desc; -#define gate_offset(g) ((g).offset_low | ((unsigned long)(g).offset_middle << 16) | ((unsigned long)(g).offset_high << 32)) -#define gate_segment(g) ((g).segment) #else -typedef struct desc_struct gate_desc; typedef struct desc_struct ldt_desc; typedef struct desc_struct tss_desc; -#define gate_offset(g) (((g).b & 0xffff0000) | ((g).a & 0x0000ffff)) -#define gate_segment(g) ((g).a >> 16) #endif +struct idt_bits { + u16 ist : 3, + zero : 5, + type : 5, + dpl : 2, + p : 1; +} __attribute__((packed)); + +struct gate_struct { + u16 offset_low; + u16 segment; + struct idt_bits bits; + u16 offset_middle; +#ifdef CONFIG_X86_64 + u32 offset_high; + u32 reserved; +#endif +} __attribute__((packed)); + +typedef struct gate_struct gate_desc; + +static inline unsigned long gate_offset(const gate_desc *g) +{ +#ifdef CONFIG_X86_64 + return g->offset_low | ((unsigned long)g->offset_middle << 16) | + ((unsigned long) g->offset_high << 32); +#else + return g->offset_low | ((unsigned long)g->offset_middle << 16); +#endif +} + +static inline unsigned long gate_segment(const gate_desc *g) +{ + return g->segment; +} + struct desc_ptr { unsigned short size; unsigned long address; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c6ef2940119b..08d00eefabea 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8779,7 +8779,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) vector = exit_intr_info & INTR_INFO_VECTOR_MASK; desc = (gate_desc *)vmx->host_idt_base + vector; - entry = gate_offset(*desc); + entry = gate_offset(desc); asm volatile( #ifdef CONFIG_X86_64 "mov %%" _ASM_SP ", %[sp]\n\t" diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 98491521bb43..4c5d72b8aada 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -584,12 +584,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, { unsigned long addr; - if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) + if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT) return 0; info->vector = vector; - addr = gate_offset(*val); + addr = gate_offset(val); #ifdef CONFIG_X86_64 /* * Look for known traps using IST, and substitute them @@ -622,16 +622,16 @@ static int 
cvt_gate_to_trap(int vector, const gate_desc *val, ; else { /* Some other trap using IST? */ - if (WARN_ON(val->ist != 0)) + if (WARN_ON(val->bits.ist != 0)) return 0; } #endif /* CONFIG_X86_64 */ info->address = addr; - info->cs = gate_segment(*val); - info->flags = val->dpl; + info->cs = gate_segment(val); + info->flags = val->bits.dpl; /* interrupt gates clear IF */ - if (val->type == GATE_INTERRUPT) + if (val->bits.type == GATE_INTERRUPT) info->flags |= 1 << 2; return 1; -- cgit v1.2.3 From 1dd439fe97e1a32cbb980c180f1bcb54bb6a2a55 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:38 +0200 Subject: x86/percpu: Use static initializer for GDT entry The IDT cleanup is about to remove pack_descriptor(). The GDT setup for the per-cpu storage can be achieved with the static initializer as well. Replace it. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064957.954214927@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 10edd1e69a68..6e8fcb6f7e1e 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -155,13 +155,10 @@ static void __init pcpup_populate_pte(unsigned long addr) static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 - struct desc_struct gdt; + struct desc_struct d = GDT_ENTRY_INIT(0x8092, per_cpu_offset(cpu), + 0xFFFFF); - pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, - 0x2 | DESCTYPE_S, 0x8); - gdt.s = 1; - write_gdt_entry(get_cpu_gdt_rw(cpu), - GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); + write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PERCPU, &d, DESCTYPE_S); #endif } -- cgit v1.2.3 From 718f5d0030da8669404dab873336b16c169b430b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:39 +0200 Subject: x86/fpu: Use bitfield accessors for desc_struct desc_struct is a union of u32 fields and bitfields. The access to the u32 fields is done with magic macros. Convert it to use the bitfields and replace the macro magic with parseable inline functions. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064958.042406718@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/math-emu/fpu_entry.c | 11 +++++----- arch/x86/math-emu/fpu_system.h | 48 +++++++++++++++++++++++++++++++---------- arch/x86/math-emu/get_address.c | 17 ++++++++------- 3 files changed, 51 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 0203baefb5c0..d4a7df2205b8 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -147,7 +147,7 @@ void math_emulate(struct math_emu_info *info) } code_descriptor = FPU_get_ldt_descriptor(FPU_CS); - if (SEG_D_SIZE(code_descriptor)) { + if (code_descriptor.d) { /* The above test may be wrong, the book is not clear */ /* Segmented 32 bit protected mode */ addr_modes.default_mode = SEG32; @@ -155,11 +155,10 @@ void math_emulate(struct math_emu_info *info) /* 16 bit protected mode */ addr_modes.default_mode = PM16; } - FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor); - code_limit = code_base - + (SEG_LIMIT(code_descriptor) + - 1) * SEG_GRANULARITY(code_descriptor) - - 1; + FPU_EIP += code_base = seg_get_base(&code_descriptor); + code_limit = seg_get_limit(&code_descriptor) + 1; + code_limit *= seg_get_granularity(&code_descriptor); + code_limit += code_base - 1; if (code_limit < code_base) code_limit = 0xffffffff; } diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index a179254a5122..2319a257ec32 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -34,17 +34,43 @@ static inline struct desc_struct FPU_get_ldt_descriptor(unsigned seg) return ret; } -#define SEG_D_SIZE(x) ((x).b & (3 << 21)) -#define SEG_G_BIT(x) ((x).b & (1 << 23)) -#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) -#define SEG_286_MODE(x) ((x).b & ( 0xff000000 | 0xf0000 | (1 << 23))) -#define SEG_BASE_ADDR(s) (((s).b & 0xff000000) \ - | (((s).b & 0xff) << 16) | ((s).a >> 16)) -#define SEG_LIMIT(s) (((s).b & 0xff0000) | ((s).a & 0xffff)) -#define SEG_EXECUTE_ONLY(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 11)) -#define SEG_WRITE_PERM(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9)) -#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ - == (1 << 10)) +#define SEG_TYPE_WRITABLE (1U << 1) +#define SEG_TYPE_EXPANDS_DOWN (1U << 2) +#define SEG_TYPE_EXECUTE (1U << 3) +#define SEG_TYPE_EXPAND_MASK (SEG_TYPE_EXPANDS_DOWN | SEG_TYPE_EXECUTE) +#define SEG_TYPE_EXECUTE_MASK (SEG_TYPE_WRITABLE | SEG_TYPE_EXECUTE) + +static inline unsigned long seg_get_base(struct desc_struct *d) +{ + unsigned long base = (unsigned long)d->base2 << 24; + + return base | ((unsigned long)d->base1 << 16) | d->base0; +} + +static inline unsigned long seg_get_limit(struct desc_struct *d) +{ + return ((unsigned long)d->limit << 16) | d->limit0; +} + +static inline unsigned long seg_get_granularity(struct desc_struct *d) +{ + return d->g ? 
4096 : 1; +} + +static inline bool seg_expands_down(struct desc_struct *d) +{ + return (d->type & SEG_TYPE_EXPAND_MASK) == SEG_TYPE_EXPANDS_DOWN; +} + +static inline bool seg_execute_only(struct desc_struct *d) +{ + return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_EXECUTE; +} + +static inline bool seg_writable(struct desc_struct *d) +{ + return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_WRITABLE; +} #define I387 (¤t->thread.fpu.state) #define FPU_info (I387->soft.info) diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index b8ef9f9d2ffc..c48967c6a0e2 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -159,17 +159,18 @@ static long pm_address(u_char FPU_modrm, u_char segment, } descriptor = FPU_get_ldt_descriptor(addr->selector); - base_address = SEG_BASE_ADDR(descriptor); + base_address = seg_get_base(&descriptor); address = base_address + offset; - limit = base_address - + (SEG_LIMIT(descriptor) + 1) * SEG_GRANULARITY(descriptor) - 1; + limit = seg_get_limit(&descriptor) + 1; + limit *= seg_get_granularity(&descriptor); + limit += base_address - 1; if (limit < base_address) limit = 0xffffffff; - if (SEG_EXPAND_DOWN(descriptor)) { - if (SEG_G_BIT(descriptor)) + if (seg_expands_down(&descriptor)) { + if (descriptor.g) { seg_top = 0xffffffff; - else { + } else { seg_top = base_address + (1 << 20); if (seg_top < base_address) seg_top = 0xffffffff; @@ -182,8 +183,8 @@ static long pm_address(u_char FPU_modrm, u_char segment, (address > limit) || (address < base_address) ? 0 : ((limit - address) >= 254 ? 255 : limit - address + 1); } - if (SEG_EXECUTE_ONLY(descriptor) || - (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT))) { + if (seg_execute_only(&descriptor) || + (!seg_writable(&descriptor) && (FPU_modrm & FPU_WRITE_BIT))) { access_limit = 0; } return address; -- cgit v1.2.3 From 9a98e7780022aa7cd201eb8a88a4f1d607b73cde Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:40 +0200 Subject: x86/asm: Replace access to desc_struct:a/b fields The union inside of desc_struct which allows access to the raw u32 parts of the descriptors. This raw access part is about to go away. Replace the few code parts which access those fields. Signed-off-by: Thomas Gleixner Reviewed-by: Boris Ostrovsky Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064958.120214366@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/xen/hypercall.h | 6 ++++-- arch/x86/kernel/tls.c | 2 +- arch/x86/xen/enlighten_pv.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 11071fcd630e..9606688caa4b 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -552,6 +552,8 @@ static inline void MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, struct desc_struct desc) { + u32 *p = (u32 *) &desc; + mcl->op = __HYPERVISOR_update_descriptor; if (sizeof(maddr) == sizeof(long)) { mcl->args[0] = maddr; @@ -559,8 +561,8 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, } else { mcl->args[0] = maddr; mcl->args[1] = maddr >> 32; - mcl->args[2] = desc.a; - mcl->args[3] = desc.b; + mcl->args[2] = *p++; + mcl->args[3] = *p; } trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4); diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index dcd699baea1b..a106b9719c58 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -93,7 +93,7 @@ static void set_tls_desc(struct task_struct *p, int idx, while (n-- > 0) { if (LDT_empty(info) || LDT_zero(info)) { - desc->a = desc->b = 0; + memset(desc, 0, sizeof(*desc)); } else { fill_ldt(desc, info); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 4c5d72b8aada..03fb07d28299 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -494,7 +494,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) static inline bool desc_equal(const struct desc_struct *d1, const struct desc_struct *d2) { - return d1->a == d2->a && d1->b == d2->b; + return !memcmp(d1, d2, sizeof(*d1)); } static void load_TLS_descriptor(struct thread_struct *t, -- cgit v1.2.3 From 38e9e81f4c81c75799b002d5811de7241b307676 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:41 +0200 Subject: x86/gdt: Use bitfields for initialization The GDT entry related code uses two ways to access entries via union fields: - bitfields - macros which initialize the two 16-bit parts of the entry by magic shift and mask operations. Clean it up and only use the bitfields to initialize and access entries. ( The old access patterns were partly done due to GCC optimizing bitfield accesses in a horrible way - that's mostly fixed these days and clarity of code in such low level accessors is very important. ) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064958.197673367@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vma.c | 2 +- arch/x86/include/asm/desc.h | 26 +++++++++++++++--------- arch/x86/include/asm/desc_defs.h | 44 ++++++++++++++++++---------------------- arch/x86/math-emu/fpu_system.h | 2 +- 4 files changed, 38 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 726355ce8497..1911310959f8 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -351,7 +351,7 @@ static void vgetcpu_cpu_init(void *arg) * and 8 bits for the node) */ d.limit0 = cpu | ((node & 0xf) << 12); - d.limit = node >> 4; + d.limit1 = node >> 4; d.type = 5; /* RO data, expand down, accessed */ d.dpl = 3; /* Visible to user code */ d.s = 1; /* Not a system segment */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 0731064c633b..2090cd223505 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -23,7 +23,7 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in desc->s = 1; desc->dpl = 0x3; desc->p = info->seg_not_present ^ 1; - desc->limit = (info->limit & 0xf0000) >> 16; + desc->limit1 = (info->limit & 0xf0000) >> 16; desc->avl = info->useable; desc->d = info->seg_32bit; desc->g = info->limit_in_pages; @@ -170,14 +170,20 @@ static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, unsigned long limit, unsigned char type, unsigned char flags) { - desc->a = ((base & 0xffff) << 16) | (limit & 0xffff); - desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) | - (limit & 0x000f0000) | ((type & 0xff) << 8) | - ((flags & 0xf) << 20); - desc->p = 1; + desc->limit0 = (u16) limit; + desc->base0 = (u16) base; + desc->base1 = (base >> 16) & 0xFF; + desc->type = type & 0x0F; + desc->s = 0; + desc->dpl = 0; + desc->p = 1; + desc->limit1 = (limit >> 16) & 0xF; + desc->avl = (flags >> 0) & 0x01; + desc->l = (flags >> 1) & 0x01; + desc->d = (flags >> 2) & 0x01; + desc->g = (flags >> 3) & 0x01; } - static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size) { @@ -195,7 +201,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, desc->base2 = (addr >> 24) & 0xFF; desc->base3 = (u32) (addr >> 32); #else - pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0); + pack_descriptor((struct desc_struct *)d, addr, size, type, 0); #endif } @@ -395,13 +401,13 @@ static inline void set_desc_base(struct desc_struct *desc, unsigned long base) static inline unsigned long get_desc_limit(const struct desc_struct *desc) { - return desc->limit0 | (desc->limit << 16); + return desc->limit0 | (desc->limit1 << 16); } static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) { desc->limit0 = limit & 0xffff; - desc->limit = (limit >> 16) & 0xf; + desc->limit1 = (limit >> 16) & 0xf; } #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index d684bee8a59a..1b9494e006a1 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -11,34 +11,30 @@ #include -/* - * FIXME: Accessing the desc_struct through its fields is more elegant, - * and should be the one valid thing to do. However, a lot of open code - * still touches the a and b accessors, and doing this allow us to do it - * incrementally. 
We keep the signature as a struct, rather than a union, - * so we can get rid of it transparently in the future -- glommer - */ /* 8 byte segment descriptor */ struct desc_struct { - union { - struct { - unsigned int a; - unsigned int b; - }; - struct { - u16 limit0; - u16 base0; - unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1; - unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; - }; - }; + u16 limit0; + u16 base0; + u16 base1: 8, type: 4, s: 1, dpl: 2, p: 1; + u16 limit1: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; } __attribute__((packed)); -#define GDT_ENTRY_INIT(flags, base, limit) { { { \ - .a = ((limit) & 0xffff) | (((base) & 0xffff) << 16), \ - .b = (((base) & 0xff0000) >> 16) | (((flags) & 0xf0ff) << 8) | \ - ((limit) & 0xf0000) | ((base) & 0xff000000), \ - } } } +#define GDT_ENTRY_INIT(flags, base, limit) \ + { \ + .limit0 = (u16) (limit), \ + .limit1 = ((limit) >> 16) & 0x0F, \ + .base0 = (u16) (base), \ + .base1 = ((base) >> 16) & 0xFF, \ + .base2 = ((base) >> 24) & 0xFF, \ + .type = (flags & 0x0f), \ + .s = (flags >> 4) & 0x01, \ + .dpl = (flags >> 5) & 0x03, \ + .p = (flags >> 7) & 0x01, \ + .avl = (flags >> 12) & 0x01, \ + .l = (flags >> 13) & 0x01, \ + .d = (flags >> 14) & 0x01, \ + .g = (flags >> 15) & 0x01, \ + } enum { GATE_INTERRUPT = 0xE, diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 2319a257ec32..699f329f1d40 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -49,7 +49,7 @@ static inline unsigned long seg_get_base(struct desc_struct *d) static inline unsigned long seg_get_limit(struct desc_struct *d) { - return ((unsigned long)d->limit << 16) | d->limit0; + return ((unsigned long)d->limit1 << 16) | d->limit0; } static inline unsigned long seg_get_granularity(struct desc_struct *d) -- cgit v1.2.3 From 87cc037674342cbf6213829b2cc59bb71be60777 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:42 +0200 Subject: x86/ldttss: Clean up 32-bit descriptors Like the IDT descriptors, the LDT/TSS descriptors are pointlessly different on 32 and 64 bit kernels. Unify them and get rid of the duplicated code. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064958.289634692@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 26 +++----------------------- arch/x86/include/asm/desc_defs.h | 27 ++++++++++++--------------- 2 files changed, 15 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 2090cd223505..108a9e894019 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -166,42 +166,22 @@ native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int memcpy(&gdt[entry], desc, size); } -static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, - unsigned long limit, unsigned char type, - unsigned char flags) -{ - desc->limit0 = (u16) limit; - desc->base0 = (u16) base; - desc->base1 = (base >> 16) & 0xFF; - desc->type = type & 0x0F; - desc->s = 0; - desc->dpl = 0; - desc->p = 1; - desc->limit1 = (limit >> 16) & 0xF; - desc->avl = (flags >> 0) & 0x01; - desc->l = (flags >> 1) & 0x01; - desc->d = (flags >> 2) & 0x01; - desc->g = (flags >> 3) & 0x01; -} - static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size) { -#ifdef CONFIG_X86_64 - struct ldttss_desc64 *desc = d; + struct ldttss_desc *desc = d; memset(desc, 0, sizeof(*desc)); - desc->limit0 = size & 0xFFFF; + desc->limit0 = (u16) size; desc->base0 = (u16) addr; desc->base1 = (addr >> 16) & 0xFF; desc->type = type; desc->p = 1; desc->limit1 = (size >> 16) & 0xF; desc->base2 = (addr >> 24) & 0xFF; +#ifdef CONFIG_X86_64 desc->base3 = (u32) (addr >> 32); -#else - pack_descriptor((struct desc_struct *)d, addr, size, type, 0); #endif } diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 1b9494e006a1..346d252029b7 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -49,24 +49,21 @@ enum { DESCTYPE_S = 0x10, /* !system */ }; -/* LDT or TSS descriptor in the GDT. 16 bytes. */ -struct ldttss_desc64 { - u16 limit0; - u16 base0; - unsigned base1 : 8, type : 5, dpl : 2, p : 1; - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; - u32 base3; - u32 zero1; -} __attribute__((packed)); - +/* LDT or TSS descriptor in the GDT. */ +struct ldttss_desc { + u16 limit0; + u16 base0; + u16 base1 : 8, type : 5, dpl : 2, p : 1; + u16 limit1 : 4, zero0 : 3, g : 1, base2 : 8; #ifdef CONFIG_X86_64 -typedef struct ldttss_desc64 ldt_desc; -typedef struct ldttss_desc64 tss_desc; -#else -typedef struct desc_struct ldt_desc; -typedef struct desc_struct tss_desc; + u32 base3; + u32 zero1; #endif +} __attribute__((packed)); + +typedef struct ldttss_desc ldt_desc; +typedef struct ldttss_desc tss_desc; struct idt_bits { u16 ist : 3, -- cgit v1.2.3 From d8ed9d48266a27ab02a4bbcb81e755d63aec108a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:43 +0200 Subject: x86/idt: Create file for IDT related code IDT related code lives scattered around in various places. Create a new source file in arch/x86/kernel/idt.c to hold it. Move the idt_tables and descriptors to it for a start. Follow up patches will gradually move more code over. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064958.367081121@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/cpu/common.c | 9 --------- arch/x86/kernel/idt.c | 26 ++++++++++++++++++++++++++ arch/x86/kernel/traps.c | 6 ------ 4 files changed, 27 insertions(+), 16 deletions(-) create mode 100644 arch/x86/kernel/idt.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 6ab5fbfa71b1..fd0a7895b63f 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -42,7 +42,7 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace obj-y := process_$(BITS).o signal.o obj-$(CONFIG_COMPAT) += signal_compat.o -obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o +obj-y += traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o dumpstack.o nmi.o obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c8b39870f33e..71ab8a45cd66 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1289,15 +1289,6 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 -struct desc_ptr idt_descr __ro_after_init = { - .size = NR_VECTORS * 16 - 1, - .address = (unsigned long) idt_table, -}; -const struct desc_ptr debug_idt_descr = { - .size = NR_VECTORS * 16 - 1, - .address = (unsigned long) debug_idt_table, -}; - DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE) __visible; diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c new file mode 100644 index 000000000000..3d19cad18aa2 --- /dev/null +++ b/arch/x86/kernel/idt.c @@ -0,0 +1,26 @@ +/* + * Interrupt descriptor table related code + * + * This file is licensed under the GPL V2 + */ +#include + +#include + +/* Must be page-aligned because the real IDT is used in a fixmap. */ +gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; + +#ifdef CONFIG_X86_64 +/* No need to be aligned, but done to keep all IDTs defined the same way. */ +gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; + +struct desc_ptr idt_descr __ro_after_init = { + .size = IDT_ENTRIES * 16 - 1, + .address = (unsigned long) idt_table, +}; + +const struct desc_ptr debug_idt_descr = { + .size = IDT_ENTRIES * 16 - 1, + .address = (unsigned long) debug_idt_table, +}; +#endif diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 36c583625328..41f4cd331cb2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -65,18 +65,12 @@ #include #include #include - -/* No need to be aligned, but done to keep all IDTs defined the same way. */ -gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss; #else #include #include #include #endif -/* Must be page-aligned because the real IDT is used in a fixmap. */ -gate_desc idt_table[NR_VECTORS] __page_aligned_bss; - DECLARE_BITMAP(used_vectors, NR_VECTORS); static inline void cond_local_irq_enable(struct pt_regs *regs) -- cgit v1.2.3 From 16bc18d895cee95f12bd722e5a3016676dfcf084 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:44 +0200 Subject: x86/idt: Move 32-bit idt_descr to C code 32-bit kernels have the idt_descr defined in the low level assembly entry code, but there is no good reason for that. Move it into the C file and use the 64-bit version of it. 
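The size expression below is written with sizeof(unsigned long) so the single definition fits both worlds; checking the arithmetic (gate descriptors are 8 bytes on 32-bit, 16 bytes on 64-bit):

	32-bit: (IDT_ENTRIES * 2 * sizeof(unsigned long)) - 1 = 256 *  8 - 1 = 2047
	64-bit: (IDT_ENTRIES * 2 * sizeof(unsigned long)) - 1 = 256 * 16 - 1 = 4095

which matches the values used by the removed assembly (IDT_ENTRIES*8-1) and by the previous 64-bit only descriptor (IDT_ENTRIES * 16 - 1).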
Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064958.445862201@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 6 ------ arch/x86/kernel/idt.c | 10 +++++----- 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 29da9599fec0..ce8c6ed417a4 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -622,7 +622,6 @@ int_msg: .data .globl boot_gdt_descr -.globl idt_descr ALIGN # early boot GDT descriptor (must use 1:1 address mapping) @@ -631,11 +630,6 @@ boot_gdt_descr: .word __BOOT_DS+7 .long boot_gdt - __PAGE_OFFSET - .word 0 # 32-bit align idt_desc.address -idt_descr: - .word IDT_ENTRIES*8-1 # idt contains 256 entries - .long idt_table - # boot GDT descriptor (later on used by CPU#0): .word 0 # 32 bit align gdt_desc.address ENTRY(early_gdt_descr) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 3d19cad18aa2..86e5912f2b70 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -10,15 +10,15 @@ /* Must be page-aligned because the real IDT is used in a fixmap. */ gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; -#ifdef CONFIG_X86_64 -/* No need to be aligned, but done to keep all IDTs defined the same way. */ -gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; - struct desc_ptr idt_descr __ro_after_init = { - .size = IDT_ENTRIES * 16 - 1, + .size = (IDT_ENTRIES * 2 * sizeof(unsigned long)) - 1, .address = (unsigned long) idt_table, }; +#ifdef CONFIG_X86_64 +/* No need to be aligned, but done to keep all IDTs defined the same way. */ +gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; + const struct desc_ptr debug_idt_descr = { .size = IDT_ENTRIES * 16 - 1, .address = (unsigned long) debug_idt_table, -- cgit v1.2.3 From 8f55868f9e42fea56021b17421914b9e4fda4960 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:45 +0200 Subject: x86/idt: Remove unused set_trap_gate() This inline is not used at all. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064958.522053134@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 108a9e894019..51b3d480d0b6 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -446,18 +446,6 @@ static inline void set_system_intr_gate(unsigned int n, void *addr) _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); } -static inline void set_system_trap_gate(unsigned int n, void *addr) -{ - BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS); -} - -static inline void set_trap_gate(unsigned int n, void *addr) -{ - BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS); -} - static inline void set_task_gate(unsigned int n, unsigned int gdt_entry) { BUG_ON((unsigned)n > 0xFF); -- cgit v1.2.3 From e802a51ede91350438c051da2f238f5e8c918ead Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:46 +0200 Subject: x86/idt: Consolidate IDT invalidation kexec and reboot have both code to invalidate IDT. Create a common function and use it. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064958.600953282@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 3 +++ arch/x86/kernel/idt.c | 11 +++++++++++ arch/x86/kernel/machine_kexec_32.c | 14 +------------- arch/x86/kernel/reboot.c | 4 +--- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 51b3d480d0b6..33aff45f58ea 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -503,4 +503,7 @@ static inline void load_current_idt(void) else load_idt((const struct desc_ptr *)&idt_descr); } + +extern void idt_invalidate(void *addr); + #endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 86e5912f2b70..cd4658c1e405 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -24,3 +24,14 @@ const struct desc_ptr debug_idt_descr = { .address = (unsigned long) debug_idt_table, }; #endif + +/** + * idt_invalidate - Invalidate interrupt descriptor table + * @addr: The virtual address of the 'invalid' IDT + */ +void idt_invalidate(void *addr) +{ + struct desc_ptr idt = { .address = (unsigned long) addr, .size = 0 }; + + load_idt(&idt); +} diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 8c53c5d7a1bc..00bc751c861c 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -26,18 +26,6 @@ #include #include -static void set_idt(void *newidt, __u16 limit) -{ - struct desc_ptr curidt; - - /* ia32 supports unaliged loads & stores */ - curidt.size = limit; - curidt.address = (unsigned long)newidt; - - load_idt(&curidt); -} - - static void set_gdt(void *newgdt, __u16 limit) { struct desc_ptr curgdt; @@ -245,7 +233,7 @@ void machine_kexec(struct kimage *image) * If you want to load them you must set up your own idt & gdt. 
*/ set_gdt(phys_to_virt(0), 0); - set_idt(phys_to_virt(0), 0); + idt_invalidate(phys_to_virt(0)); /* now call it */ image->start = relocate_kernel_ptr((unsigned long)image->head, diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index a56bf6051f4e..54984b142641 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -38,8 +38,6 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -static const struct desc_ptr no_idt = {}; - /* * This is set if we need to go through the 'emergency' path. * When machine_emergency_restart() is called, we may be on @@ -638,7 +636,7 @@ static void native_machine_emergency_restart(void) break; case BOOT_TRIPLE: - load_idt(&no_idt); + idt_invalidate(NULL); __asm__ __volatile__("int3"); /* We're probably dead after this, but... */ -- cgit v1.2.3 From 588787fde7aa346f345e1a7600f84d88039fc9df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:47 +0200 Subject: x86/idt: Move early IDT handler setup to IDT code The early IDT handler setup is done in C entry code on 64-bit kernels and in ASM entry code on 32-bit kernels. Move the 64-bit variant to the IDT code so it can be shared with 32-bit in the next step. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064958.679561404@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 9 +++++++++ arch/x86/kernel/head64.c | 6 +----- arch/x86/kernel/idt.c | 12 ++++++++++++ 3 files changed, 22 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 33aff45f58ea..5a3cdeb7cb03 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -504,6 +504,15 @@ static inline void load_current_idt(void) load_idt((const struct desc_ptr *)&idt_descr); } +extern void idt_setup_early_handler(void); +extern void idt_setup_early_traps(void); + +#ifdef CONFIG_X86_64 +extern void idt_setup_early_pf(void); +#else +static inline void idt_setup_early_pf(void) { } +#endif + extern void idt_invalidate(void *addr); #endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 9ba79543d9ee..d6ab034bd65f 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -255,8 +255,6 @@ static void __init copy_bootdata(char *real_mode_data) asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) { - int i; - /* * Build-time sanity checks on the kernel image and module * area mappings. 
(these are purely build-time and produce no code) @@ -282,9 +280,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) kasan_early_init(); - for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) - set_intr_gate(i, early_idt_handler_array[i]); - load_idt((const struct desc_ptr *)&idt_descr); + idt_setup_early_handler(); copy_bootdata(__va(real_mode_data)); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index cd4658c1e405..a1475810059c 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -25,6 +25,18 @@ const struct desc_ptr debug_idt_descr = { }; #endif +/** + * idt_setup_early_handler - Initializes the idt table with early handlers + */ +void __init idt_setup_early_handler(void) +{ + int i; + + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) + set_intr_gate(i, early_idt_handler_array[i]); + load_idt(&idt_descr); +} + /** * idt_invalidate - Invalidate interrupt descriptor table * @addr: The virtual address of the 'invalid' IDT -- cgit v1.2.3 From 87e81786b13b267c4355e0d23e33c7e4c08fa63f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:48 +0200 Subject: x86/idt: Move early IDT setup out of 32-bit asm The early IDT setup can be done in C code like it's done on 64-bit kernels. Reuse the 64-bit version. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064958.757980775@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/segment.h | 1 + arch/x86/kernel/head32.c | 4 ++++ arch/x86/kernel/head_32.S | 36 ++---------------------------------- arch/x86/kernel/idt.c | 4 ++++ 4 files changed, 11 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 5a602d6e874d..066aaf813141 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -238,6 +238,7 @@ #ifndef __ASSEMBLY__ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; +extern void early_ignore_irq(void); /* * Load a segment. Fall back on loading the zero segment if something goes diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 538ec012b371..cf2ce063f65a 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -30,6 +31,9 @@ static void __init i386_default_early_setup(void) asmlinkage __visible void __init i386_start_kernel(void) { cr4_init_shadow(); + + idt_setup_early_handler(); + sanitize_boot_params(&boot_params); x86_early_init_platform_quirks(); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index ce8c6ed417a4..a615a5efa8ec 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -345,7 +345,6 @@ ENTRY(startup_32_smp) movl %eax,%cr0 lgdt early_gdt_descr - lidt idt_descr ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. @@ -378,37 +377,6 @@ ENDPROC(startup_32_smp) */ __INIT setup_once: - /* - * Set up a idt with 256 interrupt gates that push zero if there - * is no error code and then jump to early_idt_handler_common. - * It doesn't actually load the idt - that needs to be done on - * each CPU. Interrupts are enabled elsewhere, when we can be - * relatively sure everything is ok. 
- */ - - movl $idt_table,%edi - movl $early_idt_handler_array,%eax - movl $NUM_EXCEPTION_VECTORS,%ecx -1: - movl %eax,(%edi) - movl %eax,4(%edi) - /* interrupt gate, dpl=0, present */ - movl $(0x8E000000 + __KERNEL_CS),2(%edi) - addl $EARLY_IDT_HANDLER_SIZE,%eax - addl $8,%edi - loop 1b - - movl $256 - NUM_EXCEPTION_VECTORS,%ecx - movl $ignore_int,%edx - movl $(__KERNEL_CS << 16),%eax - movw %dx,%ax /* selector = 0x0010 = cs */ - movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ -2: - movl %eax,(%edi) - movl %edx,4(%edi) - addl $8,%edi - loop 2b - #ifdef CONFIG_CC_STACKPROTECTOR /* * Configure the stack canary. The linker can't handle this by @@ -498,7 +466,7 @@ ENDPROC(early_idt_handler_common) /* This is the default interrupt "handler" :-) */ ALIGN -ignore_int: +ENTRY(early_ignore_irq) cld #ifdef CONFIG_PRINTK pushl %eax @@ -533,7 +501,7 @@ ignore_int: hlt_loop: hlt jmp hlt_loop -ENDPROC(ignore_int) +ENDPROC(early_ignore_irq) __INITDATA .align 4 GLOBAL(early_recursion_flag) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index a1475810059c..70ca24853ef0 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -34,6 +34,10 @@ void __init idt_setup_early_handler(void) for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, early_idt_handler_array[i]); +#ifdef CONFIG_X86_32 + for ( ; i < NR_VECTORS; i++) + set_intr_gate(i, early_ignore_irq); +#endif load_idt(&idt_descr); } -- cgit v1.2.3 From 3318e9744244a415ee9481ca7e54234caf5e12c5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:49 +0200 Subject: x86/idt: Prepare for table based init The IDT setup code is handled in several places. All of them use variants of set_intr_gate() inlines. This can be done with a table based initialization, which allows to reduce the inline zoo and puts all IDT related code and information into a single place. Add the infrastructure. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064958.849877032@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/idt.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 70ca24853ef0..ae6fc12fd64f 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -5,8 +5,49 @@ */ #include +#include +#include #include +struct idt_data { + unsigned int vector; + unsigned int segment; + struct idt_bits bits; + const void *addr; +}; + +#define DPL0 0x0 +#define DPL3 0x3 + +#define DEFAULT_STACK 0 + +#define G(_vector, _addr, _ist, _type, _dpl, _segment) \ + { \ + .vector = _vector, \ + .bits.ist = _ist, \ + .bits.type = _type, \ + .bits.dpl = _dpl, \ + .bits.p = 1, \ + .addr = _addr, \ + .segment = _segment, \ + } + +/* Interrupt gate */ +#define INTG(_vector, _addr) \ + G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL0, __KERNEL_CS) + +/* System interrupt gate */ +#define SYSG(_vector, _addr) \ + G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS) + +/* Interrupt gate with interrupt stack */ +#define ISTG(_vector, _addr, _ist) \ + G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS) + +/* Task gate */ +#define TSKG(_vector, _gdt) \ + G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3) + /* Must be page-aligned because the real IDT is used in a fixmap. 
*/ gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; @@ -25,6 +66,32 @@ const struct desc_ptr debug_idt_descr = { }; #endif +static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d) +{ + unsigned long addr = (unsigned long) d->addr; + + gate->offset_low = (u16) addr; + gate->segment = (u16) d->segment; + gate->bits = d->bits; + gate->offset_middle = (u16) (addr >> 16); +#ifdef CONFIG_X86_64 + gate->offset_high = (u32) (addr >> 32); + gate->reserved = 0; +#endif +} + +static __init void +idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size) +{ + gate_desc desc; + + for (; size > 0; t++, size--) { + idt_init_desc(&desc, t); + set_bit(t->vector, used_vectors); + write_idt_entry(idt, t->vector, &desc); + } +} + /** * idt_setup_early_handler - Initializes the idt table with early handlers */ -- cgit v1.2.3 From 433f8924fa8e55a50ce57f3b8a33ed095c405644 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:50 +0200 Subject: x86/idt: Switch early trap init to IDT tables Add the initialization table for the early trap setup and replace the early trap init code. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064958.929139008@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/idt.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/setup.c | 4 ++-- arch/x86/kernel/traps.c | 27 ------------------------- 3 files changed, 55 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index ae6fc12fd64f..64e221172a17 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -48,6 +48,28 @@ struct idt_data { #define TSKG(_vector, _gdt) \ G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3) +/* + * Early traps running on the DEFAULT_STACK because the other interrupt + * stacks work only after cpu_init(). + */ +static const __initdata struct idt_data early_idts[] = { + INTG(X86_TRAP_DB, debug), + SYSG(X86_TRAP_BP, int3), +#ifdef CONFIG_X86_32 + INTG(X86_TRAP_PF, page_fault), +#endif +}; + +#ifdef CONFIG_X86_64 +/* + * Early traps running on the DEFAULT_STACK because the other interrupt + * stacks work only after cpu_init(). + */ +static const __initdata struct idt_data early_pf_idts[] = { + INTG(X86_TRAP_PF, page_fault), +}; +#endif + /* Must be page-aligned because the real IDT is used in a fixmap. */ gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; @@ -92,6 +114,37 @@ idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size) } } +/** + * idt_setup_early_traps - Initialize the idt table with early traps + * + * On X8664 these traps do not use interrupt stacks as they can't work + * before cpu_init() is invoked and sets up TSS. The IST variants are + * installed after that. + */ +void __init idt_setup_early_traps(void) +{ + idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts)); + load_idt(&idt_descr); +} + +#ifdef CONFIG_X86_64 +/** + * idt_setup_early_pf - Initialize the idt table with early pagefault handler + * + * On X8664 this does not use interrupt stacks as they can't work before + * cpu_init() is invoked and sets up TSS. The IST variant is installed + * after that. + * + * FIXME: Why is 32bit and 64bit installing the PF handler at different + * places in the early setup code? 
+ */ +void __init idt_setup_early_pf(void) +{ + idt_setup_from_table(idt_table, early_pf_idts, + ARRAY_SIZE(early_pf_idts)); +} +#endif + /** * idt_setup_early_handler - Initializes the idt table with early handlers */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ecab32282f0f..30dc84ee35b2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -891,7 +891,7 @@ void __init setup_arch(char **cmdline_p) */ olpc_ofw_detect(); - early_trap_init(); + idt_setup_early_traps(); early_cpu_init(); early_ioremap_init(); @@ -1162,7 +1162,7 @@ void __init setup_arch(char **cmdline_p) init_mem_mapping(); - early_trap_pf_init(); + idt_setup_early_pf(); /* * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 41f4cd331cb2..835c7e8a0eb1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -923,33 +923,6 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) } #endif -/* Set of traps needed for early debugging. */ -void __init early_trap_init(void) -{ - /* - * Don't use IST to set DEBUG_STACK as it doesn't work until TSS - * is ready in cpu_init() <-- trap_init(). Before trap_init(), - * CPU runs at ring 0 so it is impossible to hit an invalid - * stack. Using the original stack works well enough at this - * early stage. DEBUG_STACK will be equipped after cpu_init() in - * trap_init(). - */ - set_intr_gate(X86_TRAP_DB, debug); - /* int3 can be called from all */ - set_system_intr_gate(X86_TRAP_BP, &int3); -#ifdef CONFIG_X86_32 - set_intr_gate(X86_TRAP_PF, page_fault); -#endif - load_idt(&idt_descr); -} - -void __init early_trap_pf_init(void) -{ -#ifdef CONFIG_X86_64 - set_intr_gate(X86_TRAP_PF, page_fault); -#endif -} - void __init trap_init(void) { int i; -- cgit v1.2.3 From 0a30908b9149b2b332ccf817261125a634765566 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:51 +0200 Subject: x86/idt: Move debug stack init to table based Add the debug_idt init table and make use of it. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064959.006502252@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 2 ++ arch/x86/kernel/idt.c | 23 +++++++++++++++++++++++ arch/x86/kernel/traps.c | 6 +----- 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 5a3cdeb7cb03..930acd5d85f6 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -509,8 +509,10 @@ extern void idt_setup_early_traps(void); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); +extern void idt_setup_debugidt_traps(void); #else static inline void idt_setup_early_pf(void) { } +static inline void idt_setup_debugidt_traps(void) { } #endif extern void idt_invalidate(void *addr); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 64e221172a17..f5281b8f1131 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -68,6 +68,15 @@ static const __initdata struct idt_data early_idts[] = { static const __initdata struct idt_data early_pf_idts[] = { INTG(X86_TRAP_PF, page_fault), }; + +/* + * Override for the debug_idt. Same as the default, but with interrupt + * stack set to DEFAULT_STACK (0). Required for NMI trap handling. 
+ */ +static const __initdata struct idt_data dbg_idts[] = { + INTG(X86_TRAP_DB, debug), + INTG(X86_TRAP_BP, int3), +}; #endif /* Must be page-aligned because the real IDT is used in a fixmap. */ @@ -82,6 +91,10 @@ struct desc_ptr idt_descr __ro_after_init = { /* No need to be aligned, but done to keep all IDTs defined the same way. */ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; +/* + * Override for the debug_idt. Same as the default, but with interrupt + * stack set to DEFAULT_STACK (0). Required for NMI trap handling. + */ const struct desc_ptr debug_idt_descr = { .size = IDT_ENTRIES * 16 - 1, .address = (unsigned long) debug_idt_table, @@ -143,6 +156,16 @@ void __init idt_setup_early_pf(void) idt_setup_from_table(idt_table, early_pf_idts, ARRAY_SIZE(early_pf_idts)); } + +/** + * idt_setup_debugidt_traps - Initialize the debug idt table with debug traps + */ +void __init idt_setup_debugidt_traps(void) +{ + memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16); + + idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts)); +} #endif /** diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 835c7e8a0eb1..1492bf5eb549 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -990,9 +990,5 @@ void __init trap_init(void) x86_init.irqs.trap_init(); -#ifdef CONFIG_X86_64 - memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16); - set_nmi_gate(X86_TRAP_DB, &debug); - set_nmi_gate(X86_TRAP_BP, &int3); -#endif + idt_setup_debugidt_traps(); } -- cgit v1.2.3 From 90f6225fba0c732f3f5f9f5e265bdefa021ff12d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:52 +0200 Subject: x86/idt: Move IST stack based traps to table init Initialize the IST based traps via a table. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064959.091328949@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 2 ++ arch/x86/kernel/idt.c | 22 ++++++++++++++++++++++ arch/x86/kernel/traps.c | 9 +-------- 3 files changed, 25 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 930acd5d85f6..e62452777067 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -509,9 +509,11 @@ extern void idt_setup_early_traps(void); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); +extern void idt_setup_ist_traps(void); extern void idt_setup_debugidt_traps(void); #else static inline void idt_setup_early_pf(void) { } +static inline void idt_setup_ist_traps(void) { } static inline void idt_setup_debugidt_traps(void) { } #endif diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index f5281b8f1131..a6326fd6698a 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -91,6 +91,20 @@ struct desc_ptr idt_descr __ro_after_init = { /* No need to be aligned, but done to keep all IDTs defined the same way. */ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; +/* + * The exceptions which use Interrupt stacks. They are setup after + * cpu_init() when the TSS has been initialized. 
+ */ +static const __initdata struct idt_data ist_idts[] = { + ISTG(X86_TRAP_DB, debug, DEBUG_STACK), + ISTG(X86_TRAP_NMI, nmi, NMI_STACK), + ISTG(X86_TRAP_BP, int3, DEBUG_STACK), + ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK), +#ifdef CONFIG_X86_MCE + ISTG(X86_TRAP_MC, &machine_check, MCE_STACK), +#endif +}; + /* * Override for the debug_idt. Same as the default, but with interrupt * stack set to DEFAULT_STACK (0). Required for NMI trap handling. @@ -157,6 +171,14 @@ void __init idt_setup_early_pf(void) ARRAY_SIZE(early_pf_idts)); } +/** + * idt_setup_ist_traps - Initialize the idt table with traps using IST + */ +void __init idt_setup_ist_traps(void) +{ + idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts)); +} + /** * idt_setup_debugidt_traps - Initialize the debug idt table with debug traps */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1492bf5eb549..293f5bddd761 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -979,14 +979,7 @@ void __init trap_init(void) */ cpu_init(); - /* - * X86_TRAP_DB and X86_TRAP_BP have been set - * in early_trap_init(). However, ITS works only after - * cpu_init() loads TSS. See comments in early_trap_init(). - */ - set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); - /* int3 can be called from all */ - set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); + idt_setup_ist_traps(); x86_init.irqs.trap_init(); -- cgit v1.2.3 From b70543a0b2b680f8953b6917a83b9203b20d7abd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:53 +0200 Subject: x86/idt: Move regular trap init to tables Initialize the regular traps with a table. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064959.182128165@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 1 + arch/x86/kernel/idt.c | 51 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/traps.c | 41 +----------------------------------- 3 files changed, 53 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index e62452777067..0a5735ef06cb 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -506,6 +506,7 @@ static inline void load_current_idt(void) extern void idt_setup_early_handler(void); extern void idt_setup_early_traps(void); +extern void idt_setup_traps(void); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index a6326fd6698a..14d9eb29763c 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -60,6 +60,49 @@ static const __initdata struct idt_data early_idts[] = { #endif }; +/* + * The default IDT entries which are set up in trap_init() before + * cpu_init() is invoked. Interrupt stacks cannot be used at that point and + * the traps which use them are reinitialized with IST after cpu_init() has + * set up TSS. 
+ */ +static const __initdata struct idt_data def_idts[] = { + INTG(X86_TRAP_DE, divide_error), + INTG(X86_TRAP_NMI, nmi), + INTG(X86_TRAP_BR, bounds), + INTG(X86_TRAP_UD, invalid_op), + INTG(X86_TRAP_NM, device_not_available), + INTG(X86_TRAP_OLD_MF, coprocessor_segment_overrun), + INTG(X86_TRAP_TS, invalid_TSS), + INTG(X86_TRAP_NP, segment_not_present), + INTG(X86_TRAP_SS, stack_segment), + INTG(X86_TRAP_GP, general_protection), + INTG(X86_TRAP_SPURIOUS, spurious_interrupt_bug), + INTG(X86_TRAP_MF, coprocessor_error), + INTG(X86_TRAP_AC, alignment_check), + INTG(X86_TRAP_XF, simd_coprocessor_error), + +#ifdef CONFIG_X86_32 + TSKG(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS), +#else + INTG(X86_TRAP_DF, double_fault), +#endif + INTG(X86_TRAP_DB, debug), + INTG(X86_TRAP_NMI, nmi), + INTG(X86_TRAP_BP, int3), + +#ifdef CONFIG_X86_MCE + INTG(X86_TRAP_MC, &machine_check), +#endif + + SYSG(X86_TRAP_OF, overflow), +#if defined(CONFIG_IA32_EMULATION) + SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat), +#elif defined(CONFIG_X86_32) + SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32), +#endif +}; + #ifdef CONFIG_X86_64 /* * Early traps running on the DEFAULT_STACK because the other interrupt @@ -154,6 +197,14 @@ void __init idt_setup_early_traps(void) load_idt(&idt_descr); } +/** + * idt_setup_traps - Initialize the idt table with default traps + */ +void __init idt_setup_traps(void) +{ + idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts)); +} + #ifdef CONFIG_X86_64 /** * idt_setup_early_pf - Initialize the idt table with early pagefault handler diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 293f5bddd761..34ea3651362e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -925,46 +925,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) void __init trap_init(void) { - int i; - - set_intr_gate(X86_TRAP_DE, divide_error); - set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK); - /* int4 can be called from all */ - set_system_intr_gate(X86_TRAP_OF, &overflow); - set_intr_gate(X86_TRAP_BR, bounds); - set_intr_gate(X86_TRAP_UD, invalid_op); - set_intr_gate(X86_TRAP_NM, device_not_available); -#ifdef CONFIG_X86_32 - set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS); -#else - set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK); -#endif - set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun); - set_intr_gate(X86_TRAP_TS, invalid_TSS); - set_intr_gate(X86_TRAP_NP, segment_not_present); - set_intr_gate(X86_TRAP_SS, stack_segment); - set_intr_gate(X86_TRAP_GP, general_protection); - set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug); - set_intr_gate(X86_TRAP_MF, coprocessor_error); - set_intr_gate(X86_TRAP_AC, alignment_check); -#ifdef CONFIG_X86_MCE - set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK); -#endif - set_intr_gate(X86_TRAP_XF, simd_coprocessor_error); - - /* Reserve all the builtin and the syscall vector: */ - for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) - set_bit(i, used_vectors); - -#ifdef CONFIG_IA32_EMULATION - set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat); - set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#endif - -#ifdef CONFIG_X86_32 - set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32); - set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#endif + idt_setup_traps(); /* * Set the IDT descriptor to a fixed read-only location, so that the -- cgit v1.2.3 From 636a7598f65938e1dbacafec74c2e3acdc66bd4d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:54 +0200 Subject: 
x86/idt: Move APIC gate initialization to tables Replace the APIC/SMP vector gate initialization with the table based mechanism. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064959.260177013@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 1 + arch/x86/kernel/idt.c | 48 +++++++++++++++++++++++++++++++ arch/x86/kernel/irqinit.c | 69 +-------------------------------------------- 3 files changed, 50 insertions(+), 68 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 0a5735ef06cb..cae0cb0f5a40 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -507,6 +507,7 @@ static inline void load_current_idt(void) extern void idt_setup_early_handler(void); extern void idt_setup_early_traps(void); extern void idt_setup_traps(void); +extern void idt_setup_apic_and_irq_gates(void); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 14d9eb29763c..4327104f5af8 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -103,6 +103,46 @@ static const __initdata struct idt_data def_idts[] = { #endif }; +/* + * The APIC and SMP idt entries + */ +static const __initdata struct idt_data apic_idts[] = { +#ifdef CONFIG_SMP + INTG(RESCHEDULE_VECTOR, reschedule_interrupt), + INTG(CALL_FUNCTION_VECTOR, call_function_interrupt), + INTG(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt), + INTG(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt), + INTG(REBOOT_VECTOR, reboot_interrupt), +#endif + +#ifdef CONFIG_X86_THERMAL_VECTOR + INTG(THERMAL_APIC_VECTOR, thermal_interrupt), +#endif + +#ifdef CONFIG_X86_MCE_THRESHOLD + INTG(THRESHOLD_APIC_VECTOR, threshold_interrupt), +#endif + +#ifdef CONFIG_X86_MCE_AMD + INTG(DEFERRED_ERROR_VECTOR, deferred_error_interrupt), +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + INTG(LOCAL_TIMER_VECTOR, apic_timer_interrupt), + INTG(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi), +# ifdef CONFIG_HAVE_KVM + INTG(POSTED_INTR_VECTOR, kvm_posted_intr_ipi), + INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi), + INTG(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi), +# endif +# ifdef CONFIG_IRQ_WORK + INTG(IRQ_WORK_VECTOR, irq_work_interrupt), +# endif + INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt), + INTG(ERROR_APIC_VECTOR, error_interrupt), +#endif +}; + #ifdef CONFIG_X86_64 /* * Early traps running on the DEFAULT_STACK because the other interrupt @@ -241,6 +281,14 @@ void __init idt_setup_debugidt_traps(void) } #endif +/** + * idt_setup_apic_and_irq_gates - Setup APIC/SMP and normal interrupt gates + */ +void __init idt_setup_apic_and_irq_gates(void) +{ + idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts)); +} + /** * idt_setup_early_handler - Initializes the idt table with early handlers */ diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 4e5f8c022fdd..218cd06f22f6 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -87,73 +87,6 @@ void __init init_IRQ(void) x86_init.irqs.intr_init(); } -static void __init smp_intr_init(void) -{ -#ifdef CONFIG_SMP - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. 
- */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for generic single function call */ - alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); - - /* Low priority IPI to cleanup after moving an irq */ - set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); - - /* IPI used for rebooting/stopping */ - alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); -#endif /* CONFIG_SMP */ -} - -static void __init apic_intr_init(void) -{ - smp_intr_init(); - -#ifdef CONFIG_X86_THERMAL_VECTOR - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -#ifdef CONFIG_X86_MCE_THRESHOLD - alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); -#endif - -#ifdef CONFIG_X86_MCE_AMD - alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt); -#endif - -#ifdef CONFIG_X86_LOCAL_APIC - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI for X86 platform specific use */ - alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); -#ifdef CONFIG_HAVE_KVM - /* IPI for KVM to deliver posted interrupt */ - alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); - /* IPI for KVM to deliver interrupt to wake up tasks */ - alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi); - /* IPI for KVM to deliver nested posted interrupt */ - alloc_intr_gate(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi); -#endif - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - /* IRQ work interrupts: */ -# ifdef CONFIG_IRQ_WORK - alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt); -# endif - -#endif -} - void __init native_init_IRQ(void) { int i; @@ -161,7 +94,7 @@ void __init native_init_IRQ(void) /* Execute any quirks before the call gates are initialised: */ x86_init.irqs.pre_vector_init(); - apic_intr_init(); + idt_setup_apic_and_irq_gates(); /* * Cover the whole vector space, no vector can escape -- cgit v1.2.3 From dc20b2d526539344d7175a2a83221337302596b8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:55 +0200 Subject: x86/idt: Move interrupt gate initialization to IDT code Move the gate initialization from interrupt init to the IDT code so all IDT-related operations are in a single place. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H.
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064959.340209198@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/idt.c | 18 ++++++++++++++++++ arch/x86/kernel/irqinit.c | 18 ------------------ 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 4327104f5af8..99f93a6fb92a 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -286,7 +286,25 @@ void __init idt_setup_debugidt_traps(void) */ void __init idt_setup_apic_and_irq_gates(void) { + int i = FIRST_EXTERNAL_VECTOR; + void *entry; + idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts)); + + for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) { + entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); + set_intr_gate(i, entry); + } + + for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { +#ifdef CONFIG_X86_LOCAL_APIC + set_bit(i, used_vectors); + set_intr_gate(i, spurious_interrupt); +#else + entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); + set_intr_gate(i, entry); +#endif + } } /** diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 218cd06f22f6..1add9e08e83e 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -89,29 +89,11 @@ void __init init_IRQ(void) void __init native_init_IRQ(void) { - int i; - /* Execute any quirks before the call gates are initialised: */ x86_init.irqs.pre_vector_init(); idt_setup_apic_and_irq_gates(); - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - i = FIRST_EXTERNAL_VECTOR; - for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) { - /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ - set_intr_gate(i, irq_entries_start + - 8 * (i - FIRST_EXTERNAL_VECTOR)); - } -#ifdef CONFIG_X86_LOCAL_APIC - for_each_clear_bit_from(i, used_vectors, NR_VECTORS) - set_intr_gate(i, spurious_interrupt); -#endif - if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2); -- cgit v1.2.3 From 485fa57bd73a0b79987d144e15bdc582f926701d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:56 +0200 Subject: x86/idt: Remove unused functions/inlines The IDT-related inlines are no longer used. Remove them. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H.
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064959.422083717@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index cae0cb0f5a40..cbd36dd7993e 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -390,16 +390,6 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) desc->limit1 = (limit >> 16) & 0xf; } -#ifdef CONFIG_X86_64 -static inline void set_nmi_gate(int gate, void *addr) -{ - gate_desc s; - - pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS); - write_idt_entry(debug_idt_table, gate, &s); -} -#endif - static inline void _set_gate(int gate, unsigned type, const void *addr, unsigned dpl, unsigned ist, unsigned seg) { @@ -437,32 +427,6 @@ static inline void alloc_system_vector(int vector) set_intr_gate(n, addr); \ } while (0) -/* - * This routine sets up an interrupt gate at directory privilege level 3. - */ -static inline void set_system_intr_gate(unsigned int n, void *addr) -{ - BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); -} - -static inline void set_task_gate(unsigned int n, unsigned int gdt_entry) -{ - BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3)); -} - -static inline void set_intr_gate_ist(int n, void *addr, unsigned ist) -{ - BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS); -} - -static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) -{ - BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); -} #ifdef CONFIG_X86_64 DECLARE_PER_CPU(u32, debug_idt_ctr); -- cgit v1.2.3 From db18da78f9a8bbab1bdc5968ba47ace788b5061f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:57 +0200 Subject: x86/idt: Deinline setup functions None of this is performance sensitive in any way - so debloat the kernel. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064959.502052875@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 37 ++----------------------------------- arch/x86/kernel/idt.c | 43 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 36 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index cbd36dd7993e..33f84f29a724 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -390,44 +390,11 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) desc->limit1 = (limit >> 16) & 0xf; } -static inline void _set_gate(int gate, unsigned type, const void *addr, - unsigned dpl, unsigned ist, unsigned seg) -{ - gate_desc s; - - pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg); - /* - * does not need to be atomic because it is only done once at - * setup time - */ - write_idt_entry(idt_table, gate, &s); -} - -static inline void set_intr_gate(unsigned int n, const void *addr) -{ - BUG_ON(n > 0xFF); - _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); -} +void set_intr_gate(unsigned int n, const void *addr); +void alloc_intr_gate(unsigned int n, const void *addr); extern unsigned long used_vectors[]; -static inline void alloc_system_vector(int vector) -{ - BUG_ON(vector < FIRST_SYSTEM_VECTOR); - if (!test_bit(vector, used_vectors)) { - set_bit(vector, used_vectors); - } else { - BUG(); - } -} - -#define alloc_intr_gate(n, addr) \ - do { \ - alloc_system_vector(n); \ - set_intr_gate(n, addr); \ - } while (0) - - #ifdef CONFIG_X86_64 DECLARE_PER_CPU(u32, debug_idt_ctr); static inline bool is_debug_idt_enabled(void) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 99f93a6fb92a..8e9318d2b56d 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -212,15 +212,16 @@ static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d) #endif } -static __init void -idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size) +static void +idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sys) { gate_desc desc; for (; size > 0; t++, size--) { idt_init_desc(&desc, t); - set_bit(t->vector, used_vectors); write_idt_entry(idt, t->vector, &desc); + if (sys) + set_bit(t->vector, used_vectors); } } @@ -233,7 +234,8 @@ idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size) */ void __init idt_setup_early_traps(void) { - idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts)); + idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts), + true); load_idt(&idt_descr); } @@ -242,7 +244,7 @@ void __init idt_setup_early_traps(void) */ void __init idt_setup_traps(void) { - idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts)); + idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts), true); } #ifdef CONFIG_X86_64 @@ -259,7 +261,7 @@ void __init idt_setup_traps(void) void __init idt_setup_early_pf(void) { idt_setup_from_table(idt_table, early_pf_idts, - ARRAY_SIZE(early_pf_idts)); + ARRAY_SIZE(early_pf_idts), true); } /** @@ -267,7 +269,7 @@ void __init idt_setup_early_pf(void) */ void __init idt_setup_ist_traps(void) { - idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts)); + idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts), true); } /** @@ -277,7 +279,7 @@ void __init idt_setup_debugidt_traps(void) { memcpy(&debug_idt_table, 
&idt_table, IDT_ENTRIES * 16); - idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts)); + idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts), false); } #endif @@ -289,7 +291,7 @@ void __init idt_setup_apic_and_irq_gates(void) int i = FIRST_EXTERNAL_VECTOR; void *entry; - idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts)); + idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) { entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); @@ -333,3 +335,26 @@ void idt_invalidate(void *addr) load_idt(&idt); } + +void set_intr_gate(unsigned int n, const void *addr) +{ + struct idt_data data; + + BUG_ON(n > 0xFF); + + memset(&data, 0, sizeof(data)); + data.vector = n; + data.addr = addr; + data.segment = __KERNEL_CS; + data.bits.type = GATE_INTERRUPT; + data.bits.p = 1; + + idt_setup_from_table(idt_table, &data, 1, false); +} + +void alloc_intr_gate(unsigned int n, const void *addr) +{ + BUG_ON(test_bit(n, used_vectors) || n < FIRST_SYSTEM_VECTOR); + set_bit(n, used_vectors); + set_intr_gate(n, addr); +} -- cgit v1.2.3 From 4447ac1195a845b18f2f427686f116ab77c5b268 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:58 +0200 Subject: x86/idt: Simplify alloc_intr_gate() The only users of alloc_intr_gate() are hypervisors, which both check the used_vectors bitmap whether they have allocated the gate already. Move that check into alloc_intr_gate() and simplify the users. Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Reviewed-by: K. Y. Srinivasan Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064959.580830286@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mshyperv.c | 9 ++------- arch/x86/kernel/idt.c | 6 +++--- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 70e717fccdd6..9fc32651c911 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -59,13 +59,8 @@ void hyperv_vector_handler(struct pt_regs *regs) void hv_setup_vmbus_irq(void (*handler)(void)) { vmbus_handler = handler; - /* - * Setup the IDT for hypervisor callback. Prevent reallocation - * at module reload. - */ - if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, - hyperv_callback_vector); + /* Setup the IDT for hypervisor callback */ + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); } void hv_remove_vmbus_irq(void) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 8e9318d2b56d..b609eac3d73c 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -354,7 +354,7 @@ void set_intr_gate(unsigned int n, const void *addr) void alloc_intr_gate(unsigned int n, const void *addr) { - BUG_ON(test_bit(n, used_vectors) || n < FIRST_SYSTEM_VECTOR); - set_bit(n, used_vectors); - set_intr_gate(n, addr); + BUG_ON(n < FIRST_SYSTEM_VECTOR); + if (!test_and_set_bit(n, used_vectors)) + set_intr_gate(n, addr); } -- cgit v1.2.3 From facaa3e3c813848e6b49ee37a42a3688832e63cd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Aug 2017 08:47:59 +0200 Subject: x86/idt: Hide set_intr_gate() set_intr_gate() is an internal function of the IDT code. 
The only user left is the KVM code which replaces the pagefault handler eventually. Provide an explicit update_intr_gate() function and make set_intr_gate() static. While at it replace the magic number 14 in the KVM code with the proper trap define. Signed-off-by: Thomas Gleixner Acked-by: Paolo Bonzini Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20170828064959.663008004@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 2 +- arch/x86/kernel/idt.c | 33 ++++++++++++++++++++------------- arch/x86/kernel/kvm.c | 2 +- 3 files changed, 22 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 33f84f29a724..1a2ba368da39 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -390,7 +390,7 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) desc->limit1 = (limit >> 16) & 0xf; } -void set_intr_gate(unsigned int n, const void *addr); +void update_intr_gate(unsigned int n, const void *addr); void alloc_intr_gate(unsigned int n, const void *addr); extern unsigned long used_vectors[]; diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index b609eac3d73c..61b490c69250 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -225,6 +225,22 @@ idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sy } } +static void set_intr_gate(unsigned int n, const void *addr) +{ + struct idt_data data; + + BUG_ON(n > 0xFF); + + memset(&data, 0, sizeof(data)); + data.vector = n; + data.addr = addr; + data.segment = __KERNEL_CS; + data.bits.type = GATE_INTERRUPT; + data.bits.p = 1; + + idt_setup_from_table(idt_table, &data, 1, false); +} + /** * idt_setup_early_traps - Initialize the idt table with early traps * @@ -336,20 +352,11 @@ void idt_invalidate(void *addr) load_idt(&idt); } -void set_intr_gate(unsigned int n, const void *addr) +void __init update_intr_gate(unsigned int n, const void *addr) { - struct idt_data data; - - BUG_ON(n > 0xFF); - - memset(&data, 0, sizeof(data)); - data.vector = n; - data.addr = addr; - data.segment = __KERNEL_CS; - data.bits.type = GATE_INTERRUPT; - data.bits.p = 1; - - idt_setup_from_table(idt_table, &data, 1, false); + if (WARN_ON_ONCE(!test_bit(n, used_vectors))) + return; + set_intr_gate(n, addr); } void alloc_intr_gate(unsigned int n, const void *addr) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 6ed9242b5fa7..874827b0d7ca 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -455,7 +455,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu) static void __init kvm_apf_trap_init(void) { - set_intr_gate(14, async_page_fault); + update_intr_gate(X86_TRAP_PF, async_page_fault); } void __init kvm_guest_init(void) -- cgit v1.2.3 From 1d792a678c759b3b06af197c2c250cea13f9c57b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Aug 2017 10:22:06 +0200 Subject: x86/idt: Remove the tracing IDT leftovers Stephen reported a merge conflict with the XEN tree. That also shows that the IDT cleanup forgot to remove the now unused trace_{trap} defines. Remove them. 
Reported-by: Stephen Rothwell Signed-off-by: Thomas Gleixner Cc: Juergen Gross --- arch/x86/include/asm/traps.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index b4f322d6c95f..cd0365ed0f68 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -38,24 +38,6 @@ asmlinkage void machine_check(void); #endif /* CONFIG_X86_MCE */ asmlinkage void simd_coprocessor_error(void); -#ifdef CONFIG_TRACING -#define trace_stack_segment stack_segment -#define trace_divide_error divide_error -#define trace_bounds bounds -#define trace_invalid_op invalid_op -#define trace_device_not_available device_not_available -#define trace_coprocessor_segment_overrun coprocessor_segment_overrun -#define trace_invalid_TSS invalid_TSS -#define trace_segment_not_present segment_not_present -#define trace_general_protection general_protection -#define trace_spurious_interrupt_bug spurious_interrupt_bug -#define trace_coprocessor_error coprocessor_error -#define trace_alignment_check alignment_check -#define trace_simd_coprocessor_error simd_coprocessor_error -#define trace_async_page_fault async_page_fault -#define trace_page_fault page_fault -#endif - dotraplinkage void do_divide_error(struct pt_regs *, long); dotraplinkage void do_debug(struct pt_regs *, long); dotraplinkage void do_nmi(struct pt_regs *, long); -- cgit v1.2.3 From 594a30fb12424717a41c62323d2a8bf167dbccad Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 30 Aug 2017 12:58:11 +0200 Subject: x86/apic: Silence "FW_BUG TSC_DEADLINE disabled due to Errata" on CPUs without the feature When booting 4.13 on a VirtualBox VM on a Skylake host the following error shows up in the logs: [ 0.000000] [Firmware Bug]: TSC_DEADLINE disabled due to Errata; please update microcode to version: 0xb2 (or later) This is caused by apic_check_deadline_errata() only checking CPU model and not the X86_FEATURE_TSC_DEADLINE_TIMER flag (which VirtualBox does NOT export to the guest), combined with VirtualBox not exporting the micro-code version to the guest. This commit adds a check for X86_FEATURE_TSC_DEADLINE_TIMER to apic_check_deadline_errata(), silencing this error on VirtualBox VMs. Signed-off-by: Hans de Goede Acked-by: Thomas Gleixner Cc: Frank Mehnert Cc: Linus Torvalds Cc: Michael Thayer Cc: Michal Necasek Cc: Peter Zijlstra Fixes: bd9240a18e ("x86/apic: Add TSC_DEADLINE quirk due to errata") Link: http://lkml.kernel.org/r/20170830105811.27539-1-hdegoede@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index eebee4cbc14b..7834f73efbf1 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -597,9 +597,13 @@ static const struct x86_cpu_id deadline_match[] = { static void apic_check_deadline_errata(void) { - const struct x86_cpu_id *m = x86_match_cpu(deadline_match); + const struct x86_cpu_id *m; u32 rev; + if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + return; + + m = x86_match_cpu(deadline_match); if (!m) return; -- cgit v1.2.3 From 04b5de3a8f54fad8bb838827de85381bc6a5bc61 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 31 Aug 2017 14:16:53 +0200 Subject: x86/idt: Remove superfluous ALIGNment Commit 87e81786b13b ("x86/idt: Move early IDT setup out of 32-bit asm") switched early_ignore_irq to use ENTRY. 
ENTRY aligns the code, so there is no need for one more ALIGN right before the function. Also add one newline after the function to separate it from the data. Signed-off-by: Jiri Slaby Signed-off-by: Thomas Gleixner Cc: Denys Vlasenko Cc: Peter Zijlstra Cc: Brian Gerst Cc: Steven Rostedt Cc: Linus Torvalds Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Josh Poimboeuf Link: http://lkml.kernel.org/r/20170831121653.28917-1-jslaby@suse.cz --- arch/x86/kernel/head_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index a615a5efa8ec..9ed3074d0d27 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -465,7 +465,6 @@ early_idt_handler_common: ENDPROC(early_idt_handler_common) /* This is the default interrupt "handler" :-) */ - ALIGN ENTRY(early_ignore_irq) cld #ifdef CONFIG_PRINTK @@ -502,6 +501,7 @@ hlt_loop: hlt jmp hlt_loop ENDPROC(early_ignore_irq) + __INITDATA .align 4 GLOBAL(early_recursion_flag) -- cgit v1.2.3 From ef1d4deab953ecb1dfcf9f167043bda8b3f14a11 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Aug 2017 20:08:16 +0200 Subject: x86/eisa: Add missing include The separation of the EISA init failed to include linux/io.h, which breaks the build with some special configurations. Reported-by: Ingo Molnar Fixes: f7eaf6e00fd5 ("x86/boot: Move EISA setup to a separate file") Signed-off-by: Thomas Gleixner --- arch/x86/kernel/eisa.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c index 881f9236ebff..f260e452e4f8 100644 --- a/arch/x86/kernel/eisa.c +++ b/arch/x86/kernel/eisa.c @@ -5,6 +5,7 @@ */ #include #include +#include <linux/io.h> static __init int eisa_bus_probe(void) { -- cgit v1.2.3 From 5878d5d6fdef6447d73b0acc121ba445bef37f53 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 31 Aug 2017 19:42:49 +0200 Subject: x86/xen: Get rid of paravirt op adjust_exception_frame When running as a Xen pv-guest, the exception frame on the stack contains %r11 and %rcx in addition to the other data pushed by the processor. Instead of having a paravirt op called for each exception type, prepend the Xen-specific code to each exception entry. When running as a Xen pv-guest, use the exception entry with the prepended instructions; otherwise use the entry without the Xen-specific code.
[ tglx: Merged through tip to avoid ugly merge conflict ] Signed-off-by: Juergen Gross Signed-off-by: Thomas Gleixner Cc: xen-devel@lists.xenproject.org Cc: boris.ostrovsky@oracle.com Cc: luto@amacapital.net Link: http://lkml.kernel.org/r/20170831174249.26853-1-jg@pfupf.net --- arch/x86/entry/entry_64.S | 23 ++------ arch/x86/entry/entry_64_compat.S | 1 - arch/x86/include/asm/paravirt.h | 5 -- arch/x86/include/asm/paravirt_types.h | 3 -- arch/x86/include/asm/proto.h | 3 ++ arch/x86/include/asm/traps.h | 28 ++++++++-- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/paravirt.c | 3 -- arch/x86/xen/enlighten_pv.c | 98 +++++++++++++++++++++++------------ arch/x86/xen/irq.c | 3 -- arch/x86/xen/xen-asm_64.S | 41 +++++++++++++-- arch/x86/xen/xen-ops.h | 1 - 12 files changed, 133 insertions(+), 77 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 7a1d383c2192..bdd024a9afc9 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -821,7 +821,6 @@ ENTRY(\sym) .endif ASM_CLAC - PARAVIRT_ADJUST_EXCEPTION_FRAME .ifeq \has_error_code pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -967,7 +966,7 @@ ENTRY(do_softirq_own_stack) ENDPROC(do_softirq_own_stack) #ifdef CONFIG_XEN -idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 +idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 /* * A note on the "critical region" in our callback handler. @@ -1034,8 +1033,6 @@ ENTRY(xen_failsafe_callback) movq 8(%rsp), %r11 addq $0x30, %rsp pushq $0 /* RIP */ - pushq %r11 - pushq %rcx UNWIND_HINT_IRET_REGS offset=8 jmp general_protection 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ @@ -1066,9 +1063,8 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN -idtentry xen_debug do_debug has_error_code=0 -idtentry xen_int3 do_int3 has_error_code=0 -idtentry xen_stack_segment do_stack_segment has_error_code=1 +idtentry xendebug do_debug has_error_code=0 +idtentry xenint3 do_int3 has_error_code=0 #endif idtentry general_protection do_general_protection has_error_code=1 @@ -1232,20 +1228,9 @@ ENTRY(error_exit) END(error_exit) /* Runs on exception stack */ +/* XXX: broken on Xen PV */ ENTRY(nmi) UNWIND_HINT_IRET_REGS - /* - * Fix up the exception frame if we're on Xen. - * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most - * one value to the stack on native, so it may clobber the rdx - * scratch slot, but it won't clobber any of the important - * slots past it. - * - * Xen is a different story, because the Xen frame itself overlaps - * the "NMI executing" variable. - */ - PARAVIRT_ADJUST_EXCEPTION_FRAME - /* * We allow breakpoints in NMIs. If a breakpoint occurs, then * the iretq it performs will take us out of NMI context. diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 5314d7b8e5ad..d8468ba24be0 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -293,7 +293,6 @@ ENTRY(entry_INT80_compat) /* * Interrupts are off on entry. 
*/ - PARAVIRT_ADJUST_EXCEPTION_FRAME ASM_CLAC /* Do this early to minimize exposure */ SWAPGS diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 9ccac1926587..c25dd22f7c70 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -960,11 +960,6 @@ extern void default_banner(void); #define GET_CR2_INTO_RAX \ call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) -#define PARAVIRT_ADJUST_EXCEPTION_FRAME \ - PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \ - CLBR_NONE, \ - call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame)) - #define USERGS_SYSRET64 \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ CLBR_NONE, \ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 9ffc36bfe4cd..6b64fc6367f2 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -196,9 +196,6 @@ struct pv_irq_ops { void (*safe_halt)(void); void (*halt)(void); -#ifdef CONFIG_X86_64 - void (*adjust_exception_frame)(void); -#endif } __no_randomize_layout; struct pv_mmu_ops { diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 8d3964fc5f91..b408b1886195 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -24,6 +24,9 @@ void entry_SYSENTER_compat(void); void __end_entry_SYSENTER_compat(void); void entry_SYSCALL_compat(void); void entry_INT80_compat(void); +#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +void xen_entry_INT80_compat(void); +#endif #endif void x86_configure_nx(void); diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index cd0365ed0f68..5545f6459bf5 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -13,9 +13,6 @@ asmlinkage void divide_error(void); asmlinkage void debug(void); asmlinkage void nmi(void); asmlinkage void int3(void); -asmlinkage void xen_debug(void); -asmlinkage void xen_int3(void); -asmlinkage void xen_stack_segment(void); asmlinkage void overflow(void); asmlinkage void bounds(void); asmlinkage void invalid_op(void); @@ -38,6 +35,31 @@ asmlinkage void machine_check(void); #endif /* CONFIG_X86_MCE */ asmlinkage void simd_coprocessor_error(void); +#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +asmlinkage void xen_divide_error(void); +asmlinkage void xen_xendebug(void); +asmlinkage void xen_xenint3(void); +asmlinkage void xen_nmi(void); +asmlinkage void xen_overflow(void); +asmlinkage void xen_bounds(void); +asmlinkage void xen_invalid_op(void); +asmlinkage void xen_device_not_available(void); +asmlinkage void xen_double_fault(void); +asmlinkage void xen_coprocessor_segment_overrun(void); +asmlinkage void xen_invalid_TSS(void); +asmlinkage void xen_segment_not_present(void); +asmlinkage void xen_stack_segment(void); +asmlinkage void xen_general_protection(void); +asmlinkage void xen_page_fault(void); +asmlinkage void xen_spurious_interrupt_bug(void); +asmlinkage void xen_coprocessor_error(void); +asmlinkage void xen_alignment_check(void); +#ifdef CONFIG_X86_MCE +asmlinkage void xen_machine_check(void); +#endif /* CONFIG_X86_MCE */ +asmlinkage void xen_simd_coprocessor_error(void); +#endif + dotraplinkage void do_divide_error(struct pt_regs *, long); dotraplinkage void do_debug(struct pt_regs *, long); dotraplinkage void do_nmi(struct pt_regs *, long); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 99332f550c48..cf42206926af 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ 
b/arch/x86/kernel/asm-offsets_64.c @@ -20,7 +20,6 @@ static char syscalls_ia32[] = { int main(void) { #ifdef CONFIG_PARAVIRT - OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); BLANK(); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index bc0a849589bb..a14df9eecfed 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -319,9 +319,6 @@ __visible struct pv_irq_ops pv_irq_ops = { .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), .safe_halt = native_safe_halt, .halt = native_halt, -#ifdef CONFIG_X86_64 - .adjust_exception_frame = paravirt_nop, -#endif }; __visible struct pv_cpu_ops pv_cpu_ops = { diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 03fb07d28299..3859fc19164a 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -579,6 +579,70 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, preempt_enable(); } +#ifdef CONFIG_X86_64 +struct trap_array_entry { + void (*orig)(void); + void (*xen)(void); + bool ist_okay; +}; + +static struct trap_array_entry trap_array[] = { + { debug, xen_xendebug, true }, + { int3, xen_xenint3, true }, + { double_fault, xen_double_fault, true }, +#ifdef CONFIG_X86_MCE + { machine_check, xen_machine_check, true }, +#endif + { nmi, xen_nmi, true }, + { overflow, xen_overflow, false }, +#ifdef CONFIG_IA32_EMULATION + { entry_INT80_compat, xen_entry_INT80_compat, false }, +#endif + { page_fault, xen_page_fault, false }, + { divide_error, xen_divide_error, false }, + { bounds, xen_bounds, false }, + { invalid_op, xen_invalid_op, false }, + { device_not_available, xen_device_not_available, false }, + { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false }, + { invalid_TSS, xen_invalid_TSS, false }, + { segment_not_present, xen_segment_not_present, false }, + { stack_segment, xen_stack_segment, false }, + { general_protection, xen_general_protection, false }, + { spurious_interrupt_bug, xen_spurious_interrupt_bug, false }, + { coprocessor_error, xen_coprocessor_error, false }, + { alignment_check, xen_alignment_check, false }, + { simd_coprocessor_error, xen_simd_coprocessor_error, false }, +}; + +static bool get_trap_addr(void **addr, unsigned int ist) +{ + unsigned int nr; + bool ist_okay = false; + + /* + * Replace trap handler addresses by Xen specific ones. + * Check for known traps using IST and whitelist them. + * The debugger ones are the only ones we care about. + * Xen will handle faults like double_fault, * so we should never see + * them. Warn if there's an unexpected IST-using fault handler. + */ + for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) { + struct trap_array_entry *entry = trap_array + nr; + + if (*addr == entry->orig) { + *addr = entry->xen; + ist_okay = entry->ist_okay; + break; + } + } + + if (WARN_ON(ist != 0 && !ist_okay)) + return false; + + return true; +} +#endif + static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { @@ -591,40 +655,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, addr = gate_offset(val); #ifdef CONFIG_X86_64 - /* - * Look for known traps using IST, and substitute them - * appropriately. The debugger ones are the only ones we care - * about. Xen will handle faults like double_fault, - * so we should never see them. Warn if - * there's an unexpected IST-using fault handler. 
- */ - if (addr == (unsigned long)debug) - addr = (unsigned long)xen_debug; - else if (addr == (unsigned long)int3) - addr = (unsigned long)xen_int3; - else if (addr == (unsigned long)stack_segment) - addr = (unsigned long)xen_stack_segment; - else if (addr == (unsigned long)double_fault) { - /* Don't need to handle these */ + if (!get_trap_addr((void **)&addr, val->bits.ist)) return 0; -#ifdef CONFIG_X86_MCE - } else if (addr == (unsigned long)machine_check) { - /* - * when xen hypervisor inject vMCE to guest, - * use native mce handler to handle it - */ - ; -#endif - } else if (addr == (unsigned long)nmi) - /* - * Use the native version as well. - */ - ; - else { - /* Some other trap using IST? */ - if (WARN_ON(val->bits.ist != 0)) - return 0; - } #endif /* CONFIG_X86_64 */ info->address = addr; diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 33e92955e09d..d4eff5676cfa 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -123,9 +123,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { .safe_halt = xen_safe_halt, .halt = xen_halt, -#ifdef CONFIG_X86_64 - .adjust_exception_frame = xen_adjust_exception_frame, -#endif }; void __init xen_init_irq_ops(void) diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 3a3b6a211584..dae2cc33afb5 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -16,11 +16,42 @@ #include -ENTRY(xen_adjust_exception_frame) - mov 8+0(%rsp), %rcx - mov 8+8(%rsp), %r11 - ret $16 -ENDPROC(xen_adjust_exception_frame) +.macro xen_pv_trap name +ENTRY(xen_\name) + pop %rcx + pop %r11 + jmp \name +END(xen_\name) +.endm + +xen_pv_trap divide_error +xen_pv_trap debug +xen_pv_trap xendebug +xen_pv_trap int3 +xen_pv_trap xenint3 +xen_pv_trap nmi +xen_pv_trap overflow +xen_pv_trap bounds +xen_pv_trap invalid_op +xen_pv_trap device_not_available +xen_pv_trap double_fault +xen_pv_trap coprocessor_segment_overrun +xen_pv_trap invalid_TSS +xen_pv_trap segment_not_present +xen_pv_trap stack_segment +xen_pv_trap general_protection +xen_pv_trap page_fault +xen_pv_trap spurious_interrupt_bug +xen_pv_trap coprocessor_error +xen_pv_trap alignment_check +#ifdef CONFIG_X86_MCE +xen_pv_trap machine_check +#endif /* CONFIG_X86_MCE */ +xen_pv_trap simd_coprocessor_error +#ifdef CONFIG_IA32_EMULATION +xen_pv_trap entry_INT80_compat +#endif +xen_pv_trap hypervisor_callback hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 /* diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 70301ac0d414..c8a6d224f7ed 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -138,7 +138,6 @@ __visible void xen_restore_fl_direct(unsigned long); __visible void xen_iret(void); __visible void xen_sysret32(void); __visible void xen_sysret64(void); -__visible void xen_adjust_exception_frame(void); extern int xen_panic_handler_init(void); -- cgit v1.2.3 From c6ef89421e236d75693ae968d80d44a52409889d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 1 Sep 2017 11:04:56 +0200 Subject: x86/idt: Fix the X86_TRAP_BP gate Andrei Vagin reported a CRIU regression and bisected it back to: 90f6225fba0c ("x86/idt: Move IST stack based traps to table init") This table init conversion loses the system-gate property of X86_TRAP_BP and erroneously moves it from DPL3 to DPL0. Fix it. 
Reported-by: Andrei Vagin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: dvlasenk@redhat.com Cc: linux-tip-commits@vger.kernel.org Cc: peterz@infradead.org Cc: brgerst@gmail.com Cc: rostedt@goodmis.org Cc: bp@alien8.de Cc: luto@kernel.org Cc: jpoimboe@redhat.com Cc: Cyrill Gorcunov Cc: torvalds@linux-foundation.org Cc: tip-bot for Jacob Shin Link: http://lkml.kernel.org/r/20170901082630.xvyi5bwk6etmppqc@gmail.com --- arch/x86/kernel/idt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 61b490c69250..6107ee1cb8d5 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -44,6 +44,10 @@ struct idt_data { #define ISTG(_vector, _addr, _ist) \ G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS) +/* System interrupt gate with interrupt stack */ +#define SISTG(_vector, _addr, _ist) \ + G(_vector, _addr, _ist, GATE_INTERRUPT, DPL3, __KERNEL_CS) + /* Task gate */ #define TSKG(_vector, _gdt) \ G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3) @@ -181,7 +185,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; static const __initdata struct idt_data ist_idts[] = { ISTG(X86_TRAP_DB, debug, DEBUG_STACK), ISTG(X86_TRAP_NMI, nmi, NMI_STACK), - ISTG(X86_TRAP_BP, int3, DEBUG_STACK), + SISTG(X86_TRAP_BP, int3, DEBUG_STACK), ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK), #ifdef CONFIG_X86_MCE ISTG(X86_TRAP_MC, &machine_check, MCE_STACK), -- cgit v1.2.3
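
Illustration, not part of the series above: what one instance of the xen_pv_trap assembler macro expands to. Under Xen PV the hypervisor delivers exceptions with the guest's %rcx and %r11 stacked on top of the hardware-style exception frame, so each generated xen_* stub strips that pair, which is the work the removed PARAVIRT_ADJUST_EXCEPTION_FRAME hook used to do at the start of every native entry, and then jumps to the unchanged native handler; doing it in dedicated Xen stubs keeps the native entry code free of paravirt patching for this case. For example, "xen_pv_trap page_fault" would expand to roughly:

ENTRY(xen_page_fault)
	pop %rcx		/* %rcx as saved in the Xen event frame */
	pop %r11		/* %r11 as saved in the Xen event frame */
	jmp page_fault		/* continue in the native #PF entry */
END(xen_page_fault)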
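
The effect of the SISTG() change is directly observable from user space: with the #BP gate at DPL3 an "int3" executed at CPL3 is delivered as #BP and the task receives SIGTRAP, whereas the DPL0 gate introduced by the bisected commit makes the CPU raise #GP instead, which the kernel reports as SIGSEGV; that is presumably what the reported CRIU regression tripped over. A minimal, hypothetical test program (illustrative only, file name and messages made up for this sketch):

/* bp-gate-test.c: expects SIGTRAP when the #BP gate is a DPL3 system gate */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static void handler(int sig)
{
	printf("int3 delivered as %s\n",
	       sig == SIGTRAP ? "SIGTRAP (system gate, DPL3)" :
				"SIGSEGV (gate wrongly at DPL0)");
	exit(sig == SIGTRAP ? 0 : 1);
}

int main(void)
{
	signal(SIGTRAP, handler);
	signal(SIGSEGV, handler);
	asm volatile("int3");	/* user-mode breakpoint */
	return 1;
}

Built with plain gcc, this should print the SIGTRAP line and exit 0 on a kernel carrying the fix.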