From 1e019421bca68cfae1a61a09d9d49cf6a9e2143b Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 23 Sep 2013 16:25:00 -0500 Subject: [PATCH 1/9] x86/UV: Move NMI support This patch moves the UV NMI support from the x2apic file to a new separate uv_nmi.c file in preparation for the next sequence of patches. It prevents upcoming bloat of the x2apic file, and has the added benefit of putting the upcoming /sys/module parameters under the name 'uv_nmi' instead of 'x2apic_uv_x', which was obscure. Signed-off-by: Mike Travis Reviewed-by: Dimitri Sivanich Reviewed-by: Hedi Berriche Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Jason Wessel Link: http://lkml.kernel.org/r/20130923212500.183295611@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv.h | 2 + arch/x86/kernel/apic/x2apic_uv_x.c | 69 ------------------- arch/x86/platform/uv/Makefile | 2 +- arch/x86/platform/uv/uv_nmi.c | 102 +++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 70 deletions(-) create mode 100644 arch/x86/platform/uv/uv_nmi.c diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 062921ef34e9..6b964a0b86d1 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -12,6 +12,7 @@ extern enum uv_system_type get_uv_system_type(void); extern int is_uv_system(void); extern void uv_cpu_init(void); extern void uv_nmi_init(void); +extern void uv_register_nmi_notifier(void); extern void uv_system_init(void); extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, @@ -25,6 +26,7 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline int is_uv_system(void) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } +static inline void uv_register_nmi_notifier(void) { } static inline const struct cpumask * uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long start, unsigned long end, unsigned int cpu) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1191ac1c9d25..9e47c06ae5ab 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -39,12 +39,6 @@ #include #include -/* BMC sets a bit this MMR non-zero before sending an NMI */ -#define UVH_NMI_MMR UVH_SCRATCH5 -#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8) -#define UV_NMI_PENDING_MASK (1UL << 63) -DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count); - DEFINE_PER_CPU(int, x2apic_extra_bits); #define PR_DEVEL(fmt, args...) pr_devel("%s: " fmt, __func__, args) @@ -58,7 +52,6 @@ int uv_min_hub_revision_id; EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); unsigned int uv_apicid_hibits; EXPORT_SYMBOL_GPL(uv_apicid_hibits); -static DEFINE_SPINLOCK(uv_nmi_lock); static struct apic apic_x2apic_uv_x; @@ -847,68 +840,6 @@ void uv_cpu_init(void) set_x2apic_extra_bits(uv_hub_info->pnode); } -/* - * When NMI is received, print a stack trace. - */ -int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) -{ - unsigned long real_uv_nmi; - int bid; - - /* - * Each blade has an MMR that indicates when an NMI has been sent - * to cpus on the blade. If an NMI is detected, atomically - * clear the MMR and update a per-blade NMI count used to - * cause each cpu on the blade to notice a new NMI. - */ - bid = uv_numa_blade_id(); - real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); - - if (unlikely(real_uv_nmi)) { - spin_lock(&uv_blade_info[bid].nmi_lock); - real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); - if (real_uv_nmi) { - uv_blade_info[bid].nmi_count++; - uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK); - } - spin_unlock(&uv_blade_info[bid].nmi_lock); - } - - if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) - return NMI_DONE; - - __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; - - /* - * Use a lock so only one cpu prints at a time. - * This prevents intermixed output. - */ - spin_lock(&uv_nmi_lock); - pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id()); - dump_stack(); - spin_unlock(&uv_nmi_lock); - - return NMI_HANDLED; -} - -void uv_register_nmi_notifier(void) -{ - if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) - printk(KERN_WARNING "UV NMI handler failed to register\n"); -} - -void uv_nmi_init(void) -{ - unsigned int value; - - /* - * Unmask NMI on all cpus - */ - value = apic_read(APIC_LVT1) | APIC_DM_NMI; - value &= ~APIC_LVT_MASKED; - apic_write(APIC_LVT1, value); -} - void __init uv_system_init(void) { union uvh_rh_gam_config_mmr_u m_n_config; diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile index 6c40995fefb8..52079bebd014 100644 --- a/arch/x86/platform/uv/Makefile +++ b/arch/x86/platform/uv/Makefile @@ -1 +1 @@ -obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o +obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o uv_nmi.o diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c new file mode 100644 index 000000000000..37feb60618b1 --- /dev/null +++ b/arch/x86/platform/uv/uv_nmi.c @@ -0,0 +1,102 @@ +/* + * SGI NMI support routines + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2009-2013 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Mike Travis + */ + +#include +#include + +#include +#include +#include +#include +#include + +/* BMC sets a bit this MMR non-zero before sending an NMI */ +#define UVH_NMI_MMR UVH_SCRATCH5 +#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8) +#define UV_NMI_PENDING_MASK (1UL << 63) +DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count); +static DEFINE_SPINLOCK(uv_nmi_lock); + +/* + * When NMI is received, print a stack trace. + */ +int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) +{ + unsigned long real_uv_nmi; + int bid; + + /* + * Each blade has an MMR that indicates when an NMI has been sent + * to cpus on the blade. If an NMI is detected, atomically + * clear the MMR and update a per-blade NMI count used to + * cause each cpu on the blade to notice a new NMI. + */ + bid = uv_numa_blade_id(); + real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); + + if (unlikely(real_uv_nmi)) { + spin_lock(&uv_blade_info[bid].nmi_lock); + real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & + UV_NMI_PENDING_MASK); + if (real_uv_nmi) { + uv_blade_info[bid].nmi_count++; + uv_write_local_mmr(UVH_NMI_MMR_CLEAR, + UV_NMI_PENDING_MASK); + } + spin_unlock(&uv_blade_info[bid].nmi_lock); + } + + if (likely(__get_cpu_var(cpu_last_nmi_count) == + uv_blade_info[bid].nmi_count)) + return NMI_DONE; + + __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; + + /* + * Use a lock so only one cpu prints at a time. + * This prevents intermixed output. + */ + spin_lock(&uv_nmi_lock); + pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id()); + dump_stack(); + spin_unlock(&uv_nmi_lock); + + return NMI_HANDLED; +} + +void uv_register_nmi_notifier(void) +{ + if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) + pr_warn("UV NMI handler failed to register\n"); +} + +void uv_nmi_init(void) +{ + unsigned int value; + + /* + * Unmask NMI on all cpus + */ + value = apic_read(APIC_LVT1) | APIC_DM_NMI; + value &= ~APIC_LVT_MASKED; + apic_write(APIC_LVT1, value); +} + From 0d12ef0c900078cc1f4e78dff2245521aa5d0c89 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 23 Sep 2013 16:25:01 -0500 Subject: [PATCH 2/9] x86/UV: Update UV support for external NMI signals The current UV NMI handler has not been updated for the changes in the system NMI handler and the perf operations. The UV NMI handler reads an MMR in the UV Hub to check to see if the NMI event was caused by the external 'system NMI' that the operator can initiate on the System Mgmt Controller. The problem arises when the perf tools are running, causing millions of perf events per second on very large CPU count systems. Previously this was okay because the perf NMI handler ran at a higher priority on the NMI call chain and if the NMI was a perf event, it would stop calling other NMI handlers remaining on the NMI call chain. Now the system NMI handler calls all the handlers on the NMI call chain including the UV NMI handler. This causes the UV NMI handler to read the MMRs at the same millions per second rate. This can lead to significant performance loss and possible system failures. It also can cause thousands of 'Dazed and Confused' messages being sent to the system console. This effectively makes perf tools unusable on UV systems. To avoid this excessive overhead when perf tools are running, this code has been optimized to minimize reading of the MMRs as much as possible, by moving to the NMI_UNKNOWN notifier chain. This chain is called only when all the users on the standard NMI_LOCAL call chain have been called and none of them have claimed this NMI. There is an exception where the NMI_LOCAL notifier chain is used. When the perf tools are in use, it's possible that the UV NMI was captured by some other NMI handler and then either ignored or mistakenly processed as a perf event. We set a per_cpu ('ping') flag for those CPUs that ignored the initial NMI, and then send them an IPI NMI signal. The NMI_LOCAL handler on each cpu does not need to read the MMR, but instead checks the in memory flag indicating it was pinged. There are two module variables, 'ping_count' indicating how many requested NMI events occurred, and 'ping_misses' indicating how many stray NMI events. These most likely are perf events so it shows the overhead of the perf NMI interrupts and how many MMR reads were avoided. This patch also minimizes the reads of the MMRs by having the first cpu entering the NMI handler on each node set a per HUB in-memory atomic value. (Having a per HUB value avoids sending lock traffic over NumaLink.) Both types of UV NMIs from the SMI layer are supported. Signed-off-by: Mike Travis Reviewed-by: Dimitri Sivanich Reviewed-by: Hedi Berriche Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Jason Wessel Link: http://lkml.kernel.org/r/20130923212500.353547733@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 57 ++- arch/x86/include/asm/uv/uv_mmrs.h | 31 ++ arch/x86/kernel/apic/x2apic_uv_x.c | 1 + arch/x86/platform/uv/uv_nmi.c | 553 ++++++++++++++++++++++++++--- 4 files changed, 600 insertions(+), 42 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 2c32df95bb78..a30836c8ac4d 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -502,8 +502,8 @@ struct uv_blade_info { unsigned short nr_online_cpus; unsigned short pnode; short memory_nid; - spinlock_t nmi_lock; - unsigned long nmi_count; + spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */ + unsigned long nmi_count; /* obsolete, see uv_hub_nmi */ }; extern struct uv_blade_info *uv_blade_info; extern short *uv_node_to_blade; @@ -576,6 +576,59 @@ static inline int uv_num_possible_blades(void) return uv_possible_blades; } +/* Per Hub NMI support */ +extern void uv_nmi_setup(void); + +/* BMC sets a bit this MMR non-zero before sending an NMI */ +#define UVH_NMI_MMR UVH_SCRATCH5 +#define UVH_NMI_MMR_CLEAR UVH_SCRATCH5_ALIAS +#define UVH_NMI_MMR_SHIFT 63 +#define UVH_NMI_MMR_TYPE "SCRATCH5" + +/* Newer SMM NMI handler, not present in all systems */ +#define UVH_NMI_MMRX UVH_EVENT_OCCURRED0 +#define UVH_NMI_MMRX_CLEAR UVH_EVENT_OCCURRED0_ALIAS +#define UVH_NMI_MMRX_SHIFT (is_uv1_hub() ? \ + UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT :\ + UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT) +#define UVH_NMI_MMRX_TYPE "EXTIO_INT0" + +/* Non-zero indicates newer SMM NMI handler present */ +#define UVH_NMI_MMRX_SUPPORTED UVH_EXTIO_INT0_BROADCAST + +/* Indicates to BIOS that we want to use the newer SMM NMI handler */ +#define UVH_NMI_MMRX_REQ UVH_SCRATCH5_ALIAS_2 +#define UVH_NMI_MMRX_REQ_SHIFT 62 + +struct uv_hub_nmi_s { + raw_spinlock_t nmi_lock; + atomic_t in_nmi; /* flag this node in UV NMI IRQ */ + atomic_t cpu_owner; /* last locker of this struct */ + atomic_t read_mmr_count; /* count of MMR reads */ + atomic_t nmi_count; /* count of true UV NMIs */ + unsigned long nmi_value; /* last value read from NMI MMR */ +}; + +struct uv_cpu_nmi_s { + struct uv_hub_nmi_s *hub; + atomic_t state; + atomic_t pinging; + int queries; + int pings; +}; + +DECLARE_PER_CPU(struct uv_cpu_nmi_s, __uv_cpu_nmi); +#define uv_cpu_nmi (__get_cpu_var(__uv_cpu_nmi)) +#define uv_hub_nmi (uv_cpu_nmi.hub) +#define uv_cpu_nmi_per(cpu) (per_cpu(__uv_cpu_nmi, cpu)) +#define uv_hub_nmi_per(cpu) (uv_cpu_nmi_per(cpu).hub) + +/* uv_cpu_nmi_states */ +#define UV_NMI_STATE_OUT 0 +#define UV_NMI_STATE_IN 1 +#define UV_NMI_STATE_DUMP 2 +#define UV_NMI_STATE_DUMP_DONE 3 + /* Update SCIR state */ static inline void uv_set_scir_bits(unsigned char value) { diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index bd5f80e58a23..e42249bcf7e1 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -460,6 +460,23 @@ union uvh_event_occurred0_u { #define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0 +/* ========================================================================= */ +/* UVH_EXTIO_INT0_BROADCAST */ +/* ========================================================================= */ +#define UVH_EXTIO_INT0_BROADCAST 0x61448UL +#define UVH_EXTIO_INT0_BROADCAST_32 0x3f0 + +#define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT 0 +#define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK 0x0000000000000001UL + +union uvh_extio_int0_broadcast_u { + unsigned long v; + struct uvh_extio_int0_broadcast_s { + unsigned long enable:1; /* RW */ + unsigned long rsvd_1_63:63; + } s; +}; + /* ========================================================================= */ /* UVH_GR0_TLB_INT0_CONFIG */ /* ========================================================================= */ @@ -2605,6 +2622,20 @@ union uvh_scratch5_u { } s; }; +/* ========================================================================= */ +/* UVH_SCRATCH5_ALIAS */ +/* ========================================================================= */ +#define UVH_SCRATCH5_ALIAS 0x2d0208UL +#define UVH_SCRATCH5_ALIAS_32 0x780 + + +/* ========================================================================= */ +/* UVH_SCRATCH5_ALIAS_2 */ +/* ========================================================================= */ +#define UVH_SCRATCH5_ALIAS_2 0x2d0210UL +#define UVH_SCRATCH5_ALIAS_2_32 0x788 + + /* ========================================================================= */ /* UVXH_EVENT_OCCURRED2 */ /* ========================================================================= */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 9e47c06ae5ab..0a5a4b8ae36c 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -977,6 +977,7 @@ void __init uv_system_init(void) map_mmr_high(max_pnode); map_mmioh_high(min_pnode, max_pnode); + uv_nmi_setup(); uv_cpu_init(); uv_scir_register_cpu_notifier(); uv_register_nmi_notifier(); diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 37feb60618b1..fb02ea7d2b2d 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -20,72 +20,518 @@ */ #include +#include +#include #include +#include +#include #include +#include +#include +#include #include #include #include #include -/* BMC sets a bit this MMR non-zero before sending an NMI */ -#define UVH_NMI_MMR UVH_SCRATCH5 -#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8) -#define UV_NMI_PENDING_MASK (1UL << 63) -DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count); -static DEFINE_SPINLOCK(uv_nmi_lock); +/* + * UV handler for NMI + * + * Handle system-wide NMI events generated by the global 'power nmi' command. + * + * Basic operation is to field the NMI interrupt on each cpu and wait + * until all cpus have arrived into the nmi handler. If some cpus do not + * make it into the handler, try and force them in with the IPI(NMI) signal. + * + * We also have to lessen UV Hub MMR accesses as much as possible as this + * disrupts the UV Hub's primary mission of directing NumaLink traffic and + * can cause system problems to occur. + * + * To do this we register our primary NMI notifier on the NMI_UNKNOWN + * chain. This reduces the number of false NMI calls when the perf + * tools are running which generate an enormous number of NMIs per + * second (~4M/s for 1024 cpu threads). Our secondary NMI handler is + * very short as it only checks that if it has been "pinged" with the + * IPI(NMI) signal as mentioned above, and does not read the UV Hub's MMR. + * + */ + +static struct uv_hub_nmi_s **uv_hub_nmi_list; + +DEFINE_PER_CPU(struct uv_cpu_nmi_s, __uv_cpu_nmi); +EXPORT_PER_CPU_SYMBOL_GPL(__uv_cpu_nmi); + +static unsigned long nmi_mmr; +static unsigned long nmi_mmr_clear; +static unsigned long nmi_mmr_pending; + +static atomic_t uv_in_nmi; +static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1); +static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1); +static atomic_t uv_nmi_slave_continue; +static cpumask_var_t uv_nmi_cpu_mask; + +/* Values for uv_nmi_slave_continue */ +#define SLAVE_CLEAR 0 +#define SLAVE_CONTINUE 1 +#define SLAVE_EXIT 2 /* - * When NMI is received, print a stack trace. + * Default is all stack dumps go to the console and buffer. + * Lower level to send to log buffer only. + */ +static int uv_nmi_loglevel = 7; +module_param_named(dump_loglevel, uv_nmi_loglevel, int, 0644); + +/* + * The following values show statistics on how perf events are affecting + * this system. + */ +static int param_get_local64(char *buffer, const struct kernel_param *kp) +{ + return sprintf(buffer, "%lu\n", local64_read((local64_t *)kp->arg)); +} + +static int param_set_local64(const char *val, const struct kernel_param *kp) +{ + /* clear on any write */ + local64_set((local64_t *)kp->arg, 0); + return 0; +} + +static struct kernel_param_ops param_ops_local64 = { + .get = param_get_local64, + .set = param_set_local64, +}; +#define param_check_local64(name, p) __param_check(name, p, local64_t) + +static local64_t uv_nmi_count; +module_param_named(nmi_count, uv_nmi_count, local64, 0644); + +static local64_t uv_nmi_misses; +module_param_named(nmi_misses, uv_nmi_misses, local64, 0644); + +static local64_t uv_nmi_ping_count; +module_param_named(ping_count, uv_nmi_ping_count, local64, 0644); + +static local64_t uv_nmi_ping_misses; +module_param_named(ping_misses, uv_nmi_ping_misses, local64, 0644); + +/* + * Following values allow tuning for large systems under heavy loading + */ +static int uv_nmi_initial_delay = 100; +module_param_named(initial_delay, uv_nmi_initial_delay, int, 0644); + +static int uv_nmi_slave_delay = 100; +module_param_named(slave_delay, uv_nmi_slave_delay, int, 0644); + +static int uv_nmi_loop_delay = 100; +module_param_named(loop_delay, uv_nmi_loop_delay, int, 0644); + +static int uv_nmi_trigger_delay = 10000; +module_param_named(trigger_delay, uv_nmi_trigger_delay, int, 0644); + +static int uv_nmi_wait_count = 100; +module_param_named(wait_count, uv_nmi_wait_count, int, 0644); + +static int uv_nmi_retry_count = 500; +module_param_named(retry_count, uv_nmi_retry_count, int, 0644); + +/* Setup which NMI support is present in system */ +static void uv_nmi_setup_mmrs(void) +{ + if (uv_read_local_mmr(UVH_NMI_MMRX_SUPPORTED)) { + uv_write_local_mmr(UVH_NMI_MMRX_REQ, + 1UL << UVH_NMI_MMRX_REQ_SHIFT); + nmi_mmr = UVH_NMI_MMRX; + nmi_mmr_clear = UVH_NMI_MMRX_CLEAR; + nmi_mmr_pending = 1UL << UVH_NMI_MMRX_SHIFT; + pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMRX_TYPE); + } else { + nmi_mmr = UVH_NMI_MMR; + nmi_mmr_clear = UVH_NMI_MMR_CLEAR; + nmi_mmr_pending = 1UL << UVH_NMI_MMR_SHIFT; + pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMR_TYPE); + } +} + +/* Read NMI MMR and check if NMI flag was set by BMC. */ +static inline int uv_nmi_test_mmr(struct uv_hub_nmi_s *hub_nmi) +{ + hub_nmi->nmi_value = uv_read_local_mmr(nmi_mmr); + atomic_inc(&hub_nmi->read_mmr_count); + return !!(hub_nmi->nmi_value & nmi_mmr_pending); +} + +static inline void uv_local_mmr_clear_nmi(void) +{ + uv_write_local_mmr(nmi_mmr_clear, nmi_mmr_pending); +} + +/* + * If first cpu in on this hub, set hub_nmi "in_nmi" and "owner" values and + * return true. If first cpu in on the system, set global "in_nmi" flag. + */ +static int uv_set_in_nmi(int cpu, struct uv_hub_nmi_s *hub_nmi) +{ + int first = atomic_add_unless(&hub_nmi->in_nmi, 1, 1); + + if (first) { + atomic_set(&hub_nmi->cpu_owner, cpu); + if (atomic_add_unless(&uv_in_nmi, 1, 1)) + atomic_set(&uv_nmi_cpu, cpu); + + atomic_inc(&hub_nmi->nmi_count); + } + return first; +} + +/* Check if this is a system NMI event */ +static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi) +{ + int cpu = smp_processor_id(); + int nmi = 0; + + local64_inc(&uv_nmi_count); + uv_cpu_nmi.queries++; + + do { + nmi = atomic_read(&hub_nmi->in_nmi); + if (nmi) + break; + + if (raw_spin_trylock(&hub_nmi->nmi_lock)) { + + /* check hub MMR NMI flag */ + if (uv_nmi_test_mmr(hub_nmi)) { + uv_set_in_nmi(cpu, hub_nmi); + nmi = 1; + break; + } + + /* MMR NMI flag is clear */ + raw_spin_unlock(&hub_nmi->nmi_lock); + + } else { + /* wait a moment for the hub nmi locker to set flag */ + cpu_relax(); + udelay(uv_nmi_slave_delay); + + /* re-check hub in_nmi flag */ + nmi = atomic_read(&hub_nmi->in_nmi); + if (nmi) + break; + } + + /* check if this BMC missed setting the MMR NMI flag */ + if (!nmi) { + nmi = atomic_read(&uv_in_nmi); + if (nmi) + uv_set_in_nmi(cpu, hub_nmi); + } + + } while (0); + + if (!nmi) + local64_inc(&uv_nmi_misses); + + return nmi; +} + +/* Need to reset the NMI MMR register, but only once per hub. */ +static inline void uv_clear_nmi(int cpu) +{ + struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi; + + if (cpu == atomic_read(&hub_nmi->cpu_owner)) { + atomic_set(&hub_nmi->cpu_owner, -1); + atomic_set(&hub_nmi->in_nmi, 0); + uv_local_mmr_clear_nmi(); + raw_spin_unlock(&hub_nmi->nmi_lock); + } +} + +/* Print non-responding cpus */ +static void uv_nmi_nr_cpus_pr(char *fmt) +{ + static char cpu_list[1024]; + int len = sizeof(cpu_list); + int c = cpumask_weight(uv_nmi_cpu_mask); + int n = cpulist_scnprintf(cpu_list, len, uv_nmi_cpu_mask); + + if (n >= len-1) + strcpy(&cpu_list[len - 6], "...\n"); + + printk(fmt, c, cpu_list); +} + +/* Ping non-responding cpus attemping to force them into the NMI handler */ +static void uv_nmi_nr_cpus_ping(void) +{ + int cpu; + + for_each_cpu(cpu, uv_nmi_cpu_mask) + atomic_set(&uv_cpu_nmi_per(cpu).pinging, 1); + + apic->send_IPI_mask(uv_nmi_cpu_mask, APIC_DM_NMI); +} + +/* Clean up flags for cpus that ignored both NMI and ping */ +static void uv_nmi_cleanup_mask(void) +{ + int cpu; + + for_each_cpu(cpu, uv_nmi_cpu_mask) { + atomic_set(&uv_cpu_nmi_per(cpu).pinging, 0); + atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_OUT); + cpumask_clear_cpu(cpu, uv_nmi_cpu_mask); + } +} + +/* Loop waiting as cpus enter nmi handler */ +static int uv_nmi_wait_cpus(int first) +{ + int i, j, k, n = num_online_cpus(); + int last_k = 0, waiting = 0; + + if (first) { + cpumask_copy(uv_nmi_cpu_mask, cpu_online_mask); + k = 0; + } else { + k = n - cpumask_weight(uv_nmi_cpu_mask); + } + + udelay(uv_nmi_initial_delay); + for (i = 0; i < uv_nmi_retry_count; i++) { + int loop_delay = uv_nmi_loop_delay; + + for_each_cpu(j, uv_nmi_cpu_mask) { + if (atomic_read(&uv_cpu_nmi_per(j).state)) { + cpumask_clear_cpu(j, uv_nmi_cpu_mask); + if (++k >= n) + break; + } + } + if (k >= n) { /* all in? */ + k = n; + break; + } + if (last_k != k) { /* abort if no new cpus coming in */ + last_k = k; + waiting = 0; + } else if (++waiting > uv_nmi_wait_count) + break; + + /* extend delay if waiting only for cpu 0 */ + if (waiting && (n - k) == 1 && + cpumask_test_cpu(0, uv_nmi_cpu_mask)) + loop_delay *= 100; + + udelay(loop_delay); + } + atomic_set(&uv_nmi_cpus_in_nmi, k); + return n - k; +} + +/* Wait until all slave cpus have entered UV NMI handler */ +static void uv_nmi_wait(int master) +{ + /* indicate this cpu is in */ + atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_IN); + + /* if not the first cpu in (the master), then we are a slave cpu */ + if (!master) + return; + + do { + /* wait for all other cpus to gather here */ + if (!uv_nmi_wait_cpus(1)) + break; + + /* if not all made it in, send IPI NMI to them */ + uv_nmi_nr_cpus_pr(KERN_ALERT + "UV: Sending NMI IPI to %d non-responding CPUs: %s\n"); + uv_nmi_nr_cpus_ping(); + + /* if all cpus are in, then done */ + if (!uv_nmi_wait_cpus(0)) + break; + + uv_nmi_nr_cpus_pr(KERN_ALERT + "UV: %d CPUs not in NMI loop: %s\n"); + } while (0); + + pr_alert("UV: %d of %d CPUs in NMI\n", + atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus()); +} + +/* Dump this cpu's state */ +static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs) +{ + const char *dots = " ................................. "; + + printk(KERN_DEFAULT "UV:%sNMI process trace for CPU %d\n", dots, cpu); + show_regs(regs); + atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE); +} + +/* Trigger a slave cpu to dump it's state */ +static void uv_nmi_trigger_dump(int cpu) +{ + int retry = uv_nmi_trigger_delay; + + if (atomic_read(&uv_cpu_nmi_per(cpu).state) != UV_NMI_STATE_IN) + return; + + atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_DUMP); + do { + cpu_relax(); + udelay(10); + if (atomic_read(&uv_cpu_nmi_per(cpu).state) + != UV_NMI_STATE_DUMP) + return; + } while (--retry > 0); + + pr_crit("UV: CPU %d stuck in process dump function\n", cpu); + atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_DUMP_DONE); +} + +/* Wait until all cpus ready to exit */ +static void uv_nmi_sync_exit(int master) +{ + atomic_dec(&uv_nmi_cpus_in_nmi); + if (master) { + while (atomic_read(&uv_nmi_cpus_in_nmi) > 0) + cpu_relax(); + atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR); + } else { + while (atomic_read(&uv_nmi_slave_continue)) + cpu_relax(); + } +} + +/* Walk through cpu list and dump state of each */ +static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master) +{ + if (master) { + int tcpu; + int ignored = 0; + int saved_console_loglevel = console_loglevel; + + pr_alert("UV: tracing processes for %d CPUs from CPU %d\n", + atomic_read(&uv_nmi_cpus_in_nmi), cpu); + + console_loglevel = uv_nmi_loglevel; + atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); + for_each_online_cpu(tcpu) { + if (cpumask_test_cpu(tcpu, uv_nmi_cpu_mask)) + ignored++; + else if (tcpu == cpu) + uv_nmi_dump_state_cpu(tcpu, regs); + else + uv_nmi_trigger_dump(tcpu); + } + if (ignored) + printk(KERN_DEFAULT "UV: %d CPUs ignored NMI\n", + ignored); + + console_loglevel = saved_console_loglevel; + pr_alert("UV: process trace complete\n"); + } else { + while (!atomic_read(&uv_nmi_slave_continue)) + cpu_relax(); + while (atomic_read(&uv_cpu_nmi.state) != UV_NMI_STATE_DUMP) + cpu_relax(); + uv_nmi_dump_state_cpu(cpu, regs); + } + uv_nmi_sync_exit(master); +} + +static void uv_nmi_touch_watchdogs(void) +{ + touch_softlockup_watchdog_sync(); + clocksource_touch_watchdog(); + rcu_cpu_stall_reset(); + touch_nmi_watchdog(); +} + +/* + * UV NMI handler */ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) { - unsigned long real_uv_nmi; - int bid; + struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi; + int cpu = smp_processor_id(); + int master = 0; + unsigned long flags; - /* - * Each blade has an MMR that indicates when an NMI has been sent - * to cpus on the blade. If an NMI is detected, atomically - * clear the MMR and update a per-blade NMI count used to - * cause each cpu on the blade to notice a new NMI. - */ - bid = uv_numa_blade_id(); - real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); + local_irq_save(flags); - if (unlikely(real_uv_nmi)) { - spin_lock(&uv_blade_info[bid].nmi_lock); - real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & - UV_NMI_PENDING_MASK); - if (real_uv_nmi) { - uv_blade_info[bid].nmi_count++; - uv_write_local_mmr(UVH_NMI_MMR_CLEAR, - UV_NMI_PENDING_MASK); - } - spin_unlock(&uv_blade_info[bid].nmi_lock); + /* If not a UV System NMI, ignore */ + if (!atomic_read(&uv_cpu_nmi.pinging) && !uv_check_nmi(hub_nmi)) { + local_irq_restore(flags); + return NMI_DONE; } - if (likely(__get_cpu_var(cpu_last_nmi_count) == - uv_blade_info[bid].nmi_count)) - return NMI_DONE; + /* Indicate we are the first CPU into the NMI handler */ + master = (atomic_read(&uv_nmi_cpu) == cpu); - __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; + /* Pause as all cpus enter the NMI handler */ + uv_nmi_wait(master); - /* - * Use a lock so only one cpu prints at a time. - * This prevents intermixed output. - */ - spin_lock(&uv_nmi_lock); - pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id()); - dump_stack(); - spin_unlock(&uv_nmi_lock); + /* Dump state of each cpu */ + uv_nmi_dump_state(cpu, regs, master); + + /* Clear per_cpu "in nmi" flag */ + atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT); + + /* Clear MMR NMI flag on each hub */ + uv_clear_nmi(cpu); + + /* Clear global flags */ + if (master) { + if (cpumask_weight(uv_nmi_cpu_mask)) + uv_nmi_cleanup_mask(); + atomic_set(&uv_nmi_cpus_in_nmi, -1); + atomic_set(&uv_nmi_cpu, -1); + atomic_set(&uv_in_nmi, 0); + } + + uv_nmi_touch_watchdogs(); + local_irq_restore(flags); return NMI_HANDLED; } +/* + * NMI handler for pulling in CPUs when perf events are grabbing our NMI + */ +int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) +{ + int ret; + + uv_cpu_nmi.queries++; + if (!atomic_read(&uv_cpu_nmi.pinging)) { + local64_inc(&uv_nmi_ping_misses); + return NMI_DONE; + } + + uv_cpu_nmi.pings++; + local64_inc(&uv_nmi_ping_count); + ret = uv_handle_nmi(reason, regs); + atomic_set(&uv_cpu_nmi.pinging, 0); + return ret; +} + void uv_register_nmi_notifier(void) { if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) - pr_warn("UV NMI handler failed to register\n"); + pr_warn("UV: NMI handler failed to register\n"); + + if (register_nmi_handler(NMI_LOCAL, uv_handle_nmi_ping, 0, "uvping")) + pr_warn("UV: PING NMI handler failed to register\n"); } void uv_nmi_init(void) @@ -100,3 +546,30 @@ void uv_nmi_init(void) apic_write(APIC_LVT1, value); } +void uv_nmi_setup(void) +{ + int size = sizeof(void *) * (1 << NODES_SHIFT); + int cpu, nid; + + /* Setup hub nmi info */ + uv_nmi_setup_mmrs(); + uv_hub_nmi_list = kzalloc(size, GFP_KERNEL); + pr_info("UV: NMI hub list @ 0x%p (%d)\n", uv_hub_nmi_list, size); + BUG_ON(!uv_hub_nmi_list); + size = sizeof(struct uv_hub_nmi_s); + for_each_present_cpu(cpu) { + nid = cpu_to_node(cpu); + if (uv_hub_nmi_list[nid] == NULL) { + uv_hub_nmi_list[nid] = kzalloc_node(size, + GFP_KERNEL, nid); + BUG_ON(!uv_hub_nmi_list[nid]); + raw_spin_lock_init(&(uv_hub_nmi_list[nid]->nmi_lock)); + atomic_set(&uv_hub_nmi_list[nid]->cpu_owner, -1); + } + uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid]; + } + alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL); + BUG_ON(!uv_nmi_cpu_mask); +} + + From 3c121d9a21dc16ef030ad6ca3ebb159b5726fab9 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 23 Sep 2013 16:25:02 -0500 Subject: [PATCH 3/9] x86/UV: Add summary of cpu activity to UV NMI handler The standard NMI handler dumps the states of all the cpus. This includes a full register dump and stack trace. This can be way more information than what is needed. This patch adds a "summary" dump that is basically a form of the "ps" command. It includes the symbolic IP address as well as the command field and basic process information. It is enabled when the nmi action is changed to "ips". Signed-off-by: Mike Travis Reviewed-by: Dimitri Sivanich Reviewed-by: Hedi Berriche Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Jason Wessel Link: http://lkml.kernel.org/r/20130923212500.507922930@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/uv_nmi.c | 48 ++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index fb02ea7d2b2d..4efcde1b9d54 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -139,6 +139,19 @@ module_param_named(wait_count, uv_nmi_wait_count, int, 0644); static int uv_nmi_retry_count = 500; module_param_named(retry_count, uv_nmi_retry_count, int, 0644); +/* + * Valid NMI Actions: + * "dump" - dump process stack for each cpu + * "ips" - dump IP info for each cpu + */ +static char uv_nmi_action[8] = "dump"; +module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644); + +static inline bool uv_nmi_action_is(const char *action) +{ + return (strncmp(uv_nmi_action, action, strlen(action)) == 0); +} + /* Setup which NMI support is present in system */ static void uv_nmi_setup_mmrs(void) { @@ -367,13 +380,38 @@ static void uv_nmi_wait(int master) atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus()); } +static void uv_nmi_dump_cpu_ip_hdr(void) +{ + printk(KERN_DEFAULT + "\nUV: %4s %6s %-32s %s (Note: PID 0 not listed)\n", + "CPU", "PID", "COMMAND", "IP"); +} + +static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs) +{ + printk(KERN_DEFAULT "UV: %4d %6d %-32.32s ", + cpu, current->pid, current->comm); + + printk_address(regs->ip, 1); +} + /* Dump this cpu's state */ static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs) { const char *dots = " ................................. "; - printk(KERN_DEFAULT "UV:%sNMI process trace for CPU %d\n", dots, cpu); - show_regs(regs); + if (uv_nmi_action_is("ips")) { + if (cpu == 0) + uv_nmi_dump_cpu_ip_hdr(); + + if (current->pid != 0) + uv_nmi_dump_cpu_ip(cpu, regs); + + } else if (uv_nmi_action_is("dump")) { + printk(KERN_DEFAULT + "UV:%sNMI process trace for CPU %d\n", dots, cpu); + show_regs(regs); + } atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE); } @@ -420,7 +458,8 @@ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master) int ignored = 0; int saved_console_loglevel = console_loglevel; - pr_alert("UV: tracing processes for %d CPUs from CPU %d\n", + pr_alert("UV: tracing %s for %d CPUs from CPU %d\n", + uv_nmi_action_is("ips") ? "IPs" : "processes", atomic_read(&uv_nmi_cpus_in_nmi), cpu); console_loglevel = uv_nmi_loglevel; @@ -482,7 +521,8 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) uv_nmi_wait(master); /* Dump state of each cpu */ - uv_nmi_dump_state(cpu, regs, master); + if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) + uv_nmi_dump_state(cpu, regs, master); /* Clear per_cpu "in nmi" flag */ atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT); From 12ba6c990fab50fe568f3ad8715e81e356552428 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 23 Sep 2013 16:25:03 -0500 Subject: [PATCH 4/9] x86/UV: Add kdump to UV NMI handler If a system has hung and it no longer responds to external events, this patch adds the capability of doing a standard kdump and system reboot then triggered by the system NMI command. It is enabled when the nmi action is changed to "kdump" and the kernel is built with CONFIG_KEXEC enabled. Signed-off-by: Mike Travis Reviewed-by: Dimitri Sivanich Reviewed-by: Hedi Berriche Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Jason Wessel Link: http://lkml.kernel.org/r/20130923212500.660567460@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/uv_nmi.c | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 4efcde1b9d54..2579fbd6b95c 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -70,6 +71,7 @@ static atomic_t uv_in_nmi; static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1); static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1); static atomic_t uv_nmi_slave_continue; +static atomic_t uv_nmi_kexec_failed; static cpumask_var_t uv_nmi_cpu_mask; /* Values for uv_nmi_slave_continue */ @@ -143,6 +145,7 @@ module_param_named(retry_count, uv_nmi_retry_count, int, 0644); * Valid NMI Actions: * "dump" - dump process stack for each cpu * "ips" - dump IP info for each cpu + * "kdump" - do crash dump */ static char uv_nmi_action[8] = "dump"; module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644); @@ -496,6 +499,40 @@ static void uv_nmi_touch_watchdogs(void) touch_nmi_watchdog(); } +#if defined(CONFIG_KEXEC) +static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) +{ + /* Call crash to dump system state */ + if (master) { + pr_emerg("UV: NMI executing crash_kexec on CPU%d\n", cpu); + crash_kexec(regs); + + pr_emerg("UV: crash_kexec unexpectedly returned, "); + if (!kexec_crash_image) { + pr_cont("crash kernel not loaded\n"); + atomic_set(&uv_nmi_kexec_failed, 1); + uv_nmi_sync_exit(1); + return; + } + pr_cont("kexec busy, stalling cpus while waiting\n"); + } + + /* If crash exec fails the slaves should return, otherwise stall */ + while (atomic_read(&uv_nmi_kexec_failed) == 0) + mdelay(10); + + /* Crash kernel most likely not loaded, return in an orderly fashion */ + uv_nmi_sync_exit(0); +} + +#else /* !CONFIG_KEXEC */ +static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) +{ + if (master) + pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n"); +} +#endif /* !CONFIG_KEXEC */ + /* * UV NMI handler */ @@ -517,6 +554,10 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) /* Indicate we are the first CPU into the NMI handler */ master = (atomic_read(&uv_nmi_cpu) == cpu); + /* If NMI action is "kdump", then attempt to do it */ + if (uv_nmi_action_is("kdump")) + uv_nmi_kdump(cpu, master, regs); + /* Pause as all cpus enter the NMI handler */ uv_nmi_wait(master); From 8eba18428ac926f436064ac281e76d36d51bd631 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 23 Sep 2013 16:25:06 -0500 Subject: [PATCH 5/9] x86/UV: Add uvtrace support This patch adds support for the uvtrace module by providing a skeleton call to the registered trace function. It also provides another separate 'NMI' tracer that is triggered by the system wide 'power nmi' command. Signed-off-by: Mike Travis Reviewed-by: Dimitri Sivanich Reviewed-by: Hedi Berriche Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Jason Wessel Link: http://lkml.kernel.org/r/20130923212501.185052551@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv.h | 8 ++++++++ arch/x86/platform/uv/uv_nmi.c | 13 ++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 6b964a0b86d1..8b1283daa332 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -14,6 +14,13 @@ extern void uv_cpu_init(void); extern void uv_nmi_init(void); extern void uv_register_nmi_notifier(void); extern void uv_system_init(void); +extern void (*uv_trace_nmi_func)(unsigned int reason, struct pt_regs *regs); +extern void (*uv_trace_func)(const char *f, const int l, const char *fmt, ...); +#define uv_trace(fmt, ...) \ +do { \ + if (unlikely(uv_trace_func)) \ + (uv_trace_func)(__func__, __LINE__, fmt, ##__VA_ARGS__);\ +} while (0) extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long start, @@ -26,6 +33,7 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline int is_uv_system(void) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } +static inline void uv_trace(void *fmt, ...) { } static inline void uv_register_nmi_notifier(void) { } static inline const struct cpumask * uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 2579fbd6b95c..c171ca533156 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -1,5 +1,5 @@ /* - * SGI NMI support routines + * SGI NMI/TRACE support routines * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,6 +36,13 @@ #include #include +void (*uv_trace_func)(const char *f, const int l, const char *fmt, ...); +EXPORT_SYMBOL(uv_trace_func); + +void (*uv_trace_nmi_func)(unsigned int reason, struct pt_regs *regs); +EXPORT_SYMBOL(uv_trace_nmi_func); + + /* * UV handler for NMI * @@ -551,6 +558,10 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) return NMI_DONE; } + /* Call possible NMI trace function */ + if (unlikely(uv_trace_nmi_func)) + (uv_trace_nmi_func)(reason, regs); + /* Indicate we are the first CPU into the NMI handler */ master = (atomic_read(&uv_nmi_cpu) == cpu); From 8a1f4653f27ffd5d61088cf6b95c39bb13bf6132 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 24 Sep 2013 09:52:40 +0200 Subject: [PATCH 6/9] x86/UV: Check for alloc_cpumask_var() failures properly in uv_nmi_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC warned about: arch/x86/platform/uv/uv_nmi.c: In function ‘uv_nmi_setup’: arch/x86/platform/uv/uv_nmi.c:664:2: warning: the address of ‘uv_nmi_cpu_mask’ will always evaluate as ‘true’ The reason is this code: alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL); BUG_ON(!uv_nmi_cpu_mask); which is not the way to check for alloc_cpumask_var() failures - its return code should be checked instead. Cc: Mike Travis Link: http://lkml.kernel.org/n/tip-2pXRemsjupmvonbpmmnzleo1@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/uv_nmi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index c171ca533156..9126dfb6dbfc 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -660,8 +660,7 @@ void uv_nmi_setup(void) } uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid]; } - alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL); - BUG_ON(!uv_nmi_cpu_mask); + BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL)); } From 8daaa5f8261bffd2f6217a960f9182d0503a5c44 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 2 Oct 2013 10:14:18 -0500 Subject: [PATCH 7/9] kdb: Add support for external NMI handler to call KGDB/KDB This patch adds a kgdb_nmicallin() interface that can be used by external NMI handlers to call the KGDB/KDB handler. The primary need for this is for those types of NMI interrupts where all the CPUs have already received the NMI signal. Therefore no send_IPI(NMI) is required, and in fact it will cause a 2nd unhandled NMI to occur. This generates the "Dazed and Confuzed" messages. Since all the CPUs are getting the NMI at roughly the same time, it's not guaranteed that the first CPU that hits the NMI handler will manage to enter KGDB and set the dbg_master_lock before the slaves start entering. The new argument "send_ready" was added for KGDB to signal the NMI handler to release the slave CPUs for entry into KGDB. Signed-off-by: Mike Travis Acked-by: Jason Wessel Reviewed-by: Dimitri Sivanich Reviewed-by: Hedi Berriche Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20131002151417.928886849@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- include/linux/kdb.h | 1 + include/linux/kgdb.h | 1 + kernel/debug/debug_core.c | 32 ++++++++++++++++++++++++++++++-- kernel/debug/debug_core.h | 3 +++ kernel/debug/kdb/kdb_debugger.c | 5 ++++- kernel/debug/kdb/kdb_main.c | 3 +++ 6 files changed, 42 insertions(+), 3 deletions(-) diff --git a/include/linux/kdb.h b/include/linux/kdb.h index 7f6fe6e015bc..290db1269c4c 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -109,6 +109,7 @@ typedef enum { KDB_REASON_RECURSE, /* Recursive entry to kdb; * regs probably valid */ KDB_REASON_SSTEP, /* Single Step trap. - regs valid */ + KDB_REASON_SYSTEM_NMI, /* In NMI due to SYSTEM cmd; regs valid */ } kdb_reason_t; extern int kdb_trap_printk; diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index c6e091bf39a5..dfb4f2ffdaa2 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -310,6 +310,7 @@ extern int kgdb_handle_exception(int ex_vector, int signo, int err_code, struct pt_regs *regs); extern int kgdb_nmicallback(int cpu, void *regs); +extern int kgdb_nmicallin(int cpu, int trapnr, void *regs, atomic_t *snd_rdy); extern void gdbstub_exit(int status); extern int kgdb_single_step; diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 0506d447aed2..7d2f35e5df2f 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -575,8 +575,12 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, raw_spin_lock(&dbg_slave_lock); #ifdef CONFIG_SMP + /* If send_ready set, slaves are already waiting */ + if (ks->send_ready) + atomic_set(ks->send_ready, 1); + /* Signal the other CPUs to enter kgdb_wait() */ - if ((!kgdb_single_step) && kgdb_do_roundup) + else if ((!kgdb_single_step) && kgdb_do_roundup) kgdb_roundup_cpus(flags); #endif @@ -678,11 +682,11 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) if (arch_kgdb_ops.enable_nmi) arch_kgdb_ops.enable_nmi(0); + memset(ks, 0, sizeof(struct kgdb_state)); ks->cpu = raw_smp_processor_id(); ks->ex_vector = evector; ks->signo = signo; ks->err_code = ecode; - ks->kgdb_usethreadid = 0; ks->linux_regs = regs; if (kgdb_reenter_check(ks)) @@ -732,6 +736,30 @@ int kgdb_nmicallback(int cpu, void *regs) return 1; } +int kgdb_nmicallin(int cpu, int trapnr, void *regs, atomic_t *send_ready) +{ +#ifdef CONFIG_SMP + if (!kgdb_io_ready(0) || !send_ready) + return 1; + + if (kgdb_info[cpu].enter_kgdb == 0) { + struct kgdb_state kgdb_var; + struct kgdb_state *ks = &kgdb_var; + + memset(ks, 0, sizeof(struct kgdb_state)); + ks->cpu = cpu; + ks->ex_vector = trapnr; + ks->signo = SIGTRAP; + ks->err_code = KGDB_KDB_REASON_SYSTEM_NMI; + ks->linux_regs = regs; + ks->send_ready = send_ready; + kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER); + return 0; + } +#endif + return 1; +} + static void kgdb_console_write(struct console *co, const char *s, unsigned count) { diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h index 2235967e78b0..572aa4f5677c 100644 --- a/kernel/debug/debug_core.h +++ b/kernel/debug/debug_core.h @@ -26,6 +26,7 @@ struct kgdb_state { unsigned long threadid; long kgdb_usethreadid; struct pt_regs *linux_regs; + atomic_t *send_ready; }; /* Exception state values */ @@ -74,11 +75,13 @@ extern int kdb_stub(struct kgdb_state *ks); extern int kdb_parse(const char *cmdstr); extern int kdb_common_init_state(struct kgdb_state *ks); extern int kdb_common_deinit_state(void); +#define KGDB_KDB_REASON_SYSTEM_NMI KDB_REASON_SYSTEM_NMI #else /* ! CONFIG_KGDB_KDB */ static inline int kdb_stub(struct kgdb_state *ks) { return DBG_PASS_EVENT; } +#define KGDB_KDB_REASON_SYSTEM_NMI 0 #endif /* CONFIG_KGDB_KDB */ #endif /* _DEBUG_CORE_H_ */ diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index 328d18ef31e4..8859ca34dcfe 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c @@ -69,7 +69,10 @@ int kdb_stub(struct kgdb_state *ks) if (atomic_read(&kgdb_setting_breakpoint)) reason = KDB_REASON_KEYBOARD; - if (in_nmi()) + if (ks->err_code == KDB_REASON_SYSTEM_NMI && ks->signo == SIGTRAP) + reason = KDB_REASON_SYSTEM_NMI; + + else if (in_nmi()) reason = KDB_REASON_NMI; for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) { diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 00eb8f7fbf41..0b097c8a1e50 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1200,6 +1200,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, instruction_pointer(regs)); kdb_dumpregs(regs); break; + case KDB_REASON_SYSTEM_NMI: + kdb_printf("due to System NonMaskable Interrupt\n"); + break; case KDB_REASON_NMI: kdb_printf("due to NonMaskable Interrupt @ " kdb_machreg_fmt "\n", From e379ea82dd53a5cc8e3ac0b7899a8012006c712c Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 2 Oct 2013 10:14:19 -0500 Subject: [PATCH 8/9] x86/UV: Add call to KGDB/KDB from NMI handler This patch restores the capability to enter KDB (and KGDB) from the UV NMI handler. This is needed because the UV system console is not capable of sending the 'break' signal to the serial console port. It is also useful when the kernel is hung in such a way that it isn't responding to normal external I/O, so sending 'g' to sysreq-trigger does not work either. Another benefit of the external NMI command is that all the cpus receive the NMI signal at roughly the same time so they are more closely aligned timewise. It utilizes the newly added kgdb_nmicallin function to gain entry to KGDB/KDB by the master. The slaves still enter via the standard kgdb_nmicallback function. It also uses the new 'send_ready' pointer to tell KGDB/KDB to signal the slaves when to proceed into the KGDB slave loop. It is enabled when the nmi action is set to "kdb" and the kernel is built with CONFIG_KDB enabled. Note that if kgdb is connected that interface will be used instead. Signed-off-by: Mike Travis Reviewed-by: Dimitri Sivanich Reviewed-by: Hedi Berriche Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Jason Wessel Link: http://lkml.kernel.org/r/20131002151418.089692683@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/uv_nmi.c | 47 ++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 9126dfb6dbfc..9b8ac60ad8db 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -21,7 +21,9 @@ #include #include +#include #include +#include #include #include #include @@ -32,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -153,8 +156,9 @@ module_param_named(retry_count, uv_nmi_retry_count, int, 0644); * "dump" - dump process stack for each cpu * "ips" - dump IP info for each cpu * "kdump" - do crash dump + * "kdb" - enter KDB/KGDB (default) */ -static char uv_nmi_action[8] = "dump"; +static char uv_nmi_action[8] = "kdb"; module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644); static inline bool uv_nmi_action_is(const char *action) @@ -540,6 +544,43 @@ static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) } #endif /* !CONFIG_KEXEC */ +#ifdef CONFIG_KGDB_KDB +/* Call KDB from NMI handler */ +static void uv_call_kdb(int cpu, struct pt_regs *regs, int master) +{ + int ret; + + if (master) { + /* call KGDB NMI handler as MASTER */ + ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, + &uv_nmi_slave_continue); + if (ret) { + pr_alert("KDB returned error, is kgdboc set?\n"); + atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); + } + } else { + /* wait for KGDB signal that it's ready for slaves to enter */ + int sig; + + do { + cpu_relax(); + sig = atomic_read(&uv_nmi_slave_continue); + } while (!sig); + + /* call KGDB as slave */ + if (sig == SLAVE_CONTINUE) + kgdb_nmicallback(cpu, regs); + } + uv_nmi_sync_exit(master); +} + +#else /* !CONFIG_KGDB_KDB */ +static inline void uv_call_kdb(int cpu, struct pt_regs *regs, int master) +{ + pr_err("UV: NMI error: KGDB/KDB is not enabled in this kernel\n"); +} +#endif /* !CONFIG_KGDB_KDB */ + /* * UV NMI handler */ @@ -576,6 +617,10 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) uv_nmi_dump_state(cpu, regs, master); + /* Call KDB if enabled */ + else if (uv_nmi_action_is("kdb")) + uv_call_kdb(cpu, regs, master); + /* Clear per_cpu "in nmi" flag */ atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT); From b5dfcb09debc38582c3cb72a3c1a88b919b07f2d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 11 Nov 2013 19:53:42 +0100 Subject: [PATCH 9/9] Revert "x86/UV: Add uvtrace support" This reverts commit 8eba18428ac926f436064ac281e76d36d51bd631. uv_trace() is not used by anything, nor is uv_trace_nmi_func, nor uv_trace_func. That's not how we do instrumentation code in the kernel: we add tracepoints, printk()s, etc. so that everyone not just those with magic kernel modules can debug a system. So remove this unused (and misguied) piece of code. Signed-off-by: Ingo Molnar Cc: Mike Travis Cc: Dimitri Sivanich Cc: Hedi Berriche Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Jason Wessel Link: http://lkml.kernel.org/n/tip-tumfBffmr4jmnt8Gyxanoblg@git.kernel.org --- arch/x86/include/asm/uv/uv.h | 8 -------- arch/x86/platform/uv/uv_nmi.c | 13 +------------ 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 8b1283daa332..6b964a0b86d1 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -14,13 +14,6 @@ extern void uv_cpu_init(void); extern void uv_nmi_init(void); extern void uv_register_nmi_notifier(void); extern void uv_system_init(void); -extern void (*uv_trace_nmi_func)(unsigned int reason, struct pt_regs *regs); -extern void (*uv_trace_func)(const char *f, const int l, const char *fmt, ...); -#define uv_trace(fmt, ...) \ -do { \ - if (unlikely(uv_trace_func)) \ - (uv_trace_func)(__func__, __LINE__, fmt, ##__VA_ARGS__);\ -} while (0) extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long start, @@ -33,7 +26,6 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline int is_uv_system(void) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } -static inline void uv_trace(void *fmt, ...) { } static inline void uv_register_nmi_notifier(void) { } static inline const struct cpumask * uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 9b8ac60ad8db..2e863ad4a772 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -1,5 +1,5 @@ /* - * SGI NMI/TRACE support routines + * SGI NMI support routines * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -39,13 +39,6 @@ #include #include -void (*uv_trace_func)(const char *f, const int l, const char *fmt, ...); -EXPORT_SYMBOL(uv_trace_func); - -void (*uv_trace_nmi_func)(unsigned int reason, struct pt_regs *regs); -EXPORT_SYMBOL(uv_trace_nmi_func); - - /* * UV handler for NMI * @@ -599,10 +592,6 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) return NMI_DONE; } - /* Call possible NMI trace function */ - if (unlikely(uv_trace_nmi_func)) - (uv_trace_nmi_func)(reason, regs); - /* Indicate we are the first CPU into the NMI handler */ master = (atomic_read(&uv_nmi_cpu) == cpu);