x86, UV: Fix NMI handler for UV platforms
This fixes problems seen on UV systems handling NMIs from the node controller. I isolated the "dazed..." messages that I saw earlier to a bug in the BMC on our platform. It was sending NMIs w/o properly setting a register that indicated the source of NMI. So rather than _assuming_ any unhandled NMI came from the UV system maintenance console (SMC), add a check to verify that the SMC actually sent the NMI. Signed-off-by: Jack Steiner <steiner@sgi.com> Cc: gorcunov@gmail.com Cc: dzickus@redhat.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
693d92a1bb
commit
1d44e8288a
@ -398,6 +398,8 @@ struct uv_blade_info {
|
||||
unsigned short nr_online_cpus;
|
||||
unsigned short pnode;
|
||||
short memory_nid;
|
||||
spinlock_t nmi_lock;
|
||||
unsigned long nmi_count;
|
||||
};
|
||||
extern struct uv_blade_info *uv_blade_info;
|
||||
extern short *uv_node_to_blade;
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* SGI UV MMR definitions
|
||||
*
|
||||
* Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
|
||||
* Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ASM_X86_UV_UV_MMRS_H
|
||||
@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
|
||||
} s;
|
||||
};
|
||||
|
||||
/* ========================================================================= */
|
||||
/* UVH_SCRATCH5 */
|
||||
/* ========================================================================= */
|
||||
#define UVH_SCRATCH5 0x2d0200UL
|
||||
#define UVH_SCRATCH5_32 0x00778
|
||||
|
||||
#define UVH_SCRATCH5_SCRATCH5_SHFT 0
|
||||
#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
|
||||
union uvh_scratch5_u {
|
||||
unsigned long v;
|
||||
struct uvh_scratch5_s {
|
||||
unsigned long scratch5 : 64; /* RW, W1CS */
|
||||
} s;
|
||||
};
|
||||
|
||||
#endif /* __ASM_UV_MMRS_X86_H__ */
|
||||
|
@ -37,6 +37,13 @@
|
||||
#include <asm/smp.h>
|
||||
#include <asm/x86_init.h>
|
||||
#include <asm/emergency-restart.h>
|
||||
#include <asm/nmi.h>
|
||||
|
||||
/* BMC sets a bit this MMR non-zero before sending an NMI */
|
||||
#define UVH_NMI_MMR UVH_SCRATCH5
|
||||
#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8)
|
||||
#define UV_NMI_PENDING_MASK (1UL << 63)
|
||||
DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
|
||||
|
||||
DEFINE_PER_CPU(int, x2apic_extra_bits);
|
||||
|
||||
@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
|
||||
*/
|
||||
int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
|
||||
{
|
||||
unsigned long real_uv_nmi;
|
||||
int bid;
|
||||
|
||||
if (reason != DIE_NMIUNKNOWN)
|
||||
return NOTIFY_OK;
|
||||
|
||||
if (in_crash_kexec)
|
||||
/* do nothing if entering the crash kernel */
|
||||
return NOTIFY_OK;
|
||||
|
||||
/*
|
||||
* Use a lock so only one cpu prints at a time
|
||||
* to prevent intermixed output.
|
||||
* Each blade has an MMR that indicates when an NMI has been sent
|
||||
* to cpus on the blade. If an NMI is detected, atomically
|
||||
* clear the MMR and update a per-blade NMI count used to
|
||||
* cause each cpu on the blade to notice a new NMI.
|
||||
*/
|
||||
bid = uv_numa_blade_id();
|
||||
real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
|
||||
|
||||
if (unlikely(real_uv_nmi)) {
|
||||
spin_lock(&uv_blade_info[bid].nmi_lock);
|
||||
real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
|
||||
if (real_uv_nmi) {
|
||||
uv_blade_info[bid].nmi_count++;
|
||||
uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
|
||||
}
|
||||
spin_unlock(&uv_blade_info[bid].nmi_lock);
|
||||
}
|
||||
|
||||
if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
|
||||
|
||||
/*
|
||||
* Use a lock so only one cpu prints at a time.
|
||||
* This prevents intermixed output.
|
||||
*/
|
||||
spin_lock(&uv_nmi_lock);
|
||||
pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
|
||||
pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
|
||||
dump_stack();
|
||||
spin_unlock(&uv_nmi_lock);
|
||||
|
||||
@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
|
||||
}
|
||||
|
||||
static struct notifier_block uv_dump_stack_nmi_nb = {
|
||||
.notifier_call = uv_handle_nmi
|
||||
.notifier_call = uv_handle_nmi,
|
||||
.priority = NMI_LOCAL_LOW_PRIOR - 1,
|
||||
};
|
||||
|
||||
void uv_register_nmi_notifier(void)
|
||||
@ -720,8 +756,9 @@ void __init uv_system_init(void)
|
||||
printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
|
||||
|
||||
bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
|
||||
uv_blade_info = kmalloc(bytes, GFP_KERNEL);
|
||||
uv_blade_info = kzalloc(bytes, GFP_KERNEL);
|
||||
BUG_ON(!uv_blade_info);
|
||||
|
||||
for (blade = 0; blade < uv_num_possible_blades(); blade++)
|
||||
uv_blade_info[blade].memory_nid = -1;
|
||||
|
||||
@ -747,6 +784,7 @@ void __init uv_system_init(void)
|
||||
uv_blade_info[blade].pnode = pnode;
|
||||
uv_blade_info[blade].nr_possible_cpus = 0;
|
||||
uv_blade_info[blade].nr_online_cpus = 0;
|
||||
spin_lock_init(&uv_blade_info[blade].nmi_lock);
|
||||
max_pnode = max(pnode, max_pnode);
|
||||
blade++;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user