forked from luck/tmp_suning_uos_patched
[PATCH] check nmi watchdog is broken
A bug against an xSeries system showed up recently noting that the check_nmi_watchdog() test was failing. I have been investigating it and discovered that, in both i386 and x86_64, the recent change to the routine to use the cpu_callin_map has uncovered a problem. Prior to that change, on an SMP box, the test was trivially passing because all CPUs were found to not yet be online. Now that they are discovered via the callin_map, it goes on to test the counters, and they have not yet begun to increment, so it announces a CPU is stuck and bails out. On all the systems I have access to test, the announcement of failure is also bogus... by the time you can log in and check /proc/interrupts, the NMI count is happily incrementing on all CPUs. It's just that the test is being done too early. I have tried moving the call to the test around a bit, and it was always too early. I finally hit on this proposed solution: it delays the routine via a late_initcall(), which seems like the right solution to me. Signed-off-by: Adrian Bunk <bunk@stusta.de> Cc: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
fd51f666fa
commit
67701ae976
@ -1265,8 +1265,6 @@ int __init APIC_init_uniprocessor (void)
|
|||||||
|
|
||||||
setup_local_APIC();
|
setup_local_APIC();
|
||||||
|
|
||||||
if (nmi_watchdog == NMI_LOCAL_APIC)
|
|
||||||
check_nmi_watchdog();
|
|
||||||
#ifdef CONFIG_X86_IO_APIC
|
#ifdef CONFIG_X86_IO_APIC
|
||||||
if (smp_found_config)
|
if (smp_found_config)
|
||||||
if (!skip_ioapic_setup && nr_ioapics)
|
if (!skip_ioapic_setup && nr_ioapics)
|
||||||
|
@ -2175,7 +2175,6 @@ static inline void check_timer(void)
|
|||||||
disable_8259A_irq(0);
|
disable_8259A_irq(0);
|
||||||
setup_nmi();
|
setup_nmi();
|
||||||
enable_8259A_irq(0);
|
enable_8259A_irq(0);
|
||||||
check_nmi_watchdog();
|
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -2198,7 +2197,6 @@ static inline void check_timer(void)
|
|||||||
add_pin_to_irq(0, 0, pin2);
|
add_pin_to_irq(0, 0, pin2);
|
||||||
if (nmi_watchdog == NMI_IO_APIC) {
|
if (nmi_watchdog == NMI_IO_APIC) {
|
||||||
setup_nmi();
|
setup_nmi();
|
||||||
check_nmi_watchdog();
|
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -102,20 +102,21 @@ int nmi_active;
|
|||||||
(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
|
(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
|
||||||
P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
|
P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
|
||||||
|
|
||||||
int __init check_nmi_watchdog (void)
|
static int __init check_nmi_watchdog(void)
|
||||||
{
|
{
|
||||||
unsigned int prev_nmi_count[NR_CPUS];
|
unsigned int prev_nmi_count[NR_CPUS];
|
||||||
int cpu;
|
int cpu;
|
||||||
|
|
||||||
printk(KERN_INFO "testing NMI watchdog ... ");
|
if (nmi_watchdog == NMI_NONE)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
printk(KERN_INFO "Testing NMI watchdog ... ");
|
||||||
|
|
||||||
for (cpu = 0; cpu < NR_CPUS; cpu++)
|
for (cpu = 0; cpu < NR_CPUS; cpu++)
|
||||||
prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
|
prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
|
||||||
local_irq_enable();
|
local_irq_enable();
|
||||||
mdelay((10*1000)/nmi_hz); // wait 10 ticks
|
mdelay((10*1000)/nmi_hz); // wait 10 ticks
|
||||||
|
|
||||||
/* FIXME: Only boot CPU is online at this stage. Check CPUs
|
|
||||||
as they come up. */
|
|
||||||
for (cpu = 0; cpu < NR_CPUS; cpu++) {
|
for (cpu = 0; cpu < NR_CPUS; cpu++) {
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
/* Check cpu_callin_map here because that is set
|
/* Check cpu_callin_map here because that is set
|
||||||
@ -139,6 +140,8 @@ int __init check_nmi_watchdog (void)
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
/* This needs to happen later in boot so counters are working */
|
||||||
|
late_initcall(check_nmi_watchdog);
|
||||||
|
|
||||||
static int __init setup_nmi_watchdog(char *str)
|
static int __init setup_nmi_watchdog(char *str)
|
||||||
{
|
{
|
||||||
|
@ -1089,9 +1089,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nmi_watchdog == NMI_LOCAL_APIC)
|
|
||||||
check_nmi_watchdog();
|
|
||||||
|
|
||||||
smpboot_setup_io_apic();
|
smpboot_setup_io_apic();
|
||||||
|
|
||||||
setup_boot_APIC_clock();
|
setup_boot_APIC_clock();
|
||||||
|
@ -1607,7 +1607,6 @@ static inline void check_timer(void)
|
|||||||
disable_8259A_irq(0);
|
disable_8259A_irq(0);
|
||||||
setup_nmi();
|
setup_nmi();
|
||||||
enable_8259A_irq(0);
|
enable_8259A_irq(0);
|
||||||
check_nmi_watchdog();
|
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -1627,7 +1626,6 @@ static inline void check_timer(void)
|
|||||||
nmi_watchdog_default();
|
nmi_watchdog_default();
|
||||||
if (nmi_watchdog == NMI_IO_APIC) {
|
if (nmi_watchdog == NMI_IO_APIC) {
|
||||||
setup_nmi();
|
setup_nmi();
|
||||||
check_nmi_watchdog();
|
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -112,17 +112,20 @@ static __init int cpu_has_lapic(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int __init check_nmi_watchdog (void)
|
static int __init check_nmi_watchdog (void)
|
||||||
{
|
{
|
||||||
int counts[NR_CPUS];
|
int counts[NR_CPUS];
|
||||||
int cpu;
|
int cpu;
|
||||||
|
|
||||||
|
if (nmi_watchdog == NMI_NONE)
|
||||||
|
return 0;
|
||||||
|
|
||||||
if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic()) {
|
if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic()) {
|
||||||
nmi_watchdog = NMI_NONE;
|
nmi_watchdog = NMI_NONE;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
printk(KERN_INFO "testing NMI watchdog ... ");
|
printk(KERN_INFO "Testing NMI watchdog ... ");
|
||||||
|
|
||||||
for (cpu = 0; cpu < NR_CPUS; cpu++)
|
for (cpu = 0; cpu < NR_CPUS; cpu++)
|
||||||
counts[cpu] = cpu_pda[cpu].__nmi_count;
|
counts[cpu] = cpu_pda[cpu].__nmi_count;
|
||||||
@ -148,6 +151,8 @@ int __init check_nmi_watchdog (void)
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
/* Have this called later during boot so counters are updating */
|
||||||
|
late_initcall(check_nmi_watchdog);
|
||||||
|
|
||||||
int __init setup_nmi_watchdog(char *str)
|
int __init setup_nmi_watchdog(char *str)
|
||||||
{
|
{
|
||||||
|
@ -109,7 +109,6 @@ extern int APIC_init_uniprocessor (void);
|
|||||||
extern void disable_APIC_timer(void);
|
extern void disable_APIC_timer(void);
|
||||||
extern void enable_APIC_timer(void);
|
extern void enable_APIC_timer(void);
|
||||||
|
|
||||||
extern int check_nmi_watchdog (void);
|
|
||||||
extern void enable_NMI_through_LVT0 (void * dummy);
|
extern void enable_NMI_through_LVT0 (void * dummy);
|
||||||
|
|
||||||
extern unsigned int nmi_watchdog;
|
extern unsigned int nmi_watchdog;
|
||||||
|
@ -99,7 +99,6 @@ extern void disable_APIC_timer(void);
|
|||||||
extern void enable_APIC_timer(void);
|
extern void enable_APIC_timer(void);
|
||||||
extern void clustered_apic_check(void);
|
extern void clustered_apic_check(void);
|
||||||
|
|
||||||
extern int check_nmi_watchdog(void);
|
|
||||||
extern void nmi_watchdog_default(void);
|
extern void nmi_watchdog_default(void);
|
||||||
extern int setup_nmi_watchdog(char *);
|
extern int setup_nmi_watchdog(char *);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user