[PATCH] x86: Refactor thermal throttle processing
Refactor the event processing (syslog messaging and rate limiting) into a separate file, therm_throt.c. This allows consistent reporting of CPU thermal throttle events. After ACK'ing the interrupt, if the event is current, the user (p4.c/mce_intel.c) calls therm_throt_process() to log (and rate limit) the event. If that function returns 1, the user has the option to log things further (such as to mce_log in x86_64).

AK: minor cleanup

Signed-off-by: Dmitriy Zavin <dmitriyz@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
This commit is contained in:
parent
3b17167283
commit
15d5f83983
@ -1,2 +1,2 @@
|
||||
obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o
|
||||
obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o
|
||||
obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
|
||||
|
@ -13,6 +13,8 @@
|
||||
#include <asm/msr.h>
|
||||
#include <asm/apic.h>
|
||||
|
||||
#include <asm/therm_throt.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
/* as supported by the P4/Xeon family */
|
||||
@ -44,25 +46,12 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
|
||||
/* P4/Xeon Thermal transition interrupt handler */
|
||||
static void intel_thermal_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
u32 l, h;
|
||||
unsigned int cpu = smp_processor_id();
|
||||
static unsigned long next[NR_CPUS];
|
||||
__u64 msr_val;
|
||||
|
||||
ack_APIC_irq();
|
||||
|
||||
if (time_after(next[cpu], jiffies))
|
||||
return;
|
||||
|
||||
next[cpu] = jiffies + HZ*5;
|
||||
rdmsr(MSR_IA32_THERM_STATUS, l, h);
|
||||
if (l & 0x1) {
|
||||
printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
|
||||
printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
|
||||
cpu);
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
} else {
|
||||
printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
|
||||
}
|
||||
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
|
||||
therm_throt_process(msr_val & 0x1);
|
||||
}
|
||||
|
||||
/* Thermal interrupt handler for this CPU setup */
|
||||
@ -122,7 +111,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
|
||||
|
||||
rdmsr (MSR_IA32_MISC_ENABLE, l, h);
|
||||
wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
|
||||
|
||||
|
||||
l = apic_read (APIC_LVTTHMR);
|
||||
apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
||||
printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
|
||||
|
58
arch/i386/kernel/cpu/mcheck/therm_throt.c
Normal file
58
arch/i386/kernel/cpu/mcheck/therm_throt.c
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* linux/arch/i386/kernel/cpu/mcheck/therm_throt.c
|
||||
*
|
||||
* Thermal throttle event support code.
|
||||
*
|
||||
* Author: Dmitriy Zavin (dmitriyz@google.com)
|
||||
*
|
||||
* Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <asm/cpu.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <asm/therm_throt.h>
|
||||
|
||||
/* How long to wait between reporting thermal events */
|
||||
#define CHECK_INTERVAL (300 * HZ)
|
||||
|
||||
/*
 * Per-CPU jiffies value before which therm_throt_process() stays silent.
 *
 * Initialised to INITIAL_JIFFIES (the value jiffies holds at boot) instead
 * of the implicit 0: on 32-bit kernels jiffies starts shortly before the
 * wrap point, so time_before(jiffies, 0) would be true until the counter
 * wraps and every thermal event early in uptime would be treated as
 * rate limited and dropped.
 */
static DEFINE_PER_CPU(unsigned long, next_check) = INITIAL_JIFFIES;
|
||||
|
||||
/**
|
||||
* therm_throt_process - Process thermal throttling event
|
||||
* @curr: Whether the condition is current or not (boolean), since the
|
||||
* thermal interrupt normally gets called both when the thermal
|
||||
* event begins and once the event has ended.
|
||||
*
|
||||
* This function is normally called by the thermal interrupt after the
|
||||
* IRQ has been acknowledged.
|
||||
*
|
||||
* It will take care of rate limiting and printing messages to the syslog.
|
||||
*
|
||||
* Returns: 0 : Event should NOT be further logged, i.e. still in
|
||||
* "timeout" from previous log message.
|
||||
* 1 : Event should be logged further, and a message has been
|
||||
* printed to the syslog.
|
||||
*/
|
||||
int therm_throt_process(int curr)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
|
||||
if (time_before(jiffies, __get_cpu_var(next_check)))
|
||||
return 0;
|
||||
|
||||
__get_cpu_var(next_check) = jiffies + CHECK_INTERVAL;
|
||||
|
||||
/* if we just entered the thermal event */
|
||||
if (curr) {
|
||||
printk(KERN_CRIT "CPU%d: Temperature above threshold, "
|
||||
"cpu clock throttled\n", cpu);
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
} else {
|
||||
printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
@ -11,7 +11,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
|
||||
pci-dma.o pci-nommu.o alternative.o
|
||||
|
||||
obj-$(CONFIG_STACKTRACE) += stacktrace.o
|
||||
obj-$(CONFIG_X86_MCE) += mce.o
|
||||
obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o
|
||||
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
|
||||
obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
|
||||
obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/
|
||||
@ -46,6 +46,7 @@ obj-y += intel_cacheinfo.o
|
||||
|
||||
CFLAGS_vsyscall.o := $(PROFILING) -g0
|
||||
|
||||
therm_throt-y += ../../i386/kernel/cpu/mcheck/therm_throt.o
|
||||
bootflag-y += ../../i386/kernel/bootflag.o
|
||||
cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o
|
||||
topology-y += ../../i386/kernel/topology.o
|
||||
@ -55,4 +56,3 @@ quirks-y += ../../i386/kernel/quirks.o
|
||||
i8237-y += ../../i386/kernel/i8237.o
|
||||
msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
|
||||
alternative-y += ../../i386/kernel/alternative.o
|
||||
|
||||
|
@ -274,6 +274,33 @@ void do_machine_check(struct pt_regs * regs, long error_code)
|
||||
atomic_dec(&mce_entry);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_MCE_INTEL
|
||||
/**
|
||||
* mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
|
||||
* @cpu: The CPU on which the event occurred.
|
||||
* @status: Event status information
|
||||
*
|
||||
* This function should be called by the thermal interrupt after the
|
||||
* event has been processed and the decision was made to log the event
|
||||
* further.
|
||||
*
|
||||
* The status parameter will be saved to the 'status' field of 'struct mce'
|
||||
* and historically has been the register value of the
|
||||
* MSR_IA32_THERM_STATUS (Intel) msr.
|
||||
*/
|
||||
void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
{
	struct mce event;

	/* Clear the whole record first so fields not set below read as zero. */
	memset(&event, 0, sizeof(event));

	/* Thermal events are filed under the dedicated thermal bank. */
	event.bank = MCE_THERMAL_BANK;
	event.status = status;
	event.cpu = cpu;

	/* Stamp the record with the current TSC value, then hand it to mce_log(). */
	rdtscll(event.tsc);
	mce_log(&event);
}
|
||||
#endif /* CONFIG_X86_MCE_INTEL */
|
||||
|
||||
/*
|
||||
* Periodic polling timer for "silent" machine check errors.
|
||||
*/
|
||||
|
@ -11,36 +11,21 @@
|
||||
#include <asm/mce.h>
|
||||
#include <asm/hw_irq.h>
|
||||
#include <asm/idle.h>
|
||||
|
||||
static DEFINE_PER_CPU(unsigned long, next_check);
|
||||
#include <asm/therm_throt.h>
|
||||
|
||||
asmlinkage void smp_thermal_interrupt(void)
|
||||
{
|
||||
struct mce m;
|
||||
__u64 msr_val;
|
||||
|
||||
ack_APIC_irq();
|
||||
|
||||
exit_idle();
|
||||
irq_enter();
|
||||
if (time_before(jiffies, __get_cpu_var(next_check)))
|
||||
goto done;
|
||||
|
||||
__get_cpu_var(next_check) = jiffies + HZ*300;
|
||||
memset(&m, 0, sizeof(m));
|
||||
m.cpu = smp_processor_id();
|
||||
m.bank = MCE_THERMAL_BANK;
|
||||
rdtscll(m.tsc);
|
||||
rdmsrl(MSR_IA32_THERM_STATUS, m.status);
|
||||
if (m.status & 0x1) {
|
||||
printk(KERN_EMERG
|
||||
"CPU%d: Temperature above threshold, cpu clock throttled\n", m.cpu);
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
} else {
|
||||
printk(KERN_EMERG "CPU%d: Temperature/speed normal\n", m.cpu);
|
||||
}
|
||||
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
|
||||
if (therm_throt_process(msr_val & 1))
|
||||
mce_log_therm_throt_event(smp_processor_id(), msr_val);
|
||||
|
||||
mce_log(&m);
|
||||
done:
|
||||
irq_exit();
|
||||
}
|
||||
|
||||
|
6
include/asm-i386/therm_throt.h
Normal file
6
include/asm-i386/therm_throt.h
Normal file
@ -0,0 +1,6 @@
|
||||
#ifndef __ASM_I386_THERM_THROT_H__
|
||||
#define __ASM_I386_THERM_THROT_H__ 1
|
||||
|
||||
int therm_throt_process(int curr);
|
||||
|
||||
#endif /* __ASM_I386_THERM_THROT_H__ */
|
@ -99,6 +99,8 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||
}
|
||||
#endif
|
||||
|
||||
void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
|
||||
|
||||
extern atomic_t mce_entry;
|
||||
|
||||
#endif
|
||||
|
1
include/asm-x86_64/therm_throt.h
Normal file
1
include/asm-x86_64/therm_throt.h
Normal file
@ -0,0 +1 @@
|
||||
#include <asm-i386/therm_throt.h>
|
Loading…
Reference in New Issue
Block a user