Merge branch 'perf/x86' into perf/core, because it's ready

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2015-03-27 09:46:19 +01:00
commit 936c663aed
11 changed files with 1514 additions and 63 deletions

View File

@ -648,7 +648,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* Per-cpu breakpoints are not supported by our stepping
* mechanism.
*/
if (!bp->hw.bp_target)
if (!bp->hw.target)
return -EINVAL;
/*

View File

@ -527,7 +527,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* Disallow per-task kernel breakpoints since these would
* complicate the stepping code.
*/
if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.bp_target)
if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
return -EINVAL;
return 0;

View File

@ -12,7 +12,7 @@
#include <asm/disabled-features.h>
#endif
#define NCAPINTS 11 /* N 32-bit words worth of info */
#define NCAPINTS 13 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@ -226,6 +226,7 @@
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
@ -242,6 +243,12 @@
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
/*
* BUG word(s)
*/

View File

@ -109,6 +109,9 @@ struct cpuinfo_x86 {
/* in KB - valid for CPUS which support this call: */
int x86_cache_size;
int x86_cache_alignment; /* In bytes */
/* Cache QoS architectural values: */
int x86_cache_max_rmid; /* max index */
int x86_cache_occ_scale; /* scale to bytes */
int x86_power;
unsigned long loops_per_jiffy;
/* cpuid returned max cores value: */

View File

@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
endif
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o
obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
perf_event_intel_uncore_snb.o \

View File

@ -646,6 +646,30 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[10] = eax;
}
/* Additional Intel-defined flags: level 0x0000000F */
if (c->cpuid_level >= 0x0000000F) {
u32 eax, ebx, ecx, edx;
/* QoS sub-leaf, EAX=0Fh, ECX=0 */
cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
c->x86_capability[11] = edx;
if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
/* will be overridden if occupancy monitoring exists */
c->x86_cache_max_rmid = ebx;
/* QoS sub-leaf, EAX=0Fh, ECX=1 */
cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
c->x86_capability[12] = edx;
if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
c->x86_cache_max_rmid = ecx;
c->x86_cache_occ_scale = ebx;
}
} else {
c->x86_cache_max_rmid = -1;
c->x86_cache_occ_scale = -1;
}
}
/* AMD-defined flags: level 0x80000001 */
xlvl = cpuid_eax(0x80000000);
c->extended_cpuid_level = xlvl;
@ -834,6 +858,20 @@ static void generic_identify(struct cpuinfo_x86 *c)
detect_nopl(c);
}
static void x86_init_cache_qos(struct cpuinfo_x86 *c)
{
/*
* The heavy lifting of max_rmid and cache_occ_scale are handled
* in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu
* in case CQM bits really aren't there in this CPU.
*/
if (c != &boot_cpu_data) {
boot_cpu_data.x86_cache_max_rmid =
min(boot_cpu_data.x86_cache_max_rmid,
c->x86_cache_max_rmid);
}
}
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
@ -923,6 +961,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
init_hypervisor(c);
x86_init_rdrand(c);
x86_init_cache_qos(c);
/*
* Clear/Set all flags overriden by options, need do it

File diff suppressed because it is too large Load Diff

View File

@ -53,6 +53,7 @@ struct perf_guest_info_callbacks {
#include <linux/sysfs.h>
#include <linux/perf_regs.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <asm/local.h>
struct perf_callchain_entry {
@ -118,10 +119,16 @@ struct hw_perf_event {
struct hrtimer hrtimer;
};
struct { /* tracepoint */
struct task_struct *tp_target;
/* for tp_event->class */
struct list_head tp_list;
};
struct { /* intel_cqm */
int cqm_state;
int cqm_rmid;
struct list_head cqm_events_entry;
struct list_head cqm_groups_entry;
struct list_head cqm_group_entry;
};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
struct { /* breakpoint */
/*
@ -129,12 +136,12 @@ struct hw_perf_event {
* problem hw_breakpoint has with context
* creation and event initalization.
*/
struct task_struct *bp_target;
struct arch_hw_breakpoint info;
struct list_head bp_list;
};
#endif
};
struct task_struct *target;
int state;
local64_t prev_count;
u64 sample_period;
@ -271,6 +278,11 @@ struct pmu {
*/
size_t task_ctx_size;
/*
* Return the count value for a counter.
*/
u64 (*count) (struct perf_event *event); /*optional*/
};
/**
@ -547,6 +559,35 @@ struct perf_output_handle {
int page;
};
#ifdef CONFIG_CGROUP_PERF
/*
* perf_cgroup_info keeps track of time_enabled for a cgroup.
* This is a per-cpu dynamically allocated data structure.
*/
struct perf_cgroup_info {
u64 time;
u64 timestamp;
};
struct perf_cgroup {
struct cgroup_subsys_state css;
struct perf_cgroup_info __percpu *info;
};
/*
* Must ensure cgroup is pinned (css_get) before calling
* this function. In other words, we cannot call this function
* if there is no cgroup event for the current CPU context.
*/
static inline struct perf_cgroup *
perf_cgroup_from_task(struct task_struct *task)
{
return container_of(task_css(task, perf_event_cgrp_id),
struct perf_cgroup, css);
}
#endif /* CONFIG_CGROUP_PERF */
#ifdef CONFIG_PERF_EVENTS
extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
@ -740,6 +781,11 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
__perf_event_task_sched_out(prev, next);
}
static inline u64 __perf_event_count(struct perf_event *event)
{
return local64_read(&event->count) + atomic64_read(&event->child_count);
}
extern void perf_event_mmap(struct vm_area_struct *vma);
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);

View File

@ -34,11 +34,11 @@
#include <linux/syscalls.h>
#include <linux/anon_inodes.h>
#include <linux/kernel_stat.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>
#include <linux/ftrace_event.h>
#include <linux/hw_breakpoint.h>
#include <linux/mm_types.h>
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/compat.h>
@ -351,32 +351,6 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
#ifdef CONFIG_CGROUP_PERF
/*
* perf_cgroup_info keeps track of time_enabled for a cgroup.
* This is a per-cpu dynamically allocated data structure.
*/
struct perf_cgroup_info {
u64 time;
u64 timestamp;
};
struct perf_cgroup {
struct cgroup_subsys_state css;
struct perf_cgroup_info __percpu *info;
};
/*
* Must ensure cgroup is pinned (css_get) before calling
* this function. In other words, we cannot call this function
* if there is no cgroup event for the current CPU context.
*/
static inline struct perf_cgroup *
perf_cgroup_from_task(struct task_struct *task)
{
return container_of(task_css(task, perf_event_cgrp_id),
struct perf_cgroup, css);
}
static inline bool
perf_cgroup_match(struct perf_event *event)
{
@ -3220,7 +3194,10 @@ static void __perf_event_read(void *info)
static inline u64 perf_event_count(struct perf_event *event)
{
return local64_read(&event->count) + atomic64_read(&event->child_count);
if (event->pmu->count)
return event->pmu->count(event);
return __perf_event_count(event);
}
static u64 perf_event_read(struct perf_event *event)
@ -7149,7 +7126,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
struct perf_event *group_leader,
struct perf_event *parent_event,
perf_overflow_handler_t overflow_handler,
void *context)
void *context, int cgroup_fd)
{
struct pmu *pmu;
struct perf_event *event;
@ -7204,16 +7181,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (task) {
event->attach_state = PERF_ATTACH_TASK;
if (attr->type == PERF_TYPE_TRACEPOINT)
event->hw.tp_target = task;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
/*
* hw_breakpoint is a bit difficult here..
* XXX pmu::event_init needs to know what task to account to
* and we cannot use the ctx information because we need the
* pmu before we get a ctx.
*/
else if (attr->type == PERF_TYPE_BREAKPOINT)
event->hw.bp_target = task;
#endif
event->hw.target = task;
}
if (!overflow_handler && parent_event) {
@ -7245,6 +7218,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (!has_branch_stack(event))
event->attr.branch_sample_type = 0;
if (cgroup_fd != -1) {
err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
if (err)
goto err_ns;
}
pmu = perf_init_event(event);
if (!pmu)
goto err_ns;
@ -7268,6 +7247,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
event->destroy(event);
module_put(pmu->module);
err_ns:
if (is_cgroup_event(event))
perf_detach_cgroup(event);
if (event->ns)
put_pid_ns(event->ns);
kfree(event);
@ -7486,6 +7467,7 @@ SYSCALL_DEFINE5(perf_event_open,
int move_group = 0;
int err;
int f_flags = O_RDWR;
int cgroup_fd = -1;
/* for future expandability... */
if (flags & ~PERF_FLAG_ALL)
@ -7551,21 +7533,16 @@ SYSCALL_DEFINE5(perf_event_open,
get_online_cpus();
if (flags & PERF_FLAG_PID_CGROUP)
cgroup_fd = pid;
event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
NULL, NULL);
NULL, NULL, cgroup_fd);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err_cpus;
}
if (flags & PERF_FLAG_PID_CGROUP) {
err = perf_cgroup_connect(pid, event, &attr, group_leader);
if (err) {
__free_event(event);
goto err_cpus;
}
}
if (is_sampling_event(event)) {
if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
err = -ENOTSUPP;
@ -7802,7 +7779,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
*/
event = perf_event_alloc(attr, cpu, task, NULL, NULL,
overflow_handler, context);
overflow_handler, context, -1);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err;
@ -8163,7 +8140,7 @@ inherit_event(struct perf_event *parent_event,
parent_event->cpu,
child,
group_leader, parent_event,
NULL, NULL);
NULL, NULL, -1);
if (IS_ERR(child_event))
return child_event;

View File

@ -116,12 +116,12 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
*/
static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
{
struct task_struct *tsk = bp->hw.bp_target;
struct task_struct *tsk = bp->hw.target;
struct perf_event *iter;
int count = 0;
list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
if (iter->hw.bp_target == tsk &&
if (iter->hw.target == tsk &&
find_slot_idx(iter) == type &&
(iter->cpu < 0 || cpu == iter->cpu))
count += hw_breakpoint_weight(iter);
@ -153,7 +153,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
int nr;
nr = info->cpu_pinned;
if (!bp->hw.bp_target)
if (!bp->hw.target)
nr += max_task_bp_pinned(cpu, type);
else
nr += task_bp_pinned(cpu, bp, type);
@ -210,7 +210,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
weight = -weight;
/* Pinned counter cpu profiling */
if (!bp->hw.bp_target) {
if (!bp->hw.target) {
get_bp_info(bp->cpu, type)->cpu_pinned += weight;
return;
}

View File

@ -1005,7 +1005,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
return true;
list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
if (event->hw.tp_target->mm == mm)
if (event->hw.target->mm == mm)
return true;
}
@ -1015,7 +1015,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
static inline bool
uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
{
return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
return __uprobe_perf_filter(&tu->filter, event->hw.target->mm);
}
static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
@ -1023,10 +1023,10 @@ static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
bool done;
write_lock(&tu->filter.rwlock);
if (event->hw.tp_target) {
if (event->hw.target) {
list_del(&event->hw.tp_list);
done = tu->filter.nr_systemwide ||
(event->hw.tp_target->flags & PF_EXITING) ||
(event->hw.target->flags & PF_EXITING) ||
uprobe_filter_event(tu, event);
} else {
tu->filter.nr_systemwide--;
@ -1046,7 +1046,7 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
int err;
write_lock(&tu->filter.rwlock);
if (event->hw.tp_target) {
if (event->hw.target) {
/*
* event->parent != NULL means copy_process(), we can avoid
* uprobe_apply(). current->mm must be probed and we can rely