From 4e486cba285ff06a1f28f0fc2991dde1482d1dcf Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Tue, 2 Aug 2016 16:45:37 +0100 Subject: [PATCH 1/8] bus: arm-ccn: Fix PMU handling of MN The "Miscellaneous Node" fell through cracks of node initialisation, as its ID is shared with HN-I. This patch treats MN as a special case (which it is), adding separate validation check for it and pre-defining the node ID in relevant events descriptions. That way one can simply run: # perf stat -a -e ccn/mn_ecbarrier/ Additionally, direction in the MN pseudo-events XP watchpoint definitions is corrected to be "TX" (1) as they are defined from the crosspoint point of view (thus barriers are transmitted from XP to MN). Cc: stable@vger.kernel.org # 3.17+ Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 97a9185af433..a11b9bb1f27c 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -187,6 +187,7 @@ struct arm_ccn { struct arm_ccn_component *xp; struct arm_ccn_dt dt; + int mn_id; }; static DEFINE_MUTEX(arm_ccn_mutex); @@ -328,6 +329,7 @@ struct arm_ccn_pmu_event { static ssize_t arm_ccn_pmu_event_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev)); struct arm_ccn_pmu_event *event = container_of(attr, struct arm_ccn_pmu_event, attr); ssize_t res; @@ -354,6 +356,9 @@ static ssize_t arm_ccn_pmu_event_show(struct device *dev, res += snprintf(buf + res, PAGE_SIZE - res, ",cmp_l=?,cmp_h=?,mask=?"); break; + case CCN_TYPE_MN: + res += snprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id); + break; default: res += snprintf(buf + res, PAGE_SIZE - res, ",node=?"); break; @@ -383,9 +388,9 @@ static umode_t arm_ccn_pmu_events_is_visible(struct kobject *kobj, } static struct arm_ccn_pmu_event arm_ccn_pmu_events[] = { - CCN_EVENT_MN(eobarrier, "dir=0,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE), - CCN_EVENT_MN(ecbarrier, "dir=0,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE), - CCN_EVENT_MN(dvmop, "dir=0,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE), + CCN_EVENT_MN(eobarrier, "dir=1,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE), + CCN_EVENT_MN(ecbarrier, "dir=1,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE), + CCN_EVENT_MN(dvmop, "dir=1,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE), CCN_EVENT_HNI(txdatflits, "dir=1,vc=3", CCN_IDX_MASK_ANY), CCN_EVENT_HNI(rxdatflits, "dir=0,vc=3", CCN_IDX_MASK_ANY), CCN_EVENT_HNI(txreqflits, "dir=1,vc=0", CCN_IDX_MASK_ANY), @@ -759,6 +764,12 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) /* Validate node/xp vs topology */ switch (type) { + case CCN_TYPE_MN: + if (node_xp != ccn->mn_id) { + dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp); + return -EINVAL; + } + break; case CCN_TYPE_XP: if (node_xp >= ccn->num_xps) { dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp); @@ -1361,6 +1372,8 @@ static int arm_ccn_init_nodes(struct arm_ccn *ccn, int region, switch (type) { case CCN_TYPE_MN: + ccn->mn_id = id; + return 0; case CCN_TYPE_DT: return 0; case CCN_TYPE_XP: From b7c1beb278e8e3dc664ed3df3fc786db126120a9 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Fri, 5 Aug 2016 15:07:10 +0100 Subject: [PATCH 2/8] bus: arm-ccn: Do not attempt to configure XPs for cycle counter Fuzzing the CCN perf driver revealed a small but definitely dangerous mistake in the event setup code. When a cycle counter is requested, the driver should not reconfigure the events bus at all, otherwise it will corrupt (in most but the simplest cases) its configuration and may end up accessing XP array out of its bounds and corrupting control registers. Reported-by: Mark Rutland Reviewed-by: Mark Rutland Tested-by: Mark Rutland Cc: stable@vger.kernel.org # 3.17+ Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index a11b9bb1f27c..9bbb0ab275e4 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -897,6 +897,10 @@ static void arm_ccn_pmu_xp_dt_config(struct perf_event *event, int enable) struct arm_ccn_component *xp; u32 val, dt_cfg; + /* Nothing to do for cycle counter */ + if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER) + return; + if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP) xp = &ccn->xp[CCN_CONFIG_XP(event->attr.config)]; else From b928466b2169e061822daad48ecf55b005445547 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Wed, 10 Aug 2016 17:06:26 +0100 Subject: [PATCH 3/8] bus: arm-ccn: Fix XP watchpoint settings bitmask The code setting XP watchpoint comparator and mask registers should, in order to be fully compliant with specification, zero one or more most significant bits of each field. In both L cases it means zeroing bit 63. The bitmask doing this was wrong, though, zeroing bit 60 instead. Fortunately, due to a lucky coincidence, this turned out to be fairly innocent with the existing hardware. Fixed now. Cc: stable@vger.kernel.org # 3.17+ Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 9bbb0ab275e4..647a27b80eff 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -1003,7 +1003,7 @@ static void arm_ccn_pmu_xp_watchpoint_config(struct perf_event *event) /* Comparison values */ writel(cmp_l & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_L(wp)); - writel((cmp_l >> 32) & 0xefffffff, + writel((cmp_l >> 32) & 0x7fffffff, source->base + CCN_XP_DT_CMP_VAL_L(wp) + 4); writel(cmp_h & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_H(wp)); writel((cmp_h >> 32) & 0x0fffffff, @@ -1011,7 +1011,7 @@ static void arm_ccn_pmu_xp_watchpoint_config(struct perf_event *event) /* Mask */ writel(mask_l & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_L(wp)); - writel((mask_l >> 32) & 0xefffffff, + writel((mask_l >> 32) & 0x7fffffff, source->base + CCN_XP_DT_CMP_MASK_L(wp) + 4); writel(mask_h & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_H(wp)); writel((mask_h >> 32) & 0x0fffffff, From 90d11e267a32a25d2cb69127174a96b9e518395e Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Thu, 11 Aug 2016 11:56:28 +0100 Subject: [PATCH 4/8] bus: arm-ccn: Correct required arguments for XP PMU events XP can provide events from two sources: watchpoints, observing traffic on device ports and PMU looking at internal buses. Unfortunately the sysfs definition of the PMU events was requiring port number (instead of bus number) and direction (the buses are unidirectional), as these fields were shared with the watchpoint event. Although it does not introduce a major problem (port can be used as bus alias and direction is simply ignored for XP PMU events), it's better to fix it now, before external tools start depending on this behaviour. Signed-off-by: Pawel Moll --- Documentation/arm/CCN.txt | 16 ++++++++++------ drivers/bus/arm-ccn.c | 13 ++++++++++--- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/Documentation/arm/CCN.txt b/Documentation/arm/CCN.txt index ffca443a19b4..15cdb7bc57c3 100644 --- a/Documentation/arm/CCN.txt +++ b/Documentation/arm/CCN.txt @@ -18,13 +18,17 @@ and config2 fields of the perf_event_attr structure. The "events" directory provides configuration templates for all documented events, that can be used with perf tool. For example "xp_valid_flit" is an equivalent of "type=0x8,event=0x4". Other parameters must be -explicitly specified. For events originating from device, "node" -defines its index. All crosspoint events require "xp" (index), -"port" (device port number) and "vc" (virtual channel ID) and -"dir" (direction). Watchpoints (special "event" value 0xfe) also -require comparator values ("cmp_l" and "cmp_h") and "mask", being -index of the comparator mask. +explicitly specified. +For events originating from device, "node" defines its index. + +Crosspoint PMU events require "xp" (index), "bus" (bus number) +and "vc" (virtual channel ID). + +Crosspoint watchpoint-based events (special "event" value 0xfe) +require "xp" and "vc" as as above plus "port" (device port index), +"dir" (transmit/receive direction), comparator values ("cmp_l" +and "cmp_h") and "mask", being index of the comparator mask. Masks are defined separately from the event description (due to limited number of the config values) in the "cmp_mask" directory, with first 8 configurable by user and additional diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 647a27b80eff..ddb65c117012 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -213,6 +213,7 @@ static int arm_ccn_node_to_xp_port(int node) #define CCN_CONFIG_TYPE(_config) (((_config) >> 8) & 0xff) #define CCN_CONFIG_EVENT(_config) (((_config) >> 16) & 0xff) #define CCN_CONFIG_PORT(_config) (((_config) >> 24) & 0x3) +#define CCN_CONFIG_BUS(_config) (((_config) >> 24) & 0x3) #define CCN_CONFIG_VC(_config) (((_config) >> 26) & 0x7) #define CCN_CONFIG_DIR(_config) (((_config) >> 29) & 0x1) #define CCN_CONFIG_MASK(_config) (((_config) >> 30) & 0xf) @@ -242,6 +243,7 @@ static CCN_FORMAT_ATTR(xp, "config:0-7"); static CCN_FORMAT_ATTR(type, "config:8-15"); static CCN_FORMAT_ATTR(event, "config:16-23"); static CCN_FORMAT_ATTR(port, "config:24-25"); +static CCN_FORMAT_ATTR(bus, "config:24-25"); static CCN_FORMAT_ATTR(vc, "config:26-28"); static CCN_FORMAT_ATTR(dir, "config:29-29"); static CCN_FORMAT_ATTR(mask, "config:30-33"); @@ -254,6 +256,7 @@ static struct attribute *arm_ccn_pmu_format_attrs[] = { &arm_ccn_pmu_format_attr_type.attr.attr, &arm_ccn_pmu_format_attr_event.attr.attr, &arm_ccn_pmu_format_attr_port.attr.attr, + &arm_ccn_pmu_format_attr_bus.attr.attr, &arm_ccn_pmu_format_attr_vc.attr.attr, &arm_ccn_pmu_format_attr_dir.attr.attr, &arm_ccn_pmu_format_attr_mask.attr.attr, @@ -351,10 +354,14 @@ static ssize_t arm_ccn_pmu_event_show(struct device *dev, break; case CCN_TYPE_XP: res += snprintf(buf + res, PAGE_SIZE - res, - ",xp=?,port=?,vc=?,dir=?"); + ",xp=?,vc=?"); if (event->event == CCN_EVENT_WATCHPOINT) res += snprintf(buf + res, PAGE_SIZE - res, - ",cmp_l=?,cmp_h=?,mask=?"); + ",port=?,dir=?,cmp_l=?,cmp_h=?,mask=?"); + else + res += snprintf(buf + res, PAGE_SIZE - res, + ",bus=?"); + break; case CCN_TYPE_MN: res += snprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id); @@ -1029,7 +1036,7 @@ static void arm_ccn_pmu_xp_event_config(struct perf_event *event) hw->event_base = CCN_XP_DT_CONFIG__DT_CFG__XP_PMU_EVENT(hw->config_base); id = (CCN_CONFIG_VC(event->attr.config) << 4) | - (CCN_CONFIG_PORT(event->attr.config) << 3) | + (CCN_CONFIG_BUS(event->attr.config) << 3) | (CCN_CONFIG_EVENT(event->attr.config) << 0); val = readl(source->base + CCN_XP_PMU_EVENT_SEL); From 3249bce459ff0bb7c1621b00a8e2d6afe24c53bb Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Thu, 11 Aug 2016 12:00:36 +0100 Subject: [PATCH 5/8] bus: arm-ccn: Add missing event attribute exclusions for host/guest CCN PMUs have no knowledge into VM-related origins of the memory traffic, therefore can't handle requests for host-only or guest-only events. Added appropriate exclusions (they should have been there from the beginning). This required changing the error code returned, as the userspace tool only re-negotiates the options (exclude_guest is true by default) only for EINVAL. Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index ddb65c117012..02f81e308dff 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -745,9 +745,10 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) if (has_branch_stack(event) || event->attr.exclude_user || event->attr.exclude_kernel || event->attr.exclude_hv || - event->attr.exclude_idle) { + event->attr.exclude_idle || event->attr.exclude_host || + event->attr.exclude_guest) { dev_warn(ccn->dev, "Can't exclude execution levels!\n"); - return -EOPNOTSUPP; + return -EINVAL; } if (event->cpu < 0) { From 0811ef7e2f5470833a353426a6fbe0b845aea926 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 11 Aug 2016 10:50:41 +0100 Subject: [PATCH 6/8] bus: arm-ccn: fix PMU interrupt flags Currently the IRQ core is permitted to make the CCN PMU IRQ handler threaded, and will allow userspace to change the CPU affinity of the interrupt behind our back. Both of these could violate our synchronisation requirements with the core perf code, which relies upon strict CPU affinity and disabling of interrupts to guarantee mutual exclusion in some cases. As with the CPU PMU drivers, we should request the interrupt with IRQF_NOBALANCING and IRQF_NO_THREAD, to avoid these issues. Signed-off-by: Mark Rutland Acked-by: Pawel Moll Reviewed-by: Will Deacon Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 02f81e308dff..c826bb286054 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -1496,8 +1496,9 @@ static int arm_ccn_probe(struct platform_device *pdev) /* Can set 'disable' bits, so can acknowledge interrupts */ writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__ENABLE, ccn->base + CCN_MN_ERRINT_STATUS); - err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler, 0, - dev_name(ccn->dev), ccn); + err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, + dev_name(ccn->dev), ccn); if (err) return err; From 5b1e01f3ce15d3a8f2af5d38cc31f0d5c3c11dae Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 11 Aug 2016 10:50:42 +0100 Subject: [PATCH 7/8] bus: arm-ccn: fix hrtimer registration The CCN PMU driver has a single hrtimer, used to simulate a periodic interrupt on systems where the overflow interrupt is not possible to use. The hrtimer is started when any event is started, and cancelled when any event is stopped. Thus, stopping a single event is sufficient to disable to hrtimer, and overflows (of other events) may be lost. To avoid this, this patch reworks the hrtimer start/cancel to only occur when the first event is added to a PMU, and the last event removed, making use of the existing bitmap counting active events. Signed-off-by: Mark Rutland Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index c826bb286054..12c1fd1bc398 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -940,15 +940,6 @@ static void arm_ccn_pmu_event_start(struct perf_event *event, int flags) arm_ccn_pmu_read_counter(ccn, hw->idx)); hw->state = 0; - /* - * Pin the timer, so that the overflows are handled by the chosen - * event->cpu (this is the same one as presented in "cpumask" - * attribute). - */ - if (!ccn->irq) - hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(), - HRTIMER_MODE_REL_PINNED); - /* Set the DT bus input, engaging the counter */ arm_ccn_pmu_xp_dt_config(event, 1); } @@ -962,9 +953,6 @@ static void arm_ccn_pmu_event_stop(struct perf_event *event, int flags) /* Disable counting, setting the DT bus to pass-through mode */ arm_ccn_pmu_xp_dt_config(event, 0); - if (!ccn->irq) - hrtimer_cancel(&ccn->dt.hrtimer); - /* Let the DT bus drain */ timeout = arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) + ccn->num_xps; @@ -1122,15 +1110,31 @@ static void arm_ccn_pmu_event_config(struct perf_event *event) spin_unlock(&ccn->dt.config_lock); } +static int arm_ccn_pmu_active_counters(struct arm_ccn *ccn) +{ + return bitmap_weight(ccn->dt.pmu_counters_mask, + CCN_NUM_PMU_EVENT_COUNTERS + 1); +} + static int arm_ccn_pmu_event_add(struct perf_event *event, int flags) { int err; struct hw_perf_event *hw = &event->hw; + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); err = arm_ccn_pmu_event_alloc(event); if (err) return err; + /* + * Pin the timer, so that the overflows are handled by the chosen + * event->cpu (this is the same one as presented in "cpumask" + * attribute). + */ + if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 1) + hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(), + HRTIMER_MODE_REL_PINNED); + arm_ccn_pmu_event_config(event); hw->state = PERF_HES_STOPPED; @@ -1143,9 +1147,14 @@ static int arm_ccn_pmu_event_add(struct perf_event *event, int flags) static void arm_ccn_pmu_event_del(struct perf_event *event, int flags) { + struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); + arm_ccn_pmu_event_stop(event, PERF_EF_UPDATE); arm_ccn_pmu_event_release(event); + + if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 0) + hrtimer_cancel(&ccn->dt.hrtimer); } static void arm_ccn_pmu_event_read(struct perf_event *event) From d662ed2e50c9dab1d4c25e80fa3e01ebe257bd65 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 11 Aug 2016 10:50:43 +0100 Subject: [PATCH 8/8] bus: arm-ccn: make event groups reliable The CCN PMU driver leaves the counting logic always enabled, and thus events are enabled while groups are manipulated. As each event is stopped and read individually, this leads to arbitrary skew across event groups, which can be seen if counting several identical events. To avoid this, implement pmu_{enable,disable} callbacks to stop and start all counters atomically around event manipulation. As the counters are now stopped, we cannot poll the cycle counter to wait for events to drain from the bus. However, as the counters are stopped and the events will not be read regardless, we can simply allow the bus to drain naturally. Signed-off-by: Mark Rutland Signed-off-by: Pawel Moll --- drivers/bus/arm-ccn.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 12c1fd1bc398..884c0305e290 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -946,20 +946,11 @@ static void arm_ccn_pmu_event_start(struct perf_event *event, int flags) static void arm_ccn_pmu_event_stop(struct perf_event *event, int flags) { - struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu); struct hw_perf_event *hw = &event->hw; - u64 timeout; /* Disable counting, setting the DT bus to pass-through mode */ arm_ccn_pmu_xp_dt_config(event, 0); - /* Let the DT bus drain */ - timeout = arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) + - ccn->num_xps; - while (arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) < - timeout) - cpu_relax(); - if (flags & PERF_EF_UPDATE) arm_ccn_pmu_event_update(event); @@ -1162,6 +1153,24 @@ static void arm_ccn_pmu_event_read(struct perf_event *event) arm_ccn_pmu_event_update(event); } +static void arm_ccn_pmu_enable(struct pmu *pmu) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(pmu); + + u32 val = readl(ccn->dt.base + CCN_DT_PMCR); + val |= CCN_DT_PMCR__PMU_EN; + writel(val, ccn->dt.base + CCN_DT_PMCR); +} + +static void arm_ccn_pmu_disable(struct pmu *pmu) +{ + struct arm_ccn *ccn = pmu_to_arm_ccn(pmu); + + u32 val = readl(ccn->dt.base + CCN_DT_PMCR); + val &= ~CCN_DT_PMCR__PMU_EN; + writel(val, ccn->dt.base + CCN_DT_PMCR); +} + static irqreturn_t arm_ccn_pmu_overflow_handler(struct arm_ccn_dt *dt) { u32 pmovsr = readl(dt->base + CCN_DT_PMOVSR); @@ -1284,6 +1293,8 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) .start = arm_ccn_pmu_event_start, .stop = arm_ccn_pmu_event_stop, .read = arm_ccn_pmu_event_read, + .pmu_enable = arm_ccn_pmu_enable, + .pmu_disable = arm_ccn_pmu_disable, }; /* No overflow interrupt? Have to use a timer instead. */