s390/cpum_sf: Support ioctl PERF_EVENT_IOC_PERIOD

A perf_event can be set up to deliver overflow notifications
via a SIGIO signal.  The setup of the event is:

 1. create the event with perf_event_open()
 2. assign it a signal for I/O notification with fcntl()
 3. install a signal handler and consume the samples

The initial setup of perf_event_open() determines the period or
frequency, that is the time span that has to elapse before each
signal is delivered to the user process.
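
For illustration, here is a minimal userspace sketch of this setup; it
is not part of the patch.  A generic hardware cycles event serves as a
stand-in, and the period, buffer size and handler name are illustrative:

  #define _GNU_SOURCE
  #include <fcntl.h>
  #include <linux/perf_event.h>
  #include <signal.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  static void sigio_handler(int sig)
  {
          (void)sig;
          /* Step 3: read the sample records from the mmap()ed ring
           * buffer here; parsing is omitted for brevity.
           */
  }

  int main(void)
  {
          struct perf_event_attr attr;
          struct sigaction sa;
          int fd;

          /* Step 1: create a sampling event, here a generic cycles
           * event with an initial sample period of 100000.
           */
          memset(&attr, 0, sizeof(attr));
          attr.size = sizeof(attr);
          attr.type = PERF_TYPE_HARDWARE;
          attr.config = PERF_COUNT_HW_CPU_CYCLES;
          attr.sample_period = 100000;
          attr.sample_type = PERF_SAMPLE_IP;
          attr.wakeup_events = 1;
          attr.disabled = 1;
          fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
          if (fd < 0) {
                  perror("perf_event_open");
                  return 1;
          }

          /* Ring buffer (1 control page + 8 data pages) that the
           * signal handler reads the samples from.
           */
          if (mmap(NULL, 9 * sysconf(_SC_PAGESIZE),
                   PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) == MAP_FAILED) {
                  perror("mmap");
                  return 1;
          }

          /* Step 2: route I/O notifications for fd as SIGIO to us. */
          fcntl(fd, F_SETFL, O_ASYNC);
          fcntl(fd, F_SETSIG, SIGIO);
          fcntl(fd, F_SETOWN, getpid());

          /* Step 3: install the signal handler, then start sampling. */
          memset(&sa, 0, sizeof(sa));
          sa.sa_handler = sigio_handler;
          sigaction(SIGIO, &sa, NULL);
          ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);

          for (;;)
                  pause();        /* samples arrive via SIGIO */
  }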

While the event is active, the system call
ioctl(.., PERF_EVENT_IOC_PERIOD, value) can be used to change the
frequency/period time span of the event.  Subsequent signal handler
invocations honour the new value.
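
Continuing the sketch above (fd is the descriptor returned there by
perf_event_open()), such a period change could be wrapped as follows;
the helper name is hypothetical:

  /* Reuses the includes of the sketch above.  PERF_EVENT_IOC_PERIOD
   * takes a pointer to the new period value.
   */
  static int change_period(int fd, __u64 new_period)
  {
          return ioctl(fd, PERF_EVENT_IOC_PERIOD, &new_period);
  }

A caller would typically invoke this from its main loop or from the
signal handler itself.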

This does not work on s390: the ioctl() call succeeds, but the time
span does not change, regardless of the third argument 'value'.

Support this behavior and make it consistent with other platforms.
This is achieved by changing the interval value of the sampling
control block accordingly and feeding this new value every time the
event is enabled using pmu_event_enable().

Before this change the interval value was set only once at
pmu_event_add() and never changed.

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>

arch/s390/include/asm/perf_event.h

@@ -60,6 +60,7 @@ struct perf_sf_sde_regs {
 #define PERF_CPUM_SF_MODE_MASK          (PERF_CPUM_SF_BASIC_MODE| \
                                          PERF_CPUM_SF_DIAG_MODE)
 #define PERF_CPUM_SF_FULL_BLOCKS        0x0004    /* Process full SDBs only */
+#define PERF_CPUM_SF_FREQ_MODE          0x0008    /* Sampling with frequency */
 
 #define REG_NONE                0
 #define REG_OVERFLOW            1
@@ -70,5 +71,6 @@ struct perf_sf_sde_regs {
 #define SAMPL_FLAGS(hwc)        ((hwc)->config_base)
 #define SAMPL_DIAG_MODE(hwc)    (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
 #define SDB_FULL_BLOCKS(hwc)    (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
+#define SAMPLE_FREQ_MODE(hwc)   (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
 
 #endif /* _ASM_S390_PERF_EVENT_H */

arch/s390/kernel/perf_cpum_sf.c

@@ -673,13 +673,89 @@ static void cpumsf_output_event_pid(struct perf_event *event,
         rcu_read_unlock();
 }
 
+static unsigned long getrate(bool freq, unsigned long sample,
+                             struct hws_qsi_info_block *si)
+{
+        unsigned long rate;
+
+        if (freq) {
+                rate = freq_to_sample_rate(si, sample);
+                rate = hw_limit_rate(si, rate);
+        } else {
+                /* The min/max sampling rates specifies the valid range
+                 * of sample periods.  If the specified sample period is
+                 * out of range, limit the period to the range boundary.
+                 */
+                rate = hw_limit_rate(si, sample);
+
+                /* The perf core maintains a maximum sample rate that is
+                 * configurable through the sysctl interface.  Ensure the
+                 * sampling rate does not exceed this value.  This also helps
+                 * to avoid throttling when pushing samples with
+                 * perf_event_overflow().
+                 */
+                if (sample_rate_to_freq(si, rate) >
+                    sysctl_perf_event_sample_rate) {
+                        debug_sprintf_event(sfdbg, 1,
+                                            "Sampling rate exceeds maximum "
+                                            "perf sample rate\n");
+                        rate = 0;
+                }
+        }
+        return rate;
+}
+
+/* The sampling information (si) contains information about the
+ * min/max sampling intervals and the CPU speed.  So calculate the
+ * correct sampling interval and avoid the whole period adjust
+ * feedback loop.
+ *
+ * Since the CPU Measurement sampling facility can not handle frequency,
+ * calculate the sampling interval when frequency is specified using
+ * this formula:
+ *      interval := cpu_speed * 1000000 / sample_freq
+ *
+ * Returns errno on bad input and zero on success with parameter interval
+ * set to the correct sampling rate.
+ *
+ * Note: This function turns off the freq bit to avoid calling function
+ * perf_adjust_period().  That would cause frequency adjustment in the
+ * common code part and tremendous variations in the counter values.
+ */
+static int __hw_perf_event_init_rate(struct perf_event *event,
+                                     struct hws_qsi_info_block *si)
+{
+        struct perf_event_attr *attr = &event->attr;
+        struct hw_perf_event *hwc = &event->hw;
+        unsigned long rate;
+
+        if (attr->freq) {
+                if (!attr->sample_freq)
+                        return -EINVAL;
+                rate = getrate(attr->freq, attr->sample_freq, si);
+                attr->freq = 0;         /* Don't call perf_adjust_period() */
+                SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE;
+        } else {
+                rate = getrate(attr->freq, attr->sample_period, si);
+                if (!rate)
+                        return -EINVAL;
+        }
+        attr->sample_period = rate;
+        SAMPL_RATE(hwc) = rate;
+        hw_init_period(hwc, SAMPL_RATE(hwc));
+        debug_sprintf_event(sfdbg, 4, "__hw_perf_event_init_rate:"
+                            "cpu:%d period:%llx freq:%d,%#lx\n", event->cpu,
+                            event->attr.sample_period, event->attr.freq,
+                            SAMPLE_FREQ_MODE(hwc));
+        return 0;
+}
+
 static int __hw_perf_event_init(struct perf_event *event)
 {
         struct cpu_hw_sf *cpuhw;
         struct hws_qsi_info_block si;
         struct perf_event_attr *attr = &event->attr;
         struct hw_perf_event *hwc = &event->hw;
-        unsigned long rate;
         int cpu, err;
 
         /* Reserve CPU-measurement sampling facility */
@@ -745,43 +821,9 @@ static int __hw_perf_event_init(struct perf_event *event)
         if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
                 SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
 
-        /* The sampling information (si) contains information about the
-         * min/max sampling intervals and the CPU speed.  So calculate the
-         * correct sampling interval and avoid the whole period adjust
-         * feedback loop.
-         */
-        rate = 0;
-        if (attr->freq) {
-                if (!attr->sample_freq) {
-                        err = -EINVAL;
-                        goto out;
-                }
-                rate = freq_to_sample_rate(&si, attr->sample_freq);
-                rate = hw_limit_rate(&si, rate);
-                attr->freq = 0;
-                attr->sample_period = rate;
-        } else {
-                /* The min/max sampling rates specifies the valid range
-                 * of sample periods.  If the specified sample period is
-                 * out of range, limit the period to the range boundary.
-                 */
-                rate = hw_limit_rate(&si, hwc->sample_period);
-
-                /* The perf core maintains a maximum sample rate that is
-                 * configurable through the sysctl interface.  Ensure the
-                 * sampling rate does not exceed this value.  This also helps
-                 * to avoid throttling when pushing samples with
-                 * perf_event_overflow().
-                 */
-                if (sample_rate_to_freq(&si, rate) >
-                    sysctl_perf_event_sample_rate) {
-                        err = -EINVAL;
-                        debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
-                        goto out;
-                }
-        }
-        SAMPL_RATE(hwc) = rate;
-        hw_init_period(hwc, SAMPL_RATE(hwc));
+        err = __hw_perf_event_init_rate(event, &si);
+        if (err)
+                goto out;
 
         /* Initialize sample data overflow accounting */
         hwc->extra_reg.reg = REG_OVERFLOW;
@@ -904,6 +946,8 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
                         if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
                                 extend_sampling_buffer(&cpuhw->sfb, hwc);
                 }
+                /* Rate may be adjusted with ioctl() */
+                cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
         }
 
         /* (Re)enable the PMU and sampling facility */
@@ -922,8 +966,9 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
         lpp(&S390_lowcore.lpp);
 
         debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
-                            "tear=%p dear=%p\n", cpuhw->lsctl.es,
-                            cpuhw->lsctl.cs, cpuhw->lsctl.ed, cpuhw->lsctl.cd,
+                            "interval:%lx tear=%p dear=%p\n",
+                            cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
+                            cpuhw->lsctl.cd, cpuhw->lsctl.interval,
                             (void *) cpuhw->lsctl.tear,
                             (void *) cpuhw->lsctl.dear);
 }
@@ -1717,6 +1762,44 @@ static void cpumsf_pmu_read(struct perf_event *event)
         /* Nothing to do ... updates are interrupt-driven */
 }
 
+/* Check if the new sampling period/frequency is appropriate.
+ *
+ * Return non-zero on error and zero on passed checks.
+ */
+static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
+{
+        struct hws_qsi_info_block si;
+        unsigned long rate;
+        bool do_freq;
+
+        memset(&si, 0, sizeof(si));
+        if (event->cpu == -1) {
+                if (qsi(&si))
+                        return -ENODEV;
+        } else {
+                /* Event is pinned to a particular CPU, retrieve the per-CPU
+                 * sampling structure for accessing the CPU-specific QSI.
+                 */
+                struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+
+                si = cpuhw->qsi;
+        }
+
+        do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
+        rate = getrate(do_freq, value, &si);
+        if (!rate)
+                return -EINVAL;
+
+        event->attr.sample_period = rate;
+        SAMPL_RATE(&event->hw) = rate;
+        hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
+        debug_sprintf_event(sfdbg, 4, "cpumsf_pmu_check_period:"
+                            "cpu:%d value:%llx period:%llx freq:%d\n",
+                            event->cpu, value,
+                            event->attr.sample_period, do_freq);
+        return 0;
+}
+
 /* Activate sampling control.
  * Next call of pmu_enable() starts sampling.
  */
@@ -1908,6 +1991,8 @@ static struct pmu cpumf_sampling = {
 
         .setup_aux    = aux_buffer_setup,
         .free_aux     = aux_buffer_free,
+
+        .check_period = cpumsf_pmu_check_period,
 };
 
 static void cpumf_measurement_alert(struct ext_code ext_code,