Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "Tooling fixes plus a handful of late arriving tooling changes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf tools: Fix link time error with sample_reg_masks on non x86
  perf build: Fix Intel PT instruction decoder dependency problem
  perf dwarf: Fix potential array out of bounds access
  perf record: Add ability to name registers to record
  perf/x86: Add list of register names
  perf script: Enable printing of interrupted machine state
  perf evlist: Open event on evsel cpus and threads
  bpf tools: New API to get name from a BPF object
  perf tools: Fix build on powerpc broken by pt/bts
This commit is contained in:
Linus Torvalds 2015-09-03 16:15:41 -07:00
commit 79b0691d0c
22 changed files with 201 additions and 16 deletions

View File

@ -880,15 +880,26 @@ struct bpf_object *bpf_object__open(const char *path)
} }
struct bpf_object *bpf_object__open_buffer(void *obj_buf, struct bpf_object *bpf_object__open_buffer(void *obj_buf,
size_t obj_buf_sz) size_t obj_buf_sz,
const char *name)
{ {
char tmp_name[64];
/* param validation */ /* param validation */
if (!obj_buf || obj_buf_sz <= 0) if (!obj_buf || obj_buf_sz <= 0)
return NULL; return NULL;
pr_debug("loading object from buffer\n"); if (!name) {
snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
(unsigned long)obj_buf,
(unsigned long)obj_buf_sz);
tmp_name[sizeof(tmp_name) - 1] = '\0';
name = tmp_name;
}
pr_debug("loading object '%s' from buffer\n",
name);
return __bpf_object__open("[buffer]", obj_buf, obj_buf_sz); return __bpf_object__open(name, obj_buf, obj_buf_sz);
} }
int bpf_object__unload(struct bpf_object *obj) int bpf_object__unload(struct bpf_object *obj)
@ -975,6 +986,14 @@ bpf_object__next(struct bpf_object *prev)
return next; return next;
} }
/*
 * Return the name recorded for @obj (its ->path string), or NULL when
 * @obj itself is NULL.  The returned pointer is the object's own field,
 * not a copy.
 */
const char *
bpf_object__get_name(struct bpf_object *obj)
{
	return obj ? obj->path : NULL;
}
struct bpf_program * struct bpf_program *
bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
{ {

View File

@ -28,12 +28,14 @@ struct bpf_object;
struct bpf_object *bpf_object__open(const char *path); struct bpf_object *bpf_object__open(const char *path);
struct bpf_object *bpf_object__open_buffer(void *obj_buf, struct bpf_object *bpf_object__open_buffer(void *obj_buf,
size_t obj_buf_sz); size_t obj_buf_sz,
const char *name);
void bpf_object__close(struct bpf_object *object); void bpf_object__close(struct bpf_object *object);
/* Load/unload object into/from kernel */ /* Load/unload object into/from kernel */
int bpf_object__load(struct bpf_object *obj); int bpf_object__load(struct bpf_object *obj);
int bpf_object__unload(struct bpf_object *obj); int bpf_object__unload(struct bpf_object *obj);
const char *bpf_object__get_name(struct bpf_object *obj);
struct bpf_object *bpf_object__next(struct bpf_object *prev); struct bpf_object *bpf_object__next(struct bpf_object *prev);
#define bpf_object__for_each_safe(pos, tmp) \ #define bpf_object__for_each_safe(pos, tmp) \

View File

@ -276,7 +276,11 @@ filter out the startup phase of the program, which is often very different.
--intr-regs:: --intr-regs::
Capture machine state (registers) at interrupt, i.e., on counter overflows for Capture machine state (registers) at interrupt, i.e., on counter overflows for
each sample. List of captured registers depends on the architecture. This option each sample. List of captured registers depends on the architecture. This option
is off by default. is off by default. It is possible to select the registers to sample using their
symbolic names, e.g. on x86, ax, si. To list the available registers use
--intr-regs=\?. To name registers, pass a comma separated list such as
--intr-regs=ax,bx. The list of registers is architecture dependent.
--running-time:: --running-time::
Record running and enabled time for read events (:S) Record running and enabled time for read events (:S)

View File

@ -116,7 +116,7 @@ OPTIONS
--fields:: --fields::
Comma separated list of fields to print. Options are: Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, flags. srcline, period, iregs, flags.
Field list can be prepended with the type, trace, sw or hw, Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies. to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace

View File

@ -51,5 +51,5 @@ const char *sh_regs_table[SH_MAX_REGS] = {
/* Return architecture dependent register string (for kprobe-tracer) */ /* Return architecture dependent register string (for kprobe-tracer) */
const char *get_arch_regstr(unsigned int n) const char *get_arch_regstr(unsigned int n)
{ {
return (n <= SH_MAX_REGS) ? sh_regs_table[n] : NULL; return (n < SH_MAX_REGS) ? sh_regs_table[n] : NULL;
} }

View File

@ -39,5 +39,5 @@ const char *sparc_regs_table[SPARC_MAX_REGS] = {
*/ */
const char *get_arch_regstr(unsigned int n) const char *get_arch_regstr(unsigned int n)
{ {
return (n <= SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; return (n < SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL;
} }

View File

@ -2,6 +2,7 @@ libperf-y += header.o
libperf-y += tsc.o libperf-y += tsc.o
libperf-y += pmu.o libperf-y += pmu.o
libperf-y += kvm-stat.o libperf-y += kvm-stat.o
libperf-y += perf_regs.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o

View File

@ -71,5 +71,5 @@ const char *x86_64_regs_table[X86_64_MAX_REGS] = {
/* Return architecture dependent register string (for kprobe-tracer) */ /* Return architecture dependent register string (for kprobe-tracer) */
const char *get_arch_regstr(unsigned int n) const char *get_arch_regstr(unsigned int n)
{ {
return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; return (n < ARCH_MAX_REGS) ? arch_regs_table[n] : NULL;
} }

View File

@ -0,0 +1,28 @@
#include "../../perf.h"
#include "../../util/perf_regs.h"
/*
 * x86 table of register names and their sample mask bits.  Each SMPL_REG()
 * entry pairs a register name with the PERF_REG_X86_* index it corresponds
 * to; the table is terminated by SMPL_REG_END.
 */
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
SMPL_REG(BX, PERF_REG_X86_BX),
SMPL_REG(CX, PERF_REG_X86_CX),
SMPL_REG(DX, PERF_REG_X86_DX),
SMPL_REG(SI, PERF_REG_X86_SI),
SMPL_REG(DI, PERF_REG_X86_DI),
SMPL_REG(BP, PERF_REG_X86_BP),
SMPL_REG(SP, PERF_REG_X86_SP),
SMPL_REG(IP, PERF_REG_X86_IP),
SMPL_REG(FLAGS, PERF_REG_X86_FLAGS),
SMPL_REG(CS, PERF_REG_X86_CS),
SMPL_REG(SS, PERF_REG_X86_SS),
/* R8-R15 are only listed when HAVE_ARCH_X86_64_SUPPORT is defined. */
#ifdef HAVE_ARCH_X86_64_SUPPORT
SMPL_REG(R8, PERF_REG_X86_R8),
SMPL_REG(R9, PERF_REG_X86_R9),
SMPL_REG(R10, PERF_REG_X86_R10),
SMPL_REG(R11, PERF_REG_X86_R11),
SMPL_REG(R12, PERF_REG_X86_R12),
SMPL_REG(R13, PERF_REG_X86_R13),
SMPL_REG(R14, PERF_REG_X86_R14),
SMPL_REG(R15, PERF_REG_X86_R15),
#endif
SMPL_REG_END
};

View File

@ -27,8 +27,10 @@
#include "util/cpumap.h" #include "util/cpumap.h"
#include "util/thread_map.h" #include "util/thread_map.h"
#include "util/data.h" #include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h" #include "util/auxtrace.h"
#include "util/parse-branch-options.h" #include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include <unistd.h> #include <unistd.h>
#include <sched.h> #include <sched.h>
@ -279,7 +281,7 @@ static int record__open(struct record *rec)
evlist__for_each(evlist, pos) { evlist__for_each(evlist, pos) {
try_again: try_again:
if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) { if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
if (verbose) if (verbose)
ui__warning("%s\n", msg); ui__warning("%s\n", msg);
@ -1080,8 +1082,9 @@ struct option __record_options[] = {
"sample transaction flags (special events only)"), "sample transaction flags (special events only)"),
OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
"use per-thread mmaps"), "use per-thread mmaps"),
OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs, OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
"Sample machine registers on interrupt"), "sample selected machine registers on interrupt,"
" use -I ? to list register names", parse_regs),
OPT_BOOLEAN(0, "running-time", &record.opts.running_time, OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
"Record running/enabled time of read (:S) events"), "Record running/enabled time of read (:S) events"),
OPT_CALLBACK('k', "clockid", &record.opts, OPT_CALLBACK('k', "clockid", &record.opts,

View File

@ -6,6 +6,7 @@
#include "util/exec_cmd.h" #include "util/exec_cmd.h"
#include "util/header.h" #include "util/header.h"
#include "util/parse-options.h" #include "util/parse-options.h"
#include "util/perf_regs.h"
#include "util/session.h" #include "util/session.h"
#include "util/tool.h" #include "util/tool.h"
#include "util/symbol.h" #include "util/symbol.h"
@ -46,6 +47,7 @@ enum perf_output_field {
PERF_OUTPUT_SYMOFFSET = 1U << 11, PERF_OUTPUT_SYMOFFSET = 1U << 11,
PERF_OUTPUT_SRCLINE = 1U << 12, PERF_OUTPUT_SRCLINE = 1U << 12,
PERF_OUTPUT_PERIOD = 1U << 13, PERF_OUTPUT_PERIOD = 1U << 13,
PERF_OUTPUT_IREGS = 1U << 14,
}; };
struct output_option { struct output_option {
@ -66,6 +68,7 @@ struct output_option {
{.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
{.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, {.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
{.str = "period", .field = PERF_OUTPUT_PERIOD}, {.str = "period", .field = PERF_OUTPUT_PERIOD},
{.str = "iregs", .field = PERF_OUTPUT_IREGS},
}; };
/* default set to maintain compatibility with current format */ /* default set to maintain compatibility with current format */
@ -255,6 +258,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
PERF_OUTPUT_PERIOD)) PERF_OUTPUT_PERIOD))
return -EINVAL; return -EINVAL;
if (PRINT_FIELD(IREGS) &&
perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS",
PERF_OUTPUT_IREGS))
return -EINVAL;
return 0; return 0;
} }
@ -352,6 +360,24 @@ static int perf_session__check_output_opt(struct perf_session *session)
return 0; return 0;
} }
/*
 * Print the interrupt-time register values carried by @sample.
 *
 * @event:  unused
 * @sample: sample whose intr_regs dump is printed
 * @thread: unused
 * @attr:   event attributes; attr->sample_regs_intr is the mask of sampled
 *          registers
 *
 * For every bit set in the mask, the next value from the dump is printed as
 * "<name>:0x<hex> " using perf_reg_name() for the name.  Values are consumed
 * in ascending bit order, one per set bit.
 */
static void print_sample_iregs(union perf_event *event __maybe_unused,
			       struct perf_sample *sample,
			       struct thread *thread __maybe_unused,
			       struct perf_event_attr *attr)
{
	struct regs_dump *regs = &sample->intr_regs;
	uint64_t mask = attr->sample_regs_intr;
	unsigned i = 0, r;

	/*
	 * 'regs' is the address of a member and can never be NULL, so testing
	 * it protects nothing.  What is dereferenced below is regs->regs.
	 * NOTE(review): assumes regs->regs is a pointer that stays NULL when
	 * the sample carries no interrupt register dump - confirm against the
	 * struct regs_dump definition.
	 */
	if (!regs->regs)
		return;

	for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
		u64 val = regs->regs[i++];
		printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val);
	}
}
static void print_sample_start(struct perf_sample *sample, static void print_sample_start(struct perf_sample *sample,
struct thread *thread, struct thread *thread,
struct perf_evsel *evsel) struct perf_evsel *evsel)
@ -525,6 +551,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
PERF_MAX_STACK_DEPTH); PERF_MAX_STACK_DEPTH);
} }
if (PRINT_FIELD(IREGS))
print_sample_iregs(event, sample, thread, attr);
printf("\n"); printf("\n");
} }
@ -1643,7 +1672,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"comma separated output fields prepend with 'type:'. " "comma separated output fields prepend with 'type:'. "
"Valid types: hw,sw,trace,raw. " "Valid types: hw,sw,trace,raw. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,period,flags", parse_output_fields), "addr,symoff,period,iregs,flags", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide, OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"), "system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",

View File

@ -54,7 +54,6 @@ struct record_opts {
bool sample_time_set; bool sample_time_set;
bool callgraph_set; bool callgraph_set;
bool period; bool period;
bool sample_intr_regs;
bool running_time; bool running_time;
bool full_auxtrace; bool full_auxtrace;
bool auxtrace_snapshot_mode; bool auxtrace_snapshot_mode;
@ -64,6 +63,7 @@ struct record_opts {
unsigned int auxtrace_mmap_pages; unsigned int auxtrace_mmap_pages;
unsigned int user_freq; unsigned int user_freq;
u64 branch_stack; u64 branch_stack;
u64 sample_intr_regs;
u64 default_interval; u64 default_interval;
u64 user_interval; u64 user_interval;
size_t auxtrace_snapshot_size; size_t auxtrace_snapshot_size;

View File

@ -26,7 +26,7 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
{ {
struct bpf_object *obj; struct bpf_object *obj;
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz); obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL);
if (!obj) if (!obj)
return -1; return -1;
bpf_object__close(obj); bpf_object__close(obj);

View File

@ -75,6 +75,7 @@ libperf-y += record.o
libperf-y += srcline.o libperf-y += srcline.o
libperf-y += data.o libperf-y += data.o
libperf-$(CONFIG_X86) += tsc.o libperf-$(CONFIG_X86) += tsc.o
libperf-$(CONFIG_AUXTRACE) += tsc.o
libperf-y += cloexec.o libperf-y += cloexec.o
libperf-y += thread-stack.o libperf-y += thread-stack.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o
@ -82,6 +83,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-pt.o
libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o
libperf-y += parse-branch-options.o libperf-y += parse-branch-options.o
libperf-y += parse-regs-options.o
libperf-$(CONFIG_LIBELF) += symbol-elf.o libperf-$(CONFIG_LIBELF) += symbol-elf.o
libperf-$(CONFIG_LIBELF) += probe-file.o libperf-$(CONFIG_LIBELF) += probe-file.o

View File

@ -1181,6 +1181,10 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e
if (evsel->filter == NULL) if (evsel->filter == NULL)
continue; continue;
/*
 * Filters only work for tracepoint events, which don't have a cpu limit,
 * so evlist and evsel should always be the same.
 */
err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
if (err) { if (err) {
*err_evsel = evsel; *err_evsel = evsel;

View File

@ -787,7 +787,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
perf_evsel__config_callgraph(evsel, opts, &callchain_param); perf_evsel__config_callgraph(evsel, opts, &callchain_param);
if (opts->sample_intr_regs) { if (opts->sample_intr_regs) {
attr->sample_regs_intr = PERF_REGS_MASK; attr->sample_regs_intr = opts->sample_intr_regs;
perf_evsel__set_sample_bit(evsel, REGS_INTR); perf_evsel__set_sample_bit(evsel, REGS_INTR);
} }

View File

@ -4,6 +4,7 @@ inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk
inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt
$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
$(call rule_mkdir)
@$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c

View File

@ -146,6 +146,9 @@ static void intel_pt_insn_decoder(struct insn *insn,
case 4: case 4:
intel_pt_insn->rel = bswap_32(insn->immediate.value); intel_pt_insn->rel = bswap_32(insn->immediate.value);
break; break;
default:
intel_pt_insn->rel = 0;
break;
} }
#else #else
intel_pt_insn->rel = insn->immediate.value; intel_pt_insn->rel = insn->immediate.value;

View File

@ -0,0 +1,71 @@
#include "perf.h"
#include "util/util.h"
#include "util/debug.h"
#include "util/parse-options.h"
#include "util/parse-regs-options.h"
/*
 * parse_regs - option callback turning a comma separated list of register
 * names into a sample register mask.
 *
 * @opt:   option descriptor; opt->value points at the uint64_t mask to fill
 * @str:   register list (e.g. "ax,bx"), "?" to print the known names, or
 *         NULL when the option was given without an argument
 * @unset: non-zero when the option is negated; nothing is done in that case
 *
 * Names are matched case-insensitively against sample_reg_masks[].  When no
 * list is given the mask defaults to PERF_REGS_MASK.
 *
 * Returns 0 on success (or when @unset), and -1 if the mask was already set,
 * on allocation failure, on an unknown register name, or after printing the
 * register list for "?".  On any failure the caller's mask is left untouched.
 */
int
parse_regs(const struct option *opt, const char *str, int unset)
{
	uint64_t *mode = (uint64_t *)opt->value;
	const struct sample_reg *r;
	char *s, *os = NULL, *p;
	uint64_t mask = 0;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -I */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			if (!strcmp(s, "?")) {
				fprintf(stderr, "available registers: ");
				for (r = sample_reg_masks; r->name; r++) {
					fprintf(stderr, "%s ", r->name);
				}
				fputc('\n', stderr);
				/*
				 * just printing available regs; leave through
				 * the common exit so the strdup() copy is freed
				 */
				goto error;
			}

			for (r = sample_reg_masks; r->name; r++) {
				if (!strcasecmp(s, r->name))
					break;
			}
			if (!r->name) {
				ui__warning("unknown register %s,"
					    " check man page\n", s);
				goto error;
			}

			/* accumulate locally; only commit once the whole list parsed */
			mask |= r->mask;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to all possible regs */
	*mode = mask ? mask : PERF_REGS_MASK;
error:
	free(os);
	return ret;
}

View File

@ -0,0 +1,5 @@
/* Declaration of the parse_regs() option callback. */
#ifndef _PERF_PARSE_REGS_OPTIONS_H
#define _PERF_PARSE_REGS_OPTIONS_H 1
/* Forward declaration: the prototype below only needs a pointer to it. */
struct option;
/*
 * Option callback: @opt is the option being parsed, @str its argument
 * string, @unset is non-zero when the option is negated.
 */
int parse_regs(const struct option *opt, const char *str, int unset);
#endif /* _PERF_PARSE_REGS_OPTIONS_H */

View File

@ -2,6 +2,10 @@
#include "perf_regs.h" #include "perf_regs.h"
#include "event.h" #include "event.h"
/*
 * Fallback register table holding only the SMPL_REG_END terminator, i.e. no
 * named registers.  It is marked __weak; presumably an architecture that
 * defines its own sample_reg_masks[] takes precedence at link time.
 */
const struct sample_reg __weak sample_reg_masks[] = {
SMPL_REG_END
};
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
{ {
int i, idx = 0; int i, idx = 0;

View File

@ -5,6 +5,15 @@
struct regs_dump; struct regs_dump;
/* One entry of a register-name table: a name and its mask value. */
struct sample_reg {
const char *name; /* register name; NULL in the terminating entry (SMPL_REG_END) */
uint64_t mask; /* mask value for this register; SMPL_REG() sets a single bit */
};
/* Build a table entry named after the token @n whose mask has only bit @b set. */
#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) }
/* Table terminator: name is NULL (mask is left to default initialization). */
#define SMPL_REG_END { .name = NULL }
/* Register-name table, terminated by an entry whose name is NULL. */
extern const struct sample_reg sample_reg_masks[];
#ifdef HAVE_PERF_REGS_SUPPORT #ifdef HAVE_PERF_REGS_SUPPORT
#include <perf_regs.h> #include <perf_regs.h>