perf: Drop the skip argument from perf_arch_fetch_caller_regs

Drop this argument now that we always want to rewind only to the
state of the first caller.
It means frame pointers are not necessary anymore to reliably get
the source of an event. But this also means we need this helper
to be a macro now, as an inline function is not an option since
we need to know when to provide a default implementation.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: David Miller <davem@davemloft.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Frederic Weisbecker 2010-05-20 07:47:21 +02:00
parent c9cf4dbb4d
commit b0f82b81fe
11 changed files with 46 additions and 83 deletions

View File

@ -21,3 +21,15 @@
#ifdef CONFIG_FSL_EMB_PERF_EVENT #ifdef CONFIG_FSL_EMB_PERF_EVENT
#include <asm/perf_event_fsl_emb.h> #include <asm/perf_event_fsl_emb.h>
#endif #endif
#ifdef CONFIG_PERF_EVENTS
#include <asm/ptrace.h>
#include <asm/reg.h>
/*
 * Fill in the few pt_regs fields perf needs to describe the current caller:
 *   nip    - the instruction pointer handed in by the user (__ip)
 *   gpr[1] - the word loaded from the address returned by __get_SP()
 *            (presumably the stack back chain, i.e. the caller's r1 -
 *            TODO confirm)
 *   msr    - the machine state register, read with mfmsr
 *
 * This has to be a macro: generic code tests for it with #ifndef to decide
 * whether an empty default implementation must be provided.
 */
#define perf_arch_fetch_caller_regs(regs, __ip)			\
	do {							\
		(regs)->nip = (__ip);				\
		(regs)->gpr[1] = *(unsigned long *)__get_SP();	\
		asm volatile("mfmsr %0" : "=r" ((regs)->msr));	\
	} while (0)
#endif

View File

@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)
_GLOBAL(__restore_cpu_power7) _GLOBAL(__restore_cpu_power7)
/* place holder */ /* place holder */
blr blr
/*
* Get a minimal set of registers for our caller's nth caller.
* r3 = regs pointer, r5 = n.
*
* We only get R1 (stack pointer), NIP (next instruction pointer)
* and LR (link register). These are all we can get in the
* general case without doing complicated stack unwinding, but
* fortunately they are enough to do a stack backtrace, which
* is all we need them for.
*/
_GLOBAL(perf_arch_fetch_caller_regs)
mr r6,r1
cmpwi r5,0
mflr r4
ble 2f
mtctr r5
1: PPC_LL r6,0(r6)
bdnz 1b
PPC_LL r4,PPC_LR_STKOFF(r6)
2: PPC_LL r7,0(r6)
PPC_LL r7,PPC_LR_STKOFF(r7)
PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3)
PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3)
PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3)
blr

View File

@ -6,7 +6,15 @@ extern void set_perf_event_pending(void);
#define PERF_EVENT_INDEX_OFFSET 0 #define PERF_EVENT_INDEX_OFFSET 0
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
#include <asm/ptrace.h>
extern void init_hw_perf_events(void); extern void init_hw_perf_events(void);
/*
 * Out-of-line helper that fills @regs for the frame @skip levels up,
 * recording @ip as the instruction pointer.
 */
extern void
__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);

/*
 * Arch override seen by generic code: always rewind exactly one frame.
 * The expansion deliberately carries no trailing semicolon so the macro
 * behaves like a single function call, including as the body of an
 * if/else without braces.
 */
#define perf_arch_fetch_caller_regs(pt_regs, ip)	\
	__perf_arch_fetch_caller_regs(pt_regs, ip, 1)
#else #else
static inline void init_hw_perf_events(void) { } static inline void init_hw_perf_events(void) { }
#endif #endif

View File

@ -47,9 +47,9 @@ stack_trace_flush:
.size stack_trace_flush,.-stack_trace_flush .size stack_trace_flush,.-stack_trace_flush
#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PERF_EVENTS
.globl perf_arch_fetch_caller_regs .globl __perf_arch_fetch_caller_regs
.type perf_arch_fetch_caller_regs,#function .type __perf_arch_fetch_caller_regs,#function
perf_arch_fetch_caller_regs: __perf_arch_fetch_caller_regs:
/* We always read the %pstate into %o5 since we will use /* We always read the %pstate into %o5 since we will use
* that to construct a fake %tstate to store into the regs. * that to construct a fake %tstate to store into the regs.
*/ */

View File

@ -140,6 +140,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs) #define perf_misc_flags(regs) perf_misc_flags(regs)
#include <asm/stacktrace.h>
/*
 * Record the caller's state in @regs: ip is the supplied __ip, bp is the
 * value returned by caller_frame_pointer(), cs is __KERNEL_CS and flags
 * is cleared.
 *
 * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
 * and the comment with PERF_EFLAGS_EXACT.
 *
 * Wrapped in do { } while (0) so that the macro plus its trailing semicolon
 * forms exactly one statement (safe in an unbraced if/else), and every use
 * of @regs is parenthesized so expressions such as &local_regs work.
 */
#define perf_arch_fetch_caller_regs(regs, __ip)		\
	do {						\
		(regs)->ip = (__ip);			\
		(regs)->bp = caller_frame_pointer();	\
		(regs)->cs = __KERNEL_CS;		\
		(regs)->flags = 0;			\
	} while (0)
#else #else
static inline void init_hw_perf_events(void) { } static inline void init_hw_perf_events(void) { }
static inline void perf_events_lapic_init(void) { } static inline void perf_events_lapic_init(void) { }

View File

@ -78,17 +78,14 @@ struct stack_frame_ia32 {
u32 return_address; u32 return_address;
}; };
static inline unsigned long rewind_frame_pointer(int n) static inline unsigned long caller_frame_pointer(void)
{ {
struct stack_frame *frame; struct stack_frame *frame;
get_bp(frame); get_bp(frame);
#ifdef CONFIG_FRAME_POINTER #ifdef CONFIG_FRAME_POINTER
while (n--) { frame = frame->next_frame;
if (probe_kernel_address(&frame->next_frame, frame))
break;
}
#endif #endif
return (unsigned long)frame; return (unsigned long)frame;

View File

@ -1706,22 +1706,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return entry; return entry;
} }
void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
{
regs->ip = ip;
/*
* perf_arch_fetch_caller_regs adds another call, we need to increment
* the skip level
*/
regs->bp = rewind_frame_pointer(skip + 1);
regs->cs = __KERNEL_CS;
/*
* We abuse bit 3 to pass exact information, see perf_misc_flags
* and the comment with PERF_EFLAGS_EXACT.
*/
regs->flags = 0;
}
unsigned long perf_instruction_pointer(struct pt_regs *regs) unsigned long perf_instruction_pointer(struct pt_regs *regs)
{ {
unsigned long ip; unsigned long ip;

View File

@ -905,8 +905,10 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
extern void #ifndef perf_arch_fetch_caller_regs
perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip); static inline void
perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
/* Empty fallback for architectures that do not define the macro; takes struct pt_regs like its caller. */
#endif
/* /*
* Take a snapshot of the regs. Skip ip and frame pointer to * Take a snapshot of the regs. Skip ip and frame pointer to
@ -916,31 +918,11 @@ perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
* - bp for callchains * - bp for callchains
* - eflags, for future purposes, just in case * - eflags, for future purposes, just in case
*/ */
static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip) static inline void perf_fetch_caller_regs(struct pt_regs *regs)
{ {
unsigned long ip;
memset(regs, 0, sizeof(*regs)); memset(regs, 0, sizeof(*regs));
switch (skip) { perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
case 1 :
ip = CALLER_ADDR0;
break;
case 2 :
ip = CALLER_ADDR1;
break;
case 3 :
ip = CALLER_ADDR2;
break;
case 4:
ip = CALLER_ADDR3;
break;
/* No need to support further for now */
default:
ip = 0;
}
return perf_arch_fetch_caller_regs(regs, ip, skip);
} }
static inline void static inline void
@ -950,7 +932,7 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
struct pt_regs hot_regs; struct pt_regs hot_regs;
if (!regs) { if (!regs) {
perf_fetch_caller_regs(&hot_regs, 1); perf_fetch_caller_regs(&hot_regs);
regs = &hot_regs; regs = &hot_regs;
} }
__perf_sw_event(event_id, nr, nmi, regs, addr); __perf_sw_event(event_id, nr, nmi, regs, addr);

View File

@ -705,7 +705,7 @@ perf_trace_##call(void *__data, proto) \
int __data_size; \ int __data_size; \
int rctx; \ int rctx; \
\ \
perf_fetch_caller_regs(&__regs, 1); \ perf_fetch_caller_regs(&__regs); \
\ \
__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\

View File

@ -2851,11 +2851,6 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return NULL; return NULL;
} }
__weak
void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
{
}
/* /*
* We assume there is only KVM supporting the callbacks. * We assume there is only KVM supporting the callbacks.

View File

@ -9,8 +9,6 @@
#include <linux/kprobes.h> #include <linux/kprobes.h>
#include "trace.h" #include "trace.h"
EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
static char *perf_trace_buf[4]; static char *perf_trace_buf[4];
/* /*