From 45677454dd6d128608117abe7dcd2bdfdd7cdf72 Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Thu, 28 Oct 2010 00:24:34 +0800 Subject: [PATCH 1/3] ftrace: Speed up recordmcount cmd_record_mcount is used to locate the _mcount symbols in the object files, only the files compiled with -pg has the _mcount symbol, so, it is only needed for such files, but the current cmd_record_mcount is used for all of the object files, so, we need to fix it and speed it up. Since -pg may be removed by the method used in kernel/trace/Makefile: ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) Or may be removed by the method used in arch/x86/kernel/Makefile: CFLAGS_REMOVE_file.o = -pg So, we must check the last variable stores the compiling flags, that is c_flags(Please refer to cmd_cc_o_c and rule_cc_o_c defined in scripts/Makefile.build) and since the CFLAGS_REMOVE_file.o is already filtered in _c_flags(Please refer to scripts/Makefile.lib) and _c_flags has less symbols, therefore, we only need to check _c_flags. --------------- Changes from v1: o Don't touch Makefile for CONFIG_FTRACE_MCOUNT_RECORD is enough o Use _c_flags intead of KBUILD_CFLAGS to cover CONFIG_REMOVE_file.o = -pg (feedback from Steven Rostedt ) Acked-by: Michal Marek Signed-off-by: Wu Zhangjin LKML-Reference: <3dc8cddf022eb7024f9f2cf857529a15bee8999a.1288196498.git.wuzhangjin@gmail.com> [ changed if [ .. == .. ] to if [ .. = .. ] to handle dash environments ] Signed-off-by: Steven Rostedt --- scripts/Makefile.build | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 5ad25e17b6cb..4eb99ab34053 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -214,17 +214,22 @@ ifdef BUILD_C_RECORDMCOUNT # The empty.o file is created in the make process in order to determine # the target endianness and word size. It is made before all other C # files, including recordmcount. -cmd_record_mcount = if [ $(@) != "scripts/mod/empty.o" ]; then \ - $(objtree)/scripts/recordmcount "$(@)"; \ - fi; +sub_cmd_record_mcount = \ + if [ $(@) != "scripts/mod/empty.o" ]; then \ + $(objtree)/scripts/recordmcount "$(@)"; \ + fi; else -cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ +sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ "$(if $(CONFIG_64BIT),64,32)" \ "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \ "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; endif +cmd_record_mcount = \ + if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then \ + $(sub_cmd_record_mcount) \ + fi; endif define rule_cc_o_c From 042957801626465492b9428860de39a3cb2a8219 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 12 Nov 2010 22:32:11 -0500 Subject: [PATCH 2/3] tracing/events: Show real number in array fields Currently we have in something like the sched_switch event: field:char prev_comm[TASK_COMM_LEN]; offset:12; size:16; signed:1; When a userspace tool such as perf tries to parse this, the TASK_COMM_LEN is meaningless. This is done because the TRACE_EVENT() macro simply uses a #len to show the string of the length. When the length is an enum, we get a string that means nothing for tools. By adding a static buffer and a mutex to protect it, we can store the string into that buffer with snprintf and show the actual number. Now we get: field:char prev_comm[16]; offset:12; size:16; signed:1; Something much more useful. Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 4 ++++ include/trace/ftrace.h | 14 ++++++++++---- kernel/trace/trace_events.c | 6 ++++++ kernel/trace/trace_export.c | 14 ++++++++++---- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 725bf6bd39f7..47e3997f7b5c 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -225,6 +225,10 @@ enum { FILTER_PTR_STRING, }; +#define EVENT_STORAGE_SIZE 128 +extern struct mutex event_storage_mutex; +extern char event_storage[EVENT_STORAGE_SIZE]; + extern int trace_event_raw_init(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index e718a917d897..e16610c208c9 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -296,13 +296,19 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ #undef __array #define __array(type, item, len) \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + do { \ + mutex_lock(&event_storage_mutex); \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + snprintf(event_storage, sizeof(event_storage), \ + "%s[%d]", #type, len); \ + ret = trace_define_field(event_call, event_storage, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), FILTER_OTHER); \ - if (ret) \ - return ret; + mutex_unlock(&event_storage_mutex); \ + if (ret) \ + return ret; \ + } while (0); #undef __dynamic_array #define __dynamic_array(type, item, len) \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0725eeab1937..35fde09b81de 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,6 +27,12 @@ DEFINE_MUTEX(event_mutex); +DEFINE_MUTEX(event_storage_mutex); +EXPORT_SYMBOL_GPL(event_storage_mutex); + +char event_storage[EVENT_STORAGE_SIZE]; +EXPORT_SYMBOL_GPL(event_storage); + LIST_HEAD(ftrace_events); LIST_HEAD(ftrace_common_fields); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4ba44deaac25..4b74d71705c0 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void) \ #undef __array #define __array(type, item, len) \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + do { \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + mutex_lock(&event_storage_mutex); \ + snprintf(event_storage, sizeof(event_storage), \ + "%s[%d]", #type, len); \ + ret = trace_define_field(event_call, event_storage, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), FILTER_OTHER); \ - if (ret) \ - return ret; + mutex_unlock(&event_storage_mutex); \ + if (ret) \ + return ret; \ + } while (0); #undef __array_desc #define __array_desc(type, container, item, len) \ From 287050d390264402e11bea8b811859e42e8faa29 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 2 Dec 2010 16:46:18 -0500 Subject: [PATCH 3/3] tracing: Add TRACE_EVENT_CONDITIONAL() There are instances in the kernel that we only want to trace a tracepoint when a certain condition is set. But we do not want to test for that condition in the core kernel. If we test for that condition before calling the tracepoin, then we will be performing that test even when tracing is not enabled. This is 99.99% of the time. We currently can just filter out on that condition, but that happens after we write to the trace buffer. We just wasted time writing to the ring buffer for an event we never cared about. This patch adds: TRACE_EVENT_CONDITION() and DEFINE_EVENT_CONDITION() These have a new TP_CONDITION() argument that comes right after the TP_ARGS(). This condition can use the parameters of TP_ARGS() in the TRACE_EVENT() to determine if the tracepoint should be traced or not. The TP_CONDITION() will be placed in a if (cond) trace; For example, for the tracepoint sched_wakeup, it is useless to trace a wakeup event where the caller never actually wakes anything up (where success == 0). So adding: TP_CONDITION(success), which uses the "success" parameter of the wakeup tracepoint will have it only trace when we have successfully woken up a task. Acked-by: Mathieu Desnoyers Acked-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Thomas Gleixner Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 29 +++++++++++++++++++++++------ include/trace/define_trace.h | 15 +++++++++++++++ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 5a6074fcd81d..d3e4f87e95c0 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -106,6 +106,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, #define TP_PROTO(args...) args #define TP_ARGS(args...) args +#define TP_CONDITION(args...) args #ifdef CONFIG_TRACEPOINTS @@ -119,12 +120,14 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". */ -#define __DO_TRACE(tp, proto, args) \ +#define __DO_TRACE(tp, proto, args, cond) \ do { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ void *__data; \ \ + if (!(cond)) \ + return; \ rcu_read_lock_sched_notrace(); \ it_func_ptr = rcu_dereference_sched((tp)->funcs); \ if (it_func_ptr) { \ @@ -142,7 +145,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define __DECLARE_TRACE(name, proto, args, data_proto, data_args) \ +#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ @@ -151,7 +154,8 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, do_trace: \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ - TP_ARGS(data_args)); \ + TP_ARGS(data_args), \ + TP_CONDITION(cond)); \ } \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ @@ -186,7 +190,7 @@ do_trace: \ EXPORT_SYMBOL(__tracepoint_##name) #else /* !CONFIG_TRACEPOINTS */ -#define __DECLARE_TRACE(name, proto, args, data_proto, data_args) \ +#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ static inline void trace_##name(proto) \ { } \ static inline int \ @@ -227,13 +231,18 @@ do_trace: \ * "void *__data, proto" as the callback prototype. */ #define DECLARE_TRACE_NOARGS(name) \ - __DECLARE_TRACE(name, void, , void *__data, __data) + __DECLARE_TRACE(name, void, , 1, void *__data, __data) #define DECLARE_TRACE(name, proto, args) \ - __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1, \ PARAMS(void *__data, proto), \ PARAMS(__data, args)) +#define DECLARE_TRACE_CONDITION(name, proto, args, cond) \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \ + PARAMS(void *__data, proto), \ + PARAMS(__data, args)) + #define TRACE_EVENT_FLAGS(event, flag) #endif /* DECLARE_TRACE */ @@ -349,12 +358,20 @@ do_trace: \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_CONDITION(template, name, proto, \ + args, cond) \ + DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ + PARAMS(args), PARAMS(cond)) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ assign, print, reg, unreg) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_CONDITION(name, proto, args, cond, \ + struct, assign, print) \ + DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ + PARAMS(args), PARAMS(cond)) #define TRACE_EVENT_FLAGS(event, flag) diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 1dfab5401511..b0b4eb24d592 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -26,6 +26,15 @@ #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ DEFINE_TRACE(name) +#undef TRACE_EVENT_CONDITION +#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \ + TRACE_EVENT(name, \ + PARAMS(proto), \ + PARAMS(args), \ + PARAMS(tstruct), \ + PARAMS(assign), \ + PARAMS(print)) + #undef TRACE_EVENT_FN #define TRACE_EVENT_FN(name, proto, args, tstruct, \ assign, print, reg, unreg) \ @@ -39,6 +48,10 @@ #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_TRACE(name) +#undef DEFINE_EVENT_CONDITION +#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -75,9 +88,11 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN +#undef TRACE_EVENT_CONDITION #undef DECLARE_EVENT_CLASS #undef DEFINE_EVENT #undef DEFINE_EVENT_PRINT +#undef DEFINE_EVENT_CONDITION #undef TRACE_HEADER_MULTI_READ #undef DECLARE_TRACE