From 3cbd6a43be932e56907abd21091314dc044175f2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 11 Nov 2015 10:20:51 -0500 Subject: [PATCH 01/24] ftracetest: Add instance create and delete test Create a test to test instance creation and deletion. Several tasks are created that create 3 directories and delete them. The tasks all create the same directories. This places a stress on the code that creates and deletes instances. Signed-off-by: Steven Rostedt --- .../ftrace/test.d/instances/instance.tc | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/instances/instance.tc diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance.tc b/tools/testing/selftests/ftrace/test.d/instances/instance.tc new file mode 100644 index 000000000000..773e276ff90b --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/instances/instance.tc @@ -0,0 +1,90 @@ +#!/bin/sh +# description: Test creation and deletion of trace instances + +if [ ! -d instances ] ; then + echo "no instance directory with this kernel" + exit_unsupported; +fi + +fail() { # mesg + rmdir x y z 2>/dev/null + echo $1 + set -e + exit $FAIL +} + +cd instances + +# we don't want to fail on error +set +e + +mkdir x +rmdir x +result=$? + +if [ $result -ne 0 ]; then + echo "instance rmdir not supported" + exit_unsupported +fi + +instance_slam() { + while :; do + mkdir x + mkdir y + mkdir z + rmdir x + rmdir y + rmdir z + done 2>/dev/null +} + +instance_slam & +x=`jobs -l` +p1=`echo $x | cut -d' ' -f2` +echo $p1 + +instance_slam & +x=`jobs -l | tail -1` +p2=`echo $x | cut -d' ' -f2` +echo $p2 + +instance_slam & +x=`jobs -l | tail -1` +p3=`echo $x | cut -d' ' -f2` +echo $p3 + +instance_slam & +x=`jobs -l | tail -1` +p4=`echo $x | cut -d' ' -f2` +echo $p4 + +instance_slam & +x=`jobs -l | tail -1` +p5=`echo $x | cut -d' ' -f2` +echo $p5 + +ls -lR >/dev/null +sleep 1 + +kill -1 $p1 +kill -1 $p2 +kill -1 $p3 +kill -1 $p4 +kill -1 $p5 + +echo "Wait for processes to finish" +wait $p1 $p2 $p3 $p4 $p5 +echo "all processes finished, wait for cleanup" + +mkdir x y z +ls x y z +rmdir x y z +for d in x y z; do + if [ -d $d ]; then + fail "instance $d still exists" + fi +done + +set -e + +exit 0 From 8573636ea794fa088f459429e65e47d7776532cf Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 17 Nov 2015 14:03:11 -0500 Subject: [PATCH 02/24] ring-buffer: Use READ_ONCE() for most tail_page access As cpu_buffer->tail_page may be modified by interrupts at almost any time, the flow of logic is very important. Do not let gcc get smart with re-reading cpu_buffer->tail_page by adding READ_ONCE() around most of its accesses. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9c6045a27ba3..ab102e6259bc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1036,7 +1036,7 @@ static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, * it is, then it is up to us to update the tail * pointer. */ - if (tail_page == cpu_buffer->tail_page) { + if (tail_page == READ_ONCE(cpu_buffer->tail_page)) { /* Zero the write counter */ unsigned long val = old_write & ~RB_WRITE_MASK; unsigned long eval = old_entries & ~RB_WRITE_MASK; @@ -2036,12 +2036,15 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * the tail page would have moved. */ if (ret == RB_PAGE_NORMAL) { + struct buffer_page *buffer_tail_page; + + buffer_tail_page = READ_ONCE(cpu_buffer->tail_page); /* * If the tail had moved passed next, then we need * to reset the pointer. */ - if (cpu_buffer->tail_page != tail_page && - cpu_buffer->tail_page != next_page) + if (buffer_tail_page != tail_page && + buffer_tail_page != next_page) rb_head_page_set_normal(cpu_buffer, new_head, next_page, RB_PAGE_HEAD); @@ -2362,7 +2365,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, addr = (unsigned long)event; addr &= PAGE_MASK; - bpage = cpu_buffer->tail_page; + bpage = READ_ONCE(cpu_buffer->tail_page); if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { unsigned long write_mask = @@ -2410,7 +2413,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) again: max_count = cpu_buffer->nr_pages * 100; - while (cpu_buffer->commit_page != cpu_buffer->tail_page) { + while (cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page)) { if (RB_WARN_ON(cpu_buffer, !(--max_count))) return; if (RB_WARN_ON(cpu_buffer, @@ -2443,7 +2446,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) * and pushed the tail page forward, we will be left with * a dangling commit that will never go forward. */ - if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page)) + if (unlikely(cpu_buffer->commit_page != READ_ONCE(cpu_buffer->tail_page))) goto again; } @@ -2699,7 +2702,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, if (unlikely(info->add_timestamp)) info->length += RB_LEN_TIME_EXTEND; - tail_page = info->tail_page = cpu_buffer->tail_page; + /* Don't let the compiler play games with cpu_buffer->tail_page */ + tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page); write = local_add_return(info->length, &tail_page->write); /* set write to only the index of the write */ From 70004986ffdf36d8bc787403af2571aeeef96595 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 17 Nov 2015 15:15:19 -0500 Subject: [PATCH 03/24] ring-buffer: Remove redundant update of page timestamp The first commit of a buffer page updates the timestamp of that page. No need to have the update to the next page add the timestamp too. It will only be replaced by the first commit on that page anyway. Only update to a page if it contains an event. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ab102e6259bc..631541a53baf 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1001,17 +1001,13 @@ static int rb_head_page_replace(struct buffer_page *old, /* * rb_tail_page_update - move the tail page forward - * - * Returns 1 if moved tail page, 0 if someone else did. */ -static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, +static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *tail_page, struct buffer_page *next_page) { - struct buffer_page *old_tail; unsigned long old_entries; unsigned long old_write; - int ret = 0; /* * The tail page now needs to be moved forward. @@ -1061,14 +1057,9 @@ static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, */ local_set(&next_page->page->commit, 0); - old_tail = cmpxchg(&cpu_buffer->tail_page, - tail_page, next_page); - - if (old_tail == tail_page) - ret = 1; + /* Again, either we update tail_page or an interrupt does */ + (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page); } - - return ret; } static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, @@ -2150,7 +2141,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer *buffer = cpu_buffer->buffer; struct buffer_page *next_page; int ret; - u64 ts; next_page = tail_page; @@ -2224,15 +2214,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, } } - ret = rb_tail_page_update(cpu_buffer, tail_page, next_page); - if (ret) { - /* - * Nested commits always have zero deltas, so - * just reread the time stamp - */ - ts = rb_time_stamp(buffer); - next_page->page->time_stamp = ts; - } + rb_tail_page_update(cpu_buffer, tail_page, next_page); out_again: @@ -2422,8 +2404,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); - cpu_buffer->write_stamp = - cpu_buffer->commit_page->page->time_stamp; + /* Only update the write stamp if the page has an event */ + if (rb_page_write(cpu_buffer->commit_page)) + cpu_buffer->write_stamp = + cpu_buffer->commit_page->page->time_stamp; /* add barrier to keep gcc from optimizing too much */ barrier(); } From 4239c38fe0b3847e1e6d962c74b41b08ba0e2990 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 17 Nov 2015 16:36:06 -0500 Subject: [PATCH 04/24] ring-buffer: Process commits whenever moving to a new page. When crossing over to a new page, commit the current work. This will allow readers to get data with less latency, and also simplifies the work to get timestamps working for interrupted events. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 631541a53baf..95181e36891a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2129,6 +2129,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, local_sub(length, &tail_page->write); } +static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer); + /* * This is the slow path, force gcc not to inline it. */ @@ -2220,6 +2222,11 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, rb_reset_tail(cpu_buffer, tail, info); + /* Commit what we have for now. */ + rb_end_commit(cpu_buffer); + /* rb_end_commit() decs committing */ + local_inc(&cpu_buffer->committing); + /* fail and let the caller try again */ return ERR_PTR(-EAGAIN); From 4e4a4d75700da67c1209c151d5f11a309c3b8367 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 23 Nov 2015 13:51:16 -0600 Subject: [PATCH 05/24] tracing: Update cond flag when enabling or disabling a trigger When a trigger is enabled, the cond flag should be set beforehand, otherwise a trigger that's expecting to process a trace record (e.g. one with post_trigger set) could be invoked without one. Likewise a trigger's cond flag should be reset after it's disabled, not before. Link: http://lkml.kernel.org/r/a420b52a67b1c2d3cab017914362d153255acb99.1448303214.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Signed-off-by: Daniel Wagner Reviewed-by: Masami Hiramatsu Reviewed-by: Namhyung Kim Tested-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_trigger.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 42a4009fd75a..4d2f3ccc56f6 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -543,11 +543,12 @@ static int register_trigger(char *glob, struct event_trigger_ops *ops, list_add_rcu(&data->list, &file->triggers); ret++; + update_cond_flag(file); if (trace_event_trigger_enable_disable(file, 1) < 0) { list_del_rcu(&data->list); + update_cond_flag(file); ret--; } - update_cond_flag(file); out: return ret; } @@ -575,8 +576,8 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, if (data->cmd_ops->trigger_type == test->cmd_ops->trigger_type) { unregistered = true; list_del_rcu(&data->list); - update_cond_flag(file); trace_event_trigger_enable_disable(file, 0); + update_cond_flag(file); break; } } @@ -1319,11 +1320,12 @@ static int event_enable_register_trigger(char *glob, list_add_rcu(&data->list, &file->triggers); ret++; + update_cond_flag(file); if (trace_event_trigger_enable_disable(file, 1) < 0) { list_del_rcu(&data->list); + update_cond_flag(file); ret--; } - update_cond_flag(file); out: return ret; } @@ -1344,8 +1346,8 @@ static void event_enable_unregister_trigger(char *glob, (enable_data->file == test_enable_data->file)) { unregistered = true; list_del_rcu(&data->list); - update_cond_flag(file); trace_event_trigger_enable_disable(file, 0); + update_cond_flag(file); break; } } From 02a392a0439ffdc62b4d8f17bd18d68736b166a9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Nov 2015 12:50:47 -0500 Subject: [PATCH 06/24] ftrace: Add new type to distinguish what kind of ftrace_bug() The ftrace function hook utility has several internal checks to make sure that whatever it modifies is exactly what it expects to be modifying. This is essential as modifying running code can be extremely dangerous to the system. When an anomaly is detected, ftrace_bug() is called which sends a splat to the console and disables function tracing. There's some extra information that is printed to help diagnose the issue. One thing that is missing though is output of what ftrace was doing at the time of the crash. Was it updating a call site or perhaps converting a call site to a nop? A new global enum variable is created to state what ftrace was doing at the time of the anomaly, and this is reported in ftrace_bug(). Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 9 +++++++++ kernel/trace/ftrace.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index eae6548efbf0..870c8eea38cd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -296,6 +296,15 @@ int ftrace_arch_code_modify_post_process(void); struct dyn_ftrace; +enum ftrace_bug_type { + FTRACE_BUG_UNKNOWN, + FTRACE_BUG_INIT, + FTRACE_BUG_NOP, + FTRACE_BUG_CALL, + FTRACE_BUG_UPDATE, +}; +extern enum ftrace_bug_type ftrace_bug_type; + void ftrace_bug(int err, struct dyn_ftrace *rec); struct seq_file; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3f743b147247..b95efcac9dfe 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1953,6 +1953,28 @@ static void print_ip_ins(const char *fmt, unsigned char *p) static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); +enum ftrace_bug_type ftrace_bug_type; + +static void print_bug_type(void) +{ + switch (ftrace_bug_type) { + case FTRACE_BUG_UNKNOWN: + break; + case FTRACE_BUG_INIT: + pr_info("Initializing ftrace call sites\n"); + break; + case FTRACE_BUG_NOP: + pr_info("Setting ftrace call site to NOP\n"); + break; + case FTRACE_BUG_CALL: + pr_info("Setting ftrace call site to call ftrace function\n"); + break; + case FTRACE_BUG_UPDATE: + pr_info("Updating ftrace call site to call a different ftrace function\n"); + break; + } +} + /** * ftrace_bug - report and shutdown function tracer * @failed: The failed type (EFAULT, EINVAL, EPERM) @@ -1992,6 +2014,7 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) pr_info("ftrace faulted on unknown error "); print_ip_sym(ip); } + print_bug_type(); if (rec) { struct ftrace_ops *ops = NULL; @@ -2016,6 +2039,8 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) { unsigned long flag = 0UL; + ftrace_bug_type = FTRACE_BUG_UNKNOWN; + /* * If we are updating calls: * @@ -2077,9 +2102,12 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) * from the save regs, to a non-save regs function or * vice versa, or from a trampoline call. */ - if (flag & FTRACE_FL_ENABLED) + if (flag & FTRACE_FL_ENABLED) { + ftrace_bug_type = FTRACE_BUG_CALL; return FTRACE_UPDATE_MAKE_CALL; + } + ftrace_bug_type = FTRACE_BUG_UPDATE; return FTRACE_UPDATE_MODIFY_CALL; } @@ -2096,6 +2124,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) FTRACE_FL_REGS_EN); } + ftrace_bug_type = FTRACE_BUG_NOP; return FTRACE_UPDATE_MAKE_NOP; } @@ -2307,17 +2336,22 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) ret = ftrace_update_record(rec, enable); + ftrace_bug_type = FTRACE_BUG_UNKNOWN; + switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; case FTRACE_UPDATE_MAKE_CALL: + ftrace_bug_type = FTRACE_BUG_CALL; return ftrace_make_call(rec, ftrace_addr); case FTRACE_UPDATE_MAKE_NOP: + ftrace_bug_type = FTRACE_BUG_NOP; return ftrace_make_nop(NULL, rec, ftrace_old_addr); case FTRACE_UPDATE_MODIFY_CALL: + ftrace_bug_type = FTRACE_BUG_UPDATE; return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); } @@ -2425,6 +2459,7 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); if (ret) { + ftrace_bug_type = FTRACE_BUG_INIT; ftrace_bug(ret, rec); return 0; } From b05086c77a162dd8ef79606cb4723f1fc1448bb1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Nov 2015 14:13:11 -0500 Subject: [PATCH 07/24] ftrace: Add variable ftrace_expected for archs to show expected code When an anomaly is found while modifying function code, ftrace_bug() is called which disables the function tracing infrastructure and reports information about what failed. If the code that is to be replaced does not match what is expected, then actual code is shown. Currently there is no arch generic way to show what was expected. Add a new variable pointer calld ftrace_expected that the arch code can set to point to what it expected so that ftrace_bug() can report the actual text as well as the text that was expected to be there. Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 9 +++++++++ include/linux/ftrace.h | 6 ++++++ kernel/trace/ftrace.c | 9 +++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 311bcf338f07..909da012406d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -105,6 +105,8 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, { unsigned char replaced[MCOUNT_INSN_SIZE]; + ftrace_expected = old_code; + /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting @@ -154,6 +156,8 @@ int ftrace_make_nop(struct module *mod, if (addr == MCOUNT_ADDR) return ftrace_modify_code_direct(rec->ip, old, new); + ftrace_expected = NULL; + /* Normal cases use add_brk_on_nop */ WARN_ONCE(1, "invalid use of ftrace_make_nop"); return -EINVAL; @@ -220,6 +224,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { WARN_ON(1); + ftrace_expected = NULL; return -EINVAL; } @@ -314,6 +319,8 @@ static int add_break(unsigned long ip, const char *old) if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; + ftrace_expected = old; + /* Make sure it is what we expect it to be */ if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) return -EINVAL; @@ -413,6 +420,8 @@ static int remove_breakpoint(struct dyn_ftrace *rec) ftrace_addr = ftrace_get_addr_curr(rec); nop = ftrace_call_replace(ip, ftrace_addr); + ftrace_expected = nop; + if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 870c8eea38cd..134f8d45b35b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -305,6 +305,12 @@ enum ftrace_bug_type { }; extern enum ftrace_bug_type ftrace_bug_type; +/* + * Archs can set this to point to a variable that holds the value that was + * expected at the call site before calling ftrace_bug(). + */ +extern const void *ftrace_expected; + void ftrace_bug(int err, struct dyn_ftrace *rec); struct seq_file; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b95efcac9dfe..7870c03b4c4d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1940,7 +1940,7 @@ static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash); } -static void print_ip_ins(const char *fmt, unsigned char *p) +static void print_ip_ins(const char *fmt, const unsigned char *p) { int i; @@ -1954,6 +1954,7 @@ static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); enum ftrace_bug_type ftrace_bug_type; +const void *ftrace_expected; static void print_bug_type(void) { @@ -2001,8 +2002,12 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) FTRACE_WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); print_ip_sym(ip); - print_ip_ins(" actual: ", (unsigned char *)ip); + print_ip_ins(" actual: ", (unsigned char *)ip); pr_cont("\n"); + if (ftrace_expected) { + print_ip_ins(" expected: ", ftrace_expected); + pr_cont("\n"); + } break; case -EPERM: FTRACE_WARN_ON_ONCE(1); From 39daa7b9e89512f234b7fb5d55812a78318251fc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Nov 2015 15:12:38 -0500 Subject: [PATCH 08/24] ftrace: Show all tramps registered to a record on ftrace_bug() When an anomaly is detected in the function call modification code, ftrace_bug() is called to disable function tracing as well as give any information that may help debug the problem. Currently, only the first found trampoline that is attached to the failed record is reported. Instead, show all trampolines that are hooked to it. Also, not only show the ops pointer but also report the function it calls. While at it, add this info to the enabled_functions debug file too. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 46 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7870c03b4c4d..e371aed51fcf 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1952,6 +1952,8 @@ static void print_ip_ins(const char *fmt, const unsigned char *p) static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); +static struct ftrace_ops * +ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); enum ftrace_bug_type ftrace_bug_type; const void *ftrace_expected; @@ -2028,15 +2030,19 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) rec->flags & FTRACE_FL_REGS ? " R" : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); - if (ops) - pr_cont("\ttramp: %pS", - (void *)ops->trampoline); - else + if (ops) { + do { + pr_cont("\ttramp: %pS (%pS)", + (void *)ops->trampoline, + (void *)ops->func); + ops = ftrace_find_tramp_ops_next(rec, ops); + } while (ops); + } else pr_cont("\ttramp: ERROR!"); } ip = ftrace_get_addr_curr(rec); - pr_cont(" expected tramp: %lx\n", ip); + pr_cont("\n expected tramp: %lx\n", ip); } } @@ -2178,6 +2184,24 @@ ftrace_find_tramp_ops_any(struct dyn_ftrace *rec) return NULL; } +static struct ftrace_ops * +ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, + struct ftrace_ops *op) +{ + unsigned long ip = rec->ip; + + while_for_each_ftrace_op(op) { + + if (!op->trampoline) + continue; + + if (hash_contains_ip(ip, op->func_hash)) + return op; + } + + return NULL; +} + static struct ftrace_ops * ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec) { @@ -3306,10 +3330,14 @@ static int t_show(struct seq_file *m, void *v) rec->flags & FTRACE_FL_IPMODIFY ? " I" : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); - if (ops) - seq_printf(m, "\ttramp: %pS", - (void *)ops->trampoline); - else + if (ops) { + do { + seq_printf(m, "\ttramp: %pS (%pS)", + (void *)ops->trampoline, + (void *)ops->func); + ops = ftrace_find_tramp_ops_next(rec, ops); + } while (ops); + } else seq_puts(m, "\ttramp: ERROR!"); } From b8ec330a63eb39127f5cfcae5f8524e969ef9f94 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Mon, 30 Nov 2015 18:23:36 +0800 Subject: [PATCH 09/24] ftrace: Fix a typo in comment s/ARCH_SUPPORT_FTARCE_OPS/ARCH_SUPPORTS_FTRACE_OPS/ Link: http://lkml.kernel.org/r/1448879016-8659-1-git-send-email-huawei.libin@huawei.com Signed-off-by: Li Bin Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e371aed51fcf..b8dfe8138aa2 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5263,7 +5263,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS. * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. * An architecture can pass partial regs with ftrace_ops and still - * set the ARCH_SUPPORT_FTARCE_OPS. + * set the ARCH_SUPPORTS_FTRACE_OPS. */ #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, From 030f4e1cb86f059185572fd1678a55b5e8ff0d08 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 1 Dec 2015 12:24:45 -0500 Subject: [PATCH 10/24] ftrace: Fix output of enabled_functions for showing tramp When showing all tramps registered to a ftrace record in the file enabled_functions, it exits the loop with ops == NULL. But then it is suppose to show the function on the ops->trampoline and add_trampoline_func() is called with the given ops. But because ops is now NULL (to exit the loop), it always shows the static trampoline instead of the one that is really registered to the record. The call to add_trampoline_func() that shows the trampoline for the given ops needs to be called at every iteration. Fixes: 39daa7b9e895 "ftrace: Show all tramps registered to a record on ftrace_bug()" Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b8dfe8138aa2..bf7bebcdad82 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3322,7 +3322,7 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, "%ps", (void *)rec->ip); if (iter->flags & FTRACE_ITER_ENABLED) { - struct ftrace_ops *ops = NULL; + struct ftrace_ops *ops; seq_printf(m, " (%ld)%s%s", ftrace_rec_count(rec), @@ -3335,13 +3335,14 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, "\ttramp: %pS (%pS)", (void *)ops->trampoline, (void *)ops->func); + add_trampoline_func(m, ops, rec); ops = ftrace_find_tramp_ops_next(rec, ops); } while (ops); } else seq_puts(m, "\ttramp: ERROR!"); - + } else { + add_trampoline_func(m, NULL, rec); } - add_trampoline_func(m, ops, rec); } seq_putc(m, '\n'); From ba27f2bc731135a0396f3968bdddb54f3bc72e64 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 30 Nov 2015 17:23:39 -0500 Subject: [PATCH 11/24] ftrace: Remove use of control list and ops Currently perf has its own list function within the ftrace infrastructure that seems to be used only to allow for it to have per-cpu disabling as well as a check to make sure that it's not called while RCU is not watching. It uses something called the "control_ops" which is used to iterate over ops under it with the control_list_func(). The problem is that this control_ops and control_list_func unnecessarily complicates the code. By replacing FTRACE_OPS_FL_CONTROL with two new flags (FTRACE_OPS_FL_RCU and FTRACE_OPS_FL_PER_CPU) we can remove all the code that is special with the control ops and add the needed checks within the generic ftrace_list_func(). Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 35 ++++----- kernel/trace/ftrace.c | 126 ++++++++++---------------------- kernel/trace/trace.h | 2 - kernel/trace/trace_event_perf.c | 2 +- 4 files changed, 57 insertions(+), 108 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 134f8d45b35b..4736a826baf5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -76,8 +76,8 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * ENABLED - set/unset when ftrace_ops is registered/unregistered * DYNAMIC - set when ftrace_ops is registered to denote dynamically * allocated ftrace_ops which need special care - * CONTROL - set manualy by ftrace_ops user to denote the ftrace_ops - * could be controled by following calls: + * PER_CPU - set manualy by ftrace_ops user to denote the ftrace_ops + * could be controlled by following calls: * ftrace_function_local_enable * ftrace_function_local_disable * SAVE_REGS - The ftrace_ops wants regs saved at each function called @@ -121,7 +121,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); enum { FTRACE_OPS_FL_ENABLED = 1 << 0, FTRACE_OPS_FL_DYNAMIC = 1 << 1, - FTRACE_OPS_FL_CONTROL = 1 << 2, + FTRACE_OPS_FL_PER_CPU = 1 << 2, FTRACE_OPS_FL_SAVE_REGS = 1 << 3, FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 4, FTRACE_OPS_FL_RECURSION_SAFE = 1 << 5, @@ -134,6 +134,7 @@ enum { FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 12, FTRACE_OPS_FL_IPMODIFY = 1 << 13, FTRACE_OPS_FL_PID = 1 << 14, + FTRACE_OPS_FL_RCU = 1 << 15, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -146,11 +147,11 @@ struct ftrace_ops_hash { #endif /* - * Note, ftrace_ops can be referenced outside of RCU protection. - * (Although, for perf, the control ops prevent that). If ftrace_ops is - * allocated and not part of kernel core data, the unregistering of it will - * perform a scheduling on all CPUs to make sure that there are no more users. - * Depending on the load of the system that may take a bit of time. + * Note, ftrace_ops can be referenced outside of RCU protection, unless + * the RCU flag is set. If ftrace_ops is allocated and not part of kernel + * core data, the unregistering of it will perform a scheduling on all CPUs + * to make sure that there are no more users. Depending on the load of the + * system that may take a bit of time. * * Any private data added must also take care not to be freed and if private * data is added to a ftrace_ops that is in core code, the user of the @@ -196,34 +197,34 @@ int unregister_ftrace_function(struct ftrace_ops *ops); void clear_ftrace_function(void); /** - * ftrace_function_local_enable - enable controlled ftrace_ops on current cpu + * ftrace_function_local_enable - enable ftrace_ops on current cpu * * This function enables tracing on current cpu by decreasing * the per cpu control variable. * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_CONTROL. If called without preemption + * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. */ static inline void ftrace_function_local_enable(struct ftrace_ops *ops) { - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL))) + if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) return; (*this_cpu_ptr(ops->disabled))--; } /** - * ftrace_function_local_disable - enable controlled ftrace_ops on current cpu + * ftrace_function_local_disable - disable ftrace_ops on current cpu * - * This function enables tracing on current cpu by decreasing + * This function disables tracing on current cpu by increasing * the per cpu control variable. * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_CONTROL. If called without preemption + * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. */ static inline void ftrace_function_local_disable(struct ftrace_ops *ops) { - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL))) + if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) return; (*this_cpu_ptr(ops->disabled))++; @@ -235,12 +236,12 @@ static inline void ftrace_function_local_disable(struct ftrace_ops *ops) * * This function returns value of ftrace_ops::disabled on current cpu. * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_CONTROL. If called without preemption + * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. */ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) { - WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)); + WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU)); return *this_cpu_ptr(ops->disabled); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bf7bebcdad82..bc7f4eb6b4b0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -62,8 +62,6 @@ #define FTRACE_HASH_DEFAULT_BITS 10 #define FTRACE_HASH_MAX_BITS 12 -#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL) - #ifdef CONFIG_DYNAMIC_FTRACE #define INIT_OPS_HASH(opsname) \ .func_hash = &opsname.local_hash, \ @@ -113,11 +111,9 @@ static int ftrace_disabled __read_mostly; static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; -static struct ftrace_ops control_ops; static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs); @@ -203,7 +199,7 @@ void clear_ftrace_function(void) ftrace_trace_function = ftrace_stub; } -static void control_ops_disable_all(struct ftrace_ops *ops) +static void per_cpu_ops_disable_all(struct ftrace_ops *ops) { int cpu; @@ -211,16 +207,19 @@ static void control_ops_disable_all(struct ftrace_ops *ops) *per_cpu_ptr(ops->disabled, cpu) = 1; } -static int control_ops_alloc(struct ftrace_ops *ops) +static int per_cpu_ops_alloc(struct ftrace_ops *ops) { int __percpu *disabled; + if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) + return -EINVAL; + disabled = alloc_percpu(int); if (!disabled) return -ENOMEM; ops->disabled = disabled; - control_ops_disable_all(ops); + per_cpu_ops_disable_all(ops); return 0; } @@ -256,10 +255,11 @@ static inline void update_function_graph_func(void) { } static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) { /* - * If this is a dynamic ops or we force list func, + * If this is a dynamic, RCU, or per CPU ops, or we force list func, * then it needs to call the list anyway. */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC) + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU | + FTRACE_OPS_FL_RCU) || FTRACE_FORCE_LIST_FUNC) return ftrace_ops_list_func; return ftrace_ops_get_func(ops); @@ -383,26 +383,6 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) return 0; } -static void add_ftrace_list_ops(struct ftrace_ops **list, - struct ftrace_ops *main_ops, - struct ftrace_ops *ops) -{ - int first = *list == &ftrace_list_end; - add_ftrace_ops(list, ops); - if (first) - add_ftrace_ops(&ftrace_ops_list, main_ops); -} - -static int remove_ftrace_list_ops(struct ftrace_ops **list, - struct ftrace_ops *main_ops, - struct ftrace_ops *ops) -{ - int ret = remove_ftrace_ops(list, ops); - if (!ret && *list == &ftrace_list_end) - ret = remove_ftrace_ops(&ftrace_ops_list, main_ops); - return ret; -} - static void ftrace_update_trampoline(struct ftrace_ops *ops); static int __register_ftrace_function(struct ftrace_ops *ops) @@ -430,14 +410,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; - if (ops->flags & FTRACE_OPS_FL_CONTROL) { - if (control_ops_alloc(ops)) + if (ops->flags & FTRACE_OPS_FL_PER_CPU) { + if (per_cpu_ops_alloc(ops)) return -ENOMEM; - add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); - /* The control_ops needs the trampoline update */ - ops = &control_ops; - } else - add_ftrace_ops(&ftrace_ops_list, ops); + } + + add_ftrace_ops(&ftrace_ops_list, ops); /* Always save the function, and reset at unregistering */ ops->saved_func = ops->func; @@ -460,11 +438,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) return -EBUSY; - if (ops->flags & FTRACE_OPS_FL_CONTROL) { - ret = remove_ftrace_list_ops(&ftrace_control_list, - &control_ops, ops); - } else - ret = remove_ftrace_ops(&ftrace_ops_list, ops); + ret = remove_ftrace_ops(&ftrace_ops_list, ops); if (ret < 0) return ret; @@ -2630,7 +2604,7 @@ void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops) { } -static void control_ops_free(struct ftrace_ops *ops) +static void per_cpu_ops_free(struct ftrace_ops *ops) { free_percpu(ops->disabled); } @@ -2731,13 +2705,13 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) if (!command || !ftrace_enabled) { /* - * If these are control ops, they still need their + * If these are per_cpu ops, they still need their * per_cpu field freed. Since, function tracing is * not currently active, we can just free them * without synchronizing all CPUs. */ - if (ops->flags & FTRACE_OPS_FL_CONTROL) - control_ops_free(ops); + if (ops->flags & FTRACE_OPS_FL_PER_CPU) + per_cpu_ops_free(ops); return 0; } @@ -2778,7 +2752,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. - * The same goes for freeing the per_cpu data of the control + * The same goes for freeing the per_cpu data of the per_cpu * ops. * * Again, normal synchronize_sched() is not good enough. @@ -2789,13 +2763,13 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * infrastructure to do the synchronization, thus we must do it * ourselves. */ - if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) { + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) { schedule_on_each_cpu(ftrace_sync); arch_ftrace_trampoline_free(ops); - if (ops->flags & FTRACE_OPS_FL_CONTROL) - control_ops_free(ops); + if (ops->flags & FTRACE_OPS_FL_PER_CPU) + per_cpu_ops_free(ops); } return 0; @@ -5185,44 +5159,6 @@ void ftrace_reset_array_ops(struct trace_array *tr) tr->ops->func = ftrace_stub; } -static void -ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) -{ - if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) - return; - - /* - * Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_sched(). - */ - preempt_disable_notrace(); - trace_recursion_set(TRACE_CONTROL_BIT); - - /* - * Control funcs (perf) uses RCU. Only trace if - * RCU is currently active. - */ - if (!rcu_is_watching()) - goto out; - - do_for_each_ftrace_op(op, ftrace_control_list) { - if (!(op->flags & FTRACE_OPS_FL_STUB) && - !ftrace_function_local_disabled(op) && - ftrace_ops_test(op, ip, regs)) - op->func(ip, parent_ip, op, regs); - } while_for_each_ftrace_op(op); - out: - trace_recursion_clear(TRACE_CONTROL_BIT); - preempt_enable_notrace(); -} - -static struct ftrace_ops control_ops = { - .func = ftrace_ops_control_func, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, - INIT_OPS_HASH(control_ops) -}; - static inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ignored, struct pt_regs *regs) @@ -5239,8 +5175,22 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, * they must be freed after a synchronize_sched(). */ preempt_disable_notrace(); + do_for_each_ftrace_op(op, ftrace_ops_list) { - if (ftrace_ops_test(op, ip, regs)) { + /* + * Check the following for each ops before calling their func: + * if RCU flag is set, then rcu_is_watching() must be true + * if PER_CPU is set, then ftrace_function_local_disable() + * must be false + * Otherwise test if the ip matches the ops filter + * + * If any of the above fails then the op->func() is not executed. + */ + if ((!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) && + (!(op->flags & FTRACE_OPS_FL_PER_CPU) || + !ftrace_function_local_disabled(op)) && + ftrace_ops_test(op, ip, regs)) { + if (FTRACE_WARN_ON(!op->func)) { pr_warn("op=%p %pS\n", op, op); goto out; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 919d9d07686f..d3980b87bf04 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -467,8 +467,6 @@ enum { TRACE_INTERNAL_IRQ_BIT, TRACE_INTERNAL_SIRQ_BIT, - TRACE_CONTROL_BIT, - TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index abfc903e741e..2649c85cd162 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -334,7 +334,7 @@ static int perf_ftrace_function_register(struct perf_event *event) { struct ftrace_ops *ops = &event->ftrace_ops; - ops->flags |= FTRACE_OPS_FL_CONTROL; + ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU; ops->func = perf_ftrace_function_call; return register_ftrace_function(ops); } From c68c0fa29341754de86b6e5317b6074f1e334581 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 1 Dec 2015 13:28:16 -0500 Subject: [PATCH 12/24] ftrace: Have ftrace_ops_get_func() handle RCU and PER_CPU flags too Jiri Olsa noted that the change to replace the control_ops did not update the trampoline for when running perf on a single CPU and with CONFIG_PREEMPT disabled (where dynamic ops, like perf, can use trampolines directly). The result was that perf function could be called when RCU is not watching as well as not handle the ftrace_local_disable(). Modify the ftrace_ops_get_func() to also check the RCU and PER_CPU ops flags and use the recursive function if they are set. The recursive function is modified to check those flags and execute the appropriate checks if they are set. Link: http://lkml.kernel.org/r/20151201134213.GA14155@krava.brq.redhat.com Reported-by: Jiri Olsa Patch-fixed-up-by: Jiri Olsa Tested-by: Jiri Olsa Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bc7f4eb6b4b0..e290a30f2d0b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -115,9 +115,6 @@ static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; -static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs); - #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs); @@ -5231,20 +5228,29 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) /* * If there's only one function registered but it does not support - * recursion, this function will be called by the mcount trampoline. - * This function will handle recursion protection. + * recursion, needs RCU protection and/or requires per cpu handling, then + * this function will be called by the mcount trampoline. */ -static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, +static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs) { int bit; + if ((op->flags & FTRACE_OPS_FL_RCU) && !rcu_is_watching()) + return; + bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return; - op->func(ip, parent_ip, op, regs); + preempt_disable_notrace(); + if (!(op->flags & FTRACE_OPS_FL_PER_CPU) || + !ftrace_function_local_disabled(op)) { + op->func(ip, parent_ip, op, regs); + } + + preempt_enable_notrace(); trace_clear_recursion(bit); } @@ -5262,12 +5268,12 @@ static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) { /* - * If the func handles its own recursion, call it directly. - * Otherwise call the recursion protected function that - * will call the ftrace ops function. + * If the function does not handle recursion, needs to be RCU safe, + * or does per cpu logic, then we need to call the assist handler. */ - if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE)) - return ftrace_ops_recurs_func; + if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) || + ops->flags & (FTRACE_OPS_FL_RCU | FTRACE_OPS_FL_PER_CPU)) + return ftrace_ops_assist_func; return ops->func; } From 27dff4e04199cf0ecf06239a26d0d225d3c046e9 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 11 Dec 2015 18:35:59 +0100 Subject: [PATCH 13/24] bpf: Constify bpf_verifier_ops structure This bpf_verifier_ops structure is never modified, like the other bpf_verifier_ops structures, so declare it as const. Done with the help of Coccinelle. Link: http://lkml.kernel.org/r/1449855359-13724-1-git-send-email-Julia.Lawall@lip6.fr Signed-off-by: Julia Lawall Signed-off-by: Steven Rostedt --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 4228fd3682c3..45dd798bcd37 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -316,7 +316,7 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type return true; } -static struct bpf_verifier_ops kprobe_prog_ops = { +static const struct bpf_verifier_ops kprobe_prog_ops = { .get_func_proto = kprobe_prog_func_proto, .is_valid_access = kprobe_prog_is_valid_access, }; From ff078d8fc644722f7b163d79e8a03b00b9dc2385 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Mon, 16 Nov 2015 12:57:28 -0700 Subject: [PATCH 14/24] tracing: Use seq_buf_used() in seq_buf_to_user() instead of len commit 5ac48378414d ("tracing: Use trace_seq_used() and seq_buf_used() instead of len") changed the tracing code to use trace_seq_used() and seq_buf_used() instead of using the seq_buf len directly to avoid overflow issues, but missed a spot in seq_buf_to_user() that makes use of s->len. Cleaned up the code a bit as well per suggestion of Steve Rostedt. Link: http://lkml.kernel.org/r/1447703848-2951-1-git-send-email-jsnitsel@redhat.com Signed-off-by: Jerry Snitselaar Signed-off-by: Steven Rostedt --- lib/seq_buf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 5c94e1012a91..cb18469e1f49 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -306,10 +306,12 @@ int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, int cnt) if (!cnt) return 0; - if (s->len <= s->readpos) + len = seq_buf_used(s); + + if (len <= s->readpos) return -EBUSY; - len = seq_buf_used(s) - s->readpos; + len -= s->readpos; if (cnt > len) cnt = len; ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); From 2701121b8f4db4d69c327c0d8f8694ff2ce30ef7 Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Mon, 14 Dec 2015 23:18:05 +0300 Subject: [PATCH 15/24] tracing: Introduce TRACE_EVENT_FN_COND macro TRACE_EVENT_FN can't be used in some circumstances like invoking trace functions from offlined CPU due to RCU usage. This patch adds the TRACE_EVENT_FN_COND macro to make such trace points conditional. Link: http://lkml.kernel.org/r/1450124286-4822-1-git-send-email-kda@linux-powerpc.org Signed-off-by: Denis Kirjanov Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 4 ++++ include/trace/define_trace.h | 6 ++++++ include/trace/trace_events.h | 6 ++++++ 3 files changed, 16 insertions(+) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 696a339c592c..45b3fcf7dd3c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -493,6 +493,10 @@ extern void syscall_unregfunc(void); #define TRACE_EVENT_FN(name, proto, args, struct, \ assign, print, reg, unreg) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct, \ + assign, print, reg, unreg) \ + DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ + PARAMS(args), PARAMS(cond)) #define TRACE_EVENT_CONDITION(name, proto, args, cond, \ struct, assign, print) \ DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 2d8639ea64d5..6e3945f64102 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -40,6 +40,11 @@ assign, print, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg) +#undef TRACE_EVENT_FN_COND +#define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct, \ + assign, print, reg, unreg) \ + DEFINE_TRACE_FN(name, reg, unreg) + #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) \ DEFINE_TRACE(name) @@ -93,6 +98,7 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN +#undef TRACE_EVENT_FN_COND #undef TRACE_EVENT_CONDITION #undef DECLARE_EVENT_CLASS #undef DEFINE_EVENT diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index de996cf61053..170c93bbdbb7 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -123,6 +123,12 @@ TRACE_MAKE_SYSTEM_STR(); TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \ PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \ +#undef TRACE_EVENT_FN_COND +#define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct, \ + assign, print, reg, unreg) \ + TRACE_EVENT_CONDITION(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \ + PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \ + #undef TRACE_EVENT_FLAGS #define TRACE_EVENT_FLAGS(name, value) \ __TRACE_EVENT_FLAGS(name, value) From b6b71f66a16a9212b853c168f6ec1f303c5c7a7d Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Wed, 2 Dec 2015 15:39:57 +0100 Subject: [PATCH 16/24] ftrace: Join functions ftrace_module_init() and ftrace_init_module() Simple cleanup. No need for two functions here. The whole work can simply be done inside 'ftrace_module_init'. Link: http://lkml.kernel.org/r/1449067197-5718-1-git-send-email-abelvesa@linux.com Signed-off-by: Abel Vesa Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e290a30f2d0b..29fcc2c76583 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4980,19 +4980,16 @@ void ftrace_release_mod(struct module *mod) mutex_unlock(&ftrace_lock); } -static void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) -{ - if (ftrace_disabled || start == end) - return; - ftrace_process_locs(mod, start, end); -} - void ftrace_module_init(struct module *mod) { - ftrace_init_module(mod, mod->ftrace_callsites, - mod->ftrace_callsites + - mod->num_ftrace_callsites); + unsigned long *start = mod->ftrace_callsites; + unsigned long *end = mod->ftrace_callsites + + mod->num_ftrace_callsites; + + if (ftrace_disabled || start == end) + return; + + ftrace_process_locs(mod, start, end); } static int ftrace_module_notify_exit(struct notifier_block *self, From 97e9b4fca52bf4e2f7eed9463a0722f8e7afbe90 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 23 Dec 2015 12:12:22 -0500 Subject: [PATCH 17/24] ftrace: Clean up ftrace_module_init() code The start and end variables were only used when ftrace_module_init() was split up into multiple functions. No need to keep them around after the merger. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 29fcc2c76583..0f7ee341f89f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4982,14 +4982,11 @@ void ftrace_release_mod(struct module *mod) void ftrace_module_init(struct module *mod) { - unsigned long *start = mod->ftrace_callsites; - unsigned long *end = mod->ftrace_callsites + - mod->num_ftrace_callsites; - - if (ftrace_disabled || start == end) + if (ftrace_disabled || !mod->num_ftrace_callsites) return; - ftrace_process_locs(mod, start, end); + ftrace_process_locs(mod, mod->ftrace_callsites, + mod->ftrace_callsites + mod->num_ftrace_callsites); } static int ftrace_module_notify_exit(struct notifier_block *self, From cbbe12c43d8208b531c7be4d28a69c8137d19f21 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Fri, 4 Dec 2015 15:27:43 +0800 Subject: [PATCH 18/24] ia64: ftrace: Fix the comments for ftrace_modify_code() There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Link: http://lkml.kernel.org/r/1449214067-12177-2-git-send-email-huawei.libin@huawei.com Cc: linux-ia64@vger.kernel.org Acked-by: Tony Luck Suggested-by: Steven Rostedt Signed-off-by: Li Bin Signed-off-by: Steven Rostedt --- arch/ia64/kernel/ftrace.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c index 3b0c2aa07857..cee411e647ca 100644 --- a/arch/ia64/kernel/ftrace.c +++ b/arch/ia64/kernel/ftrace.c @@ -97,13 +97,11 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. We do this by using the - * probe_kernel_* functions. - * - * No real locking needed, this code is run through - * kstop_machine, or before SMP starts. + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. */ if (!do_check) From 5243238ad5c4a58a4d6c095c3318c36a7e18c08b Mon Sep 17 00:00:00 2001 From: Li Bin Date: Sun, 6 Dec 2015 10:02:57 +0800 Subject: [PATCH 19/24] sh: ftrace: Fix the comments for ftrace_modify_code() There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Link: http://lkml.kernel.org/r/1449367378-29430-5-git-send-email-huawei.libin@huawei.com Cc: linux-sh@vger.kernel.org Suggested-by: Steven Rostedt Signed-off-by: Li Bin Signed-off-by: Steven Rostedt --- arch/sh/kernel/ftrace.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 079d70e6d74b..38993e09ef03 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -212,13 +212,11 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. We do this by using the - * probe_kernel_* functions. - * - * No real locking needed, this code is run through - * kstop_machine, or before SMP starts. + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ From e9b349f0892622831edcfbb63f5b88473e88dbc6 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Sun, 6 Dec 2015 10:02:55 +0800 Subject: [PATCH 20/24] metag: ftrace: Fix the comments for ftrace_modify_code There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Link: http://lkml.kernel.org/r/1449367378-29430-3-git-send-email-huawei.libin@huawei.com Cc: linux-metag@vger.kernel.org Acked-by: James Hogan Suggested-by: Steven Rostedt Signed-off-by: Li Bin Signed-off-by: Steven Rostedt --- arch/metag/kernel/ftrace.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c index ed1d685157c2..ac8c039b0318 100644 --- a/arch/metag/kernel/ftrace.c +++ b/arch/metag/kernel/ftrace.c @@ -54,12 +54,11 @@ static int ftrace_modify_code(unsigned long pc, unsigned char *old_code, unsigned char replaced[MCOUNT_INSN_SIZE]; /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. - * - * No real locking needed, this code is run through - * kstop_machine. + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ From 05a724bd44a68cd5c60bc4b949a946b57d4c9e55 Mon Sep 17 00:00:00 2001 From: Chuyu Hu Date: Tue, 22 Dec 2015 09:44:33 -0500 Subject: [PATCH 21/24] tracing: Fix comment to use tracing_on over tracing_enable The file tracing_enable is obsolete and does not exist anymore. Replace the comment that references it with the proper tracing_on file. Link: http://lkml.kernel.org/r/1450787141-45544-1-git-send-email-chuhu@redhat.com Signed-off-by: Chuyu Hu Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d3980b87bf04..8414fa40bf27 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -363,8 +363,8 @@ struct trace_option_dentry { * @name: the name chosen to select it on the available_tracers file * @init: called when one switches to this tracer (echo name > current_tracer) * @reset: called when one switches to another tracer - * @start: called when tracing is unpaused (echo 1 > tracing_enabled) - * @stop: called when tracing is paused (echo 0 > tracing_enabled) + * @start: called when tracing is unpaused (echo 1 > tracing_on) + * @stop: called when tracing is paused (echo 0 > tracing_on) * @update_thresh: called when tracing_thresh is updated * @open: called when the trace file is opened * @pipe_open: called when the trace_pipe file is opened From c5d641f92c9633f568740332989c067a0ba7d4dc Mon Sep 17 00:00:00 2001 From: Li Bin Date: Sun, 6 Dec 2015 10:02:58 +0800 Subject: [PATCH 22/24] x86: ftrace: Fix the comments for ftrace_modify_code_direct() There is no need to worry about module and __init text disappearing case, because that ftrace has a module notifier that is called when a module is being unloaded and before the text goes away and this code grabs the ftrace_lock mutex and removes the module functions from the ftrace list, such that it will no longer do any modifications to that module's text, the update to make functions be traced or not is done under the ftrace_lock mutex as well. And by now, __init section codes should not been modified by ftrace, because it is black listed in recordmcount.c and ignored by ftrace. Link: http://lkml.kernel.org/r/1449367378-29430-6-git-send-email-huawei.libin@huawei.com Reviewed-by: Thomas Gleixner Suggested-by: Steven Rostedt Signed-off-by: Li Bin Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 909da012406d..29408d6d6626 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -108,13 +108,11 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, ftrace_expected = old_code; /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. We do this by using the - * probe_kernel_* functions. - * - * No real locking needed, this code is run through - * kstop_machine, or before SMP starts. + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ From b7ffffbb46f205e7727a18bcc7a46c3c2b534f7c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 7 Jan 2016 15:40:01 -0500 Subject: [PATCH 23/24] ftrace: Add infrastructure for delayed enabling of module functions Qiu Peiyang pointed out that there's a race when enabling function tracing and loading a module. In order to make the modifications of converting nops in the prologue of functions into callbacks, the text needs to be converted from read-only to read-write. When enabling function tracing, the text permission is updated, the functions are modified, and then they are put back. When loading a module, the updates to convert function calls to mcount is done before the module text is set to read-only. But after it is done, the module text is visible by the function tracer. Thus we have the following race: CPU 0 CPU 1 ----- ----- start function tracing set text to read-write load_module add functions to ftrace set module text read-only update all functions to callbacks modify module functions too < Can't it's read-only > When this happens, ftrace detects the issue and disables itself till the next reboot. To fix this, a new DISABLED flag is added for ftrace records, which all module functions get when they are added. Then later, after the module code is all set, the records will have the DISABLED flag cleared, and they will be enabled if any callback wants all functions to be traced. Note, this doesn't add the delay to later. It simply changes the ftrace_module_init() to do both the setting of DISABLED records, and then immediately calls the enable code. This helps with testing this new code as it has the same behavior as previously. Another change will come after this to have the ftrace_module_enable() called after the text is set to read-only. Cc: Qiu Peiyang Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 +- kernel/trace/ftrace.c | 163 +++++++++++++++++++++++++++-------------- 2 files changed, 111 insertions(+), 58 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4736a826baf5..660e7c698f3b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -357,6 +357,7 @@ bool is_ftrace_trampoline(unsigned long addr); * REGS - the record wants the function to save regs * REGS_EN - the function is set up to save regs. * IPMODIFY - the record allows for the IP address to be changed. + * DISABLED - the record is not ready to be touched yet * * When a new ftrace_ops is registered and wants a function to save * pt_regs, the rec->flag REGS is set. When the function has been @@ -371,10 +372,11 @@ enum { FTRACE_FL_TRAMP = (1UL << 28), FTRACE_FL_TRAMP_EN = (1UL << 27), FTRACE_FL_IPMODIFY = (1UL << 26), + FTRACE_FL_DISABLED = (1UL << 25), }; -#define FTRACE_REF_MAX_SHIFT 26 -#define FTRACE_FL_BITS 6 +#define FTRACE_REF_MAX_SHIFT 25 +#define FTRACE_FL_BITS 7 #define FTRACE_FL_MASKED_BITS ((1UL << FTRACE_FL_BITS) - 1) #define FTRACE_FL_MASK (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT) #define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0f7ee341f89f..23683b06b18c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1658,6 +1658,9 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, int in_hash = 0; int match = 0; + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (all) { /* * Only the filter_hash affects all records. @@ -2023,6 +2026,9 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) ftrace_bug_type = FTRACE_BUG_UNKNOWN; + if (rec->flags & FTRACE_FL_DISABLED) + return FTRACE_UPDATE_IGNORE; + /* * If we are updating calls: * @@ -2833,9 +2839,9 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) return 0; - /* If ops traces all mods, we already accounted for it */ + /* If ops traces all then it includes this function */ if (ops_traces_mod(ops)) - return 0; + return 1; /* The function must be in the filter */ if (!ftrace_hash_empty(ops->func_hash->filter_hash) && @@ -2849,64 +2855,41 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) return 1; } -static int referenced_filters(struct dyn_ftrace *rec) -{ - struct ftrace_ops *ops; - int cnt = 0; - - for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { - if (ops_references_rec(ops, rec)) - cnt++; - } - - return cnt; -} - static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) { struct ftrace_page *pg; struct dyn_ftrace *p; cycle_t start, stop; unsigned long update_cnt = 0; - unsigned long ref = 0; - bool test = false; + unsigned long rec_flags = 0; int i; - /* - * When adding a module, we need to check if tracers are - * currently enabled and if they are set to trace all functions. - * If they are, we need to enable the module functions as well - * as update the reference counts for those function records. - */ - if (mod) { - struct ftrace_ops *ops; - - for (ops = ftrace_ops_list; - ops != &ftrace_list_end; ops = ops->next) { - if (ops->flags & FTRACE_OPS_FL_ENABLED) { - if (ops_traces_mod(ops)) - ref++; - else - test = true; - } - } - } - start = ftrace_now(raw_smp_processor_id()); + /* + * When a module is loaded, this function is called to convert + * the calls to mcount in its text to nops, and also to create + * an entry in the ftrace data. Now, if ftrace is activated + * after this call, but before the module sets its text to + * read-only, the modification of enabling ftrace can fail if + * the read-only is done while ftrace is converting the calls. + * To prevent this, the module's records are set as disabled + * and will be enabled after the call to set the module's text + * to read-only. + */ + if (mod) + rec_flags |= FTRACE_FL_DISABLED; + for (pg = new_pgs; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { - int cnt = ref; /* If something went wrong, bail without enabling anything */ if (unlikely(ftrace_disabled)) return -1; p = &pg->records[i]; - if (test) - cnt += referenced_filters(p); - p->flags = cnt; + p->flags = rec_flags; /* * Do the initial record conversion from mcount jump @@ -2916,21 +2899,6 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) break; update_cnt++; - - /* - * If the tracing is enabled, go ahead and enable the record. - * - * The reason not to enable the record immediatelly is the - * inherent check of ftrace_make_nop/ftrace_make_call for - * correct previous instructions. Making first the NOP - * conversion puts the module to the correct state, thus - * passing the ftrace_make_call check. - */ - if (ftrace_start_up && cnt) { - int failed = __ftrace_replace_code(p, 1); - if (failed) - ftrace_bug(failed, p); - } } } @@ -4938,6 +4906,19 @@ static int ftrace_process_locs(struct module *mod, #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) +static int referenced_filters(struct dyn_ftrace *rec) +{ + struct ftrace_ops *ops; + int cnt = 0; + + for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { + if (ops_references_rec(ops, rec)) + cnt++; + } + + return cnt; +} + void ftrace_release_mod(struct module *mod) { struct dyn_ftrace *rec; @@ -4980,6 +4961,75 @@ void ftrace_release_mod(struct module *mod) mutex_unlock(&ftrace_lock); } +static void ftrace_module_enable(struct module *mod) +{ + struct dyn_ftrace *rec; + struct ftrace_page *pg; + + mutex_lock(&ftrace_lock); + + if (ftrace_disabled) + goto out_unlock; + + /* + * If the tracing is enabled, go ahead and enable the record. + * + * The reason not to enable the record immediatelly is the + * inherent check of ftrace_make_nop/ftrace_make_call for + * correct previous instructions. Making first the NOP + * conversion puts the module to the correct state, thus + * passing the ftrace_make_call check. + * + * We also delay this to after the module code already set the + * text to read-only, as we now need to set it back to read-write + * so that we can modify the text. + */ + if (ftrace_start_up) + ftrace_arch_code_modify_prepare(); + + do_for_each_ftrace_rec(pg, rec) { + int cnt; + /* + * do_for_each_ftrace_rec() is a double loop. + * module text shares the pg. If a record is + * not part of this module, then skip this pg, + * which the "break" will do. + */ + if (!within_module_core(rec->ip, mod)) + break; + + cnt = 0; + + /* + * When adding a module, we need to check if tracers are + * currently enabled and if they are, and can trace this record, + * we need to enable the module functions as well as update the + * reference counts for those function records. + */ + if (ftrace_start_up) + cnt += referenced_filters(rec); + + /* This clears FTRACE_FL_DISABLED */ + rec->flags = cnt; + + if (ftrace_start_up && cnt) { + int failed = __ftrace_replace_code(rec, 1); + if (failed) { + ftrace_bug(failed, rec); + goto out_loop; + } + } + + } while_for_each_ftrace_rec(); + + out_loop: + if (ftrace_start_up) + ftrace_arch_code_modify_post_process(); + + out_unlock: + mutex_unlock(&ftrace_lock); +} + void ftrace_module_init(struct module *mod) { if (ftrace_disabled || !mod->num_ftrace_callsites) @@ -4987,6 +5037,7 @@ void ftrace_module_init(struct module *mod) ftrace_process_locs(mod, mod->ftrace_callsites, mod->ftrace_callsites + mod->num_ftrace_callsites); + ftrace_module_enable(mod); } static int ftrace_module_notify_exit(struct notifier_block *self, From 5156dca34a3e1e1edac2d0dabf43d8632909b7aa Mon Sep 17 00:00:00 2001 From: Qiu Peiyang Date: Fri, 25 Dec 2015 14:46:00 +0800 Subject: [PATCH 24/24] ftrace: Fix the race between ftrace and insmod We hit ftrace_bug report when booting Android on a 64bit ATOM SOC chip. Basically, there is a race between insmod and ftrace_run_update_code. After load_module=>ftrace_module_init, another thread jumps in to call ftrace_run_update_code=>ftrace_arch_code_modify_prepare =>set_all_modules_text_rw, to change all modules as RW. Since the new module is at MODULE_STATE_UNFORMED, the text attribute is not changed. Then, the 2nd thread goes ahead to change codes. However, load_module continues to call complete_formation=>set_section_ro_nx, then 2nd thread would fail when probing the module's TEXT. The patch fixes it by using notifier to delay the enabling of ftrace records to the time when module is at state MODULE_STATE_COMING. Link: http://lkml.kernel.org/r/567CE628.3000609@intel.com Signed-off-by: Qiu Peiyang Signed-off-by: Zhang Yanmin Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 23683b06b18c..eca592f977b2 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5037,29 +5037,36 @@ void ftrace_module_init(struct module *mod) ftrace_process_locs(mod, mod->ftrace_callsites, mod->ftrace_callsites + mod->num_ftrace_callsites); - ftrace_module_enable(mod); } -static int ftrace_module_notify_exit(struct notifier_block *self, - unsigned long val, void *data) +static int ftrace_module_notify(struct notifier_block *self, + unsigned long val, void *data) { struct module *mod = data; - if (val == MODULE_STATE_GOING) + switch (val) { + case MODULE_STATE_COMING: + ftrace_module_enable(mod); + break; + case MODULE_STATE_GOING: ftrace_release_mod(mod); + break; + default: + break; + } return 0; } #else -static int ftrace_module_notify_exit(struct notifier_block *self, - unsigned long val, void *data) +static int ftrace_module_notify(struct notifier_block *self, + unsigned long val, void *data) { return 0; } #endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_exit_nb = { - .notifier_call = ftrace_module_notify_exit, +struct notifier_block ftrace_module_nb = { + .notifier_call = ftrace_module_notify, .priority = INT_MIN, /* Run after anything that can remove kprobes */ }; @@ -5091,7 +5098,7 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); - ret = register_module_notifier(&ftrace_module_exit_nb); + ret = register_module_notifier(&ftrace_module_nb); if (ret) pr_warning("Failed to register trace ftrace module exit notifier\n");