diff --git a/kernel/events/core.c b/kernel/events/core.c index d640a8b4dcbc..afdd9e1d7144 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -902,13 +902,23 @@ static void put_ctx(struct perf_event_context *ctx) } } -static void unclone_ctx(struct perf_event_context *ctx) +/* + * This must be done under the ctx->lock, such as to serialize against + * context_equiv(), therefore we cannot call put_ctx() since that might end up + * calling scheduler related locks and ctx->lock nests inside those. + */ +static __must_check struct perf_event_context * +unclone_ctx(struct perf_event_context *ctx) { - if (ctx->parent_ctx) { - put_ctx(ctx->parent_ctx); + struct perf_event_context *parent_ctx = ctx->parent_ctx; + + lockdep_assert_held(&ctx->lock); + + if (parent_ctx) ctx->parent_ctx = NULL; - } ctx->generation++; + + return parent_ctx; } static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) @@ -2210,6 +2220,9 @@ static void ctx_sched_out(struct perf_event_context *ctx, static int context_equiv(struct perf_event_context *ctx1, struct perf_event_context *ctx2) { + lockdep_assert_held(&ctx1->lock); + lockdep_assert_held(&ctx2->lock); + /* Pinning disables the swap optimization */ if (ctx1->pin_count || ctx2->pin_count) return 0; @@ -2943,6 +2956,7 @@ static int event_enable_on_exec(struct perf_event *event, */ static void perf_event_enable_on_exec(struct perf_event_context *ctx) { + struct perf_event_context *clone_ctx = NULL; struct perf_event *event; unsigned long flags; int enabled = 0; @@ -2974,7 +2988,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) * Unclone this context if we enabled any event. */ if (enabled) - unclone_ctx(ctx); + clone_ctx = unclone_ctx(ctx); raw_spin_unlock(&ctx->lock); @@ -2984,6 +2998,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) perf_event_context_sched_in(ctx, ctx->task); out: local_irq_restore(flags); + + if (clone_ctx) + put_ctx(clone_ctx); } void perf_event_exec(void) @@ -3135,7 +3152,7 @@ find_lively_task_by_vpid(pid_t vpid) static struct perf_event_context * find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) { - struct perf_event_context *ctx; + struct perf_event_context *ctx, *clone_ctx = NULL; struct perf_cpu_context *cpuctx; unsigned long flags; int ctxn, err; @@ -3169,9 +3186,12 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) retry: ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { - unclone_ctx(ctx); + clone_ctx = unclone_ctx(ctx); ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); + + if (clone_ctx) + put_ctx(clone_ctx); } else { ctx = alloc_perf_context(pmu, task); err = -ENOMEM; @@ -7523,7 +7543,7 @@ __perf_event_exit_task(struct perf_event *child_event, static void perf_event_exit_task_context(struct task_struct *child, int ctxn) { struct perf_event *child_event, *next; - struct perf_event_context *child_ctx, *parent_ctx; + struct perf_event_context *child_ctx, *clone_ctx = NULL; unsigned long flags; if (likely(!child->perf_event_ctxp[ctxn])) { @@ -7549,29 +7569,17 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) task_ctx_sched_out(child_ctx); child->perf_event_ctxp[ctxn] = NULL; - /* - * In order to avoid freeing: child_ctx->parent_ctx->task - * under perf_event_context::lock, grab another reference. - */ - parent_ctx = child_ctx->parent_ctx; - if (parent_ctx) - get_ctx(parent_ctx); - /* * If this context is a clone; unclone it so it can't get * swapped to another process while we're removing all * the events from it. */ - unclone_ctx(child_ctx); + clone_ctx = unclone_ctx(child_ctx); update_context_time(child_ctx); raw_spin_unlock_irqrestore(&child_ctx->lock, flags); - /* - * Now that we no longer hold perf_event_context::lock, drop - * our extra child_ctx->parent_ctx reference. - */ - if (parent_ctx) - put_ctx(parent_ctx); + if (clone_ctx) + put_ctx(clone_ctx); /* * Report the task dead after unscheduling the events so that we