e9838bd511
While attempting to clear the busy bit at the end of a work execution,
atomic_cmpxchg() expects the value of the flags with the pending bit
cleared as the old value. However by mistake the value of the flags is
passed without clearing the pending bit first.
As a result, clearing the busy bit fails and irq_work_sync() may stall:
watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [blktrace:4948]
CPU: 0 PID: 4948 Comm: blktrace Not tainted 5.4.0-rc7-00003-gfeb4a51323bab #1
RIP: 0010:irq_work_sync+0x4/0x10
Call Trace:
relay_close_buf+0x19/0x50
relay_close+0x64/0x100
blk_trace_free+0x1f/0x50
__blk_trace_remove+0x1e/0x30
blk_trace_ioctl+0x11b/0x140
blkdev_ioctl+0x6c1/0xa40
block_ioctl+0x39/0x40
do_vfs_ioctl+0xa5/0x700
ksys_ioctl+0x70/0x80
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x5b/0x1d0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
So clear the appropriate bit before passing the old flags to cmpxchg().
Fixes: feb4a51323
("irq_work: Slightly simplify IRQ_WORK_PENDING clearing")
Reported-by: kernel test robot <rong.a.chen@intel.com>
Reported-by: Leonard Crestez <leonard.crestez@nxp.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Leonard Crestez <leonard.crestez@nxp.com>
Link: https://lkml.kernel.org/r/20191113171201.14032-1-frederic@kernel.org
198 lines
4.7 KiB
C
198 lines
4.7 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
|
|
*
|
|
* Provides a framework for enqueueing and running callbacks from hardirq
|
|
* context. The enqueueing is NMI-safe.
|
|
*/
|
|
|
|
#include <linux/bug.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/export.h>
|
|
#include <linux/irq_work.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/hardirq.h>
|
|
#include <linux/irqflags.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/tick.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/notifier.h>
|
|
#include <linux/smp.h>
|
|
#include <asm/processor.h>
|
|
|
|
|
|
static DEFINE_PER_CPU(struct llist_head, raised_list);
|
|
static DEFINE_PER_CPU(struct llist_head, lazy_list);
|
|
|
|
/*
|
|
* Claim the entry so that no one else will poke at it.
|
|
*/
|
|
static bool irq_work_claim(struct irq_work *work)
|
|
{
|
|
int oflags;
|
|
|
|
oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags);
|
|
/*
|
|
* If the work is already pending, no need to raise the IPI.
|
|
* The pairing atomic_fetch_andnot() in irq_work_run() makes sure
|
|
* everything we did before is visible.
|
|
*/
|
|
if (oflags & IRQ_WORK_PENDING)
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
void __weak arch_irq_work_raise(void)
|
|
{
|
|
/*
|
|
* Lame architectures will get the timer tick callback
|
|
*/
|
|
}
|
|
|
|
/* Enqueue on current CPU, work must already be claimed and preempt disabled */
|
|
static void __irq_work_queue_local(struct irq_work *work)
|
|
{
|
|
/* If the work is "lazy", handle it from next tick if any */
|
|
if (atomic_read(&work->flags) & IRQ_WORK_LAZY) {
|
|
if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
|
|
tick_nohz_tick_stopped())
|
|
arch_irq_work_raise();
|
|
} else {
|
|
if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
|
|
arch_irq_work_raise();
|
|
}
|
|
}
|
|
|
|
/* Enqueue the irq work @work on the current CPU */
|
|
bool irq_work_queue(struct irq_work *work)
|
|
{
|
|
/* Only queue if not already pending */
|
|
if (!irq_work_claim(work))
|
|
return false;
|
|
|
|
/* Queue the entry and raise the IPI if needed. */
|
|
preempt_disable();
|
|
__irq_work_queue_local(work);
|
|
preempt_enable();
|
|
|
|
return true;
|
|
}
|
|
EXPORT_SYMBOL_GPL(irq_work_queue);
|
|
|
|
/*
|
|
* Enqueue the irq_work @work on @cpu unless it's already pending
|
|
* somewhere.
|
|
*
|
|
* Can be re-enqueued while the callback is still in progress.
|
|
*/
|
|
bool irq_work_queue_on(struct irq_work *work, int cpu)
|
|
{
|
|
#ifndef CONFIG_SMP
|
|
return irq_work_queue(work);
|
|
|
|
#else /* CONFIG_SMP: */
|
|
/* All work should have been flushed before going offline */
|
|
WARN_ON_ONCE(cpu_is_offline(cpu));
|
|
|
|
/* Only queue if not already pending */
|
|
if (!irq_work_claim(work))
|
|
return false;
|
|
|
|
preempt_disable();
|
|
if (cpu != smp_processor_id()) {
|
|
/* Arch remote IPI send/receive backend aren't NMI safe */
|
|
WARN_ON_ONCE(in_nmi());
|
|
if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
|
|
arch_send_call_function_single_ipi(cpu);
|
|
} else {
|
|
__irq_work_queue_local(work);
|
|
}
|
|
preempt_enable();
|
|
|
|
return true;
|
|
#endif /* CONFIG_SMP */
|
|
}
|
|
|
|
|
|
bool irq_work_needs_cpu(void)
|
|
{
|
|
struct llist_head *raised, *lazy;
|
|
|
|
raised = this_cpu_ptr(&raised_list);
|
|
lazy = this_cpu_ptr(&lazy_list);
|
|
|
|
if (llist_empty(raised) || arch_irq_work_has_interrupt())
|
|
if (llist_empty(lazy))
|
|
return false;
|
|
|
|
/* All work should have been flushed before going offline */
|
|
WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
|
|
|
|
return true;
|
|
}
|
|
|
|
static void irq_work_run_list(struct llist_head *list)
|
|
{
|
|
struct irq_work *work, *tmp;
|
|
struct llist_node *llnode;
|
|
|
|
BUG_ON(!irqs_disabled());
|
|
|
|
if (llist_empty(list))
|
|
return;
|
|
|
|
llnode = llist_del_all(list);
|
|
llist_for_each_entry_safe(work, tmp, llnode, llnode) {
|
|
int flags;
|
|
/*
|
|
* Clear the PENDING bit, after this point the @work
|
|
* can be re-used.
|
|
* Make it immediately visible so that other CPUs trying
|
|
* to claim that work don't rely on us to handle their data
|
|
* while we are in the middle of the func.
|
|
*/
|
|
flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
|
|
|
|
work->func(work);
|
|
/*
|
|
* Clear the BUSY bit and return to the free state if
|
|
* no-one else claimed it meanwhile.
|
|
*/
|
|
flags &= ~IRQ_WORK_PENDING;
|
|
(void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* hotplug calls this through:
|
|
* hotplug_cfd() -> flush_smp_call_function_queue()
|
|
*/
|
|
void irq_work_run(void)
|
|
{
|
|
irq_work_run_list(this_cpu_ptr(&raised_list));
|
|
irq_work_run_list(this_cpu_ptr(&lazy_list));
|
|
}
|
|
EXPORT_SYMBOL_GPL(irq_work_run);
|
|
|
|
void irq_work_tick(void)
|
|
{
|
|
struct llist_head *raised = this_cpu_ptr(&raised_list);
|
|
|
|
if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
|
|
irq_work_run_list(raised);
|
|
irq_work_run_list(this_cpu_ptr(&lazy_list));
|
|
}
|
|
|
|
/*
|
|
* Synchronize against the irq_work @entry, ensures the entry is not
|
|
* currently in use.
|
|
*/
|
|
void irq_work_sync(struct irq_work *work)
|
|
{
|
|
lockdep_assert_irqs_enabled();
|
|
|
|
while (atomic_read(&work->flags) & IRQ_WORK_BUSY)
|
|
cpu_relax();
|
|
}
|
|
EXPORT_SYMBOL_GPL(irq_work_sync);
|