diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 132e1ec67da0..00ef43233e03 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -516,7 +516,7 @@ int mce_available(struct cpuinfo_x86 *c) static void mce_schedule_work(void) { - if (!mce_gen_pool_empty() && keventd_up()) + if (!mce_gen_pool_empty()) schedule_work(&mce_work); } diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 623264445100..4c10a9df3b91 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -3923,10 +3923,6 @@ void unblank_screen(void) */ static void blank_screen_t(unsigned long dummy) { - if (unlikely(!keventd_up())) { - mod_timer(&console_timer, jiffies + (blankinterval * HZ)); - return; - } blank_timer_expired = 1; schedule_work(&console_work); } diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index d4f16cf6281c..a26cc437293c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -603,14 +603,6 @@ static inline bool schedule_delayed_work(struct delayed_work *dwork, return queue_delayed_work(system_wq, dwork, delay); } -/** - * keventd_up - is workqueue initialized yet? - */ -static inline bool keventd_up(void) -{ - return system_wq != NULL; -} - #ifndef CONFIG_SMP static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { @@ -645,4 +637,7 @@ int workqueue_online_cpu(unsigned int cpu); int workqueue_offline_cpu(unsigned int cpu); #endif +int __init workqueue_init_early(void); +int __init workqueue_init(void); + #endif diff --git a/init/main.c b/init/main.c index fa201166cba7..23c275cca73a 100644 --- a/init/main.c +++ b/init/main.c @@ -553,6 +553,14 @@ asmlinkage __visible void __init start_kernel(void) "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); idr_init_cache(); + + /* + * Allow workqueue creation and work item queueing/cancelling + * early. Work item execution depends on kthreads and starts after + * workqueue_init(). + */ + workqueue_init_early(); + rcu_init(); /* trace_printk() and trace points may be used after this */ @@ -1009,6 +1017,8 @@ static noinline void __init kernel_init_freeable(void) smp_prepare_cpus(setup_max_cpus); + workqueue_init(); + do_pre_smp_initcalls(); lockup_detector_init(); diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 168ff442ebde..97b0df71303e 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -482,16 +482,7 @@ void pm_qos_update_request(struct pm_qos_request *req, return; } - /* - * This function may be called very early during boot, for example, - * from of_clk_init(), where irq needs to stay disabled. - * cancel_delayed_work_sync() assumes that irq is enabled on - * invocation and re-enables it on return. Avoid calling it until - * workqueue is initialized. - */ - if (keventd_up()) - cancel_delayed_work_sync(&req->work); - + cancel_delayed_work_sync(&req->work); __pm_qos_update_request(req, new_value); } EXPORT_SYMBOL_GPL(pm_qos_update_request); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 479d840db286..1d9fb6543a66 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -290,6 +290,8 @@ module_param_named(disable_numa, wq_disable_numa, bool, 0444); static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT); module_param_named(power_efficient, wq_power_efficient, bool, 0444); +static bool wq_online; /* can kworkers be created yet? */ + static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */ @@ -2583,6 +2585,9 @@ void flush_workqueue(struct workqueue_struct *wq) }; int next_color; + if (WARN_ON(!wq_online)) + return; + lock_map_acquire(&wq->lockdep_map); lock_map_release(&wq->lockdep_map); @@ -2843,6 +2848,9 @@ bool flush_work(struct work_struct *work) { struct wq_barrier barr; + if (WARN_ON(!wq_online)) + return false; + lock_map_acquire(&work->lockdep_map); lock_map_release(&work->lockdep_map); @@ -2913,7 +2921,13 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) mark_work_canceling(work); local_irq_restore(flags); - flush_work(work); + /* + * This allows canceling during early boot. We know that @work + * isn't executing. + */ + if (wq_online) + flush_work(work); + clear_work_data(work); /* @@ -3364,7 +3378,7 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) goto fail; /* create and start the initial worker */ - if (!create_worker(pool)) + if (wq_online && !create_worker(pool)) goto fail; /* install */ @@ -3429,6 +3443,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) { struct workqueue_struct *wq = pwq->wq; bool freezable = wq->flags & WQ_FREEZABLE; + unsigned long flags; /* for @wq->saved_max_active */ lockdep_assert_held(&wq->mutex); @@ -3437,7 +3452,8 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) if (!freezable && pwq->max_active == wq->saved_max_active) return; - spin_lock_irq(&pwq->pool->lock); + /* this function can be called during early boot w/ irq disabled */ + spin_lock_irqsave(&pwq->pool->lock, flags); /* * During [un]freezing, the caller is responsible for ensuring that @@ -3460,7 +3476,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) pwq->max_active = 0; } - spin_unlock_irq(&pwq->pool->lock); + spin_unlock_irqrestore(&pwq->pool->lock, flags); } /* initialize newly alloced @pwq which is associated with @wq and @pool */ @@ -4033,6 +4049,7 @@ void destroy_workqueue(struct workqueue_struct *wq) for (i = 0; i < WORK_NR_COLORS; i++) { if (WARN_ON(pwq->nr_in_flight[i])) { mutex_unlock(&wq->mutex); + show_workqueue_state(); return; } } @@ -4041,6 +4058,7 @@ void destroy_workqueue(struct workqueue_struct *wq) WARN_ON(pwq->nr_active) || WARN_ON(!list_empty(&pwq->delayed_works))) { mutex_unlock(&wq->mutex); + show_workqueue_state(); return; } } @@ -5467,7 +5485,17 @@ static void __init wq_numa_init(void) wq_numa_enabled = true; } -static int __init init_workqueues(void) +/** + * workqueue_init_early - early init for workqueue subsystem + * + * This is the first half of two-staged workqueue subsystem initialization + * and invoked as soon as the bare basics - memory allocation, cpumasks and + * idr are up. It sets up all the data structures and system workqueues + * and allows early boot code to create workqueues and queue/cancel work + * items. Actual work item execution starts only after kthreads can be + * created and scheduled right before early initcalls. + */ +int __init workqueue_init_early(void) { int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL }; int i, cpu; @@ -5479,8 +5507,6 @@ static int __init init_workqueues(void) pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); - wq_numa_init(); - /* initialize CPU pools */ for_each_possible_cpu(cpu) { struct worker_pool *pool; @@ -5500,16 +5526,6 @@ static int __init init_workqueues(void) } } - /* create the initial worker */ - for_each_online_cpu(cpu) { - struct worker_pool *pool; - - for_each_cpu_worker_pool(pool, cpu) { - pool->flags &= ~POOL_DISASSOCIATED; - BUG_ON(!create_worker(pool)); - } - } - /* create default unbound and ordered wq attrs */ for (i = 0; i < NR_STD_WORKER_POOLS; i++) { struct workqueue_attrs *attrs; @@ -5546,8 +5562,59 @@ static int __init init_workqueues(void) !system_power_efficient_wq || !system_freezable_power_efficient_wq); + return 0; +} + +/** + * workqueue_init - bring workqueue subsystem fully online + * + * This is the latter half of two-staged workqueue subsystem initialization + * and invoked as soon as kthreads can be created and scheduled. + * Workqueues have been created and work items queued on them, but there + * are no kworkers executing the work items yet. Populate the worker pools + * with the initial workers and enable future kworker creations. + */ +int __init workqueue_init(void) +{ + struct workqueue_struct *wq; + struct worker_pool *pool; + int cpu, bkt; + + /* + * It'd be simpler to initialize NUMA in workqueue_init_early() but + * CPU to node mapping may not be available that early on some + * archs such as power and arm64. As per-cpu pools created + * previously could be missing node hint and unbound pools NUMA + * affinity, fix them up. + */ + wq_numa_init(); + + mutex_lock(&wq_pool_mutex); + + for_each_possible_cpu(cpu) { + for_each_cpu_worker_pool(pool, cpu) { + pool->node = cpu_to_node(cpu); + } + } + + list_for_each_entry(wq, &workqueues, list) + wq_update_unbound_numa(wq, smp_processor_id(), true); + + mutex_unlock(&wq_pool_mutex); + + /* create the initial workers */ + for_each_online_cpu(cpu) { + for_each_cpu_worker_pool(pool, cpu) { + pool->flags &= ~POOL_DISASSOCIATED; + BUG_ON(!create_worker(pool)); + } + } + + hash_for_each(unbound_pool_hash, bkt, pool, hash_node) + BUG_ON(!create_worker(pool)); + + wq_online = true; wq_watchdog_init(); return 0; } -early_initcall(init_workqueues); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 056052dc8e91..04c1ef717fe0 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -199,7 +199,7 @@ static void free_object(struct debug_obj *obj) * initialized: */ if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) - sched = keventd_up(); + sched = 1; hlist_add_head(&obj->node, &obj_pool); obj_pool_free++; obj_pool_used--; diff --git a/mm/slab.c b/mm/slab.c index 87b29e76cafd..29bc6c0dedd0 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -552,12 +552,7 @@ static void start_cpu_timer(int cpu) { struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu); - /* - * When this gets called from do_initcalls via cpucache_init(), - * init_workqueues() has already run, so keventd will be setup - * at that time. - */ - if (keventd_up() && reap_work->work.func == NULL) { + if (reap_work->work.func == NULL) { init_reap_node(cpu); INIT_DEFERRABLE_WORK(reap_work, cache_reap); schedule_delayed_work_on(cpu, reap_work,