30a2da7b7e
There is a potential race between ioc_release_fn() and
ioc_clear_queue() as shown below, due to which below kernel
crash is observed. It also can result into use-after-free
issue.

context#1:                              context#2:
ioc_release_fn()                        __ioc_clear_queue() gets the same icq
->spin_lock(&ioc->lock);                ->spin_lock(&ioc->lock);
->ioc_destroy_icq(icq);
  ->list_del_init(&icq->q_node);
  ->call_rcu(&icq->__rcu_head,
        icq_free_icq_rcu);
->spin_unlock(&ioc->lock);
                                        ->ioc_destroy_icq(icq);
                                          ->hlist_del_init(&icq->ioc_node);
                                          This results into below crash as this memory
                                          is now used by icq->__rcu_head in context#1.
                                          There is a chance that icq could be free'd
                                          as well.

22150.386550: <6> Unable to handle kernel write to read-only memory at virtual address ffffffaa8d31ca50
...
Call trace:
22150.607350: <2> ioc_destroy_icq+0x44/0x110
22150.611202: <2> ioc_clear_queue+0xac/0x148
22150.615056: <2> blk_cleanup_queue+0x11c/0x1a0
22150.619174: <2> __scsi_remove_device+0xdc/0x128
22150.623465: <2> scsi_forget_host+0x2c/0x78
22150.627315: <2> scsi_remove_host+0x7c/0x2a0
22150.631257: <2> usb_stor_disconnect+0x74/0xc8
22150.635371: <2> usb_unbind_interface+0xc8/0x278
22150.639665: <2> device_release_driver_internal+0x198/0x250
22150.644897: <2> device_release_driver+0x24/0x30
22150.649176: <2> bus_remove_device+0xec/0x140
22150.653204: <2> device_del+0x270/0x460
22150.656712: <2> usb_disable_device+0x120/0x390
22150.660918: <2> usb_disconnect+0xf4/0x2e0
22150.664684: <2> hub_event+0xd70/0x17e8
22150.668197: <2> process_one_work+0x210/0x480
22150.672222: <2> worker_thread+0x32c/0x4c8

Fix this by adding a new ICQ_DESTROYED flag in ioc_destroy_icq() to
indicate this icq is once marked as destroyed. Also, ensure
__ioc_clear_queue() is accessing icq within rcu_read_lock/unlock so
that icq doesn't get free'd up while it is still using it.
Signed-off-by: Sahitya Tummala <stummala@codeaurora.org>
Co-developed-by: Pradeep P V K <ppvk@codeaurora.org>
Signed-off-by: Pradeep P V K <ppvk@codeaurora.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
160 lines
4.8 KiB
C
160 lines
4.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef IOCONTEXT_H
|
|
#define IOCONTEXT_H
|
|
|
|
#include <linux/radix-tree.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/workqueue.h>
|
|
|
|
/*
 * State bits for an io_cq.
 *
 * ICQ_DESTROYED marks an icq that ioc_destroy_icq() has already handled,
 * so that a second path racing to destroy the same icq can tell it has
 * been dealt with.
 *
 * NOTE(review): ICQ_EXITED presumably records that the elevator's exit
 * hook has already run for the icq, and both bits presumably live in
 * io_cq::flags - confirm against the users in block/.
 *
 * Bits 0 and 1 are not defined here.
 */
enum {
	ICQ_EXITED		= 1 << 2,
	ICQ_DESTROYED		= 1 << 3,
};
|
|
|
|
/*
|
|
* An io_cq (icq) is association between an io_context (ioc) and a
|
|
* request_queue (q). This is used by elevators which need to track
|
|
* information per ioc - q pair.
|
|
*
|
|
* Elevator can request use of icq by setting elevator_type->icq_size and
|
|
* ->icq_align. Both size and align must be larger than that of struct
|
|
* io_cq and elevator can use the tail area for private information. The
|
|
* recommended way to do this is defining a struct which contains io_cq as
|
|
* the first member followed by private members and using its size and
|
|
* align. For example,
|
|
*
|
|
* struct snail_io_cq {
|
|
* struct io_cq icq;
|
|
* int poke_snail;
|
|
* int feed_snail;
|
|
* };
|
|
*
|
|
* struct elevator_type snail_elv_type {
|
|
* .ops = { ... },
|
|
* .icq_size = sizeof(struct snail_io_cq),
|
|
* .icq_align = __alignof__(struct snail_io_cq),
|
|
* ...
|
|
* };
|
|
*
|
|
* If icq_size is set, block core will manage icq's. All requests will
|
|
* have its ->elv.icq field set before elevator_ops->elevator_set_req_fn()
|
|
* is called and be holding a reference to the associated io_context.
|
|
*
|
|
* Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is
|
|
* called and, on destruction, ->elevator_exit_icq_fn(). Both functions
|
|
* are called with both the associated io_context and queue locks held.
|
|
*
|
|
* Elevator is allowed to lookup icq using ioc_lookup_icq() while holding
|
|
* queue lock but the returned icq is valid only until the queue lock is
|
|
* released. Elevators can not and should not try to create or destroy
|
|
* icq's.
|
|
*
|
|
* As icq's are linked from both ioc and q, the locking rules are a bit
|
|
* complex.
|
|
*
|
|
* - ioc lock nests inside q lock.
|
|
*
|
|
* - ioc->icq_list and icq->ioc_node are protected by ioc lock.
|
|
* q->icq_list and icq->q_node by q lock.
|
|
*
|
|
* - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq
|
|
* itself is protected by q lock. However, both the indexes and icq
|
|
* itself are also RCU managed and lookup can be performed holding only
|
|
* the q lock.
|
|
*
|
|
* - icq's are not reference counted. They are destroyed when either the
|
|
* ioc or q goes away. Each request with icq set holds an extra
|
|
* reference to ioc to ensure it stays until the request is completed.
|
|
*
|
|
* - Linking and unlinking icq's are performed while holding both ioc and q
|
|
* locks. Due to the lock ordering, q exit is simple but ioc exit
|
|
* requires reverse-order double lock dance.
|
|
*/
|
|
struct io_cq {
|
|
struct request_queue *q;
|
|
struct io_context *ioc;
|
|
|
|
/*
|
|
* q_node and ioc_node link io_cq through icq_list of q and ioc
|
|
* respectively. Both fields are unused once ioc_exit_icq() is
|
|
* called and shared with __rcu_icq_cache and __rcu_head which are
|
|
* used for RCU free of io_cq.
|
|
*/
|
|
union {
|
|
struct list_head q_node;
|
|
struct kmem_cache *__rcu_icq_cache;
|
|
};
|
|
union {
|
|
struct hlist_node ioc_node;
|
|
struct rcu_head __rcu_head;
|
|
};
|
|
|
|
unsigned int flags;
|
|
};
|
|
|
|
/*
|
|
* I/O subsystem state of the associated processes. It is refcounted
|
|
* and kmalloc'ed. These could be shared between processes.
|
|
*/
|
|
struct io_context {
|
|
atomic_long_t refcount;
|
|
atomic_t active_ref;
|
|
atomic_t nr_tasks;
|
|
|
|
/* all the fields below are protected by this lock */
|
|
spinlock_t lock;
|
|
|
|
unsigned short ioprio;
|
|
|
|
/*
|
|
* For request batching
|
|
*/
|
|
int nr_batch_requests; /* Number of requests left in the batch */
|
|
unsigned long last_waited; /* Time last woken after wait for request */
|
|
|
|
struct radix_tree_root icq_tree;
|
|
struct io_cq __rcu *icq_hint;
|
|
struct hlist_head icq_list;
|
|
|
|
struct work_struct release_work;
|
|
};
|
|
|
|
/**
|
|
* get_io_context_active - get active reference on ioc
|
|
* @ioc: ioc of interest
|
|
*
|
|
* Only iocs with active reference can issue new IOs. This function
|
|
* acquires an active reference on @ioc. The caller must already have an
|
|
* active reference on @ioc.
|
|
*/
|
|
static inline void get_io_context_active(struct io_context *ioc)
|
|
{
|
|
WARN_ON_ONCE(atomic_long_read(&ioc->refcount) <= 0);
|
|
WARN_ON_ONCE(atomic_read(&ioc->active_ref) <= 0);
|
|
atomic_long_inc(&ioc->refcount);
|
|
atomic_inc(&ioc->active_ref);
|
|
}
|
|
|
|
static inline void ioc_task_link(struct io_context *ioc)
|
|
{
|
|
get_io_context_active(ioc);
|
|
|
|
WARN_ON_ONCE(atomic_read(&ioc->nr_tasks) <= 0);
|
|
atomic_inc(&ioc->nr_tasks);
|
|
}
|
|
|
|
/*
 * io_context entry points implemented outside this header.
 *
 * Without CONFIG_BLOCK, put_io_context() and exit_io_context() become
 * empty inline stubs.  put_io_context_active() and get_task_io_context()
 * have no stub, so they may only be called from code built with
 * CONFIG_BLOCK.
 */
struct task_struct;
#ifdef CONFIG_BLOCK
void put_io_context(struct io_context *ioc);
void put_io_context_active(struct io_context *ioc);
void exit_io_context(struct task_struct *task);
struct io_context *get_task_io_context(struct task_struct *task,
				       gfp_t gfp_flags, int node);
#else
struct io_context;
static inline void put_io_context(struct io_context *ioc) { }
static inline void exit_io_context(struct task_struct *task) { }
#endif
|
|
|
|
#endif
|