diff --git a/block/Kconfig b/block/Kconfig
index 5136ad4bb6d5..1d4d624492fc 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -125,4 +125,9 @@ config BLOCK_COMPAT
 	depends on BLOCK && COMPAT
 	default y
 
+config BLK_MQ_PCI
+	bool
+	depends on BLOCK && PCI
+	default y
+
 source block/Kconfig.iosched
diff --git a/block/Makefile b/block/Makefile
index 9eda2322b2d4..37a0d93f97bb 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -22,4 +22,4 @@ obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
 obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
 obj-$(CONFIG_BLK_CMDLINE_PARSER)	+= cmdline-parser.o
 obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
-
+obj-$(CONFIG_BLK_MQ_PCI)	+= blk-mq-pci.o
diff --git a/block/blk-flush.c b/block/blk-flush.c
index d308def812db..6a14b68b9135 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -232,7 +232,7 @@ static void flush_end_io(struct request *flush_rq, int error)
 
 		/* release the tag's ownership to the req cloned from */
 		spin_lock_irqsave(&fq->mq_flush_lock, flags);
-		hctx = q->mq_ops->map_queue(q, flush_rq->mq_ctx->cpu);
+		hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu);
 		blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
 		flush_rq->tag = -1;
 	}
@@ -325,7 +325,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 		flush_rq->tag = first_rq->tag;
 		fq->orig_rq = first_rq;
 
-		hctx = q->mq_ops->map_queue(q, first_rq->mq_ctx->cpu);
+		hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
 		blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
 	}
 
@@ -358,7 +358,7 @@ static void mq_flush_data_end_io(struct request *rq, int error)
 	unsigned long flags;
 	struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx);
 
-	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+	hctx = blk_mq_map_queue(q, ctx->cpu);
 
 	/*
 	 * After populating an empty queue, kick it to avoid stall.  Read
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index d0634bcf322f..19b1d9c5f07e 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -31,14 +31,16 @@ static int get_first_sibling(unsigned int cpu)
 	return cpu;
 }
 
-int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
-			    const struct cpumask *online_mask)
+int blk_mq_map_queues(struct blk_mq_tag_set *set)
 {
+	unsigned int *map = set->mq_map;
+	unsigned int nr_queues = set->nr_hw_queues;
+	const struct cpumask *online_mask = cpu_online_mask;
 	unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
 	cpumask_var_t cpus;
 
 	if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
-		return 1;
+		return -ENOMEM;
 
 	cpumask_clear(cpus);
 	nr_cpus = nr_uniq_cpus = 0;
@@ -86,23 +88,6 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
 	return 0;
 }
 
-unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
-{
-	unsigned int *map;
-
-	/* If cpus are offline, map them to first hctx */
-	map = kzalloc_node(sizeof(*map) * nr_cpu_ids, GFP_KERNEL,
-			   set->numa_node);
-	if (!map)
-		return NULL;
-
-	if (!blk_mq_update_queue_map(map, set->nr_hw_queues, cpu_online_mask))
-		return map;
-
-	kfree(map);
-	return NULL;
-}
-
 /*
  * We have no quick way of doing reverse lookups. This is only used at
  * queue init time, so runtime isn't important.
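Illustration, not part of the patch: the contract blk_mq_map_queues() fills in is simply a per-CPU array of hardware queue indices, consumed by the blk_mq_map_queue() lookup this series moves into block/blk-mq.h. A standalone toy model of that contract, with a naive round-robin fill standing in for the kernel's sibling-aware spreading (all toy_* names are made up):

/* toy_mq_map.c: illustrative userspace model only, not kernel code. */
#include <stdio.h>

#define TOY_NR_CPUS	8

struct toy_set {
	unsigned int nr_hw_queues;
	unsigned int mq_map[TOY_NR_CPUS];	/* one entry per CPU */
};

/* Naive stand-in for blk_mq_map_queues(); the real function also folds
 * hyperthread siblings onto the same hardware queue. */
static void toy_map_queues(struct toy_set *set)
{
	unsigned int cpu;

	for (cpu = 0; cpu < TOY_NR_CPUS; cpu++)
		set->mq_map[cpu] = cpu % set->nr_hw_queues;
}

/* The same lookup the new inline blk_mq_map_queue() performs. */
static unsigned int toy_map_queue(const struct toy_set *set, unsigned int cpu)
{
	return set->mq_map[cpu];
}

int main(void)
{
	struct toy_set set = { .nr_hw_queues = 3 };
	unsigned int cpu;

	toy_map_queues(&set);
	for (cpu = 0; cpu < TOY_NR_CPUS; cpu++)
		printf("cpu %u -> hw queue %u\n", cpu, toy_map_queue(&set, cpu));
	return 0;
}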
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c
new file mode 100644
index 000000000000..966c2169762e
--- /dev/null
+++ b/block/blk-mq-pci.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016 Christoph Hellwig.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/kobject.h>
+#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
+#include <linux/blk-mq-pci.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+
+/**
+ * blk_mq_pci_map_queues - provide a default queue mapping for a PCI device
+ * @set:	tagset to provide the mapping for
+ * @pdev:	PCI device associated with @set.
+ *
+ * This function assumes the PCI device @pdev has at least as many available
+ * interrupt vectors as @set has queues.  It will then query the vector
+ * corresponding to each queue for its affinity mask and build a queue
+ * mapping that maps each queue to the CPUs that have irq affinity for the
+ * corresponding vector.
+ */
+int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev)
+{
+	const struct cpumask *mask;
+	unsigned int queue, cpu;
+
+	for (queue = 0; queue < set->nr_hw_queues; queue++) {
+		mask = pci_irq_get_affinity(pdev, queue);
+		if (!mask)
+			return -EINVAL;
+
+		for_each_cpu(cpu, mask)
+			set->mq_map[cpu] = queue;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues);
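Illustration, not part of the patch: a driver wires this helper up through the ->map_queues callback added later in this series. The foo_* names below are hypothetical, but the pattern mirrors the nvme conversion further down; it requires the vectors to have been allocated with PCI_IRQ_AFFINITY.

/* Hypothetical driver sketch; blk_mq_pci_map_queues() and the
 * ->map_queues hook are real, everything foo_* is made up. */
#include <linux/blk-mq.h>
#include <linux/blk-mq-pci.h>
#include <linux/pci.h>

struct foo_dev {
	struct pci_dev *pdev;
};

static int foo_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
{
	return BLK_MQ_RQ_QUEUE_OK;	/* stub */
}

static int foo_map_queues(struct blk_mq_tag_set *set)
{
	struct foo_dev *foo = set->driver_data;

	return blk_mq_pci_map_queues(set, foo->pdev);
}

static struct blk_mq_ops foo_mq_ops = {
	.queue_rq	= foo_queue_rq,
	.map_queues	= foo_map_queues,
};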
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index cef618f6fc92..dcf5ce3ba4bf 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -141,8 +141,7 @@ static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt,
 		io_schedule();
 
 		data->ctx = blk_mq_get_ctx(data->q);
-		data->hctx = data->q->mq_ops->map_queue(data->q,
-				data->ctx->cpu);
+		data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
 		if (data->flags & BLK_MQ_REQ_RESERVED) {
 			bt = &data->hctx->tags->breserved_tags;
 		} else {
@@ -399,11 +398,6 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 	if (!tags)
 		return NULL;
 
-	if (!zalloc_cpumask_var(&tags->cpumask, GFP_KERNEL)) {
-		kfree(tags);
-		return NULL;
-	}
-
 	tags->nr_tags = total_tags;
 	tags->nr_reserved_tags = reserved_tags;
 
@@ -414,7 +408,6 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
 {
 	sbitmap_queue_free(&tags->bitmap_tags);
 	sbitmap_queue_free(&tags->breserved_tags);
-	free_cpumask_var(tags->cpumask);
 	kfree(tags);
 }
 
@@ -453,7 +446,7 @@ u32 blk_mq_unique_tag(struct request *rq)
 	int hwq = 0;
 
 	if (q->mq_ops) {
-		hctx = q->mq_ops->map_queue(q, rq->mq_ctx->cpu);
+		hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
 		hwq = hctx->queue_num;
 	}
 
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 09f4cc0aaa84..d1662734dc53 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -17,8 +17,6 @@ struct blk_mq_tags {
 
 	struct request **rqs;
 	struct list_head page_list;
-
-	cpumask_var_t cpumask;
 };
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index dc5f47f60931..b65f572a4faf 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -224,7 +224,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 		return ERR_PTR(ret);
 
 	ctx = blk_mq_get_ctx(q);
-	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+	hctx = blk_mq_map_queue(q, ctx->cpu);
 	blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
 	rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
 	blk_mq_put_ctx(ctx);
@@ -319,11 +319,7 @@ EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request);
 
 void blk_mq_free_request(struct request *rq)
 {
-	struct blk_mq_hw_ctx *hctx;
-	struct request_queue *q = rq->q;
-
-	hctx = q->mq_ops->map_queue(q, rq->mq_ctx->cpu);
-	blk_mq_free_hctx_request(hctx, rq);
+	blk_mq_free_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
 }
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
@@ -1058,9 +1054,7 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	struct request_queue *q = rq->q;
-	struct blk_mq_hw_ctx *hctx;
-
-	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
 
 	spin_lock(&ctx->lock);
 	__blk_mq_insert_request(hctx, rq, at_head);
@@ -1077,12 +1071,10 @@ static void blk_mq_insert_requests(struct request_queue *q,
 				     int depth,
 				     bool from_schedule)
 {
-	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
 
 	trace_block_unplug(q, depth, !from_schedule);
 
-	hctx = q->mq_ops->map_queue(q, ctx->cpu);
-
 	/*
 	 * preemption doesn't flush plug list, so it's possible ctx->cpu is
 	 * offline now
@@ -1216,7 +1208,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 
 	blk_queue_enter_live(q);
 	ctx = blk_mq_get_ctx(q);
-	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+	hctx = blk_mq_map_queue(q, ctx->cpu);
 
 	if (rw_is_sync(bio_op(bio), bio->bi_opf))
 		op_flags |= REQ_SYNC;
@@ -1235,8 +1227,7 @@ static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie)
 {
 	int ret;
 	struct request_queue *q = rq->q;
-	struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q,
-			rq->mq_ctx->cpu);
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
 	struct blk_mq_queue_data bd = {
 		.rq = rq,
 		.list = NULL,
@@ -1440,15 +1431,6 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	return cookie;
 }
 
-/*
- * Default mapping to a software queue, since we use one per CPU.
- */
-struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu)
-{
-	return q->queue_hw_ctx[q->mq_map[cpu]];
-}
-EXPORT_SYMBOL(blk_mq_map_queue);
-
 static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		unsigned int hctx_idx)
 {
@@ -1757,7 +1739,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		if (!cpu_online(i))
 			continue;
 
-		hctx = q->mq_ops->map_queue(q, i);
+		hctx = blk_mq_map_queue(q, i);
 
 		/*
 		 * Set local node, IFF we have more than one hw queue. If
@@ -1795,7 +1777,7 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 			continue;
 
 		ctx = per_cpu_ptr(q->queue_ctx, i);
-		hctx = q->mq_ops->map_queue(q, i);
+		hctx = blk_mq_map_queue(q, i);
 
 		cpumask_set_cpu(i, hctx->cpumask);
 		ctx->index_hw = hctx->nr_ctx;
@@ -1824,7 +1806,6 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 		hctx->tags = set->tags[i];
 		WARN_ON(!hctx->tags);
 
-		cpumask_copy(hctx->tags->cpumask, hctx->cpumask);
 		/*
 		 * Set the map size to the number of mapped software queues.
 		 * This is more accurate and more efficient than looping
@@ -1918,7 +1899,6 @@ void blk_mq_release(struct request_queue *q)
 		kfree(hctx);
 	}
 
-	kfree(q->mq_map);
 	q->mq_map = NULL;
 
 	kfree(q->queue_hw_ctx);
@@ -2017,9 +1997,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	if (!q->queue_hw_ctx)
 		goto err_percpu;
 
-	q->mq_map = blk_mq_make_queue_map(set);
-	if (!q->mq_map)
-		goto err_map;
+	q->mq_map = set->mq_map;
 
 	blk_mq_realloc_hw_ctxs(set, q);
 	if (!q->nr_hw_queues)
@@ -2069,8 +2047,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	return q;
 
 err_hctxs:
-	kfree(q->mq_map);
-err_map:
 	kfree(q->queue_hw_ctx);
 err_percpu:
 	free_percpu(q->queue_ctx);
@@ -2102,8 +2078,6 @@ static void blk_mq_queue_reinit(struct request_queue *q,
 
 	blk_mq_sysfs_unregister(q);
 
-	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
-
 	/*
 	 * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
 	 * we should change hctx numa_node according to new topology (this
@@ -2242,12 +2216,6 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 	return 0;
 }
 
-struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags)
-{
-	return tags->cpumask;
-}
-EXPORT_SYMBOL_GPL(blk_mq_tags_cpumask);
-
 /*
  * Alloc a tag set to be associated with one or more request queues.
  * May fail with EINVAL for various error conditions. May adjust the
@@ -2256,6 +2224,8 @@ EXPORT_SYMBOL_GPL(blk_mq_tags_cpumask);
  */
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 {
+	int ret;
+
 	BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS);
 
 	if (!set->nr_hw_queues)
@@ -2265,7 +2235,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
 		return -EINVAL;
 
-	if (!set->ops->queue_rq || !set->ops->map_queue)
+	if (!set->ops->queue_rq)
 		return -EINVAL;
 
 	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
@@ -2294,17 +2264,35 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (!set->tags)
 		return -ENOMEM;
 
-	if (blk_mq_alloc_rq_maps(set))
-		goto enomem;
+	ret = -ENOMEM;
+	set->mq_map = kzalloc_node(sizeof(*set->mq_map) * nr_cpu_ids,
+				   GFP_KERNEL, set->numa_node);
+	if (!set->mq_map)
+		goto out_free_tags;
+
+	if (set->ops->map_queues)
+		ret = set->ops->map_queues(set);
+	else
+		ret = blk_mq_map_queues(set);
+	if (ret)
+		goto out_free_mq_map;
+
+	ret = blk_mq_alloc_rq_maps(set);
+	if (ret)
+		goto out_free_mq_map;
 
 	mutex_init(&set->tag_list_lock);
 	INIT_LIST_HEAD(&set->tag_list);
 
 	return 0;
-enomem:
+
+out_free_mq_map:
+	kfree(set->mq_map);
+	set->mq_map = NULL;
+out_free_tags:
 	kfree(set->tags);
 	set->tags = NULL;
-	return -ENOMEM;
+	return ret;
 }
 EXPORT_SYMBOL(blk_mq_alloc_tag_set);
 
@@ -2317,6 +2305,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 			blk_mq_free_rq_map(set, set->tags[i], i);
 	}
 
+	kfree(set->mq_map);
+	set->mq_map = NULL;
+
 	kfree(set->tags);
 	set->tags = NULL;
 }
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 9b15d2ef7f7b..df6474cb5a4c 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -45,11 +45,15 @@ void blk_mq_disable_hotplug(void);
 /*
  * CPU -> queue mappings
  */
-extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
-extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
-				   const struct cpumask *online_mask);
+int blk_mq_map_queues(struct blk_mq_tag_set *set);
 extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
 
+static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
+		int cpu)
+{
+	return q->queue_hw_ctx[q->mq_map[cpu]];
+}
+
 /*
  * sysfs helpers
  */
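Illustration, not part of the patch: with the map now owned by the tag set, it is built once in blk_mq_alloc_tag_set() and shared by every queue created from that set (q->mq_map = set->mq_map above). A hypothetical driver's setup order, with all qux_* names made up:

/* Hypothetical setup sketch; qux_* names are assumptions. */
#include <linux/blk-mq.h>
#include <linux/err.h>

struct qux_dev {
	struct blk_mq_tag_set	tag_set;
	struct request_queue	*queue;
};

static int qux_setup(struct qux_dev *qux, struct blk_mq_ops *ops)
{
	int ret;

	qux->tag_set.ops = ops;
	qux->tag_set.nr_hw_queues = 4;
	qux->tag_set.queue_depth = 64;
	qux->tag_set.numa_node = NUMA_NO_NODE;
	qux->tag_set.driver_data = qux;

	/* Allocates set->mq_map and runs ->map_queues (or the default). */
	ret = blk_mq_alloc_tag_set(&qux->tag_set);
	if (ret)
		return ret;

	/* The request queue borrows the map: q->mq_map = set->mq_map. */
	qux->queue = blk_mq_init_queue(&qux->tag_set);
	if (IS_ERR(qux->queue)) {
		blk_mq_free_tag_set(&qux->tag_set);
		return PTR_ERR(qux->queue);
	}
	return 0;
}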
diff --git a/block/blk.h b/block/blk.h
index c37492f5edaa..74444c49078f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -39,14 +39,9 @@ extern struct ida blk_queue_ida;
 static inline struct blk_flush_queue *blk_get_flush_queue(
 		struct request_queue *q, struct blk_mq_ctx *ctx)
 {
-	struct blk_mq_hw_ctx *hctx;
-
-	if (!q->mq_ops)
-		return q->fq;
-
-	hctx = q->mq_ops->map_queue(q, ctx->cpu);
-
-	return hctx->fq;
+	if (q->mq_ops)
+		return blk_mq_map_queue(q, ctx->cpu)->fq;
+	return q->fq;
 }
 
 static inline void __blk_get_queue(struct request_queue *q)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c9f2107f7095..cbdb3b162718 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1703,7 +1703,6 @@ static int loop_init_request(void *data, struct request *rq,
 
 static struct blk_mq_ops loop_mq_ops = {
 	.queue_rq       = loop_queue_rq,
-	.map_queue      = blk_mq_map_queue,
 	.init_request	= loop_init_request,
 };
 
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 88c46853dbb5..3cfd879267b2 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3895,7 +3895,6 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
 
 static struct blk_mq_ops mtip_mq_ops = {
 	.queue_rq	= mtip_queue_rq,
-	.map_queue	= blk_mq_map_queue,
 	.init_request	= mtip_init_cmd,
 	.exit_request	= mtip_free_cmd,
 	.complete	= mtip_softirq_done_fn,
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index ccfcfc11399a..ba405b55329f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -881,7 +881,6 @@ static int nbd_init_request(void *data, struct request *rq,
 
 static struct blk_mq_ops nbd_mq_ops = {
 	.queue_rq	= nbd_queue_rq,
-	.map_queue	= blk_mq_map_queue,
 	.init_request	= nbd_init_request,
 	.timeout	= nbd_xmit_timeout,
 };
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 91e1de898daf..ba6f4a2e73db 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -394,7 +394,6 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 
 static struct blk_mq_ops null_mq_ops = {
 	.queue_rq       = null_queue_rq,
-	.map_queue      = blk_mq_map_queue,
 	.init_hctx	= null_init_hctx,
 	.complete	= null_softirq_done_fn,
 };
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 6c6519f6492a..c1f84df7838b 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3621,7 +3621,6 @@ static int rbd_init_request(void *data, struct request *rq,
 
 static struct blk_mq_ops rbd_mq_ops = {
 	.queue_rq	= rbd_queue_rq,
-	.map_queue	= blk_mq_map_queue,
 	.init_request	= rbd_init_request,
 };
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 93b1aaa5ba3b..2dc5c96c186a 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -542,7 +542,6 @@ static int virtblk_init_request(void *data, struct request *rq,
 
 static struct blk_mq_ops virtio_mq_ops = {
 	.queue_rq	= virtio_queue_rq,
-	.map_queue	= blk_mq_map_queue,
 	.complete	= virtblk_request_done,
 	.init_request	= virtblk_init_request,
 };
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 88ef6d4729b4..9908597c5209 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -909,7 +909,6 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 static struct blk_mq_ops blkfront_mq_ops = {
 	.queue_rq = blkif_queue_rq,
-	.map_queue = blk_mq_map_queue,
 };
 
 static void blkif_set_queue_limits(struct blkfront_info *info)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 182b67947dad..5eacce1ef88b 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -938,7 +938,6 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 static struct blk_mq_ops dm_mq_ops = {
 	.queue_rq = dm_mq_queue_rq,
-	.map_queue = blk_mq_map_queue,
 	.complete = dm_softirq_done,
 	.init_request = dm_mq_init_request,
 };
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index ebf46ad2d513..d1e6931c132f 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -351,7 +351,6 @@ static int ubiblock_init_request(void *data, struct request *req,
 static struct blk_mq_ops ubiblock_mq_ops = {
 	.queue_rq       = ubiblock_queue_rq,
 	.init_request   = ubiblock_init_request,
-	.map_queue      = blk_mq_map_queue,
 };
 
 static DEFINE_IDR(ubiblock_minor_idr);
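Illustration, not part of the patch: for the drivers converted above, dropping .map_queue is the whole change, because blk_mq_alloc_tag_set() now falls back to blk_mq_map_queues() whenever ->map_queues is left unset (see the blk-mq.c hunk earlier). A minimal ops table after this series, with hypothetical bar_* names:

/* Hypothetical minimal driver; bar_* names are assumptions. */
#include <linux/blk-mq.h>

static int bar_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
{
	return BLK_MQ_RQ_QUEUE_OK;	/* stub */
}

static struct blk_mq_ops bar_mq_ops = {
	.queue_rq = bar_queue_rq,
	/* no .map_queues: blk_mq_alloc_tag_set() applies the default
	 * blk_mq_map_queues() spreading */
};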
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 4669c052239e..329381a28edf 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1851,9 +1851,6 @@ static void nvme_scan_work(struct work_struct *work)
 	list_sort(NULL, &ctrl->namespaces, ns_cmp);
 	mutex_unlock(&ctrl->namespaces_mutex);
 	kfree(id);
-
-	if (ctrl->ops->post_scan)
-		ctrl->ops->post_scan(ctrl);
 }
 
 void nvme_queue_scan(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index b0a9ec681685..d47f5a5d18c7 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -185,7 +185,6 @@ struct nvme_ctrl_ops {
 	int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
 	int (*reset_ctrl)(struct nvme_ctrl *ctrl);
 	void (*free_ctrl)(struct nvme_ctrl *ctrl);
-	void (*post_scan)(struct nvme_ctrl *ctrl);
 	void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
 	int (*delete_ctrl)(struct nvme_ctrl *ctrl);
 	const char *(*get_subsysnqn)(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 60f7eab11865..68ef1875e8a8 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -16,6 +16,7 @@
 #include <linux/bitops.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
+#include <linux/blk-mq-pci.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
@@ -88,7 +89,6 @@ struct nvme_dev {
 	unsigned max_qid;
 	int q_depth;
 	u32 db_stride;
-	struct msix_entry *entry;
 	void __iomem *bar;
 	struct work_struct reset_work;
 	struct work_struct remove_work;
@@ -201,6 +201,11 @@ static unsigned int nvme_cmd_size(struct nvme_dev *dev)
 		nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES);
 }
 
+static int nvmeq_irq(struct nvme_queue *nvmeq)
+{
+	return pci_irq_vector(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector);
+}
+
 static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 				unsigned int hctx_idx)
 {
@@ -263,6 +268,13 @@ static int nvme_init_request(void *data, struct request *req,
 	return 0;
 }
 
+static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
+{
+	struct nvme_dev *dev = set->driver_data;
+
+	return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev));
+}
+
 /**
  * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
  * @nvmeq: The queue to use
@@ -960,7 +972,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 		spin_unlock_irq(&nvmeq->q_lock);
 		return 1;
 	}
-	vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
+	vector = nvmeq_irq(nvmeq);
 	nvmeq->dev->online_queues--;
 	nvmeq->cq_vector = -1;
 	spin_unlock_irq(&nvmeq->q_lock);
@@ -968,7 +980,6 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
 		blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
 
-	irq_set_affinity_hint(vector, NULL);
 	free_irq(vector, nvmeq);
 
 	return 0;
@@ -1075,15 +1086,14 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	return NULL;
 }
 
-static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
-							const char *name)
+static int queue_request_irq(struct nvme_queue *nvmeq)
 {
 	if (use_threaded_interrupts)
-		return request_threaded_irq(dev->entry[nvmeq->cq_vector].vector,
-					nvme_irq_check, nvme_irq, IRQF_SHARED,
-					name, nvmeq);
-	return request_irq(dev->entry[nvmeq->cq_vector].vector, nvme_irq,
-				IRQF_SHARED, name, nvmeq);
+		return request_threaded_irq(nvmeq_irq(nvmeq), nvme_irq_check,
+				nvme_irq, IRQF_SHARED, nvmeq->irqname, nvmeq);
+	else
+		return request_irq(nvmeq_irq(nvmeq), nvme_irq, IRQF_SHARED,
+				nvmeq->irqname, nvmeq);
 }
 
 static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
@@ -1114,7 +1124,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	if (result < 0)
 		goto release_cq;
 
-	result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
+	result = queue_request_irq(nvmeq);
 	if (result < 0)
 		goto release_sq;
 
@@ -1131,7 +1141,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 static struct blk_mq_ops nvme_mq_admin_ops = {
 	.queue_rq	= nvme_queue_rq,
 	.complete	= nvme_complete_rq,
-	.map_queue	= blk_mq_map_queue,
 	.init_hctx	= nvme_admin_init_hctx,
 	.exit_hctx      = nvme_admin_exit_hctx,
 	.init_request	= nvme_admin_init_request,
@@ -1141,9 +1150,9 @@ static struct blk_mq_ops nvme_mq_admin_ops = {
 static struct blk_mq_ops nvme_mq_ops = {
 	.queue_rq	= nvme_queue_rq,
 	.complete	= nvme_complete_rq,
-	.map_queue	= blk_mq_map_queue,
 	.init_hctx	= nvme_init_hctx,
 	.init_request	= nvme_init_request,
+	.map_queues	= nvme_pci_map_queues,
 	.timeout	= nvme_timeout,
 	.poll		= nvme_poll,
 };
@@ -1234,7 +1243,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 		goto free_nvmeq;
 
 	nvmeq->cq_vector = 0;
-	result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
+	result = queue_request_irq(nvmeq);
 	if (result) {
 		nvmeq->cq_vector = -1;
 		goto free_nvmeq;
@@ -1382,7 +1391,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
 	struct nvme_queue *adminq = dev->queues[0];
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
-	int result, i, vecs, nr_io_queues, size;
+	int result, nr_io_queues, size;
 
 	nr_io_queues = num_online_cpus();
 	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
@@ -1417,29 +1426,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	}
 
 	/* Deregister the admin queue's interrupt */
-	free_irq(dev->entry[0].vector, adminq);
+	free_irq(pci_irq_vector(pdev, 0), adminq);
 
 	/*
 	 * If we enable msix early due to not intx, disable it again before
 	 * setting up the full range we need.
 	 */
-	if (pdev->msi_enabled)
-		pci_disable_msi(pdev);
-	else if (pdev->msix_enabled)
-		pci_disable_msix(pdev);
-
-	for (i = 0; i < nr_io_queues; i++)
-		dev->entry[i].entry = i;
-	vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues);
-	if (vecs < 0) {
-		vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32));
-		if (vecs < 0) {
-			vecs = 1;
-		} else {
-			for (i = 0; i < vecs; i++)
-				dev->entry[i].vector = i + pdev->irq;
-		}
-	}
+	pci_free_irq_vectors(pdev);
+	nr_io_queues = pci_alloc_irq_vectors(pdev, 1, nr_io_queues,
+			PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY);
+	if (nr_io_queues <= 0)
+		return -EIO;
+	dev->max_qid = nr_io_queues;
 
 	/*
 	 * Should investigate if there's a performance win from allocating
@@ -1447,10 +1445,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	 * path to scale better, even if the receive path is limited by the
 	 * number of interrupts.
 	 */
-	nr_io_queues = vecs;
-	dev->max_qid = nr_io_queues;
 
-	result = queue_request_irq(dev, adminq, adminq->irqname);
+	result = queue_request_irq(adminq);
 	if (result) {
 		adminq->cq_vector = -1;
 		goto free_queues;
@@ -1462,23 +1458,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	return result;
 }
 
-static void nvme_pci_post_scan(struct nvme_ctrl *ctrl)
-{
-	struct nvme_dev *dev = to_nvme_dev(ctrl);
-	struct nvme_queue *nvmeq;
-	int i;
-
-	for (i = 0; i < dev->online_queues; i++) {
-		nvmeq = dev->queues[i];
-
-		if (!nvmeq->tags || !(*nvmeq->tags))
-			continue;
-
-		irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
-			blk_mq_tags_cpumask(*nvmeq->tags));
-	}
-}
-
 static void nvme_del_queue_end(struct request *req, int error)
 {
 	struct nvme_queue *nvmeq = req->end_io_data;
@@ -1615,15 +1594,9 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	 * interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll
 	 * adjust this later.
 	 */
-	if (pci_enable_msix(pdev, dev->entry, 1)) {
-		pci_enable_msi(pdev);
-		dev->entry[0].vector = pdev->irq;
-	}
-
-	if (!dev->entry[0].vector) {
-		result = -ENODEV;
-		goto disable;
-	}
+	result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (result < 0)
+		return result;
 
 	cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 
@@ -1665,10 +1638,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
-	if (pdev->msi_enabled)
-		pci_disable_msi(pdev);
-	else if (pdev->msix_enabled)
-		pci_disable_msix(pdev);
+	pci_free_irq_vectors(pdev);
 
 	if (pci_is_enabled(pdev)) {
 		pci_disable_pcie_error_reporting(pdev);
@@ -1743,7 +1713,6 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 	if (dev->ctrl.admin_q)
 		blk_put_queue(dev->ctrl.admin_q);
 	kfree(dev->queues);
-	kfree(dev->entry);
 	kfree(dev);
 }
 
@@ -1887,7 +1856,6 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
 	.reg_read64		= nvme_pci_reg_read64,
 	.reset_ctrl		= nvme_pci_reset_ctrl,
 	.free_ctrl		= nvme_pci_free_ctrl,
-	.post_scan		= nvme_pci_post_scan,
 	.submit_async_event	= nvme_pci_submit_async_event,
 };
 
@@ -1920,10 +1888,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
 	if (!dev)
 		return -ENOMEM;
-	dev->entry = kzalloc_node(num_possible_cpus() * sizeof(*dev->entry),
-							GFP_KERNEL, node);
-	if (!dev->entry)
-		goto free;
 	dev->queues = kzalloc_node((num_possible_cpus() + 1) * sizeof(void *),
 							GFP_KERNEL, node);
 	if (!dev->queues)
@@ -1964,7 +1928,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	nvme_dev_unmap(dev);
  free:
 	kfree(dev->queues);
-	kfree(dev->entry);
 	kfree(dev);
 	return result;
 }
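Illustration, not part of the patch: the nvme conversion above is an instance of the generic pci_alloc_irq_vectors() pattern that replaces manual msix_entry bookkeeping. A condensed sketch with hypothetical baz_* names; PCI_IRQ_ALL_TYPES tries MSI-X, then MSI, then legacy INTx, and PCI_IRQ_AFFINITY spreads the vectors over the online CPUs, which is what blk_mq_pci_map_queues() relies on:

/* Hypothetical driver sketch; only the pci_*_irq_vectors() and
 * request_irq()/free_irq() calls are real kernel API. */
#include <linux/interrupt.h>
#include <linux/pci.h>

static int baz_setup_irqs(struct pci_dev *pdev, unsigned int nr_queues,
			  irq_handler_t handler, void *priv)
{
	int nr_vecs, i, ret;

	nr_vecs = pci_alloc_irq_vectors(pdev, 1, nr_queues,
			PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY);
	if (nr_vecs < 0)
		return nr_vecs;

	for (i = 0; i < nr_vecs; i++) {
		/* pci_irq_vector() translates vector index to Linux irq. */
		ret = request_irq(pci_irq_vector(pdev, i), handler,
				  IRQF_SHARED, "baz", priv);
		if (ret)
			goto out_free;
	}
	return nr_vecs;

out_free:
	while (--i >= 0)
		free_irq(pci_irq_vector(pdev, i), priv);
	pci_free_irq_vectors(pdev);
	return ret;
}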
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 28632292e85e..5a8388177959 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1480,7 +1480,6 @@ static void nvme_rdma_complete_rq(struct request *rq)
 static struct blk_mq_ops nvme_rdma_mq_ops = {
 	.queue_rq	= nvme_rdma_queue_rq,
 	.complete	= nvme_rdma_complete_rq,
-	.map_queue	= blk_mq_map_queue,
 	.init_request	= nvme_rdma_init_request,
 	.exit_request	= nvme_rdma_exit_request,
 	.reinit_request	= nvme_rdma_reinit_request,
@@ -1492,7 +1491,6 @@ static struct blk_mq_ops nvme_rdma_mq_ops = {
 static struct blk_mq_ops nvme_rdma_admin_mq_ops = {
 	.queue_rq	= nvme_rdma_queue_rq,
 	.complete	= nvme_rdma_complete_rq,
-	.map_queue	= blk_mq_map_queue,
 	.init_request	= nvme_rdma_init_admin_request,
 	.exit_request	= nvme_rdma_exit_admin_request,
 	.reinit_request	= nvme_rdma_reinit_request,
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 395e60dad835..d5df77d686b2 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -273,7 +273,6 @@ static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 static struct blk_mq_ops nvme_loop_mq_ops = {
 	.queue_rq	= nvme_loop_queue_rq,
 	.complete	= nvme_loop_complete_rq,
-	.map_queue	= blk_mq_map_queue,
 	.init_request	= nvme_loop_init_request,
 	.init_hctx	= nvme_loop_init_hctx,
 	.timeout	= nvme_loop_timeout,
@@ -282,7 +281,6 @@ static struct blk_mq_ops nvme_loop_mq_ops = {
 static struct blk_mq_ops nvme_loop_admin_mq_ops = {
 	.queue_rq	= nvme_loop_queue_rq,
 	.complete	= nvme_loop_complete_rq,
-	.map_queue	= blk_mq_map_queue,
 	.init_request	= nvme_loop_init_admin_request,
 	.init_hctx	= nvme_loop_init_admin_hctx,
 	.timeout	= nvme_loop_timeout,
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c71344aebdbb..2cca9cffc63f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2077,7 +2077,6 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
 }
 
 static struct blk_mq_ops scsi_mq_ops = {
-	.map_queue	= blk_mq_map_queue,
 	.queue_rq	= scsi_queue_rq,
 	.complete	= scsi_softirq_done,
 	.timeout	= scsi_timeout,
diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h
new file mode 100644
index 000000000000..6ab595259112
--- /dev/null
+++ b/include/linux/blk-mq-pci.h
@@ -0,0 +1,9 @@
+#ifndef _LINUX_BLK_MQ_PCI_H
+#define _LINUX_BLK_MQ_PCI_H
+
+struct blk_mq_tag_set;
+struct pci_dev;
+
+int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev);
+
+#endif /* _LINUX_BLK_MQ_PCI_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 5daa0ef756dd..ef6aebf291ed 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -62,6 +62,7 @@ struct blk_mq_hw_ctx {
 };
 
 struct blk_mq_tag_set {
+	unsigned int		*mq_map;
 	struct blk_mq_ops	*ops;
 	unsigned int		nr_hw_queues;
 	unsigned int		queue_depth;	/* max hw supported */
@@ -85,7 +86,6 @@ struct blk_mq_queue_data {
 };
 
 typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
-typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
 typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@ -99,6 +99,7 @@ typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
 		bool);
 typedef void (busy_tag_iter_fn)(struct request *, void *, bool);
 typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int);
+typedef int (map_queues_fn)(struct blk_mq_tag_set *set);
 
 struct blk_mq_ops {
 	/*
@@ -107,11 +108,6 @@ struct blk_mq_ops {
 	 */
 	queue_rq_fn		*queue_rq;
 
-	/*
-	 * Map to specific hardware queue
-	 */
-	map_queue_fn		*map_queue;
-
 	/*
 	 * Called on request timeout
 	 */
@@ -144,6 +140,8 @@ struct blk_mq_ops {
 	init_request_fn		*init_request;
 	exit_request_fn		*exit_request;
 	reinit_request_fn	*reinit_request;
+
+	map_queues_fn		*map_queues;
 };
 
 enum {
@@ -199,7 +197,6 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op,
 		unsigned int flags, unsigned int hctx_idx);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
-struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
 
 enum {
 	BLK_MQ_UNIQUE_TAG_BITS = 16,
@@ -218,7 +215,6 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
 	return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
 }
 
-struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 
 int blk_mq_request_started(struct request *rq);
 void blk_mq_start_request(struct request *rq);