Merge branch 'for-4.9/block-irq' of git://git.kernel.dk/linux-block
Pull blk-mq irq/cpu mapping updates from Jens Axboe:
"This is the block-irq topic branch for 4.9-rc. It's mostly from Christoph, and it allows drivers to specify their own mappings, and more importantly, to share the blk-mq mappings with the IRQ affinity mappings. It's a good step towards making this work better out of the box."
* 'for-4.9/block-irq' of git://git.kernel.dk/linux-block:
- blk_mq: linux/blk-mq.h does not include all the headers it depends on
- blk-mq: kill unused blk_mq_create_mq_map()
- blk-mq: get rid of the cpumask in struct blk_mq_tags
- nvme: remove the post_scan callout
- nvme: switch to use pci_alloc_irq_vectors
- blk-mq: provide a default queue mapping for PCI device
- blk-mq: allow the driver to pass in a queue mapping
- blk-mq: remove ->map_queue
- blk-mq: only allocate a single mq_map per tag_set
- blk-mq: don't redistribute hardware queues on a CPU hotplug event
This commit is contained in:
commit
12e3d3cdd9
|
@ -125,4 +125,9 @@ config BLOCK_COMPAT
|
|||
depends on BLOCK && COMPAT
|
||||
default y
|
||||
|
||||
config BLK_MQ_PCI
|
||||
bool
|
||||
depends on BLOCK && PCI
|
||||
default y
|
||||
|
||||
source block/Kconfig.iosched
|
||||
|
|
|
@ -22,4 +22,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
|
|||
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
|
||||
obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
|
||||
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
|
||||
|
||||
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
|
||||
|
|
|
@ -232,7 +232,7 @@ static void flush_end_io(struct request *flush_rq, int error)
|
|||
|
||||
/* release the tag's ownership to the req cloned from */
|
||||
spin_lock_irqsave(&fq->mq_flush_lock, flags);
|
||||
hctx = q->mq_ops->map_queue(q, flush_rq->mq_ctx->cpu);
|
||||
hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu);
|
||||
blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
|
||||
flush_rq->tag = -1;
|
||||
}
|
||||
|
@ -325,7 +325,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
|
|||
flush_rq->tag = first_rq->tag;
|
||||
fq->orig_rq = first_rq;
|
||||
|
||||
hctx = q->mq_ops->map_queue(q, first_rq->mq_ctx->cpu);
|
||||
hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
|
||||
blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
|
||||
}
|
||||
|
||||
|
@ -358,7 +358,7 @@ static void mq_flush_data_end_io(struct request *rq, int error)
|
|||
unsigned long flags;
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx);
|
||||
|
||||
hctx = q->mq_ops->map_queue(q, ctx->cpu);
|
||||
hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
|
||||
/*
|
||||
* After populating an empty queue, kick it to avoid stall. Read
|
||||
|
|
|
@ -31,14 +31,16 @@ static int get_first_sibling(unsigned int cpu)
|
|||
return cpu;
|
||||
}
|
||||
|
||||
int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
|
||||
const struct cpumask *online_mask)
|
||||
int blk_mq_map_queues(struct blk_mq_tag_set *set)
|
||||
{
|
||||
unsigned int *map = set->mq_map;
|
||||
unsigned int nr_queues = set->nr_hw_queues;
|
||||
const struct cpumask *online_mask = cpu_online_mask;
|
||||
unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
|
||||
cpumask_var_t cpus;
|
||||
|
||||
if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
|
||||
return 1;
|
||||
return -ENOMEM;
|
||||
|
||||
cpumask_clear(cpus);
|
||||
nr_cpus = nr_uniq_cpus = 0;
|
||||
|
@ -86,23 +88,6 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
|
|||
return 0;
|
||||
}
|
||||
|
||||
unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
|
||||
{
|
||||
unsigned int *map;
|
||||
|
||||
/* If cpus are offline, map them to first hctx */
|
||||
map = kzalloc_node(sizeof(*map) * nr_cpu_ids, GFP_KERNEL,
|
||||
set->numa_node);
|
||||
if (!map)
|
||||
return NULL;
|
||||
|
||||
if (!blk_mq_update_queue_map(map, set->nr_hw_queues, cpu_online_mask))
|
||||
return map;
|
||||
|
||||
kfree(map);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* We have no quick way of doing reverse lookups. This is only used at
|
||||
* queue init time, so runtime isn't important.
|
||||
|
|
47
block/blk-mq-pci.c
Normal file
47
block/blk-mq-pci.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Christoph Hellwig.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*/
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-mq-pci.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/**
|
||||
* blk_mq_pci_map_queues - provide a default queue mapping for PCI device
|
||||
* @set: tagset to provide the mapping for
|
||||
* @pdev: PCI device associated with @set.
|
||||
*
|
||||
* This function assumes the PCI device @pdev has at least as many available
|
||||
* interrupt vectors as @set has queues. It will then query the vector
|
||||
* corresponding to each queue for its affinity mask and build a queue mapping
|
||||
* that maps a queue to the CPUs that have irq affinity for the corresponding
|
||||
* vector.
|
||||
*/
|
||||
int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev)
|
||||
{
|
||||
const struct cpumask *mask;
|
||||
unsigned int queue, cpu;
|
||||
|
||||
for (queue = 0; queue < set->nr_hw_queues; queue++) {
|
||||
mask = pci_irq_get_affinity(pdev, queue);
|
||||
if (!mask)
|
||||
return -EINVAL;
|
||||
|
||||
for_each_cpu(cpu, mask)
|
||||
set->mq_map[cpu] = queue;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues);
|
|
@ -141,8 +141,7 @@ static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt,
|
|||
io_schedule();
|
||||
|
||||
data->ctx = blk_mq_get_ctx(data->q);
|
||||
data->hctx = data->q->mq_ops->map_queue(data->q,
|
||||
data->ctx->cpu);
|
||||
data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
|
||||
if (data->flags & BLK_MQ_REQ_RESERVED) {
|
||||
bt = &data->hctx->tags->breserved_tags;
|
||||
} else {
|
||||
|
@ -399,11 +398,6 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
|
|||
if (!tags)
|
||||
return NULL;
|
||||
|
||||
if (!zalloc_cpumask_var(&tags->cpumask, GFP_KERNEL)) {
|
||||
kfree(tags);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tags->nr_tags = total_tags;
|
||||
tags->nr_reserved_tags = reserved_tags;
|
||||
|
||||
|
@ -414,7 +408,6 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
|
|||
{
|
||||
sbitmap_queue_free(&tags->bitmap_tags);
|
||||
sbitmap_queue_free(&tags->breserved_tags);
|
||||
free_cpumask_var(tags->cpumask);
|
||||
kfree(tags);
|
||||
}
|
||||
|
||||
|
@ -453,7 +446,7 @@ u32 blk_mq_unique_tag(struct request *rq)
|
|||
int hwq = 0;
|
||||
|
||||
if (q->mq_ops) {
|
||||
hctx = q->mq_ops->map_queue(q, rq->mq_ctx->cpu);
|
||||
hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
|
||||
hwq = hctx->queue_num;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,8 +17,6 @@ struct blk_mq_tags {
|
|||
|
||||
struct request **rqs;
|
||||
struct list_head page_list;
|
||||
|
||||
cpumask_var_t cpumask;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -224,7 +224,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
|
|||
return ERR_PTR(ret);
|
||||
|
||||
ctx = blk_mq_get_ctx(q);
|
||||
hctx = q->mq_ops->map_queue(q, ctx->cpu);
|
||||
hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
|
||||
rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
|
||||
blk_mq_put_ctx(ctx);
|
||||
|
@ -319,11 +319,7 @@ EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request);
|
|||
|
||||
void blk_mq_free_request(struct request *rq)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
struct request_queue *q = rq->q;
|
||||
|
||||
hctx = q->mq_ops->map_queue(q, rq->mq_ctx->cpu);
|
||||
blk_mq_free_hctx_request(hctx, rq);
|
||||
blk_mq_free_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_free_request);
|
||||
|
||||
|
@ -1058,9 +1054,7 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
|
|||
{
|
||||
struct blk_mq_ctx *ctx = rq->mq_ctx;
|
||||
struct request_queue *q = rq->q;
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
|
||||
hctx = q->mq_ops->map_queue(q, ctx->cpu);
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
|
||||
spin_lock(&ctx->lock);
|
||||
__blk_mq_insert_request(hctx, rq, at_head);
|
||||
|
@ -1077,12 +1071,10 @@ static void blk_mq_insert_requests(struct request_queue *q,
|
|||
bool from_schedule)
|
||||
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
|
||||
trace_block_unplug(q, depth, !from_schedule);
|
||||
|
||||
hctx = q->mq_ops->map_queue(q, ctx->cpu);
|
||||
|
||||
/*
|
||||
* preemption doesn't flush plug list, so it's possible ctx->cpu is
|
||||
* offline now
|
||||
|
@ -1216,7 +1208,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
|
|||
|
||||
blk_queue_enter_live(q);
|
||||
ctx = blk_mq_get_ctx(q);
|
||||
hctx = q->mq_ops->map_queue(q, ctx->cpu);
|
||||
hctx = blk_mq_map_queue(q, ctx->cpu);
|
||||
|
||||
if (rw_is_sync(bio_op(bio), bio->bi_opf))
|
||||
op_flags |= REQ_SYNC;
|
||||
|
@ -1235,8 +1227,7 @@ static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie)
|
|||
{
|
||||
int ret;
|
||||
struct request_queue *q = rq->q;
|
||||
struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q,
|
||||
rq->mq_ctx->cpu);
|
||||
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
|
||||
struct blk_mq_queue_data bd = {
|
||||
.rq = rq,
|
||||
.list = NULL,
|
||||
|
@ -1440,15 +1431,6 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
|
|||
return cookie;
|
||||
}
|
||||
|
||||
/*
|
||||
* Default mapping to a software queue, since we use one per CPU.
|
||||
*/
|
||||
struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu)
|
||||
{
|
||||
return q->queue_hw_ctx[q->mq_map[cpu]];
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_map_queue);
|
||||
|
||||
static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
|
||||
struct blk_mq_tags *tags, unsigned int hctx_idx)
|
||||
{
|
||||
|
@ -1757,7 +1739,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
|
|||
if (!cpu_online(i))
|
||||
continue;
|
||||
|
||||
hctx = q->mq_ops->map_queue(q, i);
|
||||
hctx = blk_mq_map_queue(q, i);
|
||||
|
||||
/*
|
||||
* Set local node, IFF we have more than one hw queue. If
|
||||
|
@ -1795,7 +1777,7 @@ static void blk_mq_map_swqueue(struct request_queue *q,
|
|||
continue;
|
||||
|
||||
ctx = per_cpu_ptr(q->queue_ctx, i);
|
||||
hctx = q->mq_ops->map_queue(q, i);
|
||||
hctx = blk_mq_map_queue(q, i);
|
||||
|
||||
cpumask_set_cpu(i, hctx->cpumask);
|
||||
ctx->index_hw = hctx->nr_ctx;
|
||||
|
@ -1824,7 +1806,6 @@ static void blk_mq_map_swqueue(struct request_queue *q,
|
|||
hctx->tags = set->tags[i];
|
||||
WARN_ON(!hctx->tags);
|
||||
|
||||
cpumask_copy(hctx->tags->cpumask, hctx->cpumask);
|
||||
/*
|
||||
* Set the map size to the number of mapped software queues.
|
||||
* This is more accurate and more efficient than looping
|
||||
|
@ -1918,7 +1899,6 @@ void blk_mq_release(struct request_queue *q)
|
|||
kfree(hctx);
|
||||
}
|
||||
|
||||
kfree(q->mq_map);
|
||||
q->mq_map = NULL;
|
||||
|
||||
kfree(q->queue_hw_ctx);
|
||||
|
@ -2017,9 +1997,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
|||
if (!q->queue_hw_ctx)
|
||||
goto err_percpu;
|
||||
|
||||
q->mq_map = blk_mq_make_queue_map(set);
|
||||
if (!q->mq_map)
|
||||
goto err_map;
|
||||
q->mq_map = set->mq_map;
|
||||
|
||||
blk_mq_realloc_hw_ctxs(set, q);
|
||||
if (!q->nr_hw_queues)
|
||||
|
@ -2069,8 +2047,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
|||
return q;
|
||||
|
||||
err_hctxs:
|
||||
kfree(q->mq_map);
|
||||
err_map:
|
||||
kfree(q->queue_hw_ctx);
|
||||
err_percpu:
|
||||
free_percpu(q->queue_ctx);
|
||||
|
@ -2102,8 +2078,6 @@ static void blk_mq_queue_reinit(struct request_queue *q,
|
|||
|
||||
blk_mq_sysfs_unregister(q);
|
||||
|
||||
blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
|
||||
|
||||
/*
|
||||
* redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
|
||||
* we should change hctx numa_node according to new topology (this
|
||||
|
@ -2242,12 +2216,6 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
|
|||
return 0;
|
||||
}
|
||||
|
||||
struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags)
|
||||
{
|
||||
return tags->cpumask;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_tags_cpumask);
|
||||
|
||||
/*
|
||||
* Alloc a tag set to be associated with one or more request queues.
|
||||
* May fail with EINVAL for various error conditions. May adjust the
|
||||
|
@ -2256,6 +2224,8 @@ EXPORT_SYMBOL_GPL(blk_mq_tags_cpumask);
|
|||
*/
|
||||
int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
|
||||
{
|
||||
int ret;
|
||||
|
||||
BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS);
|
||||
|
||||
if (!set->nr_hw_queues)
|
||||
|
@ -2265,7 +2235,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
|
|||
if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
|
||||
return -EINVAL;
|
||||
|
||||
if (!set->ops->queue_rq || !set->ops->map_queue)
|
||||
if (!set->ops->queue_rq)
|
||||
return -EINVAL;
|
||||
|
||||
if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
|
||||
|
@ -2294,17 +2264,35 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
|
|||
if (!set->tags)
|
||||
return -ENOMEM;
|
||||
|
||||
if (blk_mq_alloc_rq_maps(set))
|
||||
goto enomem;
|
||||
ret = -ENOMEM;
|
||||
set->mq_map = kzalloc_node(sizeof(*set->mq_map) * nr_cpu_ids,
|
||||
GFP_KERNEL, set->numa_node);
|
||||
if (!set->mq_map)
|
||||
goto out_free_tags;
|
||||
|
||||
if (set->ops->map_queues)
|
||||
ret = set->ops->map_queues(set);
|
||||
else
|
||||
ret = blk_mq_map_queues(set);
|
||||
if (ret)
|
||||
goto out_free_mq_map;
|
||||
|
||||
ret = blk_mq_alloc_rq_maps(set);
|
||||
if (ret)
|
||||
goto out_free_mq_map;
|
||||
|
||||
mutex_init(&set->tag_list_lock);
|
||||
INIT_LIST_HEAD(&set->tag_list);
|
||||
|
||||
return 0;
|
||||
enomem:
|
||||
|
||||
out_free_mq_map:
|
||||
kfree(set->mq_map);
|
||||
set->mq_map = NULL;
|
||||
out_free_tags:
|
||||
kfree(set->tags);
|
||||
set->tags = NULL;
|
||||
return -ENOMEM;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_alloc_tag_set);
|
||||
|
||||
|
@ -2317,6 +2305,9 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
|
|||
blk_mq_free_rq_map(set, set->tags[i], i);
|
||||
}
|
||||
|
||||
kfree(set->mq_map);
|
||||
set->mq_map = NULL;
|
||||
|
||||
kfree(set->tags);
|
||||
set->tags = NULL;
|
||||
}
|
||||
|
|
|
@ -45,11 +45,15 @@ void blk_mq_disable_hotplug(void);
|
|||
/*
|
||||
* CPU -> queue mappings
|
||||
*/
|
||||
extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
|
||||
extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
|
||||
const struct cpumask *online_mask);
|
||||
int blk_mq_map_queues(struct blk_mq_tag_set *set);
|
||||
extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
|
||||
|
||||
static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
|
||||
int cpu)
|
||||
{
|
||||
return q->queue_hw_ctx[q->mq_map[cpu]];
|
||||
}
|
||||
|
||||
/*
|
||||
* sysfs helpers
|
||||
*/
|
||||
|
|
11
block/blk.h
11
block/blk.h
|
@ -39,14 +39,9 @@ extern struct ida blk_queue_ida;
|
|||
static inline struct blk_flush_queue *blk_get_flush_queue(
|
||||
struct request_queue *q, struct blk_mq_ctx *ctx)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
|
||||
if (!q->mq_ops)
|
||||
return q->fq;
|
||||
|
||||
hctx = q->mq_ops->map_queue(q, ctx->cpu);
|
||||
|
||||
return hctx->fq;
|
||||
if (q->mq_ops)
|
||||
return blk_mq_map_queue(q, ctx->cpu)->fq;
|
||||
return q->fq;
|
||||
}
|
||||
|
||||
static inline void __blk_get_queue(struct request_queue *q)
|
||||
|
|
|
@ -1703,7 +1703,6 @@ static int loop_init_request(void *data, struct request *rq,
|
|||
|
||||
static struct blk_mq_ops loop_mq_ops = {
|
||||
.queue_rq = loop_queue_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.init_request = loop_init_request,
|
||||
};
|
||||
|
||||
|
|
|
@ -3895,7 +3895,6 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
|
|||
|
||||
static struct blk_mq_ops mtip_mq_ops = {
|
||||
.queue_rq = mtip_queue_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.init_request = mtip_init_cmd,
|
||||
.exit_request = mtip_free_cmd,
|
||||
.complete = mtip_softirq_done_fn,
|
||||
|
|
|
@ -881,7 +881,6 @@ static int nbd_init_request(void *data, struct request *rq,
|
|||
|
||||
static struct blk_mq_ops nbd_mq_ops = {
|
||||
.queue_rq = nbd_queue_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.init_request = nbd_init_request,
|
||||
.timeout = nbd_xmit_timeout,
|
||||
};
|
||||
|
|
|
@ -394,7 +394,6 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
|
|||
|
||||
static struct blk_mq_ops null_mq_ops = {
|
||||
.queue_rq = null_queue_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.init_hctx = null_init_hctx,
|
||||
.complete = null_softirq_done_fn,
|
||||
};
|
||||
|
|
|
@ -3621,7 +3621,6 @@ static int rbd_init_request(void *data, struct request *rq,
|
|||
|
||||
static struct blk_mq_ops rbd_mq_ops = {
|
||||
.queue_rq = rbd_queue_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.init_request = rbd_init_request,
|
||||
};
|
||||
|
||||
|
|
|
@ -542,7 +542,6 @@ static int virtblk_init_request(void *data, struct request *rq,
|
|||
|
||||
static struct blk_mq_ops virtio_mq_ops = {
|
||||
.queue_rq = virtio_queue_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.complete = virtblk_request_done,
|
||||
.init_request = virtblk_init_request,
|
||||
};
|
||||
|
|
|
@ -909,7 +909,6 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
|
|||
|
||||
static struct blk_mq_ops blkfront_mq_ops = {
|
||||
.queue_rq = blkif_queue_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
};
|
||||
|
||||
static void blkif_set_queue_limits(struct blkfront_info *info)
|
||||
|
|
|
@ -938,7 +938,6 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
|
|||
|
||||
static struct blk_mq_ops dm_mq_ops = {
|
||||
.queue_rq = dm_mq_queue_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.complete = dm_softirq_done,
|
||||
.init_request = dm_mq_init_request,
|
||||
};
|
||||
|
|
|
@ -351,7 +351,6 @@ static int ubiblock_init_request(void *data, struct request *req,
|
|||
static struct blk_mq_ops ubiblock_mq_ops = {
|
||||
.queue_rq = ubiblock_queue_rq,
|
||||
.init_request = ubiblock_init_request,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
};
|
||||
|
||||
static DEFINE_IDR(ubiblock_minor_idr);
|
||||
|
|
|
@ -1851,9 +1851,6 @@ static void nvme_scan_work(struct work_struct *work)
|
|||
list_sort(NULL, &ctrl->namespaces, ns_cmp);
|
||||
mutex_unlock(&ctrl->namespaces_mutex);
|
||||
kfree(id);
|
||||
|
||||
if (ctrl->ops->post_scan)
|
||||
ctrl->ops->post_scan(ctrl);
|
||||
}
|
||||
|
||||
void nvme_queue_scan(struct nvme_ctrl *ctrl)
|
||||
|
|
|
@ -185,7 +185,6 @@ struct nvme_ctrl_ops {
|
|||
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
|
||||
int (*reset_ctrl)(struct nvme_ctrl *ctrl);
|
||||
void (*free_ctrl)(struct nvme_ctrl *ctrl);
|
||||
void (*post_scan)(struct nvme_ctrl *ctrl);
|
||||
void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
|
||||
int (*delete_ctrl)(struct nvme_ctrl *ctrl);
|
||||
const char *(*get_subsysnqn)(struct nvme_ctrl *ctrl);
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include <linux/bitops.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/blk-mq-pci.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/errno.h>
|
||||
|
@ -88,7 +89,6 @@ struct nvme_dev {
|
|||
unsigned max_qid;
|
||||
int q_depth;
|
||||
u32 db_stride;
|
||||
struct msix_entry *entry;
|
||||
void __iomem *bar;
|
||||
struct work_struct reset_work;
|
||||
struct work_struct remove_work;
|
||||
|
@ -201,6 +201,11 @@ static unsigned int nvme_cmd_size(struct nvme_dev *dev)
|
|||
nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES);
|
||||
}
|
||||
|
||||
static int nvmeq_irq(struct nvme_queue *nvmeq)
|
||||
{
|
||||
return pci_irq_vector(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector);
|
||||
}
|
||||
|
||||
static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
|
||||
unsigned int hctx_idx)
|
||||
{
|
||||
|
@ -263,6 +268,13 @@ static int nvme_init_request(void *data, struct request *req,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
|
||||
{
|
||||
struct nvme_dev *dev = set->driver_data;
|
||||
|
||||
return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev));
|
||||
}
|
||||
|
||||
/**
|
||||
* __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
|
||||
* @nvmeq: The queue to use
|
||||
|
@ -960,7 +972,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
|
|||
spin_unlock_irq(&nvmeq->q_lock);
|
||||
return 1;
|
||||
}
|
||||
vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
|
||||
vector = nvmeq_irq(nvmeq);
|
||||
nvmeq->dev->online_queues--;
|
||||
nvmeq->cq_vector = -1;
|
||||
spin_unlock_irq(&nvmeq->q_lock);
|
||||
|
@ -968,7 +980,6 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
|
|||
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
|
||||
blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
|
||||
|
||||
irq_set_affinity_hint(vector, NULL);
|
||||
free_irq(vector, nvmeq);
|
||||
|
||||
return 0;
|
||||
|
@ -1075,15 +1086,14 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
|
||||
const char *name)
|
||||
static int queue_request_irq(struct nvme_queue *nvmeq)
|
||||
{
|
||||
if (use_threaded_interrupts)
|
||||
return request_threaded_irq(dev->entry[nvmeq->cq_vector].vector,
|
||||
nvme_irq_check, nvme_irq, IRQF_SHARED,
|
||||
name, nvmeq);
|
||||
return request_irq(dev->entry[nvmeq->cq_vector].vector, nvme_irq,
|
||||
IRQF_SHARED, name, nvmeq);
|
||||
return request_threaded_irq(nvmeq_irq(nvmeq), nvme_irq_check,
|
||||
nvme_irq, IRQF_SHARED, nvmeq->irqname, nvmeq);
|
||||
else
|
||||
return request_irq(nvmeq_irq(nvmeq), nvme_irq, IRQF_SHARED,
|
||||
nvmeq->irqname, nvmeq);
|
||||
}
|
||||
|
||||
static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
|
||||
|
@ -1114,7 +1124,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
|
|||
if (result < 0)
|
||||
goto release_cq;
|
||||
|
||||
result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
|
||||
result = queue_request_irq(nvmeq);
|
||||
if (result < 0)
|
||||
goto release_sq;
|
||||
|
||||
|
@ -1131,7 +1141,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
|
|||
static struct blk_mq_ops nvme_mq_admin_ops = {
|
||||
.queue_rq = nvme_queue_rq,
|
||||
.complete = nvme_complete_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.init_hctx = nvme_admin_init_hctx,
|
||||
.exit_hctx = nvme_admin_exit_hctx,
|
||||
.init_request = nvme_admin_init_request,
|
||||
|
@ -1141,9 +1150,9 @@ static struct blk_mq_ops nvme_mq_admin_ops = {
|
|||
static struct blk_mq_ops nvme_mq_ops = {
|
||||
.queue_rq = nvme_queue_rq,
|
||||
.complete = nvme_complete_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.init_hctx = nvme_init_hctx,
|
||||
.init_request = nvme_init_request,
|
||||
.map_queues = nvme_pci_map_queues,
|
||||
.timeout = nvme_timeout,
|
||||
.poll = nvme_poll,
|
||||
};
|
||||
|
@ -1234,7 +1243,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
|
|||
goto free_nvmeq;
|
||||
|
||||
nvmeq->cq_vector = 0;
|
||||
result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
|
||||
result = queue_request_irq(nvmeq);
|
||||
if (result) {
|
||||
nvmeq->cq_vector = -1;
|
||||
goto free_nvmeq;
|
||||
|
@ -1382,7 +1391,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
|
|||
{
|
||||
struct nvme_queue *adminq = dev->queues[0];
|
||||
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
||||
int result, i, vecs, nr_io_queues, size;
|
||||
int result, nr_io_queues, size;
|
||||
|
||||
nr_io_queues = num_online_cpus();
|
||||
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
|
||||
|
@ -1417,29 +1426,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
|
|||
}
|
||||
|
||||
/* Deregister the admin queue's interrupt */
|
||||
free_irq(dev->entry[0].vector, adminq);
|
||||
free_irq(pci_irq_vector(pdev, 0), adminq);
|
||||
|
||||
/*
|
||||
* If we enable msix early due to not intx, disable it again before
|
||||
* setting up the full range we need.
|
||||
*/
|
||||
if (pdev->msi_enabled)
|
||||
pci_disable_msi(pdev);
|
||||
else if (pdev->msix_enabled)
|
||||
pci_disable_msix(pdev);
|
||||
|
||||
for (i = 0; i < nr_io_queues; i++)
|
||||
dev->entry[i].entry = i;
|
||||
vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues);
|
||||
if (vecs < 0) {
|
||||
vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32));
|
||||
if (vecs < 0) {
|
||||
vecs = 1;
|
||||
} else {
|
||||
for (i = 0; i < vecs; i++)
|
||||
dev->entry[i].vector = i + pdev->irq;
|
||||
}
|
||||
}
|
||||
pci_free_irq_vectors(pdev);
|
||||
nr_io_queues = pci_alloc_irq_vectors(pdev, 1, nr_io_queues,
|
||||
PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY);
|
||||
if (nr_io_queues <= 0)
|
||||
return -EIO;
|
||||
dev->max_qid = nr_io_queues;
|
||||
|
||||
/*
|
||||
* Should investigate if there's a performance win from allocating
|
||||
|
@ -1447,10 +1445,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
|
|||
* path to scale better, even if the receive path is limited by the
|
||||
* number of interrupts.
|
||||
*/
|
||||
nr_io_queues = vecs;
|
||||
dev->max_qid = nr_io_queues;
|
||||
|
||||
result = queue_request_irq(dev, adminq, adminq->irqname);
|
||||
result = queue_request_irq(adminq);
|
||||
if (result) {
|
||||
adminq->cq_vector = -1;
|
||||
goto free_queues;
|
||||
|
@ -1462,23 +1458,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
|
|||
return result;
|
||||
}
|
||||
|
||||
static void nvme_pci_post_scan(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
struct nvme_dev *dev = to_nvme_dev(ctrl);
|
||||
struct nvme_queue *nvmeq;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < dev->online_queues; i++) {
|
||||
nvmeq = dev->queues[i];
|
||||
|
||||
if (!nvmeq->tags || !(*nvmeq->tags))
|
||||
continue;
|
||||
|
||||
irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
|
||||
blk_mq_tags_cpumask(*nvmeq->tags));
|
||||
}
|
||||
}
|
||||
|
||||
static void nvme_del_queue_end(struct request *req, int error)
|
||||
{
|
||||
struct nvme_queue *nvmeq = req->end_io_data;
|
||||
|
@ -1615,15 +1594,9 @@ static int nvme_pci_enable(struct nvme_dev *dev)
|
|||
* interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll
|
||||
* adjust this later.
|
||||
*/
|
||||
if (pci_enable_msix(pdev, dev->entry, 1)) {
|
||||
pci_enable_msi(pdev);
|
||||
dev->entry[0].vector = pdev->irq;
|
||||
}
|
||||
|
||||
if (!dev->entry[0].vector) {
|
||||
result = -ENODEV;
|
||||
goto disable;
|
||||
}
|
||||
result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
|
||||
if (result < 0)
|
||||
return result;
|
||||
|
||||
cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
|
||||
|
||||
|
@ -1665,10 +1638,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
|
|||
{
|
||||
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
||||
|
||||
if (pdev->msi_enabled)
|
||||
pci_disable_msi(pdev);
|
||||
else if (pdev->msix_enabled)
|
||||
pci_disable_msix(pdev);
|
||||
pci_free_irq_vectors(pdev);
|
||||
|
||||
if (pci_is_enabled(pdev)) {
|
||||
pci_disable_pcie_error_reporting(pdev);
|
||||
|
@ -1743,7 +1713,6 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
|
|||
if (dev->ctrl.admin_q)
|
||||
blk_put_queue(dev->ctrl.admin_q);
|
||||
kfree(dev->queues);
|
||||
kfree(dev->entry);
|
||||
kfree(dev);
|
||||
}
|
||||
|
||||
|
@ -1887,7 +1856,6 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
|
|||
.reg_read64 = nvme_pci_reg_read64,
|
||||
.reset_ctrl = nvme_pci_reset_ctrl,
|
||||
.free_ctrl = nvme_pci_free_ctrl,
|
||||
.post_scan = nvme_pci_post_scan,
|
||||
.submit_async_event = nvme_pci_submit_async_event,
|
||||
};
|
||||
|
||||
|
@ -1920,10 +1888,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|||
dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
|
||||
if (!dev)
|
||||
return -ENOMEM;
|
||||
dev->entry = kzalloc_node(num_possible_cpus() * sizeof(*dev->entry),
|
||||
GFP_KERNEL, node);
|
||||
if (!dev->entry)
|
||||
goto free;
|
||||
dev->queues = kzalloc_node((num_possible_cpus() + 1) * sizeof(void *),
|
||||
GFP_KERNEL, node);
|
||||
if (!dev->queues)
|
||||
|
@ -1964,7 +1928,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|||
nvme_dev_unmap(dev);
|
||||
free:
|
||||
kfree(dev->queues);
|
||||
kfree(dev->entry);
|
||||
kfree(dev);
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -1480,7 +1480,6 @@ static void nvme_rdma_complete_rq(struct request *rq)
|
|||
static struct blk_mq_ops nvme_rdma_mq_ops = {
|
||||
.queue_rq = nvme_rdma_queue_rq,
|
||||
.complete = nvme_rdma_complete_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.init_request = nvme_rdma_init_request,
|
||||
.exit_request = nvme_rdma_exit_request,
|
||||
.reinit_request = nvme_rdma_reinit_request,
|
||||
|
@ -1492,7 +1491,6 @@ static struct blk_mq_ops nvme_rdma_mq_ops = {
|
|||
static struct blk_mq_ops nvme_rdma_admin_mq_ops = {
|
||||
.queue_rq = nvme_rdma_queue_rq,
|
||||
.complete = nvme_rdma_complete_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.init_request = nvme_rdma_init_admin_request,
|
||||
.exit_request = nvme_rdma_exit_admin_request,
|
||||
.reinit_request = nvme_rdma_reinit_request,
|
||||
|
|
|
@ -273,7 +273,6 @@ static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
|
|||
static struct blk_mq_ops nvme_loop_mq_ops = {
|
||||
.queue_rq = nvme_loop_queue_rq,
|
||||
.complete = nvme_loop_complete_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.init_request = nvme_loop_init_request,
|
||||
.init_hctx = nvme_loop_init_hctx,
|
||||
.timeout = nvme_loop_timeout,
|
||||
|
@ -282,7 +281,6 @@ static struct blk_mq_ops nvme_loop_mq_ops = {
|
|||
static struct blk_mq_ops nvme_loop_admin_mq_ops = {
|
||||
.queue_rq = nvme_loop_queue_rq,
|
||||
.complete = nvme_loop_complete_rq,
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.init_request = nvme_loop_init_admin_request,
|
||||
.init_hctx = nvme_loop_init_admin_hctx,
|
||||
.timeout = nvme_loop_timeout,
|
||||
|
|
|
@ -2077,7 +2077,6 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
|
|||
}
|
||||
|
||||
static struct blk_mq_ops scsi_mq_ops = {
|
||||
.map_queue = blk_mq_map_queue,
|
||||
.queue_rq = scsi_queue_rq,
|
||||
.complete = scsi_softirq_done,
|
||||
.timeout = scsi_timeout,
|
||||
|
|
9
include/linux/blk-mq-pci.h
Normal file
9
include/linux/blk-mq-pci.h
Normal file
|
@ -0,0 +1,9 @@
|
|||
#ifndef _LINUX_BLK_MQ_PCI_H
|
||||
#define _LINUX_BLK_MQ_PCI_H
|
||||
|
||||
struct blk_mq_tag_set;
|
||||
struct pci_dev;
|
||||
|
||||
int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev);
|
||||
|
||||
#endif /* _LINUX_BLK_MQ_PCI_H */
|
|
@ -62,6 +62,7 @@ struct blk_mq_hw_ctx {
|
|||
};
|
||||
|
||||
struct blk_mq_tag_set {
|
||||
unsigned int *mq_map;
|
||||
struct blk_mq_ops *ops;
|
||||
unsigned int nr_hw_queues;
|
||||
unsigned int queue_depth; /* max hw supported */
|
||||
|
@ -85,7 +86,6 @@ struct blk_mq_queue_data {
|
|||
};
|
||||
|
||||
typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
|
||||
typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
|
||||
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
|
||||
typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
|
||||
typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
|
||||
|
@ -99,6 +99,7 @@ typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *,
|
|||
bool);
|
||||
typedef void (busy_tag_iter_fn)(struct request *, void *, bool);
|
||||
typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int);
|
||||
typedef int (map_queues_fn)(struct blk_mq_tag_set *set);
|
||||
|
||||
|
||||
struct blk_mq_ops {
|
||||
|
@ -107,11 +108,6 @@ struct blk_mq_ops {
|
|||
*/
|
||||
queue_rq_fn *queue_rq;
|
||||
|
||||
/*
|
||||
* Map to specific hardware queue
|
||||
*/
|
||||
map_queue_fn *map_queue;
|
||||
|
||||
/*
|
||||
* Called on request timeout
|
||||
*/
|
||||
|
@ -144,6 +140,8 @@ struct blk_mq_ops {
|
|||
init_request_fn *init_request;
|
||||
exit_request_fn *exit_request;
|
||||
reinit_request_fn *reinit_request;
|
||||
|
||||
map_queues_fn *map_queues;
|
||||
};
|
||||
|
||||
enum {
|
||||
|
@ -199,7 +197,6 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
|
|||
struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op,
|
||||
unsigned int flags, unsigned int hctx_idx);
|
||||
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
|
||||
struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
|
||||
|
||||
enum {
|
||||
BLK_MQ_UNIQUE_TAG_BITS = 16,
|
||||
|
@ -218,7 +215,6 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
|
|||
return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
|
||||
}
|
||||
|
||||
struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
|
||||
|
||||
int blk_mq_request_started(struct request *rq);
|
||||
void blk_mq_start_request(struct request *rq);
|
||||
|
|
Loading…
Reference in New Issue
Block a user