From bb351abaf5cd4f9237e1b3094d9cc04853de6d95 Mon Sep 17 00:00:00 2001 From: Weiping Zhang Date: Wed, 26 Dec 2018 11:56:33 +0800 Subject: [PATCH 01/22] block: add documentation for io_timeout Add documentation for /sys/block//queue/io_timeout. Reviewed-by: Bart Van Assche Signed-off-by: Weiping Zhang Signed-off-by: Jens Axboe --- Documentation/ABI/testing/sysfs-block | 9 +++++++++ Documentation/block/queue-sysfs.txt | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index 7710d4022b19..dfad7427817c 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -279,3 +279,12 @@ Description: size in 512B sectors of the zones of the device, with the eventual exception of the last zone of the device which may be smaller. + +What: /sys/block//queue/io_timeout +Date: November 2018 +Contact: Weiping Zhang +Description: + io_timeout is the request timeout in milliseconds. If a request + does not complete in this time then the block driver timeout + handler is invoked. That timeout handler can decide to retry + the request, to fail it or to start a device recovery strategy. diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index 39e286d7afc9..83b457e24bba 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt @@ -67,6 +67,13 @@ If set to a value larger than 0, the kernel will put the process issuing IO to sleep for this amount of microseconds before entering classic polling. +io_timeout (RW) +--------------- +io_timeout is the request timeout in milliseconds. If a request does not +complete in this time then the block driver timeout handler is invoked. +That timeout handler can decide to retry the request, to fail it or to start +a device recovery strategy. + iostats (RW) ------------- This file is used to control (on/off) the iostats accounting of the From 373282e7ab6840cd583a223fa90628f2d8293c26 Mon Sep 17 00:00:00 2001 From: John Pittman Date: Fri, 4 Jan 2019 12:06:37 -0500 Subject: [PATCH 02/22] null_blk: add zoned config support information If the kernel is built without CONFIG_BLK_DEV_ZONED, a modprobe of the null_blk driver with zoned=1 fails with 'Invalid argument'. This can be confusing to users, prompting a search as to why the parameter is invalid. To assist in that search, add a bit more information to the failure, additionally adding to the documentation that CONFIG_BLK_DEV_ZONED is needed for zoned=1. Reviewed-by: Bart Van Assche Signed-off-by: John Pittman Added null_blk prefix to error message. Signed-off-by: Jens Axboe --- Documentation/block/null_blk.txt | 3 ++- drivers/block/null_blk.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt index ea2dafe49ae8..4cad1024fff7 100644 --- a/Documentation/block/null_blk.txt +++ b/Documentation/block/null_blk.txt @@ -88,7 +88,8 @@ shared_tags=[0/1]: Default: 0 zoned=[0/1]: Default: 0 0: Block device is exposed as a random-access block device. - 1: Block device is exposed as a host-managed zoned block device. + 1: Block device is exposed as a host-managed zoned block device. Requires + CONFIG_BLK_DEV_ZONED. zone_size=[MB]: Default: 256 Per zone size when exposed as a zoned block device. Must be a power of two. diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index b3df2793e7cd..34b22d6523ba 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -97,6 +97,7 @@ void null_zone_reset(struct nullb_cmd *cmd, sector_t sector); #else static inline int null_zone_init(struct nullb_device *dev) { + pr_err("null_blk: CONFIG_BLK_DEV_ZONED not enabled\n"); return -EINVAL; } static inline void null_zone_exit(struct nullb_device *dev) {} From 40405851af73c59678ffd8f490e6b288c7fbaf29 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Tue, 8 Jan 2019 16:57:34 -0500 Subject: [PATCH 03/22] block: clarify documentation for blk_{start|finish}_plug There was some confusion about what these functions did. Make it clear that this is a hint for upper layers to pass to the block layer, and that it does not guarantee that I/O will not be submitted between a start and finish plug. Reported-by: "Darrick J. Wong" Reviewed-by: Darrick J. Wong Reviewed-by: Ming Lei Signed-off-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/blk-core.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index c78042975737..f2732f106a2e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1683,6 +1683,15 @@ EXPORT_SYMBOL(kblockd_mod_delayed_work_on); * @plug: The &struct blk_plug that needs to be initialized * * Description: + * blk_start_plug() indicates to the block layer an intent by the caller + * to submit multiple I/O requests in a batch. The block layer may use + * this hint to defer submitting I/Os from the caller until blk_finish_plug() + * is called. However, the block layer may choose to submit requests + * before a call to blk_finish_plug() if the number of queued I/Os + * exceeds %BLK_MAX_REQUEST_COUNT, or if the size of the I/O is larger than + * %BLK_PLUG_FLUSH_SIZE. The queued I/Os may also be submitted early if + * the task schedules (see below). + * * Tracking blk_plug inside the task_struct will help with auto-flushing the * pending I/O should the task end up blocking between blk_start_plug() and * blk_finish_plug(). This is important from a performance perspective, but @@ -1765,6 +1774,16 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) blk_mq_flush_plug_list(plug, from_schedule); } +/** + * blk_finish_plug - mark the end of a batch of submitted I/O + * @plug: The &struct blk_plug passed to blk_start_plug() + * + * Description: + * Indicate that a batch of I/O submissions is complete. This function + * must be paired with an initial call to blk_start_plug(). The intent + * is to allow the block layer to optimize I/O submission. See the + * documentation for blk_start_plug() for more information. + */ void blk_finish_plug(struct blk_plug *plug) { if (plug != current->plug) From 47cb393ee4815e10ab66f981fed581afdcc7caac Mon Sep 17 00:00:00 2001 From: John Pittman Date: Tue, 8 Jan 2019 16:56:13 -0500 Subject: [PATCH 04/22] block: doc: add slice_idle_us to bfq documentation Of the tunables available for the bfq I/O scheduler, the only one missing from the documentation in 'Documentation/block/bfq-iosched.txt' is slice_idle_us. Add this tunable to the documentation and a short explanation of its purpose. Acked-by: Paolo Valente Signed-off-by: John Pittman Signed-off-by: Jens Axboe --- Documentation/block/bfq-iosched.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt index 8d8d8f06cab2..98a8dd5ee385 100644 --- a/Documentation/block/bfq-iosched.txt +++ b/Documentation/block/bfq-iosched.txt @@ -357,6 +357,13 @@ video playing/streaming, a very low drop rate may be more important than maximum throughput. In these cases, consider setting the strict_guarantees parameter. +slice_idle_us +------------- + +Controls the same tuning parameter as slice_idle, but in microseconds. +Either tunable can be used to set idling behavior. Afterwards, the +other tunable will reflect the newly set value in sysfs. + strict_guarantees ----------------- From c61e678f30da733a1b7fdd5983d0770de2e6009c Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Mon, 24 Dec 2018 11:15:53 +0800 Subject: [PATCH 05/22] nvme-pci: fix the wrong setting of nr_maps We only set the nr_maps to 3 if poll queues are supported. Signed-off-by: Jianchao Wang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5a0bf6a24d50..cc65fa8a537b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2294,7 +2294,6 @@ static int nvme_dev_add(struct nvme_dev *dev) dev->tagset.nr_maps = 2; /* default + read */ if (dev->io_queues[HCTX_TYPE_POLL]) dev->tagset.nr_maps++; - dev->tagset.nr_maps = HCTX_MAX_TYPES; dev->tagset.timeout = NVME_IO_TIMEOUT; dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.queue_depth = From cc667f6d5de023ee131e96bb88e5cddca23272bd Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Sat, 29 Dec 2018 17:23:43 +0000 Subject: [PATCH 06/22] nvme-pci: use the same attributes when freeing host_mem_desc_bufs. When using HMB the PCIe host driver allocates host_mem_desc_bufs using dma_alloc_attrs() but frees them using dma_free_coherent(). Use the correct dma_free_attrs() function to free the buffers. Signed-off-by: Liviu Dudau Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cc65fa8a537b..efe46f518022 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1885,8 +1885,9 @@ static void nvme_free_host_mem(struct nvme_dev *dev) struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i]; size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size; - dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i], - le64_to_cpu(desc->addr)); + dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i], + le64_to_cpu(desc->addr), + DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); } kfree(dev->host_mem_desc_bufs); @@ -1952,8 +1953,9 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, while (--i >= 0) { size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size; - dma_free_coherent(dev->dev, size, bufs[i], - le64_to_cpu(descs[i].addr)); + dma_free_attrs(dev->dev, size, bufs[i], + le64_to_cpu(descs[i].addr), + DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); } kfree(bufs); From 8fae268b40f5191227ae7050a99cb2cf1b914ddd Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 4 Jan 2019 15:04:33 -0700 Subject: [PATCH 07/22] nvme-pci: rerun irq setup on IO queue init errors If the driver is unable to create a subset of IO queues for any reason, the read/write and polled queue sets will not match the actual allocated hardware contexts. This leaves gaps in the CPU affinity mappings and causes the following kernel panic after blk_mq_map_queue_type() returns a NULL hctx. BUG: unable to handle kernel NULL pointer dereference at 0000000000000198 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP CPU: 64 PID: 1171 Comm: kworker/u259:1 Not tainted 4.20.0+ #241 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 Workqueue: nvme-wq nvme_scan_work [nvme_core] RIP: 0010:blk_mq_init_allocated_queue+0x2d9/0x440 RSP: 0018:ffffb1bf0abc3cd0 EFLAGS: 00010286 RAX: 000000000000001f RBX: ffff8ea744cf0718 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 000000000000007c RDI: ffffffff9109a820 RBP: ffff8ea7565f7008 R08: 000000000000001f R09: 000000000000003f R10: ffffb1bf0abc3c00 R11: 0000000000000000 R12: 000000000001d008 R13: ffff8ea7565f7008 R14: 000000000000003f R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff8ea757200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000198 CR3: 0000000013058000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: blk_mq_init_queue+0x35/0x60 nvme_validate_ns+0xc6/0x7c0 [nvme_core] ? nvme_identify_ctrl.isra.56+0x7e/0xc0 [nvme_core] nvme_scan_work+0xc8/0x340 [nvme_core] ? __wake_up_common+0x6d/0x120 ? try_to_wake_up+0x55/0x410 process_one_work+0x1e9/0x3d0 worker_thread+0x2d/0x3d0 ? process_one_work+0x3d0/0x3d0 kthread+0x111/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x1f/0x30 Modules linked in: nvme nvme_core serio_raw CR2: 0000000000000198 Fix by re-running the interrupt vector setup from scratch using a reduced count that may be successful until the created queues matches the irq affinity plus polling queue sets. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Ming Lei Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 50 +++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index efe46f518022..f891eb57f263 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -95,6 +95,7 @@ struct nvme_dev; struct nvme_queue; static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); +static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode); /* * Represents an NVM Express device. Each nvme_dev is a PCI function. @@ -1420,6 +1421,14 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) return 0; } +static void nvme_suspend_io_queues(struct nvme_dev *dev) +{ + int i; + + for (i = dev->ctrl.queue_count - 1; i > 0; i--) + nvme_suspend_queue(&dev->queues[i]); +} + static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { struct nvme_queue *nvmeq = &dev->queues[0]; @@ -2134,6 +2143,12 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) return result; } +static void nvme_disable_io_queues(struct nvme_dev *dev) +{ + if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq)) + __nvme_disable_io_queues(dev, nvme_admin_delete_cq); +} + static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = &dev->queues[0]; @@ -2170,6 +2185,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) } while (1); adminq->q_db = dev->dbs; + retry: /* Deregister the admin queue's interrupt */ pci_free_irq(pdev, 0, adminq); @@ -2187,25 +2203,34 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) result = max(result - 1, 1); dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL]; - dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n", - dev->io_queues[HCTX_TYPE_DEFAULT], - dev->io_queues[HCTX_TYPE_READ], - dev->io_queues[HCTX_TYPE_POLL]); - /* * Should investigate if there's a performance win from allocating * more queues than interrupt vectors; it might allow the submission * path to scale better, even if the receive path is limited by the * number of interrupts. */ - result = queue_request_irq(adminq); if (result) { adminq->cq_vector = -1; return result; } set_bit(NVMEQ_ENABLED, &adminq->flags); - return nvme_create_io_queues(dev); + + result = nvme_create_io_queues(dev); + if (result || dev->online_queues < 2) + return result; + + if (dev->online_queues - 1 < dev->max_qid) { + nr_io_queues = dev->online_queues - 1; + nvme_disable_io_queues(dev); + nvme_suspend_io_queues(dev); + goto retry; + } + dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n", + dev->io_queues[HCTX_TYPE_DEFAULT], + dev->io_queues[HCTX_TYPE_READ], + dev->io_queues[HCTX_TYPE_POLL]); + return 0; } static void nvme_del_queue_end(struct request *req, blk_status_t error) @@ -2250,7 +2275,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) return 0; } -static bool nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) +static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) { int nr_queues = dev->online_queues - 1, sent = 0; unsigned long timeout; @@ -2411,7 +2436,6 @@ static void nvme_pci_disable(struct nvme_dev *dev) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { - int i; bool dead = true; struct pci_dev *pdev = to_pci_dev(dev->dev); @@ -2438,13 +2462,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_stop_queues(&dev->ctrl); if (!dead && dev->ctrl.queue_count > 0) { - if (nvme_disable_io_queues(dev, nvme_admin_delete_sq)) - nvme_disable_io_queues(dev, nvme_admin_delete_cq); + nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); } - for (i = dev->ctrl.queue_count - 1; i >= 0; i--) - nvme_suspend_queue(&dev->queues[i]); - + nvme_suspend_io_queues(dev); + nvme_suspend_queue(&dev->queues[0]); nvme_pci_disable(dev); blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); From dcca1662727220d18fa351097ddff33f95f516c5 Mon Sep 17 00:00:00 2001 From: Hongbo Yao Date: Mon, 7 Jan 2019 10:22:07 +0800 Subject: [PATCH 08/22] nvme-pci: fix out of bounds access in nvme_cqe_pending There is an out of bounds array access in nvme_cqe_peding(). When enable irq_thread for nvme interrupt, there is racing between the nvmeq->cq_head updating and reading. nvmeq->cq_head is updated in nvme_update_cq_head(), if nvmeq->cq_head equals nvmeq->q_depth and before its value set to zero, nvme_cqe_pending() uses its value as an array index, the index will be out of bounds. Signed-off-by: Hongbo Yao [hch: slight coding style update] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f891eb57f263..3f53a2c3042d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1020,9 +1020,11 @@ static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end) static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) { - if (++nvmeq->cq_head == nvmeq->q_depth) { + if (nvmeq->cq_head == nvmeq->q_depth - 1) { nvmeq->cq_head = 0; nvmeq->cq_phase = !nvmeq->cq_phase; + } else { + nvmeq->cq_head++; } } From e9c2edc098921173920df370c69b5c38fe52df56 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 31 Dec 2018 23:58:29 -0800 Subject: [PATCH 09/22] nvme-tcp: remove dead code We should never touch the opal device from the transport driver. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index de174912445e..7210b2d6df13 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1620,7 +1620,6 @@ static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_queue(ctrl, 0); if (remove) { - free_opal_dev(ctrl->opal_dev); blk_cleanup_queue(ctrl->admin_q); blk_mq_free_tag_set(ctrl->admin_tagset); } From e85037a2e90ac9aa448a08927d7a7436206c6000 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 31 Dec 2018 23:58:30 -0800 Subject: [PATCH 10/22] nvme-tcp: don't ask if controller is fabrics For sure we are a fabric driver. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 7210b2d6df13..265a0543b381 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1565,8 +1565,7 @@ static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_io_queues(ctrl); if (remove) { - if (ctrl->ops->flags & NVME_F_FABRICS) - blk_cleanup_queue(ctrl->connect_q); + blk_cleanup_queue(ctrl->connect_q); blk_mq_free_tag_set(ctrl->tagset); } nvme_tcp_free_io_queues(ctrl); @@ -1587,12 +1586,10 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) goto out_free_io_queues; } - if (ctrl->ops->flags & NVME_F_FABRICS) { - ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); - if (IS_ERR(ctrl->connect_q)) { - ret = PTR_ERR(ctrl->connect_q); - goto out_free_tag_set; - } + ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); + if (IS_ERR(ctrl->connect_q)) { + ret = PTR_ERR(ctrl->connect_q); + goto out_free_tag_set; } } else { blk_mq_update_nr_hw_queues(ctrl->tagset, @@ -1606,7 +1603,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) return 0; out_cleanup_connect_q: - if (new && (ctrl->ops->flags & NVME_F_FABRICS)) + if (new) blk_cleanup_queue(ctrl->connect_q); out_free_tag_set: if (new) From 9846ac0143fe9872e92fe2a1ddff868ad05bdbb6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 7 Jan 2019 23:54:23 -0800 Subject: [PATCH 11/22] nvme-fabrics: unset write/poll queues for discovery controllers Even if user-space sent it to us, it got it wrong so lets help by disallowing it. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index b2ab213f43de..3eb908c50e1a 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -874,6 +874,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, if (opts->discovery_nqn) { opts->kato = 0; opts->nr_io_queues = 0; + opts->nr_write_queues = 0; + opts->nr_poll_queues = 0; opts->duplicate_connect = true; } if (ctrl_loss_tmo < 0) From c7055fd15ff46d92eb0dd1c16a4fe010d58224c8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 8 Jan 2019 12:46:58 +0100 Subject: [PATCH 12/22] nvme-multipath: zero out ANA log buffer When nvme_init_identify() fails the ANA log buffer is deallocated but _not_ set to NULL. This can cause double free oops when this controller is deleted without ever being reconnected. Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 183ec17ba067..df4b3a6db51b 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -570,6 +570,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) return 0; out_free_ana_log_buf: kfree(ctrl->ana_log_buf); + ctrl->ana_log_buf = NULL; out: return error; } @@ -577,5 +578,6 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { kfree(ctrl->ana_log_buf); + ctrl->ana_log_buf = NULL; } From 3da584f57133e51aeb84aaefae5e3d69531a1e4f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 8 Jan 2019 09:37:43 -0700 Subject: [PATCH 13/22] nvme: pad fake subsys NQN vid and ssvid with zeros We need to preserve the leading zeros in the vid and ssvid when generating a unique NQN. Truncating these may lead to naming collisions. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 08f2c92602f4..f81fde164d37 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2184,7 +2184,7 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */ off = snprintf(subsys->subnqn, NVMF_NQN_SIZE, - "nqn.2014.08.org.nvmexpress:%4x%4x", + "nqn.2014.08.org.nvmexpress:%04x%04x", le16_to_cpu(id->vid), le16_to_cpu(id->ssvid)); memcpy(subsys->subnqn + off, id->sn, sizeof(id->sn)); off += sizeof(id->sn); From 6299358d198a0635da2dd3c4b3ec37789e811e44 Mon Sep 17 00:00:00 2001 From: James Dingwall Date: Tue, 8 Jan 2019 10:20:51 -0700 Subject: [PATCH 14/22] nvme: introduce NVME_QUIRK_IGNORE_DEV_SUBNQN If a device provides an NQN it is expected to be globally unique. Unfortunately some firmware revisions for Intel 760p/Pro 7600p devices did not satisfy this requirement. In these circumstances if a system has >1 affected device then only one device is enabled. If this quirk is enabled then the device supplied subnqn is ignored and we fallback to generating one as if the field was empty. In this case we also suppress the version check so we don't print a warning when the quirk is enabled. Reviewed-by: Keith Busch Signed-off-by: James Dingwall Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 16 +++++++++------- drivers/nvme/host/nvme.h | 5 +++++ drivers/nvme/host/pci.c | 2 ++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f81fde164d37..8485be6bb895 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2173,14 +2173,16 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct size_t nqnlen; int off; - nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); - if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { - strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); - return; - } + if(!(ctrl->quirks & NVME_QUIRK_IGNORE_DEV_SUBNQN)) { + nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); + if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { + strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); + return; + } - if (ctrl->vs >= NVME_VS(1, 2, 1)) - dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n"); + if (ctrl->vs >= NVME_VS(1, 2, 1)) + dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n"); + } /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */ off = snprintf(subsys->subnqn, NVMF_NQN_SIZE, diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2b36ac922596..ab961bdeea89 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -90,6 +90,11 @@ enum nvme_quirks { * Set MEDIUM priority on SQ creation */ NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7), + + /* + * Ignore device provided subnqn. + */ + NVME_QUIRK_IGNORE_DEV_SUBNQN = (1 << 8), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3f53a2c3042d..fc9d17c317b8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2971,6 +2971,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_MEDIUM_PRIO_SQ }, + { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */ + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ From b8a38ea64dc714a64f8fb76e311a4f15a3f67861 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 7 Jan 2019 19:08:49 -0800 Subject: [PATCH 15/22] nvme: don't initlialize ctrl->cntlid twice ctrl->cntlid will already be initialized from id->cntlid for non-NVME_F_FABRICS controllers few lines below. For NVME_F_FABRICS controllers this field should already be initialized, otherwise the check if (ctrl->cntlid != le16_to_cpu(id->cntlid)) below will always be a no-op. Signed-off-by: Andrey Smirnov Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8485be6bb895..150e49723c15 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2502,7 +2502,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->oaes = le32_to_cpu(id->oaes); atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; - ctrl->cntlid = le16_to_cpup(&id->cntlid); if (id->mdts) max_hw_sectors = 1 << (id->mdts + page_shift - 9); else From 649d4968860ba708636ad643bd52b28027367042 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 9 Jan 2019 13:59:32 -0700 Subject: [PATCH 16/22] block: fix kerneldoc comment for blk_attempt_plug_merge() Commit 5f0ed774ed29 ("block: sum requests in the plug structure") removed the request_count parameter from block_attempt_plug_merge(), but did not remove the associated kerneldoc comment, introducing this warning to the docs build: ./block/blk-core.c:685: warning: Excess function parameter 'request_count' description in 'blk_attempt_plug_merge' Remove the obsolete description and make things a little quieter. Signed-off-by: Jonathan Corbet Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index f2732f106a2e..3c5f61ceeb67 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -661,7 +661,6 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, * blk_attempt_plug_merge - try to merge with %current's plugged list * @q: request_queue new bio is being queued at * @bio: new bio being queued - * @request_count: out parameter for number of traversed plugged requests * @same_queue_rq: pointer to &struct request that gets filled in when * another request associated with @q is found on the plug list * (optional, may be %NULL) From 5db470e229e22b7eda6e23b5566e532c96fb5bc3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 9 Jan 2019 19:17:14 -0800 Subject: [PATCH 17/22] loop: drop caches if offset or block_size are changed If we don't drop caches used in old offset or block_size, we can get old data from new offset/block_size, which gives unexpected data to user. For example, Martijn found a loopback bug in the below scenario. 1) LOOP_SET_FD loads first two pages on loop file 2) LOOP_SET_STATUS64 changes the offset on the loop file 3) mount is failed due to the cached pages having wrong superblock Cc: Jens Axboe Cc: linux-block@vger.kernel.org Reported-by: Martijn Coenen Reviewed-by: Bart Van Assche Signed-off-by: Jaegeuk Kim Signed-off-by: Jens Axboe --- drivers/block/loop.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b8a0720d3653..cf5538942834 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1190,6 +1190,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) goto out_unlock; } + if (lo->lo_offset != info->lo_offset || + lo->lo_sizelimit != info->lo_sizelimit) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); @@ -1218,6 +1224,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { + /* kill_bdev should have truncated all the pages */ + if (lo->lo_device->bd_inode->i_mapping->nrpages) { + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto out_unfreeze; + } if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto out_unfreeze; @@ -1443,22 +1457,39 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { + int err = 0; + if (lo->lo_state != Lo_bound) return -ENXIO; if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL; + if (lo->lo_queue->limits.logical_block_size != arg) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + blk_mq_freeze_queue(lo->lo_queue); + /* kill_bdev should have truncated all the pages */ + if (lo->lo_queue->limits.logical_block_size != arg && + lo->lo_device->bd_inode->i_mapping->nrpages) { + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto out_unfreeze; + } + blk_queue_logical_block_size(lo->lo_queue, arg); blk_queue_physical_block_size(lo->lo_queue, arg); blk_queue_io_min(lo->lo_queue, arg); loop_update_dio(lo); - +out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); - return 0; + return err; } static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, From 49e54187ae0b2f9b5c0760e568a103baf4481610 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 4 Dec 2018 20:28:25 +0100 Subject: [PATCH 18/22] ata: libahci_platform: comply to PHY framework Current implementation of the libahci does not take into account the new PHY framework. Correct the situation by adding a call to phy_set_mode() before phy_power_on(). PHYs should also be handled at suspend/resume time. For this, call ahci_platform_enable/disable_phys() at suspend/resume_host() time. These calls are guarded by a HFLAG (AHCI_HFLAG_SUSPEND_PHYS) that the user of the libahci driver must set manually in hpriv->flags at probe time. This is to avoid breaking users that have not been tested with this change. Reviewed-by: Hans de Goede Suggested-by: Grzegorz Jaszczyk Signed-off-by: Miquel Raynal Signed-off-by: Jens Axboe --- drivers/ata/ahci.h | 2 ++ drivers/ata/libahci_platform.c | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index ef356e70e6de..8810475f307a 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -254,6 +254,8 @@ enum { AHCI_HFLAG_IS_MOBILE = (1 << 25), /* mobile chipset, use SATA_MOBILE_LPM_POLICY as default lpm_policy */ + AHCI_HFLAG_SUSPEND_PHYS = (1 << 26), /* handle PHYs during + suspend/resume */ /* ap->flags bits */ diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 4b900fc659f7..81b1a3332ed6 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -56,6 +56,12 @@ static int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) if (rc) goto disable_phys; + rc = phy_set_mode(hpriv->phys[i], PHY_MODE_SATA); + if (rc) { + phy_exit(hpriv->phys[i]); + goto disable_phys; + } + rc = phy_power_on(hpriv->phys[i]); if (rc) { phy_exit(hpriv->phys[i]); @@ -738,6 +744,9 @@ int ahci_platform_suspend_host(struct device *dev) writel(ctl, mmio + HOST_CTL); readl(mmio + HOST_CTL); /* flush */ + if (hpriv->flags & AHCI_HFLAG_SUSPEND_PHYS) + ahci_platform_disable_phys(hpriv); + return ata_host_suspend(host, PMSG_SUSPEND); } EXPORT_SYMBOL_GPL(ahci_platform_suspend_host); @@ -756,6 +765,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_suspend_host); int ahci_platform_resume_host(struct device *dev) { struct ata_host *host = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = host->private_data; int rc; if (dev->power.power_state.event == PM_EVENT_SUSPEND) { @@ -766,6 +776,9 @@ int ahci_platform_resume_host(struct device *dev) ahci_init_controller(host); } + if (hpriv->flags & AHCI_HFLAG_SUSPEND_PHYS) + ahci_platform_enable_phys(hpriv); + ata_host_resume(host); return 0; From c9bc136791ba0eefe07ed57d3850b8c5cee6471b Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 4 Dec 2018 20:28:26 +0100 Subject: [PATCH 19/22] ata: ahci: mvebu: remove stale comment For Armada-38x (32-bit) SoCs, PM platform support has been added since: commit 32f9494c9dfd ("ARM: mvebu: prepare pm-board.c for the introduction of Armada 38x support") commit 3cbd6a6ca81c ("ARM: mvebu: Add standby support") For Armada 64-bit SoCs, like the A3700 also using this AHCI driver, PM platform support has always existed. There are even suspend/resume hooks in this driver since: commit d6ecf15814888 ("ata: ahci_mvebu: add suspend/resume support") Remove the stale comment at the end of this driver stating that all the above does not exist yet. Fixes: d6ecf15814888 ("ata: ahci_mvebu: add suspend/resume support") Signed-off-by: Miquel Raynal Signed-off-by: Jens Axboe --- drivers/ata/ahci_mvebu.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index f9cb51be38eb..128d6f22926d 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -197,11 +197,6 @@ static const struct of_device_id ahci_mvebu_of_match[] = { }; MODULE_DEVICE_TABLE(of, ahci_mvebu_of_match); -/* - * We currently don't provide power management related operations, - * since there is no suspend/resume support at the platform level for - * Armada 38x for the moment. - */ static struct platform_driver ahci_mvebu_driver = { .probe = ahci_mvebu_probe, .remove = ata_platform_remove_one, From 96dbcb40e4b1a387cdb9b21f43638c759aebb5a4 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 4 Dec 2018 20:28:27 +0100 Subject: [PATCH 20/22] ata: ahci: mvebu: do Armada 38x configuration only on relevant SoCs At the beginning, only Armada 38x SoCs where supported by the ahci_mvebu.c driver. Commit 15d3ce7b63bd ("ata: ahci_mvebu: add support for Armada 3700 variant") introduced Armada 3700 support. As opposed to Armada 38x SoCs, the 3700 variants do not have to configure mbus and the regret option. This patch took care of avoiding such configuration when not needed in the probe function, but failed to do the same in the resume path. While doing so looks harmless by experience, let's clean the driver logic and avoid doing this useless configuration with Armada 3700 SoCs. Because the logic is very similar between these two places, it has been decided to factorize this code and put it in a "Armada 38x configuration function". This function is part of a new (per-compatible) platform data structure, so that the addition of such configuration function for Armada 3700 will be eased. Fixes: 15d3ce7b63bd ("ata: ahci_mvebu: add support for Armada 3700 variant") Signed-off-by: Miquel Raynal Signed-off-by: Jens Axboe --- drivers/ata/ahci_mvebu.c | 68 ++++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index 128d6f22926d..7839a5df1fd2 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -28,6 +28,10 @@ #define AHCI_WINDOW_BASE(win) (0x64 + ((win) << 4)) #define AHCI_WINDOW_SIZE(win) (0x68 + ((win) << 4)) +struct ahci_mvebu_plat_data { + int (*plat_config)(struct ahci_host_priv *hpriv); +}; + static void ahci_mvebu_mbus_config(struct ahci_host_priv *hpriv, const struct mbus_dram_target_info *dram) { @@ -62,6 +66,22 @@ static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv) writel(0x80, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA); } +static int ahci_mvebu_armada_380_config(struct ahci_host_priv *hpriv) +{ + const struct mbus_dram_target_info *dram; + int rc = 0; + + dram = mv_mbus_dram_info(); + if (dram) + ahci_mvebu_mbus_config(hpriv, dram); + else + rc = -ENODEV; + + ahci_mvebu_regret_option(hpriv); + + return rc; +} + /** * ahci_mvebu_stop_engine * @@ -126,13 +146,10 @@ static int ahci_mvebu_resume(struct platform_device *pdev) { struct ata_host *host = platform_get_drvdata(pdev); struct ahci_host_priv *hpriv = host->private_data; - const struct mbus_dram_target_info *dram; + const struct ahci_mvebu_plat_data *pdata = hpriv->plat_data; - dram = mv_mbus_dram_info(); - if (dram) - ahci_mvebu_mbus_config(hpriv, dram); - - ahci_mvebu_regret_option(hpriv); + if (pdata->plat_config) + pdata->plat_config(hpriv); return ahci_platform_resume_host(&pdev->dev); } @@ -154,28 +171,31 @@ static struct scsi_host_template ahci_platform_sht = { static int ahci_mvebu_probe(struct platform_device *pdev) { + const struct ahci_mvebu_plat_data *pdata; struct ahci_host_priv *hpriv; - const struct mbus_dram_target_info *dram; int rc; + pdata = of_device_get_match_data(&pdev->dev); + if (!pdata) + return -EINVAL; + hpriv = ahci_platform_get_resources(pdev, 0); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); + hpriv->plat_data = (void *)pdata; + rc = ahci_platform_enable_resources(hpriv); if (rc) return rc; hpriv->stop_engine = ahci_mvebu_stop_engine; - if (of_device_is_compatible(pdev->dev.of_node, - "marvell,armada-380-ahci")) { - dram = mv_mbus_dram_info(); - if (!dram) - return -ENODEV; - - ahci_mvebu_mbus_config(hpriv, dram); - ahci_mvebu_regret_option(hpriv); + pdata = hpriv->plat_data; + if (pdata->plat_config) { + rc = pdata->plat_config(hpriv); + if (rc) + goto disable_resources; } rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info, @@ -190,9 +210,23 @@ static int ahci_mvebu_probe(struct platform_device *pdev) return rc; } +static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = { + .plat_config = ahci_mvebu_armada_380_config, +}; + +static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = { + .plat_config = NULL, +}; + static const struct of_device_id ahci_mvebu_of_match[] = { - { .compatible = "marvell,armada-380-ahci", }, - { .compatible = "marvell,armada-3700-ahci", }, + { + .compatible = "marvell,armada-380-ahci", + .data = &ahci_mvebu_armada_380_plat_data, + }, + { + .compatible = "marvell,armada-3700-ahci", + .data = &ahci_mvebu_armada_3700_plat_data, + }, { }, }; MODULE_DEVICE_TABLE(of, ahci_mvebu_of_match); From 2f558bc3f33ca344489cec2218545741028b6a70 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 4 Dec 2018 20:28:28 +0100 Subject: [PATCH 21/22] ata: ahci: mvebu: add Armada 3700 initialization needed for S2RAM A3700 comphy initialization is done in the firmware (TF-A). Looking at the SATA PHY initialization routine, there is a comment about "vendor specific" registers. Two registers are mentioned. They are not initialized there in the firmware because they are AHCI related, while the firmware at this location does only PHY configuration. The solution to avoid doing such initialization is relying on U-Boot. While this work at boot time, U-Boot is definitely not going to run during a resume after suspending to RAM. Two possible solutions were considered: * Fixing the firmware. * Fixing the kernel driver. The first solution would take ages to propagate, while the second solution is easy to implement as the driver as been a little bit reworked to prepare for such platform configuration. Hence, this patch adds an Armada 3700 configuration function to set these two registers both at boot time (in the probe) and after a suspend (in the resume path). Signed-off-by: Miquel Raynal Signed-off-by: Jens Axboe --- drivers/ata/ahci_mvebu.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index 7839a5df1fd2..bbab688d3c34 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -82,6 +82,19 @@ static int ahci_mvebu_armada_380_config(struct ahci_host_priv *hpriv) return rc; } +static int ahci_mvebu_armada_3700_config(struct ahci_host_priv *hpriv) +{ + u32 reg; + + writel(0, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_ADDR); + + reg = readl(hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA); + reg |= BIT(6); + writel(reg, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA); + + return 0; +} + /** * ahci_mvebu_stop_engine * @@ -148,8 +161,7 @@ static int ahci_mvebu_resume(struct platform_device *pdev) struct ahci_host_priv *hpriv = host->private_data; const struct ahci_mvebu_plat_data *pdata = hpriv->plat_data; - if (pdata->plat_config) - pdata->plat_config(hpriv); + pdata->plat_config(hpriv); return ahci_platform_resume_host(&pdev->dev); } @@ -191,12 +203,9 @@ static int ahci_mvebu_probe(struct platform_device *pdev) hpriv->stop_engine = ahci_mvebu_stop_engine; - pdata = hpriv->plat_data; - if (pdata->plat_config) { - rc = pdata->plat_config(hpriv); - if (rc) - goto disable_resources; - } + rc = pdata->plat_config(hpriv); + if (rc) + goto disable_resources; rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info, &ahci_platform_sht); @@ -215,7 +224,7 @@ static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = { }; static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = { - .plat_config = NULL, + .plat_config = ahci_mvebu_armada_3700_config, }; static const struct of_device_id ahci_mvebu_of_match[] = { From bde0b5c109e8b22b57745e3b9914f9e87ad857ea Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 4 Dec 2018 20:28:29 +0100 Subject: [PATCH 22/22] ata: ahci: mvebu: request PHY suspend/resume for Armada 3700 A feature has been added in the libahci driver: the possibility to set a new flag in hpriv->flags to let the core handle PHY suspend/resume automatically. Make use of this feature to make suspend to RAM work with SATA drives on A3700. Signed-off-by: Miquel Raynal Signed-off-by: Jens Axboe --- drivers/ata/ahci_mvebu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index bbab688d3c34..d4bba3ace45d 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -30,6 +30,7 @@ struct ahci_mvebu_plat_data { int (*plat_config)(struct ahci_host_priv *hpriv); + unsigned int flags; }; static void ahci_mvebu_mbus_config(struct ahci_host_priv *hpriv, @@ -195,6 +196,7 @@ static int ahci_mvebu_probe(struct platform_device *pdev) if (IS_ERR(hpriv)) return PTR_ERR(hpriv); + hpriv->flags |= pdata->flags; hpriv->plat_data = (void *)pdata; rc = ahci_platform_enable_resources(hpriv); @@ -225,6 +227,7 @@ static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = { static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = { .plat_config = ahci_mvebu_armada_3700_config, + .flags = AHCI_HFLAG_SUSPEND_PHYS, }; static const struct of_device_id ahci_mvebu_of_match[] = {