From 9bf2b972afeaffd173fe2ce211ebc555ea7e8a87 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 8 Apr 2016 16:11:02 -0600 Subject: NVMe: Fix reset/remove race This fixes a scenario where the device is present and being reset, but a request to unbind the driver occurs. A previous patch series addressing a device failure removal scenario flushed reset_work after controller disable to unblock reset_work waiting on a completion that wouldn't occur. This isn't safe as-is. The broken scenario can potentially be induced with: modprobe nvme && modprobe -r nvme To fix, the reset work is flushed immediately after setting the controller removing flag, and any subsequent reset will not proceed with controller initialization if the flag is set. The controller status must be polled while active, so the watchdog timer is also left active until the controller is disabled to clean up requests that may be stuck during namespace removal. [Fixes: ff23a2a15a2117245b4599c1352343c8b8fb4c43] Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 24ccda303efb..660ec84bc40f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1859,6 +1859,9 @@ static void nvme_reset_work(struct work_struct *work) if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) nvme_dev_disable(dev, false); + if (test_bit(NVME_CTRL_REMOVING, &dev->flags)) + goto out; + set_bit(NVME_CTRL_RESETTING, &dev->flags); result = nvme_pci_enable(dev); @@ -2078,11 +2081,10 @@ static void nvme_remove(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - del_timer_sync(&dev->watchdog_timer); - set_bit(NVME_CTRL_REMOVING, &dev->flags); pci_set_drvdata(pdev, NULL); flush_work(&dev->async_work); + flush_work(&dev->reset_work); 
flush_work(&dev->scan_work); nvme_remove_namespaces(&dev->ctrl); nvme_uninit_ctrl(&dev->ctrl); -- cgit v1.2.3 From a5229050b69cfffb690b546c357ca5a60434c0c8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 8 Apr 2016 16:09:10 -0600 Subject: NVMe: Always use MSI/MSI-x interrupts Multiple users have reported device initialization failure due to the driver not receiving legacy PCI interrupts. This is not unique to any particular controller, but has been observed on multiple platforms. There have been no issues reported or observed when using message signaled interrupts, so this patch attempts to use MSI-x during initialization, falling back to MSI. If that fails, legacy would become the default. The setup_io_queues error handling had to change as a result: the admin queue's msix_entry used to be initialized to the legacy IRQ. The case where nr_io_queues is 0 would fail request_irq when setting up the admin queue's interrupt since re-enabling MSI-x fails with 0 vectors, leaving the admin queue's msix_entry invalid. Instead, return success immediately. Reported-by: Tim Muhlemmer Reported-by: Jon Derrick Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 660ec84bc40f..4fd733ff72b1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1478,8 +1478,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) if (result > 0) { dev_err(dev->ctrl.device, "Could not set queue count (%d)\n", result); - nr_io_queues = 0; - result = 0; + return 0; } if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) { @@ -1513,7 +1512,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * If we enable msix early due to not intx, disable it again before * setting up the full range we need. 
*/ - if (!pdev->irq) + if (pdev->msi_enabled) + pci_disable_msi(pdev); + else if (pdev->msix_enabled) pci_disable_msix(pdev); for (i = 0; i < nr_io_queues; i++) @@ -1696,7 +1697,6 @@ static int nvme_pci_enable(struct nvme_dev *dev) if (pci_enable_device_mem(pdev)) return result; - dev->entry[0].vector = pdev->irq; pci_set_master(pdev); if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && @@ -1709,13 +1709,18 @@ static int nvme_pci_enable(struct nvme_dev *dev) } /* - * Some devices don't advertse INTx interrupts, pre-enable a single - * MSIX vec for setup. We'll adjust this later. + * Some devices and/or platforms don't advertise or work with INTx + * interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll + * adjust this later. */ - if (!pdev->irq) { - result = pci_enable_msix(pdev, dev->entry, 1); - if (result < 0) - goto disable; + if (pci_enable_msix(pdev, dev->entry, 1)) { + pci_enable_msi(pdev); + dev->entry[0].vector = pdev->irq; + } + + if (!dev->entry[0].vector) { + result = -ENODEV; + goto disable; } cap = lo_hi_readq(dev->bar + NVME_REG_CAP); -- cgit v1.2.3 From a7297a6a3a3322b054592e8e988981d2f5f29cc4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Apr 2016 18:51:28 +0800 Subject: block: loop: fix filesystem corruption in case of aio/dio Starting from commit e36f620428(block: split bios to max possible length), block core starts to split bio in the middle of bvec. Unfortunately loop dio/aio doesn't consider this situation, and always treats 'iter.iov_offset' as zero. Then filesystem corruption is observed. This patch figures out the offset of the base bvec via 'bio->bi_iter.bi_bvec_done' and fixes the issue by passing the offset to iov iterator. 
Fixes: e36f6204288088f (block: split bios to max possible length) Cc: Keith Busch Cc: Al Viro Cc: stable@vger.kernel.org (4.5) Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/loop.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 423f4ca7d712..80cf8add46ff 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -488,6 +488,12 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); iov_iter_bvec(&iter, ITER_BVEC | rw, bvec, bio_segments(bio), blk_rq_bytes(cmd->rq)); + /* + * This bio may be started from the middle of the 'bvec' + * because of bio splitting, so offset from the bvec must + * be passed to iov iterator + */ + iter.iov_offset = bio->bi_iter.bi_bvec_done; cmd->iocb.ki_pos = pos; cmd->iocb.ki_filp = file; -- cgit v1.2.3