From f91389c8d2867d10a206273b8eb3b7955b65591e Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Thu, 8 Nov 2018 12:11:06 +0000
Subject: md: remove set but not used variable 'bi_rdev'

Fixes gcc '-Wunused-but-set-variable' warning:

drivers/md/md.c: In function 'md_integrity_add_rdev':
drivers/md/md.c:2149:24: warning:
 variable 'bi_rdev' set but not used [-Wunused-but-set-variable]

It not used any more after commit
  1501efadc524 ("md/raid: only permit hot-add of compatible integrity profiles")

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index fc488cb30a94..69778419aa20 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2148,14 +2148,12 @@ EXPORT_SYMBOL(md_integrity_register);
  */
 int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
 {
-	struct blk_integrity *bi_rdev;
 	struct blk_integrity *bi_mddev;
 	char name[BDEVNAME_SIZE];
 
 	if (!mddev->gendisk)
 		return 0;
 
-	bi_rdev = bdev_get_integrity(rdev->bdev);
 	bi_mddev = blk_get_integrity(mddev->gendisk);
 
 	if (!bi_mddev) /* nothing to do */
-- 
cgit v1.2.3


From 37b22c28946046e587c12b2b5bdaba7062e23f75 Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Wed, 14 Nov 2018 07:33:09 +0800
Subject: md: remvoe redundant condition check

mempool_destroy() can handle NULL pointer correctly,
so there is no need to check NULL pointer before calling
mempool_destroy().

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 69778419aa20..4f9b5827355e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5692,14 +5692,10 @@ int md_run(struct mddev *mddev)
 	return 0;
 
 abort:
-	if (mddev->flush_bio_pool) {
-		mempool_destroy(mddev->flush_bio_pool);
-		mddev->flush_bio_pool = NULL;
-	}
-	if (mddev->flush_pool){
-		mempool_destroy(mddev->flush_pool);
-		mddev->flush_pool = NULL;
-	}
+	mempool_destroy(mddev->flush_bio_pool);
+	mddev->flush_bio_pool = NULL;
+	mempool_destroy(mddev->flush_pool);
+	mddev->flush_pool = NULL;
 
 	return err;
 }
-- 
cgit v1.2.3


From caea3c47ad515210129f06b6bbe53f82f69efebe Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <gqjiang@suse.com>
Date: Fri, 7 Dec 2018 18:24:21 +0800
Subject: raid10: refactor common wait code from regular read/write request

Both raid10_read_request and raid10_write_request share
the same code at the beginning of them, so introduce
regular_request_wait to clean up code, and call it in
both request functions.

Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid10.c | 72 +++++++++++++++++++----------------------------------
 1 file changed, 25 insertions(+), 47 deletions(-)

(limited to 'drivers')

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index b98e746e7fc4..76c92e31afc0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1124,6 +1124,29 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 	kfree(plug);
 }
 
+/*
+ * 1. Register the new request and wait if the reconstruction thread has put
+ * up a bar for new requests. Continue immediately if no resync is active
+ * currently.
+ * 2. If IO spans the reshape position.  Need to wait for reshape to pass.
+ */
+static void regular_request_wait(struct mddev *mddev, struct r10conf *conf,
+				 struct bio *bio, sector_t sectors)
+{
+	wait_barrier(conf);
+	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    bio->bi_iter.bi_sector < conf->reshape_progress &&
+	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
+		raid10_log(conf->mddev, "wait reshape");
+		allow_barrier(conf);
+		wait_event(conf->wait_barrier,
+			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
+			   conf->reshape_progress >= bio->bi_iter.bi_sector +
+			   sectors);
+		wait_barrier(conf);
+	}
+}
+
 static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 				struct r10bio *r10_bio)
 {
@@ -1132,7 +1155,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	const int op = bio_op(bio);
 	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
 	int max_sectors;
-	sector_t sectors;
 	struct md_rdev *rdev;
 	char b[BDEVNAME_SIZE];
 	int slot = r10_bio->read_slot;
@@ -1166,30 +1188,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 		}
 		rcu_read_unlock();
 	}
-	/*
-	 * Register the new request and wait if the reconstruction
-	 * thread has put up a bar for new requests.
-	 * Continue immediately if no resync is active currently.
-	 */
-	wait_barrier(conf);
-
-	sectors = r10_bio->sectors;
-	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio->bi_iter.bi_sector < conf->reshape_progress &&
-	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-		/*
-		 * IO spans the reshape position.  Need to wait for reshape to
-		 * pass
-		 */
-		raid10_log(conf->mddev, "wait reshape");
-		allow_barrier(conf);
-		wait_event(conf->wait_barrier,
-			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
-			   conf->reshape_progress >= bio->bi_iter.bi_sector +
-			   sectors);
-		wait_barrier(conf);
-	}
 
+	regular_request_wait(mddev, conf, bio, r10_bio->sectors);
 	rdev = read_balance(conf, r10_bio, &max_sectors);
 	if (!rdev) {
 		if (err_rdev) {
@@ -1332,30 +1332,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 		finish_wait(&conf->wait_barrier, &w);
 	}
 
-	/*
-	 * Register the new request and wait if the reconstruction
-	 * thread has put up a bar for new requests.
-	 * Continue immediately if no resync is active currently.
-	 */
-	wait_barrier(conf);
-
 	sectors = r10_bio->sectors;
-	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio->bi_iter.bi_sector < conf->reshape_progress &&
-	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
-		/*
-		 * IO spans the reshape position.  Need to wait for reshape to
-		 * pass
-		 */
-		raid10_log(conf->mddev, "wait reshape");
-		allow_barrier(conf);
-		wait_event(conf->wait_barrier,
-			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
-			   conf->reshape_progress >= bio->bi_iter.bi_sector +
-			   sectors);
-		wait_barrier(conf);
-	}
-
+	regular_request_wait(mddev, conf, bio, sectors);
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    (mddev->reshape_backwards
 	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
-- 
cgit v1.2.3


From e820d55cb99dd93ac2dc949cf486bb187e5cd70d Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <gqjiang@suse.com>
Date: Wed, 19 Dec 2018 14:19:25 +0800
Subject: md: fix raid10 hang issue caused by barrier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When both regular IO and resync IO happen at the same time,
and if we also need to split regular. Then we can see tasks
hang due to barrier.

1. resync thread
[ 1463.757205] INFO: task md1_resync:5215 blocked for more than 480 seconds.
[ 1463.757207]       Not tainted 4.19.5-1-default #1
[ 1463.757209] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1463.757212] md1_resync      D    0  5215      2 0x80000000
[ 1463.757216] Call Trace:
[ 1463.757223]  ? __schedule+0x29a/0x880
[ 1463.757231]  ? raise_barrier+0x8d/0x140 [raid10]
[ 1463.757236]  schedule+0x78/0x110
[ 1463.757243]  raise_barrier+0x8d/0x140 [raid10]
[ 1463.757248]  ? wait_woken+0x80/0x80
[ 1463.757257]  raid10_sync_request+0x1f6/0x1e30 [raid10]
[ 1463.757265]  ? _raw_spin_unlock_irq+0x22/0x40
[ 1463.757284]  ? is_mddev_idle+0x125/0x137 [md_mod]
[ 1463.757302]  md_do_sync.cold.78+0x404/0x969 [md_mod]
[ 1463.757311]  ? wait_woken+0x80/0x80
[ 1463.757336]  ? md_rdev_init+0xb0/0xb0 [md_mod]
[ 1463.757351]  md_thread+0xe9/0x140 [md_mod]
[ 1463.757358]  ? _raw_spin_unlock_irqrestore+0x2e/0x60
[ 1463.757364]  ? __kthread_parkme+0x4c/0x70
[ 1463.757369]  kthread+0x112/0x130
[ 1463.757374]  ? kthread_create_worker_on_cpu+0x40/0x40
[ 1463.757380]  ret_from_fork+0x3a/0x50

2. regular IO
[ 1463.760679] INFO: task kworker/0:8:5367 blocked for more than 480 seconds.
[ 1463.760683]       Not tainted 4.19.5-1-default #1
[ 1463.760684] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1463.760687] kworker/0:8     D    0  5367      2 0x80000000
[ 1463.760718] Workqueue: md submit_flushes [md_mod]
[ 1463.760721] Call Trace:
[ 1463.760731]  ? __schedule+0x29a/0x880
[ 1463.760741]  ? wait_barrier+0xdd/0x170 [raid10]
[ 1463.760746]  schedule+0x78/0x110
[ 1463.760753]  wait_barrier+0xdd/0x170 [raid10]
[ 1463.760761]  ? wait_woken+0x80/0x80
[ 1463.760768]  raid10_write_request+0xf2/0x900 [raid10]
[ 1463.760774]  ? wait_woken+0x80/0x80
[ 1463.760778]  ? mempool_alloc+0x55/0x160
[ 1463.760795]  ? md_write_start+0xa9/0x270 [md_mod]
[ 1463.760801]  ? try_to_wake_up+0x44/0x470
[ 1463.760810]  raid10_make_request+0xc1/0x120 [raid10]
[ 1463.760816]  ? wait_woken+0x80/0x80
[ 1463.760831]  md_handle_request+0x121/0x190 [md_mod]
[ 1463.760851]  md_make_request+0x78/0x190 [md_mod]
[ 1463.760860]  generic_make_request+0x1c6/0x470
[ 1463.760870]  raid10_write_request+0x77a/0x900 [raid10]
[ 1463.760875]  ? wait_woken+0x80/0x80
[ 1463.760879]  ? mempool_alloc+0x55/0x160
[ 1463.760895]  ? md_write_start+0xa9/0x270 [md_mod]
[ 1463.760904]  raid10_make_request+0xc1/0x120 [raid10]
[ 1463.760910]  ? wait_woken+0x80/0x80
[ 1463.760926]  md_handle_request+0x121/0x190 [md_mod]
[ 1463.760931]  ? _raw_spin_unlock_irq+0x22/0x40
[ 1463.760936]  ? finish_task_switch+0x74/0x260
[ 1463.760954]  submit_flushes+0x21/0x40 [md_mod]

So resync io is waiting for regular write io to complete to
decrease nr_pending (conf->barrier++ is called before waiting).
The regular write io splits another bio after call wait_barrier
which call nr_pending++, then the splitted bio would continue
with raid10_write_request -> wait_barrier, so the splitted bio
has to wait for barrier to be zero, then deadlock happens as
follows.

	resync io		regular io

	raise_barrier
				wait_barrier
				generic_make_request
				wait_barrier

To resolve the issue, we need to call allow_barrier to decrease
nr_pending before generic_make_request since regular IO is not
issued to underlying devices, and wait_barrier is called again
to ensure no internal IO happening.

Fixes: fc9977dd069e ("md/raid10: simplify the splitting of requests.")
Reported-and-tested-by: Siniša Bandin <sinisa@4net.rs>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid10.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers')

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 76c92e31afc0..abb5d382f64d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1209,7 +1209,9 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 		struct bio *split = bio_split(bio, max_sectors,
 					      gfp, &conf->bio_split);
 		bio_chain(split, bio);
+		allow_barrier(conf);
 		generic_make_request(bio);
+		wait_barrier(conf);
 		bio = split;
 		r10_bio->master_bio = bio;
 		r10_bio->sectors = max_sectors;
@@ -1492,7 +1494,9 @@ retry_write:
 		struct bio *split = bio_split(bio, r10_bio->sectors,
 					      GFP_NOIO, &conf->bio_split);
 		bio_chain(split, bio);
+		allow_barrier(conf);
 		generic_make_request(bio);
+		wait_barrier(conf);
 		bio = split;
 		r10_bio->master_bio = bio;
 	}
-- 
cgit v1.2.3