Diffstat (limited to 'drivers/md')
 drivers/md/Kconfig            |   1
 drivers/md/bcache/bcache.h    |   1
 drivers/md/bcache/btree.c     |  27
 drivers/md/bcache/journal.c   |  20
 drivers/md/bcache/movinggc.c  |  16
 drivers/md/bcache/request.c   |  74
 drivers/md/bcache/request.h   |   2
 drivers/md/bcache/super.c     |  44
 drivers/md/bcache/sysfs.c     |   2
 drivers/md/bcache/writeback.c |  40
 drivers/md/dm-bufio.c         |  87
 drivers/md/dm-crypt.c         |   2
 drivers/md/dm-delay.c         | 112
 drivers/md/dm-flakey.c        |   2
 drivers/md/dm-integrity.c     |  11
 drivers/md/dm-raid.c          |   3
 drivers/md/dm-verity-fec.c    |   7
 drivers/md/dm-verity-target.c |  30
 drivers/md/dm-verity.h        |   8
 drivers/md/md.c               | 158
 drivers/md/raid5.c            |   4
 21 files changed, 361 insertions(+), 290 deletions(-)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2a8b081bce7d..3ff87cb4dc49 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -660,6 +660,7 @@ config DM_ZONED
config DM_AUDIT
bool "DM audit events"
+ depends on BLK_DEV_DM
depends on AUDIT
help
Generate audit events for device-mapper.
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 05be59ae21b2..6ae2329052c9 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -265,6 +265,7 @@ struct bcache_device {
#define BCACHE_DEV_WB_RUNNING 3
#define BCACHE_DEV_RATE_DW_RUNNING 4
int nr_stripes;
+#define BCH_MIN_STRIPE_SZ ((4 << 20) >> SECTOR_SHIFT)
unsigned int stripe_size;
atomic_t *stripe_sectors_dirty;
unsigned long *full_dirty_stripes;
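
The new BCH_MIN_STRIPE_SZ macro expresses a 4 MiB floor in 512-byte sectors
(SECTOR_SHIFT is 9 in the kernel). A minimal standalone sketch of the
arithmetic, assuming only the macro as written above:

/* Standalone sketch: BCH_MIN_STRIPE_SZ is 4 MiB expressed in 512-byte
 * sectors. SECTOR_SHIFT is 9 in the kernel; defined here for the demo. */
#include <stdio.h>

#define SECTOR_SHIFT 9
#define BCH_MIN_STRIPE_SZ ((4 << 20) >> SECTOR_SHIFT)

int main(void)
{
	/* (4 << 20) = 4194304 bytes; >> 9 = 8192 sectors */
	printf("BCH_MIN_STRIPE_SZ = %d sectors (%d bytes)\n",
	       BCH_MIN_STRIPE_SZ, BCH_MIN_STRIPE_SZ << SECTOR_SHIFT);
	return 0;
}
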
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index ae5cbb55861f..196cdacce38f 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -293,16 +293,16 @@ static void btree_complete_write(struct btree *b, struct btree_write *w)
w->journal = NULL;
}
-static void btree_node_write_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(btree_node_write_unlock)
{
- struct btree *b = container_of(cl, struct btree, io);
+ closure_type(b, struct btree, io);
up(&b->io_mutex);
}
-static void __btree_node_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(__btree_node_write_done)
{
- struct btree *b = container_of(cl, struct btree, io);
+ closure_type(b, struct btree, io);
struct btree_write *w = btree_prev_write(b);
bch_bbio_free(b->bio, b->c);
@@ -315,12 +315,12 @@ static void __btree_node_write_done(struct closure *cl)
closure_return_with_destructor(cl, btree_node_write_unlock);
}
-static void btree_node_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(btree_node_write_done)
{
- struct btree *b = container_of(cl, struct btree, io);
+ closure_type(b, struct btree, io);
bio_free_pages(b->bio);
- __btree_node_write_done(cl);
+ __btree_node_write_done(&cl->work);
}
static void btree_node_write_endio(struct bio *bio)
@@ -1000,6 +1000,9 @@ err:
*
* The btree node will have either a read or a write lock held, depending on
* level and op->lock.
+ *
+ * Note: Only an error code or a btree pointer will be returned, so it is
+ * unnecessary for callers to check for a NULL pointer.
*/
struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
struct bkey *k, int level, bool write,
@@ -1111,6 +1114,10 @@ retry:
mutex_unlock(&b->c->bucket_lock);
}
+/*
+ * Only an error code or a btree pointer will be returned; it is unnecessary
+ * for callers to check for a NULL pointer.
+ */
struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
int level, bool wait,
struct btree *parent)
@@ -1368,7 +1375,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
memset(new_nodes, 0, sizeof(new_nodes));
closure_init_stack(&cl);
- while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b))
+ while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b))
keys += r[nodes++].keys;
blocks = btree_default_blocks(b->c) * 2 / 3;
@@ -1515,7 +1522,7 @@ out_nocoalesce:
bch_keylist_free(&keylist);
for (i = 0; i < nodes; i++)
- if (!IS_ERR(new_nodes[i])) {
+ if (!IS_ERR_OR_NULL(new_nodes[i])) {
btree_node_free(new_nodes[i]);
rw_unlock(true, new_nodes[i]);
}
@@ -1532,6 +1539,8 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
return 0;
n = btree_node_alloc_replacement(replace, NULL);
+ if (IS_ERR(n))
+ return 0;
/* recheck reserve after allocating replacement node */
if (btree_check_reserve(b, NULL)) {
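
The dominant change in btree.c (and in the other bcache files below) is the
conversion of closure callbacks from void fn(struct closure *cl) to the
CLOSURE_CALLBACK()/closure_type() helpers, with direct calls now passing
&cl->work. A minimal standalone model of what those macros do, simplified
from include/linux/closure.h (the in-tree definitions may differ in detail):

/* Minimal standalone model of the CLOSURE_CALLBACK() conversion. A
 * closure embeds a work_struct, so a callback receives the work pointer
 * and recovers the outer object in two container_of() hops. */
#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int dummy; };
struct closure { struct work_struct work; };

#define CLOSURE_CALLBACK(name)	void name(struct work_struct *ws)
#define closure_type(name, type, member)				\
	struct closure *cl = container_of(ws, struct closure, work);	\
	type *name = container_of(cl, type, member)

struct btree { int level; struct closure io; };

static CLOSURE_CALLBACK(btree_node_write_unlock)
{
	closure_type(b, struct btree, io);
	printf("btree level %d\n", b->level);
}

int main(void)
{
	struct btree b = { .level = 1 };
	/* Direct calls now pass &cl->work, matching the diff. */
	btree_node_write_unlock(&b.io.work);
	return 0;
}
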
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index c182c21de2e8..7ff14bd2feb8 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -723,11 +723,11 @@ static void journal_write_endio(struct bio *bio)
closure_put(&w->c->journal.io);
}
-static void journal_write(struct closure *cl);
+static CLOSURE_CALLBACK(journal_write);
-static void journal_write_done(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_done)
{
- struct journal *j = container_of(cl, struct journal, io);
+ closure_type(j, struct journal, io);
struct journal_write *w = (j->cur == j->w)
? &j->w[1]
: &j->w[0];
@@ -736,19 +736,19 @@ static void journal_write_done(struct closure *cl)
continue_at_nobarrier(cl, journal_write, bch_journal_wq);
}
-static void journal_write_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_unlock)
__releases(&c->journal.lock)
{
- struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+ closure_type(c, struct cache_set, journal.io);
c->journal.io_in_flight = 0;
spin_unlock(&c->journal.lock);
}
-static void journal_write_unlocked(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write_unlocked)
__releases(c->journal.lock)
{
- struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+ closure_type(c, struct cache_set, journal.io);
struct cache *ca = c->cache;
struct journal_write *w = c->journal.cur;
struct bkey *k = &c->journal.key;
@@ -823,12 +823,12 @@ static void journal_write_unlocked(struct closure *cl)
continue_at(cl, journal_write_done, NULL);
}
-static void journal_write(struct closure *cl)
+static CLOSURE_CALLBACK(journal_write)
{
- struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+ closure_type(c, struct cache_set, journal.io);
spin_lock(&c->journal.lock);
- journal_write_unlocked(cl);
+ journal_write_unlocked(&cl->work);
}
static void journal_try_write(struct cache_set *c)
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 9f32901fdad1..ebd500bdf0b2 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -35,16 +35,16 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
/* Moving GC - IO loop */
-static void moving_io_destructor(struct closure *cl)
+static CLOSURE_CALLBACK(moving_io_destructor)
{
- struct moving_io *io = container_of(cl, struct moving_io, cl);
+ closure_type(io, struct moving_io, cl);
kfree(io);
}
-static void write_moving_finish(struct closure *cl)
+static CLOSURE_CALLBACK(write_moving_finish)
{
- struct moving_io *io = container_of(cl, struct moving_io, cl);
+ closure_type(io, struct moving_io, cl);
struct bio *bio = &io->bio.bio;
bio_free_pages(bio);
@@ -89,9 +89,9 @@ static void moving_init(struct moving_io *io)
bch_bio_map(bio, NULL);
}
-static void write_moving(struct closure *cl)
+static CLOSURE_CALLBACK(write_moving)
{
- struct moving_io *io = container_of(cl, struct moving_io, cl);
+ closure_type(io, struct moving_io, cl);
struct data_insert_op *op = &io->op;
if (!op->status) {
@@ -113,9 +113,9 @@ static void write_moving(struct closure *cl)
continue_at(cl, write_moving_finish, op->wq);
}
-static void read_moving_submit(struct closure *cl)
+static CLOSURE_CALLBACK(read_moving_submit)
{
- struct moving_io *io = container_of(cl, struct moving_io, cl);
+ closure_type(io, struct moving_io, cl);
struct bio *bio = &io->bio.bio;
bch_submit_bbio(bio, io->op.c, &io->w->key, 0);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index a9b1f3896249..83d112bd2b1c 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -25,7 +25,7 @@
struct kmem_cache *bch_search_cache;
-static void bch_data_insert_start(struct closure *cl);
+static CLOSURE_CALLBACK(bch_data_insert_start);
static unsigned int cache_mode(struct cached_dev *dc)
{
@@ -55,9 +55,9 @@ static void bio_csum(struct bio *bio, struct bkey *k)
/* Insert data into cache */
-static void bch_data_insert_keys(struct closure *cl)
+static CLOSURE_CALLBACK(bch_data_insert_keys)
{
- struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+ closure_type(op, struct data_insert_op, cl);
atomic_t *journal_ref = NULL;
struct bkey *replace_key = op->replace ? &op->replace_key : NULL;
int ret;
@@ -136,9 +136,9 @@ out:
continue_at(cl, bch_data_insert_keys, op->wq);
}
-static void bch_data_insert_error(struct closure *cl)
+static CLOSURE_CALLBACK(bch_data_insert_error)
{
- struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+ closure_type(op, struct data_insert_op, cl);
/*
* Our data write just errored, which means we've got a bunch of keys to
@@ -163,7 +163,7 @@ static void bch_data_insert_error(struct closure *cl)
op->insert_keys.top = dst;
- bch_data_insert_keys(cl);
+ bch_data_insert_keys(&cl->work);
}
static void bch_data_insert_endio(struct bio *bio)
@@ -184,9 +184,9 @@ static void bch_data_insert_endio(struct bio *bio)
bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
}
-static void bch_data_insert_start(struct closure *cl)
+static CLOSURE_CALLBACK(bch_data_insert_start)
{
- struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+ closure_type(op, struct data_insert_op, cl);
struct bio *bio = op->bio, *n;
if (op->bypass)
@@ -305,16 +305,16 @@ err:
* If op->bypass is true, instead of inserting the data it invalidates the
* region of the cache represented by op->bio and op->inode.
*/
-void bch_data_insert(struct closure *cl)
+CLOSURE_CALLBACK(bch_data_insert)
{
- struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
+ closure_type(op, struct data_insert_op, cl);
trace_bcache_write(op->c, op->inode, op->bio,
op->writeback, op->bypass);
bch_keylist_init(&op->insert_keys);
bio_get(op->bio);
- bch_data_insert_start(cl);
+ bch_data_insert_start(&cl->work);
}
/*
@@ -575,9 +575,9 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
return n == bio ? MAP_DONE : MAP_CONTINUE;
}
-static void cache_lookup(struct closure *cl)
+static CLOSURE_CALLBACK(cache_lookup)
{
- struct search *s = container_of(cl, struct search, iop.cl);
+ closure_type(s, struct search, iop.cl);
struct bio *bio = &s->bio.bio;
struct cached_dev *dc;
int ret;
@@ -698,9 +698,9 @@ static void do_bio_hook(struct search *s,
bio_cnt_set(bio, 3);
}
-static void search_free(struct closure *cl)
+static CLOSURE_CALLBACK(search_free)
{
- struct search *s = container_of(cl, struct search, cl);
+ closure_type(s, struct search, cl);
atomic_dec(&s->iop.c->search_inflight);
@@ -749,20 +749,20 @@ static inline struct search *search_alloc(struct bio *bio,
/* Cached devices */
-static void cached_dev_bio_complete(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_bio_complete)
{
- struct search *s = container_of(cl, struct search, cl);
+ closure_type(s, struct search, cl);
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
cached_dev_put(dc);
- search_free(cl);
+ search_free(&cl->work);
}
/* Process reads */
-static void cached_dev_read_error_done(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_read_error_done)
{
- struct search *s = container_of(cl, struct search, cl);
+ closure_type(s, struct search, cl);
if (s->iop.replace_collision)
bch_mark_cache_miss_collision(s->iop.c, s->d);
@@ -770,12 +770,12 @@ static void cached_dev_read_error_done(struct closure *cl)
if (s->iop.bio)
bio_free_pages(s->iop.bio);
- cached_dev_bio_complete(cl);
+ cached_dev_bio_complete(&cl->work);
}
-static void cached_dev_read_error(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_read_error)
{
- struct search *s = container_of(cl, struct search, cl);
+ closure_type(s, struct search, cl);
struct bio *bio = &s->bio.bio;
/*
@@ -801,9 +801,9 @@ static void cached_dev_read_error(struct closure *cl)
continue_at(cl, cached_dev_read_error_done, NULL);
}
-static void cached_dev_cache_miss_done(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_cache_miss_done)
{
- struct search *s = container_of(cl, struct search, cl);
+ closure_type(s, struct search, cl);
struct bcache_device *d = s->d;
if (s->iop.replace_collision)
@@ -812,13 +812,13 @@ static void cached_dev_cache_miss_done(struct closure *cl)
if (s->iop.bio)
bio_free_pages(s->iop.bio);
- cached_dev_bio_complete(cl);
+ cached_dev_bio_complete(&cl->work);
closure_put(&d->cl);
}
-static void cached_dev_read_done(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_read_done)
{
- struct search *s = container_of(cl, struct search, cl);
+ closure_type(s, struct search, cl);
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
/*
@@ -858,9 +858,9 @@ static void cached_dev_read_done(struct closure *cl)
continue_at(cl, cached_dev_cache_miss_done, NULL);
}
-static void cached_dev_read_done_bh(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_read_done_bh)
{
- struct search *s = container_of(cl, struct search, cl);
+ closure_type(s, struct search, cl);
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
bch_mark_cache_accounting(s->iop.c, s->d,
@@ -955,13 +955,13 @@ static void cached_dev_read(struct cached_dev *dc, struct search *s)
/* Process writes */
-static void cached_dev_write_complete(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_write_complete)
{
- struct search *s = container_of(cl, struct search, cl);
+ closure_type(s, struct search, cl);
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
up_read_non_owner(&dc->writeback_lock);
- cached_dev_bio_complete(cl);
+ cached_dev_bio_complete(&cl->work);
}
static void cached_dev_write(struct cached_dev *dc, struct search *s)
@@ -1048,9 +1048,9 @@ insert_data:
continue_at(cl, cached_dev_write_complete, NULL);
}
-static void cached_dev_nodata(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_nodata)
{
- struct search *s = container_of(cl, struct search, cl);
+ closure_type(s, struct search, cl);
struct bio *bio = &s->bio.bio;
if (s->iop.flush_journal)
@@ -1265,9 +1265,9 @@ static int flash_dev_cache_miss(struct btree *b, struct search *s,
return MAP_CONTINUE;
}
-static void flash_dev_nodata(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_nodata)
{
- struct search *s = container_of(cl, struct search, cl);
+ closure_type(s, struct search, cl);
if (s->iop.flush_journal)
bch_journal_meta(s->iop.c, cl);
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 38ab4856eaab..46bbef00aebb 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -34,7 +34,7 @@ struct data_insert_op {
};
unsigned int bch_get_congested(const struct cache_set *c);
-void bch_data_insert(struct closure *cl);
+CLOSURE_CALLBACK(bch_data_insert);
void bch_cached_dev_request_init(struct cached_dev *dc);
void cached_dev_submit_bio(struct bio *bio);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 8bd899766372..1402096b8076 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -327,9 +327,9 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
submit_bio(bio);
}
-static void bch_write_bdev_super_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(bch_write_bdev_super_unlock)
{
- struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write);
+ closure_type(dc, struct cached_dev, sb_write);
up(&dc->sb_write_mutex);
}
@@ -363,9 +363,9 @@ static void write_super_endio(struct bio *bio)
closure_put(&ca->set->sb_write);
}
-static void bcache_write_super_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(bcache_write_super_unlock)
{
- struct cache_set *c = container_of(cl, struct cache_set, sb_write);
+ closure_type(c, struct cache_set, sb_write);
up(&c->sb_write_mutex);
}
@@ -407,9 +407,9 @@ static void uuid_endio(struct bio *bio)
closure_put(cl);
}
-static void uuid_io_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(uuid_io_unlock)
{
- struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
+ closure_type(c, struct cache_set, uuid_write);
up(&c->uuid_write_mutex);
}
@@ -905,6 +905,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
if (!d->stripe_size)
d->stripe_size = 1 << 31;
+ else if (d->stripe_size < BCH_MIN_STRIPE_SZ)
+ d->stripe_size = roundup(BCH_MIN_STRIPE_SZ, d->stripe_size);
n = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
if (!n || n > max_stripes) {
@@ -1342,9 +1344,9 @@ void bch_cached_dev_release(struct kobject *kobj)
module_put(THIS_MODULE);
}
-static void cached_dev_free(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_free)
{
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+ closure_type(dc, struct cached_dev, disk.cl);
if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
cancel_writeback_rate_update_dwork(dc);
@@ -1376,9 +1378,9 @@ static void cached_dev_free(struct closure *cl)
kobject_put(&dc->disk.kobj);
}
-static void cached_dev_flush(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_flush)
{
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+ closure_type(dc, struct cached_dev, disk.cl);
struct bcache_device *d = &dc->disk;
mutex_lock(&bch_register_lock);
@@ -1497,9 +1499,9 @@ void bch_flash_dev_release(struct kobject *kobj)
kfree(d);
}
-static void flash_dev_free(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_free)
{
- struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+ closure_type(d, struct bcache_device, cl);
mutex_lock(&bch_register_lock);
atomic_long_sub(bcache_dev_sectors_dirty(d),
@@ -1510,9 +1512,9 @@ static void flash_dev_free(struct closure *cl)
kobject_put(&d->kobj);
}
-static void flash_dev_flush(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_flush)
{
- struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+ closure_type(d, struct bcache_device, cl);
mutex_lock(&bch_register_lock);
bcache_device_unlink(d);
@@ -1668,9 +1670,9 @@ void bch_cache_set_release(struct kobject *kobj)
module_put(THIS_MODULE);
}
-static void cache_set_free(struct closure *cl)
+static CLOSURE_CALLBACK(cache_set_free)
{
- struct cache_set *c = container_of(cl, struct cache_set, cl);
+ closure_type(c, struct cache_set, cl);
struct cache *ca;
debugfs_remove(c->debug);
@@ -1709,9 +1711,9 @@ static void cache_set_free(struct closure *cl)
kobject_put(&c->kobj);
}
-static void cache_set_flush(struct closure *cl)
+static CLOSURE_CALLBACK(cache_set_flush)
{
- struct cache_set *c = container_of(cl, struct cache_set, caching);
+ closure_type(c, struct cache_set, caching);
struct cache *ca = c->cache;
struct btree *b;
@@ -1806,9 +1808,9 @@ static void conditional_stop_bcache_device(struct cache_set *c,
}
}
-static void __cache_set_unregister(struct closure *cl)
+static CLOSURE_CALLBACK(__cache_set_unregister)
{
- struct cache_set *c = container_of(cl, struct cache_set, caching);
+ closure_type(c, struct cache_set, caching);
struct cached_dev *dc;
struct bcache_device *d;
size_t i;
@@ -2016,7 +2018,7 @@ static int run_cache_set(struct cache_set *c)
c->root = bch_btree_node_get(c, NULL, k,
j->btree_level,
true, NULL);
- if (IS_ERR_OR_NULL(c->root))
+ if (IS_ERR(c->root))
goto err;
list_del_init(&c->root->list);
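
The bcache_device_init() hunk above pairs with BCH_MIN_STRIPE_SZ: a stripe
size below the 4 MiB floor is replaced by the smallest multiple of itself
that reaches the floor, since the kernel's roundup(x, y) rounds x up to a
multiple of y. A standalone sketch of that clamp, with an illustrative
input value:

/* Standalone sketch of the new stripe_size clamp in bcache_device_init().
 * roundup(x, y) rounds x up to a multiple of y, so a too-small stripe
 * size becomes the smallest multiple of itself >= BCH_MIN_STRIPE_SZ,
 * preserving the device's natural alignment. */
#include <stdio.h>

#define SECTOR_SHIFT 9
#define BCH_MIN_STRIPE_SZ ((4 << 20) >> SECTOR_SHIFT)	/* 8192 sectors */
#define roundup(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned int stripe_size = 3000;	/* hypothetical small value */

	if (stripe_size < BCH_MIN_STRIPE_SZ)
		stripe_size = roundup(BCH_MIN_STRIPE_SZ, stripe_size);

	printf("%u\n", stripe_size);	/* 9000: 3 * 3000 >= 8192 */
	return 0;
}
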
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 45d8af755de6..a438efb66069 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -1104,7 +1104,7 @@ SHOW(__bch_cache)
sum += INITIAL_PRIO - cached[i];
if (n)
- do_div(sum, n);
+ sum = div64_u64(sum, n);
for (i = 0; i < ARRAY_SIZE(q); i++)
q[i] = INITIAL_PRIO - cached[n * (i + 1) /
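
The sysfs.c hunk swaps do_div() for div64_u64() because the kernel's
do_div() divides a u64 by a 32-bit divisor (modifying the dividend in
place), while here the divisor n can be a full 64-bit quantity. A
userspace model of the distinction, with div64_u64() reimplemented as the
plain division a 64-bit build resolves it to:

/* Userspace model of why the change is needed: a divisor that may not
 * fit in 32 bits requires div64_u64(), which takes a full u64 divisor
 * and returns the quotient, unlike do_div(). */
#include <stdio.h>
#include <stdint.h>

static inline uint64_t div64_u64(uint64_t dividend, uint64_t divisor)
{
	return dividend / divisor;
}

int main(void)
{
	uint64_t sum = 10000000000ull;
	uint64_t n = 5000000000ull;	/* would be truncated by do_div() */

	printf("%llu\n", (unsigned long long)div64_u64(sum, n));
	return 0;
}
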
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 24c049067f61..8827a6f130ad 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -341,16 +341,16 @@ static void dirty_init(struct keybuf_key *w)
bch_bio_map(bio, NULL);
}
-static void dirty_io_destructor(struct closure *cl)
+static CLOSURE_CALLBACK(dirty_io_destructor)
{
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+ closure_type(io, struct dirty_io, cl);
kfree(io);
}
-static void write_dirty_finish(struct closure *cl)
+static CLOSURE_CALLBACK(write_dirty_finish)
{
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+ closure_type(io, struct dirty_io, cl);
struct keybuf_key *w = io->bio.bi_private;
struct cached_dev *dc = io->dc;
@@ -400,9 +400,9 @@ static void dirty_endio(struct bio *bio)
closure_put(&io->cl);
}
-static void write_dirty(struct closure *cl)
+static CLOSURE_CALLBACK(write_dirty)
{
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+ closure_type(io, struct dirty_io, cl);
struct keybuf_key *w = io->bio.bi_private;
struct cached_dev *dc = io->dc;
@@ -462,9 +462,9 @@ static void read_dirty_endio(struct bio *bio)
dirty_endio(bio);
}
-static void read_dirty_submit(struct closure *cl)
+static CLOSURE_CALLBACK(read_dirty_submit)
{
- struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+ closure_type(io, struct dirty_io, cl);
closure_bio_submit(io->dc->disk.c, &io->bio, cl);
@@ -913,7 +913,7 @@ static int bch_dirty_init_thread(void *arg)
int cur_idx, prev_idx, skip_nr;
k = p = NULL;
- cur_idx = prev_idx = 0;
+ prev_idx = 0;
bch_btree_iter_init(&c->root->keys, &iter, NULL);
k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
@@ -977,24 +977,35 @@ static int bch_btre_dirty_init_thread_nr(void)
void bch_sectors_dirty_init(struct bcache_device *d)
{
int i;
+ struct btree *b = NULL;
struct bkey *k = NULL;
struct btree_iter iter;
struct sectors_dirty_init op;
struct cache_set *c = d->c;
struct bch_dirty_init_state state;
+retry_lock:
+ b = c->root;
+ rw_lock(0, b, b->level);
+ if (b != c->root) {
+ rw_unlock(0, b);
+ goto retry_lock;
+ }
+
/* Just count root keys if no leaf node */
- rw_lock(0, c->root, c->root->level);
if (c->root->level == 0) {
bch_btree_op_init(&op.op, -1);
op.inode = d->id;
op.count = 0;
for_each_key_filter(&c->root->keys,
- k, &iter, bch_ptr_invalid)
+ k, &iter, bch_ptr_invalid) {
+ if (KEY_INODE(k) != op.inode)
+ continue;
sectors_dirty_init_fn(&op.op, c->root, k);
+ }
- rw_unlock(0, c->root);
+ rw_unlock(0, b);
return;
}
@@ -1014,23 +1025,24 @@ void bch_sectors_dirty_init(struct bcache_device *d)
if (atomic_read(&state.enough))
break;
+ atomic_inc(&state.started);
state.infos[i].state = &state;
state.infos[i].thread =
kthread_run(bch_dirty_init_thread, &state.infos[i],
"bch_dirtcnt[%d]", i);
if (IS_ERR(state.infos[i].thread)) {
pr_err("fails to run thread bch_dirty_init[%d]\n", i);
+ atomic_dec(&state.started);
for (--i; i >= 0; i--)
kthread_stop(state.infos[i].thread);
goto out;
}
- atomic_inc(&state.started);
}
out:
/* Must wait for all threads to stop. */
wait_event(state.wait, atomic_read(&state.started) == 0);
- rw_unlock(0, c->root);
+ rw_unlock(0, b);
}
void bch_cached_dev_writeback_init(struct cached_dev *dc)
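
The retry_lock loop added to bch_sectors_dirty_init() is a
snapshot/lock/recheck pattern: the btree root may be replaced while this
thread sleeps on its lock, so after acquiring the lock the code verifies
the node it locked is still the root and retries otherwise. A minimal
pthread model of the same idea (names are illustrative, not the bcache
API):

/* Snapshot the root, lock it, and verify the pointer we locked is still
 * the root; retry if another thread swapped it while we slept. */
#include <pthread.h>
#include <stdio.h>

struct node {
	pthread_rwlock_t lock;
	int level;
};

static struct node *root;	/* may be swapped by other threads */

static struct node *lock_root_for_read(void)
{
	struct node *b;

	for (;;) {
		b = root;				/* snapshot */
		pthread_rwlock_rdlock(&b->lock);
		if (b == root)				/* still the root? */
			return b;			/* locked and valid */
		pthread_rwlock_unlock(&b->lock);	/* raced: retry */
	}
}

int main(void)
{
	struct node n = { .level = 0 };

	pthread_rwlock_init(&n.lock, NULL);
	root = &n;

	struct node *b = lock_root_for_read();
	printf("locked root, level %d\n", b->level);
	pthread_rwlock_unlock(&b->lock);
	return 0;
}
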
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 62eb27639c9b..f03d7dba270c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -254,7 +254,7 @@ enum evict_result {
typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
-static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
{
unsigned long tested = 0;
struct list_head *h = lru->cursor;
@@ -295,7 +295,8 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
h = h->next;
- cond_resched();
+ if (!no_sleep)
+ cond_resched();
}
return NULL;
@@ -382,7 +383,10 @@ struct dm_buffer {
*/
struct buffer_tree {
- struct rw_semaphore lock;
+ union {
+ struct rw_semaphore lock;
+ rwlock_t spinlock;
+ } u;
struct rb_root root;
} ____cacheline_aligned_in_smp;
@@ -393,9 +397,12 @@ struct dm_buffer_cache {
* on the locks.
*/
unsigned int num_locks;
+ bool no_sleep;
struct buffer_tree trees[];
};
+static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
+
static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
{
return dm_hash_locks_index(block, num_locks);
@@ -403,22 +410,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
{
- down_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
{
- up_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
{
- down_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
{
- up_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
/*
@@ -442,18 +461,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool
static void __lh_lock(struct lock_history *lh, unsigned int index)
{
- if (lh->write)
- down_write(&lh->cache->trees[index].lock);
- else
- down_read(&lh->cache->trees[index].lock);
+ if (lh->write) {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ write_lock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ down_write(&lh->cache->trees[index].u.lock);
+ } else {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ read_lock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ down_read(&lh->cache->trees[index].u.lock);
+ }
}
static void __lh_unlock(struct lock_history *lh, unsigned int index)
{
- if (lh->write)
- up_write(&lh->cache->trees[index].lock);
- else
- up_read(&lh->cache->trees[index].lock);
+ if (lh->write) {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ write_unlock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ up_write(&lh->cache->trees[index].u.lock);
+ } else {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ read_unlock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ up_read(&lh->cache->trees[index].u.lock);
+ }
}
/*
@@ -502,14 +535,18 @@ static struct dm_buffer *list_to_buffer(struct list_head *l)
return le_to_buffer(le);
}
-static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks)
+static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
{
unsigned int i;
bc->num_locks = num_locks;
+ bc->no_sleep = no_sleep;
for (i = 0; i < bc->num_locks; i++) {
- init_rwsem(&bc->trees[i].lock);
+ if (no_sleep)
+ rwlock_init(&bc->trees[i].u.spinlock);
+ else
+ init_rwsem(&bc->trees[i].u.lock);
bc->trees[i].root = RB_ROOT;
}
@@ -648,7 +685,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode
struct lru_entry *le;
struct dm_buffer *b;
- le = lru_evict(&bc->lru[list_mode], __evict_pred, &w);
+ le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
if (!le)
return NULL;
@@ -702,7 +739,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_
struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};
while (true) {
- le = lru_evict(&bc->lru[old_mode], __evict_pred, &w);
+ le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
if (!le)
break;
@@ -915,10 +952,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc,
{
unsigned int i;
+ BUG_ON(bc->no_sleep);
for (i = 0; i < bc->num_locks; i++) {
- down_write(&bc->trees[i].lock);
+ down_write(&bc->trees[i].u.lock);
__remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
- up_write(&bc->trees[i].lock);
+ up_write(&bc->trees[i].u.lock);
}
}
@@ -979,8 +1017,6 @@ struct dm_bufio_client {
struct dm_buffer_cache cache; /* must be last member */
};
-static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
-
/*----------------------------------------------------------------*/
#define dm_bufio_in_request() (!!current->bio_list)
@@ -1871,7 +1907,8 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
if (need_submit)
submit_io(b, REQ_OP_READ, read_endio);
- wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
+ if (nf != NF_GET) /* we already tested this condition above */
+ wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
if (b->read_error) {
int error = blk_status_to_errno(b->read_error);
@@ -2421,7 +2458,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
r = -ENOMEM;
goto bad_client;
}
- cache_init(&c->cache, num_locks);
+ cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);
c->bdev = bdev;
c->block_size = block_size;
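
The dm-bufio changes let a client created with DM_BUFIO_CLIENT_NO_SLEEP run
in atomic context: each tree's lock becomes a union of a sleeping
rw_semaphore and a non-sleeping rwlock_t, selected once in cache_init(),
with a static key keeping the common sleeping path cheap. A userspace
model of the union-based dispatch (pthread primitives and a plain bool
stand in for the kernel lock types and the static key):

/* Userspace model of dm-bufio's per-tree lock union. */
#include <pthread.h>
#include <stdio.h>
#include <stdbool.h>

struct buffer_tree {
	union {
		pthread_rwlock_t lock;		/* stands in for rw_semaphore */
		pthread_spinlock_t spinlock;	/* stands in for rwlock_t */
	} u;
};

struct buffer_cache {
	bool no_sleep;
	struct buffer_tree tree;
};

static void cache_init(struct buffer_cache *bc, bool no_sleep)
{
	bc->no_sleep = no_sleep;
	if (no_sleep)
		pthread_spin_init(&bc->tree.u.spinlock,
				  PTHREAD_PROCESS_PRIVATE);
	else
		pthread_rwlock_init(&bc->tree.u.lock, NULL);
}

static void cache_read_lock(struct buffer_cache *bc)
{
	if (bc->no_sleep)
		pthread_spin_lock(&bc->tree.u.spinlock);
	else
		pthread_rwlock_rdlock(&bc->tree.u.lock);
}

static void cache_read_unlock(struct buffer_cache *bc)
{
	if (bc->no_sleep)
		pthread_spin_unlock(&bc->tree.u.spinlock);
	else
		pthread_rwlock_unlock(&bc->tree.u.lock);
}

int main(void)
{
	struct buffer_cache bc;

	cache_init(&bc, false);
	cache_read_lock(&bc);
	printf("locked via rwsem-style path\n");
	cache_read_unlock(&bc);
	return 0;
}
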
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 6de107aff331..2ae8560b6a14 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1673,7 +1673,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size)
unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
unsigned int remaining_size;
- unsigned int order = MAX_ORDER - 1;
+ unsigned int order = MAX_ORDER;
retry:
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
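
The MAX_ORDER - 1 to MAX_ORDER change here (and the identical one in
dm-flakey.c below) follows the mm-side redefinition that made MAX_ORDER
the largest allocatable order, inclusive. crypt_alloc_buffer() starts at
the top order and falls back toward single pages on failure; a userspace
model of that fallback, with malloc() standing in for alloc_pages() and an
illustrative MAX_ORDER value:

/* Userspace model of the order-fallback allocation loop. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096u
#define MAX_ORDER 10	/* inclusive upper bound; illustrative value */

static void *alloc_some_pages(unsigned int *order, size_t remaining)
{
	for (;;) {
		size_t size = (size_t)PAGE_SIZE << *order;
		void *p;

		/* don't allocate wildly past what we still need */
		while (*order && size > remaining * 2) {
			(*order)--;
			size >>= 1;
		}
		p = malloc(size);
		if (p || !*order)
			return p;
		(*order)--;	/* fall back to a smaller order */
	}
}

int main(void)
{
	unsigned int order = MAX_ORDER;
	void *p = alloc_some_pages(&order, 3 * PAGE_SIZE);

	printf("allocated order %u\n", order);
	free(p);
	return 0;
}
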
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index efd510984e25..5eabdb06c649 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -33,7 +33,7 @@ struct delay_c {
struct work_struct flush_expired_bios;
struct list_head delayed_bios;
struct task_struct *worker;
- atomic_t may_delay;
+ bool may_delay;
struct delay_class read;
struct delay_class write;
@@ -73,39 +73,6 @@ static inline bool delay_is_fast(struct delay_c *dc)
return !!dc->worker;
}
-static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
-{
- struct dm_delay_info *delayed, *next;
-
- mutex_lock(&delayed_bios_lock);
- list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
- if (flush_all || time_after_eq(jiffies, delayed->expires)) {
- struct bio *bio = dm_bio_from_per_bio_data(delayed,
- sizeof(struct dm_delay_info));
- list_del(&delayed->list);
- dm_submit_bio_remap(bio, NULL);
- delayed->class->ops--;
- }
- }
- mutex_unlock(&delayed_bios_lock);
-}
-
-static int flush_worker_fn(void *data)
-{
- struct delay_c *dc = data;
-
- while (1) {
- flush_delayed_bios_fast(dc, false);
- if (unlikely(list_empty(&dc->delayed_bios))) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- } else
- cond_resched();
- }
-
- return 0;
-}
-
static void flush_bios(struct bio *bio)
{
struct bio *n;
@@ -118,36 +85,61 @@ static void flush_bios(struct bio *bio)
}
}
-static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
+static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
struct dm_delay_info *delayed, *next;
+ struct bio_list flush_bio_list;
unsigned long next_expires = 0;
- unsigned long start_timer = 0;
- struct bio_list flush_bios = { };
+ bool start_timer = false;
+ bio_list_init(&flush_bio_list);
mutex_lock(&delayed_bios_lock);
list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+ cond_resched();
if (flush_all || time_after_eq(jiffies, delayed->expires)) {
struct bio *bio = dm_bio_from_per_bio_data(delayed,
sizeof(struct dm_delay_info));
list_del(&delayed->list);
- bio_list_add(&flush_bios, bio);
+ bio_list_add(&flush_bio_list, bio);
delayed->class->ops--;
continue;
}
- if (!start_timer) {
- start_timer = 1;
- next_expires = delayed->expires;
- } else
- next_expires = min(next_expires, delayed->expires);
+ if (!delay_is_fast(dc)) {
+ if (!start_timer) {
+ start_timer = true;
+ next_expires = delayed->expires;
+ } else {
+ next_expires = min(next_expires, delayed->expires);
+ }
+ }
}
mutex_unlock(&delayed_bios_lock);
if (start_timer)
queue_timeout(dc, next_expires);
- return bio_list_get(&flush_bios);
+ flush_bios(bio_list_get(&flush_bio_list));
+}
+
+static int flush_worker_fn(void *data)
+{
+ struct delay_c *dc = data;
+
+ while (!kthread_should_stop()) {
+ flush_delayed_bios(dc, false);
+ mutex_lock(&delayed_bios_lock);
+ if (unlikely(list_empty(&dc->delayed_bios))) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ mutex_unlock(&delayed_bios_lock);
+ schedule();
+ } else {
+ mutex_unlock(&delayed_bios_lock);
+ cond_resched();
+ }
+ }
+
+ return 0;
}
static void flush_expired_bios(struct work_struct *work)
@@ -155,10 +147,7 @@ static void flush_expired_bios(struct work_struct *work)
struct delay_c *dc;
dc = container_of(work, struct delay_c, flush_expired_bios);
- if (delay_is_fast(dc))
- flush_delayed_bios_fast(dc, false);
- else
- flush_bios(flush_delayed_bios(dc, false));
+ flush_delayed_bios(dc, false);
}
static void delay_dtr(struct dm_target *ti)
@@ -177,8 +166,7 @@ static void delay_dtr(struct dm_target *ti)
if (dc->worker)
kthread_stop(dc->worker);
- if (!delay_is_fast(dc))
- mutex_destroy(&dc->timer_lock);
+ mutex_destroy(&dc->timer_lock);
kfree(dc);
}
@@ -236,7 +224,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->private = dc;
INIT_LIST_HEAD(&dc->delayed_bios);
- atomic_set(&dc->may_delay, 1);
+ mutex_init(&dc->timer_lock);
+ dc->may_delay = true;
dc->argc = argc;
ret = delay_class_ctr(ti, &dc->read, argv);
@@ -282,12 +271,12 @@ out:
"dm-delay-flush-worker");
if (IS_ERR(dc->worker)) {
ret = PTR_ERR(dc->worker);
+ dc->worker = NULL;
goto bad;
}
} else {
timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
- mutex_init(&dc->timer_lock);
dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
if (!dc->kdelayd_wq) {
ret = -EINVAL;
@@ -312,7 +301,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
struct dm_delay_info *delayed;
unsigned long expires = 0;
- if (!c->delay || !atomic_read(&dc->may_delay))
+ if (!c->delay)
return DM_MAPIO_REMAPPED;
delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
@@ -321,6 +310,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
mutex_lock(&delayed_bios_lock);
+ if (unlikely(!dc->may_delay)) {
+ mutex_unlock(&delayed_bios_lock);
+ return DM_MAPIO_REMAPPED;
+ }
c->ops++;
list_add_tail(&delayed->list, &dc->delayed_bios);
mutex_unlock(&delayed_bios_lock);
@@ -337,21 +330,20 @@ static void delay_presuspend(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
- atomic_set(&dc->may_delay, 0);
+ mutex_lock(&delayed_bios_lock);
+ dc->may_delay = false;
+ mutex_unlock(&delayed_bios_lock);
- if (delay_is_fast(dc))
- flush_delayed_bios_fast(dc, true);
- else {
+ if (!delay_is_fast(dc))
del_timer_sync(&dc->delay_timer);
- flush_bios(flush_delayed_bios(dc, true));
- }
+ flush_delayed_bios(dc, true);
}
static void delay_resume(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
- atomic_set(&dc->may_delay, 1);
+ dc->may_delay = true;
}
static int delay_map(struct dm_target *ti, struct bio *bio)
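
The reworked dm-delay keeps a single flush_delayed_bios() for both modes
and fixes the worker's sleep/wakeup race: emptiness is now checked under
delayed_bios_lock, and the task state is set before the lock is dropped so
a concurrent insert cannot be missed, with kthread_should_stop() honored
so delay_dtr() can actually stop the thread. A userspace model of that
commit-to-sleep pattern (a condition variable stands in for the kthread
wake-up):

/* The list is checked under the same lock writers take, and the worker
 * commits to sleeping before releasing it (cond_wait drops the mutex
 * atomically), so a concurrent insert cannot be missed. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t delayed_bios_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wakeup = PTHREAD_COND_INITIALIZER;
static int n_delayed;		/* stands in for the delayed_bios list */
static bool should_stop;	/* stands in for kthread_should_stop() */

static void *flush_worker_fn(void *data)
{
	(void)data;
	pthread_mutex_lock(&delayed_bios_lock);
	for (;;) {
		if (n_delayed) {
			printf("flushing %d delayed bios\n", n_delayed);
			n_delayed = 0;
		} else if (should_stop) {
			break;
		} else {
			pthread_cond_wait(&wakeup, &delayed_bios_lock);
		}
	}
	pthread_mutex_unlock(&delayed_bios_lock);
	return NULL;
}

int main(void)
{
	pthread_t worker;

	pthread_create(&worker, NULL, flush_worker_fn, NULL);

	pthread_mutex_lock(&delayed_bios_lock);
	n_delayed = 3;			/* what delay_bio() does */
	should_stop = true;		/* what kthread_stop() models */
	pthread_cond_signal(&wakeup);
	pthread_mutex_unlock(&delayed_bios_lock);

	pthread_join(worker, NULL);
	return 0;
}
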
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 120153e44ae0..f57fb821528d 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -434,7 +434,7 @@ static struct bio *clone_bio(struct dm_target *ti, struct flakey_c *fc, struct b
remaining_size = size;
- order = MAX_ORDER - 1;
+ order = MAX_ORDER;
while (remaining_size) {
struct page *pages;
unsigned size_to_add, to_copy;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index e85c688fd91e..c5f03aab4552 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1755,11 +1755,12 @@ static void integrity_metadata(struct work_struct *w)
sectors_to_process = dio->range.n_sectors;
__bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) {
+ struct bio_vec bv_copy = bv;
unsigned int pos;
char *mem, *checksums_ptr;
again:
- mem = bvec_kmap_local(&bv);
+ mem = bvec_kmap_local(&bv_copy);
pos = 0;
checksums_ptr = checksums;
do {
@@ -1768,7 +1769,7 @@ again:
sectors_to_process -= ic->sectors_per_block;
pos += ic->sectors_per_block << SECTOR_SHIFT;
sector += ic->sectors_per_block;
- } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
+ } while (pos < bv_copy.bv_len && sectors_to_process && checksums != checksums_onstack);
kunmap_local(mem);
r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
@@ -1793,9 +1794,9 @@ again:
if (!sectors_to_process)
break;
- if (unlikely(pos < bv.bv_len)) {
- bv.bv_offset += pos;
- bv.bv_len -= pos;
+ if (unlikely(pos < bv_copy.bv_len)) {
+ bv_copy.bv_offset += pos;
+ bv_copy.bv_len -= pos;
goto again;
}
}
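
The dm-integrity fix stops the "goto again" path from modifying bv, the
segment descriptor produced by the bio iterator; all partial-progress
adjustments now go to the stack copy bv_copy, so the bio's own bio_vec
stays untouched. A minimal sketch of the same discipline, with simplified
stand-in types:

/* The canonical descriptor is treated as read-only; partial progress is
 * tracked on a local copy, mirroring bv_copy in the hunk above. */
#include <stdio.h>

struct seg { unsigned int offset, len; };	/* stands in for bio_vec */

static unsigned int process_chunk(const struct seg *s, unsigned int max)
{
	unsigned int n = s->len < max ? s->len : max;

	printf("processed %u bytes at offset %u\n", n, s->offset);
	return n;
}

int main(void)
{
	const struct seg immutable = { .offset = 0, .len = 10000 };
	struct seg copy = immutable;		/* like bv_copy = bv */

	/* like the "goto again" path: advance the copy, not the original */
	while (copy.len) {
		unsigned int done = process_chunk(&copy, 4096);

		copy.offset += done;
		copy.len -= done;
	}
	return 0;
}
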
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 91ebdcc6e9a8..eb009d6bb03a 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3317,6 +3317,9 @@ static void raid_dtr(struct dm_target *ti)
mddev_lock_nointr(&rs->md);
md_stop(&rs->md);
mddev_unlock(&rs->md);
+
+ if (work_pending(&rs->md.event_work))
+ flush_work(&rs->md.event_work);
raid_set_free(rs);
}
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 3ef9f018da60..b475200d8586 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -24,7 +24,8 @@ bool verity_fec_is_enabled(struct dm_verity *v)
*/
static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io)
{
- return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io);
+ return (struct dm_verity_fec_io *)
+ ((char *)io + io->v->ti->per_io_data_size - sizeof(struct dm_verity_fec_io));
}
/*
@@ -185,7 +186,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
{
if (unlikely(verity_hash(v, verity_io_hash_req(v, io),
data, 1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io))))
+ verity_io_real_digest(v, io), true)))
return 0;
return memcmp(verity_io_real_digest(v, io), want_digest,
@@ -386,7 +387,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
/* Always re-validate the corrected block against the expected hash */
r = verity_hash(v, verity_io_hash_req(v, io), fio->output,
1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io));
+ verity_io_real_digest(v, io), true);
if (unlikely(r < 0))
return r;
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 26adcfea0302..14e58ae70521 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -135,20 +135,21 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
* Wrapper for crypto_ahash_init, which handles verity salting.
*/
static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
- struct crypto_wait *wait)
+ struct crypto_wait *wait, bool may_sleep)
{
int r;
ahash_request_set_tfm(req, v->tfm);
- ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
- CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, (void *)wait);
+ ahash_request_set_callback(req,
+ may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
+ crypto_req_done, (void *)wait);
crypto_init_wait(wait);
r = crypto_wait_req(crypto_ahash_init(req), wait);
if (unlikely(r < 0)) {
- DMERR("crypto_ahash_init failed: %d", r);
+ if (r != -ENOMEM)
+ DMERR("crypto_ahash_init failed: %d", r);
return r;
}
@@ -179,12 +180,12 @@ out:
}
int verity_hash(struct dm_verity *v, struct ahash_request *req,
- const u8 *data, size_t len, u8 *digest)
+ const u8 *data, size_t len, u8 *digest, bool may_sleep)
{
int r;
struct crypto_wait wait;
- r = verity_hash_init(v, req, &wait);
+ r = verity_hash_init(v, req, &wait, may_sleep);
if (unlikely(r < 0))
goto out;
@@ -322,7 +323,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
r = verity_hash(v, verity_io_hash_req(v, io),
data, 1 << v->hash_dev_block_bits,
- verity_io_real_digest(v, io));
+ verity_io_real_digest(v, io), !io->in_tasklet);
if (unlikely(r < 0))
goto release_ret_r;
@@ -556,7 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io)
continue;
}
- r = verity_hash_init(v, req, &wait);
+ r = verity_hash_init(v, req, &wait, !io->in_tasklet);
if (unlikely(r < 0))
return r;
@@ -641,7 +642,6 @@ static void verity_work(struct work_struct *w)
io->in_tasklet = false;
- verity_fec_init_io(io);
verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
}
@@ -652,7 +652,7 @@ static void verity_tasklet(unsigned long data)
io->in_tasklet = true;
err = verity_verify_io(io);
- if (err == -EAGAIN) {
+ if (err == -EAGAIN || err == -ENOMEM) {
/* fallback to retrying with work-queue */
INIT_WORK(&io->work, verity_work);
queue_work(io->v->verify_wq, &io->work);
@@ -667,7 +667,9 @@ static void verity_end_io(struct bio *bio)
struct dm_verity_io *io = bio->bi_private;
if (bio->bi_status &&
- (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) {
+ (!verity_fec_is_enabled(io->v) ||
+ verity_is_system_shutting_down() ||
+ (bio->bi_opf & REQ_RAHEAD))) {
verity_finish_io(io, bio->bi_status);
return;
}
@@ -791,6 +793,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
bio->bi_private = io;
io->iter = bio->bi_iter;
+ verity_fec_init_io(io);
+
verity_submit_prefetch(v, io);
submit_bio_noacct(bio);
@@ -1033,7 +1037,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
goto out;
r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits,
- v->zero_digest);
+ v->zero_digest, true);
out:
kfree(req);
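
The verity changes thread a may_sleep flag down to the hash helpers:
tasklet callers pass false, which drops CRYPTO_TFM_REQ_MAY_SLEEP |
CRYPTO_TFM_REQ_MAY_BACKLOG from the request, and an -ENOMEM from that
non-sleeping attempt (like -EAGAIN) now falls back to the workqueue
instead of failing the I/O. A userspace model of the flag selection and
fallback (flag values and helpers are stand-ins, not the kernel crypto
API):

/* Atomic callers clear the sleep flags; a failed atomic attempt retries
 * from a context where sleeping is allowed. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define REQ_MAY_SLEEP	0x1	/* stands in for CRYPTO_TFM_REQ_MAY_SLEEP */
#define REQ_MAY_BACKLOG	0x2	/* stands in for ..._MAY_BACKLOG */

static int do_hash(unsigned int flags)
{
	if (!(flags & REQ_MAY_SLEEP))
		return -ENOMEM;	/* pretend the atomic attempt failed */
	return 0;
}

static int verity_hash_model(bool may_sleep)
{
	unsigned int flags =
		may_sleep ? REQ_MAY_SLEEP | REQ_MAY_BACKLOG : 0;

	return do_hash(flags);
}

int main(void)
{
	int err = verity_hash_model(false);	/* tasklet path */

	if (err == -EAGAIN || err == -ENOMEM) {
		printf("falling back to workqueue\n");
		err = verity_hash_model(true);	/* work-queue path */
	}
	printf("final status: %d\n", err);
	return 0;
}
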
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 2f555b420367..f9d522c870e6 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -115,12 +115,6 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v,
return (u8 *)(io + 1) + v->ahash_reqsize + v->digest_size;
}
-static inline u8 *verity_io_digest_end(struct dm_verity *v,
- struct dm_verity_io *io)
-{
- return verity_io_want_digest(v, io) + v->digest_size;
-}
-
extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
struct bvec_iter *iter,
int (*process)(struct dm_verity *v,
@@ -128,7 +122,7 @@ extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
u8 *data, size_t len));
extern int verity_hash(struct dm_verity *v, struct ahash_request *req,
- const u8 *data, size_t len, u8 *digest);
+ const u8 *data, size_t len, u8 *digest, bool may_sleep);
extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
sector_t block, u8 *digest, bool *is_zero);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4ee4593c874a..9bdd57324c37 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -82,6 +82,14 @@ static struct module *md_cluster_mod;
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
+
+/*
+ * This workqueue is used by sync_work to register a new sync_thread, by
+ * del_work to remove an rdev, and by event_work, which is only set by dm-raid.
+ *
+ * Note that sync_work grabs reconfig_mutex, hence never flush this
+ * workqueue with reconfig_mutex held.
+ */
static struct workqueue_struct *md_misc_wq;
struct workqueue_struct *md_bitmap_wq;
@@ -490,7 +498,7 @@ int mddev_suspend(struct mddev *mddev, bool interruptible)
}
EXPORT_SYMBOL_GPL(mddev_suspend);
-void mddev_resume(struct mddev *mddev)
+static void __mddev_resume(struct mddev *mddev, bool recovery_needed)
{
lockdep_assert_not_held(&mddev->reconfig_mutex);
@@ -507,12 +515,18 @@ void mddev_resume(struct mddev *mddev)
percpu_ref_resurrect(&mddev->active_io);
wake_up(&mddev->sb_wait);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ if (recovery_needed)
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
mutex_unlock(&mddev->suspend_mutex);
}
+
+void mddev_resume(struct mddev *mddev)
+{
+ return __mddev_resume(mddev, true);
+}
EXPORT_SYMBOL_GPL(mddev_resume);
/*
@@ -4840,25 +4854,29 @@ action_show(struct mddev *mddev, char *page)
return sprintf(page, "%s\n", type);
}
-static void stop_sync_thread(struct mddev *mddev)
+/**
+ * stop_sync_thread() - wait for sync_thread to stop if it's running.
+ * @mddev: the array.
+ * @locked: if set, reconfig_mutex will still be held after this function
+ *          returns; if not set, reconfig_mutex will be released after this
+ *          function returns.
+ * @check_seq: if set, only wait for the currently running sync_thread to
+ *             stop; note that a new sync_thread can still start.
+ */
+static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
{
- if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
- return;
+ int sync_seq;
- if (mddev_lock(mddev))
- return;
+ if (check_seq)
+ sync_seq = atomic_read(&mddev->sync_seq);
- /*
- * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
- * held.
- */
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
- mddev_unlock(mddev);
+ if (!locked)
+ mddev_unlock(mddev);
return;
}
- if (work_pending(&mddev->del_work))
- flush_workqueue(md_misc_wq);
+ mddev_unlock(mddev);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
/*
@@ -4866,21 +4884,28 @@ static void stop_sync_thread(struct mddev *mddev)
* never happen
*/
md_wakeup_thread_directly(mddev->sync_thread);
+ if (work_pending(&mddev->sync_work))
+ flush_work(&mddev->sync_work);
- mddev_unlock(mddev);
+ wait_event(resync_wait,
+ !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ (check_seq && sync_seq != atomic_read(&mddev->sync_seq)));
+
+ if (locked)
+ mddev_lock_nointr(mddev);
}
static void idle_sync_thread(struct mddev *mddev)
{
- int sync_seq = atomic_read(&mddev->sync_seq);
-
mutex_lock(&mddev->sync_mutex);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- stop_sync_thread(mddev);
- wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
- !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+ if (mddev_lock(mddev)) {
+ mutex_unlock(&mddev->sync_mutex);
+ return;
+ }
+ stop_sync_thread(mddev, false, true);
mutex_unlock(&mddev->sync_mutex);
}
@@ -4888,11 +4913,13 @@ static void frozen_sync_thread(struct mddev *mddev)
{
mutex_lock(&mddev->sync_mutex);
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- stop_sync_thread(mddev);
- wait_event(resync_wait, mddev->sync_thread == NULL &&
- !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
+ if (mddev_lock(mddev)) {
+ mutex_unlock(&mddev->sync_mutex);
+ return;
+ }
+ stop_sync_thread(mddev, false, false);
mutex_unlock(&mddev->sync_mutex);
}
@@ -6264,14 +6291,7 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
- set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- if (work_pending(&mddev->del_work))
- flush_workqueue(md_misc_wq);
- if (mddev->sync_thread) {
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev);
- }
-
+ stop_sync_thread(mddev, true, false);
del_timer_sync(&mddev->safemode_timer);
if (mddev->pers && mddev->pers->quiesce) {
@@ -6318,9 +6338,6 @@ static void __md_stop(struct mddev *mddev)
struct md_personality *pers = mddev->pers;
md_bitmap_destroy(mddev);
mddev_detach(mddev);
- /* Ensure ->event_work is done */
- if (mddev->event_work.func)
- flush_workqueue(md_misc_wq);
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
@@ -6355,25 +6372,16 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
int err = 0;
int did_freeze = 0;
+ if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
+ return -EBUSY;
+
if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- /*
- * Thread might be blocked waiting for metadata update which will now
- * never happen
- */
- md_wakeup_thread_directly(mddev->sync_thread);
-
- if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
- return -EBUSY;
- mddev_unlock(mddev);
- wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
- &mddev->recovery));
+ stop_sync_thread(mddev, false, false);
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
mddev_lock_nointr(mddev);
@@ -6383,29 +6391,30 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
mddev->sync_thread ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
pr_warn("md: %s still in use.\n",mdname(mddev));
- if (did_freeze) {
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
- }
err = -EBUSY;
goto out;
}
+
if (mddev->pers) {
__md_stop_writes(mddev);
- err = -ENXIO;
- if (mddev->ro == MD_RDONLY)
+ if (mddev->ro == MD_RDONLY) {
+ err = -ENXIO;
goto out;
+ }
+
mddev->ro = MD_RDONLY;
set_disk_ro(mddev->gendisk, 1);
+ }
+
+out:
+ if ((mddev->pers && !err) || did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
sysfs_notify_dirent_safe(mddev->sysfs_state);
- err = 0;
}
-out:
+
mutex_unlock(&mddev->open_mutex);
return err;
}
@@ -6426,20 +6435,8 @@ static int do_md_stop(struct mddev *mddev, int mode,
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- /*
- * Thread might be blocked waiting for metadata update which will now
- * never happen
- */
- md_wakeup_thread_directly(mddev->sync_thread);
-
- mddev_unlock(mddev);
- wait_event(resync_wait, (mddev->sync_thread == NULL &&
- !test_bit(MD_RECOVERY_RUNNING,
- &mddev->recovery)));
- mddev_lock_nointr(mddev);
+ stop_sync_thread(mddev, true, false);
mutex_lock(&mddev->open_mutex);
if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
@@ -8666,7 +8663,8 @@ static void md_end_clone_io(struct bio *bio)
struct bio *orig_bio = md_io_clone->orig_bio;
struct mddev *mddev = md_io_clone->mddev;
- orig_bio->bi_status = bio->bi_status;
+ if (bio->bi_status && !orig_bio->bi_status)
+ orig_bio->bi_status = bio->bi_status;
if (md_io_clone->start_time)
bio_end_io_acct(orig_bio, md_io_clone->start_time);
@@ -9402,7 +9400,15 @@ static void md_start_sync(struct work_struct *ws)
goto not_running;
}
- suspend ? mddev_unlock_and_resume(mddev) : mddev_unlock(mddev);
+ mddev_unlock(mddev);
+ /*
+ * md_start_sync was triggered by MD_RECOVERY_NEEDED, so we should
+ * not set it again. Otherwise, we may cause issue like this one:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=218200
+ * Therefore, use __mddev_resume(mddev, false).
+ */
+ if (suspend)
+ __mddev_resume(mddev, false);
md_wakeup_thread(mddev->sync_thread);
sysfs_notify_dirent_safe(mddev->sysfs_action);
md_new_event();
@@ -9414,7 +9420,15 @@ not_running:
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
- suspend ? mddev_unlock_and_resume(mddev) : mddev_unlock(mddev);
+ mddev_unlock(mddev);
+ /*
+ * md_start_sync was triggered by MD_RECOVERY_NEEDED, so we should
+ * not set it again. Otherwise, we may cause issue like this one:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=218200
+ * Therefore, use __mddev_resume(mddev, false).
+ */
+ if (suspend)
+ __mddev_resume(mddev, false);
wake_up(&resync_wait);
if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
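
The refactored stop_sync_thread() centralizes a delicate locking contract:
the caller enters with reconfig_mutex held, the helper drops it while
waiting for the sync thread, and the locked argument decides whether the
mutex is reacquired before returning. A minimal pthread model of just that
contract (the sequence-number and recovery-flag handling are omitted):

/* Caller enters locked; locked=true means the helper restores the lock
 * before returning, locked=false means it returns unlocked. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t reconfig_mutex = PTHREAD_MUTEX_INITIALIZER;

static void stop_sync_thread_model(bool locked)
{
	/* entered with reconfig_mutex held */
	pthread_mutex_unlock(&reconfig_mutex);	/* drop it while waiting */
	printf("waiting for sync thread to stop...\n");
	if (locked)
		pthread_mutex_lock(&reconfig_mutex);	/* restore for caller */
}

int main(void)
{
	pthread_mutex_lock(&reconfig_mutex);
	stop_sync_thread_model(true);	/* as __md_stop_writes() does */
	pthread_mutex_unlock(&reconfig_mutex);

	pthread_mutex_lock(&reconfig_mutex);
	stop_sync_thread_model(false);	/* as idle_sync_thread() does */
	return 0;
}
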
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index dc031d42f53b..26e1e8a5e941 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5892,11 +5892,11 @@ static bool stripe_ahead_of_reshape(struct mddev *mddev, struct r5conf *conf,
int dd_idx;
for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
- if (dd_idx == sh->pd_idx)
+ if (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
continue;
min_sector = min(min_sector, sh->dev[dd_idx].sector);
- max_sector = min(max_sector, sh->dev[dd_idx].sector);
+ max_sector = max(max_sector, sh->dev[dd_idx].sector);
}
spin_lock_irq(&conf->device_lock);
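
The raid5 hunk fixes a copy-paste bug in stripe_ahead_of_reshape():
max_sector was being updated with min() instead of max() (and the qd_idx
disk is now skipped like pd_idx). A trivial standalone check of the
corrected min/max tracking:

/* Corrected min/max tracking over an array of sector numbers. */
#include <stdio.h>

#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
	unsigned long sectors[] = { 40, 8, 72, 16 };
	unsigned long min_sector = ~0ul, max_sector = 0;

	for (unsigned int i = 0; i < 4; i++) {
		min_sector = min(min_sector, sectors[i]);
		max_sector = max(max_sector, sectors[i]);	/* was min() */
	}
	printf("min %lu max %lu\n", min_sector, max_sector);
	return 0;
}
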