Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Kconfig           |   6
-rw-r--r--  drivers/md/bcache/bcache.h   |   1
-rw-r--r--  drivers/md/bcache/request.c  |  20
-rw-r--r--  drivers/md/bcache/stats.c    |  14
-rw-r--r--  drivers/md/bcache/stats.h    |   1
-rw-r--r--  drivers/md/bcache/super.c    |  15
-rw-r--r--  drivers/md/bcache/sysfs.c    |   4
-rw-r--r--  drivers/md/dm-rq.c           |   9
-rw-r--r--  drivers/md/dm.c              |  22
-rw-r--r--  drivers/md/md-bitmap.c       |   2
-rw-r--r--  drivers/md/md-faulty.c       |   2
-rw-r--r--  drivers/md/md-linear.c       |   2
-rw-r--r--  drivers/md/md-multipath.c    |   2
-rw-r--r--  drivers/md/md.c              | 141
-rw-r--r--  drivers/md/md.h              |  19
-rw-r--r--  drivers/md/raid0.c           |   3
-rw-r--r--  drivers/md/raid1.c           |  15
-rw-r--r--  drivers/md/raid1.h           |   1
-rw-r--r--  drivers/md/raid10.c          |   6
-rw-r--r--  drivers/md/raid10.h          |   1
-rw-r--r--  drivers/md/raid5.c           |  65
21 files changed, 185 insertions(+), 166 deletions(-)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index f2014385d48b..0602e82a9516 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -47,7 +47,7 @@ config MD_AUTODETECT
If unsure, say Y.
config MD_LINEAR
- tristate "Linear (append) mode"
+ tristate "Linear (append) mode (deprecated)"
depends on BLK_DEV_MD
help
If you say Y here, then your multiple devices driver will be able to
@@ -158,7 +158,7 @@ config MD_RAID456
If unsure, say Y.
config MD_MULTIPATH
- tristate "Multipath I/O support"
+ tristate "Multipath I/O support (deprecated)"
depends on BLK_DEV_MD
help
MD_MULTIPATH provides a simple multi-path personality for use
@@ -169,7 +169,7 @@ config MD_MULTIPATH
If unsure, say N.
config MD_FAULTY
- tristate "Faulty test module for MD"
+ tristate "Faulty test module for MD (deprecated)"
depends on BLK_DEV_MD
help
The "faulty" module allows for a block device that occasionally returns
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 0a4551e165ab..5fc989a6d452 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -364,7 +364,6 @@ struct cached_dev {
/* The rest of this all shows up in sysfs */
unsigned int sequential_cutoff;
- unsigned int readahead;
unsigned int io_disable:1;
unsigned int verify:1;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 29c231758293..6d1de889baeb 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -880,9 +880,9 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
struct bio *bio, unsigned int sectors)
{
int ret = MAP_CONTINUE;
- unsigned int reada = 0;
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
struct bio *miss, *cache_bio;
+ unsigned int size_limit;
s->cache_missed = 1;
@@ -892,14 +892,10 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
goto out_submit;
}
- if (!(bio->bi_opf & REQ_RAHEAD) &&
- !(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
- s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA)
- reada = min_t(sector_t, dc->readahead >> 9,
- get_capacity(bio->bi_bdev->bd_disk) -
- bio_end_sector(bio));
-
- s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
+ /* Limit for a valid replace key size and the number of cache_bio bvecs */
+ size_limit = min_t(unsigned int, BIO_MAX_VECS * PAGE_SECTORS,
+ (1 << KEY_SIZE_BITS) - 1);
+ s->insert_bio_sectors = min3(size_limit, sectors, bio_sectors(bio));
s->iop.replace_key = KEY(s->iop.inode,
bio->bi_iter.bi_sector + s->insert_bio_sectors,
@@ -911,7 +907,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->iop.replace = true;
- miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
+ miss = bio_next_split(bio, s->insert_bio_sectors, GFP_NOIO,
+ &s->d->bio_split);
/* btree_search_recurse()'s btree iterator is no good anymore */
ret = miss == bio ? MAP_DONE : -EINTR;
@@ -933,9 +930,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
if (bch_bio_alloc_pages(cache_bio, __GFP_NOWARN|GFP_NOIO))
goto out_put;
- if (reada)
- bch_mark_cache_readahead(s->iop.c, s->d);
-
s->cache_miss = miss;
s->iop.bio = cache_bio;
bio_get(cache_bio);
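For scale, the new clamp can be checked numerically. A standalone userspace sketch (the three constants are assumptions standing in for the kernel values on a 4 KiB-page config, not text from the patch):

#include <stdio.h>

#define BIO_MAX_VECS   256   /* assumed value of the kernel constant */
#define PAGE_SECTORS   8     /* assumed: 4 KiB page / 512 B sectors */
#define KEY_SIZE_BITS  16    /* assumed width of bcache's bkey size field */

int main(void)
{
        /* Mirrors: min_t(unsigned int, BIO_MAX_VECS * PAGE_SECTORS,
         *                 (1 << KEY_SIZE_BITS) - 1) */
        unsigned int by_bvecs = BIO_MAX_VECS * PAGE_SECTORS;   /* 2048 */
        unsigned int by_key = (1u << KEY_SIZE_BITS) - 1;       /* 65535 */
        unsigned int size_limit = by_bvecs < by_key ? by_bvecs : by_key;

        /* The bvec bound dominates: 2048 sectors = 1 MiB per cache_bio */
        printf("size_limit = %u sectors (%u KiB)\n", size_limit, size_limit / 2);
        return 0;
}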
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index 503aafe188dc..4c7ee5fedb9d 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -46,7 +46,6 @@ read_attribute(cache_misses);
read_attribute(cache_bypass_hits);
read_attribute(cache_bypass_misses);
read_attribute(cache_hit_ratio);
-read_attribute(cache_readaheads);
read_attribute(cache_miss_collisions);
read_attribute(bypassed);
@@ -64,7 +63,6 @@ SHOW(bch_stats)
DIV_SAFE(var(cache_hits) * 100,
var(cache_hits) + var(cache_misses)));
- var_print(cache_readaheads);
var_print(cache_miss_collisions);
sysfs_hprint(bypassed, var(sectors_bypassed) << 9);
#undef var
@@ -86,7 +84,6 @@ static struct attribute *bch_stats_files[] = {
&sysfs_cache_bypass_hits,
&sysfs_cache_bypass_misses,
&sysfs_cache_hit_ratio,
- &sysfs_cache_readaheads,
&sysfs_cache_miss_collisions,
&sysfs_bypassed,
NULL
@@ -113,7 +110,6 @@ void bch_cache_accounting_clear(struct cache_accounting *acc)
acc->total.cache_misses = 0;
acc->total.cache_bypass_hits = 0;
acc->total.cache_bypass_misses = 0;
- acc->total.cache_readaheads = 0;
acc->total.cache_miss_collisions = 0;
acc->total.sectors_bypassed = 0;
}
@@ -145,7 +141,6 @@ static void scale_stats(struct cache_stats *stats, unsigned long rescale_at)
scale_stat(&stats->cache_misses);
scale_stat(&stats->cache_bypass_hits);
scale_stat(&stats->cache_bypass_misses);
- scale_stat(&stats->cache_readaheads);
scale_stat(&stats->cache_miss_collisions);
scale_stat(&stats->sectors_bypassed);
}
@@ -168,7 +163,6 @@ static void scale_accounting(struct timer_list *t)
move_stat(cache_misses);
move_stat(cache_bypass_hits);
move_stat(cache_bypass_misses);
- move_stat(cache_readaheads);
move_stat(cache_miss_collisions);
move_stat(sectors_bypassed);
@@ -209,14 +203,6 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
mark_cache_stats(&c->accounting.collector, hit, bypass);
}
-void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d)
-{
- struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-
- atomic_inc(&dc->accounting.collector.cache_readaheads);
- atomic_inc(&c->accounting.collector.cache_readaheads);
-}
-
void bch_mark_cache_miss_collision(struct cache_set *c, struct bcache_device *d)
{
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h
index abfaabf7e7fc..ca4f435f7216 100644
--- a/drivers/md/bcache/stats.h
+++ b/drivers/md/bcache/stats.h
@@ -7,7 +7,6 @@ struct cache_stat_collector {
atomic_t cache_misses;
atomic_t cache_bypass_hits;
atomic_t cache_bypass_misses;
- atomic_t cache_readaheads;
atomic_t cache_miss_collisions;
atomic_t sectors_bypassed;
};
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index bea8c4429ae8..185246a0d855 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -890,13 +890,9 @@ static void bcache_device_free(struct bcache_device *d)
if (disk_added)
del_gendisk(disk);
- if (disk->queue)
- blk_cleanup_queue(disk->queue);
-
+ blk_cleanup_disk(disk);
ida_simple_remove(&bcache_device_idx,
first_minor_to_idx(disk->first_minor));
- if (disk_added)
- put_disk(disk);
}
bioset_exit(&d->bio_split);
@@ -946,7 +942,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
goto err;
- d->disk = alloc_disk(BCACHE_MINORS);
+ d->disk = blk_alloc_disk(NUMA_NO_NODE);
if (!d->disk)
goto err;
@@ -955,14 +951,11 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
d->disk->major = bcache_major;
d->disk->first_minor = idx_to_first_minor(idx);
+ d->disk->minors = BCACHE_MINORS;
d->disk->fops = ops;
d->disk->private_data = d;
- q = blk_alloc_queue(NUMA_NO_NODE);
- if (!q)
- return -ENOMEM;
-
- d->disk->queue = q;
+ q = d->disk->queue;
q->limits.max_hw_sectors = UINT_MAX;
q->limits.max_sectors = UINT_MAX;
q->limits.max_segment_size = UINT_MAX;
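The super.c hunks follow the tree-wide move to the combined disk-plus-queue allocator. A minimal sketch of the resulting pattern (my_major and my_fops are placeholder names, not identifiers from this patch):

static struct gendisk *my_alloc(void)
{
        struct gendisk *disk = blk_alloc_disk(NUMA_NO_NODE);

        if (!disk)
                return NULL;
        disk->major = my_major;
        disk->first_minor = 0;
        disk->minors = 1;               /* minors now live on the disk */
        disk->fops = &my_fops;
        /* disk->queue is already allocated; tune its limits directly */
        return disk;
}

static void my_free(struct gendisk *disk)
{
        del_gendisk(disk);
        blk_cleanup_disk(disk);         /* was blk_cleanup_queue() + put_disk() */
}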
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index cc89f3156d1a..05ac1d6fbbf3 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -137,7 +137,6 @@ rw_attribute(io_disable);
rw_attribute(discard);
rw_attribute(running);
rw_attribute(label);
-rw_attribute(readahead);
rw_attribute(errors);
rw_attribute(io_error_limit);
rw_attribute(io_error_halflife);
@@ -260,7 +259,6 @@ SHOW(__bch_cached_dev)
var_printf(partial_stripes_expensive, "%u");
var_hprint(sequential_cutoff);
- var_hprint(readahead);
sysfs_print(running, atomic_read(&dc->running));
sysfs_print(state, states[BDEV_STATE(&dc->sb)]);
@@ -365,7 +363,6 @@ STORE(__cached_dev)
sysfs_strtoul_clamp(sequential_cutoff,
dc->sequential_cutoff,
0, UINT_MAX);
- d_strtoi_h(readahead);
if (attr == &sysfs_clear_stats)
bch_cache_accounting_clear(&dc->accounting);
@@ -538,7 +535,6 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_running,
&sysfs_state,
&sysfs_label,
- &sysfs_readahead,
#ifdef CONFIG_BCACHE_DEBUG
&sysfs_verify,
&sysfs_bypass_torture_test,
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 9c3bc3711b33..0dbd48cbdff9 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -530,7 +530,6 @@ static const struct blk_mq_ops dm_mq_ops = {
int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
{
- struct request_queue *q;
struct dm_target *immutable_tgt;
int err;
@@ -557,12 +556,10 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
if (err)
goto out_kfree_tag_set;
- q = blk_mq_init_allocated_queue(md->tag_set, md->queue, true);
- if (IS_ERR(q)) {
- err = PTR_ERR(q);
+ err = blk_mq_init_allocated_queue(md->tag_set, md->queue);
+ if (err)
goto out_tag_set;
- }
-
+ elevator_init_mq(md->queue);
return 0;
out_tag_set:
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 420a12b42708..2c5f9e585211 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1694,13 +1694,13 @@ static void cleanup_mapped_device(struct mapped_device *md)
md->disk->private_data = NULL;
spin_unlock(&_minor_lock);
del_gendisk(md->disk);
- put_disk(md->disk);
}
- if (md->queue) {
+ if (md->queue)
dm_queue_destroy_keyslot_manager(md->queue);
- blk_cleanup_queue(md->queue);
- }
+
+ if (md->disk)
+ blk_cleanup_disk(md->disk);
cleanup_srcu_struct(&md->io_barrier);
@@ -1763,13 +1763,10 @@ static struct mapped_device *alloc_dev(int minor)
* established. If request-based table is loaded: blk-mq will
* override accordingly.
*/
- md->queue = blk_alloc_queue(numa_node_id);
- if (!md->queue)
- goto bad;
-
- md->disk = alloc_disk_node(1, md->numa_node_id);
+ md->disk = blk_alloc_disk(md->numa_node_id);
if (!md->disk)
goto bad;
+ md->queue = md->disk->queue;
init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work);
@@ -1782,6 +1779,7 @@ static struct mapped_device *alloc_dev(int minor)
md->disk->major = _major;
md->disk->first_minor = minor;
+ md->disk->minors = 1;
md->disk->fops = &dm_blk_dops;
md->disk->queue = md->queue;
md->disk->private_data = md;
@@ -2230,7 +2228,7 @@ static bool md_in_flight_bios(struct mapped_device *md)
return sum != 0;
}
-static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state)
+static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int task_state)
{
int r = 0;
DEFINE_WAIT(wait);
@@ -2253,7 +2251,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state
return r;
}
-static int dm_wait_for_completion(struct mapped_device *md, long task_state)
+static int dm_wait_for_completion(struct mapped_device *md, unsigned int task_state)
{
int r = 0;
@@ -2380,7 +2378,7 @@ static void unlock_fs(struct mapped_device *md)
* are being added to md->deferred list.
*/
static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
- unsigned suspend_flags, long task_state,
+ unsigned suspend_flags, unsigned int task_state,
int dmf_suspended_flag)
{
bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index ea3130e11680..e29c6298ef5c 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -2616,7 +2616,7 @@ static struct attribute *md_bitmap_attrs[] = {
&max_backlog_used.attr,
NULL
};
-struct attribute_group md_bitmap_group = {
+const struct attribute_group md_bitmap_group = {
.name = "bitmap",
.attrs = md_bitmap_attrs,
};
diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c
index fda4cb3f936f..c0dc6f2ef4a3 100644
--- a/drivers/md/md-faulty.c
+++ b/drivers/md/md-faulty.c
@@ -357,7 +357,7 @@ static void raid_exit(void)
module_init(raid_init);
module_exit(raid_exit);
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Fault injection personality for MD");
+MODULE_DESCRIPTION("Fault injection personality for MD (deprecated)");
MODULE_ALIAS("md-personality-10"); /* faulty */
MODULE_ALIAS("md-faulty");
MODULE_ALIAS("md-level--5");
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 63ed8329a98d..1ff51647a682 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -312,7 +312,7 @@ static void linear_exit (void)
module_init(linear_init);
module_exit(linear_exit);
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Linear device concatenation personality for MD");
+MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)");
MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
MODULE_ALIAS("md-linear");
MODULE_ALIAS("md-level--1");
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 776bbe542db5..e7d6486f090f 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -471,7 +471,7 @@ static void __exit multipath_exit (void)
module_init(multipath_init);
module_exit(multipath_exit);
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("simple multi-path personality for MD");
+MODULE_DESCRIPTION("simple multi-path personality for MD (deprecated)");
MODULE_ALIAS("md-personality-7"); /* MULTIPATH */
MODULE_ALIAS("md-multipath");
MODULE_ALIAS("md-level--4");
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 49f897fbb89b..ae8fe54ea358 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -441,30 +441,6 @@ check_suspended:
}
EXPORT_SYMBOL(md_handle_request);
-struct md_io {
- struct mddev *mddev;
- bio_end_io_t *orig_bi_end_io;
- void *orig_bi_private;
- struct block_device *orig_bi_bdev;
- unsigned long start_time;
-};
-
-static void md_end_io(struct bio *bio)
-{
- struct md_io *md_io = bio->bi_private;
- struct mddev *mddev = md_io->mddev;
-
- bio_end_io_acct_remapped(bio, md_io->start_time, md_io->orig_bi_bdev);
-
- bio->bi_end_io = md_io->orig_bi_end_io;
- bio->bi_private = md_io->orig_bi_private;
-
- mempool_free(md_io, &mddev->md_io_pool);
-
- if (bio->bi_end_io)
- bio->bi_end_io(bio);
-}
-
static blk_qc_t md_submit_bio(struct bio *bio)
{
const int rw = bio_data_dir(bio);
@@ -489,21 +465,6 @@ static blk_qc_t md_submit_bio(struct bio *bio)
return BLK_QC_T_NONE;
}
- if (bio->bi_end_io != md_end_io) {
- struct md_io *md_io;
-
- md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO);
- md_io->mddev = mddev;
- md_io->orig_bi_end_io = bio->bi_end_io;
- md_io->orig_bi_private = bio->bi_private;
- md_io->orig_bi_bdev = bio->bi_bdev;
-
- bio->bi_end_io = md_end_io;
- bio->bi_private = md_io;
-
- md_io->start_time = bio_start_io_acct(bio);
- }
-
/* bio could be mergeable after passing to underlayer */
bio->bi_opf &= ~REQ_NOMERGE;
@@ -824,7 +785,7 @@ out_free_new:
return ERR_PTR(error);
}
-static struct attribute_group md_redundancy_group;
+static const struct attribute_group md_redundancy_group;
void mddev_unlock(struct mddev *mddev)
{
@@ -841,7 +802,7 @@ void mddev_unlock(struct mddev *mddev)
* test it under the same mutex to ensure its correct value
* is seen.
*/
- struct attribute_group *to_remove = mddev->to_remove;
+ const struct attribute_group *to_remove = mddev->to_remove;
mddev->to_remove = NULL;
mddev->sysfs_active = 1;
mutex_unlock(&mddev->reconfig_mutex);
@@ -2379,7 +2340,15 @@ int md_integrity_register(struct mddev *mddev)
bdev_get_integrity(reference->bdev));
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
- if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
+ if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
+ (mddev->level != 1 && mddev->level != 10 &&
+ bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
+ /*
+ * No need to handle the failure of bioset_integrity_create,
+ * because the function is called by md_run() -> pers->run(),
+ * md_run calls bioset_exit -> bioset_integrity_free in the
+ * failure case.
+ */
pr_err("md: failed to create integrity pool for %s\n",
mdname(mddev));
return -EINVAL;
@@ -5538,7 +5507,7 @@ static struct attribute *md_redundancy_attrs[] = {
&md_degraded.attr,
NULL,
};
-static struct attribute_group md_redundancy_group = {
+static const struct attribute_group md_redundancy_group = {
.name = NULL,
.attrs = md_redundancy_attrs,
};
@@ -5598,17 +5567,16 @@ static void md_free(struct kobject *ko)
if (mddev->sysfs_level)
sysfs_put(mddev->sysfs_level);
- if (mddev->gendisk)
+ if (mddev->gendisk) {
del_gendisk(mddev->gendisk);
- if (mddev->queue)
- blk_cleanup_queue(mddev->queue);
- if (mddev->gendisk)
- put_disk(mddev->gendisk);
+ blk_cleanup_disk(mddev->gendisk);
+ }
percpu_ref_exit(&mddev->writes_pending);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
- mempool_exit(&mddev->md_io_pool);
+ if (mddev->level != 1 && mddev->level != 10)
+ bioset_exit(&mddev->io_acct_set);
kfree(mddev);
}
@@ -5705,26 +5673,14 @@ static int md_alloc(dev_t dev, char *name)
*/
mddev->hold_active = UNTIL_STOP;
- error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE,
- sizeof(struct md_io));
- if (error)
- goto abort;
-
error = -ENOMEM;
- mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
- if (!mddev->queue)
+ disk = blk_alloc_disk(NUMA_NO_NODE);
+ if (!disk)
goto abort;
- blk_set_stacking_limits(&mddev->queue->limits);
-
- disk = alloc_disk(1 << shift);
- if (!disk) {
- blk_cleanup_queue(mddev->queue);
- mddev->queue = NULL;
- goto abort;
- }
disk->major = MAJOR(mddev->unit);
disk->first_minor = unit << shift;
+ disk->minors = 1 << shift;
if (name)
strcpy(disk->disk_name, name);
else if (partitioned)
@@ -5733,7 +5689,9 @@ static int md_alloc(dev_t dev, char *name)
sprintf(disk->disk_name, "md%d", unit);
disk->fops = &md_fops;
disk->private_data = mddev;
- disk->queue = mddev->queue;
+
+ mddev->queue = disk->queue;
+ blk_set_stacking_limits(&mddev->queue->limits);
blk_queue_write_cache(mddev->queue, true, true);
/* Allow extended partitions. This makes the
* 'mdp' device redundant, but we can't really
@@ -5907,7 +5865,14 @@ int md_run(struct mddev *mddev)
if (!bioset_initialized(&mddev->sync_set)) {
err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (err)
- return err;
+ goto exit_bio_set;
+ }
+ if (mddev->level != 1 && mddev->level != 10 &&
+ !bioset_initialized(&mddev->io_acct_set)) {
+ err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
+ offsetof(struct md_io_acct, bio_clone), 0);
+ if (err)
+ goto exit_sync_set;
}
spin_lock(&pers_lock);
@@ -6035,6 +6000,7 @@ int md_run(struct mddev *mddev)
blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
else
blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
+ blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
}
if (pers->sync_request) {
if (mddev->kobj.sd &&
@@ -6084,8 +6050,12 @@ bitmap_abort:
module_put(pers->owner);
md_bitmap_destroy(mddev);
abort:
- bioset_exit(&mddev->bio_set);
+ if (mddev->level != 1 && mddev->level != 10)
+ bioset_exit(&mddev->io_acct_set);
+exit_sync_set:
bioset_exit(&mddev->sync_set);
+exit_bio_set:
+ bioset_exit(&mddev->bio_set);
return err;
}
EXPORT_SYMBOL_GPL(md_run);
@@ -6309,6 +6279,8 @@ void md_stop(struct mddev *mddev)
__md_stop(mddev);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
+ if (mddev->level != 1 && mddev->level != 10)
+ bioset_exit(&mddev->io_acct_set);
}
EXPORT_SYMBOL_GPL(md_stop);
@@ -8613,6 +8585,41 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
}
EXPORT_SYMBOL_GPL(md_submit_discard_bio);
+static void md_end_io_acct(struct bio *bio)
+{
+ struct md_io_acct *md_io_acct = bio->bi_private;
+ struct bio *orig_bio = md_io_acct->orig_bio;
+
+ orig_bio->bi_status = bio->bi_status;
+
+ bio_end_io_acct(orig_bio, md_io_acct->start_time);
+ bio_put(bio);
+ bio_endio(orig_bio);
+}
+
+/*
+ * Used by personalities that don't already clone the bio and thus can't
+ * easily add the timestamp to their extended bio structure.
+ */
+void md_account_bio(struct mddev *mddev, struct bio **bio)
+{
+ struct md_io_acct *md_io_acct;
+ struct bio *clone;
+
+ if (!blk_queue_io_stat((*bio)->bi_bdev->bd_disk->queue))
+ return;
+
+ clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set);
+ md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
+ md_io_acct->orig_bio = *bio;
+ md_io_acct->start_time = bio_start_io_acct(*bio);
+
+ clone->bi_end_io = md_end_io_acct;
+ clone->bi_private = md_io_acct;
+ *bio = clone;
+}
+EXPORT_SYMBOL_GPL(md_account_bio);
+
/* md_allow_write(mddev)
* Calling this ensures that the array is marked 'active' so that writes
* may proceed without blocking. It is important to call this before
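The accounting hook is a single call for personalities that do not clone bios themselves. A hedged sketch of the caller side (mirroring the raid0 and raid5 hunks below; my_make_request is a placeholder):

static bool my_make_request(struct mddev *mddev, struct bio *bio)
{
        /* May swap 'bio' for a clone from mddev->io_acct_set; on
         * completion md_end_io_acct() finishes the accounting and
         * ends the original bio. No-op when io stats are disabled. */
        md_account_bio(mddev, &bio);

        /* ... map and submit 'bio' exactly as before ... */
        return true;
}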
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fb7eab58cfd5..832547cf038f 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -395,10 +395,10 @@ struct mddev {
* that we are never stopping an array while it is open.
* 'reconfig_mutex' protects all other reconfiguration.
* These locks are separate due to conflicting interactions
- * with bdev->bd_mutex.
+ * with disk->open_mutex.
* Lock ordering is:
- * reconfig_mutex -> bd_mutex
- * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
+ * reconfig_mutex -> disk->open_mutex
+ * disk->open_mutex -> open_mutex: e.g. __blkdev_get -> md_open
*/
struct mutex open_mutex;
struct mutex reconfig_mutex;
@@ -481,13 +481,13 @@ struct mddev {
atomic_t max_corr_read_errors; /* max read retries */
struct list_head all_mddevs;
- struct attribute_group *to_remove;
+ const struct attribute_group *to_remove;
struct bio_set bio_set;
struct bio_set sync_set; /* for sync operations like
* metadata and bitmap writes
*/
- mempool_t md_io_pool;
+ struct bio_set io_acct_set; /* for raid0 and raid5 io accounting */
/* Generic flush handling.
* The last to finish preflush schedules a worker to submit
@@ -613,7 +613,7 @@ struct md_sysfs_entry {
ssize_t (*show)(struct mddev *, char *);
ssize_t (*store)(struct mddev *, const char *, size_t);
};
-extern struct attribute_group md_bitmap_group;
+extern const struct attribute_group md_bitmap_group;
static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
{
@@ -684,6 +684,12 @@ struct md_thread {
void *private;
};
+struct md_io_acct {
+ struct bio *orig_bio;
+ unsigned long start_time;
+ struct bio bio_clone;
+};
+
#define THREAD_WAKEUP 0
static inline void safe_put_page(struct page *p)
@@ -715,6 +721,7 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
struct bio *bio, sector_t start, sector_t size);
+void md_account_bio(struct mddev *mddev, struct bio **bio);
extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
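Why container_of() on a clone recovers the wrapper: md_run() initializes io_acct_set with offsetof(struct md_io_acct, bio_clone) as the bioset front pad, so every bio allocated from that set sits at bio_clone inside a struct md_io_acct. Condensed from the md.c and raid5.c hunks:

/* allocation side (md_account_bio, raid5_read_one_chunk) */
struct bio *clone = bio_clone_fast(bio, GFP_NOIO, &mddev->io_acct_set);
struct md_io_acct *acct = container_of(clone, struct md_io_acct, bio_clone);

/* completion side (md_end_io_acct, raid5_align_endio): the wrapper
 * comes back via clone->bi_private, then the original is finished */
acct = clone->bi_private;
bio_end_io_acct(acct->orig_bio, acct->start_time);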
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index e5d7411cba9b..62c8b6adac70 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -546,6 +546,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
bio = split;
}
+ if (bio->bi_pool != &mddev->bio_set)
+ md_account_bio(mddev, &bio);
+
orig_sector = sector;
zone = find_zone(mddev->private, &sector);
switch (conf->layout) {
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ced076ba560e..51f2547c2007 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -300,6 +300,8 @@ static void call_bio_endio(struct r1bio *r1_bio)
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
bio->bi_status = BLK_STS_IOERR;
+ if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+ bio_end_io_acct(bio, r1_bio->start_time);
bio_endio(bio);
}
@@ -1210,7 +1212,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
int max_sectors;
int rdisk;
- bool print_msg = !!r1_bio;
+ bool r1bio_existed = !!r1_bio;
char b[BDEVNAME_SIZE];
/*
@@ -1220,7 +1222,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
*/
gfp_t gfp = r1_bio ? (GFP_NOIO | __GFP_HIGH) : GFP_NOIO;
- if (print_msg) {
+ if (r1bio_existed) {
/* Need to get the block device name carefully */
struct md_rdev *rdev;
rcu_read_lock();
@@ -1252,7 +1254,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
if (rdisk < 0) {
/* couldn't find anywhere to read from */
- if (print_msg) {
+ if (r1bio_existed) {
pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n",
mdname(mddev),
b,
@@ -1263,7 +1265,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
}
mirror = conf->mirrors + rdisk;
- if (print_msg)
+ if (r1bio_existed)
pr_info_ratelimited("md/raid1:%s: redirecting sector %llu to other mirror: %s\n",
mdname(mddev),
(unsigned long long)r1_bio->sector,
@@ -1292,6 +1294,9 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
r1_bio->read_disk = rdisk;
+ if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+ r1_bio->start_time = bio_start_io_acct(bio);
+
read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
r1_bio->bios[rdisk] = read_bio;
@@ -1461,6 +1466,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
r1_bio->sectors = max_sectors;
}
+ if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+ r1_bio->start_time = bio_start_io_acct(bio);
atomic_set(&r1_bio->remaining, 1);
atomic_set(&r1_bio->behind_remaining, 0);
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index b7eb09e8c025..ccf10e59b116 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -158,6 +158,7 @@ struct r1bio {
sector_t sector;
int sectors;
unsigned long state;
+ unsigned long start_time;
struct mddev *mddev;
/*
* original bio going to /dev/mdx
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 13f5e6b2a73d..16977e8e075d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -297,6 +297,8 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
bio->bi_status = BLK_STS_IOERR;
+ if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+ bio_end_io_acct(bio, r10_bio->start_time);
bio_endio(bio);
/*
* Wake up any possible resync thread that waits for the device
@@ -1184,6 +1186,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
}
slot = r10_bio->read_slot;
+ if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+ r10_bio->start_time = bio_start_io_acct(bio);
read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
r10_bio->devs[slot].bio = read_bio;
@@ -1483,6 +1487,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
r10_bio->master_bio = bio;
}
+ if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+ r10_bio->start_time = bio_start_io_acct(bio);
atomic_set(&r10_bio->remaining, 1);
md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 1461fd55311b..c34bb196790e 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -124,6 +124,7 @@ struct r10bio {
sector_t sector; /* virtual sector number */
int sectors;
unsigned long state;
+ unsigned long start_time;
struct mddev *mddev;
/*
* original bio going to /dev/mdx
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 841e1c1aa5e6..b8436e4930ed 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5311,8 +5311,6 @@ static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
unsigned int chunk_sectors;
unsigned int bio_sectors = bio_sectors(bio);
- WARN_ON_ONCE(bio->bi_bdev->bd_partno);
-
chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
return chunk_sectors >=
((sector & (chunk_sectors - 1)) + bio_sectors);
@@ -5364,11 +5362,13 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
*/
static void raid5_align_endio(struct bio *bi)
{
- struct bio* raid_bi = bi->bi_private;
+ struct md_io_acct *md_io_acct = bi->bi_private;
+ struct bio *raid_bi = md_io_acct->orig_bio;
struct mddev *mddev;
struct r5conf *conf;
struct md_rdev *rdev;
blk_status_t error = bi->bi_status;
+ unsigned long start_time = md_io_acct->start_time;
bio_put(bi);
@@ -5380,6 +5380,8 @@ static void raid5_align_endio(struct bio *bi)
rdev_dec_pending(rdev, conf->mddev);
if (!error) {
+ if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
+ bio_end_io_acct(raid_bi, start_time);
bio_endio(raid_bi);
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_quiescent);
@@ -5398,6 +5400,8 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
struct md_rdev *rdev;
sector_t sector, end_sector, first_bad;
int bad_sectors, dd_idx;
+ struct md_io_acct *md_io_acct;
+ bool did_inc;
if (!in_chunk_boundary(mddev, raid_bio)) {
pr_debug("%s: non aligned\n", __func__);
@@ -5427,29 +5431,46 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set);
- bio_set_dev(align_bio, rdev->bdev);
- align_bio->bi_end_io = raid5_align_endio;
- align_bio->bi_private = raid_bio;
- align_bio->bi_iter.bi_sector = sector;
-
- raid_bio->bi_next = (void *)rdev;
-
- if (is_badblock(rdev, sector, bio_sectors(align_bio), &first_bad,
+ if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad,
&bad_sectors)) {
- bio_put(align_bio);
+ bio_put(raid_bio);
rdev_dec_pending(rdev, mddev);
return 0;
}
+ align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_acct_set);
+ md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
+ raid_bio->bi_next = (void *)rdev;
+ if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
+ md_io_acct->start_time = bio_start_io_acct(raid_bio);
+ md_io_acct->orig_bio = raid_bio;
+
+ bio_set_dev(align_bio, rdev->bdev);
+ align_bio->bi_end_io = raid5_align_endio;
+ align_bio->bi_private = md_io_acct;
+ align_bio->bi_iter.bi_sector = sector;
+
/* No reshape active, so we can trust rdev->data_offset */
align_bio->bi_iter.bi_sector += rdev->data_offset;
- spin_lock_irq(&conf->device_lock);
- wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0,
- conf->device_lock);
- atomic_inc(&conf->active_aligned_reads);
- spin_unlock_irq(&conf->device_lock);
+ did_inc = false;
+ if (conf->quiesce == 0) {
+ atomic_inc(&conf->active_aligned_reads);
+ did_inc = true;
+ }
+ /* need a memory barrier to detect the race with raid5_quiesce() */
+ if (!did_inc || smp_load_acquire(&conf->quiesce) != 0) {
+ /* quiesce is in progress, so we need to undo io activation and wait
+ * for it to finish
+ */
+ if (did_inc && atomic_dec_and_test(&conf->active_aligned_reads))
+ wake_up(&conf->wait_for_quiescent);
+ spin_lock_irq(&conf->device_lock);
+ wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0,
+ conf->device_lock);
+ atomic_inc(&conf->active_aligned_reads);
+ spin_unlock_irq(&conf->device_lock);
+ }
if (mddev->gendisk)
trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk),
@@ -5798,6 +5819,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
last_sector = bio_end_sector(bi);
bi->bi_next = NULL;
+ md_account_bio(mddev, &bi);
prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
int previous;
@@ -6930,7 +6952,7 @@ static struct attribute *raid5_attrs[] = {
&ppl_write_hint.attr,
NULL,
};
-static struct attribute_group raid5_attrs_group = {
+static const struct attribute_group raid5_attrs_group = {
.name = NULL,
.attrs = raid5_attrs,
};
@@ -8336,7 +8358,10 @@ static void raid5_quiesce(struct mddev *mddev, int quiesce)
* active stripes can drain
*/
r5c_flush_cache(conf, INT_MAX);
- conf->quiesce = 2;
+ /* need a memory barrier to make sure read_one_chunk() sees
+ * that quiesce has started and falls back to the slow (locked) path.
+ */
+ smp_store_release(&conf->quiesce, 2);
wait_event_cmd(conf->wait_for_quiescent,
atomic_read(&conf->active_stripes) == 0 &&
atomic_read(&conf->active_aligned_reads) == 0,
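Taken together, the read_one_chunk() and raid5_quiesce() hunks form an acquire/release pairing around conf->quiesce. A comment-style sketch of the intended interplay (my reading of the patch's own comments, not text from it):

/*
 *  reader: raid5_read_one_chunk()       writer: raid5_quiesce()
 *  ------------------------------       -----------------------
 *  atomic_inc(&active_aligned_reads)    r5c_flush_cache(conf, INT_MAX)
 *  if (smp_load_acquire(&quiesce))      smp_store_release(&quiesce, 2)
 *          undo the inc and fall        wait_event_cmd(...reads == 0...)
 *          back to the locked path
 *
 * Per the comments added above, a reader either observes the
 * release-store and reverts to the original device_lock path, or its
 * increment is accounted before the writer's wait completes, so no
 * aligned read slips past an in-progress quiesce.
 */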