diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig | 6 | ||||
-rw-r--r-- | drivers/md/bcache/bcache.h | 1 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 20 | ||||
-rw-r--r-- | drivers/md/bcache/stats.c | 14 | ||||
-rw-r--r-- | drivers/md/bcache/stats.h | 1 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 15 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-rq.c | 9 | ||||
-rw-r--r-- | drivers/md/dm.c | 22 | ||||
-rw-r--r-- | drivers/md/md-bitmap.c | 2 | ||||
-rw-r--r-- | drivers/md/md-faulty.c | 2 | ||||
-rw-r--r-- | drivers/md/md-linear.c | 2 | ||||
-rw-r--r-- | drivers/md/md-multipath.c | 2 | ||||
-rw-r--r-- | drivers/md/md.c | 141 | ||||
-rw-r--r-- | drivers/md/md.h | 19 | ||||
-rw-r--r-- | drivers/md/raid0.c | 3 | ||||
-rw-r--r-- | drivers/md/raid1.c | 15 | ||||
-rw-r--r-- | drivers/md/raid1.h | 1 | ||||
-rw-r--r-- | drivers/md/raid10.c | 6 | ||||
-rw-r--r-- | drivers/md/raid10.h | 1 | ||||
-rw-r--r-- | drivers/md/raid5.c | 65 |
21 files changed, 185 insertions, 166 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index f2014385d48b..0602e82a9516 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -47,7 +47,7 @@ config MD_AUTODETECT If unsure, say Y. config MD_LINEAR - tristate "Linear (append) mode" + tristate "Linear (append) mode (deprecated)" depends on BLK_DEV_MD help If you say Y here, then your multiple devices driver will be able to @@ -158,7 +158,7 @@ config MD_RAID456 If unsure, say Y. config MD_MULTIPATH - tristate "Multipath I/O support" + tristate "Multipath I/O support (deprecated)" depends on BLK_DEV_MD help MD_MULTIPATH provides a simple multi-path personality for use @@ -169,7 +169,7 @@ config MD_MULTIPATH If unsure, say N. config MD_FAULTY - tristate "Faulty test module for MD" + tristate "Faulty test module for MD (deprecated)" depends on BLK_DEV_MD help The "faulty" module allows for a block device that occasionally returns diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 0a4551e165ab..5fc989a6d452 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -364,7 +364,6 @@ struct cached_dev { /* The rest of this all shows up in sysfs */ unsigned int sequential_cutoff; - unsigned int readahead; unsigned int io_disable:1; unsigned int verify:1; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 29c231758293..6d1de889baeb 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -880,9 +880,9 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, struct bio *bio, unsigned int sectors) { int ret = MAP_CONTINUE; - unsigned int reada = 0; struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); struct bio *miss, *cache_bio; + unsigned int size_limit; s->cache_missed = 1; @@ -892,14 +892,10 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, goto out_submit; } - if (!(bio->bi_opf & REQ_RAHEAD) && - !(bio->bi_opf & (REQ_META|REQ_PRIO)) && - s->iop.c->gc_stats.in_use < 
CUTOFF_CACHE_READA) - reada = min_t(sector_t, dc->readahead >> 9, - get_capacity(bio->bi_bdev->bd_disk) - - bio_end_sector(bio)); - - s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada); + /* Limitation for valid replace key size and cache_bio bvecs number */ + size_limit = min_t(unsigned int, BIO_MAX_VECS * PAGE_SECTORS, + (1 << KEY_SIZE_BITS) - 1); + s->insert_bio_sectors = min3(size_limit, sectors, bio_sectors(bio)); s->iop.replace_key = KEY(s->iop.inode, bio->bi_iter.bi_sector + s->insert_bio_sectors, @@ -911,7 +907,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, s->iop.replace = true; - miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split); + miss = bio_next_split(bio, s->insert_bio_sectors, GFP_NOIO, + &s->d->bio_split); /* btree_search_recurse()'s btree iterator is no good anymore */ ret = miss == bio ? MAP_DONE : -EINTR; @@ -933,9 +930,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, if (bch_bio_alloc_pages(cache_bio, __GFP_NOWARN|GFP_NOIO)) goto out_put; - if (reada) - bch_mark_cache_readahead(s->iop.c, s->d); - s->cache_miss = miss; s->iop.bio = cache_bio; bio_get(cache_bio); diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c index 503aafe188dc..4c7ee5fedb9d 100644 --- a/drivers/md/bcache/stats.c +++ b/drivers/md/bcache/stats.c @@ -46,7 +46,6 @@ read_attribute(cache_misses); read_attribute(cache_bypass_hits); read_attribute(cache_bypass_misses); read_attribute(cache_hit_ratio); -read_attribute(cache_readaheads); read_attribute(cache_miss_collisions); read_attribute(bypassed); @@ -64,7 +63,6 @@ SHOW(bch_stats) DIV_SAFE(var(cache_hits) * 100, var(cache_hits) + var(cache_misses))); - var_print(cache_readaheads); var_print(cache_miss_collisions); sysfs_hprint(bypassed, var(sectors_bypassed) << 9); #undef var @@ -86,7 +84,6 @@ static struct attribute *bch_stats_files[] = { &sysfs_cache_bypass_hits, &sysfs_cache_bypass_misses, &sysfs_cache_hit_ratio, - &sysfs_cache_readaheads, 
&sysfs_cache_miss_collisions, &sysfs_bypassed, NULL @@ -113,7 +110,6 @@ void bch_cache_accounting_clear(struct cache_accounting *acc) acc->total.cache_misses = 0; acc->total.cache_bypass_hits = 0; acc->total.cache_bypass_misses = 0; - acc->total.cache_readaheads = 0; acc->total.cache_miss_collisions = 0; acc->total.sectors_bypassed = 0; } @@ -145,7 +141,6 @@ static void scale_stats(struct cache_stats *stats, unsigned long rescale_at) scale_stat(&stats->cache_misses); scale_stat(&stats->cache_bypass_hits); scale_stat(&stats->cache_bypass_misses); - scale_stat(&stats->cache_readaheads); scale_stat(&stats->cache_miss_collisions); scale_stat(&stats->sectors_bypassed); } @@ -168,7 +163,6 @@ static void scale_accounting(struct timer_list *t) move_stat(cache_misses); move_stat(cache_bypass_hits); move_stat(cache_bypass_misses); - move_stat(cache_readaheads); move_stat(cache_miss_collisions); move_stat(sectors_bypassed); @@ -209,14 +203,6 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d, mark_cache_stats(&c->accounting.collector, hit, bypass); } -void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d) -{ - struct cached_dev *dc = container_of(d, struct cached_dev, disk); - - atomic_inc(&dc->accounting.collector.cache_readaheads); - atomic_inc(&c->accounting.collector.cache_readaheads); -} - void bch_mark_cache_miss_collision(struct cache_set *c, struct bcache_device *d) { struct cached_dev *dc = container_of(d, struct cached_dev, disk); diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h index abfaabf7e7fc..ca4f435f7216 100644 --- a/drivers/md/bcache/stats.h +++ b/drivers/md/bcache/stats.h @@ -7,7 +7,6 @@ struct cache_stat_collector { atomic_t cache_misses; atomic_t cache_bypass_hits; atomic_t cache_bypass_misses; - atomic_t cache_readaheads; atomic_t cache_miss_collisions; atomic_t sectors_bypassed; }; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index bea8c4429ae8..185246a0d855 100644 
--- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -890,13 +890,9 @@ static void bcache_device_free(struct bcache_device *d) if (disk_added) del_gendisk(disk); - if (disk->queue) - blk_cleanup_queue(disk->queue); - + blk_cleanup_disk(disk); ida_simple_remove(&bcache_device_idx, first_minor_to_idx(disk->first_minor)); - if (disk_added) - put_disk(disk); } bioset_exit(&d->bio_split); @@ -946,7 +942,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER)) goto err; - d->disk = alloc_disk(BCACHE_MINORS); + d->disk = blk_alloc_disk(NUMA_NO_NODE); if (!d->disk) goto err; @@ -955,14 +951,11 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, d->disk->major = bcache_major; d->disk->first_minor = idx_to_first_minor(idx); + d->disk->minors = BCACHE_MINORS; d->disk->fops = ops; d->disk->private_data = d; - q = blk_alloc_queue(NUMA_NO_NODE); - if (!q) - return -ENOMEM; - - d->disk->queue = q; + q = d->disk->queue; q->limits.max_hw_sectors = UINT_MAX; q->limits.max_sectors = UINT_MAX; q->limits.max_segment_size = UINT_MAX; diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index cc89f3156d1a..05ac1d6fbbf3 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -137,7 +137,6 @@ rw_attribute(io_disable); rw_attribute(discard); rw_attribute(running); rw_attribute(label); -rw_attribute(readahead); rw_attribute(errors); rw_attribute(io_error_limit); rw_attribute(io_error_halflife); @@ -260,7 +259,6 @@ SHOW(__bch_cached_dev) var_printf(partial_stripes_expensive, "%u"); var_hprint(sequential_cutoff); - var_hprint(readahead); sysfs_print(running, atomic_read(&dc->running)); sysfs_print(state, states[BDEV_STATE(&dc->sb)]); @@ -365,7 +363,6 @@ STORE(__cached_dev) sysfs_strtoul_clamp(sequential_cutoff, dc->sequential_cutoff, 0, UINT_MAX); - d_strtoi_h(readahead); if (attr == &sysfs_clear_stats) bch_cache_accounting_clear(&dc->accounting); @@ 
-538,7 +535,6 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_running, &sysfs_state, &sysfs_label, - &sysfs_readahead, #ifdef CONFIG_BCACHE_DEBUG &sysfs_verify, &sysfs_bypass_torture_test, diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 9c3bc3711b33..0dbd48cbdff9 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -530,7 +530,6 @@ static const struct blk_mq_ops dm_mq_ops = { int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) { - struct request_queue *q; struct dm_target *immutable_tgt; int err; @@ -557,12 +556,10 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) if (err) goto out_kfree_tag_set; - q = blk_mq_init_allocated_queue(md->tag_set, md->queue, true); - if (IS_ERR(q)) { - err = PTR_ERR(q); + err = blk_mq_init_allocated_queue(md->tag_set, md->queue); + if (err) goto out_tag_set; - } - + elevator_init_mq(md->queue); return 0; out_tag_set: diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 420a12b42708..2c5f9e585211 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1694,13 +1694,13 @@ static void cleanup_mapped_device(struct mapped_device *md) md->disk->private_data = NULL; spin_unlock(&_minor_lock); del_gendisk(md->disk); - put_disk(md->disk); } - if (md->queue) { + if (md->queue) dm_queue_destroy_keyslot_manager(md->queue); - blk_cleanup_queue(md->queue); - } + + if (md->disk) + blk_cleanup_disk(md->disk); cleanup_srcu_struct(&md->io_barrier); @@ -1763,13 +1763,10 @@ static struct mapped_device *alloc_dev(int minor) * established. If request-based table is loaded: blk-mq will * override accordingly. 
*/ - md->queue = blk_alloc_queue(numa_node_id); - if (!md->queue) - goto bad; - - md->disk = alloc_disk_node(1, md->numa_node_id); + md->disk = blk_alloc_disk(md->numa_node_id); if (!md->disk) goto bad; + md->queue = md->disk->queue; init_waitqueue_head(&md->wait); INIT_WORK(&md->work, dm_wq_work); @@ -1782,6 +1779,7 @@ static struct mapped_device *alloc_dev(int minor) md->disk->major = _major; md->disk->first_minor = minor; + md->disk->minors = 1; md->disk->fops = &dm_blk_dops; md->disk->queue = md->queue; md->disk->private_data = md; @@ -2230,7 +2228,7 @@ static bool md_in_flight_bios(struct mapped_device *md) return sum != 0; } -static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state) +static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int task_state) { int r = 0; DEFINE_WAIT(wait); @@ -2253,7 +2251,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state return r; } -static int dm_wait_for_completion(struct mapped_device *md, long task_state) +static int dm_wait_for_completion(struct mapped_device *md, unsigned int task_state) { int r = 0; @@ -2380,7 +2378,7 @@ static void unlock_fs(struct mapped_device *md) * are being added to md->deferred list. 
*/ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, - unsigned suspend_flags, long task_state, + unsigned suspend_flags, unsigned int task_state, int dmf_suspended_flag) { bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG; diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index ea3130e11680..e29c6298ef5c 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2616,7 +2616,7 @@ static struct attribute *md_bitmap_attrs[] = { &max_backlog_used.attr, NULL }; -struct attribute_group md_bitmap_group = { +const struct attribute_group md_bitmap_group = { .name = "bitmap", .attrs = md_bitmap_attrs, }; diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c index fda4cb3f936f..c0dc6f2ef4a3 100644 --- a/drivers/md/md-faulty.c +++ b/drivers/md/md-faulty.c @@ -357,7 +357,7 @@ static void raid_exit(void) module_init(raid_init); module_exit(raid_exit); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Fault injection personality for MD"); +MODULE_DESCRIPTION("Fault injection personality for MD (deprecated)"); MODULE_ALIAS("md-personality-10"); /* faulty */ MODULE_ALIAS("md-faulty"); MODULE_ALIAS("md-level--5"); diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index 63ed8329a98d..1ff51647a682 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -312,7 +312,7 @@ static void linear_exit (void) module_init(linear_init); module_exit(linear_exit); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Linear device concatenation personality for MD"); +MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)"); MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/ MODULE_ALIAS("md-linear"); MODULE_ALIAS("md-level--1"); diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c index 776bbe542db5..e7d6486f090f 100644 --- a/drivers/md/md-multipath.c +++ b/drivers/md/md-multipath.c @@ -471,7 +471,7 @@ static void __exit multipath_exit (void) module_init(multipath_init); 
module_exit(multipath_exit); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("simple multi-path personality for MD"); +MODULE_DESCRIPTION("simple multi-path personality for MD (deprecated)"); MODULE_ALIAS("md-personality-7"); /* MULTIPATH */ MODULE_ALIAS("md-multipath"); MODULE_ALIAS("md-level--4"); diff --git a/drivers/md/md.c b/drivers/md/md.c index 49f897fbb89b..ae8fe54ea358 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -441,30 +441,6 @@ check_suspended: } EXPORT_SYMBOL(md_handle_request); -struct md_io { - struct mddev *mddev; - bio_end_io_t *orig_bi_end_io; - void *orig_bi_private; - struct block_device *orig_bi_bdev; - unsigned long start_time; -}; - -static void md_end_io(struct bio *bio) -{ - struct md_io *md_io = bio->bi_private; - struct mddev *mddev = md_io->mddev; - - bio_end_io_acct_remapped(bio, md_io->start_time, md_io->orig_bi_bdev); - - bio->bi_end_io = md_io->orig_bi_end_io; - bio->bi_private = md_io->orig_bi_private; - - mempool_free(md_io, &mddev->md_io_pool); - - if (bio->bi_end_io) - bio->bi_end_io(bio); -} - static blk_qc_t md_submit_bio(struct bio *bio) { const int rw = bio_data_dir(bio); @@ -489,21 +465,6 @@ static blk_qc_t md_submit_bio(struct bio *bio) return BLK_QC_T_NONE; } - if (bio->bi_end_io != md_end_io) { - struct md_io *md_io; - - md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO); - md_io->mddev = mddev; - md_io->orig_bi_end_io = bio->bi_end_io; - md_io->orig_bi_private = bio->bi_private; - md_io->orig_bi_bdev = bio->bi_bdev; - - bio->bi_end_io = md_end_io; - bio->bi_private = md_io; - - md_io->start_time = bio_start_io_acct(bio); - } - /* bio could be mergeable after passing to underlayer */ bio->bi_opf &= ~REQ_NOMERGE; @@ -824,7 +785,7 @@ out_free_new: return ERR_PTR(error); } -static struct attribute_group md_redundancy_group; +static const struct attribute_group md_redundancy_group; void mddev_unlock(struct mddev *mddev) { @@ -841,7 +802,7 @@ void mddev_unlock(struct mddev *mddev) * test it under the same mutex to ensure 
its correct value * is seen. */ - struct attribute_group *to_remove = mddev->to_remove; + const struct attribute_group *to_remove = mddev->to_remove; mddev->to_remove = NULL; mddev->sysfs_active = 1; mutex_unlock(&mddev->reconfig_mutex); @@ -2379,7 +2340,15 @@ int md_integrity_register(struct mddev *mddev) bdev_get_integrity(reference->bdev)); pr_debug("md: data integrity enabled on %s\n", mdname(mddev)); - if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) { + if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) || + (mddev->level != 1 && mddev->level != 10 && + bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) { + /* + * No need to handle the failure of bioset_integrity_create, + * because the function is called by md_run() -> pers->run(), + * md_run calls bioset_exit -> bioset_integrity_free in case + * of failure case. + */ pr_err("md: failed to create integrity pool for %s\n", mdname(mddev)); return -EINVAL; @@ -5538,7 +5507,7 @@ static struct attribute *md_redundancy_attrs[] = { &md_degraded.attr, NULL, }; -static struct attribute_group md_redundancy_group = { +static const struct attribute_group md_redundancy_group = { .name = NULL, .attrs = md_redundancy_attrs, }; @@ -5598,17 +5567,16 @@ static void md_free(struct kobject *ko) if (mddev->sysfs_level) sysfs_put(mddev->sysfs_level); - if (mddev->gendisk) + if (mddev->gendisk) { del_gendisk(mddev->gendisk); - if (mddev->queue) - blk_cleanup_queue(mddev->queue); - if (mddev->gendisk) - put_disk(mddev->gendisk); + blk_cleanup_disk(mddev->gendisk); + } percpu_ref_exit(&mddev->writes_pending); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); - mempool_exit(&mddev->md_io_pool); + if (mddev->level != 1 && mddev->level != 10) + bioset_exit(&mddev->io_acct_set); kfree(mddev); } @@ -5705,26 +5673,14 @@ static int md_alloc(dev_t dev, char *name) */ mddev->hold_active = UNTIL_STOP; - error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE, - sizeof(struct md_io)); - if 
(error) - goto abort; - error = -ENOMEM; - mddev->queue = blk_alloc_queue(NUMA_NO_NODE); - if (!mddev->queue) + disk = blk_alloc_disk(NUMA_NO_NODE); + if (!disk) goto abort; - blk_set_stacking_limits(&mddev->queue->limits); - - disk = alloc_disk(1 << shift); - if (!disk) { - blk_cleanup_queue(mddev->queue); - mddev->queue = NULL; - goto abort; - } disk->major = MAJOR(mddev->unit); disk->first_minor = unit << shift; + disk->minors = 1 << shift; if (name) strcpy(disk->disk_name, name); else if (partitioned) @@ -5733,7 +5689,9 @@ static int md_alloc(dev_t dev, char *name) sprintf(disk->disk_name, "md%d", unit); disk->fops = &md_fops; disk->private_data = mddev; - disk->queue = mddev->queue; + + mddev->queue = disk->queue; + blk_set_stacking_limits(&mddev->queue->limits); blk_queue_write_cache(mddev->queue, true, true); /* Allow extended partitions. This makes the * 'mdp' device redundant, but we can't really @@ -5907,7 +5865,14 @@ int md_run(struct mddev *mddev) if (!bioset_initialized(&mddev->sync_set)) { err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); if (err) - return err; + goto exit_bio_set; + } + if (mddev->level != 1 && mddev->level != 10 && + !bioset_initialized(&mddev->io_acct_set)) { + err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE, + offsetof(struct md_io_acct, bio_clone), 0); + if (err) + goto exit_sync_set; } spin_lock(&pers_lock); @@ -6035,6 +6000,7 @@ int md_run(struct mddev *mddev) blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue); else blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue); } if (pers->sync_request) { if (mddev->kobj.sd && @@ -6084,8 +6050,12 @@ bitmap_abort: module_put(pers->owner); md_bitmap_destroy(mddev); abort: - bioset_exit(&mddev->bio_set); + if (mddev->level != 1 && mddev->level != 10) + bioset_exit(&mddev->io_acct_set); +exit_sync_set: bioset_exit(&mddev->sync_set); +exit_bio_set: + bioset_exit(&mddev->bio_set); return err; } 
EXPORT_SYMBOL_GPL(md_run); @@ -6309,6 +6279,8 @@ void md_stop(struct mddev *mddev) __md_stop(mddev); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); + if (mddev->level != 1 && mddev->level != 10) + bioset_exit(&mddev->io_acct_set); } EXPORT_SYMBOL_GPL(md_stop); @@ -8613,6 +8585,41 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, } EXPORT_SYMBOL_GPL(md_submit_discard_bio); +static void md_end_io_acct(struct bio *bio) +{ + struct md_io_acct *md_io_acct = bio->bi_private; + struct bio *orig_bio = md_io_acct->orig_bio; + + orig_bio->bi_status = bio->bi_status; + + bio_end_io_acct(orig_bio, md_io_acct->start_time); + bio_put(bio); + bio_endio(orig_bio); +} + +/* + * Used by personalities that don't already clone the bio and thus can't + * easily add the timestamp to their extended bio structure. + */ +void md_account_bio(struct mddev *mddev, struct bio **bio) +{ + struct md_io_acct *md_io_acct; + struct bio *clone; + + if (!blk_queue_io_stat((*bio)->bi_bdev->bd_disk->queue)) + return; + + clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set); + md_io_acct = container_of(clone, struct md_io_acct, bio_clone); + md_io_acct->orig_bio = *bio; + md_io_acct->start_time = bio_start_io_acct(*bio); + + clone->bi_end_io = md_end_io_acct; + clone->bi_private = md_io_acct; + *bio = clone; +} +EXPORT_SYMBOL_GPL(md_account_bio); + /* md_allow_write(mddev) * Calling this ensures that the array is marked 'active' so that writes * may proceed without blocking. It is important to call this before diff --git a/drivers/md/md.h b/drivers/md/md.h index fb7eab58cfd5..832547cf038f 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -395,10 +395,10 @@ struct mddev { * that we are never stopping an array while it is open. * 'reconfig_mutex' protects all other reconfiguration. * These locks are separate due to conflicting interactions - * with bdev->bd_mutex. + * with disk->open_mutex. 
* Lock ordering is: - * reconfig_mutex -> bd_mutex - * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open + * reconfig_mutex -> disk->open_mutex + * disk->open_mutex -> open_mutex: e.g. __blkdev_get -> md_open */ struct mutex open_mutex; struct mutex reconfig_mutex; @@ -481,13 +481,13 @@ struct mddev { atomic_t max_corr_read_errors; /* max read retries */ struct list_head all_mddevs; - struct attribute_group *to_remove; + const struct attribute_group *to_remove; struct bio_set bio_set; struct bio_set sync_set; /* for sync operations like * metadata and bitmap writes */ - mempool_t md_io_pool; + struct bio_set io_acct_set; /* for raid0 and raid5 io accounting */ /* Generic flush handling. * The last to finish preflush schedules a worker to submit @@ -613,7 +613,7 @@ struct md_sysfs_entry { ssize_t (*show)(struct mddev *, char *); ssize_t (*store)(struct mddev *, const char *, size_t); }; -extern struct attribute_group md_bitmap_group; +extern const struct attribute_group md_bitmap_group; static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name) { @@ -684,6 +684,12 @@ struct md_thread { void *private; }; +struct md_io_acct { + struct bio *orig_bio; + unsigned long start_time; + struct bio bio_clone; +}; + #define THREAD_WAKEUP 0 static inline void safe_put_page(struct page *p) @@ -715,6 +721,7 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev); extern void md_finish_reshape(struct mddev *mddev); void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, struct bio *bio, sector_t start, sector_t size); +void md_account_bio(struct mddev *mddev, struct bio **bio); extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio); extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev, diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e5d7411cba9b..62c8b6adac70 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -546,6 +546,9 @@ static bool 
raid0_make_request(struct mddev *mddev, struct bio *bio) bio = split; } + if (bio->bi_pool != &mddev->bio_set) + md_account_bio(mddev, &bio); + orig_sector = sector; zone = find_zone(mddev->private, &sector); switch (conf->layout) { diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ced076ba560e..51f2547c2007 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -300,6 +300,8 @@ static void call_bio_endio(struct r1bio *r1_bio) if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) bio->bi_status = BLK_STS_IOERR; + if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) + bio_end_io_acct(bio, r1_bio->start_time); bio_endio(bio); } @@ -1210,7 +1212,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); int max_sectors; int rdisk; - bool print_msg = !!r1_bio; + bool r1bio_existed = !!r1_bio; char b[BDEVNAME_SIZE]; /* @@ -1220,7 +1222,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, */ gfp_t gfp = r1_bio ? 
(GFP_NOIO | __GFP_HIGH) : GFP_NOIO; - if (print_msg) { + if (r1bio_existed) { /* Need to get the block device name carefully */ struct md_rdev *rdev; rcu_read_lock(); @@ -1252,7 +1254,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, if (rdisk < 0) { /* couldn't find anywhere to read from */ - if (print_msg) { + if (r1bio_existed) { pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n", mdname(mddev), b, @@ -1263,7 +1265,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, } mirror = conf->mirrors + rdisk; - if (print_msg) + if (r1bio_existed) pr_info_ratelimited("md/raid1:%s: redirecting sector %llu to other mirror: %s\n", mdname(mddev), (unsigned long long)r1_bio->sector, @@ -1292,6 +1294,9 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, r1_bio->read_disk = rdisk; + if (!r1bio_existed && blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) + r1_bio->start_time = bio_start_io_acct(bio); + read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set); r1_bio->bios[rdisk] = read_bio; @@ -1461,6 +1466,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, r1_bio->sectors = max_sectors; } + if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) + r1_bio->start_time = bio_start_io_acct(bio); atomic_set(&r1_bio->remaining, 1); atomic_set(&r1_bio->behind_remaining, 0); diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index b7eb09e8c025..ccf10e59b116 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -158,6 +158,7 @@ struct r1bio { sector_t sector; int sectors; unsigned long state; + unsigned long start_time; struct mddev *mddev; /* * original bio going to /dev/mdx diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 13f5e6b2a73d..16977e8e075d 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -297,6 +297,8 @@ static void raid_end_bio_io(struct r10bio *r10_bio) if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) bio->bi_status = 
BLK_STS_IOERR; + if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) + bio_end_io_acct(bio, r10_bio->start_time); bio_endio(bio); /* * Wake up any possible resync thread that waits for the device @@ -1184,6 +1186,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, } slot = r10_bio->read_slot; + if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) + r10_bio->start_time = bio_start_io_acct(bio); read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set); r10_bio->devs[slot].bio = read_bio; @@ -1483,6 +1487,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, r10_bio->master_bio = bio; } + if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) + r10_bio->start_time = bio_start_io_acct(bio); atomic_set(&r10_bio->remaining, 1); md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 1461fd55311b..c34bb196790e 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -124,6 +124,7 @@ struct r10bio { sector_t sector; /* virtual sector number */ int sectors; unsigned long state; + unsigned long start_time; struct mddev *mddev; /* * original bio going to /dev/mdx diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 841e1c1aa5e6..b8436e4930ed 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5311,8 +5311,6 @@ static int in_chunk_boundary(struct mddev *mddev, struct bio *bio) unsigned int chunk_sectors; unsigned int bio_sectors = bio_sectors(bio); - WARN_ON_ONCE(bio->bi_bdev->bd_partno); - chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors); return chunk_sectors >= ((sector & (chunk_sectors - 1)) + bio_sectors); @@ -5364,11 +5362,13 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf, */ static void raid5_align_endio(struct bio *bi) { - struct bio* raid_bi = bi->bi_private; + struct md_io_acct *md_io_acct = bi->bi_private; + struct bio *raid_bi = md_io_acct->orig_bio; struct mddev *mddev; struct r5conf *conf; 
struct md_rdev *rdev; blk_status_t error = bi->bi_status; + unsigned long start_time = md_io_acct->start_time; bio_put(bi); @@ -5380,6 +5380,8 @@ static void raid5_align_endio(struct bio *bi) rdev_dec_pending(rdev, conf->mddev); if (!error) { + if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue)) + bio_end_io_acct(raid_bi, start_time); bio_endio(raid_bi); if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_quiescent); @@ -5398,6 +5400,8 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) struct md_rdev *rdev; sector_t sector, end_sector, first_bad; int bad_sectors, dd_idx; + struct md_io_acct *md_io_acct; + bool did_inc; if (!in_chunk_boundary(mddev, raid_bio)) { pr_debug("%s: non aligned\n", __func__); @@ -5427,29 +5431,46 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) atomic_inc(&rdev->nr_pending); rcu_read_unlock(); - align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set); - bio_set_dev(align_bio, rdev->bdev); - align_bio->bi_end_io = raid5_align_endio; - align_bio->bi_private = raid_bio; - align_bio->bi_iter.bi_sector = sector; - - raid_bio->bi_next = (void *)rdev; - - if (is_badblock(rdev, sector, bio_sectors(align_bio), &first_bad, + if (is_badblock(rdev, sector, bio_sectors(raid_bio), &first_bad, &bad_sectors)) { - bio_put(align_bio); + bio_put(raid_bio); rdev_dec_pending(rdev, mddev); return 0; } + align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->io_acct_set); + md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone); + raid_bio->bi_next = (void *)rdev; + if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue)) + md_io_acct->start_time = bio_start_io_acct(raid_bio); + md_io_acct->orig_bio = raid_bio; + + bio_set_dev(align_bio, rdev->bdev); + align_bio->bi_end_io = raid5_align_endio; + align_bio->bi_private = md_io_acct; + align_bio->bi_iter.bi_sector = sector; + /* No reshape active, so we can trust rdev->data_offset */ 
align_bio->bi_iter.bi_sector += rdev->data_offset; - spin_lock_irq(&conf->device_lock); - wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0, - conf->device_lock); - atomic_inc(&conf->active_aligned_reads); - spin_unlock_irq(&conf->device_lock); + did_inc = false; + if (conf->quiesce == 0) { + atomic_inc(&conf->active_aligned_reads); + did_inc = true; + } + /* need a memory barrier to detect the race with raid5_quiesce() */ + if (!did_inc || smp_load_acquire(&conf->quiesce) != 0) { + /* quiesce is in progress, so we need to undo io activation and wait + * for it to finish + */ + if (did_inc && atomic_dec_and_test(&conf->active_aligned_reads)) + wake_up(&conf->wait_for_quiescent); + spin_lock_irq(&conf->device_lock); + wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0, + conf->device_lock); + atomic_inc(&conf->active_aligned_reads); + spin_unlock_irq(&conf->device_lock); + } if (mddev->gendisk) trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk), @@ -5798,6 +5819,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) last_sector = bio_end_sector(bi); bi->bi_next = NULL; + md_account_bio(mddev, &bi); prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) { int previous; @@ -6930,7 +6952,7 @@ static struct attribute *raid5_attrs[] = { &ppl_write_hint.attr, NULL, }; -static struct attribute_group raid5_attrs_group = { +static const struct attribute_group raid5_attrs_group = { .name = NULL, .attrs = raid5_attrs, }; @@ -8336,7 +8358,10 @@ static void raid5_quiesce(struct mddev *mddev, int quiesce) * active stripes can drain */ r5c_flush_cache(conf, INT_MAX); - conf->quiesce = 2; + /* need a memory barrier to make sure read_one_chunk() sees + * quiesce started and reverts to slow (locked) path. 
+ */ + smp_store_release(&conf->quiesce, 2); wait_event_cmd(conf->wait_for_quiescent, atomic_read(&conf->active_stripes) == 0 && atomic_read(&conf->active_aligned_reads) == 0, |