From 94e8c95ccba8bc25b6385b8c2ba1b9cd90e86de6 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 13 Nov 2020 20:51:28 +0800 Subject: btrfs: add structure to keep track of extent range in end_bio_extent_readpage In end_bio_extent_readpage() we had a strange dance around extent_start/extent_len. Hidden behind the strange dance is, it's just calling endio_readpage_release_extent() on each bvec range. Here is an example to explain the original work flow: Bio is for inode 257, containing 2 pages, for range [1M, 1M+8K) end_bio_extent_extent_readpage() entered |- extent_start = 0; |- extent_end = 0; |- bio_for_each_segment_all() { | |- /* Got the 1st bvec */ | |- start = SZ_1M; | |- end = SZ_1M + SZ_4K - 1; | |- update = 1; | |- if (extent_len == 0) { | | |- extent_start = start; /* SZ_1M */ | | |- extent_len = end + 1 - start; /* SZ_1M */ | | } | | | |- /* Got the 2nd bvec */ | |- start = SZ_1M + 4K; | |- end = SZ_1M + 4K - 1; | |- update = 1; | |- if (extent_start + extent_len == start) { | | |- extent_len += end + 1 - start; /* SZ_8K */ | | } | } /* All bio vec iterated */ | |- if (extent_len) { |- endio_readpage_release_extent(tree, extent_start, extent_len, update); /* extent_start == SZ_1M, extent_len == SZ_8K, uptodate = 1 */ As the above flow shows, the existing code in end_bio_extent_readpage() is accumulates extent_start/extent_len, and when the contiguous range stops, calls endio_readpage_release_extent() for the range. However current behavior has something not really considered: - The inode can change For bio, its pages don't need to have contiguous page_offset. This means, even pages from different inodes can be packed into one bio. - bvec cross page boundary There is a feature called multi-page bvec, where bvec->bv_len can go beyond bvec->bv_page boundary. - Poor readability This patch will address the problem: - Introduce a proper structure, processed_extent, to record processed extent range - Integrate inode/start/end/uptodate check into endio_readpage_release_extent() - Add more comment on each step. This should greatly improve the readability, now in end_bio_extent_readpage() there are only two endio_readpage_release_extent() calls. - Add inode check for contiguity Now we also ensure the inode is the same one before checking if the range is contiguous. Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 107 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 73 insertions(+), 34 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 07db1af9d7d5..519f74502a3e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2769,16 +2769,77 @@ static void end_bio_extent_writepage(struct bio *bio) bio_put(bio); } -static void -endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, - int uptodate) +/* + * Record previously processed extent range + * + * For endio_readpage_release_extent() to handle a full extent range, reducing + * the extent io operations. + */ +struct processed_extent { + struct btrfs_inode *inode; + /* Start of the range in @inode */ + u64 start; + /* End of the range in in @inode */ + u64 end; + bool uptodate; +}; + +/* + * Try to release processed extent range + * + * May not release the extent range right now if the current range is + * contiguous to processed extent. + * + * Will release processed extent when any of @inode, @uptodate, the range is + * no longer contiguous to the processed range. + * + * Passing @inode == NULL will force processed extent to be released. + */ +static void endio_readpage_release_extent(struct processed_extent *processed, + struct btrfs_inode *inode, u64 start, u64 end, + bool uptodate) { struct extent_state *cached = NULL; - u64 end = start + len - 1; + struct extent_io_tree *tree; + + /* The first extent, initialize @processed */ + if (!processed->inode) + goto update; - if (uptodate && tree->track_uptodate) - set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); - unlock_extent_cached_atomic(tree, start, end, &cached); + /* + * Contiguous to processed extent, just uptodate the end. + * + * Several things to notice: + * + * - bio can be merged as long as on-disk bytenr is contiguous + * This means we can have page belonging to other inodes, thus need to + * check if the inode still matches. + * - bvec can contain range beyond current page for multi-page bvec + * Thus we need to do processed->end + 1 >= start check + */ + if (processed->inode == inode && processed->uptodate == uptodate && + processed->end + 1 >= start && end >= processed->end) { + processed->end = end; + return; + } + + tree = &processed->inode->io_tree; + /* + * Now we don't have range contiguous to the processed range, release + * the processed range now. + */ + if (processed->uptodate && tree->track_uptodate) + set_extent_uptodate(tree, processed->start, processed->end, + &cached, GFP_ATOMIC); + unlock_extent_cached_atomic(tree, processed->start, processed->end, + &cached); + +update: + /* Update processed to current range */ + processed->inode = inode; + processed->start = start; + processed->end = end; + processed->uptodate = uptodate; } /* @@ -2798,12 +2859,11 @@ static void end_bio_extent_readpage(struct bio *bio) int uptodate = !bio->bi_status; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct extent_io_tree *tree, *failure_tree; + struct processed_extent processed = { 0 }; u64 offset = 0; u64 start; u64 end; u64 len; - u64 extent_start = 0; - u64 extent_len = 0; int mirror; int ret; struct bvec_iter_all iter_all; @@ -2912,32 +2972,11 @@ readpage_ok: unlock_page(page); offset += len; - if (unlikely(!uptodate)) { - if (extent_len) { - endio_readpage_release_extent(tree, - extent_start, - extent_len, 1); - extent_start = 0; - extent_len = 0; - } - endio_readpage_release_extent(tree, start, - end - start + 1, 0); - } else if (!extent_len) { - extent_start = start; - extent_len = end + 1 - start; - } else if (extent_start + extent_len == start) { - extent_len += end + 1 - start; - } else { - endio_readpage_release_extent(tree, extent_start, - extent_len, uptodate); - extent_start = start; - extent_len = end + 1 - start; - } + endio_readpage_release_extent(&processed, BTRFS_I(inode), + start, end, uptodate); } - - if (extent_len) - endio_readpage_release_extent(tree, extent_start, extent_len, - uptodate); + /* Release the last extent */ + endio_readpage_release_extent(&processed, NULL, 0, 0, false); btrfs_io_bio_free_csum(io_bio); bio_put(bio); } -- cgit v1.2.3