aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
authorGravatar Qu Wenruo <wqu@suse.com> 2020-11-13 20:51:28 +0800
committerGravatar David Sterba <dsterba@suse.com> 2020-12-08 15:54:13 +0100
commit94e8c95ccba8bc25b6385b8c2ba1b9cd90e86de6 (patch)
tree464901848f8e5ed4913277efaf16987f4daae3ae /fs/btrfs
parentbtrfs: tests: remove invalid extent-io test (diff)
downloadlinux-94e8c95ccba8bc25b6385b8c2ba1b9cd90e86de6.tar.gz
linux-94e8c95ccba8bc25b6385b8c2ba1b9cd90e86de6.tar.bz2
linux-94e8c95ccba8bc25b6385b8c2ba1b9cd90e86de6.zip
btrfs: add structure to keep track of extent range in end_bio_extent_readpage
In end_bio_extent_readpage() we had a strange dance around extent_start/extent_len. Hidden behind the strange dance is, it's just calling endio_readpage_release_extent() on each bvec range. Here is an example to explain the original work flow: Bio is for inode 257, containing 2 pages, for range [1M, 1M+8K) end_bio_extent_extent_readpage() entered |- extent_start = 0; |- extent_end = 0; |- bio_for_each_segment_all() { | |- /* Got the 1st bvec */ | |- start = SZ_1M; | |- end = SZ_1M + SZ_4K - 1; | |- update = 1; | |- if (extent_len == 0) { | | |- extent_start = start; /* SZ_1M */ | | |- extent_len = end + 1 - start; /* SZ_1M */ | | } | | | |- /* Got the 2nd bvec */ | |- start = SZ_1M + 4K; | |- end = SZ_1M + 4K - 1; | |- update = 1; | |- if (extent_start + extent_len == start) { | | |- extent_len += end + 1 - start; /* SZ_8K */ | | } | } /* All bio vec iterated */ | |- if (extent_len) { |- endio_readpage_release_extent(tree, extent_start, extent_len, update); /* extent_start == SZ_1M, extent_len == SZ_8K, uptodate = 1 */ As the above flow shows, the existing code in end_bio_extent_readpage() is accumulates extent_start/extent_len, and when the contiguous range stops, calls endio_readpage_release_extent() for the range. However current behavior has something not really considered: - The inode can change For bio, its pages don't need to have contiguous page_offset. This means, even pages from different inodes can be packed into one bio. - bvec cross page boundary There is a feature called multi-page bvec, where bvec->bv_len can go beyond bvec->bv_page boundary. - Poor readability This patch will address the problem: - Introduce a proper structure, processed_extent, to record processed extent range - Integrate inode/start/end/uptodate check into endio_readpage_release_extent() - Add more comment on each step. This should greatly improve the readability, now in end_bio_extent_readpage() there are only two endio_readpage_release_extent() calls. - Add inode check for contiguity Now we also ensure the inode is the same one before checking if the range is contiguous. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/extent_io.c107
1 files changed, 73 insertions, 34 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 07db1af9d7d5..519f74502a3e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2769,16 +2769,77 @@ static void end_bio_extent_writepage(struct bio *bio)
bio_put(bio);
}
-static void
-endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
- int uptodate)
+/*
+ * Record previously processed extent range
+ *
+ * For endio_readpage_release_extent() to handle a full extent range, reducing
+ * the extent io operations.
+ */
+struct processed_extent {
+ struct btrfs_inode *inode;
+ /* Start of the range in @inode */
+ u64 start;
+ /* End of the range in in @inode */
+ u64 end;
+ bool uptodate;
+};
+
+/*
+ * Try to release processed extent range
+ *
+ * May not release the extent range right now if the current range is
+ * contiguous to processed extent.
+ *
+ * Will release processed extent when any of @inode, @uptodate, the range is
+ * no longer contiguous to the processed range.
+ *
+ * Passing @inode == NULL will force processed extent to be released.
+ */
+static void endio_readpage_release_extent(struct processed_extent *processed,
+ struct btrfs_inode *inode, u64 start, u64 end,
+ bool uptodate)
{
struct extent_state *cached = NULL;
- u64 end = start + len - 1;
+ struct extent_io_tree *tree;
+
+ /* The first extent, initialize @processed */
+ if (!processed->inode)
+ goto update;
- if (uptodate && tree->track_uptodate)
- set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
- unlock_extent_cached_atomic(tree, start, end, &cached);
+ /*
+ * Contiguous to processed extent, just uptodate the end.
+ *
+ * Several things to notice:
+ *
+ * - bio can be merged as long as on-disk bytenr is contiguous
+ * This means we can have page belonging to other inodes, thus need to
+ * check if the inode still matches.
+ * - bvec can contain range beyond current page for multi-page bvec
+ * Thus we need to do processed->end + 1 >= start check
+ */
+ if (processed->inode == inode && processed->uptodate == uptodate &&
+ processed->end + 1 >= start && end >= processed->end) {
+ processed->end = end;
+ return;
+ }
+
+ tree = &processed->inode->io_tree;
+ /*
+ * Now we don't have range contiguous to the processed range, release
+ * the processed range now.
+ */
+ if (processed->uptodate && tree->track_uptodate)
+ set_extent_uptodate(tree, processed->start, processed->end,
+ &cached, GFP_ATOMIC);
+ unlock_extent_cached_atomic(tree, processed->start, processed->end,
+ &cached);
+
+update:
+ /* Update processed to current range */
+ processed->inode = inode;
+ processed->start = start;
+ processed->end = end;
+ processed->uptodate = uptodate;
}
/*
@@ -2798,12 +2859,11 @@ static void end_bio_extent_readpage(struct bio *bio)
int uptodate = !bio->bi_status;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct extent_io_tree *tree, *failure_tree;
+ struct processed_extent processed = { 0 };
u64 offset = 0;
u64 start;
u64 end;
u64 len;
- u64 extent_start = 0;
- u64 extent_len = 0;
int mirror;
int ret;
struct bvec_iter_all iter_all;
@@ -2912,32 +2972,11 @@ readpage_ok:
unlock_page(page);
offset += len;
- if (unlikely(!uptodate)) {
- if (extent_len) {
- endio_readpage_release_extent(tree,
- extent_start,
- extent_len, 1);
- extent_start = 0;
- extent_len = 0;
- }
- endio_readpage_release_extent(tree, start,
- end - start + 1, 0);
- } else if (!extent_len) {
- extent_start = start;
- extent_len = end + 1 - start;
- } else if (extent_start + extent_len == start) {
- extent_len += end + 1 - start;
- } else {
- endio_readpage_release_extent(tree, extent_start,
- extent_len, uptodate);
- extent_start = start;
- extent_len = end + 1 - start;
- }
+ endio_readpage_release_extent(&processed, BTRFS_I(inode),
+ start, end, uptodate);
}
-
- if (extent_len)
- endio_readpage_release_extent(tree, extent_start, extent_len,
- uptodate);
+ /* Release the last extent */
+ endio_readpage_release_extent(&processed, NULL, 0, 0, false);
btrfs_io_bio_free_csum(io_bio);
bio_put(bio);
}