aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/bio.c
diff options
context:
space:
mode:
authorGravatar Christoph Hellwig <hch@lst.de> 2023-05-03 09:06:13 +0200
committerGravatar David Sterba <dsterba@suse.com> 2023-06-19 13:59:23 +0200
commit da023618076a13c35bcde1a49a87b7da64761f1d (patch)
tree dcb8b9dd56e5b841e50a04b03b1d8088b79cf2a1 /fs/btrfs/bio.c
parent btrfs: use SECTOR_SHIFT to convert LBA to physical offset (diff)
downloadlinux-da023618076a13c35bcde1a49a87b7da64761f1d.tar.gz
linux-da023618076a13c35bcde1a49a87b7da64761f1d.tar.bz2
linux-da023618076a13c35bcde1a49a87b7da64761f1d.zip
btrfs: submit IO synchronously for fast checksum implementations
Most modern hardware supports very fast accelerated crc32c calculation. If that is supported, the CPU overhead of the checksum calculation is very limited, and offloading the calculation to special worker threads has a lot of overhead for no gain.

E.g. on an Intel Optane device it actually slows down even 1M buffered writes with fio very much:

Unpatched:
write: IOPS=3316, BW=3316MiB/s (3477MB/s)(200GiB/61757msec); 0 zone resets

With synchronous CRCs:
write: IOPS=4882, BW=4882MiB/s (5119MB/s)(200GiB/41948msec); 0 zone resets

There was a lot of variation during the unpatched run, going down as low as 1100MB/s, while the synchronous CRC version has about the same peak write speed but much smaller dips, and fewer kworkers churning around. Both tests had fio saturated at 100% CPU.

(thanks to Jens Axboe via Chris Mason for the benchmarking)

Reviewed-by: Chris Mason <clm@fb.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/bio.c')
-rw-r--r-- fs/btrfs/bio.c | 20
1 files changed, 7 insertions, 13 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 2a182edcfb61..67fb8f6a0eb9 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -574,6 +574,10 @@ static void run_one_async_free(struct btrfs_work *work)
static bool should_async_write(struct btrfs_bio *bbio)
{
+ /* Submit synchronously if the checksum implementation is fast. */
+ if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
+ return false;
+
/*
* If the I/O is not issued by fsync and friends, (->sync_writers != 0),
* then try to defer the submission to a workqueue to parallelize the
@@ -582,19 +586,9 @@ static bool should_async_write(struct btrfs_bio *bbio)
if (atomic_read(&bbio->inode->sync_writers))
return false;
- /*
- * Submit metadata writes synchronously if the checksum implementation
- * is fast, or we are on a zoned device that wants I/O to be submitted
- * in order.
- */
- if (bbio->bio.bi_opf & REQ_META) {
- struct btrfs_fs_info *fs_info = bbio->fs_info;
-
- if (btrfs_is_zoned(fs_info))
- return false;
- if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
- return false;
- }
+ /* Zoned devices require I/O to be submitted in order. */
+ if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(bbio->fs_info))
+ return false;
return true;
}