aboutsummaryrefslogtreecommitdiff
path: root/fs/f2fs
diff options
context:
space:
mode:
authorGravatar Linus Torvalds <torvalds@linux-foundation.org> 2023-04-26 09:42:10 -0700
committerGravatar Linus Torvalds <torvalds@linux-foundation.org> 2023-04-26 09:42:10 -0700
commit5c7ecada25d2086aee607ff7deb69e77faa4aa92 (patch)
tree6f9790fe434587423004d5b4eaaedd92f835607a /fs/f2fs
parentMerge tag 'dlm-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland... (diff)
parentf2fs: remove unnessary comment in __may_age_extent_tree (diff)
downloadlinux-5c7ecada25d2086aee607ff7deb69e77faa4aa92.tar.gz
linux-5c7ecada25d2086aee607ff7deb69e77faa4aa92.tar.bz2
linux-5c7ecada25d2086aee607ff7deb69e77faa4aa92.zip
Merge tag 'f2fs-for-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs update from Jaegeuk Kim: "In this round, we've mainly modified to support non-power-of-two zone size, which is not required for f2fs by design. In order to avoid arch dependency, we refactored the messy rb_entry structure shared across different extent_cache. In addition to the improvement, we've also fixed several subtle bugs and error cases. Enhancements: - support non-power-of-two zone size for zoned device - remove sharing the rb_entry structure in extent cache - refactor f2fs_gc to call checkpoint in urgent condition - support iopoll Bug fixes: - fix potential corruption when moving a directory - fix to avoid use-after-free for cached IPU bio - fix the folio private usage - avoid kernel warnings or panics in the cp_error case - fix to recover quota data correctly - fix some bugs in atomic operations - fix system crash due to lack of free space in LFS - fix null pointer panic in tracepoint in __replace_atomic_write_block - fix iostat lock protection - fix scheduling while atomic in decompression path - preserve direct write semantics when buffering is forced - fix to call f2fs_wait_on_page_writeback() in f2fs_write_raw_pages()" * tag 'f2fs-for-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (52 commits) f2fs: remove unnessary comment in __may_age_extent_tree f2fs: allocate node blocks for atomic write block replacement f2fs: use cow inode data when updating atomic write f2fs: remove power-of-two limitation of zoned device f2fs: allocate trace path buffer from names_cache f2fs: add has_enough_free_secs() f2fs: relax sanity check if checkpoint is corrupted f2fs: refactor f2fs_gc to call checkpoint in urgent condition f2fs: remove folio_detach_private() in .invalidate_folio and .release_folio f2fs: remove bulk remove_proc_entry() and unnecessary kobject_del() f2fs: support iopoll method f2fs: remove batched_trim_sections node description f2fs: fix to check return value of inc_valid_block_count() f2fs: fix to check return value of f2fs_do_truncate_blocks() f2fs: fix passing relative address when discard zones f2fs: fix potential corruption when moving a directory f2fs: add radix_tree_preload_end in error case f2fs: fix to recover quota data correctly f2fs: fix to check readonly condition correctly docs: f2fs: Correct instruction to disable checkpoint ...
Diffstat (limited to 'fs/f2fs')
-rw-r--r--fs/f2fs/checkpoint.c52
-rw-r--r--fs/f2fs/compress.c47
-rw-r--r--fs/f2fs/data.c71
-rw-r--r--fs/f2fs/debug.c37
-rw-r--r--fs/f2fs/dir.c47
-rw-r--r--fs/f2fs/extent_cache.c264
-rw-r--r--fs/f2fs/f2fs.h252
-rw-r--r--fs/f2fs/file.c76
-rw-r--r--fs/f2fs/gc.c205
-rw-r--r--fs/f2fs/gc.h16
-rw-r--r--fs/f2fs/inline.c2
-rw-r--r--fs/f2fs/inode.c5
-rw-r--r--fs/f2fs/namei.c28
-rw-r--r--fs/f2fs/node.h20
-rw-r--r--fs/f2fs/recovery.c17
-rw-r--r--fs/f2fs/segment.c390
-rw-r--r--fs/f2fs/segment.h57
-rw-r--r--fs/f2fs/super.c111
-rw-r--r--fs/f2fs/sysfs.c50
19 files changed, 877 insertions, 870 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index c3e058e0a018..64b3860f50ee 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -152,6 +152,11 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
se = get_seg_entry(sbi, segno);
exist = f2fs_test_bit(offset, se->cur_valid_map);
+
+ /* skip data, if we already have an error in checkpoint. */
+ if (unlikely(f2fs_cp_error(sbi)))
+ return exist;
+
if (exist && type == DATA_GENERIC_ENHANCE_UPDATE) {
f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
blkaddr, exist);
@@ -202,6 +207,11 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
case DATA_GENERIC_ENHANCE_UPDATE:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
blkaddr < MAIN_BLKADDR(sbi))) {
+
+ /* Skip to emit an error message. */
+ if (unlikely(f2fs_cp_error(sbi)))
+ return false;
+
f2fs_warn(sbi, "access invalid blkaddr:%u",
blkaddr);
set_sbi_flag(sbi, SBI_NEED_FSCK);
@@ -325,8 +335,15 @@ static int __f2fs_write_meta_page(struct page *page,
trace_f2fs_writepage(page, META);
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) {
+ ClearPageUptodate(page);
+ dec_page_count(sbi, F2FS_DIRTY_META);
+ unlock_page(page);
+ return 0;
+ }
goto redirty_out;
+ }
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
@@ -508,6 +525,7 @@ retry:
if (!e) {
if (!new) {
spin_unlock(&im->ino_lock);
+ radix_tree_preload_end();
goto retry;
}
e = new;
@@ -706,32 +724,18 @@ err_out:
int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
block_t start_blk, orphan_blocks, i, j;
- unsigned int s_flags = sbi->sb->s_flags;
int err = 0;
-#ifdef CONFIG_QUOTA
- int quota_enabled;
-#endif
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
- if (bdev_read_only(sbi->sb->s_bdev)) {
+ if (f2fs_hw_is_readonly(sbi)) {
f2fs_info(sbi, "write access unavailable, skipping orphan cleanup");
return 0;
}
- if (s_flags & SB_RDONLY) {
+ if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE))
f2fs_info(sbi, "orphan cleanup on readonly fs");
- sbi->sb->s_flags &= ~SB_RDONLY;
- }
-
-#ifdef CONFIG_QUOTA
- /*
- * Turn on quotas which were not enabled for read-only mounts if
- * filesystem has quota feature, so that they are updated correctly.
- */
- quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
-#endif
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
@@ -765,13 +769,6 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
out:
set_sbi_flag(sbi, SBI_IS_RECOVERED);
-#ifdef CONFIG_QUOTA
- /* Turn quotas off */
- if (quota_enabled)
- f2fs_quota_off_umount(sbi->sb);
-#endif
- sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
-
return err;
}
@@ -982,7 +979,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
if (cur_page == cp2)
- cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+ cp_blk_no += BIT(le32_to_cpu(fsb->log_blocks_per_seg));
for (i = 1; i < cp_blks; i++) {
void *sit_bitmap_ptr;
@@ -1133,7 +1130,7 @@ retry:
goto retry;
}
-int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
+static int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
{
struct list_head *head = &sbi->inode_list[DIRTY_META];
struct inode *inode;
@@ -1306,7 +1303,8 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
if (!get_pages(sbi, type))
break;
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi) &&
+ !is_sbi_flag_set(sbi, SBI_IS_CLOSE)))
break;
if (type == F2FS_DIRTY_META)
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index b40dec3d7f79..11653fa79289 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -264,35 +264,21 @@ static void lz4_destroy_compress_ctx(struct compress_ctx *cc)
cc->private = NULL;
}
-#ifdef CONFIG_F2FS_FS_LZ4HC
-static int lz4hc_compress_pages(struct compress_ctx *cc)
+static int lz4_compress_pages(struct compress_ctx *cc)
{
+ int len = -EINVAL;
unsigned char level = F2FS_I(cc->inode)->i_compress_level;
- int len;
- if (level)
- len = LZ4_compress_HC(cc->rbuf, cc->cbuf->cdata, cc->rlen,
- cc->clen, level, cc->private);
- else
+ if (!level)
len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen,
cc->clen, cc->private);
- if (!len)
- return -EAGAIN;
-
- cc->clen = len;
- return 0;
-}
-#endif
-
-static int lz4_compress_pages(struct compress_ctx *cc)
-{
- int len;
-
#ifdef CONFIG_F2FS_FS_LZ4HC
- return lz4hc_compress_pages(cc);
+ else
+ len = LZ4_compress_HC(cc->rbuf, cc->cbuf->cdata, cc->rlen,
+ cc->clen, level, cc->private);
#endif
- len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen,
- cc->clen, cc->private);
+ if (len < 0)
+ return len;
if (!len)
return -EAGAIN;
@@ -670,7 +656,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
cc->cbuf->clen = cpu_to_le32(cc->clen);
- if (fi->i_compress_flag & 1 << COMPRESS_CHKSUM)
+ if (fi->i_compress_flag & BIT(COMPRESS_CHKSUM))
chksum = f2fs_crc32(F2FS_I_SB(cc->inode),
cc->cbuf->cdata, cc->clen);
cc->cbuf->chksum = cpu_to_le32(chksum);
@@ -755,13 +741,18 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
if (dic->clen > PAGE_SIZE * dic->nr_cpages - COMPRESS_HEADER_SIZE) {
ret = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION);
+
+ /* Avoid f2fs_commit_super in irq context */
+ if (in_task)
+ f2fs_save_errors(sbi, ERROR_FAIL_DECOMPRESSION);
+ else
+ f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION);
goto out_release;
}
ret = cops->decompress_pages(dic);
- if (!ret && (fi->i_compress_flag & 1 << COMPRESS_CHKSUM)) {
+ if (!ret && (fi->i_compress_flag & BIT(COMPRESS_CHKSUM))) {
u32 provided = le32_to_cpu(dic->cbuf->chksum);
u32 calculated = f2fs_crc32(sbi, dic->cbuf->cdata, dic->clen);
@@ -1456,6 +1447,12 @@ continue_unlock:
if (!PageDirty(cc->rpages[i]))
goto continue_unlock;
+ if (PageWriteback(cc->rpages[i])) {
+ if (wbc->sync_mode == WB_SYNC_NONE)
+ goto continue_unlock;
+ f2fs_wait_on_page_writeback(cc->rpages[i], DATA, true, true);
+ }
+
if (!clear_page_dirty_for_io(cc->rpages[i]))
goto continue_unlock;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 06b552a0aba2..7165b1202f53 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -93,17 +93,17 @@ static enum count_type __read_io_type(struct page *page)
/* postprocessing steps for read bios */
enum bio_post_read_step {
#ifdef CONFIG_FS_ENCRYPTION
- STEP_DECRYPT = 1 << 0,
+ STEP_DECRYPT = BIT(0),
#else
STEP_DECRYPT = 0, /* compile out the decryption-related code */
#endif
#ifdef CONFIG_F2FS_FS_COMPRESSION
- STEP_DECOMPRESS = 1 << 1,
+ STEP_DECOMPRESS = BIT(1),
#else
STEP_DECOMPRESS = 0, /* compile out the decompression-related code */
#endif
#ifdef CONFIG_FS_VERITY
- STEP_VERITY = 1 << 2,
+ STEP_VERITY = BIT(2),
#else
STEP_VERITY = 0, /* compile out the verity-related code */
#endif
@@ -420,7 +420,7 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
{
- unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
+ unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0);
unsigned int fua_flag, meta_flag, io_flag;
blk_opf_t op_flags = 0;
@@ -442,9 +442,9 @@ static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
* 5 | 4 | 3 | 2 | 1 | 0 |
* Cold | Warm | Hot | Cold | Warm | Hot |
*/
- if ((1 << fio->temp) & meta_flag)
+ if (BIT(fio->temp) & meta_flag)
op_flags |= REQ_META;
- if ((1 << fio->temp) & fua_flag)
+ if (BIT(fio->temp) & fua_flag)
op_flags |= REQ_FUA;
return op_flags;
}
@@ -874,6 +874,8 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
bool found = false;
struct bio *target = bio ? *bio : NULL;
+ f2fs_bug_on(sbi, !target && !page);
+
for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
struct list_head *head = &io->bio_list;
@@ -2235,6 +2237,10 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
if (ret)
goto out;
+ if (unlikely(f2fs_cp_error(sbi))) {
+ ret = -EIO;
+ goto out_put_dnode;
+ }
f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
skip_reading_dnode:
@@ -2798,7 +2804,8 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
* don't drop any dirty dentry pages for keeping lastest
* directory structure.
*/
- if (S_ISDIR(inode->i_mode))
+ if (S_ISDIR(inode->i_mode) &&
+ !is_sbi_flag_set(sbi, SBI_IS_CLOSE))
goto redirty_out;
goto out;
}
@@ -2898,7 +2905,8 @@ out:
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_submit_merged_write(sbi, DATA);
- f2fs_submit_merged_ipu_write(sbi, bio, NULL);
+ if (bio && *bio)
+ f2fs_submit_merged_ipu_write(sbi, bio, NULL);
submitted = NULL;
}
@@ -3123,12 +3131,9 @@ continue_unlock:
}
if (folio_test_writeback(folio)) {
- if (wbc->sync_mode != WB_SYNC_NONE)
- f2fs_wait_on_page_writeback(
- &folio->page,
- DATA, true, true);
- else
+ if (wbc->sync_mode == WB_SYNC_NONE)
goto continue_unlock;
+ f2fs_wait_on_page_writeback(&folio->page, DATA, true, true);
}
if (!folio_clear_dirty_for_io(folio))
@@ -3486,7 +3491,7 @@ unlock_out:
static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
struct page *page, loff_t pos, unsigned int len,
- block_t *blk_addr, bool *node_changed)
+ block_t *blk_addr, bool *node_changed, bool *use_cow)
{
struct inode *inode = page->mapping->host;
struct inode *cow_inode = F2FS_I(inode)->cow_inode;
@@ -3500,10 +3505,12 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
/* Look for the block in COW inode first */
err = __find_data_block(cow_inode, index, blk_addr);
- if (err)
+ if (err) {
return err;
- else if (*blk_addr != NULL_ADDR)
+ } else if (*blk_addr != NULL_ADDR) {
+ *use_cow = true;
return 0;
+ }
if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE))
goto reserve_block;
@@ -3533,6 +3540,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
struct page *page = NULL;
pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
bool need_balance = false;
+ bool use_cow = false;
block_t blkaddr = NULL_ADDR;
int err = 0;
@@ -3592,7 +3600,7 @@ repeat:
if (f2fs_is_atomic_file(inode))
err = prepare_atomic_write_begin(sbi, page, pos, len,
- &blkaddr, &need_balance);
+ &blkaddr, &need_balance, &use_cow);
else
err = prepare_write_begin(sbi, page, pos, len,
&blkaddr, &need_balance);
@@ -3632,7 +3640,9 @@ repeat:
f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto fail;
}
- err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
+ err = f2fs_submit_page_read(use_cow ?
+ F2FS_I(inode)->cow_inode : inode, page,
+ blkaddr, 0, true);
if (err)
goto fail;
@@ -3725,37 +3735,16 @@ void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
f2fs_remove_dirty_inode(inode);
}
}
-
- clear_page_private_reference(&folio->page);
- clear_page_private_gcing(&folio->page);
-
- if (test_opt(sbi, COMPRESS_CACHE) &&
- inode->i_ino == F2FS_COMPRESS_INO(sbi))
- clear_page_private_data(&folio->page);
-
- folio_detach_private(folio);
+ clear_page_private_all(&folio->page);
}
bool f2fs_release_folio(struct folio *folio, gfp_t wait)
{
- struct f2fs_sb_info *sbi;
-
/* If this is dirty folio, keep private data */
if (folio_test_dirty(folio))
return false;
- sbi = F2FS_M_SB(folio->mapping);
- if (test_opt(sbi, COMPRESS_CACHE)) {
- struct inode *inode = folio->mapping->host;
-
- if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
- clear_page_private_data(&folio->page);
- }
-
- clear_page_private_reference(&folio->page);
- clear_page_private_gcing(&folio->page);
-
- folio_detach_private(folio);
+ clear_page_private_all(&folio->page);
return true;
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 30a77936e3c5..61c35b59126e 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -336,22 +336,23 @@ get_cache:
#endif
}
-static char *s_flag[] = {
- [SBI_IS_DIRTY] = " fs_dirty",
- [SBI_IS_CLOSE] = " closing",
- [SBI_NEED_FSCK] = " need_fsck",
- [SBI_POR_DOING] = " recovering",
- [SBI_NEED_SB_WRITE] = " sb_dirty",
- [SBI_NEED_CP] = " need_cp",
- [SBI_IS_SHUTDOWN] = " shutdown",
- [SBI_IS_RECOVERED] = " recovered",
- [SBI_CP_DISABLED] = " cp_disabled",
- [SBI_CP_DISABLED_QUICK] = " cp_disabled_quick",
- [SBI_QUOTA_NEED_FLUSH] = " quota_need_flush",
- [SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush",
- [SBI_QUOTA_NEED_REPAIR] = " quota_need_repair",
- [SBI_IS_RESIZEFS] = " resizefs",
- [SBI_IS_FREEZING] = " freezefs",
+static const char *s_flag[MAX_SBI_FLAG] = {
+ [SBI_IS_DIRTY] = "fs_dirty",
+ [SBI_IS_CLOSE] = "closing",
+ [SBI_NEED_FSCK] = "need_fsck",
+ [SBI_POR_DOING] = "recovering",
+ [SBI_NEED_SB_WRITE] = "sb_dirty",
+ [SBI_NEED_CP] = "need_cp",
+ [SBI_IS_SHUTDOWN] = "shutdown",
+ [SBI_IS_RECOVERED] = "recovered",
+ [SBI_CP_DISABLED] = "cp_disabled",
+ [SBI_CP_DISABLED_QUICK] = "cp_disabled_quick",
+ [SBI_QUOTA_NEED_FLUSH] = "quota_need_flush",
+ [SBI_QUOTA_SKIP_FLUSH] = "quota_skip_flush",
+ [SBI_QUOTA_NEED_REPAIR] = "quota_need_repair",
+ [SBI_IS_RESIZEFS] = "resizefs",
+ [SBI_IS_FREEZING] = "freezefs",
+ [SBI_IS_WRITABLE] = "writable",
};
static const char *ipu_mode_names[F2FS_IPU_MAX] = {
@@ -384,8 +385,8 @@ static int stat_show(struct seq_file *s, void *v)
"Disabled" : (f2fs_cp_error(sbi) ? "Error" : "Good"));
if (sbi->s_flag) {
seq_puts(s, "[SBI:");
- for_each_set_bit(j, &sbi->s_flag, 32)
- seq_puts(s, s_flag[j]);
+ for_each_set_bit(j, &sbi->s_flag, MAX_SBI_FLAG)
+ seq_printf(s, " %s", s_flag[j]);
seq_puts(s, "]\n");
}
seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 9ccdbe120425..887e55988450 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -29,7 +29,7 @@ static unsigned long dir_blocks(struct inode *inode)
static unsigned int dir_buckets(unsigned int level, int dir_level)
{
if (level + dir_level < MAX_DIR_HASH_DEPTH / 2)
- return 1 << (level + dir_level);
+ return BIT(level + dir_level);
else
return MAX_DIR_BUCKETS;
}
@@ -42,39 +42,6 @@ static unsigned int bucket_blocks(unsigned int level)
return 4;
}
-static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
- [F2FS_FT_UNKNOWN] = DT_UNKNOWN,
- [F2FS_FT_REG_FILE] = DT_REG,
- [F2FS_FT_DIR] = DT_DIR,
- [F2FS_FT_CHRDEV] = DT_CHR,
- [F2FS_FT_BLKDEV] = DT_BLK,
- [F2FS_FT_FIFO] = DT_FIFO,
- [F2FS_FT_SOCK] = DT_SOCK,
- [F2FS_FT_SYMLINK] = DT_LNK,
-};
-
-static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
- [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE,
- [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR,
- [S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV,
- [S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV,
- [S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO,
- [S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK,
- [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
-};
-
-static void set_de_type(struct f2fs_dir_entry *de, umode_t mode)
-{
- de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
-}
-
-unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de)
-{
- if (de->file_type < F2FS_FT_MAX)
- return f2fs_filetype_table[de->file_type];
- return DT_UNKNOWN;
-}
-
/* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. */
int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname)
@@ -485,7 +452,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
lock_page(page);
f2fs_wait_on_page_writeback(page, type, true, true);
de->ino = cpu_to_le32(inode->i_ino);
- set_de_type(de, inode->i_mode);
+ de->file_type = fs_umode_to_ftype(inode->i_mode);
set_page_dirty(page);
dir->i_mtime = dir->i_ctime = current_time(dir);
@@ -699,7 +666,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
de->name_len = cpu_to_le16(name->len);
memcpy(d->filename[bit_pos], name->name, name->len);
de->ino = cpu_to_le32(ino);
- set_de_type(de, mode);
+ de->file_type = fs_umode_to_ftype(mode);
for (i = 0; i < slots; i++) {
__set_bit_le(bit_pos + i, (void *)d->bitmap);
/* avoid wrong garbage data for readdir */
@@ -938,14 +905,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
f2fs_clear_page_cache_dirty_tag(page);
clear_page_dirty_for_io(page);
ClearPageUptodate(page);
-
- clear_page_private_gcing(page);
+ clear_page_private_all(page);
inode_dec_dirty_pages(dir);
f2fs_remove_dirty_inode(dir);
-
- detach_page_private(page);
- set_page_private(page, 0);
}
f2fs_put_page(page, 1);
@@ -1036,7 +999,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
continue;
}
- d_type = f2fs_get_de_type(de);
+ d_type = fs_ftype_to_dtype(de->file_type);
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 28b12553f2b3..0e2d49140c07 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -23,18 +23,26 @@ bool sanity_check_extent_cache(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct extent_tree *et = fi->extent_tree[EX_READ];
struct extent_info *ei;
- if (!fi->extent_tree[EX_READ])
+ if (!et)
+ return true;
+
+ ei = &et->largest;
+ if (!ei->len)
return true;
- ei = &fi->extent_tree[EX_READ]->largest;
+ /* Let's drop, if checkpoint got corrupted. */
+ if (is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) {
+ ei->len = 0;
+ et->largest_updated = true;
+ return true;
+ }
- if (ei->len &&
- (!f2fs_is_valid_blkaddr(sbi, ei->blk,
- DATA_GENERIC_ENHANCE) ||
- !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
- DATA_GENERIC_ENHANCE))) {
+ if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
+ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
+ DATA_GENERIC_ENHANCE)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
__func__, inode->i_ino,
@@ -86,7 +94,6 @@ static bool __may_age_extent_tree(struct inode *inode)
if (!test_opt(sbi, AGE_EXTENT_CACHE))
return false;
- /* don't cache block age info for cold file */
if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
return false;
if (file_is_cold(inode))
@@ -161,118 +168,52 @@ static bool __is_front_mergeable(struct extent_info *cur,
return __is_extent_mergeable(cur, front, type);
}
-static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
- unsigned int ofs)
-{
- if (cached_re) {
- if (cached_re->ofs <= ofs &&
- cached_re->ofs + cached_re->len > ofs) {
- return cached_re;
- }
- }
- return NULL;
-}
-
-static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root,
- unsigned int ofs)
+static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
+ struct extent_node *cached_en, unsigned int fofs)
{
struct rb_node *node = root->rb_root.rb_node;
- struct rb_entry *re;
+ struct extent_node *en;
+ /* check a cached entry */
+ if (cached_en && cached_en->ei.fofs <= fofs &&
+ cached_en->ei.fofs + cached_en->ei.len > fofs)
+ return cached_en;
+
+ /* check rb_tree */
while (node) {
- re = rb_entry(node, struct rb_entry, rb_node);
+ en = rb_entry(node, struct extent_node, rb_node);
- if (ofs < re->ofs)
+ if (fofs < en->ei.fofs)
node = node->rb_left;
- else if (ofs >= re->ofs + re->len)
+ else if (fofs >= en->ei.fofs + en->ei.len)
node = node->rb_right;
else
- return re;
+ return en;
}
return NULL;
}
-struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
- struct rb_entry *cached_re, unsigned int ofs)
-{
- struct rb_entry *re;
-
- re = __lookup_rb_tree_fast(cached_re, ofs);
- if (!re)
- return __lookup_rb_tree_slow(root, ofs);
-
- return re;
-}
-
-struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root,
- struct rb_node **parent,
- unsigned long long key, bool *leftmost)
-{
- struct rb_node **p = &root->rb_root.rb_node;
- struct rb_entry *re;
-
- while (*p) {
- *parent = *p;
- re = rb_entry(*parent, struct rb_entry, rb_node);
-
- if (key < re->key) {
- p = &(*p)->rb_left;
- } else {
- p = &(*p)->rb_right;
- *leftmost = false;
- }
- }
-
- return p;
-}
-
-struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root,
- struct rb_node **parent,
- unsigned int ofs, bool *leftmost)
-{
- struct rb_node **p = &root->rb_root.rb_node;
- struct rb_entry *re;
-
- while (*p) {
- *parent = *p;
- re = rb_entry(*parent, struct rb_entry, rb_node);
-
- if (ofs < re->ofs) {
- p = &(*p)->rb_left;
- } else if (ofs >= re->ofs + re->len) {
- p = &(*p)->rb_right;
- *leftmost = false;
- } else {
- f2fs_bug_on(sbi, 1);
- }
- }
-
- return p;
-}
-
/*
- * lookup rb entry in position of @ofs in rb-tree,
+ * lookup rb entry in position of @fofs in rb-tree,
* if hit, return the entry, otherwise, return NULL
- * @prev_ex: extent before ofs
- * @next_ex: extent after ofs
- * @insert_p: insert point for new extent at ofs
+ * @prev_ex: extent before fofs
+ * @next_ex: extent after fofs
+ * @insert_p: insert point for new extent at fofs
* in order to simplify the insertion after.
* tree must stay unchanged between lookup and insertion.
*/
-struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
- struct rb_entry *cached_re,
- unsigned int ofs,
- struct rb_entry **prev_entry,
- struct rb_entry **next_entry,
+static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
+ struct extent_node *cached_en,
+ unsigned int fofs,
+ struct extent_node **prev_entry,
+ struct extent_node **next_entry,
struct rb_node ***insert_p,
struct rb_node **insert_parent,
- bool force, bool *leftmost)
+ bool *leftmost)
{
struct rb_node **pnode = &root->rb_root.rb_node;
struct rb_node *parent = NULL, *tmp_node;
- struct rb_entry *re = cached_re;
+ struct extent_node *en = cached_en;
*insert_p = NULL;
*insert_parent = NULL;
@@ -282,24 +223,20 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
if (RB_EMPTY_ROOT(&root->rb_root))
return NULL;
- if (re) {
- if (re->ofs <= ofs && re->ofs + re->len > ofs)
- goto lookup_neighbors;
- }
+ if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
+ goto lookup_neighbors;
- if (leftmost)
- *leftmost = true;
+ *leftmost = true;
while (*pnode) {
parent = *pnode;
- re = rb_entry(*pnode, struct rb_entry, rb_node);
+ en = rb_entry(*pnode, struct extent_node, rb_node);
- if (ofs < re->ofs) {
+ if (fofs < en->ei.fofs) {
pnode = &(*pnode)->rb_left;
- } else if (ofs >= re->ofs + re->len) {
+ } else if (fofs >= en->ei.fofs + en->ei.len) {
pnode = &(*pnode)->rb_right;
- if (leftmost)
- *leftmost = false;
+ *leftmost = false;
} else {
goto lookup_neighbors;
}
@@ -308,71 +245,32 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
*insert_p = pnode;
*insert_parent = parent;
- re = rb_entry(parent, struct rb_entry, rb_node);
+ en = rb_entry(parent, struct extent_node, rb_node);
tmp_node = parent;
- if (parent && ofs > re->ofs)
+ if (parent && fofs > en->ei.fofs)
tmp_node = rb_next(parent);
- *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
+ *next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
tmp_node = parent;
- if (parent && ofs < re->ofs)
+ if (parent && fofs < en->ei.fofs)
tmp_node = rb_prev(parent);
- *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
+ *prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
return NULL;
lookup_neighbors:
- if (ofs == re->ofs || force) {
+ if (fofs == en->ei.fofs) {
/* lookup prev node for merging backward later */
- tmp_node = rb_prev(&re->rb_node);
- *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
+ tmp_node = rb_prev(&en->rb_node);
+ *prev_entry = rb_entry_safe(tmp_node,
+ struct extent_node, rb_node);
}
- if (ofs == re->ofs + re->len - 1 || force) {
+ if (fofs == en->ei.fofs + en->ei.len - 1) {
/* lookup next node for merging frontward later */
- tmp_node = rb_next(&re->rb_node);
- *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
- }
- return re;
-}
-
-bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root, bool check_key)
-{
-#ifdef CONFIG_F2FS_CHECK_FS
- struct rb_node *cur = rb_first_cached(root), *next;
- struct rb_entry *cur_re, *next_re;
-
- if (!cur)
- return true;
-
- while (cur) {
- next = rb_next(cur);
- if (!next)
- return true;
-
- cur_re = rb_entry(cur, struct rb_entry, rb_node);
- next_re = rb_entry(next, struct rb_entry, rb_node);
-
- if (check_key) {
- if (cur_re->key > next_re->key) {
- f2fs_info(sbi, "inconsistent rbtree, "
- "cur(%llu) next(%llu)",
- cur_re->key, next_re->key);
- return false;
- }
- goto next;
- }
-
- if (cur_re->ofs + cur_re->len > next_re->ofs) {
- f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
- cur_re->ofs, cur_re->len,
- next_re->ofs, next_re->len);
- return false;
- }
-next:
- cur = next;
+ tmp_node = rb_next(&en->rb_node);
+ *next_entry = rb_entry_safe(tmp_node,
+ struct extent_node, rb_node);
}
-#endif
- return true;
+ return en;
}
static struct kmem_cache *extent_tree_slab;
@@ -587,8 +485,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
goto out;
}
- en = (struct extent_node *)f2fs_lookup_rb_tree(&et->root,
- (struct rb_entry *)et->cached_en, pgofs);
+ en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
if (!en)
goto out;
@@ -662,7 +559,7 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
bool leftmost)
{
struct extent_tree_info *eti = &sbi->extent_tree[et->type];
- struct rb_node **p;
+ struct rb_node **p = &et->root.rb_root.rb_node;
struct rb_node *parent = NULL;
struct extent_node *en = NULL;
@@ -674,8 +571,21 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
leftmost = true;
- p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent,
- ei->fofs, &leftmost);
+ /* look up extent_node in the rb tree */
+ while (*p) {
+ parent = *p;
+ en = rb_entry(parent, struct extent_node, rb_node);
+
+ if (ei->fofs < en->ei.fofs) {
+ p = &(*p)->rb_left;
+ } else if (ei->fofs >= en->ei.fofs + en->ei.len) {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ } else {
+ f2fs_bug_on(sbi, 1);
+ }
+ }
+
do_insert:
en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
if (!en)
@@ -734,11 +644,10 @@ static void __update_extent_tree_range(struct inode *inode,
}
/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
- en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
- (struct rb_entry *)et->cached_en, fofs,
- (struct rb_entry **)&prev_en,
- (struct rb_entry **)&next_en,
- &insert_p, &insert_parent, false,
+ en = __lookup_extent_node_ret(&et->root,
+ et->cached_en, fofs,
+ &prev_en, &next_en,
+ &insert_p, &insert_parent,
&leftmost);
if (!en)
en = next_en;
@@ -876,12 +785,11 @@ void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
write_lock(&et->lock);
- en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
- (struct rb_entry *)et->cached_en, fofs,
- (struct rb_entry **)&prev_en,
- (struct rb_entry **)&next_en,
- &insert_p, &insert_parent, false,
- &leftmost);
+ en = __lookup_extent_node_ret(&et->root,
+ et->cached_en, fofs,
+ &prev_en, &next_en,
+ &insert_p, &insert_parent,
+ &leftmost);
if (en)
goto unlock_out;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b0ab2062038a..d211ee89c158 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -65,7 +65,7 @@ enum {
};
#ifdef CONFIG_F2FS_FAULT_INJECTION
-#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1)
+#define F2FS_ALL_FAULT_TYPE (GENMASK(FAULT_MAX - 1, 0))
struct f2fs_fault_info {
atomic_t inject_ops;
@@ -74,7 +74,7 @@ struct f2fs_fault_info {
};
extern const char *f2fs_fault_name[FAULT_MAX];
-#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
+#define IS_FAULT_SET(fi, type) ((fi)->inject_type & BIT(type))
#endif
/*
@@ -353,15 +353,7 @@ struct discard_info {
struct discard_cmd {
struct rb_node rb_node; /* rb node located in rb-tree */
- union {
- struct {
- block_t lstart; /* logical start address */
- block_t len; /* length */
- block_t start; /* actual start address in dev */
- };
- struct discard_info di; /* discard info */
-
- };
+ struct discard_info di; /* discard info */
struct list_head list; /* command list */
struct completion wait; /* compleation */
struct block_device *bdev; /* bdev */
@@ -628,17 +620,6 @@ enum extent_type {
NR_EXTENT_CACHES,
};
-struct rb_entry {
- struct rb_node rb_node; /* rb node located in rb-tree */
- union {
- struct {
- unsigned int ofs; /* start offset of the entry */
- unsigned int len; /* length of the entry */
- };
- unsigned long long key; /* 64-bits key */
- } __packed;
-};
-
struct extent_info {
unsigned int fofs; /* start offset in a file */
unsigned int len; /* length of the extent */
@@ -862,7 +843,7 @@ struct f2fs_inode_info {
kprojid_t i_projid; /* id for project quota */
int i_inline_xattr_size; /* inline xattr size */
struct timespec64 i_crtime; /* inode creation time */
- struct timespec64 i_disk_time[4];/* inode disk times */
+ struct timespec64 i_disk_time[3];/* inode disk times */
/* for file compress */
atomic_t i_compr_blocks; /* # of compressed blocks */
@@ -1293,7 +1274,10 @@ struct f2fs_gc_control {
unsigned int nr_free_secs; /* # of free sections to do GC */
};
-/* For s_flag in struct f2fs_sb_info */
+/*
+ * For s_flag in struct f2fs_sb_info
+ * Modification on enum should be synchronized with s_flag array
+ */
enum {
SBI_IS_DIRTY, /* dirty flag for checkpoint */
SBI_IS_CLOSE, /* specify unmounting */
@@ -1310,6 +1294,8 @@ enum {
SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
SBI_IS_RESIZEFS, /* resizefs is in process */
SBI_IS_FREEZING, /* freezefs is in process */
+ SBI_IS_WRITABLE, /* remove ro mountoption transiently */
+ MAX_SBI_FLAG,
};
enum {
@@ -1412,86 +1398,6 @@ enum {
PAGE_PRIVATE_MAX
};
-#define PAGE_PRIVATE_GET_FUNC(name, flagname) \
-static inline bool page_private_##name(struct page *page) \
-{ \
- return PagePrivate(page) && \
- test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \
- test_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
-}
-
-#define PAGE_PRIVATE_SET_FUNC(name, flagname) \
-static inline void set_page_private_##name(struct page *page) \
-{ \
- if (!PagePrivate(page)) { \
- get_page(page); \
- SetPagePrivate(page); \
- set_page_private(page, 0); \
- } \
- set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); \
- set_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
-}
-
-#define PAGE_PRIVATE_CLEAR_FUNC(name, flagname) \
-static inline void clear_page_private_##name(struct page *page) \
-{ \
- clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
- if (page_private(page) == 1 << PAGE_PRIVATE_NOT_POINTER) { \
- set_page_private(page, 0); \
- if (PagePrivate(page)) { \
- ClearPagePrivate(page); \
- put_page(page); \
- }\
- } \
-}
-
-PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER);
-PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE);
-PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION);
-PAGE_PRIVATE_GET_FUNC(dummy, DUMMY_WRITE);
-
-PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE);
-PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE);
-PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION);
-PAGE_PRIVATE_SET_FUNC(dummy, DUMMY_WRITE);
-
-PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE);
-PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE);
-PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION);
-PAGE_PRIVATE_CLEAR_FUNC(dummy, DUMMY_WRITE);
-
-static inline unsigned long get_page_private_data(struct page *page)
-{
- unsigned long data = page_private(page);
-
- if (!test_bit(PAGE_PRIVATE_NOT_POINTER, &data))
- return 0;
- return data >> PAGE_PRIVATE_MAX;
-}
-
-static inline void set_page_private_data(struct page *page, unsigned long data)
-{
- if (!PagePrivate(page)) {
- get_page(page);
- SetPagePrivate(page);
- set_page_private(page, 0);
- }
- set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page));
- page_private(page) |= data << PAGE_PRIVATE_MAX;
-}
-
-static inline void clear_page_private_data(struct page *page)
-{
- page_private(page) &= (1 << PAGE_PRIVATE_MAX) - 1;
- if (page_private(page) == 1 << PAGE_PRIVATE_NOT_POINTER) {
- set_page_private(page, 0);
- if (PagePrivate(page)) {
- ClearPagePrivate(page);
- put_page(page);
- }
- }
-}
-
/* For compression */
enum compress_algorithm_type {
COMPRESS_LZO,
@@ -1617,7 +1523,6 @@ struct f2fs_sb_info {
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
- unsigned int log_blocks_per_blkz; /* log2 F2FS blocks per zone */
#endif
/* for node-related operations */
@@ -2386,6 +2291,80 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...);
#define f2fs_debug(sbi, fmt, ...) \
f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__)
+#define PAGE_PRIVATE_GET_FUNC(name, flagname) \
+static inline bool page_private_##name(struct page *page) \
+{ \
+ return PagePrivate(page) && \
+ test_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)) && \
+ test_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
+}
+
+#define PAGE_PRIVATE_SET_FUNC(name, flagname) \
+static inline void set_page_private_##name(struct page *page) \
+{ \
+ if (!PagePrivate(page)) \
+ attach_page_private(page, (void *)0); \
+ set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page)); \
+ set_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
+}
+
+#define PAGE_PRIVATE_CLEAR_FUNC(name, flagname) \
+static inline void clear_page_private_##name(struct page *page) \
+{ \
+ clear_bit(PAGE_PRIVATE_##flagname, &page_private(page)); \
+ if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER)) \
+ detach_page_private(page); \
+}
+
+PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER);
+PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE);
+PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION);
+PAGE_PRIVATE_GET_FUNC(dummy, DUMMY_WRITE);
+
+PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE);
+PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE);
+PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION);
+PAGE_PRIVATE_SET_FUNC(dummy, DUMMY_WRITE);
+
+PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE);
+PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE);
+PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION);
+PAGE_PRIVATE_CLEAR_FUNC(dummy, DUMMY_WRITE);
+
+static inline unsigned long get_page_private_data(struct page *page)
+{
+ unsigned long data = page_private(page);
+
+ if (!test_bit(PAGE_PRIVATE_NOT_POINTER, &data))
+ return 0;
+ return data >> PAGE_PRIVATE_MAX;
+}
+
+static inline void set_page_private_data(struct page *page, unsigned long data)
+{
+ if (!PagePrivate(page))
+ attach_page_private(page, (void *)0);
+ set_bit(PAGE_PRIVATE_NOT_POINTER, &page_private(page));
+ page_private(page) |= data << PAGE_PRIVATE_MAX;
+}
+
+static inline void clear_page_private_data(struct page *page)
+{
+ page_private(page) &= GENMASK(PAGE_PRIVATE_MAX - 1, 0);
+ if (page_private(page) == BIT(PAGE_PRIVATE_NOT_POINTER))
+ detach_page_private(page);
+}
+
+static inline void clear_page_private_all(struct page *page)
+{
+ clear_page_private_data(page);
+ clear_page_private_reference(page);
+ clear_page_private_gcing(page);
+ clear_page_private_inline(page);
+
+ f2fs_bug_on(F2FS_P_SB(page), page_private(page));
+}
+
static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode,
block_t count)
@@ -2892,7 +2871,7 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr)
int mask;
addr += (nr >> 3);
- mask = 1 << (7 - (nr & 0x07));
+ mask = BIT(7 - (nr & 0x07));
return mask & *addr;
}
@@ -2901,7 +2880,7 @@ static inline void f2fs_set_bit(unsigned int nr, char *addr)
int mask;
addr += (nr >> 3);
- mask = 1 << (7 - (nr & 0x07));
+ mask = BIT(7 - (nr & 0x07));
*addr |= mask;
}
@@ -2910,7 +2889,7 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr)
int mask;
addr += (nr >> 3);
- mask = 1 << (7 - (nr & 0x07));
+ mask = BIT(7 - (nr & 0x07));
*addr &= ~mask;
}
@@ -2920,7 +2899,7 @@ static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr)
int ret;
addr += (nr >> 3);
- mask = 1 << (7 - (nr & 0x07));
+ mask = BIT(7 - (nr & 0x07));
ret = mask & *addr;
*addr |= mask;
return ret;
@@ -2932,7 +2911,7 @@ static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr)
int ret;
addr += (nr >> 3);
- mask = 1 << (7 - (nr & 0x07));
+ mask = BIT(7 - (nr & 0x07));
ret = mask & *addr;
*addr &= ~mask;
return ret;
@@ -2943,7 +2922,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
int mask;
addr += (nr >> 3);
- mask = 1 << (7 - (nr & 0x07));
+ mask = BIT(7 - (nr & 0x07));
*addr ^= mask;
}
@@ -3307,9 +3286,6 @@ static inline bool f2fs_is_time_consistent(struct inode *inode)
return false;
if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime))
return false;
- if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3,
- &F2FS_I(inode)->i_crtime))
- return false;
return true;
}
@@ -3370,6 +3346,19 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
return kmalloc(size, flags);
}
+static inline void *f2fs_getname(struct f2fs_sb_info *sbi)
+{
+ if (time_to_inject(sbi, FAULT_KMALLOC))
+ return NULL;
+
+ return __getname();
+}
+
+static inline void f2fs_putname(char *buf)
+{
+ __putname(buf);
+}
+
static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
@@ -3489,7 +3478,6 @@ int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
/*
* dir.c
*/
-unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de);
int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname);
int f2fs_setup_filename(struct inode *dir, const struct qstr *iname,
@@ -3554,6 +3542,7 @@ int f2fs_quota_sync(struct super_block *sb, int type);
loff_t max_file_blocks(struct inode *inode);
void f2fs_quota_off_umount(struct super_block *sb);
void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
+void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
@@ -3737,7 +3726,6 @@ void f2fs_set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
unsigned int devidx, int type);
bool f2fs_is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
unsigned int devidx, int type);
-int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi);
int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi);
void f2fs_release_orphan_inode(struct f2fs_sb_info *sbi);
void f2fs_add_orphan_inode(struct inode *inode);
@@ -3830,6 +3818,10 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
int __init f2fs_create_garbage_collection_cache(void);
void f2fs_destroy_garbage_collection_cache(void);
+/* victim selection function for cleaning and SSR */
+int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result,
+ int gc_type, int type, char alloc_mode,
+ unsigned long long age);
/*
* recovery.c
@@ -4138,23 +4130,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
* extent_cache.c
*/
bool sanity_check_extent_cache(struct inode *inode);
-struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
- struct rb_entry *cached_re, unsigned int ofs);
-struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root,
- struct rb_node **parent,
- unsigned long long key, bool *left_most);
-struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root,
- struct rb_node **parent,
- unsigned int ofs, bool *leftmost);
-struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
- struct rb_entry *cached_re, unsigned int ofs,
- struct rb_entry **prev_entry, struct rb_entry **next_entry,
- struct rb_node ***insert_p, struct rb_node **insert_parent,
- bool force, bool *leftmost);
-bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root, bool check_key);
void f2fs_init_extent_tree(struct inode *inode);
void f2fs_drop_extent_tree(struct inode *inode);
void f2fs_destroy_extent_node(struct inode *inode);
@@ -4354,9 +4329,9 @@ static inline int set_compress_context(struct inode *inode)
F2FS_OPTION(sbi).compress_log_size;
F2FS_I(inode)->i_compress_flag =
F2FS_OPTION(sbi).compress_chksum ?
- 1 << COMPRESS_CHKSUM : 0;
+ BIT(COMPRESS_CHKSUM) : 0;
F2FS_I(inode)->i_cluster_size =
- 1 << F2FS_I(inode)->i_log_cluster_size;
+ BIT(F2FS_I(inode)->i_log_cluster_size);
if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 ||
F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) &&
F2FS_OPTION(sbi).compress_level)
@@ -4414,7 +4389,7 @@ F2FS_FEATURE_FUNCS(readonly, RO);
static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
block_t blkaddr)
{
- unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
+ unsigned int zno = blkaddr / sbi->blocks_per_blkz;
return test_bit(zno, FDEV(devi).blkz_seq);
}
@@ -4462,6 +4437,11 @@ static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi)
return false;
}
+static inline bool f2fs_dev_is_readonly(struct f2fs_sb_info *sbi)
+{
+ return f2fs_sb_has_readonly(sbi) || f2fs_hw_is_readonly(sbi);
+}
+
static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
{
return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 15dabeac4690..5ac53d2627d2 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2113,7 +2113,11 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
} else {
/* Reuse the already created COW inode */
- f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
+ ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
+ if (ret) {
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
+ goto out;
+ }
}
f2fs_write_inode(inode, NULL);
@@ -3009,15 +3013,16 @@ int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
struct dquot *transfer_to[MAXQUOTAS] = {};
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct super_block *sb = sbi->sb;
- int err = 0;
+ int err;
transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
- if (!IS_ERR(transfer_to[PRJQUOTA])) {
- err = __dquot_transfer(inode, transfer_to);
- if (err)
- set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
- dqput(transfer_to[PRJQUOTA]);
- }
+ if (IS_ERR(transfer_to[PRJQUOTA]))
+ return PTR_ERR(transfer_to[PRJQUOTA]);
+
+ err = __dquot_transfer(inode, transfer_to);
+ if (err)
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ dqput(transfer_to[PRJQUOTA]);
return err;
}
@@ -3964,7 +3969,7 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
F2FS_I(inode)->i_compress_algorithm = option.algorithm;
F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size;
- F2FS_I(inode)->i_cluster_size = 1 << option.log_cluster_size;
+ F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size);
f2fs_mark_inode_dirty_sync(inode, true);
if (!f2fs_is_compress_backend_ready(inode))
@@ -4062,8 +4067,11 @@ static int f2fs_ioc_decompress_file(struct file *filp)
if (ret < 0)
break;
- if (get_dirty_pages(inode) >= blk_per_seg)
- filemap_fdatawrite(inode->i_mapping);
+ if (get_dirty_pages(inode) >= blk_per_seg) {
+ ret = filemap_fdatawrite(inode->i_mapping);
+ if (ret < 0)
+ break;
+ }
count -= len;
page_idx += len;
@@ -4133,8 +4141,11 @@ static int f2fs_ioc_compress_file(struct file *filp)
if (ret < 0)
break;
- if (get_dirty_pages(inode) >= blk_per_seg)
- filemap_fdatawrite(inode->i_mapping);
+ if (get_dirty_pages(inode) >= blk_per_seg) {
+ ret = filemap_fdatawrite(inode->i_mapping);
+ if (ret < 0)
+ break;
+ }
count -= len;
page_idx += len;
@@ -4361,7 +4372,7 @@ static void f2fs_trace_rw_file_path(struct kiocb *iocb, size_t count, int rw)
struct inode *inode = file_inode(iocb->ki_filp);
char *buf, *path;
- buf = f2fs_kmalloc(F2FS_I_SB(inode), PATH_MAX, GFP_KERNEL);
+ buf = f2fs_getname(F2FS_I_SB(inode));
if (!buf)
return;
path = dentry_path_raw(file_dentry(iocb->ki_filp), buf, PATH_MAX);
@@ -4374,7 +4385,7 @@ static void f2fs_trace_rw_file_path(struct kiocb *iocb, size_t count, int rw)
trace_f2fs_dataread_start(inode, iocb->ki_pos, count,
current->pid, path, current->comm);
free_buf:
- kfree(buf);
+ f2fs_putname(buf);
}
static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -4534,6 +4545,19 @@ static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
.end_io = f2fs_dio_write_end_io,
};
+static void f2fs_flush_buffered_write(struct address_space *mapping,
+ loff_t start_pos, loff_t end_pos)
+{
+ int ret;
+
+ ret = filemap_write_and_wait_range(mapping, start_pos, end_pos);
+ if (ret < 0)
+ return;
+ invalidate_mapping_pages(mapping,
+ start_pos >> PAGE_SHIFT,
+ end_pos >> PAGE_SHIFT);
+}
+
static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
bool *may_need_sync)
{
@@ -4633,14 +4657,9 @@ static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
ret += ret2;
- ret2 = filemap_write_and_wait_range(file->f_mapping,
- bufio_start_pos,
- bufio_end_pos);
- if (ret2 < 0)
- goto out;
- invalidate_mapping_pages(file->f_mapping,
- bufio_start_pos >> PAGE_SHIFT,
- bufio_end_pos >> PAGE_SHIFT);
+ f2fs_flush_buffered_write(file->f_mapping,
+ bufio_start_pos,
+ bufio_end_pos);
}
} else {
/* iomap_dio_rw() already handled the generic_write_sync(). */
@@ -4723,8 +4742,18 @@ out_unlock:
inode_unlock(inode);
out:
trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
+
if (ret > 0 && may_need_sync)
ret = generic_write_sync(iocb, ret);
+
+ /* If buffered IO was forced, flush and drop the data from
+ * the page cache to preserve O_DIRECT semantics
+ */
+ if (ret > 0 && !dio && (iocb->ki_flags & IOCB_DIRECT))
+ f2fs_flush_buffered_write(iocb->ki_filp->f_mapping,
+ orig_pos,
+ orig_pos + ret - 1);
+
return ret;
}
@@ -4879,6 +4908,7 @@ const struct file_operations f2fs_file_operations = {
.llseek = f2fs_llseek,
.read_iter = f2fs_file_read_iter,
.write_iter = f2fs_file_write_iter,
+ .iopoll = iocb_bio_iopoll,
.open = f2fs_file_open,
.release = f2fs_release_file,
.mmap = f2fs_file_mmap,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 0a9dfa459860..61c5f9d26018 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -390,40 +390,95 @@ static unsigned int count_bits(const unsigned long *addr,
return sum;
}
-static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi,
- unsigned long long mtime, unsigned int segno,
- struct rb_node *parent, struct rb_node **p,
- bool left_most)
+static bool f2fs_check_victim_tree(struct f2fs_sb_info *sbi,
+ struct rb_root_cached *root)
+{
+#ifdef CONFIG_F2FS_CHECK_FS
+ struct rb_node *cur = rb_first_cached(root), *next;
+ struct victim_entry *cur_ve, *next_ve;
+
+ while (cur) {
+ next = rb_next(cur);
+ if (!next)
+ return true;
+
+ cur_ve = rb_entry(cur, struct victim_entry, rb_node);
+ next_ve = rb_entry(next, struct victim_entry, rb_node);
+
+ if (cur_ve->mtime > next_ve->mtime) {
+ f2fs_info(sbi, "broken victim_rbtree, "
+ "cur_mtime(%llu) next_mtime(%llu)",
+ cur_ve->mtime, next_ve->mtime);
+ return false;
+ }
+ cur = next;
+ }
+#endif
+ return true;
+}
+
+static struct victim_entry *__lookup_victim_entry(struct f2fs_sb_info *sbi,
+ unsigned long long mtime)
+{
+ struct atgc_management *am = &sbi->am;
+ struct rb_node *node = am->root.rb_root.rb_node;
+ struct victim_entry *ve = NULL;
+
+ while (node) {
+ ve = rb_entry(node, struct victim_entry, rb_node);
+
+ if (mtime < ve->mtime)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+ return ve;
+}
+
+static struct victim_entry *__create_victim_entry(struct f2fs_sb_info *sbi,
+ unsigned long long mtime, unsigned int segno)
{
struct atgc_management *am = &sbi->am;
struct victim_entry *ve;
- ve = f2fs_kmem_cache_alloc(victim_entry_slab,
- GFP_NOFS, true, NULL);
+ ve = f2fs_kmem_cache_alloc(victim_entry_slab, GFP_NOFS, true, NULL);
ve->mtime = mtime;
ve->segno = segno;
- rb_link_node(&ve->rb_node, parent, p);
- rb_insert_color_cached(&ve->rb_node, &am->root, left_most);
-
list_add_tail(&ve->list, &am->victim_list);
-
am->victim_count++;
return ve;
}
-static void insert_victim_entry(struct f2fs_sb_info *sbi,
+static void __insert_victim_entry(struct f2fs_sb_info *sbi,
unsigned long long mtime, unsigned int segno)
{
struct atgc_management *am = &sbi->am;
- struct rb_node **p;
+ struct rb_root_cached *root = &am->root;
+ struct rb_node **p = &root->rb_root.rb_node;
struct rb_node *parent = NULL;
+ struct victim_entry *ve;
bool left_most = true;
- p = f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, mtime, &left_most);
- attach_victim_entry(sbi, mtime, segno, parent, p, left_most);
+ /* look up rb tree to find parent node */
+ while (*p) {
+ parent = *p;
+ ve = rb_entry(parent, struct victim_entry, rb_node);
+
+ if (mtime < ve->mtime) {
+ p = &(*p)->rb_left;
+ } else {
+ p = &(*p)->rb_right;
+ left_most = false;
+ }
+ }
+
+ ve = __create_victim_entry(sbi, mtime, segno);
+
+ rb_link_node(&ve->rb_node, parent, p);
+ rb_insert_color_cached(&ve->rb_node, root, left_most);
}
static void add_victim_entry(struct f2fs_sb_info *sbi,
@@ -459,19 +514,7 @@ static void add_victim_entry(struct f2fs_sb_info *sbi,
if (sit_i->dirty_max_mtime - mtime < p->age_threshold)
return;
- insert_victim_entry(sbi, mtime, segno);
-}
-
-static struct rb_node *lookup_central_victim(struct f2fs_sb_info *sbi,
- struct victim_sel_policy *p)
-{
- struct atgc_management *am = &sbi->am;
- struct rb_node *parent = NULL;
- bool left_most;
-
- f2fs_lookup_rb_tree_ext(sbi, &am->root, &parent, p->age, &left_most);
-
- return parent;
+ __insert_victim_entry(sbi, mtime, segno);
}
static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
@@ -481,7 +524,6 @@ static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
struct atgc_management *am = &sbi->am;
struct rb_root_cached *root = &am->root;
struct rb_node *node;
- struct rb_entry *re;
struct victim_entry *ve;
unsigned long long total_time;
unsigned long long age, u, accu;
@@ -508,12 +550,10 @@ static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
node = rb_first_cached(root);
next:
- re = rb_entry_safe(node, struct rb_entry, rb_node);
- if (!re)
+ ve = rb_entry_safe(node, struct victim_entry, rb_node);
+ if (!ve)
return;
- ve = (struct victim_entry *)re;
-
if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
goto skip;
@@ -555,8 +595,6 @@ static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
{
struct sit_info *sit_i = SIT_I(sbi);
struct atgc_management *am = &sbi->am;
- struct rb_node *node;
- struct rb_entry *re;
struct victim_entry *ve;
unsigned long long age;
unsigned long long max_mtime = sit_i->dirty_max_mtime;
@@ -566,25 +604,22 @@ static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
unsigned int dirty_threshold = max(am->max_candidate_count,
am->candidate_ratio *
am->victim_count / 100);
- unsigned int cost;
- unsigned int iter = 0;
+ unsigned int cost, iter;
int stage = 0;
if (max_mtime < min_mtime)
return;
max_mtime += 1;
next_stage:
- node = lookup_central_victim(sbi, p);
+ iter = 0;
+ ve = __lookup_victim_entry(sbi, p->age);
next_node:
- re = rb_entry_safe(node, struct rb_entry, rb_node);
- if (!re) {
- if (stage == 0)
- goto skip_stage;
+ if (!ve) {
+ if (stage++ == 0)
+ goto next_stage;
return;
}
- ve = (struct victim_entry *)re;
-
if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
goto skip_node;
@@ -610,24 +645,20 @@ next_node:
}
skip_node:
if (iter < dirty_threshold) {
- if (stage == 0)
- node = rb_prev(node);
- else if (stage == 1)
- node = rb_next(node);
+ ve = rb_entry(stage == 0 ? rb_prev(&ve->rb_node) :
+ rb_next(&ve->rb_node),
+ struct victim_entry, rb_node);
goto next_node;
}
-skip_stage:
- if (stage < 1) {
- stage++;
- iter = 0;
+
+ if (stage++ == 0)
goto next_stage;
- }
}
+
static void lookup_victim_by_age(struct f2fs_sb_info *sbi,
struct victim_sel_policy *p)
{
- f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &sbi->am.root, true));
+ f2fs_bug_on(sbi, !f2fs_check_victim_tree(sbi, &sbi->am.root));
if (p->gc_mode == GC_AT)
atgc_lookup_victim(sbi, p);
@@ -710,9 +741,9 @@ static int f2fs_gc_pinned_control(struct inode *inode, int gc_type,
* When it is called from SSR segment selection, it finds a segment
* which has minimum valid blocks and removes it from dirty seglist.
*/
-static int get_victim_by_default(struct f2fs_sb_info *sbi,
- unsigned int *result, int gc_type, int type,
- char alloc_mode, unsigned long long age)
+int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result,
+ int gc_type, int type, char alloc_mode,
+ unsigned long long age)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct sit_info *sm = SIT_I(sbi);
@@ -906,10 +937,6 @@ out:
return ret;
}
-static const struct victim_selection default_v_ops = {
- .get_victim = get_victim_by_default,
-};
-
static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino)
{
struct inode_entry *ie;
@@ -1589,14 +1616,14 @@ next_step:
int err;
if (S_ISREG(inode->i_mode)) {
- if (!f2fs_down_write_trylock(&fi->i_gc_rwsem[READ])) {
+ if (!f2fs_down_write_trylock(&fi->i_gc_rwsem[WRITE])) {
sbi->skipped_gc_rwsem++;
continue;
}
if (!f2fs_down_write_trylock(
- &fi->i_gc_rwsem[WRITE])) {
+ &fi->i_gc_rwsem[READ])) {
sbi->skipped_gc_rwsem++;
- f2fs_up_write(&fi->i_gc_rwsem[READ]);
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
continue;
}
locked = true;
@@ -1619,8 +1646,8 @@ next_step:
submitted++;
if (locked) {
- f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
f2fs_up_write(&fi->i_gc_rwsem[READ]);
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
}
stat_inc_data_blk_count(sbi, 1, gc_type);
@@ -1640,8 +1667,7 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
int ret;
down_write(&sit_i->sentry_lock);
- ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
- NO_CHECK_TYPE, LFS, 0);
+ ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, LFS, 0);
up_write(&sit_i->sentry_lock);
return ret;
}
@@ -1779,6 +1805,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
unsigned int skipped_round = 0, round = 0;
+ unsigned int upper_secs;
trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
gc_control->nr_free_secs,
@@ -1791,8 +1818,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
prefree_segments(sbi));
cpc.reason = __get_cp_reason(sbi);
- sbi->skipped_gc_rwsem = 0;
gc_more:
+ sbi->skipped_gc_rwsem = 0;
if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) {
ret = -EINVAL;
goto stop;
@@ -1802,7 +1829,10 @@ gc_more:
goto stop;
}
- if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
+ /* Let's run FG_GC, if we don't have enough space. */
+ if (has_not_enough_free_secs(sbi, 0, 0)) {
+ gc_type = FG_GC;
+
/*
* For example, if there are many prefree_segments below given
* threshold, we can make them free by checkpoint. Then, we
@@ -1813,8 +1843,6 @@ gc_more:
if (ret)
goto stop;
}
- if (has_not_enough_free_secs(sbi, 0, 0))
- gc_type = FG_GC;
}
/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
@@ -1841,19 +1869,15 @@ retry:
if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
sec_freed++;
- if (gc_type == FG_GC)
+ if (gc_type == FG_GC) {
sbi->cur_victim_sec = NULL_SEGNO;
- if (gc_control->init_gc_type == FG_GC ||
- !has_not_enough_free_secs(sbi,
- (gc_type == FG_GC) ? sec_freed : 0, 0)) {
- if (gc_type == FG_GC && sec_freed < gc_control->nr_free_secs)
- goto go_gc_more;
- goto stop;
- }
-
- /* FG_GC stops GC by skip_count */
- if (gc_type == FG_GC) {
+ if (has_enough_free_secs(sbi, sec_freed, 0)) {
+ if (!gc_control->no_bg_gc &&
+ sec_freed < gc_control->nr_free_secs)
+ goto go_gc_more;
+ goto stop;
+ }
if (sbi->skipped_gc_rwsem)
skipped_round++;
round++;
@@ -1862,10 +1886,17 @@ retry:
ret = f2fs_write_checkpoint(sbi, &cpc);
goto stop;
}
+ } else if (has_enough_free_secs(sbi, 0, 0)) {
+ goto stop;
}
- /* Write checkpoint to reclaim prefree segments */
- if (free_sections(sbi) < NR_CURSEG_PERSIST_TYPE &&
+ __get_secs_required(sbi, NULL, &upper_secs, NULL);
+
+ /*
+ * Write checkpoint to reclaim prefree segments.
+ * We need more three extra sections for writer's data/node/dentry.
+ */
+ if (free_sections(sbi) <= upper_secs + NR_GC_CHECKPOINT_SECS &&
prefree_segments(sbi)) {
ret = f2fs_write_checkpoint(sbi, &cpc);
if (ret)
@@ -1932,8 +1963,6 @@ static void init_atgc_management(struct f2fs_sb_info *sbi)
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
{
- DIRTY_I(sbi)->v_ops = &default_v_ops;
-
sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
/* give warm/cold data area from slower device */
@@ -2064,8 +2093,8 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
FDEV(last_dev).end_blk =
(long long)FDEV(last_dev).end_blk + blks;
#ifdef CONFIG_BLK_DEV_ZONED
- FDEV(last_dev).nr_blkz = (int)FDEV(last_dev).nr_blkz +
- (int)(blks >> sbi->log_blocks_per_blkz);
+ FDEV(last_dev).nr_blkz = FDEV(last_dev).nr_blkz +
+ div_u64(blks, sbi->blocks_per_blkz);
#endif
}
}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 15bd1d680f67..28a00942802c 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -30,6 +30,8 @@
/* Search max. number of dirty segments to select a victim segment */
#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
+#define NR_GC_CHECKPOINT_SECS (3) /* data/node/dentry sections */
+
struct f2fs_gc_kthread {
struct task_struct *f2fs_gc_task;
wait_queue_head_t gc_wait_queue_head;
@@ -55,20 +57,10 @@ struct gc_inode_list {
struct radix_tree_root iroot;
};
-struct victim_info {
- unsigned long long mtime; /* mtime of section */
- unsigned int segno; /* section No. */
-};
-
struct victim_entry {
struct rb_node rb_node; /* rb node located in rb-tree */
- union {
- struct {
- unsigned long long mtime; /* mtime of section */
- unsigned int segno; /* segment No. */
- };
- struct victim_info vi; /* victim info */
- };
+ unsigned long long mtime; /* mtime of section */
+ unsigned int segno; /* segment No. */
struct list_head list;
};
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 72269e7efd26..4638fee16a91 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -497,7 +497,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
fname.hash = de->hash_code;
ino = le32_to_cpu(de->ino);
- fake_mode = f2fs_get_de_type(de) << S_SHIFT;
+ fake_mode = fs_ftype_to_dtype(de->file_type) << S_DT_SHIFT;
err = f2fs_add_regular_entry(dir, &fname, NULL, ino, fake_mode);
if (err)
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 7d2e2c0dba65..cf4327ad106c 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -326,7 +326,6 @@ static void init_idisk_time(struct inode *inode)
fi->i_disk_time[0] = inode->i_atime;
fi->i_disk_time[1] = inode->i_ctime;
fi->i_disk_time[2] = inode->i_mtime;
- fi->i_disk_time[3] = fi->i_crtime;
}
static int do_read_inode(struct inode *inode)
@@ -454,8 +453,8 @@ static int do_read_inode(struct inode *inode)
fi->i_compress_level = compress_flag >>
COMPRESS_LEVEL_OFFSET;
fi->i_compress_flag = compress_flag &
- (BIT(COMPRESS_LEVEL_OFFSET) - 1);
- fi->i_cluster_size = 1 << fi->i_log_cluster_size;
+ GENMASK(COMPRESS_LEVEL_OFFSET - 1, 0);
+ fi->i_cluster_size = BIT(fi->i_log_cluster_size);
set_inode_flag(inode, FI_COMPRESSED_FILE);
}
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 11fc4c8036a9..77a71276ecb1 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -22,7 +22,7 @@
#include "acl.h"
#include <trace/events/f2fs.h>
-static inline int is_extension_exist(const unsigned char *s, const char *sub,
+static inline bool is_extension_exist(const unsigned char *s, const char *sub,
bool tmp_ext)
{
size_t slen = strlen(s);
@@ -30,19 +30,19 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub,
int i;
if (sublen == 1 && *sub == '*')
- return 1;
+ return true;
/*
* filename format of multimedia file should be defined as:
* "filename + '.' + extension + (optional: '.' + temp extension)".
*/
if (slen < sublen + 2)
- return 0;
+ return false;
if (!tmp_ext) {
/* file has no temp extension */
if (s[slen - sublen - 1] != '.')
- return 0;
+ return false;
return !strncasecmp(s + slen - sublen, sub, sublen);
}
@@ -50,10 +50,10 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub,
if (s[i] != '.')
continue;
if (!strncasecmp(s + i + 1, sub, sublen))
- return 1;
+ return true;
}
- return 0;
+ return false;
}
int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
@@ -995,12 +995,20 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
goto out;
}
+ /*
+ * Copied from ext4_rename: we need to protect against old.inode
+ * directory getting converted from inline directory format into
+ * a normal one.
+ */
+ if (S_ISDIR(old_inode->i_mode))
+ inode_lock_nested(old_inode, I_MUTEX_NONDIR2);
+
err = -ENOENT;
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
err = PTR_ERR(old_page);
- goto out;
+ goto out_unlock_old;
}
if (S_ISDIR(old_inode->i_mode)) {
@@ -1108,6 +1116,9 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
f2fs_unlock_op(sbi);
+ if (S_ISDIR(old_inode->i_mode))
+ inode_unlock(old_inode);
+
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -1122,6 +1133,9 @@ out_dir:
f2fs_put_page(old_dir_page, 0);
out_old:
f2fs_put_page(old_page, 0);
+out_unlock_old:
+ if (S_ISDIR(old_inode->i_mode))
+ inode_unlock(old_inode);
out:
iput(whiteout);
return err;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 99454d46a939..906fb67a99da 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -93,17 +93,15 @@ static inline void copy_node_info(struct node_info *dst,
static inline void set_nat_flag(struct nat_entry *ne,
unsigned int type, bool set)
{
- unsigned char mask = 0x01 << type;
if (set)
- ne->ni.flag |= mask;
+ ne->ni.flag |= BIT(type);
else
- ne->ni.flag &= ~mask;
+ ne->ni.flag &= ~BIT(type);
}
static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
{
- unsigned char mask = 0x01 << type;
- return ne->ni.flag & mask;
+ return ne->ni.flag & BIT(type);
}
static inline void nat_reset_flag(struct nat_entry *ne)
@@ -225,7 +223,7 @@ static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi,
struct f2fs_nm_info *nm_i = NM_I(sbi);
block_addr -= nm_i->nat_blkaddr;
- block_addr ^= 1 << sbi->log_blocks_per_seg;
+ block_addr ^= BIT(sbi->log_blocks_per_seg);
return block_addr + nm_i->nat_blkaddr;
}
@@ -395,7 +393,7 @@ static inline nid_t get_nid(struct page *p, int off, bool i)
static inline int is_node(struct page *page, int type)
{
struct f2fs_node *rn = F2FS_NODE(page);
- return le32_to_cpu(rn->footer.flag) & (1 << type);
+ return le32_to_cpu(rn->footer.flag) & BIT(type);
}
#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT)
@@ -408,9 +406,9 @@ static inline void set_cold_node(struct page *page, bool is_dir)
unsigned int flag = le32_to_cpu(rn->footer.flag);
if (is_dir)
- flag &= ~(0x1 << COLD_BIT_SHIFT);
+ flag &= ~BIT(COLD_BIT_SHIFT);
else
- flag |= (0x1 << COLD_BIT_SHIFT);
+ flag |= BIT(COLD_BIT_SHIFT);
rn->footer.flag = cpu_to_le32(flag);
}
@@ -419,9 +417,9 @@ static inline void set_mark(struct page *page, int mark, int type)
struct f2fs_node *rn = F2FS_NODE(page);
unsigned int flag = le32_to_cpu(rn->footer.flag);
if (mark)
- flag |= (0x1 << type);
+ flag |= BIT(type);
else
- flag &= ~(0x1 << type);
+ flag &= ~BIT(type);
rn->footer.flag = cpu_to_le32(flag);
#ifdef CONFIG_F2FS_CHECK_FS
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index dfd41908b12d..58c1a0096f7d 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -825,19 +825,9 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
bool fix_curseg_write_pointer = false;
-#ifdef CONFIG_QUOTA
- int quota_enabled;
-#endif
- if (s_flags & SB_RDONLY) {
+ if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE))
f2fs_info(sbi, "recover fsync data on readonly fs");
- sbi->sb->s_flags &= ~SB_RDONLY;
- }
-
-#ifdef CONFIG_QUOTA
- /* Turn on quotas so that they are updated correctly */
- quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
-#endif
INIT_LIST_HEAD(&inode_list);
INIT_LIST_HEAD(&tmp_inode_list);
@@ -909,11 +899,6 @@ skip:
}
}
-#ifdef CONFIG_QUOTA
- /* Turn quotas off */
- if (quota_enabled)
- f2fs_quota_off_umount(sbi->sb);
-#endif
sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
return ret ? ret : err;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 227e25836173..6db410f1bb8c 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -217,7 +217,7 @@ static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
retry:
set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE_RA);
+ err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
if (err) {
if (err == -ENOMEM) {
f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
@@ -246,10 +246,16 @@ retry:
} else {
blkcnt_t count = 1;
+ err = inc_valid_block_count(sbi, inode, &count);
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
+ }
+
*old_addr = dn.data_blkaddr;
f2fs_truncate_data_blocks_range(&dn, 1);
dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);
- inc_valid_block_count(sbi, inode, &count);
+
f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
ni.version, true, false);
}
@@ -257,7 +263,7 @@ retry:
f2fs_put_dnode(&dn);
trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
- index, *old_addr, new_addr, recover);
+ index, old_addr ? *old_addr : 0, new_addr, recover);
return 0;
}
@@ -406,27 +412,28 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
* We should do GC or end up with checkpoint, if there are so many dirty
* dir/node pages without enough free segments.
*/
- if (has_not_enough_free_secs(sbi, 0, 0)) {
- if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
- sbi->gc_thread->f2fs_gc_task) {
- DEFINE_WAIT(wait);
-
- prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
- TASK_UNINTERRUPTIBLE);
- wake_up(&sbi->gc_thread->gc_wait_queue_head);
- io_schedule();
- finish_wait(&sbi->gc_thread->fggc_wq, &wait);
- } else {
- struct f2fs_gc_control gc_control = {
- .victim_segno = NULL_SEGNO,
- .init_gc_type = BG_GC,
- .no_bg_gc = true,
- .should_migrate_blocks = false,
- .err_gc_skipped = false,
- .nr_free_secs = 1 };
- f2fs_down_write(&sbi->gc_lock);
- f2fs_gc(sbi, &gc_control);
- }
+ if (has_enough_free_secs(sbi, 0, 0))
+ return;
+
+ if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
+ sbi->gc_thread->f2fs_gc_task) {
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ wake_up(&sbi->gc_thread->gc_wait_queue_head);
+ io_schedule();
+ finish_wait(&sbi->gc_thread->fggc_wq, &wait);
+ } else {
+ struct f2fs_gc_control gc_control = {
+ .victim_segno = NULL_SEGNO,
+ .init_gc_type = BG_GC,
+ .no_bg_gc = true,
+ .should_migrate_blocks = false,
+ .err_gc_skipped = false,
+ .nr_free_secs = 1 };
+ f2fs_down_write(&sbi->gc_lock);
+ f2fs_gc(sbi, &gc_control);
}
}
@@ -933,9 +940,9 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
INIT_LIST_HEAD(&dc->list);
dc->bdev = bdev;
- dc->lstart = lstart;
- dc->start = start;
- dc->len = len;
+ dc->di.lstart = lstart;
+ dc->di.start = start;
+ dc->di.len = len;
dc->ref = 0;
dc->state = D_PREP;
dc->queued = 0;
@@ -950,20 +957,108 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
return dc;
}
-static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
- struct block_device *bdev, block_t lstart,
- block_t start, block_t len,
- struct rb_node *parent, struct rb_node **p,
- bool leftmost)
+static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_F2FS_CHECK_FS
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct rb_node *cur = rb_first_cached(&dcc->root), *next;
+ struct discard_cmd *cur_dc, *next_dc;
+
+ while (cur) {
+ next = rb_next(cur);
+ if (!next)
+ return true;
+
+ cur_dc = rb_entry(cur, struct discard_cmd, rb_node);
+ next_dc = rb_entry(next, struct discard_cmd, rb_node);
+
+ if (cur_dc->di.lstart + cur_dc->di.len > next_dc->di.lstart) {
+ f2fs_info(sbi, "broken discard_rbtree, "
+ "cur(%u, %u) next(%u, %u)",
+ cur_dc->di.lstart, cur_dc->di.len,
+ next_dc->di.lstart, next_dc->di.len);
+ return false;
+ }
+ cur = next;
+ }
+#endif
+ return true;
+}
+
+static struct discard_cmd *__lookup_discard_cmd(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct rb_node *node = dcc->root.rb_root.rb_node;
struct discard_cmd *dc;
- dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
+ while (node) {
+ dc = rb_entry(node, struct discard_cmd, rb_node);
- rb_link_node(&dc->rb_node, parent, p);
- rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
+ if (blkaddr < dc->di.lstart)
+ node = node->rb_left;
+ else if (blkaddr >= dc->di.lstart + dc->di.len)
+ node = node->rb_right;
+ else
+ return dc;
+ }
+ return NULL;
+}
+
+static struct discard_cmd *__lookup_discard_cmd_ret(struct rb_root_cached *root,
+ block_t blkaddr,
+ struct discard_cmd **prev_entry,
+ struct discard_cmd **next_entry,
+ struct rb_node ***insert_p,
+ struct rb_node **insert_parent)
+{
+ struct rb_node **pnode = &root->rb_root.rb_node;
+ struct rb_node *parent = NULL, *tmp_node;
+ struct discard_cmd *dc;
+
+ *insert_p = NULL;
+ *insert_parent = NULL;
+ *prev_entry = NULL;
+ *next_entry = NULL;
+
+ if (RB_EMPTY_ROOT(&root->rb_root))
+ return NULL;
+
+ while (*pnode) {
+ parent = *pnode;
+ dc = rb_entry(*pnode, struct discard_cmd, rb_node);
+
+ if (blkaddr < dc->di.lstart)
+ pnode = &(*pnode)->rb_left;
+ else if (blkaddr >= dc->di.lstart + dc->di.len)
+ pnode = &(*pnode)->rb_right;
+ else
+ goto lookup_neighbors;
+ }
+
+ *insert_p = pnode;
+ *insert_parent = parent;
+
+ dc = rb_entry(parent, struct discard_cmd, rb_node);
+ tmp_node = parent;
+ if (parent && blkaddr > dc->di.lstart)
+ tmp_node = rb_next(parent);
+ *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
+
+ tmp_node = parent;
+ if (parent && blkaddr < dc->di.lstart)
+ tmp_node = rb_prev(parent);
+ *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
+ return NULL;
+lookup_neighbors:
+ /* lookup prev node for merging backward later */
+ tmp_node = rb_prev(&dc->rb_node);
+ *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
+
+ /* lookup next node for merging frontward later */
+ tmp_node = rb_next(&dc->rb_node);
+ *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
return dc;
}
@@ -975,7 +1070,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc,
list_del(&dc->list);
rb_erase_cached(&dc->rb_node, &dcc->root);
- dcc->undiscard_blks -= dc->len;
+ dcc->undiscard_blks -= dc->di.len;
kmem_cache_free(discard_cmd_slab, dc);
@@ -988,7 +1083,7 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
unsigned long flags;
- trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
+ trace_f2fs_remove_discard(dc->bdev, dc->di.start, dc->di.len);
spin_lock_irqsave(&dc->lock, flags);
if (dc->bio_ref) {
@@ -1006,7 +1101,7 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
printk_ratelimited(
"%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
KERN_INFO, sbi->sb->s_id,
- dc->lstart, dc->start, dc->len, dc->error);
+ dc->di.lstart, dc->di.start, dc->di.len, dc->error);
__detach_discard_cmd(dcc, dc);
}
@@ -1122,14 +1217,14 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
return 0;
- trace_f2fs_issue_discard(bdev, dc->start, dc->len);
+ trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);
- lstart = dc->lstart;
- start = dc->start;
- len = dc->len;
+ lstart = dc->di.lstart;
+ start = dc->di.start;
+ len = dc->di.len;
total_len = len;
- dc->len = 0;
+ dc->di.len = 0;
while (total_len && *issued < dpolicy->max_requests && !err) {
struct bio *bio = NULL;
@@ -1145,7 +1240,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
if (*issued == dpolicy->max_requests)
last = true;
- dc->len += len;
+ dc->di.len += len;
if (time_to_inject(sbi, FAULT_DISCARD)) {
err = -EIO;
@@ -1207,34 +1302,41 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
return err;
}
-static void __insert_discard_tree(struct f2fs_sb_info *sbi,
+static void __insert_discard_cmd(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
- block_t start, block_t len,
- struct rb_node **insert_p,
- struct rb_node *insert_parent)
+ block_t start, block_t len)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
- struct rb_node **p;
+ struct rb_node **p = &dcc->root.rb_root.rb_node;
struct rb_node *parent = NULL;
+ struct discard_cmd *dc;
bool leftmost = true;
- if (insert_p && insert_parent) {
- parent = insert_parent;
- p = insert_p;
- goto do_insert;
+ /* look up rb tree to find parent node */
+ while (*p) {
+ parent = *p;
+ dc = rb_entry(parent, struct discard_cmd, rb_node);
+
+ if (lstart < dc->di.lstart) {
+ p = &(*p)->rb_left;
+ } else if (lstart >= dc->di.lstart + dc->di.len) {
+ p = &(*p)->rb_right;
+ leftmost = false;
+ } else {
+ f2fs_bug_on(sbi, 1);
+ }
}
- p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
- lstart, &leftmost);
-do_insert:
- __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
- p, leftmost);
+ dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
+
+ rb_link_node(&dc->rb_node, parent, p);
+ rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
}
static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
struct discard_cmd *dc)
{
- list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
+ list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->di.len)]);
}
static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
@@ -1244,7 +1346,7 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_info di = dc->di;
bool modified = false;
- if (dc->state == D_DONE || dc->len == 1) {
+ if (dc->state == D_DONE || dc->di.len == 1) {
__remove_discard_cmd(sbi, dc);
return;
}
@@ -1252,23 +1354,22 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
dcc->undiscard_blks -= di.len;
if (blkaddr > di.lstart) {
- dc->len = blkaddr - dc->lstart;
- dcc->undiscard_blks += dc->len;
+ dc->di.len = blkaddr - dc->di.lstart;
+ dcc->undiscard_blks += dc->di.len;
__relocate_discard_cmd(dcc, dc);
modified = true;
}
if (blkaddr < di.lstart + di.len - 1) {
if (modified) {
- __insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
+ __insert_discard_cmd(sbi, dc->bdev, blkaddr + 1,
di.start + blkaddr + 1 - di.lstart,
- di.lstart + di.len - 1 - blkaddr,
- NULL, NULL);
+ di.lstart + di.len - 1 - blkaddr);
} else {
- dc->lstart++;
- dc->len--;
- dc->start++;
- dcc->undiscard_blks += dc->len;
+ dc->di.lstart++;
+ dc->di.len--;
+ dc->di.start++;
+ dcc->undiscard_blks += dc->di.len;
__relocate_discard_cmd(dcc, dc);
}
}
@@ -1287,17 +1388,14 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
block_t end = lstart + len;
- dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
- NULL, lstart,
- (struct rb_entry **)&prev_dc,
- (struct rb_entry **)&next_dc,
- &insert_p, &insert_parent, true, NULL);
+ dc = __lookup_discard_cmd_ret(&dcc->root, lstart,
+ &prev_dc, &next_dc, &insert_p, &insert_parent);
if (dc)
prev_dc = dc;
if (!prev_dc) {
di.lstart = lstart;
- di.len = next_dc ? next_dc->lstart - lstart : len;
+ di.len = next_dc ? next_dc->di.lstart - lstart : len;
di.len = min(di.len, len);
di.start = start;
}
@@ -1308,16 +1406,16 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct discard_cmd *tdc = NULL;
if (prev_dc) {
- di.lstart = prev_dc->lstart + prev_dc->len;
+ di.lstart = prev_dc->di.lstart + prev_dc->di.len;
if (di.lstart < lstart)
di.lstart = lstart;
if (di.lstart >= end)
break;
- if (!next_dc || next_dc->lstart > end)
+ if (!next_dc || next_dc->di.lstart > end)
di.len = end - di.lstart;
else
- di.len = next_dc->lstart - di.lstart;
+ di.len = next_dc->di.lstart - di.lstart;
di.start = start + di.lstart - lstart;
}
@@ -1350,10 +1448,9 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
merged = true;
}
- if (!merged) {
- __insert_discard_tree(sbi, bdev, di.lstart, di.start,
- di.len, NULL, NULL);
- }
+ if (!merged)
+ __insert_discard_cmd(sbi, bdev,
+ di.lstart, di.start, di.len);
next:
prev_dc = next_dc;
if (!prev_dc)
@@ -1392,15 +1489,11 @@ static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
struct rb_node **insert_p = NULL, *insert_parent = NULL;
struct discard_cmd *dc;
struct blk_plug plug;
- unsigned int pos = dcc->next_pos;
bool io_interrupted = false;
mutex_lock(&dcc->cmd_lock);
- dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
- NULL, pos,
- (struct rb_entry **)&prev_dc,
- (struct rb_entry **)&next_dc,
- &insert_p, &insert_parent, true, NULL);
+ dc = __lookup_discard_cmd_ret(&dcc->root, dcc->next_pos,
+ &prev_dc, &next_dc, &insert_p, &insert_parent);
if (!dc)
dc = next_dc;
@@ -1418,7 +1511,7 @@ static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
break;
}
- dcc->next_pos = dc->lstart + dc->len;
+ dcc->next_pos = dc->di.lstart + dc->di.len;
err = __submit_discard_cmd(sbi, dpolicy, dc, issued);
if (*issued >= dpolicy->max_requests)
@@ -1477,8 +1570,7 @@ retry:
if (list_empty(pend_list))
goto next;
if (unlikely(dcc->rbtree_check))
- f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &dcc->root, false));
+ f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
@@ -1556,7 +1648,7 @@ static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
dc->ref--;
if (!dc->ref) {
if (!dc->error)
- len = dc->len;
+ len = dc->di.len;
__remove_discard_cmd(sbi, dc);
}
mutex_unlock(&dcc->cmd_lock);
@@ -1579,14 +1671,15 @@ next:
mutex_lock(&dcc->cmd_lock);
list_for_each_entry_safe(iter, tmp, wait_list, list) {
- if (iter->lstart + iter->len <= start || end <= iter->lstart)
+ if (iter->di.lstart + iter->di.len <= start ||
+ end <= iter->di.lstart)
continue;
- if (iter->len < dpolicy->granularity)
+ if (iter->di.len < dpolicy->granularity)
continue;
if (iter->state == D_DONE && !iter->ref) {
wait_for_completion_io(&iter->wait);
if (!iter->error)
- trimmed += iter->len;
+ trimmed += iter->di.len;
__remove_discard_cmd(sbi, iter);
} else {
iter->ref++;
@@ -1630,8 +1723,7 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
bool need_wait = false;
mutex_lock(&dcc->cmd_lock);
- dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
- NULL, blkaddr);
+ dc = __lookup_discard_cmd(sbi, blkaddr);
if (dc) {
if (dc->state == D_PREP) {
__punch_discard_cmd(sbi, dc, blkaddr);
@@ -1760,6 +1852,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
sector_t sector, nr_sects;
block_t lblkstart = blkstart;
int devi = 0;
+ u64 remainder = 0;
if (f2fs_is_multi_device(sbi)) {
devi = f2fs_target_device_index(sbi, blkstart);
@@ -1775,9 +1868,9 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
sector = SECTOR_FROM_BLOCK(blkstart);
nr_sects = SECTOR_FROM_BLOCK(blklen);
+ div64_u64_rem(sector, bdev_zone_sectors(bdev), &remainder);
- if (sector & (bdev_zone_sectors(bdev) - 1) ||
- nr_sects != bdev_zone_sectors(bdev)) {
+ if (remainder || nr_sects != bdev_zone_sectors(bdev)) {
f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
devi, sbi->s_ndevs ? FDEV(devi).path : "",
blkstart, blklen);
@@ -1982,9 +2075,11 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
if (force && start >= cpc->trim_start &&
(end - 1) <= cpc->trim_end)
- continue;
+ continue;
- if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) {
+ /* Should cover 2MB zoned device for zone-based reset */
+ if (!f2fs_sb_has_blkzoned(sbi) &&
+ (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
(end - start) << sbi->log_blocks_per_seg);
continue;
@@ -2787,7 +2882,6 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
int alloc_mode, unsigned long long age)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
- const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
unsigned segno = NULL_SEGNO;
unsigned short seg_type = curseg->seg_type;
int i, cnt;
@@ -2796,7 +2890,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
sanity_check_seg_type(sbi, seg_type);
/* f2fs_need_SSR() already forces to do this */
- if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
+ if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
curseg->next_segno = segno;
return 1;
}
@@ -2823,7 +2917,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
for (; cnt-- > 0; reversed ? i-- : i++) {
if (i == seg_type)
continue;
- if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
+ if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
curseg->next_segno = segno;
return 1;
}
@@ -2964,24 +3058,20 @@ next:
mutex_lock(&dcc->cmd_lock);
if (unlikely(dcc->rbtree_check))
- f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &dcc->root, false));
-
- dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
- NULL, start,
- (struct rb_entry **)&prev_dc,
- (struct rb_entry **)&next_dc,
- &insert_p, &insert_parent, true, NULL);
+ f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
+
+ dc = __lookup_discard_cmd_ret(&dcc->root, start,
+ &prev_dc, &next_dc, &insert_p, &insert_parent);
if (!dc)
dc = next_dc;
blk_start_plug(&plug);
- while (dc && dc->lstart <= end) {
+ while (dc && dc->di.lstart <= end) {
struct rb_node *node;
int err = 0;
- if (dc->len < dpolicy->granularity)
+ if (dc->di.len < dpolicy->granularity)
goto skip;
if (dc->state != D_PREP) {
@@ -2992,7 +3082,7 @@ next:
err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
if (issued >= dpolicy->max_requests) {
- start = dc->lstart + dc->len;
+ start = dc->di.lstart + dc->di.len;
if (err)
__remove_discard_cmd(sbi, dc);
@@ -4859,9 +4949,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
"New zone for curseg[%d] is not yet discarded. "
"Reset the zone: curseg[0x%x,0x%x]",
type, cs->segno, cs->next_blkoff);
- err = __f2fs_issue_discard_zone(sbi, zbd->bdev,
- zone_sector >> log_sectors_per_block,
- zone.len >> log_sectors_per_block);
+ err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block,
+ zone.len >> log_sectors_per_block);
if (err) {
f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
zbd->path, err);
@@ -4920,48 +5009,6 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
return 0;
}
-static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx,
- unsigned int dev_idx)
-{
- if (!bdev_is_zoned(FDEV(dev_idx).bdev))
- return true;
- return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq);
-}
-
-/* Return the zone index in the given device */
-static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno,
- int dev_idx)
-{
- block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
-
- return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >>
- sbi->log_blocks_per_blkz;
-}
-
-/*
- * Return the usable segments in a section based on the zone's
- * corresponding zone capacity. Zone is equal to a section.
- */
-static inline unsigned int f2fs_usable_zone_segs_in_sec(
- struct f2fs_sb_info *sbi, unsigned int segno)
-{
- unsigned int dev_idx, zone_idx;
-
- dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno));
- zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx);
-
- /* Conventional zone's capacity is always equal to zone size */
- if (is_conv_zone(sbi, zone_idx, dev_idx))
- return sbi->segs_per_sec;
-
- if (!sbi->unusable_blocks_per_sec)
- return sbi->segs_per_sec;
-
- /* Get the segment count beyond zone capacity block */
- return sbi->segs_per_sec - (sbi->unusable_blocks_per_sec >>
- sbi->log_blocks_per_seg);
-}
-
/*
* Return the number of usable blocks in a segment. The number of blocks
* returned is always equal to the number of blocks in a segment for
@@ -4974,23 +5021,13 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg(
struct f2fs_sb_info *sbi, unsigned int segno)
{
block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
- unsigned int zone_idx, dev_idx, secno;
-
- secno = GET_SEC_FROM_SEG(sbi, segno);
- seg_start = START_BLOCK(sbi, segno);
- dev_idx = f2fs_target_device_index(sbi, seg_start);
- zone_idx = get_zone_idx(sbi, secno, dev_idx);
-
- /*
- * Conventional zone's capacity is always equal to zone size,
- * so, blocks per segment is unchanged.
- */
- if (is_conv_zone(sbi, zone_idx, dev_idx))
- return sbi->blocks_per_seg;
+ unsigned int secno;
if (!sbi->unusable_blocks_per_sec)
return sbi->blocks_per_seg;
+ secno = GET_SEC_FROM_SEG(sbi, segno);
+ seg_start = START_BLOCK(sbi, segno);
sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
@@ -5024,11 +5061,6 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi
return 0;
}
-static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi,
- unsigned int segno)
-{
- return 0;
-}
#endif
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
unsigned int segno)
@@ -5043,7 +5075,7 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
unsigned int segno)
{
if (f2fs_sb_has_blkzoned(sbi))
- return f2fs_usable_zone_segs_in_sec(sbi, segno);
+ return CAP_SEGS_PER_SEC(sbi);
return sbi->segs_per_sec;
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index efdb7fc3b797..2ca8fb5d0dc4 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -104,6 +104,9 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
#define CAP_BLKS_PER_SEC(sbi) \
((sbi)->segs_per_sec * (sbi)->blocks_per_seg - \
(sbi)->unusable_blocks_per_sec)
+#define CAP_SEGS_PER_SEC(sbi) \
+ ((sbi)->segs_per_sec - ((sbi)->unusable_blocks_per_sec >>\
+ (sbi)->log_blocks_per_seg))
#define GET_SEC_FROM_SEG(sbi, segno) \
(((segno) == -1) ? -1: (segno) / (sbi)->segs_per_sec)
#define GET_SEG_FROM_SEC(sbi, secno) \
@@ -286,7 +289,6 @@ enum dirty_type {
};
struct dirty_seglist_info {
- const struct victim_selection *v_ops; /* victim selction operation */
unsigned long *dirty_segmap[NR_DIRTY_TYPE];
unsigned long *dirty_secmap;
struct mutex seglist_lock; /* lock for segment bitmaps */
@@ -297,12 +299,6 @@ struct dirty_seglist_info {
bool enable_pin_section; /* enable pinning section */
};
-/* victim selection function for cleaning and SSR */
-struct victim_selection {
- int (*get_victim)(struct f2fs_sb_info *, unsigned int *,
- int, int, char, unsigned long long);
-};
-
/* for active log information */
struct curseg_info {
struct mutex curseg_mutex; /* lock for consistency */
@@ -599,8 +595,12 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
return true;
}
-static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
- int freed, int needed)
+/*
+ * calculate needed sections for dirty node/dentry
+ * and call has_curseg_enough_space
+ */
+static inline void __get_secs_required(struct f2fs_sb_info *sbi,
+ unsigned int *lower_p, unsigned int *upper_p, bool *curseg_p)
{
unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
get_pages(sbi, F2FS_DIRTY_DENTS) +
@@ -610,27 +610,50 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi);
unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi);
unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi);
- unsigned int free, need_lower, need_upper;
+
+ if (lower_p)
+ *lower_p = node_secs + dent_secs;
+ if (upper_p)
+ *upper_p = node_secs + dent_secs +
+ (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0);
+ if (curseg_p)
+ *curseg_p = has_curseg_enough_space(sbi,
+ node_blocks, dent_blocks);
+}
+
+static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
+ int freed, int needed)
+{
+ unsigned int free_secs, lower_secs, upper_secs;
+ bool curseg_space;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return false;
- free = free_sections(sbi) + freed;
- need_lower = node_secs + dent_secs + reserved_sections(sbi) + needed;
- need_upper = need_lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0);
+ __get_secs_required(sbi, &lower_secs, &upper_secs, &curseg_space);
+
+ free_secs = free_sections(sbi) + freed;
+ lower_secs += needed + reserved_sections(sbi);
+ upper_secs += needed + reserved_sections(sbi);
- if (free > need_upper)
+ if (free_secs > upper_secs)
return false;
- else if (free <= need_lower)
+ else if (free_secs <= lower_secs)
return true;
- return !has_curseg_enough_space(sbi, node_blocks, dent_blocks);
+ return !curseg_space;
+}
+
+static inline bool has_enough_free_secs(struct f2fs_sb_info *sbi,
+ int freed, int needed)
+{
+ return !has_not_enough_free_secs(sbi, freed, needed);
}
static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
{
if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return true;
- if (likely(!has_not_enough_free_secs(sbi, 0, 0)))
+ if (likely(has_enough_free_secs(sbi, 0, 0)))
return true;
return false;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fbaaabbcd6de..9f15b03037db 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -880,8 +880,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (args->from && match_int(args, &arg))
return -EINVAL;
if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) {
- f2fs_warn(sbi, "Not support %d, larger than %d",
- 1 << arg, BIO_MAX_VECS);
+ f2fs_warn(sbi, "Not support %ld, larger than %d",
+ BIT(arg), BIO_MAX_VECS);
return -EINVAL;
}
F2FS_OPTION(sbi).write_io_size_bits = arg;
@@ -1179,9 +1179,17 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
kfree(name);
break;
case Opt_compress_chksum:
+ if (!f2fs_sb_has_compression(sbi)) {
+ f2fs_info(sbi, "Image doesn't support compression");
+ break;
+ }
F2FS_OPTION(sbi).compress_chksum = true;
break;
case Opt_compress_mode:
+ if (!f2fs_sb_has_compression(sbi)) {
+ f2fs_info(sbi, "Image doesn't support compression");
+ break;
+ }
name = match_strdup(&args[0]);
if (!name)
return -ENOMEM;
@@ -1196,6 +1204,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
kfree(name);
break;
case Opt_compress_cache:
+ if (!f2fs_sb_has_compression(sbi)) {
+ f2fs_info(sbi, "Image doesn't support compression");
+ break;
+ }
set_opt(sbi, COMPRESS_CACHE);
break;
#else
@@ -1310,7 +1322,7 @@ default_check:
#endif
if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) {
- f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
+ f2fs_err(sbi, "Should set mode=lfs with %luKB-sized IO",
F2FS_IO_SIZE_KB(sbi));
return -EINVAL;
}
@@ -2060,10 +2072,12 @@ static void default_options(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
- F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4;
- F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE;
- F2FS_OPTION(sbi).compress_ext_cnt = 0;
- F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS;
+ if (f2fs_sb_has_compression(sbi)) {
+ F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4;
+ F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE;
+ F2FS_OPTION(sbi).compress_ext_cnt = 0;
+ F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS;
+ }
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
@@ -2274,7 +2288,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
if (f2fs_readonly(sb) && (*flags & SB_RDONLY))
goto skip;
- if (f2fs_sb_has_readonly(sbi) && !(*flags & SB_RDONLY)) {
+ if (f2fs_dev_is_readonly(sbi) && !(*flags & SB_RDONLY)) {
err = -EROFS;
goto restore_opts;
}
@@ -2487,6 +2501,54 @@ restore_opts:
}
#ifdef CONFIG_QUOTA
+static bool f2fs_need_recovery(struct f2fs_sb_info *sbi)
+{
+ /* need to recovery orphan */
+ if (is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
+ return true;
+ /* need to recovery data */
+ if (test_opt(sbi, DISABLE_ROLL_FORWARD))
+ return false;
+ if (test_opt(sbi, NORECOVERY))
+ return false;
+ return !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG);
+}
+
+static bool f2fs_recover_quota_begin(struct f2fs_sb_info *sbi)
+{
+ bool readonly = f2fs_readonly(sbi->sb);
+
+ if (!f2fs_need_recovery(sbi))
+ return false;
+
+ /* it doesn't need to check f2fs_sb_has_readonly() */
+ if (f2fs_hw_is_readonly(sbi))
+ return false;
+
+ if (readonly) {
+ sbi->sb->s_flags &= ~SB_RDONLY;
+ set_sbi_flag(sbi, SBI_IS_WRITABLE);
+ }
+
+ /*
+ * Turn on quotas which were not enabled for read-only mounts if
+ * filesystem has quota feature, so that they are updated correctly.
+ */
+ return f2fs_enable_quota_files(sbi, readonly);
+}
+
+static void f2fs_recover_quota_end(struct f2fs_sb_info *sbi,
+ bool quota_enabled)
+{
+ if (quota_enabled)
+ f2fs_quota_off_umount(sbi->sb);
+
+ if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE)) {
+ clear_sbi_flag(sbi, SBI_IS_WRITABLE);
+ sbi->sb->s_flags |= SB_RDONLY;
+ }
+}
+
/* Read data from quotafile */
static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off)
@@ -3260,7 +3322,7 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
raw_super->segment_count = cpu_to_le32((main_end_blkaddr -
segment0_blkaddr) >> log_blocks_per_seg);
- if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) {
+ if (f2fs_readonly(sb) || f2fs_hw_is_readonly(sbi)) {
set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
res = "internally";
} else {
@@ -3348,7 +3410,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
total_sections = le32_to_cpu(raw_super->section_count);
/* blocks_per_seg should be 512, given the above check */
- blocks_per_seg = 1 << le32_to_cpu(raw_super->log_blocks_per_seg);
+ blocks_per_seg = BIT(le32_to_cpu(raw_super->log_blocks_per_seg));
if (segment_count > F2FS_MAX_SEGMENT ||
segment_count < F2FS_MIN_SEGMENTS) {
@@ -3617,9 +3679,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->log_sectors_per_block =
le32_to_cpu(raw_super->log_sectors_per_block);
sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize);
- sbi->blocksize = 1 << sbi->log_blocksize;
+ sbi->blocksize = BIT(sbi->log_blocksize);
sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
- sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg;
+ sbi->blocks_per_seg = BIT(sbi->log_blocks_per_seg);
sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
sbi->total_sections = le32_to_cpu(raw_super->section_count);
@@ -3744,12 +3806,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
SECTOR_TO_BLOCK(zone_sectors))
return -EINVAL;
sbi->blocks_per_blkz = SECTOR_TO_BLOCK(zone_sectors);
- if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
- __ilog2_u32(sbi->blocks_per_blkz))
- return -EINVAL;
- sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
- FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
- sbi->log_blocks_per_blkz;
+ FDEV(devi).nr_blkz = div_u64(SECTOR_TO_BLOCK(nr_sectors),
+ sbi->blocks_per_blkz);
if (nr_sectors & (zone_sectors - 1))
FDEV(devi).nr_blkz++;
@@ -3836,7 +3894,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
int err;
if ((recover && f2fs_readonly(sbi->sb)) ||
- bdev_read_only(sbi->sb->s_bdev)) {
+ f2fs_hw_is_readonly(sbi)) {
set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
return -EROFS;
}
@@ -3875,7 +3933,7 @@ void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
f2fs_down_write(&sbi->sb_lock);
- if (raw_super->s_stop_reason[reason] < ((1 << BITS_PER_BYTE) - 1))
+ if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
raw_super->s_stop_reason[reason]++;
err = f2fs_commit_super(sbi, false);
@@ -3885,7 +3943,7 @@ void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
f2fs_up_write(&sbi->sb_lock);
}
-static void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
+void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
{
spin_lock(&sbi->error_lock);
if (!test_bit(flag, (unsigned long *)sbi->errors)) {
@@ -4025,7 +4083,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
FDEV(i).start_blk, FDEV(i).end_blk);
}
f2fs_info(sbi,
- "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
+ "IO Block Size: %8ld KB", F2FS_IO_SIZE_KB(sbi));
return 0;
}
@@ -4102,6 +4160,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
int recovery, i, valid_super_block;
struct curseg_info *seg_i;
int retry_cnt = 1;
+#ifdef CONFIG_QUOTA
+ bool quota_enabled = false;
+#endif
try_onemore:
err = -EINVAL;
@@ -4395,6 +4456,8 @@ try_onemore:
if (err)
f2fs_err(sbi, "Cannot turn on quotas: error %d", err);
}
+
+ quota_enabled = f2fs_recover_quota_begin(sbi);
#endif
/* if there are any orphan inodes, free them */
err = f2fs_recover_orphan_inodes(sbi);
@@ -4452,6 +4515,10 @@ try_onemore:
}
}
+#ifdef CONFIG_QUOTA
+ f2fs_recover_quota_end(sbi, quota_enabled);
+#endif
+
/*
* If the f2fs is not readonly and fsync data recovery succeeds,
* check zoned block devices' write pointer consistency.
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 0b19163c90d4..8ea05340bad9 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -312,19 +312,14 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) {
struct ckpt_req_control *cprc = &sbi->cprc_info;
- int len = 0;
int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio);
int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio);
- if (class == IOPRIO_CLASS_RT)
- len += scnprintf(buf + len, PAGE_SIZE - len, "rt,");
- else if (class == IOPRIO_CLASS_BE)
- len += scnprintf(buf + len, PAGE_SIZE - len, "be,");
- else
+ if (class != IOPRIO_CLASS_RT && class != IOPRIO_CLASS_BE)
return -EINVAL;
- len += scnprintf(buf + len, PAGE_SIZE - len, "%d\n", data);
- return len;
+ return sysfs_emit(buf, "%s,%d\n",
+ class == IOPRIO_CLASS_RT ? "rt" : "be", data);
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -452,7 +447,7 @@ out:
if (ret < 0)
return ret;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
+ if (a->struct_type == FAULT_INFO_TYPE && t >= BIT(FAULT_MAX))
return -EINVAL;
if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX)
return -EINVAL;
@@ -575,9 +570,9 @@ out:
if (!strcmp(a->attr.name, "iostat_period_ms")) {
if (t < MIN_IOSTAT_PERIOD_MS || t > MAX_IOSTAT_PERIOD_MS)
return -EINVAL;
- spin_lock(&sbi->iostat_lock);
+ spin_lock_irq(&sbi->iostat_lock);
sbi->iostat_period_ms = (unsigned int)t;
- spin_unlock(&sbi->iostat_lock);
+ spin_unlock_irq(&sbi->iostat_lock);
return count;
}
#endif
@@ -598,6 +593,20 @@ out:
sbi->compr_new_inode = 0;
return count;
}
+
+ if (!strcmp(a->attr.name, "compress_percent")) {
+ if (t == 0 || t > 100)
+ return -EINVAL;
+ *ui = t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "compress_watermark")) {
+ if (t == 0 || t > 100)
+ return -EINVAL;
+ *ui = t;
+ return count;
+ }
#endif
if (!strcmp(a->attr.name, "atgc_candidate_ratio")) {
@@ -950,6 +959,8 @@ F2FS_FEATURE_RO_ATTR(compression);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_written_block, compr_written_block);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_saved_block, compr_saved_block);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_new_inode, compr_new_inode);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compress_percent, compress_percent);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compress_watermark, compress_watermark);
#endif
F2FS_FEATURE_RO_ATTR(pin_file);
@@ -1057,6 +1068,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(compr_written_block),
ATTR_LIST(compr_saved_block),
ATTR_LIST(compr_new_inode),
+ ATTR_LIST(compress_percent),
+ ATTR_LIST(compress_watermark),
#endif
/* For ATGC */
ATTR_LIST(atgc_candidate_ratio),
@@ -1449,25 +1462,14 @@ put_sb_kobj:
void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
{
- if (sbi->s_proc) {
-#ifdef CONFIG_F2FS_IOSTAT
- remove_proc_entry("iostat_info", sbi->s_proc);
-#endif
- remove_proc_entry("segment_info", sbi->s_proc);
- remove_proc_entry("segment_bits", sbi->s_proc);
- remove_proc_entry("victim_bits", sbi->s_proc);
- remove_proc_entry("discard_plist_info", sbi->s_proc);
- remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
- }
+ if (sbi->s_proc)
+ remove_proc_subtree(sbi->sb->s_id, f2fs_proc_root);
- kobject_del(&sbi->s_stat_kobj);
kobject_put(&sbi->s_stat_kobj);
wait_for_completion(&sbi->s_stat_kobj_unregister);
- kobject_del(&sbi->s_feature_list_kobj);
kobject_put(&sbi->s_feature_list_kobj);
wait_for_completion(&sbi->s_feature_list_kobj_unregister);
- kobject_del(&sbi->s_kobj);
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
}