From 019a0c9e377c9f7bd477a0742706d93cdddaee4d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 10 Jun 2022 09:57:18 +0200 Subject: fat: add a vfat_rename2() and make existing .rename callback a helper Patch series "fat: add support for the renameat2 RENAME_EXCHANGE flag", v6. The series adds support for the renameat2 system call RENAME_EXCHANGE flag (which allows two paths to be atomically exchanged) to the vfat filesystem code. There are many use cases for this, but we are particularly interested in making it possible for vfat filesystems to be part of OSTree [0] deployments. Currently OSTree relies on symbolic links to make the deployment updates an atomic transactional operation. But RENAME_EXCHANGE could be used [1] to achieve a similar level of robustness when using a vfat filesystem. Patch #1 is just a preparatory patch to introduce the RENAME_EXCHANGE support, patch #2 moves some code blocks in vfat_rename() to a set of helper functions that can be reused by vfat_rename_exchange(), which is added by patch #3, and finally patch #4 adds some kselftests to test it. This patch (of 4): Currently vfat only supports the RENAME_NOREPLACE flag which is handled by the virtual file system layer but doesn't support the RENAME_EXCHANGE flag. Add a vfat_rename2() function to be used as the .rename callback and move the current vfat_rename() handler to a helper. This is in preparation for implementing the RENAME_EXCHANGE flag using a different helper function. 
Link: https://lkml.kernel.org/r/20220610075721.1182745-1-javierm@redhat.com Link: https://lkml.kernel.org/r/20220610075721.1182745-2-javierm@redhat.com Signed-off-by: Javier Martinez Canillas Acked-by: OGAWA Hirofumi Cc: Christian Kellner Cc: Peter Jones Cc: Chung-Chiang Cheng Cc: Lennart Poettering Cc: Alexander Larsson Cc: Colin Walters Cc: Muhammad Usama Anjum Signed-off-by: Andrew Morton --- fs/fat/namei_vfat.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'fs/fat') diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index c573314806cf..88ccb2ee3537 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -889,9 +889,8 @@ out: return err; } -static int vfat_rename(struct user_namespace *mnt_userns, struct inode *old_dir, - struct dentry *old_dentry, struct inode *new_dir, - struct dentry *new_dentry, unsigned int flags) +static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) { struct buffer_head *dotdot_bh; struct msdos_dir_entry *dotdot_de; @@ -902,9 +901,6 @@ static int vfat_rename(struct user_namespace *mnt_userns, struct inode *old_dir, int err, is_dir, update_dotdot, corrupt = 0; struct super_block *sb = old_dir->i_sb; - if (flags & ~RENAME_NOREPLACE) - return -EINVAL; - old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; old_inode = d_inode(old_dentry); new_inode = d_inode(new_dentry); @@ -1021,13 +1017,24 @@ error_inode: goto out; } +static int vfat_rename2(struct user_namespace *mnt_userns, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry, unsigned int flags) +{ + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + + /* VFS already handled RENAME_NOREPLACE, handle it as a normal rename */ + return vfat_rename(old_dir, old_dentry, new_dir, new_dentry); +} + static const struct inode_operations vfat_dir_inode_operations = { .create = vfat_create, .lookup = vfat_lookup, .unlink = vfat_unlink, 
.mkdir = vfat_mkdir, .rmdir = vfat_rmdir, - .rename = vfat_rename, + .rename = vfat_rename2, .setattr = fat_setattr, .getattr = fat_getattr, .update_time = fat_update_time, -- cgit v1.2.3 From 204d03203a145b443cd8676dc12dbb47e1a3751f Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Fri, 10 Jun 2022 09:57:19 +0200 Subject: fat: factor out reusable code in vfat_rename() as helper functions The vfat_rename() function is quite big and there are code blocks that can be moved into helper functions. This not only simplifies the implementation of that function but also allows these helpers to be reused. For example, the helpers can be used by the handler of the RENAME_EXCHANGE flag once this is implemented in a subsequent patch. Link: https://lkml.kernel.org/r/20220610075721.1182745-3-javierm@redhat.com Signed-off-by: OGAWA Hirofumi Signed-off-by: Javier Martinez Canillas Acked-by: OGAWA Hirofumi Cc: Alexander Larsson Cc: Christian Kellner Cc: Chung-Chiang Cheng Cc: Colin Walters Cc: Lennart Poettering Cc: Muhammad Usama Anjum Cc: Peter Jones Signed-off-by: Andrew Morton --- fs/fat/namei_vfat.c | 89 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 32 deletions(-) (limited to 'fs/fat') diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 88ccb2ee3537..9c04053a8f1c 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -889,16 +889,55 @@ out: return err; } +static int vfat_get_dotdot_de(struct inode *inode, struct buffer_head **bh, + struct msdos_dir_entry **de) +{ + if (S_ISDIR(inode->i_mode)) { + if (fat_get_dotdot_entry(inode, bh, de)) + return -EIO; + } + return 0; +} + +static int vfat_sync_ipos(struct inode *dir, struct inode *inode) +{ + if (IS_DIRSYNC(dir)) + return fat_sync_inode(inode); + mark_inode_dirty(inode); + return 0; +} + +static int vfat_update_dotdot_de(struct inode *dir, struct inode *inode, + struct buffer_head *dotdot_bh, + struct msdos_dir_entry *dotdot_de) +{ + fat_set_start(dotdot_de, 
MSDOS_I(dir)->i_logstart); + mark_buffer_dirty_inode(dotdot_bh, inode); + if (IS_DIRSYNC(dir)) + return sync_dirty_buffer(dotdot_bh); + return 0; +} + +static void vfat_update_dir_metadata(struct inode *dir, struct timespec64 *ts) +{ + inode_inc_iversion(dir); + fat_truncate_time(dir, ts, S_CTIME | S_MTIME); + if (IS_DIRSYNC(dir)) + (void)fat_sync_inode(dir); + else + mark_inode_dirty(dir); +} + static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { struct buffer_head *dotdot_bh; - struct msdos_dir_entry *dotdot_de; + struct msdos_dir_entry *dotdot_de = NULL; struct inode *old_inode, *new_inode; struct fat_slot_info old_sinfo, sinfo; struct timespec64 ts; loff_t new_i_pos; - int err, is_dir, update_dotdot, corrupt = 0; + int err, is_dir, corrupt = 0; struct super_block *sb = old_dir->i_sb; old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; @@ -909,15 +948,13 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto out; - is_dir = S_ISDIR(old_inode->i_mode); - update_dotdot = (is_dir && old_dir != new_dir); - if (update_dotdot) { - if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de)) { - err = -EIO; + if (old_dir != new_dir) { + err = vfat_get_dotdot_de(old_inode, &dotdot_bh, &dotdot_de); + if (err) goto out; - } } + is_dir = S_ISDIR(old_inode->i_mode); ts = current_time(old_dir); if (new_inode) { if (is_dir) { @@ -938,21 +975,15 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, fat_detach(old_inode); fat_attach(old_inode, new_i_pos); - if (IS_DIRSYNC(new_dir)) { - err = fat_sync_inode(old_inode); - if (err) - goto error_inode; - } else - mark_inode_dirty(old_inode); + err = vfat_sync_ipos(new_dir, old_inode); + if (err) + goto error_inode; - if (update_dotdot) { - fat_set_start(dotdot_de, MSDOS_I(new_dir)->i_logstart); - mark_buffer_dirty_inode(dotdot_bh, old_inode); - if (IS_DIRSYNC(new_dir)) { - err = sync_dirty_buffer(dotdot_bh); - if 
(err) - goto error_dotdot; - } + if (dotdot_de) { + err = vfat_update_dotdot_de(new_dir, old_inode, dotdot_bh, + dotdot_de); + if (err) + goto error_dotdot; drop_nlink(old_dir); if (!new_inode) inc_nlink(new_dir); @@ -962,12 +993,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, old_sinfo.bh = NULL; if (err) goto error_dotdot; - inode_inc_iversion(old_dir); - fat_truncate_time(old_dir, &ts, S_CTIME|S_MTIME); - if (IS_DIRSYNC(old_dir)) - (void)fat_sync_inode(old_dir); - else - mark_inode_dirty(old_dir); + vfat_update_dir_metadata(old_dir, &ts); if (new_inode) { drop_nlink(new_inode); @@ -987,10 +1013,9 @@ error_dotdot: /* data cluster is shared, serious corruption */ corrupt = 1; - if (update_dotdot) { - fat_set_start(dotdot_de, MSDOS_I(old_dir)->i_logstart); - mark_buffer_dirty_inode(dotdot_bh, old_inode); - corrupt |= sync_dirty_buffer(dotdot_bh); + if (dotdot_de) { + corrupt |= vfat_update_dotdot_de(old_dir, old_inode, dotdot_bh, + dotdot_de); } error_inode: fat_detach(old_inode); -- cgit v1.2.3 From da87e1725ae2136baeb9aac04c572c283afc917f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 10 Jun 2022 09:57:20 +0200 Subject: fat: add renameat2 RENAME_EXCHANGE flag support The renameat2 RENAME_EXCHANGE flag allows two paths to be exchanged atomically but is currently not supported by the Linux vfat filesystem driver. Add a vfat_rename_exchange() helper function that implements this support. The super block lock is acquired during the operation to ensure atomicity, and in the error path the actions already performed are reversed, also with the mutex held. This makes the operation as transactional as possible, within the limitations imposed by vfat due to not having a journal with logs to replay. 
Link: https://lkml.kernel.org/r/20220610075721.1182745-4-javierm@redhat.com Signed-off-by: Javier Martinez Canillas Acked-by: OGAWA Hirofumi Cc: Alexander Larsson Cc: Christian Kellner Cc: Chung-Chiang Cheng Cc: Colin Walters Cc: Lennart Poettering Cc: Muhammad Usama Anjum Cc: Peter Jones Signed-off-by: Andrew Morton --- fs/fat/namei_vfat.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) (limited to 'fs/fat') diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 9c04053a8f1c..21620054e1c4 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1042,13 +1042,134 @@ error_inode: goto out; } +static void vfat_exchange_ipos(struct inode *old_inode, struct inode *new_inode, + loff_t old_i_pos, loff_t new_i_pos) +{ + fat_detach(old_inode); + fat_detach(new_inode); + fat_attach(old_inode, new_i_pos); + fat_attach(new_inode, old_i_pos); +} + +static void vfat_move_nlink(struct inode *src, struct inode *dst) +{ + drop_nlink(src); + inc_nlink(dst); +} + +static int vfat_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct buffer_head *old_dotdot_bh = NULL, *new_dotdot_bh = NULL; + struct msdos_dir_entry *old_dotdot_de = NULL, *new_dotdot_de = NULL; + struct inode *old_inode, *new_inode; + struct timespec64 ts = current_time(old_dir); + loff_t old_i_pos, new_i_pos; + int err, corrupt = 0; + struct super_block *sb = old_dir->i_sb; + + old_inode = d_inode(old_dentry); + new_inode = d_inode(new_dentry); + + /* Acquire super block lock for the operation to be atomic */ + mutex_lock(&MSDOS_SB(sb)->s_lock); + + /* if directories are not the same, get ".." 
info to update */ + if (old_dir != new_dir) { + err = vfat_get_dotdot_de(old_inode, &old_dotdot_bh, + &old_dotdot_de); + if (err) + goto out; + + err = vfat_get_dotdot_de(new_inode, &new_dotdot_bh, + &new_dotdot_de); + if (err) + goto out; + } + + old_i_pos = MSDOS_I(old_inode)->i_pos; + new_i_pos = MSDOS_I(new_inode)->i_pos; + + vfat_exchange_ipos(old_inode, new_inode, old_i_pos, new_i_pos); + + err = vfat_sync_ipos(old_dir, new_inode); + if (err) + goto error_exchange; + err = vfat_sync_ipos(new_dir, old_inode); + if (err) + goto error_exchange; + + /* update ".." directory entry info */ + if (old_dotdot_de) { + err = vfat_update_dotdot_de(new_dir, old_inode, old_dotdot_bh, + old_dotdot_de); + if (err) + goto error_old_dotdot; + } + if (new_dotdot_de) { + err = vfat_update_dotdot_de(old_dir, new_inode, new_dotdot_bh, + new_dotdot_de); + if (err) + goto error_new_dotdot; + } + + /* if cross directory and only one is a directory, adjust nlink */ + if (!old_dotdot_de != !new_dotdot_de) { + if (old_dotdot_de) + vfat_move_nlink(old_dir, new_dir); + else + vfat_move_nlink(new_dir, old_dir); + } + + vfat_update_dir_metadata(old_dir, &ts); + /* if directories are not the same, update new_dir as well */ + if (old_dir != new_dir) + vfat_update_dir_metadata(new_dir, &ts); + +out: + brelse(old_dotdot_bh); + brelse(new_dotdot_bh); + mutex_unlock(&MSDOS_SB(sb)->s_lock); + + return err; + +error_new_dotdot: + if (new_dotdot_de) { + corrupt |= vfat_update_dotdot_de(new_dir, new_inode, + new_dotdot_bh, new_dotdot_de); + } + +error_old_dotdot: + if (old_dotdot_de) { + corrupt |= vfat_update_dotdot_de(old_dir, old_inode, + old_dotdot_bh, old_dotdot_de); + } + +error_exchange: + vfat_exchange_ipos(old_inode, new_inode, new_i_pos, old_i_pos); + corrupt |= vfat_sync_ipos(new_dir, new_inode); + corrupt |= vfat_sync_ipos(old_dir, old_inode); + + if (corrupt < 0) { + fat_fs_error(new_dir->i_sb, + "%s: Filesystem corrupted (i_pos %lld, %lld)", + __func__, old_i_pos, new_i_pos); + } + goto 
out; +} + static int vfat_rename2(struct user_namespace *mnt_userns, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - if (flags & ~RENAME_NOREPLACE) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) return -EINVAL; + if (flags & RENAME_EXCHANGE) { + return vfat_rename_exchange(old_dir, old_dentry, + new_dir, new_dentry); + } + /* VFS already handled RENAME_NOREPLACE, handle it as a normal rename */ return vfat_rename(old_dir, old_dentry, new_dir, new_dentry); } -- cgit v1.2.3