From 07fd7c329839cf0b8c7766883d830a1a0d12d1dd Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 18 Feb 2024 14:50:13 +0100 Subject: libfs: add path_from_stashed() Add a helper for both nsfs and pidfs to reuse an already stashed dentry or to add and stash a new dentry. Link: https://lore.kernel.org/r/20240218-neufahrzeuge-brauhaus-fb0eb6459771@brauner Signed-off-by: Christian Brauner --- fs/libfs.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) (limited to 'fs/libfs.c') diff --git a/fs/libfs.c b/fs/libfs.c index eec6031b0155..2a55e87e1439 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1973,3 +1973,97 @@ struct timespec64 simple_inode_init_ts(struct inode *inode) return ts; } EXPORT_SYMBOL(simple_inode_init_ts); + +static inline struct dentry *get_stashed_dentry(struct dentry *stashed) +{ + struct dentry *dentry; + + guard(rcu)(); + dentry = READ_ONCE(stashed); + if (!dentry) + return NULL; + if (!lockref_get_not_dead(&dentry->d_lockref)) + return NULL; + return dentry; +} + +static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, + struct super_block *sb, + const struct file_operations *fops, + void *data) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_anon(sb); + if (!dentry) + return ERR_PTR(-ENOMEM); + + inode = new_inode_pseudo(sb); + if (!inode) { + dput(dentry); + return ERR_PTR(-ENOMEM); + } + + inode->i_ino = ino; + inode->i_flags |= S_IMMUTABLE; + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_fop = fops; + inode->i_private = data; + simple_inode_init_ts(inode); + + /* @data is now owned by the fs */ + d_instantiate(dentry, inode); + + if (cmpxchg(stashed, NULL, dentry)) { + d_delete(dentry); /* make sure ->d_prune() does nothing */ + dput(dentry); + cpu_relax(); + return ERR_PTR(-EAGAIN); + } + + return dentry; +} + +/** + * path_from_stashed - create path from stashed or new dentry + * @stashed: where to retrieve or stash dentry + * @ino: inode number to use + * @mnt: mnt of the filesystems to use + * @fops: file operations to use + * @data: data to store in inode->i_private + * @path: path to create + * + * The function tries to retrieve a stashed dentry from @stashed. If the dentry + * is still valid then it will be reused. If the dentry isn't able the function + * will allocate a new dentry and inode. It will then try to update @stashed + * with the newly added dentry. If it fails -EAGAIN is returned and the caller + * my retry. + * + * Special-purpose helper for nsfs and pidfs. + * + * Return: If 0 or an error is returned the caller can be sure that @data must + * be cleaned up. If 1 or -EAGAIN is returned @data is owned by the + * filesystem. + */ +int path_from_stashed(struct dentry **stashed, unsigned long ino, + struct vfsmount *mnt, const struct file_operations *fops, + void *data, struct path *path) +{ + struct dentry *dentry; + int ret = 0; + + dentry = get_stashed_dentry(*stashed); + if (dentry) + goto out_path; + + dentry = stash_dentry(stashed, ino, mnt->mnt_sb, fops, data); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + ret = 1; + +out_path: + path->dentry = dentry; + path->mnt = mntget(mnt); + return ret; +} -- cgit v1.2.3 From b28ddcc32d8fa3e20745b3a47dff863fe0376d79 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 19 Feb 2024 16:30:57 +0100 Subject: pidfs: convert to path_from_stashed() helper Moving pidfds from the anonymous inode infrastructure to a separate tiny in-kernel filesystem similar to sockfs, pipefs, and anon_inodefs causes selinux denials and thus various userspace components that make heavy use of pidfds to fail as pidfds used anon_inode_getfile() which aren't subject to any LSM hooks. But dentry_open() is and that would cause regressions. The failures that are seen are selinux denials. But the core failure is dbus-broker. That cascades into other services failing that depend on dbus-broker. For example, when dbus-broker fails to start polkit and all the others won't be able to work because they depend on dbus-broker. The reason for dbus-broker failing is because it doesn't handle failures for SO_PEERPIDFD correctly. Last kernel release we introduced SO_PEERPIDFD (and SCM_PIDFD). SO_PEERPIDFD allows dbus-broker and polkit and others to receive a pidfd for the peer of an AF_UNIX socket. This is the first time in the history of Linux that we can safely authenticate clients in a race-free manner. dbus-broker immediately made use of this but messed up the error checking. It only allowed EINVAL as a valid failure for SO_PEERPIDFD. That's obviously problematic not just because of LSM denials but because of seccomp denials that would prevent SO_PEERPIDFD from working; or any other new error code from there. So this is catching a flawed implementation in dbus-broker as well. It has to fallback to the old pid-based authentication when SO_PEERPIDFD doesn't work no matter the reasons otherwise it'll always risk such failures. So overall that LSM denial should not have caused dbus-broker to fail. It can never assume that a feature released one kernel ago like SO_PEERPIDFD can be assumed to be available. So, the next fix separate from the selinux policy update is to try and fix dbus-broker at [3]. That should make it into Fedora as well. In addition the selinux reference policy should also be updated. See [4] for that. If Selinux is in enforcing mode in userspace and it encounters anything that it doesn't know about it will deny it by default. And the policy is entirely in userspace including declaring new types for stuff like nsfs or pidfs to allow it. For now we continue to raise S_PRIVATE on the inode if it's a pidfs inode which means things behave exactly like before. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2265630 Link: https://github.com/fedora-selinux/selinux-policy/pull/2050 Link: https://github.com/bus1/dbus-broker/pull/343 [3] Link: https://github.com/SELinuxProject/refpolicy/pull/762 [4] Reported-by: Nathan Chancellor Link: https://lore.kernel.org/r/20240222190334.GA412503@dev-arch.thelio-3990X Link: https://lore.kernel.org/r/20240218-neufahrzeuge-brauhaus-fb0eb6459771@brauner Signed-off-by: Christian Brauner --- fs/libfs.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs/libfs.c') diff --git a/fs/libfs.c b/fs/libfs.c index 2a55e87e1439..2acba9d53756 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -1990,6 +1991,7 @@ static inline struct dentry *get_stashed_dentry(struct dentry *stashed) static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, struct super_block *sb, const struct file_operations *fops, + const struct inode_operations *iops, void *data) { struct dentry *dentry; @@ -2007,8 +2009,13 @@ static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, inode->i_ino = ino; inode->i_flags |= S_IMMUTABLE; + if (is_pidfs_sb(sb)) + inode->i_flags |= S_PRIVATE; inode->i_mode = S_IFREG | S_IRUGO; - inode->i_fop = fops; + if (iops) + inode->i_op = iops; + if (fops) + inode->i_fop = fops; inode->i_private = data; simple_inode_init_ts(inode); @@ -2030,6 +2037,7 @@ static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, * @stashed: where to retrieve or stash dentry * @ino: inode number to use * @mnt: mnt of the filesystems to use + * @iops: inode operations to use * @fops: file operations to use * @data: data to store in inode->i_private * @path: path to create @@ -2048,7 +2056,8 @@ static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, */ int path_from_stashed(struct dentry **stashed, unsigned long ino, struct vfsmount *mnt, const struct file_operations *fops, - void *data, struct path *path) + const struct inode_operations *iops, void *data, + struct path *path) { struct dentry *dentry; int ret = 0; @@ -2057,7 +2066,7 @@ int path_from_stashed(struct dentry **stashed, unsigned long ino, if (dentry) goto out_path; - dentry = stash_dentry(stashed, ino, mnt->mnt_sb, fops, data); + dentry = stash_dentry(stashed, ino, mnt->mnt_sb, fops, iops, data); if (IS_ERR(dentry)) return PTR_ERR(dentry); ret = 1; -- cgit v1.2.3 From 159a0d9fd50b92cc48e4c82cde79c4cb34c85953 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 18 Feb 2024 14:52:24 +0100 Subject: libfs: improve path_from_stashed() helper In earlier patches we moved both nsfs and pidfs to path_from_stashed(). The helper currently tries to add and stash a new dentry if a reusable dentry couldn't be found and returns EAGAIN if it lost the race to stash the dentry. The caller can use EAGAIN to retry. The helper and the two filesystems be written in a way that makes returning EAGAIN unnecessary. To do this we need to change the dentry->d_prune() implementation of nsfs and pidfs to not simply replace the stashed dentry with NULL but to use a cmpxchg() and only replace their own dentry. Then path_from_stashed() can then be changed to not just stash a new dentry when no dentry is currently stashed but also when an already dead dentry is stashed. If another task managed to install a dentry in the meantime it can simply be reused. Pack that into a loop and call it a day. Suggested-by: Linus Torvalds Link: https://lore.kernel.org/r/CAHk-=wgtLF5Z5=15-LKAczWm=-tUjHO+Bpf7WjBG+UU3s=fEQw@mail.gmail.com Signed-off-by: Christian Brauner --- fs/libfs.c | 61 ++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 21 deletions(-) (limited to 'fs/libfs.c') diff --git a/fs/libfs.c b/fs/libfs.c index 2acba9d53756..7617e1bc6e5b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1988,11 +1988,11 @@ static inline struct dentry *get_stashed_dentry(struct dentry *stashed) return dentry; } -static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, - struct super_block *sb, - const struct file_operations *fops, - const struct inode_operations *iops, - void *data) +static struct dentry *prepare_anon_dentry(unsigned long ino, + struct super_block *sb, + const struct file_operations *fops, + const struct inode_operations *iops, + void *data) { struct dentry *dentry; struct inode *inode; @@ -2021,15 +2021,29 @@ static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, /* @data is now owned by the fs */ d_instantiate(dentry, inode); + return dentry; +} - if (cmpxchg(stashed, NULL, dentry)) { - d_delete(dentry); /* make sure ->d_prune() does nothing */ - dput(dentry); - cpu_relax(); - return ERR_PTR(-EAGAIN); - } +static struct dentry *stash_dentry(struct dentry **stashed, + struct dentry *dentry) +{ + guard(rcu)(); + for (;;) { + struct dentry *old; - return dentry; + /* Assume any old dentry was cleared out. */ + old = cmpxchg(stashed, NULL, dentry); + if (likely(!old)) + return dentry; + + /* Check if somebody else installed a reusable dentry. */ + if (lockref_get_not_dead(&old->d_lockref)) + return old; + + /* There's an old dead dentry there, try to take it over. */ + if (likely(try_cmpxchg(stashed, &old, dentry))) + return dentry; + } } /** @@ -2044,15 +2058,14 @@ static struct dentry *stash_dentry(struct dentry **stashed, unsigned long ino, * * The function tries to retrieve a stashed dentry from @stashed. If the dentry * is still valid then it will be reused. If the dentry isn't able the function - * will allocate a new dentry and inode. It will then try to update @stashed - * with the newly added dentry. If it fails -EAGAIN is returned and the caller - * my retry. + * will allocate a new dentry and inode. It will then check again whether it + * can reuse an existing dentry in case one has been added in the meantime or + * update @stashed with the newly added dentry. * * Special-purpose helper for nsfs and pidfs. * * Return: If 0 or an error is returned the caller can be sure that @data must - * be cleaned up. If 1 or -EAGAIN is returned @data is owned by the - * filesystem. + * be cleaned up. If 1 is returned @data is owned by the filesystem. */ int path_from_stashed(struct dentry **stashed, unsigned long ino, struct vfsmount *mnt, const struct file_operations *fops, @@ -2062,17 +2075,23 @@ int path_from_stashed(struct dentry **stashed, unsigned long ino, struct dentry *dentry; int ret = 0; - dentry = get_stashed_dentry(*stashed); - if (dentry) + /* See if dentry can be reused. */ + path->dentry = get_stashed_dentry(*stashed); + if (path->dentry) goto out_path; - dentry = stash_dentry(stashed, ino, mnt->mnt_sb, fops, iops, data); + /* Allocate a new dentry. */ + dentry = prepare_anon_dentry(ino, mnt->mnt_sb, fops, iops, data); if (IS_ERR(dentry)) return PTR_ERR(dentry); + + /* Added a new dentry. @data is now owned by the filesystem. */ + path->dentry = stash_dentry(stashed, dentry); + if (path->dentry != dentry) + dput(dentry); ret = 1; out_path: - path->dentry = dentry; path->mnt = mntget(mnt); return ret; } -- cgit v1.2.3 From 2558e3b23112adb82a558bab616890a790a38bc6 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 21 Feb 2024 09:59:51 +0100 Subject: libfs: add stashed_dentry_prune() Both pidfs and nsfs use a memory location to stash a dentry for reuse by concurrent openers. Right now two custom dentry->d_prune::{ns,pidfs}_prune_dentry() methods are needed that do the same thing. The only thing that differs is that they need to get to the memory location to store or retrieve the dentry from differently. Fix that by remember the stashing location for the dentry in dentry->d_fsdata which allows us to retrieve it in dentry->d_prune. That in turn makes it possible to add a common helper that pidfs and nsfs can both use. Link: https://lore.kernel.org/r/CAHk-=wg8cHY=i3m6RnXQ2Y2W8psicKWQEZq1=94ivUiviM-0OA@mail.gmail.com Signed-off-by: Christian Brauner --- fs/libfs.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'fs/libfs.c') diff --git a/fs/libfs.c b/fs/libfs.c index 7617e1bc6e5b..472f21bd0325 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1988,7 +1988,8 @@ static inline struct dentry *get_stashed_dentry(struct dentry *stashed) return dentry; } -static struct dentry *prepare_anon_dentry(unsigned long ino, +static struct dentry *prepare_anon_dentry(struct dentry **stashed, + unsigned long ino, struct super_block *sb, const struct file_operations *fops, const struct inode_operations *iops, @@ -2019,6 +2020,9 @@ static struct dentry *prepare_anon_dentry(unsigned long ino, inode->i_private = data; simple_inode_init_ts(inode); + /* Store address of location where dentry's supposed to be stashed. */ + dentry->d_fsdata = stashed; + /* @data is now owned by the fs */ d_instantiate(dentry, inode); return dentry; @@ -2081,7 +2085,7 @@ int path_from_stashed(struct dentry **stashed, unsigned long ino, goto out_path; /* Allocate a new dentry. */ - dentry = prepare_anon_dentry(ino, mnt->mnt_sb, fops, iops, data); + dentry = prepare_anon_dentry(stashed, ino, mnt->mnt_sb, fops, iops, data); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -2092,6 +2096,27 @@ int path_from_stashed(struct dentry **stashed, unsigned long ino, ret = 1; out_path: + WARN_ON_ONCE(path->dentry->d_fsdata != stashed); + WARN_ON_ONCE(d_inode(path->dentry)->i_private != data); path->mnt = mntget(mnt); return ret; } + +void stashed_dentry_prune(struct dentry *dentry) +{ + struct dentry **stashed = dentry->d_fsdata; + struct inode *inode = d_inode(dentry); + + if (WARN_ON_ONCE(!stashed)) + return; + + if (!inode) + return; + + /* + * Only replace our own @dentry as someone else might've + * already cleared out @dentry and stashed their own + * dentry in there. + */ + cmpxchg(stashed, dentry, NULL); +} -- cgit v1.2.3 From e9c5263ce16d96311c118111ac779f004be8b473 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 1 Mar 2024 10:26:03 +0100 Subject: libfs: improve path_from_stashed() Right now we pass a bunch of info that is fs specific which doesn't make a lot of sense and it bleeds fs sepcific details into the generic helper. nsfs and pidfs have slightly different needs when initializing inodes. Add simple operations that are stashed in sb->s_fs_info that both can implement. This also allows us to get rid of cleaning up references in the caller. All in all path_from_stashed() becomes way simpler. Signed-off-by: Christian Brauner --- fs/libfs.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) (limited to 'fs/libfs.c') diff --git a/fs/libfs.c b/fs/libfs.c index 472f21bd0325..65322e11bcda 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1991,12 +1991,11 @@ static inline struct dentry *get_stashed_dentry(struct dentry *stashed) static struct dentry *prepare_anon_dentry(struct dentry **stashed, unsigned long ino, struct super_block *sb, - const struct file_operations *fops, - const struct inode_operations *iops, void *data) { struct dentry *dentry; struct inode *inode; + const struct stashed_operations *sops = sb->s_fs_info; dentry = d_alloc_anon(sb); if (!dentry) @@ -2010,15 +2009,13 @@ static struct dentry *prepare_anon_dentry(struct dentry **stashed, inode->i_ino = ino; inode->i_flags |= S_IMMUTABLE; - if (is_pidfs_sb(sb)) - inode->i_flags |= S_PRIVATE; - inode->i_mode = S_IFREG | S_IRUGO; - if (iops) - inode->i_op = iops; - if (fops) - inode->i_fop = fops; - inode->i_private = data; + inode->i_mode = S_IFREG; simple_inode_init_ts(inode); + sops->init_inode(inode, data); + + /* Notice when this is changed. */ + WARN_ON_ONCE(!S_ISREG(inode->i_mode)); + WARN_ON_ONCE(!IS_IMMUTABLE(inode)); /* Store address of location where dentry's supposed to be stashed. */ dentry->d_fsdata = stashed; @@ -2055,8 +2052,6 @@ static struct dentry *stash_dentry(struct dentry **stashed, * @stashed: where to retrieve or stash dentry * @ino: inode number to use * @mnt: mnt of the filesystems to use - * @iops: inode operations to use - * @fops: file operations to use * @data: data to store in inode->i_private * @path: path to create * @@ -2068,38 +2063,38 @@ static struct dentry *stash_dentry(struct dentry **stashed, * * Special-purpose helper for nsfs and pidfs. * - * Return: If 0 or an error is returned the caller can be sure that @data must - * be cleaned up. If 1 is returned @data is owned by the filesystem. + * Return: On success zero and on failure a negative error is returned. */ int path_from_stashed(struct dentry **stashed, unsigned long ino, - struct vfsmount *mnt, const struct file_operations *fops, - const struct inode_operations *iops, void *data, - struct path *path) + struct vfsmount *mnt, void *data, struct path *path) { struct dentry *dentry; - int ret = 0; + const struct stashed_operations *sops = mnt->mnt_sb->s_fs_info; /* See if dentry can be reused. */ path->dentry = get_stashed_dentry(*stashed); - if (path->dentry) + if (path->dentry) { + sops->put_data(data); goto out_path; + } /* Allocate a new dentry. */ - dentry = prepare_anon_dentry(stashed, ino, mnt->mnt_sb, fops, iops, data); - if (IS_ERR(dentry)) + dentry = prepare_anon_dentry(stashed, ino, mnt->mnt_sb, data); + if (IS_ERR(dentry)) { + sops->put_data(data); return PTR_ERR(dentry); + } /* Added a new dentry. @data is now owned by the filesystem. */ path->dentry = stash_dentry(stashed, dentry); if (path->dentry != dentry) dput(dentry); - ret = 1; out_path: WARN_ON_ONCE(path->dentry->d_fsdata != stashed); WARN_ON_ONCE(d_inode(path->dentry)->i_private != data); path->mnt = mntget(mnt); - return ret; + return 0; } void stashed_dentry_prune(struct dentry *dentry) -- cgit v1.2.3