From cfd86ef7e8e7b9e015707e46479a6b1de141eed0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Apr 2025 11:54:15 +0200 Subject: [PATCH 1/9] anon_inode: use a proper mode internally This allows the VFS to not trip over anonymous inodes and we can add asserts based on the mode into the vfs. When we report it to userspace we can simply hide the mode to avoid regressions. I've audited all direct callers of alloc_anon_inode() and only secretmen overrides i_mode and i_op inode operations but it already uses a regular file. Link: https://lore.kernel.org/20250407-work-anon_inode-v1-1-53a44c20d44e@kernel.org Fixes: af153bb63a336 ("vfs: catch invalid modes in may_open()") Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org # all LTS kernels Reported-by: syzbot+5d8e79d323a13aa0b248@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67ed3fb3.050a0220.14623d.0009.GAE@google.com Signed-off-by: Christian Brauner --- fs/anon_inodes.c | 36 ++++++++++++++++++++++++++++++++++++ fs/internal.h | 3 +++ fs/libfs.c | 8 +++++++- 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 583ac81669c2..42e4b9c34f89 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -24,9 +24,43 @@ #include +#include "internal.h" + static struct vfsmount *anon_inode_mnt __ro_after_init; static struct inode *anon_inode_inode __ro_after_init; +/* + * User space expects anonymous inodes to have no file type in st_mode. + * + * In particular, 'lsof' has this legacy logic: + * + * type = s->st_mode & S_IFMT; + * switch (type) { + * ... + * case 0: + * if (!strcmp(p, "anon_inode")) + * Lf->ntype = Ntype = N_ANON_INODE; + * + * to detect our old anon_inode logic. + * + * Rather than mess with our internal sane inode data, just fix it + * up here in getattr() by masking off the format bits. + */ +int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int query_flags) +{ + struct inode *inode = d_inode(path->dentry); + + generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); + stat->mode &= ~S_IFMT; + return 0; +} + +static const struct inode_operations anon_inode_operations = { + .getattr = anon_inode_getattr, +}; + /* * anon_inodefs_dname() is called from d_path(). */ @@ -66,6 +100,7 @@ static struct inode *anon_inode_make_secure_inode( if (IS_ERR(inode)) return inode; inode->i_flags &= ~S_PRIVATE; + inode->i_op = &anon_inode_operations; error = security_inode_init_security_anon(inode, &QSTR(name), context_inode); if (error) { @@ -313,6 +348,7 @@ static int __init anon_inode_init(void) anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); if (IS_ERR(anon_inode_inode)) panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode)); + anon_inode_inode->i_op = &anon_inode_operations; return 0; } diff --git a/fs/internal.h b/fs/internal.h index b9b3e29a73fd..717dc9eb6185 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -343,3 +343,6 @@ static inline bool path_mounted(const struct path *path) void file_f_owner_release(struct file *file); bool file_seek_cur_needs_f_lock(struct file *file); int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map); +int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *stat, u32 request_mask, + unsigned int query_flags); diff --git a/fs/libfs.c b/fs/libfs.c index 6393d7c49ee6..e1146620346e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1647,7 +1647,13 @@ struct inode *alloc_anon_inode(struct super_block *s) * that it already _is_ on the dirty list. */ inode->i_state = I_DIRTY; - inode->i_mode = S_IRUSR | S_IWUSR; + /* + * Historically anonymous inodes didn't have a type at all and + * userspace has come to rely on this. Internally they're just + * regular files but S_IFREG is masked off when reporting + * information to userspace. + */ + inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE; From 37e62dafbfaba467975b0a8c11b9ffaa678f8559 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Apr 2025 11:54:16 +0200 Subject: [PATCH 2/9] pidfs: use anon_inode_getattr() So far pidfs did use it's own version. Just use the generic version. We use our own wrappers because we're going to be implementing our own retrieval properties soon. Link: https://lore.kernel.org/20250407-work-anon_inode-v1-2-53a44c20d44e@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/pidfs.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index d64a4cbeb0da..809c3393b6a3 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -572,33 +572,11 @@ static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return -EOPNOTSUPP; } - -/* - * User space expects pidfs inodes to have no file type in st_mode. - * - * In particular, 'lsof' has this legacy logic: - * - * type = s->st_mode & S_IFMT; - * switch (type) { - * ... - * case 0: - * if (!strcmp(p, "anon_inode")) - * Lf->ntype = Ntype = N_ANON_INODE; - * - * to detect our old anon_inode logic. - * - * Rather than mess with our internal sane inode data, just fix it - * up here in getattr() by masking off the format bits. - */ static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(path->dentry); - - generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); - stat->mode &= ~S_IFMT; - return 0; + return anon_inode_getattr(idmap, path, stat, request_mask, query_flags); } static const struct inode_operations pidfs_inode_operations = { From 22bdf3d6581af6d06ed8a46c6835648421cca0ea Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Apr 2025 11:54:17 +0200 Subject: [PATCH 3/9] anon_inode: explicitly block ->setattr() It is currently possible to change the mode and owner of the single anonymous inode in the kernel: int main(int argc, char *argv[]) { int ret, sfd; sigset_t mask; struct signalfd_siginfo fdsi; sigemptyset(&mask); sigaddset(&mask, SIGINT); sigaddset(&mask, SIGQUIT); ret = sigprocmask(SIG_BLOCK, &mask, NULL); if (ret < 0) _exit(1); sfd = signalfd(-1, &mask, 0); if (sfd < 0) _exit(2); ret = fchown(sfd, 5555, 5555); if (ret < 0) _exit(3); ret = fchmod(sfd, 0777); if (ret < 0) _exit(3); _exit(4); } This is a bug. It's not really a meaningful one because anonymous inodes don't really figure into path lookup and they cannot be reopened via /proc//fd/ and can't be used for lookup itself. So they can only ever serve as direct references. But it is still completely bogus to allow the mode and ownership or any of the properties of the anonymous inode to be changed. Block this! Link: https://lore.kernel.org/20250407-work-anon_inode-v1-3-53a44c20d44e@kernel.org Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org # all LTS kernels Signed-off-by: Christian Brauner --- fs/anon_inodes.c | 7 +++++++ fs/internal.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 42e4b9c34f89..cb51a90bece0 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -57,8 +57,15 @@ int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, return 0; } +int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr) +{ + return -EOPNOTSUPP; +} + static const struct inode_operations anon_inode_operations = { .getattr = anon_inode_getattr, + .setattr = anon_inode_setattr, }; /* diff --git a/fs/internal.h b/fs/internal.h index 717dc9eb6185..f545400ce607 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -346,3 +346,5 @@ int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_ int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags); +int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr); From c83b9024966090fe0df92aab16975b8d00089e1f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Apr 2025 11:54:18 +0200 Subject: [PATCH 4/9] pidfs: use anon_inode_setattr() So far pidfs did use it's own version. Just use the generic version. We use our own wrappers because we're going to be implementing properties soon. Link: https://lore.kernel.org/20250407-work-anon_inode-v1-4-53a44c20d44e@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/pidfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index 809c3393b6a3..10b4ee454cca 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -569,7 +569,7 @@ static struct vfsmount *pidfs_mnt __ro_after_init; static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { - return -EOPNOTSUPP; + return anon_inode_setattr(idmap, dentry, attr); } static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path, From 1ed95281c0c77dbb1540f9855cd3c5f19900f7a5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Apr 2025 11:54:19 +0200 Subject: [PATCH 5/9] anon_inode: raise SB_I_NODEV and SB_I_NOEXEC It isn't possible to execute anonymous inodes because they cannot be opened in any way after they have been created. This includes execution: execveat(fd_anon_inode, "", NULL, NULL, AT_EMPTY_PATH) Anonymous inodes have inode->f_op set to no_open_fops which sets no_open() which returns ENXIO. That means any call to do_dentry_open() which is the endpoint of the do_open_execat() will fail. There's no chance to execute an anonymous inode. Unless a given subsystem overrides it ofc. However, we should still harden this and raise SB_I_NODEV and SB_I_NOEXEC on the superblock itself so that no one gets any creative ideas. Link: https://lore.kernel.org/20250407-work-anon_inode-v1-5-53a44c20d44e@kernel.org Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org # all LTS kernels Signed-off-by: Christian Brauner --- fs/anon_inodes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index cb51a90bece0..e51e7d88980a 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -86,6 +86,8 @@ static int anon_inodefs_init_fs_context(struct fs_context *fc) struct pseudo_fs_context *ctx = init_pseudo(fc, ANON_INODE_FS_MAGIC); if (!ctx) return -ENOMEM; + fc->s_iflags |= SB_I_NOEXEC; + fc->s_iflags |= SB_I_NODEV; ctx->dops = &anon_inodefs_dentry_operations; return 0; } From c784159750bc83aa91d49ceeb2dbd627d22c0587 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Apr 2025 11:54:20 +0200 Subject: [PATCH 6/9] selftests/filesystems: add chown() test for anonymous inodes Test that anonymous inodes cannot be chown()ed. Link: https://lore.kernel.org/20250407-work-anon_inode-v1-6-53a44c20d44e@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- .../testing/selftests/filesystems/.gitignore | 1 + tools/testing/selftests/filesystems/Makefile | 2 +- .../selftests/filesystems/anon_inode_test.c | 26 +++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/filesystems/anon_inode_test.c diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index 828b66a10c63..7afa58e2bb20 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -2,3 +2,4 @@ dnotify_test devpts_pts file_stressor +anon_inode_test diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 66305fc34c60..b02326193fee 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts file_stressor +TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c new file mode 100644 index 000000000000..f2cae8f1ccae --- /dev/null +++ b/tools/testing/selftests/filesystems/anon_inode_test.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ + +#include +#include +#include + +#include "../kselftest_harness.h" +#include "overlayfs/wrappers.h" + +TEST(anon_inode_no_chown) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(fchown(fd_context, 1234, 5678), 0); + ASSERT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST_HARNESS_MAIN + From fcf31ec7cade5b17699bc2ed04cacbc92b14a864 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Apr 2025 11:54:21 +0200 Subject: [PATCH 7/9] selftests/filesystems: add chmod() test for anonymous inodes Test that anonymous inodes cannot be chmod()ed. Link: https://lore.kernel.org/20250407-work-anon_inode-v1-7-53a44c20d44e@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- .../testing/selftests/filesystems/anon_inode_test.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c index f2cae8f1ccae..7c4d0a225363 100644 --- a/tools/testing/selftests/filesystems/anon_inode_test.c +++ b/tools/testing/selftests/filesystems/anon_inode_test.c @@ -22,5 +22,18 @@ TEST(anon_inode_no_chown) EXPECT_EQ(close(fd_context), 0); } +TEST(anon_inode_no_chmod) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(fchmod(fd_context, 0777), 0); + ASSERT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(close(fd_context), 0); +} + TEST_HARNESS_MAIN From f8ca403ae77cbfb4bf0fcd92c74075886f5f4aa8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Apr 2025 11:54:22 +0200 Subject: [PATCH 8/9] selftests/filesystems: add exec() test for anonymous inodes Test that anonymous inodes cannot be exec()ed. Link: https://lore.kernel.org/20250407-work-anon_inode-v1-8-53a44c20d44e@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- .../testing/selftests/filesystems/anon_inode_test.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c index 7c4d0a225363..486496252ddd 100644 --- a/tools/testing/selftests/filesystems/anon_inode_test.c +++ b/tools/testing/selftests/filesystems/anon_inode_test.c @@ -35,5 +35,18 @@ TEST(anon_inode_no_chmod) EXPECT_EQ(close(fd_context), 0); } +TEST(anon_inode_no_exec) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(execveat(fd_context, "", NULL, NULL, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, EACCES); + + EXPECT_EQ(close(fd_context), 0); +} + TEST_HARNESS_MAIN From 25a6cc9a630b4b1b783903b23a3a97c5bf16bf41 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 7 Apr 2025 11:54:23 +0200 Subject: [PATCH 9/9] selftests/filesystems: add open() test for anonymous inodes Test that anonymous inodes cannot be open()ed. Link: https://lore.kernel.org/20250407-work-anon_inode-v1-9-53a44c20d44e@kernel.org Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- .../selftests/filesystems/anon_inode_test.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c index 486496252ddd..e8e0ef1460d2 100644 --- a/tools/testing/selftests/filesystems/anon_inode_test.c +++ b/tools/testing/selftests/filesystems/anon_inode_test.c @@ -48,5 +48,22 @@ TEST(anon_inode_no_exec) EXPECT_EQ(close(fd_context), 0); } +TEST(anon_inode_no_open) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_GE(dup2(fd_context, 500), 0); + ASSERT_EQ(close(fd_context), 0); + fd_context = 500; + + ASSERT_LT(open("/proc/self/fd/500", 0), 0); + ASSERT_EQ(errno, ENXIO); + + EXPECT_EQ(close(fd_context), 0); +} + TEST_HARNESS_MAIN