From cc678bf7aa9e2e6c2356fd7f955513c1bd7d4c97 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 10:29:04 +0200 Subject: [PATCH 01/11] fhandle: raise FILEID_IS_DIR in handle_type Currently FILEID_IS_DIR is raised in fh_flags which is wrong. Raise it in handle->handle_type were it's supposed to be. Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-1-d02a04858fe3@kernel.org Fixes: c374196b2b9f ("fs: name_to_handle_at() support for "explicit connectable" file handles") Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Cc: stable@kernel.org Signed-off-by: Christian Brauner --- fs/fhandle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index 3e092ae6d142..66ff60591d17 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -88,7 +88,7 @@ static long do_sys_name_to_handle(const struct path *path, if (fh_flags & EXPORT_FH_CONNECTABLE) { handle->handle_type |= FILEID_IS_CONNECTABLE; if (d_is_dir(path->dentry)) - fh_flags |= FILEID_IS_DIR; + handle->handle_type |= FILEID_IS_DIR; } retval = 0; } From 774adcb55f159167fe0dfd343174fdedba3ae2f4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 10:29:05 +0200 Subject: [PATCH 02/11] fhandle: hoist copy_from_user() above get_path_from_fd() In follow-up patches we need access to @file_handle->handle_type before we start caring about get_path_from_fd(). Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-2-d02a04858fe3@kernel.org Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/fhandle.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index 66ff60591d17..73f56f8e7d5d 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -323,13 +323,24 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, { int retval = 0; struct file_handle f_handle; - struct file_handle *handle = NULL; + struct file_handle *handle __free(kfree) = NULL; struct handle_to_path_ctx ctx = {}; const struct export_operations *eops; + if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) + return -EFAULT; + + if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || + (f_handle.handle_bytes == 0)) + return -EINVAL; + + if (f_handle.handle_type < 0 || + FILEID_USER_FLAGS(f_handle.handle_type) & ~FILEID_VALID_USER_FLAGS) + return -EINVAL; + retval = get_path_from_fd(mountdirfd, &ctx.root); if (retval) - goto out_err; + return retval; eops = ctx.root.mnt->mnt_sb->s_export_op; if (eops && eops->permission) @@ -339,21 +350,6 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, if (retval) goto out_path; - if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { - retval = -EFAULT; - goto out_path; - } - if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || - (f_handle.handle_bytes == 0)) { - retval = -EINVAL; - goto out_path; - } - if (f_handle.handle_type < 0 || - FILEID_USER_FLAGS(f_handle.handle_type) & ~FILEID_VALID_USER_FLAGS) { - retval = -EINVAL; - goto out_path; - } - handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes), GFP_KERNEL); if (!handle) { @@ -366,7 +362,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, &ufh->f_handle, f_handle.handle_bytes)) { retval = -EFAULT; - goto out_handle; + goto out_path; } /* @@ -384,11 +380,8 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, handle->handle_type &= ~FILEID_USER_FLAGS_MASK; retval = do_handle_to_path(handle, path, &ctx); -out_handle: - kfree(handle); out_path: path_put(&ctx.root); -out_err: return retval; } From f7be8a333253cc319f5c6456b5cdab2a57b7351b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 10:29:06 +0200 Subject: [PATCH 03/11] fhandle: rename to get_path_anchor() Rename as we're going to expand the function in the next step. The path just serves as the anchor tying the decoding to the filesystem. Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-3-d02a04858fe3@kernel.org Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/fhandle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index 73f56f8e7d5d..d8d32208c621 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -168,7 +168,7 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, return err; } -static int get_path_from_fd(int fd, struct path *root) +static int get_path_anchor(int fd, struct path *root) { if (fd == AT_FDCWD) { struct fs_struct *fs = current->fs; @@ -338,7 +338,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, FILEID_USER_FLAGS(f_handle.handle_type) & ~FILEID_VALID_USER_FLAGS) return -EINVAL; - retval = get_path_from_fd(mountdirfd, &ctx.root); + retval = get_path_anchor(mountdirfd, &ctx.root); if (retval) return retval; From a0d8051cfd8145eb49dbd0c0c2f174d09da77796 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 10:29:07 +0200 Subject: [PATCH 04/11] pidfs: add pidfs_root_path() helper Allow to return the root of the global pidfs filesystem. Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-4-d02a04858fe3@kernel.org Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/internal.h | 1 + fs/pidfs.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/fs/internal.h b/fs/internal.h index 22ba066d1dba..ad256bccdc85 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -353,3 +353,4 @@ int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path, unsigned int query_flags); int anon_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); +void pidfs_get_root(struct path *path); diff --git a/fs/pidfs.c b/fs/pidfs.c index 47f5f9e0bdff..4fc7a7f4a3fe 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -31,6 +31,14 @@ static struct kmem_cache *pidfs_attr_cachep __ro_after_init; static struct kmem_cache *pidfs_xattr_cachep __ro_after_init; +static struct path pidfs_root_path = {}; + +void pidfs_get_root(struct path *path) +{ + *path = pidfs_root_path; + path_get(path); +} + /* * Stashes information that userspace needs to access even after the * process has been reaped. @@ -1068,4 +1076,7 @@ void __init pidfs_init(void) pidfs_mnt = kern_mount(&pidfs_type); if (IS_ERR(pidfs_mnt)) panic("Failed to mount pidfs pseudo filesystem"); + + pidfs_root_path.mnt = pidfs_mnt; + pidfs_root_path.dentry = pidfs_mnt->mnt_root; } From 1c5484395f9f8b7bf0702f34aa3406353e45d7ec Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 10:29:08 +0200 Subject: [PATCH 05/11] fhandle: reflow get_path_anchor() Switch to a more common coding style. Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-5-d02a04858fe3@kernel.org Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/fhandle.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index d8d32208c621..9ef35f8e8989 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -170,21 +170,25 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, static int get_path_anchor(int fd, struct path *root) { + if (fd >= 0) { + CLASS(fd, f)(fd); + if (fd_empty(f)) + return -EBADF; + *root = fd_file(f)->f_path; + path_get(root); + return 0; + } + if (fd == AT_FDCWD) { struct fs_struct *fs = current->fs; spin_lock(&fs->lock); *root = fs->pwd; path_get(root); spin_unlock(&fs->lock); - } else { - CLASS(fd, f)(fd); - if (fd_empty(f)) - return -EBADF; - *root = fd_file(f)->f_path; - path_get(root); + return 0; } - return 0; + return -EBADF; } static int vfs_dentry_acceptable(void *context, struct dentry *dentry) From a4c746f06853f91d3759ae8aca514d135b6aa56d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 15:48:42 +0200 Subject: [PATCH 06/11] uapi/fcntl: mark range as reserved Mark the range from -10000 to -40000 as a range reserved for special in-kernel values. Move the PIDFD_SELF_*/PIDFD_THREAD_* sentinels over so all the special values are in one place. Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-6-d02a04858fe3@kernel.org Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 16 ++++++++++++++++ include/uapi/linux/pidfd.h | 15 --------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index a15ac2fa4b20..ed02d8ae0667 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -90,10 +90,26 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +/* Reserved kernel ranges [-100], [-10000, -40000]. */ #define AT_FDCWD -100 /* Special value for dirfd used to indicate openat should use the current working directory. */ +/* + * The concept of process and threads in userland and the kernel is a confusing + * one - within the kernel every thread is a 'task' with its own individual PID, + * however from userland's point of view threads are grouped by a single PID, + * which is that of the 'thread group leader', typically the first thread + * spawned. + * + * To cut the Gideon knot, for internal kernel usage, we refer to + * PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel + * perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread + * group leader... + */ +#define PIDFD_SELF_THREAD -10000 /* Current thread. */ +#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ + /* Generic flags for the *at(2) family of syscalls. */ diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index c27a4e238e4b..957db425d459 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -42,21 +42,6 @@ #define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */ #define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */ -/* - * The concept of process and threads in userland and the kernel is a confusing - * one - within the kernel every thread is a 'task' with its own individual PID, - * however from userland's point of view threads are grouped by a single PID, - * which is that of the 'thread group leader', typically the first thread - * spawned. - * - * To cut the Gideon knot, for internal kernel usage, we refer to - * PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel - * perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread - * group leader... - */ -#define PIDFD_SELF_THREAD -10000 /* Current thread. */ -#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ - /* * ...and for userland we make life simpler - PIDFD_SELF refers to the current * thread, PIDFD_SELF_PROCESS refers to the process thread group leader. From 67fcec2919e4ed31ab845eb456ad7d6f1e85505c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 15:48:49 +0200 Subject: [PATCH 07/11] fcntl/pidfd: redefine PIDFD_SELF_THREAD_GROUP Don't jump somewhere into the middle of the reserved range. We're still able to change that value it won't be that widely used yet. If not, we can revert. Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 2 +- tools/testing/selftests/pidfd/pidfd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index ed02d8ae0667..ba4a698d2f33 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -108,7 +108,7 @@ * group leader... */ #define PIDFD_SELF_THREAD -10000 /* Current thread. */ -#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ /* Generic flags for the *at(2) family of syscalls. */ diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index efd74063126e..5dfeb1bdf399 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -56,7 +56,7 @@ #endif #ifndef PIDFD_SELF_THREAD_GROUP -#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ #endif #ifndef PIDFD_SELF From cd5d2006327b6d8488612cb8c03ad7304417c8f2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 10:29:10 +0200 Subject: [PATCH 08/11] uapi/fcntl: add FD_INVALID Add a marker for an invalid file descriptor. Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-7-d02a04858fe3@kernel.org Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index ba4a698d2f33..a5bebe7c4400 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -110,6 +110,7 @@ #define PIDFD_SELF_THREAD -10000 /* Current thread. */ #define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ +#define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */ /* Generic flags for the *at(2) family of syscalls. */ From 3941e37f62fe2c3c8b8675c12183185f20450539 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 16:57:51 +0200 Subject: [PATCH 09/11] uapi/fcntl: add FD_PIDFS_ROOT Add a special file descriptor indicating the root of the pidfs filesystem. Signed-off-by: Christian Brauner --- include/uapi/linux/fcntl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index a5bebe7c4400..f291ab4f94eb 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -110,6 +110,7 @@ #define PIDFD_SELF_THREAD -10000 /* Current thread. */ #define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ +#define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */ #define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */ /* Generic flags for the *at(2) family of syscalls. */ From b95361481b1e5bd3627835b7e4b921d5a09e68a4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 10:29:13 +0200 Subject: [PATCH 10/11] fhandle, pidfs: support open_by_handle_at() purely based on file handle Various filesystems such as pidfs (and likely drm in the future) have a use-case to support opening files purely based on the handle without having to require a file descriptor to another object. That's especially the case for filesystems that don't do any lookup whatsoever and there's zero relationship between the objects. Such filesystems are also singletons that stay around for the lifetime of the system meaning that they can be uniquely identified and accessed purely based on the file handle type. Enable that so that userspace doesn't have to allocate an object needlessly especially if they can't do that for whatever reason. Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-10-d02a04858fe3@kernel.org Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/fhandle.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/fhandle.c b/fs/fhandle.c index 9ef35f8e8989..b1363ead6c5e 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -188,6 +188,11 @@ static int get_path_anchor(int fd, struct path *root) return 0; } + if (fd == FD_PIDFS_ROOT) { + pidfs_get_root(root); + return 0; + } + return -EBADF; } From 914e6b1e85c5715ca2e7ec6293c05c71e9a98e86 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Jun 2025 10:29:14 +0200 Subject: [PATCH 11/11] selftests/pidfd: decode pidfd file handles withou having to specify an fd Link: https://lore.kernel.org/20250624-work-pidfs-fhandle-v2-11-d02a04858fe3@kernel.org Reviewed-by: Jan Kara Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- tools/testing/selftests/pidfd/Makefile | 2 +- tools/testing/selftests/pidfd/pidfd.h | 4 ++ .../selftests/pidfd/pidfd_file_handle_test.c | 60 +++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index 03a6eede9c9e..764a8f9ecefa 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall +CFLAGS += -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 5dfeb1bdf399..cd244d0860ff 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -19,6 +19,10 @@ #include "../kselftest.h" #include "../clone3/clone3_selftests.h" +#ifndef FD_PIDFS_ROOT +#define FD_PIDFS_ROOT -10002 +#endif + #ifndef P_PIDFD #define P_PIDFD 3 #endif diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c index 439b9c6c0457..6bd2e9c9565b 100644 --- a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c +++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c @@ -500,4 +500,64 @@ TEST_F(file_handle, valid_name_to_handle_at_flags) ASSERT_EQ(close(pidfd), 0); } +/* + * That we decode a file handle without having to pass a pidfd. + */ +TEST_F(file_handle, decode_purely_based_on_file_handle) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(-EBADF, fh, 0); + ASSERT_LT(pidfd, 0); + + pidfd = open_by_handle_at(AT_FDCWD, fh, 0); + ASSERT_LT(pidfd, 0); + + free(fh); +} + TEST_HARNESS_MAIN