pidfs: get rid of __pidfd_prepare()

Fold it into pidfd_prepare() and rename PIDFD_CLONE to PIDFD_STALE to
indicate that the passed pid might not have task linkage and no explicit
check for that should be performed.

Link: https://lore.kernel.org/20250425-work-pidfs-net-v2-3-450a19461e75@kernel.org
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: David Rheinsberg <david@readahead.eu>
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner 2025-04-25 10:11:32 +02:00
parent fd0a109a0f
commit a71f402acd
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
4 changed files with 44 additions and 65 deletions

View File

@ -768,7 +768,7 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path,
{
enum pid_type type;
if (flags & PIDFD_CLONE)
if (flags & PIDFD_STALE)
return true;
/*
@ -777,10 +777,14 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path,
* pidfd has been allocated perform another check that the pid
* is still alive. If it is exit information is available even
* if the task gets reaped before the pidfd is returned to
* userspace. The only exception is PIDFD_CLONE where no task
* linkage has been established for @pid yet and the kernel is
* in the middle of process creation so there's nothing for
* pidfs to miss.
* userspace. The only exception are indicated by PIDFD_STALE:
*
* (1) The kernel is in the middle of task creation and thus no
* task linkage has been established yet.
* (2) The caller knows @pid has been registered in pidfs at a
* time when the task was still alive.
*
* In both cases exit information will have been reported.
*/
if (flags & PIDFD_THREAD)
type = PIDTYPE_PID;
@ -874,11 +878,11 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
int ret;
/*
* Ensure that PIDFD_CLONE can be passed as a flag without
* Ensure that PIDFD_STALE can be passed as a flag without
* overloading other uapi pidfd flags.
*/
BUILD_BUG_ON(PIDFD_CLONE == PIDFD_THREAD);
BUILD_BUG_ON(PIDFD_CLONE == PIDFD_NONBLOCK);
BUILD_BUG_ON(PIDFD_STALE == PIDFD_THREAD);
BUILD_BUG_ON(PIDFD_STALE == PIDFD_NONBLOCK);
ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
if (ret < 0)
@ -887,7 +891,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
if (!pidfs_pid_valid(pid, &path, flags))
return ERR_PTR(-ESRCH);
flags &= ~PIDFD_CLONE;
flags &= ~PIDFD_STALE;
pidfd_file = dentry_open(&path, flags, current_cred());
/* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */
if (!IS_ERR(pidfd_file))

View File

@ -77,7 +77,7 @@ struct file;
struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file);
void do_notify_pidfd(struct task_struct *task);
static inline struct pid *get_pid(struct pid *pid)

View File

@ -12,7 +12,7 @@
#define PIDFD_THREAD O_EXCL
#ifdef __KERNEL__
#include <linux/sched.h>
#define PIDFD_CLONE CLONE_PIDFD
#define PIDFD_STALE CLONE_PIDFD
#endif
/* Flags for pidfd_send_signal(). */

View File

@ -2035,55 +2035,11 @@ static inline void rcu_copy_process(struct task_struct *p)
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
/**
* __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
* @pid: the struct pid for which to create a pidfd
* @flags: flags of the new @pidfd
* @ret: Where to return the file for the pidfd.
*
* Allocate a new file that stashes @pid and reserve a new pidfd number in the
* caller's file descriptor table. The pidfd is reserved but not installed yet.
*
* The helper doesn't perform checks on @pid which makes it useful for pidfds
* created via CLONE_PIDFD where @pid has no task attached when the pidfd and
* pidfd file are prepared.
*
* If this function returns successfully the caller is responsible to either
* call fd_install() passing the returned pidfd and pidfd file as arguments in
* order to install the pidfd into its file descriptor table or they must use
* put_unused_fd() and fput() on the returned pidfd and pidfd file
* respectively.
*
* This function is useful when a pidfd must already be reserved but there
* might still be points of failure afterwards and the caller wants to ensure
* that no pidfd is leaked into its file descriptor table.
*
* Return: On success, a reserved pidfd is returned from the function and a new
* pidfd file is returned in the last argument to the function. On
* error, a negative error code is returned from the function and the
* last argument remains unchanged.
*/
static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
{
struct file *pidfd_file;
CLASS(get_unused_fd, pidfd)(O_CLOEXEC);
if (pidfd < 0)
return pidfd;
pidfd_file = pidfs_alloc_file(pid, flags | O_RDWR);
if (IS_ERR(pidfd_file))
return PTR_ERR(pidfd_file);
*ret = pidfd_file;
return take_fd(pidfd);
}
/**
* pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
* @pid: the struct pid for which to create a pidfd
* @flags: flags of the new @pidfd
* @ret: Where to return the pidfd.
* @ret_file: return the new pidfs file
*
* Allocate a new file that stashes @pid and reserve a new pidfd number in the
* caller's file descriptor table. The pidfd is reserved but not installed yet.
@ -2106,16 +2062,26 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
* error, a negative error code is returned from the function and the
* last argument remains unchanged.
*/
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file)
{
struct file *pidfs_file;
/*
* While holding the pidfd waitqueue lock removing the task
* linkage for the thread-group leader pid (PIDTYPE_TGID) isn't
* possible. Thus, if there's still task linkage for PIDTYPE_PID
* not having thread-group leader linkage for the pid means it
* wasn't a thread-group leader in the first place.
* PIDFD_STALE is only allowed to be passed if the caller knows
* that @pid is already registered in pidfs and thus
* PIDFD_INFO_EXIT information is guaranteed to be available.
*/
scoped_guard(spinlock_irq, &pid->wait_pidfd.lock) {
if (!(flags & PIDFD_STALE)) {
/*
* While holding the pidfd waitqueue lock removing the
* task linkage for the thread-group leader pid
* (PIDTYPE_TGID) isn't possible. Thus, if there's still
* task linkage for PIDTYPE_PID not having thread-group
* leader linkage for the pid means it wasn't a
* thread-group leader in the first place.
*/
guard(spinlock_irq)(&pid->wait_pidfd.lock);
/* Task has already been reaped. */
if (!pid_has_task(pid, PIDTYPE_PID))
return -ESRCH;
@ -2128,7 +2094,16 @@ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
return -ENOENT;
}
return __pidfd_prepare(pid, flags, ret);
CLASS(get_unused_fd, pidfd)(O_CLOEXEC);
if (pidfd < 0)
return pidfd;
pidfs_file = pidfs_alloc_file(pid, flags | O_RDWR);
if (IS_ERR(pidfs_file))
return PTR_ERR(pidfs_file);
*ret_file = pidfs_file;
return take_fd(pidfd);
}
static void __delayed_free_task(struct rcu_head *rhp)
@ -2477,7 +2452,7 @@ __latent_entropy struct task_struct *copy_process(
* Note that no task has been attached to @pid yet indicate
* that via CLONE_PIDFD.
*/
retval = __pidfd_prepare(pid, flags | PIDFD_CLONE, &pidfile);
retval = pidfd_prepare(pid, flags | PIDFD_STALE, &pidfile);
if (retval < 0)
goto bad_fork_free_pid;
pidfd = retval;