selftests/pidfd: add CLONE_PIDFD_AUTOKILL tests

Add tests for CLONE_PIDFD_AUTOKILL:

- autokill_basic: Verify closing the clone3 pidfd kills the child.
- autokill_requires_pidfd: Verify AUTOKILL without CLONE_PIDFD fails.
- autokill_requires_autoreap: Verify AUTOKILL without CLONE_AUTOREAP
  fails.
- autokill_rejects_thread: Verify AUTOKILL with CLONE_THREAD fails.
- autokill_pidfd_open_no_effect: Verify only the clone3 pidfd triggers
  autokill, not pidfd_open().
- autokill_requires_cap_sys_admin: Verify AUTOKILL without CLONE_NNP
  fails with -EPERM for an unprivileged caller.
- autokill_without_nnp_with_cap: Verify AUTOKILL without CLONE_NNP
  succeeds with CAP_SYS_ADMIN.

Link: https://patch.msgid.link/20260226-work-pidfs-autoreap-v5-6-d148b984a989@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner 2026-02-26 14:51:04 +01:00
parent 2a4d85aa1c
commit ec26879e6d

View File

@ -30,6 +30,33 @@
#define CLONE_NNP (1ULL << 35)
#endif
/*
 * Fallback definition of the clone3() flag exercised by the autokill
 * tests; only used when the included headers do not already define it.
 */
#ifndef CLONE_PIDFD_AUTOKILL
#define CLONE_PIDFD_AUTOKILL (1ULL << 36)
#endif
/*
 * Fallback for the capability ABI version that drop_all_caps() places in
 * the capset header; only used when no included header defines it.
 */
#ifndef _LINUX_CAPABILITY_VERSION_3
#define _LINUX_CAPABILITY_VERSION_3 0x20080522
#endif
/*
 * Header argument passed to the raw capset syscall by drop_all_caps().
 * NOTE(review): presumably mirrors the kernel's capability user header
 * layout (version followed by pid) — confirm against linux/capability.h.
 */
struct cap_header {
/* Capability ABI version; drop_all_caps() sets _LINUX_CAPABILITY_VERSION_3. */
__u32 version;
/* Target of the call; drop_all_caps() leaves this zero-initialized. */
int pid;
};
/*
 * One element of the data array passed to the raw capset syscall by
 * drop_all_caps(), which supplies two all-zero elements.
 * NOTE(review): presumably mirrors the kernel's capability user data
 * layout — confirm against linux/capability.h.
 */
struct cap_data {
/* Effective capability bits. */
__u32 effective;
/* Permitted capability bits. */
__u32 permitted;
/* Inheritable capability bits. */
__u32 inheritable;
};
static int drop_all_caps(void)
{
struct cap_header hdr = { .version = _LINUX_CAPABILITY_VERSION_3 };
struct cap_data data[2] = {};
return syscall(__NR_capset, &hdr, data);
}
static pid_t create_autoreap_child(int *pidfd)
{
struct __clone_args args = {
@ -619,4 +646,255 @@ TEST(autoreap_no_new_privs_unset)
close(pidfd);
}
/*
 * Helper: clone3() a child with
 *   CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP | CLONE_NNP
 * and no exit signal.
 *
 * @pidfd: location whose address is passed to clone3() for the new pidfd.
 *
 * Returns the sys_clone3() result unchanged; callers in this file treat 0
 * as "running in the child" and a negative value as failure.
 */
static pid_t create_autokill_child(int *pidfd)
{
	struct __clone_args args = {};

	args.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
		     CLONE_AUTOREAP | CLONE_NNP;
	args.exit_signal = 0;
	args.pidfd = ptr_to_u64(pidfd);

	return sys_clone3(&args, sizeof(args));
}
/*
 * autokill_basic: park a child in pause(), close the pidfd that clone3()
 * returned, and expect the child to be killed and autoreaped.
 */
TEST(autokill_basic)
{
	int pidfd = -1, pollfd_fd = -1, ret;
	struct pollfd pfd;
	pid_t pid;

	pid = create_autokill_child(&pidfd);
	if (pid < 0 && errno == EINVAL)
		SKIP(return, "CLONE_PIDFD_AUTOKILL not supported");
	ASSERT_GE(pid, 0);

	if (!pid) {
		/* Child: block until a signal arrives. */
		pause();
		_exit(1);
	}

	ASSERT_GE(pidfd, 0);

	/*
	 * The clone3 pidfd is about to be closed, so grab an independent
	 * pidfd through pidfd_open() to watch the child's exit with.
	 */
	pollfd_fd = sys_pidfd_open(pid, 0);
	ASSERT_GE(pollfd_fd, 0);

	/* Dropping the clone3 pidfd is what should trigger autokill. */
	close(pidfd);

	/* Give the child up to five seconds to die. */
	pfd = (struct pollfd){ .fd = pollfd_fd, .events = POLLIN };
	ret = poll(&pfd, 1, 5000);
	ASSERT_EQ(ret, 1);
	ASSERT_TRUE(pfd.revents & POLLIN);

	/* With autoreap there must be nothing left for waitpid() to collect. */
	usleep(100000);
	ret = waitpid(pid, NULL, WNOHANG);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, ECHILD);

	close(pollfd_fd);
}
/*
 * autokill_requires_pidfd: requesting CLONE_PIDFD_AUTOKILL without
 * CLONE_PIDFD must be rejected with EINVAL.
 */
TEST(autokill_requires_pidfd)
{
	struct __clone_args args = {};
	pid_t pid;

	/* Deliberately omit CLONE_PIDFD (and therefore the pidfd pointer). */
	args.flags = CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP;
	args.exit_signal = 0;

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_EQ(pid, -1);
	ASSERT_EQ(errno, EINVAL);
}
/*
 * autokill_requires_autoreap: requesting CLONE_PIDFD_AUTOKILL without
 * CLONE_AUTOREAP must be rejected with EINVAL.
 */
TEST(autokill_requires_autoreap)
{
	struct __clone_args args = {};
	int pidfd = -1;
	pid_t pid;

	/* Deliberately omit CLONE_AUTOREAP. */
	args.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL;
	args.exit_signal = 0;
	args.pidfd = ptr_to_u64(&pidfd);

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_EQ(pid, -1);
	ASSERT_EQ(errno, EINVAL);
}
/*
 * autokill_rejects_thread: combining CLONE_PIDFD_AUTOKILL with
 * CLONE_THREAD must be rejected with EINVAL.
 */
TEST(autokill_rejects_thread)
{
	struct __clone_args args = {};
	int pidfd = -1;
	pid_t pid;

	/* A valid autokill flag set, plus the flags that request a thread. */
	args.flags = CLONE_THREAD | CLONE_SIGHAND | CLONE_VM |
		     CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP;
	args.exit_signal = 0;
	args.pidfd = ptr_to_u64(&pidfd);

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_EQ(pid, -1);
	ASSERT_EQ(errno, EINVAL);
}
/*
 * autokill_pidfd_open_no_effect: only the pidfd returned by clone3() may
 * trigger autokill; pidfds obtained through pidfd_open() must not.
 *
 * Step 1: close a pidfd_open()'d fd first; the child should survive.
 * Step 2: close the clone3 pidfd; the child should be killed and
 *         autoreaped.
 */
TEST(autokill_pidfd_open_no_effect)
{
	int pidfd = -1, open_fd = -1, ret;
	struct pollfd pfd;
	pid_t pid;

	pid = create_autokill_child(&pidfd);
	if (pid < 0 && errno == EINVAL)
		SKIP(return, "CLONE_PIDFD_AUTOKILL not supported");
	ASSERT_GE(pid, 0);

	if (!pid) {
		/* Child: block until a signal arrives. */
		pause();
		_exit(1);
	}

	ASSERT_GE(pidfd, 0);

	/* Step 1: open and immediately close an unrelated pidfd. */
	open_fd = sys_pidfd_open(pid, 0);
	ASSERT_GE(open_fd, 0);
	close(open_fd);

	/* Leave time for a (wrongly) triggered kill to take effect. */
	usleep(200000);

	/* A non-blocking poll on the clone3 pidfd must report nothing yet. */
	pfd = (struct pollfd){ .fd = pidfd, .events = POLLIN };
	ret = poll(&pfd, 1, 0);
	ASSERT_EQ(ret, 0) {
		TH_LOG("Child died after closing pidfd_open fd — should still be alive");
	}

	/* Step 2: take a fresh observer fd, then drop the clone3 pidfd. */
	open_fd = sys_pidfd_open(pid, 0);
	ASSERT_GE(open_fd, 0);

	close(pidfd);

	pfd = (struct pollfd){ .fd = open_fd, .events = POLLIN };
	ret = poll(&pfd, 1, 5000);
	ASSERT_EQ(ret, 1);
	ASSERT_TRUE(pfd.revents & POLLIN);

	/* With autoreap there must be nothing left for waitpid() to collect. */
	usleep(100000);
	ret = waitpid(pid, NULL, WNOHANG);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, ECHILD);

	close(open_fd);
}
/*
* Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP fails with EPERM
* for an unprivileged caller.
*/
TEST(autokill_requires_cap_sys_admin)
{
int pidfd = -1, ret;
struct __clone_args args = {
.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
CLONE_AUTOREAP,
.exit_signal = 0,
.pidfd = ptr_to_u64(&pidfd),
};
pid_t pid;
/* Drop all capabilities so we lack CAP_SYS_ADMIN. */
ret = drop_all_caps();
ASSERT_EQ(ret, 0);
pid = sys_clone3(&args, sizeof(args));
ASSERT_EQ(pid, -1);
ASSERT_EQ(errno, EPERM);
}
/*
* Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP succeeds with
* CAP_SYS_ADMIN.
*/
TEST(autokill_without_nnp_with_cap)
{
struct __clone_args args = {
.flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL |
CLONE_AUTOREAP,
.exit_signal = 0,
};
struct pidfd_info info = { .mask = PIDFD_INFO_EXIT };
int pidfd = -1, ret;
struct pollfd pfd;
pid_t pid;
if (geteuid() != 0)
SKIP(return, "Need root/CAP_SYS_ADMIN");
args.pidfd = ptr_to_u64(&pidfd);
pid = sys_clone3(&args, sizeof(args));
if (pid < 0 && errno == EINVAL)
SKIP(return, "CLONE_PIDFD_AUTOKILL not supported");
ASSERT_GE(pid, 0);
if (pid == 0)
_exit(0);
ASSERT_GE(pidfd, 0);
/* Wait for child to exit. */
pfd.fd = pidfd;
pfd.events = POLLIN;
ret = poll(&pfd, 1, 5000);
ASSERT_EQ(ret, 1);
ret = ioctl(pidfd, PIDFD_GET_INFO, &info);
ASSERT_EQ(ret, 0);
ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT);
ASSERT_TRUE(WIFEXITED(info.exit_code));
ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
close(pidfd);
}
/*
 * NOTE(review): presumably expands to the harness-provided main() that
 * runs every TEST() defined in this file — confirm in kselftest_harness.h.
 */
TEST_HARNESS_MAIN