From fb4366ba8f1c86a72ffcb2b6f349e05cf77897d0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:15 +0200 Subject: [PATCH 01/24] coredump: rename format_corename() It's not really about the name anymore. It parses very distinct information. Reflect that in the name. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-1-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 3568c300623c..79a3c8141e8c 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -225,12 +225,13 @@ static int cn_print_exe_file(struct core_name *cn, bool name_only) return ret; } -/* format_corename will inspect the pattern parameter, and output a - * name into corename, which must have space for at least - * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. +/* + * coredump_parse will inspect the pattern parameter, and output a name + * into corename, which must have space for at least CORENAME_MAX_SIZE + * bytes plus one byte for the zero terminator. */ -static int format_corename(struct core_name *cn, struct coredump_params *cprm, - size_t **argv, int *argc) +static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, + size_t **argv, int *argc) { const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; @@ -910,7 +911,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) old_cred = override_creds(cred); - retval = format_corename(&cn, &cprm, &argv, &argc); + retval = coredump_parse(&cn, &cprm, &argv, &argc); if (retval < 0) { coredump_report_failure("format_corename failed, aborting core"); goto fail_unlock; From a5715af549b2ee0139ff965d337cfd1a5f7ee615 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:16 +0200 Subject: [PATCH 02/24] coredump: make coredump_parse() return bool There's no point in returning negative error values. They will never be seen by anyone. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-2-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 79a3c8141e8c..42ceb9db2a5a 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -230,8 +230,8 @@ static int cn_print_exe_file(struct core_name *cn, bool name_only) * into corename, which must have space for at least CORENAME_MAX_SIZE * bytes plus one byte for the zero terminator. */ -static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, - size_t **argv, int *argc) +static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm, + size_t **argv, int *argc) { const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; @@ -251,7 +251,7 @@ static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, else cn->core_type = COREDUMP_FILE; if (expand_corename(cn, core_name_size)) - return -ENOMEM; + return false; cn->corename[0] = '\0'; switch (cn->core_type) { @@ -259,33 +259,33 @@ static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, int argvs = sizeof(core_pattern) / 2; (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL); if (!(*argv)) - return -ENOMEM; + return false; (*argv)[(*argc)++] = 0; ++pat_ptr; if (!(*pat_ptr)) - return -ENOMEM; + return false; break; } case COREDUMP_SOCK: { /* skip the @ */ pat_ptr++; if (!(*pat_ptr)) - return -ENOMEM; + return false; if (*pat_ptr == '@') { pat_ptr++; if (!(*pat_ptr)) - return -ENOMEM; + return false; cn->core_type = COREDUMP_SOCK_REQ; } err = cn_printf(cn, "%s", pat_ptr); if (err) - return err; + return false; /* Require absolute paths. */ if (cn->corename[0] != '/') - return -EINVAL; + return false; /* * Ensure we can uses spaces to indicate additional @@ -293,7 +293,7 @@ static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, */ if (strchr(cn->corename, ' ')) { coredump_report_failure("Coredump socket may not %s contain spaces", cn->corename); - return -EINVAL; + return false; } /* @@ -303,13 +303,13 @@ static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, * via /proc/, using the SO_PEERPIDFD to guard * against pid recycling when opening /proc/. */ - return 0; + return true; } case COREDUMP_FILE: break; default: WARN_ON_ONCE(true); - return -EINVAL; + return false; } /* Repeat as long as we have more pattern to process and more output @@ -447,7 +447,7 @@ static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, } if (err) - return err; + return false; } out: @@ -457,9 +457,9 @@ static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, * and core_uses_pid is set, then .%pid will be appended to * the filename. Do not do this for piped commands. */ if (cn->core_type == COREDUMP_FILE && !pid_in_pattern && core_uses_pid) - return cn_printf(cn, ".%d", task_tgid_vnr(current)); + return cn_printf(cn, ".%d", task_tgid_vnr(current)) == 0; - return 0; + return true; } static int zap_process(struct signal_struct *signal, int exit_code) @@ -911,8 +911,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) old_cred = override_creds(cred); - retval = coredump_parse(&cn, &cprm, &argv, &argc); - if (retval < 0) { + if (!coredump_parse(&cn, &cprm, &argv, &argc)) { coredump_report_failure("format_corename failed, aborting core"); goto fail_unlock; } From 67c3a0b0ad1a78d7ee9c3aadaed22561f7f85466 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:17 +0200 Subject: [PATCH 03/24] coredump: fix socket path validation Make sure that we keep it extensible and well-formed. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-3-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 42ceb9db2a5a..70e37435eca9 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1399,9 +1399,17 @@ static inline bool check_coredump_socket(void) if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns) return false; - /* Must be an absolute path or the socket request. */ - if (*(core_pattern + 1) != '/' && *(core_pattern + 1) != '@') + /* Must be an absolute path... */ + if (core_pattern[1] != '/') { + /* ... or the socket request protocol... */ + if (core_pattern[1] != '@') + return false; + /* ... and if so must be an absolute path. */ + if (core_pattern[2] != '/') + return false; + /* Anything else is unsupported. */ return false; + } return true; } From 3a2c977c463c68bf6fcd0138d15efa5f3adc743c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:18 +0200 Subject: [PATCH 04/24] coredump: validate that path doesn't exceed UNIX_PATH_MAX so we don't pointlessly accepts things that go over the limit. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-4-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 70e37435eca9..a64b87878ab3 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1388,6 +1388,8 @@ void validate_coredump_safety(void) static inline bool check_coredump_socket(void) { + const char *p; + if (core_pattern[0] != '@') return true; @@ -1407,10 +1409,15 @@ static inline bool check_coredump_socket(void) /* ... and if so must be an absolute path. */ if (core_pattern[2] != '/') return false; - /* Anything else is unsupported. */ - return false; + p = &core_pattern[2]; + } else { + p = &core_pattern[1]; } + /* The path obviously cannot exceed UNIX_PATH_MAX. */ + if (strlen(p) >= UNIX_PATH_MAX) + return false; + return true; } From 0da3e3822cfabf062945e449f91ea3ca529eeaa4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:19 +0200 Subject: [PATCH 05/24] fs: move name_contains_dotdot() to header Move the helper from the firmware specific code to a header so we can reuse it for coredump sockets. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-5-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- drivers/base/firmware_loader/main.c | 31 ++++++++++------------------- include/linux/fs.h | 16 +++++++++++++++ 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 44486b2c7172..6942c62fa59d 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -822,26 +822,6 @@ static void fw_log_firmware_info(const struct firmware *fw, const char *name, {} #endif -/* - * Reject firmware file names with ".." path components. - * There are drivers that construct firmware file names from device-supplied - * strings, and we don't want some device to be able to tell us "I would like to - * be sent my firmware from ../../../etc/shadow, please". - * - * Search for ".." surrounded by either '/' or start/end of string. - * - * This intentionally only looks at the firmware name, not at the firmware base - * directory or at symlink contents. - */ -static bool name_contains_dotdot(const char *name) -{ - size_t name_len = strlen(name); - - return strcmp(name, "..") == 0 || strncmp(name, "../", 3) == 0 || - strstr(name, "/../") != NULL || - (name_len >= 3 && strcmp(name+name_len-3, "/..") == 0); -} - /* called from request_firmware() and request_firmware_work_func() */ static int _request_firmware(const struct firmware **firmware_p, const char *name, @@ -862,6 +842,17 @@ _request_firmware(const struct firmware **firmware_p, const char *name, goto out; } + + /* + * Reject firmware file names with ".." path components. + * There are drivers that construct firmware file names from + * device-supplied strings, and we don't want some device to be + * able to tell us "I would like to be sent my firmware from + * ../../../etc/shadow, please". + * + * This intentionally only looks at the firmware name, not at + * the firmware base directory or at symlink contents. + */ if (name_contains_dotdot(name)) { dev_warn(device, "Firmware load for '%s' refused, path contains '..' component\n", diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..18fdbd184eea 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3264,6 +3264,22 @@ static inline bool is_dot_dotdot(const char *name, size_t len) (len == 1 || (len == 2 && name[1] == '.')); } +/** + * name_contains_dotdot - check if a file name contains ".." path components + * + * Search for ".." surrounded by either '/' or start/end of string. + */ +static inline bool name_contains_dotdot(const char *name) +{ + size_t name_len; + + name_len = strlen(name); + return strcmp(name, "..") == 0 || + strncmp(name, "../", 3) == 0 || + strstr(name, "/../") != NULL || + (name_len >= 3 && strcmp(name + name_len - 3, "/..") == 0); +} + #include /* needed for stackable file system support */ From edfe3bdbbb52339cd8c2366402f2702c5ebc15c7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:20 +0200 Subject: [PATCH 06/24] coredump: don't allow ".." in coredump socket path There's no point in allowing to walk upwards for the coredump socket. We already force userspace to give use a sane path, no symlinks, no magiclinks, and also block "..". Use an absolute path without any shenanigans. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-6-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/coredump.c b/fs/coredump.c index a64b87878ab3..8437bdc26d08 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1418,6 +1418,10 @@ static inline bool check_coredump_socket(void) if (strlen(p) >= UNIX_PATH_MAX) return false; + /* Must not contain ".." in the path. */ + if (name_contains_dotdot(core_pattern)) + return false; + return true; } From 6dfc06d328b70af22c577bb908c97f8841b9f4fc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:21 +0200 Subject: [PATCH 07/24] coredump: validate socket path in coredump_parse() properly again. Someone might have modified the buffer concurrently. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-7-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/coredump.c b/fs/coredump.c index 8437bdc26d08..52efd1b34261 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -296,6 +296,17 @@ static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm, return false; } + /* Must not contain ".." in the path. */ + if (name_contains_dotdot(cn->corename)) { + coredump_report_failure("Coredump socket may not %s contain '..' spaces", cn->corename); + return false; + } + + if (strlen(cn->corename) >= UNIX_PATH_MAX) { + coredump_report_failure("Coredump socket path %s too long", cn->corename); + return false; + } + /* * Currently no need to parse any other options. * Relevant information can be retrieved from the peer From 8a25350fa430a28d0595a6d14af661d4f151b123 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:22 +0200 Subject: [PATCH 08/24] selftests/coredump: make sure invalid paths are rejected Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-8-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- .../selftests/coredump/stackdump_test.c | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 9a789156f27e..a4ac80bb1003 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -241,16 +241,19 @@ static int create_and_listen_unix_socket(const char *path) static bool set_core_pattern(const char *pattern) { - FILE *file; - int ret; + int fd; + ssize_t ret; - file = fopen("/proc/sys/kernel/core_pattern", "w"); - if (!file) + fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC); + if (fd < 0) return false; - ret = fprintf(file, "%s", pattern); - fclose(file); + ret = write(fd, pattern, strlen(pattern)); + close(fd); + if (ret < 0) + return false; + fprintf(stderr, "Set core_pattern to '%s' | %zu == %zu\n", pattern, ret, strlen(pattern)); return ret == strlen(pattern); } @@ -1804,4 +1807,21 @@ TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500) wait_and_check_coredump_server(pid_coredump_server, _metadata, self); } +TEST_F(coredump, socket_invalid_paths) +{ + ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket")); + ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/..")); + ASSERT_FALSE(set_core_pattern("@..")); + + ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket")); + ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@@../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/..")); + ASSERT_FALSE(set_core_pattern("@@..")); + + ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket")); +} + TEST_HARNESS_MAIN From 70e3ee31282d293c794fb5bbec8efe495c32044b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:23 +0200 Subject: [PATCH 09/24] coredump: rename do_coredump() to vfs_coredump() Align the naming with the rest of our helpers exposed outside of core vfs. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-9-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- Documentation/security/credentials.rst | 2 +- Documentation/translations/zh_CN/security/credentials.rst | 2 +- fs/coredump.c | 2 +- include/linux/coredump.h | 4 ++-- kernel/signal.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/security/credentials.rst b/Documentation/security/credentials.rst index 2aa0791bcefe..d0191c8b8060 100644 --- a/Documentation/security/credentials.rst +++ b/Documentation/security/credentials.rst @@ -555,5 +555,5 @@ the VFS, and that can be done by calling into such as ``vfs_mkdir()`` with a different set of credentials. This is done in the following places: * ``sys_faccessat()``. - * ``do_coredump()``. + * ``vfs_coredump()``. * nfs4recover.c. diff --git a/Documentation/translations/zh_CN/security/credentials.rst b/Documentation/translations/zh_CN/security/credentials.rst index 91c353dfb622..88fcd9152ffe 100644 --- a/Documentation/translations/zh_CN/security/credentials.rst +++ b/Documentation/translations/zh_CN/security/credentials.rst @@ -475,5 +475,5 @@ const指针上操作,因此不需要进行类型转换,但需要临时放弃 如 ``vfs_mkdir()`` 来实现。以下是一些进行此操作的位置: * ``sys_faccessat()``. - * ``do_coredump()``. + * ``vfs_coredump()``. * nfs4recover.c. diff --git a/fs/coredump.c b/fs/coredump.c index 52efd1b34261..8a401eeee940 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -865,7 +865,7 @@ static inline void coredump_sock_wait(struct file *file) { } static inline void coredump_sock_shutdown(struct file *file) { } #endif -void do_coredump(const kernel_siginfo_t *siginfo) +void vfs_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; struct core_name cn; diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 76e41805b92d..96e8a66da133 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -43,7 +43,7 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); -extern void do_coredump(const kernel_siginfo_t *siginfo); +extern void vfs_coredump(const kernel_siginfo_t *siginfo); /* * Logging for the coredump code, ratelimited. @@ -63,7 +63,7 @@ extern void do_coredump(const kernel_siginfo_t *siginfo); #define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__) #else -static inline void do_coredump(const kernel_siginfo_t *siginfo) {} +static inline void vfs_coredump(const kernel_siginfo_t *siginfo) {} #define coredump_report(...) #define coredump_report_failure(...) diff --git a/kernel/signal.c b/kernel/signal.c index 148082db9a55..e2c928de7d2c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3016,7 +3016,7 @@ bool get_signal(struct ksignal *ksig) * first and our do_group_exit call below will use * that value and ignore the one we pass it. */ - do_coredump(&ksig->info); + vfs_coredump(&ksig->info); } /* From 7bbb05dbea38e56d9f6ac7ac1040f93b0808cbce Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:24 +0200 Subject: [PATCH 10/24] coredump: split file coredumping into coredump_file() * Move that whole mess into a separate helper instead of having all that hanging around in vfs_coredump() directly. * Stop using that need_suid_safe variable and add an inline helper that clearly communicates what's going on everywhere consistently. The mm flag snapshot is stable and can't change so nothing's gained with that boolean. * Only setup cprm->file once everything else succeeded, using RAII for the coredump file before. That allows to don't care to what goto label we jump in vfs_coredump(). Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-10-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 207 ++++++++++++++++++++++++++------------------------ 1 file changed, 106 insertions(+), 101 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 8a401eeee940..9f9d8ae29359 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -865,6 +865,108 @@ static inline void coredump_sock_wait(struct file *file) { } static inline void coredump_sock_shutdown(struct file *file) { } #endif +/* cprm->mm_flags contains a stable snapshot of dumpability flags. */ +static inline bool coredump_force_suid_safe(const struct coredump_params *cprm) +{ + /* Require nonrelative corefile path and be extra careful. */ + return __get_dumpable(cprm->mm_flags) == SUID_DUMP_ROOT; +} + +static bool coredump_file(struct core_name *cn, struct coredump_params *cprm, + const struct linux_binfmt *binfmt) +{ + struct mnt_idmap *idmap; + struct inode *inode; + struct file *file __free(fput) = NULL; + int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW | O_LARGEFILE | O_EXCL; + + if (cprm->limit < binfmt->min_coredump) + return false; + + if (coredump_force_suid_safe(cprm) && cn->corename[0] != '/') { + coredump_report_failure("this process can only dump core to a fully qualified path, skipping core dump"); + return false; + } + + /* + * Unlink the file if it exists unless this is a SUID + * binary - in that case, we're running around with root + * privs and don't want to unlink another user's coredump. + */ + if (!coredump_force_suid_safe(cprm)) { + /* + * If it doesn't exist, that's fine. If there's some + * other problem, we'll catch it at the filp_open(). + */ + do_unlinkat(AT_FDCWD, getname_kernel(cn->corename)); + } + + /* + * There is a race between unlinking and creating the + * file, but if that causes an EEXIST here, that's + * fine - another process raced with us while creating + * the corefile, and the other process won. To userspace, + * what matters is that at least one of the two processes + * writes its coredump successfully, not which one. + */ + if (coredump_force_suid_safe(cprm)) { + /* + * Using user namespaces, normal user tasks can change + * their current->fs->root to point to arbitrary + * directories. Since the intention of the "only dump + * with a fully qualified path" rule is to control where + * coredumps may be placed using root privileges, + * current->fs->root must not be used. Instead, use the + * root directory of init_task. + */ + struct path root; + + task_lock(&init_task); + get_fs_root(init_task.fs, &root); + task_unlock(&init_task); + file = file_open_root(&root, cn->corename, open_flags, 0600); + path_put(&root); + } else { + file = filp_open(cn->corename, open_flags, 0600); + } + if (IS_ERR(file)) + return false; + + inode = file_inode(file); + if (inode->i_nlink > 1) + return false; + if (d_unhashed(file->f_path.dentry)) + return false; + /* + * AK: actually i see no reason to not allow this for named + * pipes etc, but keep the previous behaviour for now. + */ + if (!S_ISREG(inode->i_mode)) + return false; + /* + * Don't dump core if the filesystem changed owner or mode + * of the file during file creation. This is an issue when + * a process dumps core while its cwd is e.g. on a vfat + * filesystem. + */ + idmap = file_mnt_idmap(file); + if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) { + coredump_report_failure("Core dump to %s aborted: cannot preserve file owner", cn->corename); + return false; + } + if ((inode->i_mode & 0677) != 0600) { + coredump_report_failure("Core dump to %s aborted: cannot preserve file permissions", cn->corename); + return false; + } + if (!(file->f_mode & FMODE_CAN_WRITE)) + return false; + if (do_truncate(idmap, file->f_path.dentry, 0, 0, file)) + return false; + + cprm->file = no_free_ptr(file); + return true; +} + void vfs_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; @@ -876,8 +978,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) int retval = 0; size_t *argv = NULL; int argc = 0; - /* require nonrelative corefile path and be extra careful */ - bool need_suid_safe = false; bool core_dumped = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { @@ -910,11 +1010,8 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) * so we dump it as root in mode 2, and only into a controlled * environment (pipe handler or fully qualified path). */ - if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { - /* Setuid core dump mode */ - cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ - need_suid_safe = true; - } + if (coredump_force_suid_safe(&cprm)) + cred->fsuid = GLOBAL_ROOT_UID; retval = coredump_wait(siginfo->si_signo, &core_state); if (retval < 0) @@ -928,102 +1025,10 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) } switch (cn.core_type) { - case COREDUMP_FILE: { - struct mnt_idmap *idmap; - struct inode *inode; - int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW | - O_LARGEFILE | O_EXCL; - - if (cprm.limit < binfmt->min_coredump) - goto fail_unlock; - - if (need_suid_safe && cn.corename[0] != '/') { - coredump_report_failure( - "this process can only dump core to a fully qualified path, skipping core dump"); - goto fail_unlock; - } - - /* - * Unlink the file if it exists unless this is a SUID - * binary - in that case, we're running around with root - * privs and don't want to unlink another user's coredump. - */ - if (!need_suid_safe) { - /* - * If it doesn't exist, that's fine. If there's some - * other problem, we'll catch it at the filp_open(). - */ - do_unlinkat(AT_FDCWD, getname_kernel(cn.corename)); - } - - /* - * There is a race between unlinking and creating the - * file, but if that causes an EEXIST here, that's - * fine - another process raced with us while creating - * the corefile, and the other process won. To userspace, - * what matters is that at least one of the two processes - * writes its coredump successfully, not which one. - */ - if (need_suid_safe) { - /* - * Using user namespaces, normal user tasks can change - * their current->fs->root to point to arbitrary - * directories. Since the intention of the "only dump - * with a fully qualified path" rule is to control where - * coredumps may be placed using root privileges, - * current->fs->root must not be used. Instead, use the - * root directory of init_task. - */ - struct path root; - - task_lock(&init_task); - get_fs_root(init_task.fs, &root); - task_unlock(&init_task); - cprm.file = file_open_root(&root, cn.corename, - open_flags, 0600); - path_put(&root); - } else { - cprm.file = filp_open(cn.corename, open_flags, 0600); - } - if (IS_ERR(cprm.file)) - goto fail_unlock; - - inode = file_inode(cprm.file); - if (inode->i_nlink > 1) - goto close_fail; - if (d_unhashed(cprm.file->f_path.dentry)) - goto close_fail; - /* - * AK: actually i see no reason to not allow this for named - * pipes etc, but keep the previous behaviour for now. - */ - if (!S_ISREG(inode->i_mode)) - goto close_fail; - /* - * Don't dump core if the filesystem changed owner or mode - * of the file during file creation. This is an issue when - * a process dumps core while its cwd is e.g. on a vfat - * filesystem. - */ - idmap = file_mnt_idmap(cprm.file); - if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), - current_fsuid())) { - coredump_report_failure("Core dump to %s aborted: " - "cannot preserve file owner", cn.corename); - goto close_fail; - } - if ((inode->i_mode & 0677) != 0600) { - coredump_report_failure("Core dump to %s aborted: " - "cannot preserve file permissions", cn.corename); - goto close_fail; - } - if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) - goto close_fail; - if (do_truncate(idmap, cprm.file->f_path.dentry, - 0, 0, cprm.file)) + case COREDUMP_FILE: + if (!coredump_file(&cn, &cprm, binfmt)) goto close_fail; break; - } case COREDUMP_PIPE: { int argi; int dump_count; From a961c737cda8f172e108da881691cadafb9a061e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:25 +0200 Subject: [PATCH 11/24] coredump: prepare to simplify exit paths The exit path is currently entangled with core pipe limit accounting which is really unpleasant. Use a local variable in struct core_name that remembers whether the count was incremented and if so to clean decrement in once the coredump is done. Assert that this only happens for pipes. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-11-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 9f9d8ae29359..4afaf792a12e 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -93,6 +93,7 @@ enum coredump_type_t { struct core_name { char *corename; int used, size; + unsigned int core_pipe_limit; enum coredump_type_t core_type; u64 mask; }; @@ -244,6 +245,7 @@ static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm, cn->mask |= COREDUMP_WAIT; cn->used = 0; cn->corename = NULL; + cn->core_pipe_limit = 0; if (*pat_ptr == '|') cn->core_type = COREDUMP_PIPE; else if (*pat_ptr == '@') @@ -1031,7 +1033,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) break; case COREDUMP_PIPE: { int argi; - int dump_count; char **helper_argv; struct subprocess_info *sub_info; @@ -1052,21 +1053,21 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) * core_pattern process dies. */ coredump_report_failure("RLIMIT_CORE is set to 1, aborting core"); - goto fail_unlock; + goto close_fail; } cprm.limit = RLIM_INFINITY; - dump_count = atomic_inc_return(&core_dump_count); - if (core_pipe_limit && (core_pipe_limit < dump_count)) { + cn.core_pipe_limit = atomic_inc_return(&core_dump_count); + if (core_pipe_limit && (core_pipe_limit < cn.core_pipe_limit)) { coredump_report_failure("over core_pipe_limit, skipping core dump"); - goto fail_dropcount; + goto close_fail; } helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), GFP_KERNEL); if (!helper_argv) { coredump_report_failure("%s failed to allocate memory", __func__); - goto fail_dropcount; + goto close_fail; } for (argi = 0; argi < argc; argi++) helper_argv[argi] = cn.corename + argv[argi]; @@ -1168,9 +1169,10 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) close_fail: if (cprm.file) filp_close(cprm.file, NULL); -fail_dropcount: - if (cn.core_type == COREDUMP_PIPE) + if (cn.core_pipe_limit) { + VFS_WARN_ON_ONCE(cn.core_type != COREDUMP_PIPE); atomic_dec(&core_dump_count); + } fail_unlock: kfree(argv); kfree(cn.corename); From 4f599219f71399ac2092d2e06b2cc38e50c45c53 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:26 +0200 Subject: [PATCH 12/24] coredump: move core_pipe_count to global variable The pipe coredump counter is a static local variable instead of a global variable like all of the rest. Move it to a global variable so it's all consistent. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-12-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 4afaf792a12e..c863e053b1f8 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -82,6 +82,7 @@ static unsigned int core_sort_vma; static char core_pattern[CORENAME_MAX_SIZE] = "core"; static int core_name_size = CORENAME_MAX_SIZE; unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT; +static atomic_t core_pipe_count = ATOMIC_INIT(0); enum coredump_type_t { COREDUMP_FILE = 1, @@ -981,7 +982,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) size_t *argv = NULL; int argc = 0; bool core_dumped = false; - static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { .siginfo = siginfo, .limit = rlimit(RLIMIT_CORE), @@ -1057,7 +1057,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) } cprm.limit = RLIM_INFINITY; - cn.core_pipe_limit = atomic_inc_return(&core_dump_count); + cn.core_pipe_limit = atomic_inc_return(&core_pipe_count); if (core_pipe_limit && (core_pipe_limit < cn.core_pipe_limit)) { coredump_report_failure("over core_pipe_limit, skipping core dump"); goto close_fail; @@ -1171,7 +1171,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) filp_close(cprm.file, NULL); if (cn.core_pipe_limit) { VFS_WARN_ON_ONCE(cn.core_type != COREDUMP_PIPE); - atomic_dec(&core_dump_count); + atomic_dec(&core_pipe_count); } fail_unlock: kfree(argv); From 9f29a347d7b1b2022dfc6e5a93d4f2a7b34f5d4d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:27 +0200 Subject: [PATCH 13/24] coredump: split pipe coredumping into coredump_pipe() * Move that whole mess into a separate helper instead of having all that hanging around in vfs_coredump() directly. Cleanup paths are already centralized. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-13-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 114 ++++++++++++++++++++++++++------------------------ 1 file changed, 59 insertions(+), 55 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index c863e053b1f8..f4f7f0a0ae40 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -970,6 +970,63 @@ static bool coredump_file(struct core_name *cn, struct coredump_params *cprm, return true; } +static bool coredump_pipe(struct core_name *cn, struct coredump_params *cprm, + size_t *argv, int argc) +{ + int argi; + char **helper_argv __free(kfree) = NULL; + struct subprocess_info *sub_info; + + if (cprm->limit == 1) { + /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. + * + * Normally core limits are irrelevant to pipes, since + * we're not writing to the file system, but we use + * cprm.limit of 1 here as a special value, this is a + * consistent way to catch recursive crashes. + * We can still crash if the core_pattern binary sets + * RLIM_CORE = !1, but it runs as root, and can do + * lots of stupid things. + * + * Note that we use task_tgid_vnr here to grab the pid + * of the process group leader. That way we get the + * right pid if a thread in a multi-threaded + * core_pattern process dies. + */ + coredump_report_failure("RLIMIT_CORE is set to 1, aborting core"); + return false; + } + cprm->limit = RLIM_INFINITY; + + cn->core_pipe_limit = atomic_inc_return(&core_pipe_count); + if (core_pipe_limit && (core_pipe_limit < cn->core_pipe_limit)) { + coredump_report_failure("over core_pipe_limit, skipping core dump"); + return false; + } + + helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), GFP_KERNEL); + if (!helper_argv) { + coredump_report_failure("%s failed to allocate memory", __func__); + return false; + } + for (argi = 0; argi < argc; argi++) + helper_argv[argi] = cn->corename + argv[argi]; + helper_argv[argi] = NULL; + + sub_info = call_usermodehelper_setup(helper_argv[0], helper_argv, NULL, + GFP_KERNEL, umh_coredump_setup, + NULL, cprm); + if (!sub_info) + return false; + + if (call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC)) { + coredump_report_failure("|%s pipe failed", cn->corename); + return false; + } + + return true; +} + void vfs_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; @@ -1031,63 +1088,10 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) if (!coredump_file(&cn, &cprm, binfmt)) goto close_fail; break; - case COREDUMP_PIPE: { - int argi; - char **helper_argv; - struct subprocess_info *sub_info; - - if (cprm.limit == 1) { - /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. - * - * Normally core limits are irrelevant to pipes, since - * we're not writing to the file system, but we use - * cprm.limit of 1 here as a special value, this is a - * consistent way to catch recursive crashes. - * We can still crash if the core_pattern binary sets - * RLIM_CORE = !1, but it runs as root, and can do - * lots of stupid things. - * - * Note that we use task_tgid_vnr here to grab the pid - * of the process group leader. That way we get the - * right pid if a thread in a multi-threaded - * core_pattern process dies. - */ - coredump_report_failure("RLIMIT_CORE is set to 1, aborting core"); + case COREDUMP_PIPE: + if (!coredump_pipe(&cn, &cprm, argv, argc)) goto close_fail; - } - cprm.limit = RLIM_INFINITY; - - cn.core_pipe_limit = atomic_inc_return(&core_pipe_count); - if (core_pipe_limit && (core_pipe_limit < cn.core_pipe_limit)) { - coredump_report_failure("over core_pipe_limit, skipping core dump"); - goto close_fail; - } - - helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), - GFP_KERNEL); - if (!helper_argv) { - coredump_report_failure("%s failed to allocate memory", __func__); - goto close_fail; - } - for (argi = 0; argi < argc; argi++) - helper_argv[argi] = cn.corename + argv[argi]; - helper_argv[argi] = NULL; - - retval = -ENOMEM; - sub_info = call_usermodehelper_setup(helper_argv[0], - helper_argv, NULL, GFP_KERNEL, - umh_coredump_setup, NULL, &cprm); - if (sub_info) - retval = call_usermodehelper_exec(sub_info, - UMH_WAIT_EXEC); - - kfree(helper_argv); - if (retval) { - coredump_report_failure("|%s pipe failed", cn.corename); - goto close_fail; - } break; - } case COREDUMP_SOCK_REQ: fallthrough; case COREDUMP_SOCK: From 5153053692987035a82bb4a6714ea12a5bd2bfdc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:28 +0200 Subject: [PATCH 14/24] coredump: move pipe specific file check into coredump_pipe() There's no point in having this eyesore in the middle of vfs_coredump(). Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-14-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index f4f7f0a0ae40..1e05d831cda8 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1024,6 +1024,15 @@ static bool coredump_pipe(struct core_name *cn, struct coredump_params *cprm, return false; } + /* + * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would + * have this set to NULL. + */ + if (!cprm->file) { + coredump_report_failure("Core dump to |%s disabled", cn->corename); + return false; + } + return true; } @@ -1117,14 +1126,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) goto close_fail; if ((cn.mask & COREDUMP_KERNEL) && !dump_interrupted()) { - /* - * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would - * have this set to NULL. - */ - if (!cprm.file) { - coredump_report_failure("Core dump to |%s disabled", cn.corename); - goto close_fail; - } if (!dump_vma_snapshot(&cprm)) goto close_fail; From d6527db34d08d7d411c377a25c05fff30eeba9ea Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:29 +0200 Subject: [PATCH 15/24] coredump: use a single helper for the socket Don't split it into multiple functions. Just use a single one like we do for coredump_file() and coredump_pipe() now. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-15-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 1e05d831cda8..48d90ec8c86a 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -855,17 +855,18 @@ static bool coredump_sock_request(struct core_name *cn, struct coredump_params * cn->mask = ack.mask; return coredump_sock_mark(cprm->file, COREDUMP_MARK_REQACK); } -#else -static bool coredump_sock_connect(struct core_name *cn, - struct coredump_params *cprm) + +static bool coredump_socket(struct core_name *cn, struct coredump_params *cprm) { - coredump_report_failure("Core dump socket support %s disabled", cn->corename); - return false; + if (!coredump_sock_connect(cn, cprm)) + return false; + + return coredump_sock_request(cn, cprm); } -static bool coredump_sock_request(struct core_name *cn, - struct coredump_params *cprm) { return false; } +#else static inline void coredump_sock_wait(struct file *file) { } static inline void coredump_sock_shutdown(struct file *file) { } +static inline bool coredump_socket(struct core_name *cn, struct coredump_params *cprm) { return false; } #endif /* cprm->mm_flags contains a stable snapshot of dumpability flags. */ @@ -1104,10 +1105,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) case COREDUMP_SOCK_REQ: fallthrough; case COREDUMP_SOCK: - if (!coredump_sock_connect(&cn, &cprm)) - goto close_fail; - - if (!coredump_sock_request(&cn, &cprm)) + if (!coredump_socket(&cn, &cprm)) goto close_fail; break; default: From 3a4db72d0368c5f618e18a12580d5c5dca7b2839 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:30 +0200 Subject: [PATCH 16/24] coredump: add coredump_write() to encapsulate that logic simplifying vfs_coredump() even further. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-16-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 56 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 48d90ec8c86a..dea4823b55b8 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -95,6 +95,7 @@ struct core_name { char *corename; int used, size; unsigned int core_pipe_limit; + bool core_dumped; enum coredump_type_t core_type; u64 mask; }; @@ -247,6 +248,7 @@ static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm, cn->used = 0; cn->corename = NULL; cn->core_pipe_limit = 0; + cn->core_dumped = false; if (*pat_ptr == '|') cn->core_type = COREDUMP_PIPE; else if (*pat_ptr == '@') @@ -1037,6 +1039,34 @@ static bool coredump_pipe(struct core_name *cn, struct coredump_params *cprm, return true; } +static bool coredump_write(struct core_name *cn, + struct coredump_params *cprm, + struct linux_binfmt *binfmt) +{ + + if (dump_interrupted()) + return true; + + if (!dump_vma_snapshot(cprm)) + return false; + + file_start_write(cprm->file); + cn->core_dumped = binfmt->core_dump(cprm); + /* + * Ensures that file size is big enough to contain the current + * file postion. This prevents gdb from complaining about + * a truncated file if the last "write" to the file was + * dump_skip. + */ + if (cprm->to_skip) { + cprm->to_skip--; + dump_emit(cprm, "", 1); + } + file_end_write(cprm->file); + free_vma_snapshot(cprm); + return true; +} + void vfs_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; @@ -1048,7 +1078,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) int retval = 0; size_t *argv = NULL; int argc = 0; - bool core_dumped = false; struct coredump_params cprm = { .siginfo = siginfo, .limit = rlimit(RLIMIT_CORE), @@ -1123,31 +1152,14 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) if (retval) goto close_fail; - if ((cn.mask & COREDUMP_KERNEL) && !dump_interrupted()) { - if (!dump_vma_snapshot(&cprm)) - goto close_fail; - - file_start_write(cprm.file); - core_dumped = binfmt->core_dump(&cprm); - /* - * Ensures that file size is big enough to contain the current - * file postion. This prevents gdb from complaining about - * a truncated file if the last "write" to the file was - * dump_skip. - */ - if (cprm.to_skip) { - cprm.to_skip--; - dump_emit(&cprm, "", 1); - } - file_end_write(cprm.file); - free_vma_snapshot(&cprm); - } + if ((cn.mask & COREDUMP_KERNEL) && !coredump_write(&cn, &cprm, binfmt)) + goto close_fail; coredump_sock_shutdown(cprm.file); /* Let the parent know that a coredump was generated. */ if (cn.mask & COREDUMP_USERSPACE) - core_dumped = true; + cn.core_dumped = true; /* * When core_pipe_limit is set we wait for the coredump server @@ -1179,7 +1191,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) fail_unlock: kfree(argv); kfree(cn.corename); - coredump_finish(core_dumped); + coredump_finish(cn.core_dumped); revert_creds(old_cred); fail_creds: put_cred(cred); From 4a9f5d7fb6649af534c36aa8cc9c1aa51b172ad9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:31 +0200 Subject: [PATCH 17/24] coredump: auto cleanup argv to prepare for a simpler exit path. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-17-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index dea4823b55b8..68da77d00170 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1076,7 +1076,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) const struct cred *old_cred; struct cred *cred; int retval = 0; - size_t *argv = NULL; + size_t *argv __free(kfree) = NULL; int argc = 0; struct coredump_params cprm = { .siginfo = siginfo, @@ -1189,7 +1189,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) atomic_dec(&core_pipe_count); } fail_unlock: - kfree(argv); kfree(cn.corename); coredump_finish(cn.core_dumped); revert_creds(old_cred); From 8434fac512d35597488b13e23cc85e2903f5c8ae Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:32 +0200 Subject: [PATCH 18/24] coredump: directly return instead of jumping to a pointless cleanup label. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-18-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 68da77d00170..b2e9ac34d9a3 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1095,13 +1095,13 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) binfmt = mm->binfmt; if (!binfmt || !binfmt->core_dump) - goto fail; + return; if (!__get_dumpable(cprm.mm_flags)) - goto fail; + return; cred = prepare_creds(); if (!cred) - goto fail; + return; /* * We cannot trust fsuid as being the "true" uid of the process * nor do we know its entire history. We only know it was tainted @@ -1194,7 +1194,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) revert_creds(old_cred); fail_creds: put_cred(cred); -fail: return; } From 377d7860c960ac8e672881bc50353d867e2f94a4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:33 +0200 Subject: [PATCH 19/24] cred: add auto cleanup method Add a simple auto cleanup method for struct cred. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-19-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- include/linux/cred.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/cred.h b/include/linux/cred.h index 5658a3bfe803..a102a10f833f 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -263,6 +263,8 @@ static inline void put_cred(const struct cred *cred) put_cred_many(cred, 1); } +DEFINE_FREE(put_cred, struct cred *, if (!IS_ERR_OR_NULL(_T)) put_cred(_T)) + /** * current_cred - Access the current task's subjective credentials * From 7a568fcdad7c75a1ee196921cf651de607c2c5d5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:34 +0200 Subject: [PATCH 20/24] coredump: auto cleanup prepare_creds() which will allow us to simplify the exit path in further patches. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-20-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index b2e9ac34d9a3..e9c8696283f6 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1074,7 +1074,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; const struct cred *old_cred; - struct cred *cred; + struct cred *cred __free(put_cred) = NULL; int retval = 0; size_t *argv __free(kfree) = NULL; int argc = 0; @@ -1113,7 +1113,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) retval = coredump_wait(siginfo->si_signo, &core_state); if (retval < 0) - goto fail_creds; + return; old_cred = override_creds(cred); @@ -1192,8 +1192,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) kfree(cn.corename); coredump_finish(cn.core_dumped); revert_creds(old_cred); -fail_creds: - put_cred(cred); return; } From cfd6c12293d7cc257f27770399a3d8f11bf7d25c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:35 +0200 Subject: [PATCH 21/24] coredump: add coredump_cleanup() Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-21-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index e9c8696283f6..9c028ef087d4 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1067,6 +1067,18 @@ static bool coredump_write(struct core_name *cn, return true; } +static void coredump_cleanup(struct core_name *cn, struct coredump_params *cprm) +{ + if (cprm->file) + filp_close(cprm->file, NULL); + if (cn->core_pipe_limit) { + VFS_WARN_ON_ONCE(cn->core_type != COREDUMP_PIPE); + atomic_dec(&core_pipe_count); + } + kfree(cn->corename); + coredump_finish(cn->core_dumped); +} + void vfs_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; @@ -1119,7 +1131,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) if (!coredump_parse(&cn, &cprm, &argv, &argc)) { coredump_report_failure("format_corename failed, aborting core"); - goto fail_unlock; + goto close_fail; } switch (cn.core_type) { @@ -1182,15 +1194,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) } close_fail: - if (cprm.file) - filp_close(cprm.file, NULL); - if (cn.core_pipe_limit) { - VFS_WARN_ON_ONCE(cn.core_type != COREDUMP_PIPE); - atomic_dec(&core_pipe_count); - } -fail_unlock: - kfree(cn.corename); - coredump_finish(cn.core_dumped); + coredump_cleanup(&cn, &cprm); revert_creds(old_cred); return; } From ae20958b37acf82da4928910ca6351719b5ddba7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:36 +0200 Subject: [PATCH 22/24] coredump: order auto cleanup variables at the top so they're easy to spot. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-22-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 9c028ef087d4..d3f09bf71f5f 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1081,14 +1081,14 @@ static void coredump_cleanup(struct core_name *cn, struct coredump_params *cprm) void vfs_coredump(const kernel_siginfo_t *siginfo) { + struct cred *cred __free(put_cred) = NULL; + size_t *argv __free(kfree) = NULL; struct core_state core_state; struct core_name cn; struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; const struct cred *old_cred; - struct cred *cred __free(put_cred) = NULL; int retval = 0; - size_t *argv __free(kfree) = NULL; int argc = 0; struct coredump_params cprm = { .siginfo = siginfo, From 9dd88f3626462e4ffd008196e053d4004e44427b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:37 +0200 Subject: [PATCH 23/24] coredump: avoid pointless variable we don't use that value at all so don't bother with it in the first place. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-23-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index d3f09bf71f5f..178eddbcd6ad 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1088,7 +1088,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; const struct cred *old_cred; - int retval = 0; int argc = 0; struct coredump_params cprm = { .siginfo = siginfo, @@ -1123,8 +1122,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) if (coredump_force_suid_safe(&cprm)) cred->fsuid = GLOBAL_ROOT_UID; - retval = coredump_wait(siginfo->si_signo, &core_state); - if (retval < 0) + if (coredump_wait(siginfo->si_signo, &core_state) < 0) return; old_cred = override_creds(cred); @@ -1160,8 +1158,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) /* get us an unshared descriptor table; almost always a no-op */ /* The cell spufs coredump code reads the file descriptor tables */ - retval = unshare_files(); - if (retval) + if (unshare_files()) goto close_fail; if ((cn.mask & COREDUMP_KERNEL) && !coredump_write(&cn, &cprm, binfmt)) From da9029b47d790675dcd82a6d9e332bc41e1a17f1 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:38 +0200 Subject: [PATCH 24/24] coredump: add coredump_skip() helper Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-24-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 178eddbcd6ad..fadf9d4be2e1 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1079,6 +1079,18 @@ static void coredump_cleanup(struct core_name *cn, struct coredump_params *cprm) coredump_finish(cn->core_dumped); } +static inline bool coredump_skip(const struct coredump_params *cprm, + const struct linux_binfmt *binfmt) +{ + if (!binfmt) + return true; + if (!binfmt->core_dump) + return true; + if (!__get_dumpable(cprm->mm_flags)) + return true; + return false; +} + void vfs_coredump(const kernel_siginfo_t *siginfo) { struct cred *cred __free(put_cred) = NULL; @@ -1086,7 +1098,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) struct core_state core_state; struct core_name cn; struct mm_struct *mm = current->mm; - struct linux_binfmt * binfmt; + struct linux_binfmt *binfmt = mm->binfmt; const struct cred *old_cred; int argc = 0; struct coredump_params cprm = { @@ -1104,10 +1116,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) audit_core_dumps(siginfo->si_signo); - binfmt = mm->binfmt; - if (!binfmt || !binfmt->core_dump) - return; - if (!__get_dumpable(cprm.mm_flags)) + if (coredump_skip(&cprm, binfmt)) return; cred = prepare_creds();