From 49f1f2d4a7612bbed5211a73d11d6a94fbe3bb69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 26 May 2025 12:04:44 +0200 Subject: [PATCH 1/8] coredump: get rid of _META_MANDATORY_MAX No functional change. This change is done in preparation for future changes. Currently, the list of fields which are received on the command line is a strict subset of the fields which are always expected to be received on a socket. But when we add new kernel args in the future, we'll have two non-overlapping sets and this approach will not work. Get rid of the variable and enumerate the required fields. This set will never change, so this is actually more maintainable. The message with the hint where to add new fields is switched with _META_ARGV_MAX. The new order is more correct. --- src/coredump/coredump.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 07f4da2c7a..4bc1e4ad80 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -87,7 +87,7 @@ assert_cc(JOURNAL_SIZE_MAX <= DATA_SIZE_MAX); #define MOUNT_TREE_ROOT "/run/systemd/mount-rootfs" -enum { +typedef enum { /* We use these as array indexes for our process metadata cache. * * The first indices of the cache stores the same metadata as the ones passed by the kernel via @@ -103,9 +103,9 @@ enum { _META_ARGV_REQUIRED, /* The fields below were added to kernel/core_pattern at later points, so they might be missing. */ META_ARGV_HOSTNAME = _META_ARGV_REQUIRED, /* %h: hostname */ - _META_ARGV_MAX, /* If new fields are added, they should be added here, to maintain compatibility * with callers which don't know about the new fields. */ + _META_ARGV_MAX, /* The following indexes are cached for a couple of special fields we use (and * thereby need to be retrieved quickly) for naming coredump files, and attaching @@ -113,16 +113,15 @@ enum { * environment. */ META_COMM = _META_ARGV_MAX, - _META_MANDATORY_MAX, /* The rest are similar to the previous ones except that we won't fail if one of * them is missing in a message sent over the socket. */ - META_EXE = _META_MANDATORY_MAX, + META_EXE, META_UNIT, META_PROC_AUXV, _META_MAX -}; +} meta_argv_t; static const char * const meta_field_names[_META_MAX] = { [META_ARGV_PID] = "COREDUMP_PID=", @@ -1228,10 +1227,24 @@ static int process_socket(int fd) { if (r < 0) return r; - /* Make sure we received at least all fields we need. */ - for (int i = 0; i < _META_MANDATORY_MAX; i++) + /* Make sure we received all the expected fields. We support being called by an *older* + * systemd-coredump from the outside, so we require only the basic set of fields that + * was being sent when the support for sending to containers over a socket was added + * in a108c43e36d3ceb6e34efe37c014fc2cda856000. */ + meta_argv_t i; + FOREACH_ARGUMENT(i, + META_ARGV_PID, + META_ARGV_UID, + META_ARGV_GID, + META_ARGV_SIGNAL, + META_ARGV_TIMESTAMP, + META_ARGV_RLIMIT, + META_ARGV_HOSTNAME, + META_COMM) if (!context.meta[i]) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "A mandatory argument (%i) has not been sent, aborting.", i); + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Mandatory argument %s not received on socket, aborting.", + meta_field_names[i]); return submit_coredump(&context, &iovw, input_fd); } From 0c49e0049b7665bb7769a13ef346fef92e1ad4d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 29 Apr 2025 14:47:59 +0200 Subject: [PATCH 2/8] coredump: use %d in kernel core pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel provides %d which is documented as "dump mode—same as value returned by prctl(2) PR_GET_DUMPABLE". We already query /proc/pid/auxv for this information, but unfortunately this check is subject to a race, because the crashed process may be replaced by an attacker before we read this data, for example replacing a SUID process that was killed by a signal with another process that is not SUID, tricking us into making the coredump of the original process readable by the attacker. With this patch, we effectively add one more check to the list of conditions that need be satisfied if we are to make the coredump accessible to the user. Reportedy-by: Qualys Security Advisory In principle, %d might return a value other than 0, 1, or 2 in the future. Thus, we accept those, but emit a notice. --- man/systemd-coredump.xml | 12 ++++++++++++ src/coredump/coredump.c | 21 ++++++++++++++++++--- sysctl.d/50-coredump.conf.in | 2 +- test/units/TEST-87-AUX-UTILS-VM.coredump.sh | 5 +++++ 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/man/systemd-coredump.xml b/man/systemd-coredump.xml index 9972ba02e4..b62eb7b129 100644 --- a/man/systemd-coredump.xml +++ b/man/systemd-coredump.xml @@ -329,6 +329,18 @@ COREDUMP_FILENAME=/var/lib/systemd/coredump/core.Web….552351.….zst + + COREDUMP_DUMPABLE= + + The PR_GET_DUMPABLE field as reported by the kernel, see + prctl2. + + + + + + COREDUMP_OPEN_FDS= diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 4bc1e4ad80..bfb7db9d28 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -103,6 +103,7 @@ typedef enum { _META_ARGV_REQUIRED, /* The fields below were added to kernel/core_pattern at later points, so they might be missing. */ META_ARGV_HOSTNAME = _META_ARGV_REQUIRED, /* %h: hostname */ + META_ARGV_DUMPABLE, /* %d: as set by the kernel */ /* If new fields are added, they should be added here, to maintain compatibility * with callers which don't know about the new fields. */ _META_ARGV_MAX, @@ -131,6 +132,7 @@ static const char * const meta_field_names[_META_MAX] = { [META_ARGV_TIMESTAMP] = "COREDUMP_TIMESTAMP=", [META_ARGV_RLIMIT] = "COREDUMP_RLIMIT=", [META_ARGV_HOSTNAME] = "COREDUMP_HOSTNAME=", + [META_ARGV_DUMPABLE] = "COREDUMP_DUMPABLE=", [META_COMM] = "COREDUMP_COMM=", [META_EXE] = "COREDUMP_EXE=", [META_UNIT] = "COREDUMP_UNIT=", @@ -141,6 +143,7 @@ typedef struct Context { PidRef pidref; uid_t uid; gid_t gid; + unsigned dumpable; int signo; uint64_t rlimit; bool is_pid1; @@ -437,14 +440,16 @@ static int grant_user_access(int core_fd, const Context *context) { if (r < 0) return r; - /* We allow access if we got all the data and at_secure is not set and - * the uid/gid matches euid/egid. */ + /* We allow access if %d/dumpable on the command line was exactly 1, we got all the data, + * at_secure is not set, and the uid/gid match euid/egid. */ bool ret = + context->dumpable == 1 && at_secure == 0 && uid != UID_INVALID && euid != UID_INVALID && uid == euid && gid != GID_INVALID && egid != GID_INVALID && gid == egid; - log_debug("Will %s access (uid="UID_FMT " euid="UID_FMT " gid="GID_FMT " egid="GID_FMT " at_secure=%s)", + log_debug("Will %s access (dumpable=%u uid="UID_FMT " euid="UID_FMT " gid="GID_FMT " egid="GID_FMT " at_secure=%s)", ret ? "permit" : "restrict", + context->dumpable, uid, euid, gid, egid, yes_no(at_secure)); return ret; } @@ -1087,6 +1092,16 @@ static int context_parse_iovw(Context *context, struct iovec_wrapper *iovw) { if (r < 0) log_warning_errno(r, "Failed to parse resource limit \"%s\", ignoring: %m", context->meta[META_ARGV_RLIMIT]); + /* The value is set to contents of /proc/sys/fs/suid_dumpable, which we set to 2, + * if the process is marked as not dumpable, see PR_SET_DUMPABLE(2const). */ + if (context->meta[META_ARGV_DUMPABLE]) { + r = safe_atou(context->meta[META_ARGV_DUMPABLE], &context->dumpable); + if (r < 0) + return log_error_errno(r, "Failed to parse dumpable field \"%s\": %m", context->meta[META_ARGV_DUMPABLE]); + if (context->dumpable > 2) + log_notice("Got unexpected %%d/dumpable value %u.", context->dumpable); + } + unit = context->meta[META_UNIT]; context->is_pid1 = streq(context->meta[META_ARGV_PID], "1") || streq_ptr(unit, SPECIAL_INIT_SCOPE); context->is_journald = streq_ptr(unit, SPECIAL_JOURNALD_SERVICE); diff --git a/sysctl.d/50-coredump.conf.in b/sysctl.d/50-coredump.conf.in index 90c080bdfe..a550c87258 100644 --- a/sysctl.d/50-coredump.conf.in +++ b/sysctl.d/50-coredump.conf.in @@ -13,7 +13,7 @@ # the core dump. # # See systemd-coredump(8) and core(5). -kernel.core_pattern=|{{LIBEXECDIR}}/systemd-coredump %P %u %g %s %t %c %h +kernel.core_pattern=|{{LIBEXECDIR}}/systemd-coredump %P %u %g %s %t %c %h %d # Allow 16 coredumps to be dispatched in parallel by the kernel. # We collect metadata from /proc/%P/, and thus need to make sure the crashed diff --git a/test/units/TEST-87-AUX-UTILS-VM.coredump.sh b/test/units/TEST-87-AUX-UTILS-VM.coredump.sh index 6ad7e29e28..ce4a2c9ed1 100755 --- a/test/units/TEST-87-AUX-UTILS-VM.coredump.sh +++ b/test/units/TEST-87-AUX-UTILS-VM.coredump.sh @@ -198,12 +198,17 @@ journalctl -b -n 1 --output=export --output-fields=MESSAGE,COREDUMP COREDUMP_EXE /usr/lib/systemd/systemd-coredump --backtrace $$ 0 0 6 1679509900 12345 journalctl -b -n 1 --output=export --output-fields=MESSAGE,COREDUMP COREDUMP_EXE="/usr/bin/test-dump" | /usr/lib/systemd/systemd-coredump --backtrace $$ 0 0 6 1679509901 12345 mymachine +journalctl -b -n 1 --output=export --output-fields=MESSAGE,COREDUMP COREDUMP_EXE="/usr/bin/test-dump" | + /usr/lib/systemd/systemd-coredump --backtrace $$ 0 0 6 1679509902 12345 youmachine 1 # Wait a bit for the coredumps to get processed timeout 30 bash -c "while [[ \$(coredumpctl list -q --no-legend $$ | wc -l) -lt 2 ]]; do sleep 1; done" coredumpctl info $$ coredumpctl info COREDUMP_TIMESTAMP=1679509900000000 coredumpctl info COREDUMP_TIMESTAMP=1679509901000000 coredumpctl info COREDUMP_HOSTNAME="mymachine" +coredumpctl info COREDUMP_TIMESTAMP=1679509902000000 +coredumpctl info COREDUMP_HOSTNAME="youmachine" +coredumpctl info COREDUMP_DUMPABLE="1" # This used to cause a stack overflow systemd-run -t --property CoredumpFilter=all ls /tmp From 8fc7b2a211eb13ef1a94250b28e1c79cab8bdcb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 5 May 2025 15:48:40 +0200 Subject: [PATCH 3/8] coredump: also stop forwarding non-dumpable processes See the comment in the patch for details. Suggested-by: Qualys Security Advisory --- src/coredump/coredump.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index bfb7db9d28..29ab5eca9a 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -1564,13 +1564,23 @@ static int receive_ucred(int transport_fd, struct ucred *ret_ucred) { return 0; } -static int can_forward_coredump(const PidRef *pid) { +static int can_forward_coredump(Context *context, const PidRef *pid) { _cleanup_free_ char *cgroup = NULL, *path = NULL, *unit = NULL; int r; + assert(context); assert(pidref_is_set(pid)); assert(!pidref_is_remote(pid)); + /* We don't use %F/pidfd to pin down the crashed process yet. We need to avoid a situation where the + * attacker crashes a SUID process or a root daemon and quickly replaces it with a namespaced process + * and we forward the initial part of the coredump to the attacker, inside the namespace. + * + * TODO: relax this check when %F is implemented and used. + */ + if (context->dumpable != 1) + return false; + r = cg_pidref_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); if (r < 0) return r; @@ -1615,7 +1625,7 @@ static int forward_coredump_to_container(Context *context) { if (r < 0) return log_debug_errno(r, "Failed to get namespace leader: %m"); - r = can_forward_coredump(&leader_pid); + r = can_forward_coredump(context, &leader_pid); if (r < 0) return log_debug_errno(r, "Failed to check if coredump can be forwarded: %m"); if (r == 0) From 13902e025321242b1d95c6d8b4e482b37f58cdef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Mon, 26 May 2025 15:24:04 +0200 Subject: [PATCH 4/8] coredump: get rid of a bogus assertion The check looks plausible, but when I started checking whether it needs to be lowered for the recent changes, I realized that it doesn't make much sense. context_parse_iovw() is called from a few places, e.g.: - process_socket(), where the other side controls the contents of the message. We already do other checks on the correctness of the message and this assert is not needed. - gather_pid_metadata_from_argv(), which is called after inserting MESSAGE_ID= and PRIORITY= into the array, so there is no direct relation between _META_ARGV_MAX and the number of args in the iovw. - gather_pid_metadata_from_procfs(), where we insert a bazillion fields, but without any relation to _META_ARGV_MAX. Since we already separately check if the required stuff was set, drop this misleading check. --- src/coredump/coredump.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 29ab5eca9a..442cc10a82 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -1031,7 +1031,6 @@ static int context_parse_iovw(Context *context, struct iovec_wrapper *iovw) { assert(context); assert(iovw); - assert(iovw->count >= _META_ARGV_MAX); /* Converts the data in the iovec array iovw into separate fields. Fills in context->meta[] (for * which no memory is allocated, it just contains direct pointers into the iovec array memory). */ From 868d95577ec9f862580ad365726515459be582fc Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sun, 13 Apr 2025 22:10:36 +0100 Subject: [PATCH 5/8] coredump: add support for new %F PIDFD specifier A new core_pattern specifier was added, %F, to provide a PIDFD to the usermode helper process referring to the crashed process. This removes all possible race conditions, ensuring only the crashed process gets inspected by systemd-coredump. --- man/systemd-coredump.xml | 11 +++++++ src/coredump/coredump.c | 60 ++++++++++++++++++++++++++++++++++-- sysctl.d/50-coredump.conf.in | 2 +- 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/man/systemd-coredump.xml b/man/systemd-coredump.xml index b62eb7b129..d499600dc1 100644 --- a/man/systemd-coredump.xml +++ b/man/systemd-coredump.xml @@ -229,6 +229,17 @@ COREDUMP_FILENAME=/var/lib/systemd/coredump/core.Web….552351.….zst + + COREDUMP_BY_PIDFD= + If the crashed process was analyzed using a PIDFD provided by the kernel (requires + kernel v6.16) then this field will be present and set to 1. If this field is + not set, then the crashed process was analyzed via a PID, which is known to be subject to race + conditions. + + + + + COREDUMP_TIMESTAMP= The time of the crash as reported by the kernel (in μs since the epoch). diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 442cc10a82..c4a5532cea 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -104,6 +104,7 @@ typedef enum { /* The fields below were added to kernel/core_pattern at later points, so they might be missing. */ META_ARGV_HOSTNAME = _META_ARGV_REQUIRED, /* %h: hostname */ META_ARGV_DUMPABLE, /* %d: as set by the kernel */ + META_ARGV_PIDFD, /* %F: pidfd of the process, since v6.16 */ /* If new fields are added, they should be added here, to maintain compatibility * with callers which don't know about the new fields. */ _META_ARGV_MAX, @@ -133,6 +134,7 @@ static const char * const meta_field_names[_META_MAX] = { [META_ARGV_RLIMIT] = "COREDUMP_RLIMIT=", [META_ARGV_HOSTNAME] = "COREDUMP_HOSTNAME=", [META_ARGV_DUMPABLE] = "COREDUMP_DUMPABLE=", + [META_ARGV_PIDFD] = "COREDUMP_BY_PIDFD=", [META_COMM] = "COREDUMP_COMM=", [META_EXE] = "COREDUMP_EXE=", [META_UNIT] = "COREDUMP_UNIT=", @@ -1345,7 +1347,8 @@ static int gather_pid_metadata_from_argv( Context *context, int argc, char **argv) { - int r; + _cleanup_(pidref_done) PidRef local_pidref = PIDREF_NULL; + int r, kernel_fd = -EBADF; assert(iovw); assert(context); @@ -1377,6 +1380,47 @@ static int gather_pid_metadata_from_argv( t = buf; } + if (i == META_ARGV_PID) { + /* Store this so that we can check whether the core will be forwarded to a container + * even when the kernel doesn't provide a pidfd. Can be dropped once baseline is + * >= v6.16. */ + r = pidref_set_pidstr(&local_pidref, t); + if (r < 0) + return log_error_errno(r, "Failed to initialize pidref from pid %s: %m", t); + } + + if (i == META_ARGV_PIDFD) { + /* If the current kernel doesn't support the %F specifier (which resolves to a + * pidfd), but we included it in the core_pattern expression, we'll receive an empty + * string here. Deal with that gracefully. */ + if (isempty(t)) + continue; + + assert(!pidref_is_set(&context->pidref)); + assert(kernel_fd < 0); + + kernel_fd = parse_fd(t); + if (kernel_fd < 0) + return log_error_errno(kernel_fd, "Failed to parse pidfd \"%s\": %m", t); + + r = pidref_set_pidfd(&context->pidref, kernel_fd); + if (r < 0) + return log_error_errno(r, "Failed to initialize pidref from pidfd %d: %m", kernel_fd); + + /* If there are containers involved with different versions of the code they might + * not be using pidfds, so it would be wrong to set the metadata, skip it. */ + r = pidref_in_same_namespace(/* pid1 = */ NULL, &context->pidref, NAMESPACE_PID); + if (r < 0) + log_debug_errno(r, "Failed to check pidns of crashing process, ignoring: %m"); + if (r <= 0) + continue; + + /* We don't print the fd number in the journal as it's meaningless, but we still + * record that the parsing was done with a kernel-provided fd as it means it's safe + * from races, which is valuable information to provide in the journal record. */ + t = "1"; + } + r = iovw_put_string_field(iovw, meta_field_names[i], t); if (r < 0) return r; @@ -1384,7 +1428,19 @@ static int gather_pid_metadata_from_argv( /* Cache some of the process metadata we collected so far and that we'll need to * access soon. */ - return context_parse_iovw(context, iovw); + r = context_parse_iovw(context, iovw); + if (r < 0) + return r; + + /* If the kernel didn't give us a PIDFD, then use the one derived from the + * PID immediately, given we have it. */ + if (!pidref_is_set(&context->pidref)) + context->pidref = TAKE_PIDREF(local_pidref); + + /* Close the kernel-provided FD as the last thing after everything else succeeded. */ + kernel_fd = safe_close(kernel_fd); + + return 0; } static int gather_pid_metadata_from_procfs(struct iovec_wrapper *iovw, Context *context) { diff --git a/sysctl.d/50-coredump.conf.in b/sysctl.d/50-coredump.conf.in index a550c87258..fe8f7670b0 100644 --- a/sysctl.d/50-coredump.conf.in +++ b/sysctl.d/50-coredump.conf.in @@ -13,7 +13,7 @@ # the core dump. # # See systemd-coredump(8) and core(5). -kernel.core_pattern=|{{LIBEXECDIR}}/systemd-coredump %P %u %g %s %t %c %h %d +kernel.core_pattern=|{{LIBEXECDIR}}/systemd-coredump %P %u %g %s %t %c %h %d %F # Allow 16 coredumps to be dispatched in parallel by the kernel. # We collect metadata from /proc/%P/, and thus need to make sure the crashed From e6a8687b939ab21854f12f59a3cce703e32768cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 27 May 2025 10:44:32 +0200 Subject: [PATCH 6/8] coredump: when %F/pidfd is used, again allow forwarding to containers --- src/coredump/coredump.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index c4a5532cea..80696f3c3b 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -150,6 +150,7 @@ typedef struct Context { uint64_t rlimit; bool is_pid1; bool is_journald; + bool got_pidfd; int mount_tree_fd; /* These point into external memory, are not owned by this object */ @@ -1407,6 +1408,8 @@ static int gather_pid_metadata_from_argv( if (r < 0) return log_error_errno(r, "Failed to initialize pidref from pidfd %d: %m", kernel_fd); + context->got_pidfd = 1; + /* If there are containers involved with different versions of the code they might * not be using pidfds, so it would be wrong to set the metadata, skip it. */ r = pidref_in_same_namespace(/* pid1 = */ NULL, &context->pidref, NAMESPACE_PID); @@ -1627,13 +1630,11 @@ static int can_forward_coredump(Context *context, const PidRef *pid) { assert(pidref_is_set(pid)); assert(!pidref_is_remote(pid)); - /* We don't use %F/pidfd to pin down the crashed process yet. We need to avoid a situation where the - * attacker crashes a SUID process or a root daemon and quickly replaces it with a namespaced process - * and we forward the initial part of the coredump to the attacker, inside the namespace. - * - * TODO: relax this check when %F is implemented and used. - */ - if (context->dumpable != 1) + /* We need to avoid a situation where the attacker crashes a SUID process or a root daemon and + * quickly replaces it with a namespaced process and we forward the coredump to the attacker, into + * the namespace. With %F/pidfd we can reliably check the namespace of the original process, hence we + * can allow forwarding. */ + if (!context->got_pidfd && context->dumpable != 1) return false; r = cg_pidref_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); From 76e0ab49c47965877c19772a2b3bf55f6417ca39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 27 May 2025 20:32:30 +0200 Subject: [PATCH 7/8] coredump: introduce an enum to wrap dumpable constants Two constants are described in the man page, but are not defined by a header. The third constant is described in the kernel docs. Use explicit values to show that those are values are defined externally. --- src/coredump/coredump.c | 10 +++++----- src/shared/coredump-util.h | 7 +++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 80696f3c3b..2507e1b603 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -446,7 +446,7 @@ static int grant_user_access(int core_fd, const Context *context) { /* We allow access if %d/dumpable on the command line was exactly 1, we got all the data, * at_secure is not set, and the uid/gid match euid/egid. */ bool ret = - context->dumpable == 1 && + context->dumpable == SUID_DUMP_USER && at_secure == 0 && uid != UID_INVALID && euid != UID_INVALID && uid == euid && gid != GID_INVALID && egid != GID_INVALID && gid == egid; @@ -1094,13 +1094,13 @@ static int context_parse_iovw(Context *context, struct iovec_wrapper *iovw) { if (r < 0) log_warning_errno(r, "Failed to parse resource limit \"%s\", ignoring: %m", context->meta[META_ARGV_RLIMIT]); - /* The value is set to contents of /proc/sys/fs/suid_dumpable, which we set to 2, + /* The value is set to contents of /proc/sys/fs/suid_dumpable, which we set to SUID_DUMP_SAFE (2), * if the process is marked as not dumpable, see PR_SET_DUMPABLE(2const). */ if (context->meta[META_ARGV_DUMPABLE]) { r = safe_atou(context->meta[META_ARGV_DUMPABLE], &context->dumpable); if (r < 0) return log_error_errno(r, "Failed to parse dumpable field \"%s\": %m", context->meta[META_ARGV_DUMPABLE]); - if (context->dumpable > 2) + if (context->dumpable > SUID_DUMP_SAFE) log_notice("Got unexpected %%d/dumpable value %u.", context->dumpable); } @@ -1634,7 +1634,7 @@ static int can_forward_coredump(Context *context, const PidRef *pid) { * quickly replaces it with a namespaced process and we forward the coredump to the attacker, into * the namespace. With %F/pidfd we can reliably check the namespace of the original process, hence we * can allow forwarding. */ - if (!context->got_pidfd && context->dumpable != 1) + if (!context->got_pidfd && context->dumpable != SUID_DUMP_USER) return false; r = cg_pidref_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); @@ -2024,7 +2024,7 @@ static int run(int argc, char *argv[]) { log_set_target_and_open(LOG_TARGET_KMSG); /* Make sure we never enter a loop */ - (void) prctl(PR_SET_DUMPABLE, 0); + (void) prctl(PR_SET_DUMPABLE, SUID_DUMP_DISABLE); /* Ignore all parse errors */ (void) parse_config(); diff --git a/src/shared/coredump-util.h b/src/shared/coredump-util.h index a72db4b4e5..f774425b29 100644 --- a/src/shared/coredump-util.h +++ b/src/shared/coredump-util.h @@ -28,6 +28,13 @@ typedef enum CoredumpFilter { /* The kernel doesn't like UINT64_MAX and returns ERANGE, use UINT32_MAX to support future new flags */ #define COREDUMP_FILTER_MASK_ALL UINT32_MAX +typedef enum SuidDumpMode { + SUID_DUMP_DISABLE = 0, /* PR_SET_DUMPABLE(2const) */ + SUID_DUMP_USER = 1, /* PR_SET_DUMPABLE(2const) */ + SUID_DUMP_SAFE = 2, /* https://www.kernel.org/doc/html/latest/admin-guide/sysctl/fs.html#suid-dumpable */ + _SUID_DUMP_MODE_MAX, +} SuidDumpMode; + const char* coredump_filter_to_string(CoredumpFilter i) _const_; CoredumpFilter coredump_filter_from_string(const char *s) _pure_; int coredump_filter_mask_from_string(const char *s, uint64_t *ret); From 9ce8e3e449def92c75ada41b7d10c5bc3946be77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 28 May 2025 18:31:13 +0200 Subject: [PATCH 8/8] Define helper to call PR_SET_DUMPABLE --- src/coredump/coredump.c | 3 +-- src/shared/coredump-util.c | 7 +++++++ src/shared/coredump-util.h | 2 ++ src/shared/elf-util.c | 4 ++-- src/shared/tests.c | 4 +++- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 2507e1b603..55a0b704e3 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -2024,7 +2023,7 @@ static int run(int argc, char *argv[]) { log_set_target_and_open(LOG_TARGET_KMSG); /* Make sure we never enter a loop */ - (void) prctl(PR_SET_DUMPABLE, SUID_DUMP_DISABLE); + (void) set_dumpable(SUID_DUMP_DISABLE); /* Ignore all parse errors */ (void) parse_config(); diff --git a/src/shared/coredump-util.c b/src/shared/coredump-util.c index 9d18cf1301..37dfb2c91a 100644 --- a/src/shared/coredump-util.c +++ b/src/shared/coredump-util.c @@ -1,9 +1,11 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #include +#include #include "alloc-util.h" #include "coredump-util.h" +#include "errno-util.h" #include "extract-word.h" #include "fileio.h" #include "log.h" @@ -14,6 +16,11 @@ #include "unaligned.h" #include "virt.h" +int set_dumpable(SuidDumpMode mode) { + /* Cast mode explicitly to long, because prctl wants longs but is varargs. */ + return RET_NERRNO(prctl(PR_SET_DUMPABLE, (long) mode)); +} + static const char *const coredump_filter_table[_COREDUMP_FILTER_MAX] = { [COREDUMP_FILTER_PRIVATE_ANONYMOUS] = "private-anonymous", [COREDUMP_FILTER_SHARED_ANONYMOUS] = "shared-anonymous", diff --git a/src/shared/coredump-util.h b/src/shared/coredump-util.h index f774425b29..76e3715537 100644 --- a/src/shared/coredump-util.h +++ b/src/shared/coredump-util.h @@ -35,6 +35,8 @@ typedef enum SuidDumpMode { _SUID_DUMP_MODE_MAX, } SuidDumpMode; +int set_dumpable(SuidDumpMode mode); + const char* coredump_filter_to_string(CoredumpFilter i) _const_; CoredumpFilter coredump_filter_from_string(const char *s) _pure_; int coredump_filter_mask_from_string(const char *s, uint64_t *ret); diff --git a/src/shared/elf-util.c b/src/shared/elf-util.c index edc2e82d36..fd06b55ab1 100644 --- a/src/shared/elf-util.c +++ b/src/shared/elf-util.c @@ -6,12 +6,12 @@ #include #include #include -#include #include #include #include #include "alloc-util.h" +#include "coredump-util.h" #include "dlfcn-util.h" #include "elf-util.h" #include "errno-util.h" @@ -826,7 +826,7 @@ int parse_elf_object(int fd, const char *executable, const char *root, bool fork if (r == 0) { /* We want to avoid loops, given this can be called from systemd-coredump */ if (fork_disable_dump) { - r = RET_NERRNO(prctl(PR_SET_DUMPABLE, 0)); + r = set_dumpable(SUID_DUMP_DISABLE); if (r < 0) report_errno_and_exit(error_pipe[1], r); } diff --git a/src/shared/tests.c b/src/shared/tests.c index 66e1ae88cd..7284d00e23 100644 --- a/src/shared/tests.c +++ b/src/shared/tests.c @@ -16,6 +16,7 @@ #include "bus-wait-for-jobs.h" #include "cgroup-setup.h" #include "cgroup-util.h" +#include "coredump-util.h" #include "env-file.h" #include "env-util.h" #include "fd-util.h" @@ -433,7 +434,8 @@ int assert_signal_internal(void) { if (r == 0) { /* Speed things up by never even attempting to generate a coredump */ - (void) prctl(PR_SET_DUMPABLE, 0); + (void) set_dumpable(SUID_DUMP_DISABLE); + /* But still set an rlimit just in case */ (void) setrlimit(RLIMIT_CORE, &RLIMIT_MAKE_CONST(0)); return 0;