process-util: port pid_from_same_root_fs() to pidref + more (#35975)

Let's continue our move to PidRef.
This commit is contained in:
Lennart Poettering
2025-01-16 13:48:48 +01:00
committed by GitHub
9 changed files with 150 additions and 84 deletions

View File

@@ -519,12 +519,10 @@ int userns_acquire_empty(void) {
_cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL;
int r;
r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS, &pid);
r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
if (r < 0)
return r;
if (r == 0)
/* Child. We do nothing here, just freeze until somebody kills us. */
freeze();
assert(r > 0);
return pidref_namespace_open_by_type(&pid, NAMESPACE_USER);
}
@@ -541,12 +539,10 @@ int userns_acquire(const char *uid_map, const char *gid_map) {
* and then kills the process again. This way we have a userns fd that is not bound to any
* process. We can use that for file system mounts and similar. */
r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS, &pid);
r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid);
if (r < 0)
return r;
if (r == 0)
/* Child. We do nothing here, just freeze until somebody kills us. */
freeze();
assert(r > 0);
xsprintf(path, "/proc/" PID_FMT "/uid_map", pid.pid);
r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
@@ -762,12 +758,10 @@ int netns_acquire(void) {
/* Forks off a process in a new network namespace, acquires a network namespace fd, and then kills
* the process again. This way we have a netns fd that is not bound to any process. */
r = pidref_safe_fork("(sd-mknetns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_NETNS, &pid);
r = pidref_safe_fork("(sd-mknetns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_NETNS|FORK_FREEZE, &pid);
if (r < 0)
return log_debug_errno(r, "Failed to fork process into new netns: %m");
if (r == 0)
/* Child. We do nothing here, just freeze until somebody kills us. */
freeze();
assert(r > 0);
return pidref_namespace_open_by_type(&pid, NAMESPACE_NET);
}

View File

@@ -40,6 +40,10 @@ int pidref_acquire_pidfd_id(PidRef *pidref) {
bool pidref_equal(PidRef *a, PidRef *b) {
/* If this is the very same structure, it definitely refers to the same process */
if (a == b)
return true;
if (!pidref_is_set(a))
return !pidref_is_set(b);
@@ -58,9 +62,15 @@ bool pidref_equal(PidRef *a, PidRef *b) {
if (a->fd_id == 0 || b->fd_id == 0)
return true;
} else {
/* If the other side is remote, then this is not the same */
if (pidref_is_remote(b))
return false;
/* PID1 cannot exit, hence it cannot change pidfs ids, hence no point in comparing them, we
* can shortcut things */
if (a->pid == 1)
return true;
/* Try to compare pidfds using their inode numbers. This way we can ensure that we
* don't spuriously consider two PidRefs equal if the pid has been reused once. Note
* that we ignore all errors here, not only EOPNOTSUPP, as fstat() might fail due to

View File

@@ -1228,18 +1228,53 @@ int pidref_is_alive(const PidRef *pidref) {
return result;
}
int pid_from_same_root_fs(pid_t pid) {
const char *root;
int pidref_from_same_root_fs(PidRef *a, PidRef *b) {
_cleanup_(pidref_done) PidRef self = PIDREF_NULL;
int r;
if (pid < 0)
/* Checks if the two specified processes have the same root fs. Either can be specified as NULL in
* which case we'll check against ourselves. */
if (!a || !b) {
r = pidref_set_self(&self);
if (r < 0)
return r;
if (!a)
a = &self;
if (!b)
b = &self;
}
if (!pidref_is_set(a) || !pidref_is_set(b))
return -ESRCH;
/* If one of the two processes have the same root they cannot have the same root fs, but if both of
* them do we don't know */
if (pidref_is_remote(a) && pidref_is_remote(b))
return -EREMOTE;
if (pidref_is_remote(a) || pidref_is_remote(b))
return false;
if (pid == 0 || pid == getpid_cached())
if (pidref_equal(a, b))
return true;
root = procfs_file_alloca(pid, "root");
const char *roota = procfs_file_alloca(a->pid, "root");
const char *rootb = procfs_file_alloca(b->pid, "root");
return inode_same(root, "/proc/1/root", 0);
int result = inode_same(roota, rootb, 0);
if (result == -ENOENT)
return proc_mounted() == 0 ? -ENOSYS : -ESRCH;
if (result < 0)
return result;
r = pidref_verify(a);
if (r < 0)
return r;
r = pidref_verify(b);
if (r < 0)
return r;
return result;
}
bool is_main_thread(void) {
@@ -1763,6 +1798,9 @@ int safe_fork_full(
}
}
if (FLAGS_SET(flags, FORK_FREEZE))
freeze();
if (ret_pid)
*ret_pid = getpid_cached();
@@ -1959,7 +1997,8 @@ _noreturn_ void freeze(void) {
break;
}
/* waitid() failed with an unexpected error, things are really borked. Freeze now! */
/* waitid() failed with an ECHLD error (because there are no left-over child processes) or any other
* (unexpected) error. Freeze for good now! */
for (;;)
pause();
}

View File

@@ -93,7 +93,7 @@ int pid_is_unwaited(pid_t pid);
int pidref_is_unwaited(const PidRef *pidref);
int pid_is_my_child(pid_t pid);
int pidref_is_my_child(const PidRef *pidref);
int pid_from_same_root_fs(pid_t pid);
int pidref_from_same_root_fs(PidRef *a, PidRef *b);
bool is_main_thread(void);
@@ -192,6 +192,7 @@ typedef enum ForkFlags {
FORK_NEW_USERNS = 1 << 19, /* Run child in its own user namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
FORK_NEW_NETNS = 1 << 20, /* Run child in its own network namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
FORK_NEW_PIDNS = 1 << 21, /* Run child in its own PID namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
FORK_FREEZE = 1 << 22, /* Don't return in child, just call freeze() instead */
} ForkFlags;
int safe_fork_full(

View File

@@ -799,19 +799,16 @@ int running_in_chroot(void) {
if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
return 0;
r = inode_same("/proc/1/root", "/", 0);
if (r == -ENOENT) {
r = proc_mounted();
if (r == 0) {
if (getpid_cached() == 1)
return false; /* We will mount /proc, assuming we're not in a chroot. */
r = pidref_from_same_root_fs(&PIDREF_MAKE_FROM_PID(1), NULL);
if (r == -ENOSYS) {
if (getpid_cached() == 1)
return false; /* We will mount /proc, assuming we're not in a chroot. */
log_debug("/proc is not mounted, assuming we're in a chroot.");
return true;
}
if (r > 0) /* If we have fake /proc/, we can't do the check properly. */
return -ENOSYS;
log_debug("/proc/ is not mounted, assuming we're in a chroot.");
return true;
}
if (r == -ESRCH) /* We must have a fake /proc/, we can't do the check properly. */
return -ENOSYS;
if (r < 0)
return r;

View File

@@ -758,23 +758,16 @@ static int compose_open_fds(pid_t pid, char **ret) {
* Returns a negative number on errors.
*/
static int get_process_container_parent_cmdline(PidRef *pid, char** ret_cmdline) {
const char *proc_root_path;
struct stat root_stat, proc_root_stat;
int r;
assert(pidref_is_set(pid));
assert(!pidref_is_remote(pid));
/* To compare inodes of / and /proc/[pid]/root */
if (stat("/", &root_stat) < 0)
return -errno;
proc_root_path = procfs_file_alloca(pid->pid, "root");
if (stat(proc_root_path, &proc_root_stat) < 0)
return -errno;
/* The process uses system root. */
if (stat_inode_same(&proc_root_stat, &root_stat)) {
r = pidref_from_same_root_fs(pid, &PIDREF_MAKE_FROM_PID(1));
if (r < 0)
return r;
if (r > 0) {
/* The process uses system root. */
*ret_cmdline = NULL;
return 0;
}

View File

@@ -23,23 +23,27 @@
#include "string-util.h"
#include "terminal-util.h"
static bool argv_has_at(pid_t pid) {
_cleanup_fclose_ FILE *f = NULL;
const char *p;
char c = 0;
static int argv_has_at(const PidRef *pid) {
int r;
p = procfs_file_alloca(pid, "cmdline");
f = fopen(p, "re");
if (!f) {
log_debug_errno(errno, "Failed to open %s, ignoring: %m", p);
return true; /* not really, but has the desired effect */
}
assert(pidref_is_set(pid));
assert(!pidref_is_remote(pid));
const char *p = procfs_file_alloca(pid->pid, "cmdline");
_cleanup_fclose_ FILE *f = fopen(p, "re");
if (!f)
return log_debug_errno(errno, "Failed to open %s, ignoring: %m", p);
/* Try to read the first character of the command line. If the cmdline is empty (which might be the case for
* kernel threads but potentially also other stuff), this line won't do anything, but we don't care much, as
* actual kernel threads are already filtered out above. */
char c = 0;
(void) fread(&c, 1, 1, f);
r = pidref_verify(pid);
if (r < 0)
return log_debug_errno(r, "Failed to verify pid " PID_FMT ", ignoring: %m", pid->pid);
/* Processes with argv[0][0] = '@' we ignore from the killing spree.
*
* https://systemd.io/ROOT_STORAGE_DAEMONS */
@@ -74,9 +78,8 @@ static bool is_in_survivor_cgroup(const PidRef *pid) {
return r > 0;
}
static bool ignore_proc(const PidRef *pid, bool warn_rootfs) {
static bool ignore_proc(PidRef *pid, bool warn_rootfs) {
uid_t uid;
int r;
assert(pidref_is_set(pid));
@@ -85,28 +88,25 @@ static bool ignore_proc(const PidRef *pid, bool warn_rootfs) {
return true;
/* Ignore kernel threads */
r = pidref_is_kernel_thread(pid);
if (r != 0)
if (pidref_is_kernel_thread(pid) != 0)
return true; /* also ignore processes where we can't determine this */
/* Ignore processes that are part of a cgroup marked with the user.survive_final_kill_signal xattr */
if (is_in_survivor_cgroup(pid))
return true;
r = pidref_get_uid(pid, &uid);
if (r < 0)
if (pidref_get_uid(pid, &uid) < 0)
return true; /* not really, but better safe than sorry */
/* Non-root processes otherwise are always subject to be killed */
if (uid != 0)
return false;
if (!argv_has_at(pid->pid))
return false;
if (argv_has_at(pid) == 0)
return false; /* if this fails, ignore the process */
if (warn_rootfs &&
pid_from_same_root_fs(pid->pid) > 0) {
pidref_from_same_root_fs(pid, NULL) > 0) {
_cleanup_free_ char *comm = NULL;
(void) pidref_get_comm(pid, &comm);

View File

@@ -159,12 +159,8 @@ TEST(pidref_new_from_pid) {
TEST(pidref_kill) {
_cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
siginfo_t si;
int r;
r = pidref_safe_fork("(test-pidref-kill)", FORK_DEATHSIG_SIGKILL, &pidref);
assert_se(r >= 0);
if (r == 0)
freeze();
ASSERT_OK_POSITIVE(pidref_safe_fork("(test-pidref-kill)", FORK_DEATHSIG_SIGKILL|FORK_FREEZE, &pidref));
assert_se(pidref_kill(&pidref, SIGKILL) >= 0);
assert_se(pidref_wait_for_terminate(&pidref, &si) >= 0);
@@ -174,12 +170,8 @@ TEST(pidref_kill) {
TEST(pidref_kill_and_sigcont) {
_cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
siginfo_t si;
int r;
r = pidref_safe_fork("(test-pidref-kill-and-sigcont)", FORK_DEATHSIG_SIGTERM, &pidref);
assert_se(r >= 0);
if (r == 0)
freeze();
ASSERT_OK_POSITIVE(pidref_safe_fork("(test-pidref-kill-and-sigcont)", FORK_DEATHSIG_SIGTERM|FORK_FREEZE, &pidref));
assert_se(pidref_kill_and_sigcont(&pidref, SIGTERM) >= 0);
assert_se(pidref_wait_for_terminate(&pidref, &si) >= 0);
@@ -189,12 +181,8 @@ TEST(pidref_kill_and_sigcont) {
TEST(pidref_sigqueue) {
_cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
siginfo_t si;
int r;
r = pidref_safe_fork("(test-pidref-sigqueue)", FORK_DEATHSIG_SIGTERM, &pidref);
assert_se(r >= 0);
if (r == 0)
freeze();
ASSERT_OK_POSITIVE(pidref_safe_fork("(test-pidref-sigqueue)", FORK_DEATHSIG_SIGTERM|FORK_FREEZE, &pidref));
assert_se(pidref_sigqueue(&pidref, SIGTERM, 42) >= 0);
assert_se(pidref_wait_for_terminate(&pidref, &si) >= 0);
@@ -203,12 +191,8 @@ TEST(pidref_sigqueue) {
TEST(pidref_done_sigkill_wait) {
_cleanup_(pidref_done_sigkill_wait) PidRef pidref = PIDREF_NULL;
int r;
r = pidref_safe_fork("(test-pidref-done-sigkill-wait)", FORK_DEATHSIG_SIGKILL, &pidref);
assert_se(r >= 0);
if (r == 0)
freeze();
ASSERT_OK_POSITIVE(pidref_safe_fork("(test-pidref-done-sigkill-wait)", FORK_DEATHSIG_SIGKILL|FORK_FREEZE, &pidref));
}
TEST(pidref_verify) {

View File

@@ -3,6 +3,7 @@
#include <fcntl.h>
#include <linux/oom.h>
#include <pthread.h>
#include <sys/eventfd.h>
#include <sys/mount.h>
#include <sys/personality.h>
#include <sys/prctl.h>
@@ -1007,7 +1008,7 @@ TEST(pid_get_start_time) {
_cleanup_(pidref_done_sigkill_wait) PidRef child = PIDREF_NULL;
ASSERT_OK(pidref_safe_fork("(stub)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG, &child));
ASSERT_OK_POSITIVE(pidref_safe_fork("(stub)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG|FORK_FREEZE, &child));
usec_t start_time2;
ASSERT_OK(pidref_get_start_time(&child, &start_time2));
@@ -1017,6 +1018,53 @@ TEST(pid_get_start_time) {
ASSERT_GE(start_time2, start_time);
}
TEST(pidref_from_same_root_fs) {
int r;
_cleanup_(pidref_done) PidRef pid1 = PIDREF_NULL, self = PIDREF_NULL;
ASSERT_OK(pidref_set_self(&self));
ASSERT_OK(pidref_set_pid(&pid1, 1));
ASSERT_OK_POSITIVE(pidref_from_same_root_fs(&self, &self));
ASSERT_OK_POSITIVE(pidref_from_same_root_fs(&pid1, &pid1));
r = pidref_from_same_root_fs(&pid1, &self);
if (ERRNO_IS_NEG_PRIVILEGE(r))
return (void) log_tests_skipped("skipping pidref_from_same_root_fs() test, lacking privileged.");
ASSERT_OK(r);
log_info("PID1 and us have the same rootfs: %s", yes_no(r));
int q = pidref_from_same_root_fs(&self, &pid1);
ASSERT_OK(q);
ASSERT_EQ(r, q);
_cleanup_(pidref_done_sigkill_wait) PidRef child1 = PIDREF_NULL;
ASSERT_OK(pidref_safe_fork("(child1)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG|FORK_FREEZE, &child1));
ASSERT_OK_POSITIVE(pidref_from_same_root_fs(&self, &child1));
_cleanup_close_ int efd = eventfd(0, EFD_CLOEXEC);
ASSERT_OK_ERRNO(efd);
_cleanup_(pidref_done_sigkill_wait) PidRef child2 = PIDREF_NULL;
r = pidref_safe_fork("(child2)", FORK_RESET_SIGNALS|FORK_REOPEN_LOG, &child2);
ASSERT_OK(r);
if (r == 0) {
ASSERT_OK_ERRNO(chroot("/usr"));
uint64_t u = 1;
ASSERT_OK_EQ_ERRNO(write(efd, &u, sizeof(u)), (ssize_t) sizeof(u));
freeze();
}
uint64_t u;
ASSERT_OK_EQ_ERRNO(read(efd, &u, sizeof(u)), (ssize_t) sizeof(u));
ASSERT_OK_ZERO(pidref_from_same_root_fs(&self, &child2));
ASSERT_OK_ZERO(pidref_from_same_root_fs(&child2, &self));
}
static int intro(void) {
log_show_color(true);
return EXIT_SUCCESS;