mirror of
https://github.com/morgan9e/systemd
synced 2026-04-14 00:14:32 +09:00
tree-wide: drop support for kernels without pidfd_open() and pidfd_send_signal() (#35971)
This commit is contained in:
@@ -137,12 +137,7 @@ static inline int missing_name_to_handle_at(int fd, const char *name, struct fil
|
||||
|
||||
#if !HAVE_SETNS
|
||||
static inline int missing_setns(int fd, int nstype) {
|
||||
# ifdef __NR_setns
|
||||
return syscall(__NR_setns, fd, nstype);
|
||||
# else
|
||||
errno = ENOSYS;
|
||||
return -1;
|
||||
# endif
|
||||
}
|
||||
|
||||
# define setns missing_setns
|
||||
@@ -162,12 +157,7 @@ static inline pid_t raw_getpid(void) {
|
||||
|
||||
#if !HAVE_RENAMEAT2
|
||||
static inline int missing_renameat2(int oldfd, const char *oldname, int newfd, const char *newname, unsigned flags) {
|
||||
# ifdef __NR_renameat2
|
||||
return syscall(__NR_renameat2, oldfd, oldname, newfd, newname, flags);
|
||||
# else
|
||||
errno = ENOSYS;
|
||||
return -1;
|
||||
# endif
|
||||
}
|
||||
|
||||
# define renameat2 missing_renameat2
|
||||
@@ -177,12 +167,7 @@ static inline int missing_renameat2(int oldfd, const char *oldname, int newfd, c
|
||||
|
||||
#if !HAVE_KCMP
|
||||
static inline int missing_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) {
|
||||
# if defined __NR_kcmp && __NR_kcmp >= 0
|
||||
return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2);
|
||||
# else
|
||||
errno = ENOSYS;
|
||||
return -1;
|
||||
# endif
|
||||
}
|
||||
|
||||
# define kcmp missing_kcmp
|
||||
@@ -192,34 +177,19 @@ static inline int missing_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long i
|
||||
|
||||
#if !HAVE_KEYCTL
|
||||
static inline long missing_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) {
|
||||
# if defined __NR_keyctl && __NR_keyctl >= 0
|
||||
return syscall(__NR_keyctl, cmd, arg2, arg3, arg4, arg5);
|
||||
# else
|
||||
errno = ENOSYS;
|
||||
return -1;
|
||||
# endif
|
||||
|
||||
# define keyctl missing_keyctl
|
||||
}
|
||||
|
||||
static inline key_serial_t missing_add_key(const char *type, const char *description, const void *payload, size_t plen, key_serial_t ringid) {
|
||||
# if defined __NR_add_key && __NR_add_key >= 0
|
||||
return syscall(__NR_add_key, type, description, payload, plen, ringid);
|
||||
# else
|
||||
errno = ENOSYS;
|
||||
return -1;
|
||||
# endif
|
||||
|
||||
# define add_key missing_add_key
|
||||
}
|
||||
|
||||
static inline key_serial_t missing_request_key(const char *type, const char *description, const char * callout_info, key_serial_t destringid) {
|
||||
# if defined __NR_request_key && __NR_request_key >= 0
|
||||
return syscall(__NR_request_key, type, description, callout_info, destringid);
|
||||
# else
|
||||
errno = ENOSYS;
|
||||
return -1;
|
||||
# endif
|
||||
|
||||
# define request_key missing_request_key
|
||||
}
|
||||
@@ -329,12 +299,7 @@ static inline long missing_get_mempolicy(int *mode, unsigned long *nodemask,
|
||||
|
||||
#if !HAVE_PIDFD_SEND_SIGNAL
|
||||
static inline int missing_pidfd_send_signal(int fd, int sig, siginfo_t *info, unsigned flags) {
|
||||
# ifdef __NR_pidfd_send_signal
|
||||
return syscall(__NR_pidfd_send_signal, fd, sig, info, flags);
|
||||
# else
|
||||
errno = ENOSYS;
|
||||
return -1;
|
||||
# endif
|
||||
}
|
||||
|
||||
# define pidfd_send_signal missing_pidfd_send_signal
|
||||
@@ -342,12 +307,7 @@ static inline int missing_pidfd_send_signal(int fd, int sig, siginfo_t *info, un
|
||||
|
||||
#if !HAVE_PIDFD_OPEN
|
||||
static inline int missing_pidfd_open(pid_t pid, unsigned flags) {
|
||||
# ifdef __NR_pidfd_open
|
||||
return syscall(__NR_pidfd_open, pid, flags);
|
||||
# else
|
||||
errno = ENOSYS;
|
||||
return -1;
|
||||
# endif
|
||||
}
|
||||
|
||||
# define pidfd_open missing_pidfd_open
|
||||
@@ -661,12 +621,7 @@ static inline ssize_t missing_getdents64(int fd, void *buffer, size_t length) {
|
||||
#if !HAVE_SCHED_SETATTR
|
||||
|
||||
static inline ssize_t missing_sched_setattr(pid_t pid, struct sched_attr *attr, unsigned int flags) {
|
||||
# if defined __NR_sched_setattr
|
||||
return syscall(__NR_sched_setattr, pid, attr, flags);
|
||||
# else
|
||||
errno = ENOSYS;
|
||||
return -1;
|
||||
# endif
|
||||
}
|
||||
|
||||
# define sched_setattr missing_sched_setattr
|
||||
|
||||
@@ -24,12 +24,8 @@ static int pidfd_check_pidfs(void) {
|
||||
return have_pidfs;
|
||||
|
||||
_cleanup_close_ int fd = pidfd_open(getpid_cached(), 0);
|
||||
if (fd < 0) {
|
||||
if (ERRNO_IS_NOT_SUPPORTED(errno))
|
||||
return (have_pidfs = false);
|
||||
|
||||
if (fd < 0)
|
||||
return -errno;
|
||||
}
|
||||
|
||||
return (have_pidfs = fd_is_fs_type(fd, PID_FS_MAGIC));
|
||||
}
|
||||
|
||||
@@ -84,8 +84,8 @@ int pidref_set_pid(PidRef *pidref, pid_t pid) {
|
||||
|
||||
fd = pidfd_open(pid, 0);
|
||||
if (fd < 0) {
|
||||
/* Graceful fallback in case the kernel doesn't support pidfds or is out of fds */
|
||||
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno) && !ERRNO_IS_RESOURCE(errno))
|
||||
/* Graceful fallback in case the kernel is out of fds */
|
||||
if (!ERRNO_IS_RESOURCE(errno))
|
||||
return log_debug_errno(errno, "Failed to open pidfd for pid " PID_FMT ": %m", pid);
|
||||
|
||||
fd = -EBADF;
|
||||
|
||||
@@ -2042,7 +2042,7 @@ int posix_spawn_wrapper(
|
||||
* issues.
|
||||
*
|
||||
* Also, move the newly-created process into 'cgroup' through POSIX_SPAWN_SETCGROUP (clone3())
|
||||
* if available. Note that CLONE_INTO_CGROUP is only supported on cgroup v2.
|
||||
* if available.
|
||||
* returns 1: We're already in the right cgroup
|
||||
* 0: 'cgroup' not specified or POSIX_SPAWN_SETCGROUP is not supported. The caller
|
||||
* needs to call 'cg_attach' on their own */
|
||||
@@ -2061,14 +2061,10 @@ int posix_spawn_wrapper(
|
||||
_unused_ _cleanup_(posix_spawnattr_destroyp) posix_spawnattr_t *attr_destructor = &attr;
|
||||
|
||||
#if HAVE_PIDFD_SPAWN
|
||||
static enum {
|
||||
CLONE_ONLY_PID,
|
||||
CLONE_CAN_PIDFD, /* 5.2 */
|
||||
CLONE_CAN_CGROUP, /* 5.7 */
|
||||
} clone_support = CLONE_CAN_CGROUP;
|
||||
static bool have_clone_into_cgroup = true; /* kernel 5.7+ */
|
||||
_cleanup_close_ int cgroup_fd = -EBADF;
|
||||
|
||||
if (cgroup && clone_support >= CLONE_CAN_CGROUP) {
|
||||
if (cgroup && have_clone_into_cgroup) {
|
||||
_cleanup_free_ char *resolved_cgroup = NULL;
|
||||
|
||||
r = cg_get_path_and_check(
|
||||
@@ -2099,47 +2095,41 @@ int posix_spawn_wrapper(
|
||||
return -r;
|
||||
|
||||
#if HAVE_PIDFD_SPAWN
|
||||
if (clone_support >= CLONE_CAN_PIDFD) {
|
||||
_cleanup_close_ int pidfd = -EBADF;
|
||||
_cleanup_close_ int pidfd = -EBADF;
|
||||
|
||||
r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
|
||||
if (ERRNO_IS_NOT_SUPPORTED(r) && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) &&
|
||||
cg_is_threaded(cgroup) > 0) /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode. */
|
||||
return -EUCLEAN;
|
||||
if ((ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || r == E2BIG) &&
|
||||
FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) {
|
||||
/* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but
|
||||
* need to disable POSIX_SPAWN_SETCGROUP, which is what redirects to clone3().
|
||||
* Note that we might get E2BIG here since some kernels (e.g. 5.4) support clone3()
|
||||
* but not CLONE_INTO_CGROUP. */
|
||||
r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
|
||||
if (ERRNO_IS_NOT_SUPPORTED(r) && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) && cg_is_threaded(cgroup) > 0)
|
||||
return -EUCLEAN; /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode,
|
||||
turn that into something recognizable */
|
||||
if ((ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || r == E2BIG) &&
|
||||
FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) {
|
||||
/* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but
|
||||
* need to disable POSIX_SPAWN_SETCGROUP, which is what redirects to clone3().
|
||||
* Note that we might get E2BIG here since some kernels (e.g. 5.4) support clone3()
|
||||
* but not CLONE_INTO_CGROUP. */
|
||||
|
||||
/* CLONE_INTO_CGROUP definitely won't work, hence remember the fact so that we don't
|
||||
* retry every time. */
|
||||
assert(clone_support >= CLONE_CAN_CGROUP);
|
||||
clone_support = CLONE_CAN_PIDFD;
|
||||
/* CLONE_INTO_CGROUP definitely won't work, hence remember the fact so that we don't
|
||||
* retry every time. */
|
||||
have_clone_into_cgroup = false;
|
||||
|
||||
flags &= ~POSIX_SPAWN_SETCGROUP;
|
||||
r = posix_spawnattr_setflags(&attr, flags);
|
||||
if (r != 0)
|
||||
return -r;
|
||||
|
||||
r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
|
||||
}
|
||||
if (r == 0) {
|
||||
r = pidref_set_pidfd_consume(ret_pidref, TAKE_FD(pidfd));
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
return FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP);
|
||||
}
|
||||
if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
|
||||
flags &= ~POSIX_SPAWN_SETCGROUP;
|
||||
r = posix_spawnattr_setflags(&attr, flags);
|
||||
if (r != 0)
|
||||
return -r;
|
||||
|
||||
clone_support = CLONE_ONLY_PID; /* No CLONE_PIDFD either? */
|
||||
r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
|
||||
}
|
||||
#endif
|
||||
if (r != 0)
|
||||
return -r;
|
||||
|
||||
r = pidref_set_pidfd_consume(ret_pidref, TAKE_FD(pidfd));
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
return FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP);
|
||||
#else
|
||||
pid_t pid;
|
||||
|
||||
r = posix_spawn(&pid, path, NULL, &attr, argv, envp);
|
||||
if (r != 0)
|
||||
return -r;
|
||||
@@ -2149,6 +2139,7 @@ int posix_spawn_wrapper(
|
||||
return r;
|
||||
|
||||
return 0; /* We did not use CLONE_INTO_CGROUP so return 0, the caller will have to move the child */
|
||||
#endif
|
||||
}
|
||||
|
||||
int proc_dir_open(DIR **ret) {
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "missing_magic.h"
|
||||
#include "missing_syscall.h"
|
||||
#include "missing_threads.h"
|
||||
#include "missing_wait.h"
|
||||
#include "origin-id.h"
|
||||
#include "path-util.h"
|
||||
#include "prioq.h"
|
||||
@@ -1074,6 +1075,8 @@ static void source_disconnect(sd_event_source *s) {
|
||||
}
|
||||
|
||||
static sd_event_source* source_free(sd_event_source *s) {
|
||||
int r;
|
||||
|
||||
assert(s);
|
||||
|
||||
source_disconnect(s);
|
||||
@@ -1087,31 +1090,23 @@ static sd_event_source* source_free(sd_event_source *s) {
|
||||
if (s->child.process_owned) {
|
||||
|
||||
if (!s->child.exited) {
|
||||
bool sent = false;
|
||||
|
||||
if (s->child.pidfd >= 0) {
|
||||
if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
|
||||
if (errno == ESRCH) /* Already dead */
|
||||
sent = true;
|
||||
else if (!ERRNO_IS_NOT_SUPPORTED(errno))
|
||||
log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
|
||||
s->child.pid);
|
||||
} else
|
||||
sent = true;
|
||||
}
|
||||
|
||||
if (!sent)
|
||||
if (kill(s->child.pid, SIGKILL) < 0)
|
||||
if (errno != ESRCH) /* Already dead */
|
||||
log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
|
||||
s->child.pid);
|
||||
if (s->child.pidfd >= 0)
|
||||
r = RET_NERRNO(pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0));
|
||||
else
|
||||
r = RET_NERRNO(kill(s->child.pid, SIGKILL));
|
||||
if (r < 0 && r != -ESRCH)
|
||||
log_debug_errno(r, "Failed to kill process " PID_FMT ", ignoring: %m",
|
||||
s->child.pid);
|
||||
}
|
||||
|
||||
if (!s->child.waited) {
|
||||
siginfo_t si = {};
|
||||
|
||||
/* Reap the child if we can */
|
||||
(void) waitid(P_PID, s->child.pid, &si, WEXITED);
|
||||
if (s->child.pidfd >= 0)
|
||||
(void) waitid(P_PIDFD, s->child.pidfd, &si, WEXITED);
|
||||
else
|
||||
(void) waitid(P_PID, s->child.pid, &si, WEXITED);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1578,11 +1573,6 @@ static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *us
|
||||
return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
|
||||
}
|
||||
|
||||
static bool shall_use_pidfd(void) {
|
||||
/* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
|
||||
return secure_getenv_bool("SYSTEMD_PIDFD") != 0;
|
||||
}
|
||||
|
||||
_public_ int sd_event_add_child(
|
||||
sd_event *e,
|
||||
sd_event_source **ret,
|
||||
@@ -1630,34 +1620,29 @@ _public_ int sd_event_add_child(
|
||||
if (!s)
|
||||
return -ENOMEM;
|
||||
|
||||
/* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
|
||||
* pin the PID, and make regular waitid() handling race-free. */
|
||||
|
||||
s->child.pidfd = pidfd_open(pid, 0);
|
||||
if (s->child.pidfd < 0)
|
||||
return -errno;
|
||||
|
||||
s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
|
||||
|
||||
s->wakeup = WAKEUP_EVENT_SOURCE;
|
||||
s->child.options = options;
|
||||
s->child.callback = callback;
|
||||
s->userdata = userdata;
|
||||
s->enabled = SD_EVENT_ONESHOT;
|
||||
|
||||
/* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
|
||||
* pin the PID, and make regular waitid() handling race-free. */
|
||||
|
||||
if (shall_use_pidfd()) {
|
||||
s->child.pidfd = pidfd_open(pid, 0);
|
||||
if (s->child.pidfd < 0) {
|
||||
/* Propagate errors unless the syscall is not supported or blocked */
|
||||
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
|
||||
return -errno;
|
||||
} else
|
||||
s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
|
||||
} else
|
||||
s->child.pidfd = -EBADF;
|
||||
|
||||
if (EVENT_SOURCE_WATCH_PIDFD(s)) {
|
||||
/* We have a pidfd and we only want to watch for exit */
|
||||
/* We only want to watch for exit */
|
||||
r = source_child_pidfd_register(s, s->enabled);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
} else {
|
||||
/* We have no pidfd or we shall wait for some other event than WEXITED */
|
||||
/* We shall wait for some other event than WEXITED */
|
||||
r = event_make_signal_data(e, SIGCHLD, NULL);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@@ -1727,17 +1712,12 @@ _public_ int sd_event_add_child_pidfd(
|
||||
|
||||
s->wakeup = WAKEUP_EVENT_SOURCE;
|
||||
s->child.pidfd = pidfd;
|
||||
s->child.pid = pid;
|
||||
s->child.options = options;
|
||||
s->child.callback = callback;
|
||||
s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
|
||||
s->userdata = userdata;
|
||||
s->enabled = SD_EVENT_ONESHOT;
|
||||
|
||||
r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (EVENT_SOURCE_WATCH_PIDFD(s)) {
|
||||
/* We only want to watch for WEXITED */
|
||||
r = source_child_pidfd_register(s, s->enabled);
|
||||
@@ -1752,6 +1732,11 @@ _public_ int sd_event_add_child_pidfd(
|
||||
e->need_process_child = true;
|
||||
}
|
||||
|
||||
r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
s->child.pid = pid;
|
||||
e->n_online_child_sources++;
|
||||
|
||||
if (ret)
|
||||
@@ -3239,12 +3224,10 @@ _public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, cons
|
||||
if (si)
|
||||
copy = *si;
|
||||
|
||||
if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
|
||||
/* Let's propagate the error only if the system call is not implemented or prohibited */
|
||||
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
|
||||
return -errno;
|
||||
} else
|
||||
return 0;
|
||||
if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0)
|
||||
return -errno;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
|
||||
|
||||
@@ -198,7 +198,7 @@ static int post_handler(sd_event_source *s, void *userdata) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
static void test_basic_one(bool with_pidfd) {
|
||||
TEST(basic) {
|
||||
sd_event *e = NULL;
|
||||
sd_event_source *w = NULL, *x = NULL, *y = NULL, *z = NULL, *q = NULL, *t = NULL;
|
||||
static const char ch = 'x';
|
||||
@@ -207,10 +207,6 @@ static void test_basic_one(bool with_pidfd) {
|
||||
uint64_t event_now;
|
||||
int64_t priority;
|
||||
|
||||
log_info("/* %s(pidfd=%s) */", __func__, yes_no(with_pidfd));
|
||||
|
||||
assert_se(setenv("SYSTEMD_PIDFD", yes_no(with_pidfd), 1) >= 0);
|
||||
|
||||
assert_se(pipe(a) >= 0);
|
||||
assert_se(pipe(b) >= 0);
|
||||
assert_se(pipe(d) >= 0);
|
||||
@@ -301,13 +297,6 @@ static void test_basic_one(bool with_pidfd) {
|
||||
safe_close_pair(b);
|
||||
safe_close_pair(d);
|
||||
safe_close_pair(k);
|
||||
|
||||
assert_se(unsetenv("SYSTEMD_PIDFD") >= 0);
|
||||
}
|
||||
|
||||
TEST(basic) {
|
||||
test_basic_one(true); /* test with pidfd */
|
||||
test_basic_one(false); /* test without pidfd */
|
||||
}
|
||||
|
||||
TEST(sd_event_now) {
|
||||
@@ -583,13 +572,7 @@ TEST(pidfd) {
|
||||
|
||||
assert_se(pid > 1);
|
||||
|
||||
pidfd = pidfd_open(pid, 0);
|
||||
if (pidfd < 0) {
|
||||
/* No pidfd_open() supported or blocked? */
|
||||
assert_se(ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno));
|
||||
(void) wait_for_terminate(pid, NULL);
|
||||
return;
|
||||
}
|
||||
ASSERT_OK(pidfd = pidfd_open(pid, 0));
|
||||
|
||||
pid2 = fork();
|
||||
if (pid2 == 0)
|
||||
|
||||
@@ -879,7 +879,7 @@ static int create_session_message(
|
||||
|
||||
if (!avoid_pidfd) {
|
||||
pidfd = pidfd_open(getpid_cached(), 0);
|
||||
if (pidfd < 0 && !ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
|
||||
if (pidfd < 0)
|
||||
return -errno;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user