mirror of
https://github.com/morgan9e/systemd
synced 2026-04-15 00:47:10 +09:00
324 lines
12 KiB
C
324 lines
12 KiB
C
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
|
/***
|
|
Copyright © 2010 ProFUSION embedded systems
|
|
***/
|
|
|
|
#include <errno.h>
|
|
#include <signal.h>
|
|
#include <sys/wait.h>
|
|
#include <unistd.h>
|
|
|
|
#include "alloc-util.h"
|
|
#include "constants.h"
|
|
#include "dirent-util.h"
|
|
#include "errno-util.h"
|
|
#include "fd-util.h"
|
|
#include "format-util.h"
|
|
#include "initrd-util.h"
|
|
#include "killall.h"
|
|
#include "parse-util.h"
|
|
#include "process-util.h"
|
|
#include "set.h"
|
|
#include "stdio-util.h"
|
|
#include "string-util.h"
|
|
#include "terminal-util.h"
|
|
|
|
static int argv_has_at(const PidRef *pid) {
|
|
int r;
|
|
|
|
assert(pidref_is_set(pid));
|
|
assert(!pidref_is_remote(pid));
|
|
|
|
const char *p = procfs_file_alloca(pid->pid, "cmdline");
|
|
_cleanup_fclose_ FILE *f = fopen(p, "re");
|
|
if (!f)
|
|
return log_debug_errno(errno, "Failed to open %s, ignoring: %m", p);
|
|
|
|
/* Try to read the first character of the command line. If the cmdline is empty (which might be the case for
|
|
* kernel threads but potentially also other stuff), this line won't do anything, but we don't care much, as
|
|
* actual kernel threads are already filtered out above. */
|
|
char c = 0;
|
|
(void) fread(&c, 1, 1, f);
|
|
|
|
r = pidref_verify(pid);
|
|
if (r < 0)
|
|
return log_debug_errno(r, "Failed to verify pid " PID_FMT ", ignoring: %m", pid->pid);
|
|
|
|
/* Processes with argv[0][0] = '@' we ignore from the killing spree.
|
|
*
|
|
* https://systemd.io/ROOT_STORAGE_DAEMONS */
|
|
return c == '@';
|
|
}
|
|
|
|
static bool is_in_survivor_cgroup(const PidRef *pid) {
|
|
_cleanup_free_ char *cgroup_path = NULL;
|
|
int r;
|
|
|
|
assert(pidref_is_set(pid));
|
|
|
|
r = cg_pidref_get_path(/* root= */ NULL, pid, &cgroup_path);
|
|
if (r == -EUNATCH) {
|
|
log_warning_errno(r, "Process " PID_FMT " appears to originate in foreign namespace, ignoring.", pid->pid);
|
|
return true;
|
|
}
|
|
if (r < 0) {
|
|
log_warning_errno(r, "Failed to get cgroup path of process " PID_FMT ", ignoring: %m", pid->pid);
|
|
return false;
|
|
}
|
|
|
|
r = cg_get_xattr_bool(cgroup_path, "user.survive_final_kill_signal");
|
|
/* user xattr support was added to kernel v5.7, try with the trusted namespace as a fallback */
|
|
if (ERRNO_IS_NEG_XATTR_ABSENT(r))
|
|
r = cg_get_xattr_bool(cgroup_path, "trusted.survive_final_kill_signal");
|
|
if (r < 0 && !ERRNO_IS_NEG_XATTR_ABSENT(r))
|
|
log_debug_errno(r,
|
|
"Failed to get survive_final_kill_signal xattr of %s, ignoring: %m",
|
|
cgroup_path);
|
|
|
|
return r > 0;
|
|
}
|
|
|
|
/* Decides whether a process is to be spared from the broadcast signal.
 *
 * Returns true if the process shall be left alone, false if it shall be signalled. If warn_rootfs is true, a
 * notice is logged for spared root-owned processes for which pidref_from_same_root_fs() returns a positive
 * value. */
static bool ignore_proc(PidRef *pid, bool warn_rootfs) {
        uid_t owner;

        assert(pidref_is_set(pid));

        /* That's us (PID 1), never signal ourselves */
        if (pid->pid == 1)
                return true;

        /* Skip kernel threads. Any non-zero result counts, so processes for which this cannot be determined
         * are skipped as well. */
        if (pidref_is_kernel_thread(pid) != 0)
                return true;

        /* Skip members of a cgroup marked with the user.survive_final_kill_signal xattr */
        if (is_in_survivor_cgroup(pid))
                return true;

        /* If the owner cannot be determined, err on the side of not killing */
        if (pidref_get_uid(pid, &owner) < 0)
                return true;

        /* Non-root processes are always subject to be killed, and so are root processes whose command line
         * does not start with '@'. If argv_has_at() fails (non-zero), the process is ignored. */
        if (owner != 0 || argv_has_at(pid) == 0)
                return false;

        /* From here on the process is spared; all that is left is the optional warning. */
        if (warn_rootfs && pidref_from_same_root_fs(pid, NULL) > 0) {
                _cleanup_free_ char *name = NULL;

                (void) pidref_get_comm(pid, &name);

                log_notice("Process " PID_FMT " (%s) has been marked to be excluded from killing. It is "
                           "running from the root file system, and thus likely to block re-mounting of the "
                           "root file system to read-only. Please consider moving it into an initrd file "
                           "system instead.", pid->pid, strna(name));
        }

        return true;
}
|
|
|
|
/* Logs a single warning listing every PID in the set, each followed by its comm name in parentheses when
 * pid_get_comm() succeeds. Logs nothing if the resulting list is empty. If extending the list fails, an OOM
 * warning is logged and the function returns early. */
static void log_children_not_yet_killed(Set *pids) {
        _cleanup_free_ char *summary = NULL;
        void *entry;

        SET_FOREACH(entry, pids) {
                _cleanup_free_ char *comm = NULL;
                pid_t pid = PTR_TO_PID(entry);
                int r;

                /* Fall back to the bare PID if the name cannot be looked up */
                if (pid_get_comm(pid, &comm) < 0)
                        r = strextendf_with_separator(&summary, ", ", PID_FMT, pid);
                else
                        r = strextendf_with_separator(&summary, ", ", PID_FMT " (%s)", pid, comm);
                if (r < 0)
                        return (void) log_oom_warning();
        }

        if (!isempty(summary))
                log_warning("Waiting for process: %s", summary);
}
|
|
|
|
/* Waits until all processes in 'pids' are gone or 'timeout' has elapsed, whichever comes first.
 *
 * 'mask' is the signal set handed to sigtimedwait(); SIGCHLD is the signal this loop expects from it.
 *
 * Returns the number of entries remaining in the set, i.e. the number of processes still "alive" after the
 * timeout (0 if all of them went away). If waitpid() or sigtimedwait() fail unexpectedly, the result of
 * log_error_errno() is returned. */
static int wait_for_children(Set *pids, sigset_t *mask, usec_t timeout) {
        usec_t deadline, log_at, t;

        assert(mask);

        if (set_isempty(pids))
                return 0;

        t = now(CLOCK_MONOTONIC);
        deadline = usec_add(t, timeout);

        /* Report stragglers after 10s, or after half the timeout if the deadline comes sooner than that. */
        log_at = usec_add(t, 10u * USEC_PER_SEC);
        if (log_at > deadline)
                log_at = usec_add(t, timeout / 2u);

        for (;;) {
                struct timespec ts;
                usec_t wait_usec;
                pid_t reaped;
                void *entry;
                int sig;

                /* First, collect whatever the kernel reports as terminated children. Most processes will
                 * probably be our children, but some are not (they might be grandchildren instead). */
                while ((reaped = waitpid(-1, NULL, WNOHANG)) > 0)
                        (void) set_remove(pids, PID_TO_PTR(reaped));
                if (reaped < 0 && errno != ECHILD)
                        return log_error_errno(errno, "waitpid() failed: %m");

                /* Then explicitly probe whoever remains and might not be our child. kill(pid, 0) delivers no
                 * signal, but tells us whether the process still exists; only ESRCH counts as "gone". */
                SET_FOREACH(entry, pids)
                        if (kill(PTR_TO_PID(entry), 0) != 0 && errno == ESRCH)
                                set_remove(pids, entry);

                if (set_isempty(pids))
                        return 0;

                t = now(CLOCK_MONOTONIC);

                /* Log the children not yet killed, but only once: zero marks "already logged". */
                if (log_at > 0 && t >= log_at) {
                        log_children_not_yet_killed(pids);
                        log_at = 0;
                }

                if (t >= deadline)
                        return set_size(pids);

                /* Sleep until the deadline, or until the pending log time if that comes first. */
                wait_usec = deadline - t;
                if (log_at > 0)
                        wait_usec = MIN(wait_usec, log_at - t);
                timespec_store(&ts, wait_usec);

                sig = sigtimedwait(mask, NULL, &ts);
                if (sig == SIGCHLD)
                        continue;

                if (sig >= 0)
                        log_warning("sigtimedwait() returned unexpected signal.");
                else if (errno != EAGAIN) /* EAGAIN is tolerated: simply go around the loop again */
                        return log_error_errno(errno, "sigtimedwait() failed: %m");
        }
}
|
|
|
|
/* Sends the specified signal to all remaining processes, if not excluded by ignore_proc().
 *
 * sig:         the signal to send
 * pids:        optional set; if non-NULL, the PID of every process successfully signalled is added to it
 * send_sighup: if true, additionally send SIGHUP to every non-ignored process that has a controlling TTY
 *
 * Returns the number of processes to which the specified signal was sent. If /proc/ cannot be opened or
 * enumerated, the result of log_warning_errno() is returned instead. */
static int killall(int sig, Set *pids, bool send_sighup) {
        _cleanup_closedir_ DIR *dir = NULL;
        int n_killed = 0, r;

        r = proc_dir_open(&dir);
        if (r < 0)
                return log_warning_errno(r, "Failed to open /proc/: %m");

        for (;;) {
                _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;

                r = proc_dir_read_pidref(dir, &pidref);
                if (r < 0)
                        return log_warning_errno(r, "Failed to enumerate /proc/: %m");
                if (r == 0) /* no further entries */
                        break;

                /* The root file system warning is only requested for the final SIGKILL round outside of the
                 * initrd. */
                if (ignore_proc(&pidref, sig == SIGKILL && !in_initrd()))
                        continue;

                if (sig == SIGKILL) {
                        _cleanup_free_ char *s = NULL;

                        (void) pidref_get_comm(&pidref, &s);
                        log_notice("Sending SIGKILL to PID "PID_FMT" (%s).", pidref.pid, strna(s));
                }

                r = pidref_kill(&pidref, sig);
                if (r < 0) {
                        /* The error is carried in the return value, so log 'r' rather than errno, which may
                         * be stale or unrelated at this point. -ESRCH is not reported at all. */
                        if (r != -ESRCH)
                                log_warning_errno(r, "Could not kill " PID_FMT ", ignoring: %m", pidref.pid);
                } else {
                        n_killed++;
                        if (pids) {
                                /* Failing to track the PID is not fatal, the signal has been sent already. */
                                r = set_put(pids, PID_TO_PTR(pidref.pid));
                                if (r < 0)
                                        (void) log_oom_warning();
                        }
                }

                if (send_sighup) {
                        /* Optionally, also send a SIGHUP signal, but only if the process has a controlling
                         * tty. This is useful to allow handling of shells which ignore SIGTERM but react to
                         * SIGHUP. We do not send this to processes that have no controlling TTY since we
                         * don't want to trigger reloads of daemon processes. Also we make sure to only send
                         * this after SIGTERM so that SIGTERM is always first in the queue. */

                        if (get_ctty_devnr(pidref.pid, NULL) >= 0)
                                /* it's OK if the process is gone, just ignore the result */
                                (void) pidref_kill(&pidref, SIGHUP);
                }
        }

        return n_killed;
}
|
|
|
|
/* Sends the specified signal to all remaining processes, if not excluded by ignore_proc().
 *
 * Returns:
 * - if wait_for_exit is true and at least one process was signalled: the result of wait_for_children(), i.e.
 *   the number of processes still "alive" after the timeout (that should have been killed),
 * - otherwise: the result of killall(), i.e. the number of processes to which the signal was sent. */
int broadcast_signal(int sig, bool wait_for_exit, bool send_sighup, usec_t timeout) {
        _cleanup_set_free_ Set *pids = NULL;
        sigset_t mask, oldmask;
        int n_left;

        /* The PIDs we signal only need to be tracked if we are going to wait for them afterwards. */
        pids = wait_for_exit ? set_new(NULL) : NULL;

        /* Block SIGCHLD for the duration of the operation; the previous mask is restored at the end. */
        assert_se(sigemptyset(&mask) == 0);
        assert_se(sigaddset(&mask, SIGCHLD) == 0);
        assert_se(sigprocmask(SIG_BLOCK, &mask, &oldmask) == 0);

        /* Stop everything while we walk the process list, and resume it again afterwards. */
        if (kill(-1, SIGSTOP) < 0 && errno != ESRCH)
                log_warning_errno(errno, "kill(-1, SIGSTOP) failed: %m");

        n_left = killall(sig, pids, send_sighup);

        if (kill(-1, SIGCONT) < 0 && errno != ESRCH)
                log_warning_errno(errno, "kill(-1, SIGCONT) failed: %m");

        if (n_left > 0 && wait_for_exit)
                n_left = wait_for_children(pids, &mask, timeout);

        assert_se(sigprocmask(SIG_SETMASK, &oldmask, NULL) == 0);

        return n_left;
}
|