mirror of
https://github.com/morgan9e/systemd
synced 2026-04-14 00:14:32 +09:00
sd-event: always operate on child source via pidfd
Follow-up for 6e14c46bac
Nowadays a pidfd is guarenteed to be around for child
event sources, hence drop the effectively unused pid-based
branches.
Addresses https://github.com/systemd/systemd/pull/36410#discussion_r1959930716
This commit is contained in:
@@ -201,13 +201,11 @@
|
||||
</para>
|
||||
|
||||
<para><function>sd_event_source_get_child_pidfd()</function> retrieves the file descriptor referencing
|
||||
the watched process ("pidfd") if this functionality is available. On kernels that support the concept the
|
||||
event loop will make use of pidfds to watch child processes, regardless of whether the individual event sources
|
||||
are allocated via <function>sd_event_add_child()</function> or
|
||||
<function>sd_event_add_child_pidfd()</function>. If the latter call was used to allocate the event
|
||||
source, this function returns the file descriptor used for allocation. On kernels that do not support the
|
||||
pidfd concept this function will fail with <constant>EOPNOTSUPP</constant>. This call takes the event
|
||||
source object as the <parameter>source</parameter> parameter and returns the numeric file descriptor.
|
||||
the watched process ("pidfd"). The event loop internally makes use of pidfds to watch child processes,
|
||||
regardless of whether the individual event sources are allocated via <function>sd_event_add_child()</function>
|
||||
or <function>sd_event_add_child_pidfd()</function>. If the latter call was used to allocate the event
|
||||
source, this function returns the original file descriptor used for allocation. This call takes
|
||||
the event source object as the <parameter>source</parameter> parameter and returns the numeric file descriptor.
|
||||
</para>
|
||||
|
||||
<para><function>sd_event_source_get_child_pidfd_own()</function> may be used to query whether the pidfd
|
||||
@@ -228,15 +226,10 @@
|
||||
exist. This behaviour might change eventually.</para>
|
||||
|
||||
<para><function>sd_event_source_send_child_signal()</function> may be used to send a UNIX signal to the
|
||||
watched process. If the pidfd concept is supported in the kernel, this is implemented via <citerefentry
|
||||
project='man-pages'><refentrytitle>pidfd_send_signal</refentrytitle><manvolnum>2</manvolnum></citerefentry>
|
||||
and otherwise via <citerefentry
|
||||
project='man-pages'><refentrytitle>rt_sigqueueinfo</refentrytitle><manvolnum>2</manvolnum></citerefentry>
|
||||
(or via <citerefentry
|
||||
project='man-pages'><refentrytitle>kill</refentrytitle><manvolnum>2</manvolnum></citerefentry> in case
|
||||
<parameter>info</parameter> is <constant>NULL</constant>). The specified parameters match those of these
|
||||
underlying system calls, except that the <parameter>info</parameter> is never modified (and is thus
|
||||
declared constant). Like for the underlying system calls, the <parameter>flags</parameter> parameter
|
||||
watched process via <citerefentry
|
||||
project='man-pages'><refentrytitle>pidfd_send_signal</refentrytitle><manvolnum>2</manvolnum></citerefentry>.
|
||||
The specified parameters match those of the underlying system call, except that the <parameter>info</parameter>
|
||||
is never modified (and is thus declared constant). Like for the underlying system call, the <parameter>flags</parameter> parameter
|
||||
currently must be zero.</para>
|
||||
</refsect1>
|
||||
|
||||
@@ -299,14 +292,6 @@
|
||||
<listitem><para>The passed event source is not a child process event source.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><constant>-EOPNOTSUPP</constant></term>
|
||||
|
||||
<listitem><para>A pidfd was requested but the kernel does not support this concept.</para>
|
||||
|
||||
<xi:include href="version-info.xml" xpointer="v245"/></listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect2>
|
||||
</refsect1>
|
||||
|
||||
@@ -43,11 +43,10 @@
|
||||
|
||||
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
|
||||
|
||||
static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
|
||||
static bool EVENT_SOURCE_WATCH_PIDFD(const sd_event_source *s) {
|
||||
/* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
|
||||
return s &&
|
||||
s->type == SOURCE_CHILD &&
|
||||
s->child.pidfd >= 0 &&
|
||||
s->child.options == WEXITED;
|
||||
}
|
||||
|
||||
@@ -1088,12 +1087,11 @@ static sd_event_source* source_free(sd_event_source *s) {
|
||||
/* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
|
||||
|
||||
if (s->child.process_owned) {
|
||||
assert(s->child.pid > 0);
|
||||
assert(s->child.pidfd >= 0);
|
||||
|
||||
if (!s->child.exited) {
|
||||
if (s->child.pidfd >= 0)
|
||||
r = RET_NERRNO(pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0));
|
||||
else
|
||||
r = RET_NERRNO(kill(s->child.pid, SIGKILL));
|
||||
r = RET_NERRNO(pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0));
|
||||
if (r < 0 && r != -ESRCH)
|
||||
log_debug_errno(r, "Failed to kill process " PID_FMT ", ignoring: %m",
|
||||
s->child.pid);
|
||||
@@ -1103,10 +1101,7 @@ static sd_event_source* source_free(sd_event_source *s) {
|
||||
siginfo_t si = {};
|
||||
|
||||
/* Reap the child if we can */
|
||||
if (s->child.pidfd >= 0)
|
||||
(void) waitid(P_PIDFD, s->child.pidfd, &si, WEXITED);
|
||||
else
|
||||
(void) waitid(P_PID, s->child.pid, &si, WEXITED);
|
||||
(void) waitid(P_PIDFD, s->child.pidfd, &si, WEXITED);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2999,13 +2994,13 @@ static int event_source_online(
|
||||
|
||||
case SOURCE_CHILD:
|
||||
if (EVENT_SOURCE_WATCH_PIDFD(s)) {
|
||||
/* yes, we have pidfd */
|
||||
/* yes, we can rely on pidfd */
|
||||
|
||||
r = source_child_pidfd_register(s, enabled);
|
||||
if (r < 0)
|
||||
return r;
|
||||
} else {
|
||||
/* no pidfd, or something other to watch for than WEXITED */
|
||||
/* something other to watch for than WEXITED */
|
||||
|
||||
r = event_make_signal_data(s->event, SIGCHLD, NULL);
|
||||
if (r < 0) {
|
||||
@@ -3198,9 +3193,6 @@ _public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
|
||||
assert_return(s->type == SOURCE_CHILD, -EDOM);
|
||||
assert_return(!event_origin_changed(s->event), -ECHILD);
|
||||
|
||||
if (s->child.pidfd < 0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
return s->child.pidfd;
|
||||
}
|
||||
|
||||
@@ -3251,9 +3243,7 @@ _public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
|
||||
assert_return(s, -EINVAL);
|
||||
assert_return(s->type == SOURCE_CHILD, -EDOM);
|
||||
assert_return(!event_origin_changed(s->event), -ECHILD);
|
||||
|
||||
if (s->child.pidfd < 0)
|
||||
return -EOPNOTSUPP;
|
||||
assert(s->child.pidfd >= 0);
|
||||
|
||||
return s->child.pidfd_owned;
|
||||
}
|
||||
@@ -3262,9 +3252,7 @@ _public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
|
||||
assert_return(s, -EINVAL);
|
||||
assert_return(s->type == SOURCE_CHILD, -EDOM);
|
||||
assert_return(!event_origin_changed(s->event), -ECHILD);
|
||||
|
||||
if (s->child.pidfd < 0)
|
||||
return -EOPNOTSUPP;
|
||||
assert(s->child.pidfd >= 0);
|
||||
|
||||
s->child.pidfd_owned = own;
|
||||
return 0;
|
||||
@@ -3723,9 +3711,9 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori
|
||||
|
||||
e->need_process_child = false;
|
||||
|
||||
/* So, this is ugly. We iteratively invoke waitid() with P_PID + WNOHANG for each PID we wait
|
||||
* for, instead of using P_ALL. This is because we only want to get child information of very
|
||||
* specific child processes, and not all of them. We might not have processed the SIGCHLD event
|
||||
/* So, this is ugly. We iteratively invoke waitid() + WNOHANG with each child process we shall wait for,
|
||||
* instead of using P_ALL. This is because we only want to get child information of very specific
|
||||
* child processes, and not all of them. We might not have processed the SIGCHLD event
|
||||
* of a previous invocation and we don't want to maintain a unbounded *per-child* event queue,
|
||||
* hence we really don't want anything flushed out of the kernel's queue that we don't care
|
||||
* about. Since this is O(n) this means that if you have a lot of processes you probably want
|
||||
@@ -3736,6 +3724,7 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori
|
||||
|
||||
HASHMAP_FOREACH(s, e->child_sources) {
|
||||
assert(s->type == SOURCE_CHILD);
|
||||
assert(s->child.pidfd >= 0);
|
||||
|
||||
if (s->priority > threshold)
|
||||
continue;
|
||||
@@ -3755,7 +3744,7 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori
|
||||
continue;
|
||||
|
||||
zero(s->child.siginfo);
|
||||
if (waitid(P_PID, s->child.pid, &s->child.siginfo,
|
||||
if (waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo,
|
||||
WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
|
||||
return negative_errno();
|
||||
|
||||
@@ -3764,14 +3753,12 @@ static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priori
|
||||
|
||||
if (zombie)
|
||||
s->child.exited = true;
|
||||
|
||||
if (!zombie && (s->child.options & WEXITED)) {
|
||||
/* If the child isn't dead then let's immediately remove the state
|
||||
* change from the queue, since there's no benefit in leaving it
|
||||
* queued. */
|
||||
else if (s->child.options & WEXITED) {
|
||||
/* If the child isn't dead then let's immediately remove the state change
|
||||
* from the queue, since there's no benefit in leaving it queued. */
|
||||
|
||||
assert(s->child.options & (WSTOPPED|WCONTINUED));
|
||||
(void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
|
||||
(void) waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
|
||||
}
|
||||
|
||||
r = source_set_pending(s, true);
|
||||
@@ -3792,6 +3779,7 @@ static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
|
||||
assert(e);
|
||||
assert(s);
|
||||
assert(s->type == SOURCE_CHILD);
|
||||
assert(s->child.pidfd >= 0);
|
||||
|
||||
if (s->pending)
|
||||
return 0;
|
||||
@@ -3802,8 +3790,13 @@ static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
|
||||
if (!EVENT_SOURCE_WATCH_PIDFD(s))
|
||||
return 0;
|
||||
|
||||
/* Note that pidfd would also generate EPOLLHUP when the process gets reaped. But at this point we
|
||||
* only permit EPOLLIN, under the assumption that upon EPOLLHUP the child source should already
|
||||
* be set to pending, and we would have returned early above. */
|
||||
assert(!s->child.exited);
|
||||
|
||||
zero(s->child.siginfo);
|
||||
if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
|
||||
if (waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
|
||||
return -errno;
|
||||
|
||||
if (s->child.siginfo.si_pid == 0)
|
||||
@@ -4228,7 +4221,7 @@ static int source_dispatch(sd_event_source *s) {
|
||||
|
||||
/* Now, reap the PID for good. */
|
||||
if (zombie) {
|
||||
(void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
|
||||
(void) waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo, WNOHANG|WEXITED);
|
||||
s->child.waited = true;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user