mirror of
https://github.com/morgan9e/systemd
synced 2026-04-15 00:47:10 +09:00
pid1: introduce new SERVICE_{DEAD|FAILED}_BEFORE_AUTO_RESTART service substates
When a service deactivates and is then automatically restarted via
Restart= we currently quickly transition through
SERVICE_DEAD/SERVICE_FAILED. Which is weird given it's not the
normal ("permanent") dead/failed state, but a transitory one we
immediately leave from again. We do this so that software that looks for
failures/successes can take notice, even if we restart as a consequence
of the deactivation.
Let's clean this up a bit: let's introduce two new states:
SERVICE_DEAD_BEFORE_AUTO_RESTART and SERVICE_FAILED_BEFORE_AUTO_RESTART
that are used for the transitory states. Both the SERVICE_DEAD and
SERVICE_DEAD_BEFORE_AUTO_RESTART will map to the high-level
UNIT_INACTIVE state though. (and similar for the respective failed
states). This means the high-level state machine won't change by this,
only the low-level one.
This clearly seperates the substates, which makes the state engine
cleaner, and allows clients to follow precisely whether we are in a
transitory dead/failed state, or a permanent one, by looking at the
service substate. Moreover it allows us to remove the 'n_keep_fd_store'
which so far we used to ensure the fdstore was not released during this
transitory dead/failed state but only during the permanent one. Since we
can now distinguish these states properly we can just use that.
This has been bugging me for a while. Let's clean this up.
Note that the unit restart logic is already nicely covered in the
testsiute, hence this adds no new tests for that.
And yes, this could be considered a compat break, but sofar we took the
liberty to make changes to the low-level state machine (i.e. SERVICE_xyz
states, sometimes called "substates") without considering this a bad
breakage – the high-level state machine (i.e. UNIT_xyz states) should
be considered API that cannot be changed.
This commit is contained in:
@@ -180,27 +180,29 @@ static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
|
||||
DEFINE_STRING_TABLE_LOOKUP(scope_state, ScopeState);
|
||||
|
||||
static const char* const service_state_table[_SERVICE_STATE_MAX] = {
|
||||
[SERVICE_DEAD] = "dead",
|
||||
[SERVICE_CONDITION] = "condition",
|
||||
[SERVICE_START_PRE] = "start-pre",
|
||||
[SERVICE_START] = "start",
|
||||
[SERVICE_START_POST] = "start-post",
|
||||
[SERVICE_RUNNING] = "running",
|
||||
[SERVICE_EXITED] = "exited",
|
||||
[SERVICE_RELOAD] = "reload",
|
||||
[SERVICE_RELOAD_SIGNAL] = "reload-signal",
|
||||
[SERVICE_RELOAD_NOTIFY] = "reload-notify",
|
||||
[SERVICE_STOP] = "stop",
|
||||
[SERVICE_STOP_WATCHDOG] = "stop-watchdog",
|
||||
[SERVICE_STOP_SIGTERM] = "stop-sigterm",
|
||||
[SERVICE_STOP_SIGKILL] = "stop-sigkill",
|
||||
[SERVICE_STOP_POST] = "stop-post",
|
||||
[SERVICE_FINAL_WATCHDOG] = "final-watchdog",
|
||||
[SERVICE_FINAL_SIGTERM] = "final-sigterm",
|
||||
[SERVICE_FINAL_SIGKILL] = "final-sigkill",
|
||||
[SERVICE_FAILED] = "failed",
|
||||
[SERVICE_AUTO_RESTART] = "auto-restart",
|
||||
[SERVICE_CLEANING] = "cleaning",
|
||||
[SERVICE_DEAD] = "dead",
|
||||
[SERVICE_CONDITION] = "condition",
|
||||
[SERVICE_START_PRE] = "start-pre",
|
||||
[SERVICE_START] = "start",
|
||||
[SERVICE_START_POST] = "start-post",
|
||||
[SERVICE_RUNNING] = "running",
|
||||
[SERVICE_EXITED] = "exited",
|
||||
[SERVICE_RELOAD] = "reload",
|
||||
[SERVICE_RELOAD_SIGNAL] = "reload-signal",
|
||||
[SERVICE_RELOAD_NOTIFY] = "reload-notify",
|
||||
[SERVICE_STOP] = "stop",
|
||||
[SERVICE_STOP_WATCHDOG] = "stop-watchdog",
|
||||
[SERVICE_STOP_SIGTERM] = "stop-sigterm",
|
||||
[SERVICE_STOP_SIGKILL] = "stop-sigkill",
|
||||
[SERVICE_STOP_POST] = "stop-post",
|
||||
[SERVICE_FINAL_WATCHDOG] = "final-watchdog",
|
||||
[SERVICE_FINAL_SIGTERM] = "final-sigterm",
|
||||
[SERVICE_FINAL_SIGKILL] = "final-sigkill",
|
||||
[SERVICE_FAILED] = "failed",
|
||||
[SERVICE_DEAD_BEFORE_AUTO_RESTART] = "dead-before-auto-restart",
|
||||
[SERVICE_FAILED_BEFORE_AUTO_RESTART] = "failed-before-auto-restart",
|
||||
[SERVICE_AUTO_RESTART] = "auto-restart",
|
||||
[SERVICE_CLEANING] = "cleaning",
|
||||
};
|
||||
|
||||
DEFINE_STRING_TABLE_LOOKUP(service_state, ServiceState);
|
||||
|
||||
@@ -144,6 +144,8 @@ typedef enum ServiceState {
|
||||
SERVICE_FINAL_SIGTERM, /* In case the STOP_POST executable hangs, we shoot that down, too */
|
||||
SERVICE_FINAL_SIGKILL,
|
||||
SERVICE_FAILED,
|
||||
SERVICE_DEAD_BEFORE_AUTO_RESTART,
|
||||
SERVICE_FAILED_BEFORE_AUTO_RESTART,
|
||||
SERVICE_AUTO_RESTART,
|
||||
SERVICE_CLEANING,
|
||||
_SERVICE_STATE_MAX,
|
||||
|
||||
@@ -66,6 +66,8 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
|
||||
[SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
|
||||
[SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
|
||||
[SERVICE_FAILED] = UNIT_FAILED,
|
||||
[SERVICE_DEAD_BEFORE_AUTO_RESTART] = UNIT_INACTIVE,
|
||||
[SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
|
||||
[SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
|
||||
[SERVICE_CLEANING] = UNIT_MAINTENANCE,
|
||||
};
|
||||
@@ -92,6 +94,8 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
|
||||
[SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
|
||||
[SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
|
||||
[SERVICE_FAILED] = UNIT_FAILED,
|
||||
[SERVICE_DEAD_BEFORE_AUTO_RESTART] = UNIT_INACTIVE,
|
||||
[SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
|
||||
[SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
|
||||
[SERVICE_CLEANING] = UNIT_MAINTENANCE,
|
||||
};
|
||||
@@ -276,7 +280,7 @@ usec_t service_restart_usec(Service *s) {
|
||||
* between job enqueuing and running is usually neglectable compared to the time
|
||||
* we'll be sleeping. */
|
||||
n_restarts = s->n_restarts +
|
||||
(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART) ? 1 : 0);
|
||||
(IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART) ? 1 : 0);
|
||||
|
||||
/* n_restarts can equal to 0 if no restart has happened nor planned */
|
||||
if (n_restarts <= 1 ||
|
||||
@@ -380,9 +384,6 @@ static void service_fd_store_unlink(ServiceFDStore *fs) {
|
||||
static void service_release_fd_store(Service *s) {
|
||||
assert(s);
|
||||
|
||||
if (s->n_keep_fd_store > 0)
|
||||
return;
|
||||
|
||||
log_unit_debug(UNIT(s), "Releasing all stored fds");
|
||||
while (s->fd_store)
|
||||
service_fd_store_unlink(s->fd_store);
|
||||
@@ -395,6 +396,10 @@ static void service_release_resources(Unit *u) {
|
||||
|
||||
assert(s);
|
||||
|
||||
/* Don't release resources if this is a transitionary failed/dead state */
|
||||
if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART))
|
||||
return;
|
||||
|
||||
if (!s->fd_store && s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0)
|
||||
return;
|
||||
|
||||
@@ -1201,7 +1206,9 @@ static void service_set_state(Service *s, ServiceState state) {
|
||||
s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
|
||||
}
|
||||
|
||||
if (IN_SET(state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART)) {
|
||||
if (IN_SET(state,
|
||||
SERVICE_DEAD, SERVICE_FAILED,
|
||||
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART)) {
|
||||
unit_unwatch_all_pids(UNIT(s));
|
||||
unit_dequeue_rewatch_pids(UNIT(s));
|
||||
}
|
||||
@@ -1314,7 +1321,10 @@ static int service_coldplug(Unit *u) {
|
||||
return r;
|
||||
}
|
||||
|
||||
if (!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART, SERVICE_CLEANING)) {
|
||||
if (!IN_SET(s->deserialized_state,
|
||||
SERVICE_DEAD, SERVICE_FAILED,
|
||||
SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART,
|
||||
SERVICE_CLEANING)) {
|
||||
(void) unit_enqueue_rewatch_pids(u);
|
||||
(void) unit_setup_dynamic_creds(u);
|
||||
(void) unit_setup_exec_runtime(u);
|
||||
@@ -1895,14 +1905,14 @@ static bool service_will_restart(Unit *u) {
|
||||
|
||||
if (s->will_auto_restart)
|
||||
return true;
|
||||
if (s->state == SERVICE_AUTO_RESTART)
|
||||
if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART))
|
||||
return true;
|
||||
|
||||
return unit_will_restart_default(u);
|
||||
}
|
||||
|
||||
static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) {
|
||||
ServiceState end_state;
|
||||
ServiceState end_state, restart_state;
|
||||
int r;
|
||||
|
||||
assert(s);
|
||||
@@ -1918,12 +1928,15 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
|
||||
if (s->result == SERVICE_SUCCESS) {
|
||||
unit_log_success(UNIT(s));
|
||||
end_state = SERVICE_DEAD;
|
||||
restart_state = SERVICE_DEAD_BEFORE_AUTO_RESTART;
|
||||
} else if (s->result == SERVICE_SKIP_CONDITION) {
|
||||
unit_log_skip(UNIT(s), service_result_to_string(s->result));
|
||||
end_state = SERVICE_DEAD;
|
||||
restart_state = SERVICE_DEAD_BEFORE_AUTO_RESTART;
|
||||
} else {
|
||||
unit_log_failure(UNIT(s), service_result_to_string(s->result));
|
||||
end_state = SERVICE_FAILED;
|
||||
restart_state = SERVICE_FAILED_BEFORE_AUTO_RESTART;
|
||||
}
|
||||
unit_warn_leftover_processes(UNIT(s), unit_log_leftover_process_stop);
|
||||
|
||||
@@ -1941,30 +1954,33 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
|
||||
s->will_auto_restart = true;
|
||||
}
|
||||
|
||||
/* Make sure service_release_resources() doesn't destroy our FD store, while we are changing through
|
||||
* SERVICE_FAILED/SERVICE_DEAD before entering into SERVICE_AUTO_RESTART. */
|
||||
s->n_keep_fd_store ++;
|
||||
|
||||
service_set_state(s, end_state);
|
||||
|
||||
if (s->will_auto_restart) {
|
||||
s->will_auto_restart = false;
|
||||
|
||||
/* We make two state changes here: one that maps to the high-level UNIT_INACTIVE/UNIT_FAILED
|
||||
* state (i.e. a state indicating deactivation), and then one that that maps to the
|
||||
* high-level UNIT_STARTING state (i.e. a state indicating activation). We do this so that
|
||||
* external software can watch the state changes and see all service failures, even if they
|
||||
* are only transitionary and followed by an automatic restart. We have fine-grained
|
||||
* low-level states for this though so that software can distinguish the permanent UNIT_INACTIVE
|
||||
* state from this transitionary UNIT_INACTIVE state by looking at the low-level states. */
|
||||
service_set_state(s, restart_state);
|
||||
|
||||
r = service_arm_timer(s, /* relative= */ true, service_restart_usec(s));
|
||||
if (r < 0) {
|
||||
s->n_keep_fd_store--;
|
||||
if (r < 0)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
service_set_state(s, SERVICE_AUTO_RESTART);
|
||||
} else
|
||||
} else {
|
||||
service_set_state(s, end_state);
|
||||
|
||||
/* If we shan't restart, then flush out the restart counter. But don't do that immediately, so that the
|
||||
* user can still introspect the counter. Do so on the next start. */
|
||||
s->flush_n_restarts = true;
|
||||
}
|
||||
|
||||
/* The new state is in effect, let's decrease the fd store ref counter again. Let's also re-add us to the GC
|
||||
* queue, so that the fd store is possibly gc'ed again */
|
||||
s->n_keep_fd_store--;
|
||||
unit_add_to_gc_queue(UNIT(s));
|
||||
|
||||
/* The next restart might not be a manual stop, hence reset the flag indicating manual stops */
|
||||
@@ -2653,14 +2669,11 @@ static int service_start(Unit *u) {
|
||||
if (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST))
|
||||
return 0;
|
||||
|
||||
/* A service that will be restarted must be stopped first to
|
||||
* trigger BindsTo and/or OnFailure dependencies. If a user
|
||||
* does not want to wait for the holdoff time to elapse, the
|
||||
* service should be manually restarted, not started. We
|
||||
* simply return EAGAIN here, so that any start jobs stay
|
||||
* queued, and assume that the auto restart timer will
|
||||
* eventually trigger the restart. */
|
||||
if (s->state == SERVICE_AUTO_RESTART)
|
||||
/* A service that will be restarted must be stopped first to trigger BindsTo and/or OnFailure
|
||||
* dependencies. If a user does not want to wait for the holdoff time to elapse, the service should
|
||||
* be manually restarted, not started. We simply return EAGAIN here, so that any start jobs stay
|
||||
* queued, and assume that the auto restart timer will eventually trigger the restart. */
|
||||
if (IN_SET(s->state, SERVICE_AUTO_RESTART, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART))
|
||||
return -EAGAIN;
|
||||
|
||||
assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED));
|
||||
@@ -2708,34 +2721,55 @@ static int service_stop(Unit *u) {
|
||||
/* Don't create restart jobs from manual stops. */
|
||||
s->forbid_restart = true;
|
||||
|
||||
/* Already on it */
|
||||
if (IN_SET(s->state,
|
||||
SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
|
||||
SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))
|
||||
switch (s->state) {
|
||||
|
||||
case SERVICE_STOP:
|
||||
case SERVICE_STOP_SIGTERM:
|
||||
case SERVICE_STOP_SIGKILL:
|
||||
case SERVICE_STOP_POST:
|
||||
case SERVICE_FINAL_WATCHDOG:
|
||||
case SERVICE_FINAL_SIGTERM:
|
||||
case SERVICE_FINAL_SIGKILL:
|
||||
/* Already on it */
|
||||
return 0;
|
||||
|
||||
/* A restart will be scheduled or is in progress. */
|
||||
if (s->state == SERVICE_AUTO_RESTART) {
|
||||
case SERVICE_AUTO_RESTART:
|
||||
/* A restart will be scheduled or is in progress. */
|
||||
service_set_state(s, SERVICE_DEAD);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* If there's already something running we go directly into kill mode. */
|
||||
if (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_STOP_WATCHDOG)) {
|
||||
case SERVICE_CONDITION:
|
||||
case SERVICE_START_PRE:
|
||||
case SERVICE_START:
|
||||
case SERVICE_START_POST:
|
||||
case SERVICE_RELOAD:
|
||||
case SERVICE_RELOAD_SIGNAL:
|
||||
case SERVICE_RELOAD_NOTIFY:
|
||||
case SERVICE_STOP_WATCHDOG:
|
||||
/* If there's already something running we go directly into kill mode. */
|
||||
service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_SUCCESS);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* If we are currently cleaning, then abort it, brutally. */
|
||||
if (s->state == SERVICE_CLEANING) {
|
||||
case SERVICE_CLEANING:
|
||||
/* If we are currently cleaning, then abort it, brutally. */
|
||||
service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_SUCCESS);
|
||||
return 0;
|
||||
|
||||
case SERVICE_RUNNING:
|
||||
case SERVICE_EXITED:
|
||||
service_enter_stop(s, SERVICE_SUCCESS);
|
||||
return 1;
|
||||
|
||||
case SERVICE_DEAD_BEFORE_AUTO_RESTART:
|
||||
case SERVICE_FAILED_BEFORE_AUTO_RESTART:
|
||||
case SERVICE_DEAD:
|
||||
case SERVICE_FAILED:
|
||||
default:
|
||||
/* Unknown state, or unit_stop() should already have handled these */
|
||||
assert_not_reached();
|
||||
}
|
||||
|
||||
assert(IN_SET(s->state, SERVICE_RUNNING, SERVICE_EXITED));
|
||||
|
||||
service_enter_stop(s, SERVICE_SUCCESS);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int service_reload(Unit *u) {
|
||||
@@ -3338,6 +3372,11 @@ static bool service_may_gc(Unit *u) {
|
||||
control_pid_good(s) > 0)
|
||||
return false;
|
||||
|
||||
/* Only allow collection of actually dead services, i.e. not those that are in the transitionary
|
||||
* SERVICE_DEAD_BEFORE_AUTO_RESTART/SERVICE_FAILED_BEFORE_AUTO_RESTART states. */
|
||||
if (!IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -3499,11 +3538,9 @@ static void service_notify_cgroup_empty_event(Unit *u) {
|
||||
|
||||
switch (s->state) {
|
||||
|
||||
/* Waiting for SIGCHLD is usually more interesting,
|
||||
* because it includes return codes/signals. Which is
|
||||
* why we ignore the cgroup events for most cases,
|
||||
* except when we don't know pid which to expect the
|
||||
* SIGCHLD for. */
|
||||
/* Waiting for SIGCHLD is usually more interesting, because it includes return
|
||||
* codes/signals. Which is why we ignore the cgroup events for most cases, except when we
|
||||
* don't know pid which to expect the SIGCHLD for. */
|
||||
|
||||
case SERVICE_START:
|
||||
if (IN_SET(s->type, SERVICE_NOTIFY, SERVICE_NOTIFY_RELOAD) &&
|
||||
@@ -3561,6 +3598,9 @@ static void service_notify_cgroup_empty_event(Unit *u) {
|
||||
* up the cgroup earlier and should do it now. */
|
||||
case SERVICE_DEAD:
|
||||
case SERVICE_FAILED:
|
||||
case SERVICE_DEAD_BEFORE_AUTO_RESTART:
|
||||
case SERVICE_FAILED_BEFORE_AUTO_RESTART:
|
||||
case SERVICE_AUTO_RESTART:
|
||||
unit_prune_cgroup(u);
|
||||
break;
|
||||
|
||||
|
||||
@@ -207,7 +207,6 @@ struct Service {
|
||||
ServiceFDStore *fd_store;
|
||||
size_t n_fd_store;
|
||||
unsigned n_fd_store_max;
|
||||
unsigned n_keep_fd_store;
|
||||
|
||||
char *usb_function_descriptors;
|
||||
char *usb_function_strings;
|
||||
|
||||
@@ -2488,7 +2488,7 @@ static int socket_start(Unit *u) {
|
||||
|
||||
/* If the service is already active we cannot start the
|
||||
* socket */
|
||||
if (!IN_SET(service->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART))
|
||||
if (!IN_SET(service->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART))
|
||||
return log_unit_error_errno(u, SYNTHETIC_ERRNO(EBUSY), "Socket service %s already active, refusing.", UNIT(service)->id);
|
||||
}
|
||||
|
||||
@@ -3291,7 +3291,7 @@ static void socket_trigger_notify(Unit *u, Unit *other) {
|
||||
return;
|
||||
|
||||
if (IN_SET(SERVICE(other)->state,
|
||||
SERVICE_DEAD, SERVICE_FAILED,
|
||||
SERVICE_DEAD, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED, SERVICE_FAILED_BEFORE_AUTO_RESTART,
|
||||
SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
|
||||
SERVICE_AUTO_RESTART))
|
||||
socket_enter_listening(s);
|
||||
|
||||
Reference in New Issue
Block a user