diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c index bfd748ada4..595e4414c7 100644 --- a/src/basic/unit-def.c +++ b/src/basic/unit-def.c @@ -204,6 +204,7 @@ static const char* const service_state_table[_SERVICE_STATE_MAX] = { [SERVICE_FAILED_BEFORE_AUTO_RESTART] = "failed-before-auto-restart", [SERVICE_DEAD_RESOURCES_PINNED] = "dead-resources-pinned", [SERVICE_AUTO_RESTART] = "auto-restart", + [SERVICE_AUTO_RESTART_QUEUED] = "auto-restart-queued", [SERVICE_CLEANING] = "cleaning", }; diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h index 7bd86cb1d2..f67fc1d620 100644 --- a/src/basic/unit-def.h +++ b/src/basic/unit-def.h @@ -149,6 +149,7 @@ typedef enum ServiceState { SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_DEAD_RESOURCES_PINNED, /* Like SERVICE_DEAD, but with pinned resources */ SERVICE_AUTO_RESTART, + SERVICE_AUTO_RESTART_QUEUED, SERVICE_CLEANING, _SERVICE_STATE_MAX, _SERVICE_STATE_INVALID = -EINVAL, diff --git a/src/core/job.c b/src/core/job.c index 50f9581d72..3d5e4e42d1 100644 --- a/src/core/job.c +++ b/src/core/job.c @@ -1634,6 +1634,7 @@ static const char* const job_mode_table[_JOB_MODE_MAX] = { [JOB_IGNORE_DEPENDENCIES] = "ignore-dependencies", [JOB_IGNORE_REQUIREMENTS] = "ignore-requirements", [JOB_TRIGGERING] = "triggering", + [JOB_RESTART_DEPENDENCIES] = "restart-dependencies", }; DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode); diff --git a/src/core/job.h b/src/core/job.h index f3e0b93fed..d3b98d98b6 100644 --- a/src/core/job.h +++ b/src/core/job.h @@ -79,6 +79,7 @@ enum JobMode { JOB_IGNORE_DEPENDENCIES, /* Ignore both requirement and ordering dependencies */ JOB_IGNORE_REQUIREMENTS, /* Ignore requirement dependencies */ JOB_TRIGGERING, /* Adds TRIGGERED_BY dependencies to the same transaction */ + JOB_RESTART_DEPENDENCIES,/* A "start" job for the specified unit becomes "restart" for depending units */ _JOB_MODE_MAX, _JOB_MODE_INVALID = -EINVAL, }; diff --git a/src/core/manager.c b/src/core/manager.c index f459100294..cc4fc1679c 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -2044,6 +2044,9 @@ int manager_add_job( if (mode == JOB_TRIGGERING && type != JOB_STOP) return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=triggering is only valid for stop."); + if (mode == JOB_RESTART_DEPENDENCIES && type != JOB_START) + return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=restart-dependencies is only valid for start."); + log_unit_debug(unit, "Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode)); type = job_type_collapse(type, unit); @@ -2059,7 +2062,8 @@ int manager_add_job( /* by= */ NULL, TRANSACTION_MATTERS | (IN_SET(mode, JOB_IGNORE_DEPENDENCIES, JOB_IGNORE_REQUIREMENTS) ? TRANSACTION_IGNORE_REQUIREMENTS : 0) | - (mode == JOB_IGNORE_DEPENDENCIES ? TRANSACTION_IGNORE_ORDER : 0), + (mode == JOB_IGNORE_DEPENDENCIES ? TRANSACTION_IGNORE_ORDER : 0) | + (mode == JOB_RESTART_DEPENDENCIES ? TRANSACTION_PROPAGATE_START_AS_RESTART : 0), error); if (r < 0) return r; diff --git a/src/core/service.c b/src/core/service.c index 93545a252a..6831b847e8 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -72,6 +72,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = { [SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED, [SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE, [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING, + [SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING, [SERVICE_CLEANING] = UNIT_MAINTENANCE, }; @@ -101,6 +102,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] = [SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED, [SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE, [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING, + [SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING, [SERVICE_CLEANING] = UNIT_MAINTENANCE, }; @@ -285,12 +287,10 @@ usec_t service_restart_usec_next(Service *s) { assert(s); - /* When the service state is in SERVICE_*_BEFORE_AUTO_RESTART or SERVICE_AUTO_RESTART, - * we still need to add 1 to s->n_restarts manually because s->n_restarts is not updated - * until a restart job is enqueued. Note that for SERVICE_AUTO_RESTART, that might have been - * the case, i.e. s->n_restarts is already increased. But we assume it's not since the time - * between job enqueuing and running is usually neglectable compared to the time we'll be sleeping. */ - n_restarts_next = s->n_restarts + 1; + /* When the service state is in SERVICE_*_BEFORE_AUTO_RESTART or SERVICE_AUTO_RESTART, we still need + * to add 1 to s->n_restarts manually, because s->n_restarts is not updated until a restart job is + * enqueued, i.e. state has transitioned to SERVICE_AUTO_RESTART_QUEUED. */ + n_restarts_next = s->n_restarts + (s->state == SERVICE_AUTO_RESTART_QUEUED ? 0 : 1); if (n_restarts_next <= 1 || s->restart_steps == 0 || @@ -303,7 +303,7 @@ usec_t service_restart_usec_next(Service *s) { /* Enforced in service_verify() and above */ assert(s->restart_max_delay_usec > s->restart_usec); - /* ((restart_usec_max - restart_usec)^(1/restart_steps))^(n_restart_next - 1) */ + /* ((restart_max_delay_usec - restart_usec)^(1/restart_steps))^(n_restart_next - 1) */ value = usec_add(s->restart_usec, (usec_t) powl(s->restart_max_delay_usec - s->restart_usec, (long double) (n_restarts_next - 1) / s->restart_steps)); @@ -1258,7 +1258,7 @@ static void service_set_state(Service *s, ServiceState state) { if (IN_SET(state, SERVICE_DEAD, SERVICE_FAILED, - SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, + SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED, SERVICE_DEAD_RESOURCES_PINNED)) { unit_unwatch_all_pids(UNIT(s)); unit_dequeue_rewatch_pids(UNIT(s)); @@ -1363,7 +1363,7 @@ static int service_coldplug(Unit *u) { if (!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, - SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, + SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED, SERVICE_CLEANING, SERVICE_DEAD_RESOURCES_PINNED)) { (void) unit_enqueue_rewatch_pids(u); @@ -1945,7 +1945,7 @@ static bool service_will_restart(Unit *u) { assert(s); - if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART)) + if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED)) return true; return unit_will_restart_default(u); @@ -2511,17 +2511,15 @@ static void service_enter_restart(Service *s) { return; } - /* Any units that are bound to this service must also be - * restarted. We use JOB_RESTART (instead of the more obvious - * JOB_START) here so that those dependency jobs will be added - * as well. */ - r = manager_add_job(UNIT(s)->manager, JOB_RESTART, UNIT(s), JOB_REPLACE, NULL, &error, NULL); + /* Any units that are bound to this service must also be restarted. We use JOB_START for ourselves + * but then set JOB_RESTART_DEPENDENCIES which will enqueue JOB_RESTART for those dependency jobs. */ + r = manager_add_job(UNIT(s)->manager, JOB_START, UNIT(s), JOB_RESTART_DEPENDENCIES, NULL, &error, NULL); if (r < 0) goto fail; - /* Count the jobs we enqueue for restarting. This counter is maintained as long as the unit isn't fully - * stopped, i.e. as long as it remains up or remains in auto-start states. The user can reset the counter - * explicitly however via the usual "systemctl reset-failure" logic. */ + /* Count the jobs we enqueue for restarting. This counter is maintained as long as the unit isn't + * fully stopped, i.e. as long as it remains up or remains in auto-start states. The user can reset + * the counter explicitly however via the usual "systemctl reset-failure" logic. */ s->n_restarts ++; s->flush_n_restarts = false; @@ -2534,13 +2532,10 @@ static void service_enter_restart(Service *s) { "Scheduled restart job, restart counter is at %u.", s->n_restarts), "N_RESTARTS=%u", s->n_restarts); + service_set_state(s, SERVICE_AUTO_RESTART_QUEUED); + /* Notify clients about changed restart counter */ unit_add_to_dbus_queue(UNIT(s)); - - /* Note that we stay in the SERVICE_AUTO_RESTART state here, - * it will be canceled as part of the service_stop() call that - * is executed as part of JOB_RESTART. */ - return; fail: @@ -2719,7 +2714,7 @@ static int service_start(Unit *u) { if (IN_SET(s->state, SERVICE_AUTO_RESTART, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART)) return -EAGAIN; - assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED)); + assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED, SERVICE_AUTO_RESTART_QUEUED)); r = unit_acquire_invocation_id(u); if (r < 0) @@ -2777,7 +2772,8 @@ static int service_stop(Unit *u) { return 0; case SERVICE_AUTO_RESTART: - /* A restart will be scheduled or is in progress. */ + case SERVICE_AUTO_RESTART_QUEUED: + /* Give up on the auto restart */ service_set_state(s, service_determine_dead_state(s)); return 0; @@ -3650,6 +3646,7 @@ static void service_notify_cgroup_empty_event(Unit *u) { /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean * up the cgroup earlier and should do it now. */ case SERVICE_AUTO_RESTART: + case SERVICE_AUTO_RESTART_QUEUED: unit_prune_cgroup(u); break; diff --git a/src/core/transaction.c b/src/core/transaction.c index 9c1523c47a..68614de7a0 100644 --- a/src/core/transaction.c +++ b/src/core/transaction.c @@ -1054,34 +1054,38 @@ int transaction_add_job_and_dependencies( } } - if (IN_SET(type, JOB_STOP, JOB_RESTART)) { - _cleanup_set_free_ Set *propagated_restart = NULL; - /* We propagate STOP as STOP, but RESTART only as TRY_RESTART, in order not to start - * dependencies that are not around. */ + _cleanup_set_free_ Set *propagated_restart = NULL; - if (type == JOB_RESTART) - UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_RESTART) { - JobType nt; + if (type == JOB_RESTART || (type == JOB_START && FLAGS_SET(flags, TRANSACTION_PROPAGATE_START_AS_RESTART))) { - r = set_ensure_put(&propagated_restart, NULL, dep); - if (r < 0) + /* We propagate RESTART only as TRY_RESTART, in order not to start dependencies that + * are not around. */ + + UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_RESTART) { + JobType nt; + + r = set_ensure_put(&propagated_restart, NULL, dep); + if (r < 0) + return r; + + nt = job_type_collapse(JOB_TRY_RESTART, dep); + if (nt == JOB_NOP) + continue; + + r = transaction_add_job_and_dependencies(tr, nt, dep, ret, TRANSACTION_MATTERS | (flags & TRANSACTION_IGNORE_ORDER), e); + if (r < 0) { + if (r != -EBADR) /* job type not applicable */ return r; - nt = job_type_collapse(JOB_TRY_RESTART, dep); - if (nt == JOB_NOP) - continue; - - r = transaction_add_job_and_dependencies(tr, nt, dep, ret, TRANSACTION_MATTERS | (flags & TRANSACTION_IGNORE_ORDER), e); - if (r < 0) { - if (r != -EBADR) /* job type not applicable */ - return r; - - sd_bus_error_free(e); - } + sd_bus_error_free(e); } + } + } + if (type == JOB_STOP) { /* The 'stop' part of a restart job is also propagated to units with * UNIT_ATOM_PROPAGATE_STOP */ + UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_STOP) { /* Units experienced restart propagation are skipped */ if (set_contains(propagated_restart, dep)) diff --git a/src/core/transaction.h b/src/core/transaction.h index 5874077aef..db2d056649 100644 --- a/src/core/transaction.h +++ b/src/core/transaction.h @@ -21,10 +21,13 @@ Transaction *transaction_abort_and_free(Transaction *tr); DEFINE_TRIVIAL_CLEANUP_FUNC(Transaction*, transaction_abort_and_free); typedef enum TransactionAddFlags { - TRANSACTION_MATTERS = 1 << 0, - TRANSACTION_CONFLICTS = 1 << 1, - TRANSACTION_IGNORE_REQUIREMENTS = 1 << 2, - TRANSACTION_IGNORE_ORDER = 1 << 3, + TRANSACTION_MATTERS = 1 << 0, + TRANSACTION_CONFLICTS = 1 << 1, + TRANSACTION_IGNORE_REQUIREMENTS = 1 << 2, + TRANSACTION_IGNORE_ORDER = 1 << 3, + + /* Propagate a START job to other units like a RESTART */ + TRANSACTION_PROPAGATE_START_AS_RESTART = 1 << 4, } TransactionAddFlags; void transaction_add_propagate_reload_jobs( diff --git a/test/units/success-failure-test-failure.service b/test/units/success-failure-test-failure.service new file mode 100644 index 0000000000..f4ce013da8 --- /dev/null +++ b/test/units/success-failure-test-failure.service @@ -0,0 +1,3 @@ +[Service] +Type=notify +ExecStart=bash -c "echo failure >> /tmp/success-failure-test-result && systemd-notify --ready && sleep infinity" diff --git a/test/units/success-failure-test-success.service b/test/units/success-failure-test-success.service new file mode 100644 index 0000000000..8503c45122 --- /dev/null +++ b/test/units/success-failure-test-success.service @@ -0,0 +1,3 @@ +[Service] +Type=notify +ExecStart=bash -c "echo success >> /tmp/success-failure-test-result && systemd-notify --ready && sleep infinity" diff --git a/test/units/success-failure-test.service b/test/units/success-failure-test.service new file mode 100644 index 0000000000..f66ff6cd6b --- /dev/null +++ b/test/units/success-failure-test.service @@ -0,0 +1,9 @@ +[Unit] +OnSuccess=success-failure-test-success.service +OnFailure=success-failure-test-failure.service + +[Service] +Type=notify +Restart=always +ExecStart=bash -c 'test -f /tmp/success-failure-test-ran && touch /tmp/success-failure-test-ran2 && systemd-notify --ready && sleep infinity' +ExecStopPost=touch /tmp/success-failure-test-ran diff --git a/test/units/testsuite-23.success-failure.sh b/test/units/testsuite-23.success-failure.sh new file mode 100755 index 0000000000..8fc9596cc5 --- /dev/null +++ b/test/units/testsuite-23.success-failure.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eux +set -o pipefail + +# Test OnSuccess=/OnFailure= in combination + +systemd-analyze log-level debug + +# Start-up should fail, but the automatic restart should fix it +(! systemctl start success-failure-test ) + +# Wait until the first invocation finished & failed +while test ! -f /tmp/success-failure-test-ran ; do + sleep .5 +done + +# Wait until the second invocation finished & succeeded +while test ! -f /tmp/success-failure-test-ran2 ; do + sleep .5 +done + +# Verify it is indeed running +systemctl is-active -q success-failure-test + +# The above should have caused the failure service to start (asynchronously) +while test "$(systemctl is-active success-failure-test-failure)" != "active" ; do + sleep .5 +done + +# But the success service should not have started +test "$(systemctl is-active success-failure-test-success)" = "inactive" + +systemctl stop success-failure-test-failure + +# Do a clean kill of the service now +systemctl kill success-failure-test + +# This should result in the success service to start +while test "$(systemctl is-active success-failure-test-success)" != "active" ; do + sleep .5 +done + +# But the failure service should not have started again +test "$(systemctl is-active success-failure-test-failure)" = "inactive" + +systemctl stop success-failure-test success-failure-test-success + +systemd-analyze log-level info