From 7ca94ff4b1f97d6c490eb4f522f8e4b8517b5612 Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Sun, 26 Oct 2025 21:39:52 +0100 Subject: [PATCH 1/5] core/manager-serialize: minor coding style cleanups --- src/core/manager-serialize.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/core/manager-serialize.c b/src/core/manager-serialize.c index 057ca9ad49..469f51f448 100644 --- a/src/core/manager-serialize.c +++ b/src/core/manager-serialize.c @@ -283,7 +283,7 @@ static void manager_deserialize_gid_refs_one(Manager *m, const char *value) { int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { bool deserialize_varlink_sockets = false; - int r = 0; + int r; assert(m); assert(f); @@ -350,22 +350,18 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { m->n_failed_jobs += n; } else if ((val = startswith(l, "taint-logged="))) { - int b; - - b = parse_boolean(val); - if (b < 0) + r = parse_boolean(val); + if (r < 0) log_notice("Failed to parse taint-logged flag '%s', ignoring.", val); else - m->taint_logged = m->taint_logged || b; + m->taint_logged = m->taint_logged || r; } else if ((val = startswith(l, "service-watchdogs="))) { - int b; - - b = parse_boolean(val); - if (b < 0) + r = parse_boolean(val); + if (r < 0) log_notice("Failed to parse service-watchdogs flag '%s', ignoring.", val); else - m->service_watchdogs = b; + m->service_watchdogs = r; } else if ((val = startswith(l, "show-status-overridden="))) { ShowStatus s; From 0d9e79d5caf541ab991fc6bd5f4063e876358ede Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Sun, 5 Oct 2025 00:39:50 +0200 Subject: [PATCH 2/5] core/transaction: assign unique ids to transactions and encode them in log Preparation for later commits, but I think this one makes a ton of sense on its own. When debug logging is enabled it's otherwise difficult to dig up the portion of journal for transaction construction. --- src/core/manager-serialize.c | 12 +++++++++++- src/core/manager.c | 16 ++++++++++------ src/core/manager.h | 2 ++ src/core/transaction.c | 20 ++++++++++++-------- src/core/transaction.h | 4 +++- 5 files changed, 38 insertions(+), 16 deletions(-) diff --git a/src/core/manager-serialize.c b/src/core/manager-serialize.c index 469f51f448..df97fde97e 100644 --- a/src/core/manager-serialize.c +++ b/src/core/manager-serialize.c @@ -92,6 +92,8 @@ int manager_serialize( _cleanup_(manager_reloading_stopp) _unused_ Manager *reloading = manager_reloading_start(m); + (void) serialize_item_format(f, "last-transaction-id", "%" PRIu64, m->last_transaction_id); + (void) serialize_item_format(f, "current-job-id", "%" PRIu32, m->current_job_id); (void) serialize_item_format(f, "n-installed-jobs", "%u", m->n_installed_jobs); (void) serialize_item_format(f, "n-failed-jobs", "%u", m->n_failed_jobs); @@ -325,7 +327,15 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { if (r == 0) /* eof or end marker */ break; - if ((val = startswith(l, "current-job-id="))) { + if ((val = startswith(l, "last-transaction-id="))) { + uint64_t id; + + if (safe_atou64(val, &id) < 0) + log_notice("Failed to parse last transaction id value '%s', ignoring.", val); + else + m->last_transaction_id = MAX(m->last_transaction_id, id); + + } else if ((val = startswith(l, "current-job-id="))) { uint32_t id; if (safe_atou32(val, &id) < 0) diff --git a/src/core/manager.c b/src/core/manager.c index b501d3f1c5..bb2c1bece1 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -2153,14 +2153,16 @@ int manager_add_job_full( if (mode == JOB_RESTART_DEPENDENCIES && type != JOB_START) return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=restart-dependencies is only valid for start."); + tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY, ++m->last_transaction_id); + if (!tr) + return -ENOMEM; + + LOG_CONTEXT_PUSHF("TRANSACTION_ID=%" PRIu64, tr->id); + log_unit_debug(unit, "Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode)); type = job_type_collapse(type, unit); - tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY); - if (!tr) - return -ENOMEM; - r = transaction_add_job_and_dependencies( tr, type, @@ -2247,18 +2249,20 @@ int manager_add_job_by_name_and_warn(Manager *m, JobType type, const char *name, } int manager_propagate_reload(Manager *m, Unit *unit, JobMode mode, sd_bus_error *e) { - int r; _cleanup_(transaction_abort_and_freep) Transaction *tr = NULL; + int r; assert(m); assert(unit); assert(mode < _JOB_MODE_MAX); assert(mode != JOB_ISOLATE); /* Isolate is only valid for start */ - tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY); + tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY, ++m->last_transaction_id); if (!tr) return -ENOMEM; + LOG_CONTEXT_PUSHF("TRANSACTION_ID=%" PRIu64, tr->id); + /* We need an anchor job */ r = transaction_add_job_and_dependencies(tr, JOB_NOP, unit, NULL, TRANSACTION_IGNORE_REQUIREMENTS|TRANSACTION_IGNORE_ORDER, e); if (r < 0) diff --git a/src/core/manager.h b/src/core/manager.h index b7ee766b05..d0855260bf 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -236,6 +236,8 @@ typedef struct Manager { /* A set which contains all currently failed units */ Set *failed_units; + uint64_t last_transaction_id; + sd_event_source *run_queue_event_source; char *notify_socket; diff --git a/src/core/transaction.c b/src/core/transaction.c index 016bbb1f97..34a0faf556 100644 --- a/src/core/transaction.c +++ b/src/core/transaction.c @@ -1235,20 +1235,24 @@ int transaction_add_triggering_jobs(Transaction *tr, Unit *u) { return 0; } -Transaction* transaction_new(bool irreversible) { - Transaction *tr; +Transaction* transaction_new(bool irreversible, uint64_t id) { + _cleanup_free_ Transaction *tr = NULL; - tr = new0(Transaction, 1); + assert(id != 0); + + tr = new(Transaction, 1); if (!tr) return NULL; - tr->jobs = hashmap_new(NULL); + *tr = (Transaction) { + .jobs = hashmap_new(NULL), + .irreversible = irreversible, + .id = id, + }; if (!tr->jobs) - return mfree(tr); + return NULL; - tr->irreversible = irreversible; - - return tr; + return TAKE_PTR(tr); } Transaction* transaction_free(Transaction *tr) { diff --git a/src/core/transaction.h b/src/core/transaction.h index 4f5430d6d4..275dc5984f 100644 --- a/src/core/transaction.h +++ b/src/core/transaction.h @@ -8,9 +8,11 @@ typedef struct Transaction { Hashmap *jobs; /* Unit object => Job object list 1:1 */ Job *anchor_job; /* The job the user asked for */ bool irreversible; + + uint64_t id; } Transaction; -Transaction* transaction_new(bool irreversible); +Transaction* transaction_new(bool irreversible, uint64_t id); Transaction* transaction_free(Transaction *tr); Transaction* transaction_abort_and_free(Transaction *tr); DEFINE_TRIVIAL_CLEANUP_FUNC(Transaction*, transaction_abort_and_free); From d3da74696bf03df821c1d9bf20cfdee893437fbe Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Sun, 5 Oct 2025 01:07:48 +0200 Subject: [PATCH 3/5] core: record transactions that have seen ordering cycles --- catalog/systemd.catalog.in | 11 ++++++----- src/core/manager.c | 2 ++ src/core/manager.h | 3 +++ src/core/transaction.c | 13 +++++++++++++ 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/catalog/systemd.catalog.in b/catalog/systemd.catalog.in index 4152112078..8172a6128a 100644 --- a/catalog/systemd.catalog.in +++ b/catalog/systemd.catalog.in @@ -938,11 +938,12 @@ Defined-By: systemd Support: %SUPPORT_URL% Documentation: man:systemd(1) -A unit transaction was initiated that contains an ordering cycle, i.e. some -unit that was requested to be started (either directly, or indirectly due to a -requirement dependency such as Wants= or Requires=) is ordered before some -other unit (via After=/Before=), but that latter unit is also ordered before -the former by some dependency (either directly or indirectly). +A unit transaction (with ID @TRANSACTION_ID@) was initiated that contains +an ordering cycle, i.e. some unit that was requested to be started +(either directly, or indirectly due to a requirement dependency such as +Wants= or Requires=) is ordered before some other unit (via After=/Before=), +but that latter unit is also ordered before the former by some dependency +(either directly or indirectly). Ordering cycles consist of at least two units, but might involve many more. They generally indicate a bug in the unit definitions, as a unit diff --git a/src/core/manager.c b/src/core/manager.c index bb2c1bece1..0a6ea1658f 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1659,6 +1659,8 @@ static void manager_clear_jobs_and_units(Manager *m) { m->n_running_jobs = 0; m->n_installed_jobs = 0; m->n_failed_jobs = 0; + + m->transactions_with_cycle = set_free(m->transactions_with_cycle); } Manager* manager_free(Manager *m) { diff --git a/src/core/manager.h b/src/core/manager.h index d0855260bf..980aaab5f6 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -238,6 +238,9 @@ typedef struct Manager { uint64_t last_transaction_id; + /* IDs of transactions that once encountered ordering cycle */ + Set *transactions_with_cycle; + sd_event_source *run_queue_event_source; char *notify_socket; diff --git a/src/core/transaction.c b/src/core/transaction.c index 34a0faf556..536f9d78c3 100644 --- a/src/core/transaction.c +++ b/src/core/transaction.c @@ -8,6 +8,7 @@ #include "bus-common-errors.h" #include "bus-error.h" #include "dbus-unit.h" +#include "hash-funcs.h" #include "manager.h" #include "set.h" #include "slice.h" @@ -15,6 +16,8 @@ #include "strv.h" #include "transaction.h" +#define CYCLIC_TRANSACTIONS_MAX 4096U + static bool job_matters_to_anchor(Job *job); static void transaction_unlink_job(Transaction *tr, Job *j, bool delete_dependencies); @@ -399,6 +402,16 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi LOG_MESSAGE_ID(SD_MESSAGE_UNIT_ORDERING_CYCLE_STR), LOG_ITEM("%s", strempty(unit_ids))); + if (set_size(j->manager->transactions_with_cycle) >= CYCLIC_TRANSACTIONS_MAX) + log_warning("Too many transactions with ordering cycle, suppressing record."); + else { + uint64_t *id_buf = newdup(uint64_t, &tr->id, 1); + if (!id_buf) + log_oom_warning(); + else + (void) set_ensure_consume(&j->manager->transactions_with_cycle, &uint64_hash_ops_value_free, id_buf); + } + if (delete) { const char *status; /* logging for j not k here to provide a consistent narrative */ From 58686034ebcf089f4c3e0abed37fe6df3a831692 Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Sun, 5 Oct 2025 01:46:40 +0200 Subject: [PATCH 4/5] core: expose transactions with ordering cycle Closes #3829 Alternative to #35417 I don't think the individual "WasOnDependencyCycle" attrs on units are particularly helpful and comprehensible, as it's really about the dep relationship between them. And as discussed, the dependency cycle is not something persistent, rather local to the currently loaded set of units and shall be reset with daemon-reload (see also https://github.com/systemd/systemd/issues/35642#issuecomment-2591296586). Hence, let's report system state as degraded and point users to the involved transactions when ordering cycles are encountered instead. Combined with log messages added in 6912eb315fabe0bbf25593ab897265fa79a7e24b it should achieve the goal of making ordering cycles more observable, while avoiding all sorts of subtle bookkeeping in the service manager. The degraded state can be reset via the existing ResetFailed() manager-wide method. --- man/org.freedesktop.systemd1.xml | 8 +++++++ man/systemctl.xml | 4 ++-- src/core/dbus-manager.c | 30 +++++++++++++++++++++++++ src/core/manager.c | 6 +++-- src/core/varlink-manager.c | 19 ++++++++++++++++ src/shared/varlink-io.systemd.Manager.c | 2 ++ 6 files changed, 65 insertions(+), 4 deletions(-) diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index f1f433bbac..8d16d6618d 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -413,6 +413,8 @@ node /org/freedesktop/systemd1 { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly u NFailedJobs = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly at TransactionsWithOrderingCycle = [...]; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly d Progress = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly as Environment = ['...', ...]; @@ -1103,6 +1105,8 @@ node /org/freedesktop/systemd1 { + + @@ -1809,6 +1813,9 @@ node /org/freedesktop/systemd1 { NFailedJobs encodes how many jobs have ever failed in total. + TransactionsWithOrderingCycle encodes IDs of transactions that encountered + ordering cycle. + Progress encodes boot progress as a floating point value between 0.0 and 1.0. This value begins at 0.0 at early-boot and ends at 1.0 when boot is finished and is based on the number of executed and queued jobs. After startup, this field is always 1.0 indicating a finished @@ -12465,6 +12472,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ DefaultRestrictSUIDSGID, RemoveSubgroupFromUnit(), and KillUnitSubgroup() were added in version 258. + TransactionsWithOrderingCycle was added in version 259. Unit Objects diff --git a/man/systemctl.xml b/man/systemctl.xml index 8fb17e284d..a290e990e5 100644 --- a/man/systemctl.xml +++ b/man/systemctl.xml @@ -225,8 +225,8 @@ Sun 2017-02-26 20:57:49 EST 2h 3min left Sun 2017-02-26 11:56:36 EST 6h ago Check whether any of the specified units is in the "failed" state. If no unit is specified, - check whether there are any failed units, which corresponds to the degraded state - returned by is-system-running. Returns an exit code 0 + check whether there are any failed units or ordering cycles, which corresponds to the degraded + state returned by is-system-running. Returns an exit code 0 if at least one has failed, non-zero otherwise. Unless is specified, this will also print the current unit or system state to standard output. diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 2610442384..ac203f2971 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -455,6 +455,35 @@ static int property_get_oom_score_adjust( return sd_bus_message_append(reply, "i", n); } +static int property_get_transactions_with_cycle( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + Manager *m = ASSERT_PTR(userdata); + int r; + + assert(bus); + assert(reply); + + r = sd_bus_message_open_container(reply, 'a', "t"); + if (r < 0) + return r; + + uint64_t *id; + SET_FOREACH(id, m->transactions_with_cycle) { + r = sd_bus_message_append_basic(reply, 't', id); + if (r < 0) + return r; + } + + return sd_bus_message_close_container(reply); +} + static int bus_get_unit_by_name(Manager *m, sd_bus_message *message, const char *name, Unit **ret_unit, sd_bus_error *error) { Unit *u; int r; @@ -2870,6 +2899,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_PROPERTY("NJobs", "u", property_get_hashmap_size, offsetof(Manager, jobs), 0), SD_BUS_PROPERTY("NInstalledJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_installed_jobs), 0), SD_BUS_PROPERTY("NFailedJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_failed_jobs), 0), + SD_BUS_PROPERTY("TransactionsWithOrderingCycle", "at", property_get_transactions_with_cycle, 0, 0), SD_BUS_PROPERTY("Progress", "d", property_get_progress, 0, 0), SD_BUS_PROPERTY("Environment", "as", property_get_environment, 0, 0), SD_BUS_PROPERTY("ConfirmSpawn", "b", bus_property_get_bool, offsetof(Manager, confirm_spawn), SD_BUS_VTABLE_PROPERTY_CONST), diff --git a/src/core/manager.c b/src/core/manager.c index 0a6ea1658f..acf837941e 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3675,6 +3675,8 @@ void manager_reset_failed(Manager *m) { HASHMAP_FOREACH(u, m->units) unit_reset_failed(u); + + m->transactions_with_cycle = set_free(m->transactions_with_cycle); } bool manager_unit_inactive_or_pending(Manager *m, const char *name) { @@ -4623,8 +4625,8 @@ ManagerState manager_state(Manager *m) { return MANAGER_MAINTENANCE; } - /* Are there any failed units? If so, we are in degraded mode */ - if (!set_isempty(m->failed_units)) + /* Are there any failed units or ordering cycles? If so, we are in degraded mode */ + if (!set_isempty(m->failed_units) || !set_isempty(m->transactions_with_cycle)) return MANAGER_DEGRADED; return MANAGER_RUNNING; diff --git a/src/core/varlink-manager.c b/src/core/varlink-manager.c index 3414ccb9ed..951e0b4b96 100644 --- a/src/core/varlink-manager.c +++ b/src/core/varlink-manager.c @@ -112,6 +112,24 @@ static int manager_context_build_json(sd_json_variant **ret, const char *name, v JSON_BUILD_PAIR_STRING_NON_EMPTY("ControlGroup", m->cgroup_root)); } +static int transactions_with_cycle_build_json(sd_json_variant **ret, const char *name, void *userdata) { + _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; + const Set *ids = userdata; + int r; + + assert(ret); + + uint64_t *id; + SET_FOREACH(id, ids) { + r = sd_json_variant_append_arrayb(&v, SD_JSON_BUILD_UNSIGNED(*id)); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(v); + return 0; +} + static int manager_runtime_build_json(sd_json_variant **ret, const char *name, void *userdata) { Manager *m = ASSERT_PTR(userdata); dual_timestamp watchdog_last_ping; @@ -154,6 +172,7 @@ static int manager_runtime_build_json(sd_json_variant **ret, const char *name, v SD_JSON_BUILD_PAIR_UNSIGNED("NJobs", hashmap_size(m->jobs)), SD_JSON_BUILD_PAIR_UNSIGNED("NInstalledJobs", m->n_installed_jobs), SD_JSON_BUILD_PAIR_UNSIGNED("NFailedJobs", m->n_failed_jobs), + JSON_BUILD_PAIR_CALLBACK_NON_NULL("TransactionsWithOrderingCycle", transactions_with_cycle_build_json, m->transactions_with_cycle), SD_JSON_BUILD_PAIR_REAL("Progress", manager_get_progress(m)), JSON_BUILD_PAIR_DUAL_TIMESTAMP_NON_NULL("WatchdogLastPingTimestamp", watchdog_get_last_ping_as_dual_timestamp(&watchdog_last_ping)), SD_JSON_BUILD_PAIR_STRING("SystemState", manager_state_to_string(manager_state(m))), diff --git a/src/shared/varlink-io.systemd.Manager.c b/src/shared/varlink-io.systemd.Manager.c index cdcaa53b71..a31e0e688b 100644 --- a/src/shared/varlink-io.systemd.Manager.c +++ b/src/shared/varlink-io.systemd.Manager.c @@ -155,6 +155,8 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE( SD_VARLINK_DEFINE_FIELD(NInstalledJobs, SD_VARLINK_INT, 0), SD_VARLINK_FIELD_COMMENT("The total amount of failed jobs"), SD_VARLINK_DEFINE_FIELD(NFailedJobs, SD_VARLINK_INT, 0), + SD_VARLINK_FIELD_COMMENT("IDs of transactions that encountered ordering cycle"), + SD_VARLINK_DEFINE_FIELD(TransactionsWithOrderingCycle, SD_VARLINK_INT, SD_VARLINK_ARRAY|SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("Boot progress as a floating point value between 0.0 and 1.0"), SD_VARLINK_DEFINE_FIELD(Progress, SD_VARLINK_FLOAT, 0), SD_VARLINK_FIELD_COMMENT("Timestamp when the hardware watchdog was last pinged"), From 397681405a2d6fadda744029bb6ab94f076ae6db Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Wed, 12 Nov 2025 22:46:40 +0100 Subject: [PATCH 5/5] TEST-03-JOBS: add test for TransactionsWithOrderingCycle --- test/units/TEST-03-JOBS.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/units/TEST-03-JOBS.sh b/test/units/TEST-03-JOBS.sh index 719c16b986..4b8b67bd57 100755 --- a/test/units/TEST-03-JOBS.sh +++ b/test/units/TEST-03-JOBS.sh @@ -124,6 +124,13 @@ for i in {0..19}; do systemctl start "transaction-cycle$i.service" done +IDS_FILE="/tmp/TEST-03-JOBS-CYCLE-IDS-$RANDOM" +varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Manager.Describe '{}' | jq '.runtime.TransactionsWithOrderingCycle' >"$IDS_FILE" +[[ "$(jq length "$IDS_FILE")" -ge 20 ]] +for i in {0..19}; do + journalctl -b TRANSACTION_ID="$(jq -r ".[$i]" "$IDS_FILE")" --grep "cycle starting with" +done + # Test PropagatesStopTo= when restart (issue #26839) systemctl start propagatestopto-and-pullin.target systemctl --quiet is-active propagatestopto-and-pullin.target