diff --git a/catalog/systemd.catalog.in b/catalog/systemd.catalog.in index 4152112078..8172a6128a 100644 --- a/catalog/systemd.catalog.in +++ b/catalog/systemd.catalog.in @@ -938,11 +938,12 @@ Defined-By: systemd Support: %SUPPORT_URL% Documentation: man:systemd(1) -A unit transaction was initiated that contains an ordering cycle, i.e. some -unit that was requested to be started (either directly, or indirectly due to a -requirement dependency such as Wants= or Requires=) is ordered before some -other unit (via After=/Before=), but that latter unit is also ordered before -the former by some dependency (either directly or indirectly). +A unit transaction (with ID @TRANSACTION_ID@) was initiated that contains +an ordering cycle, i.e. some unit that was requested to be started +(either directly, or indirectly due to a requirement dependency such as +Wants= or Requires=) is ordered before some other unit (via After=/Before=), +but that latter unit is also ordered before the former by some dependency +(either directly or indirectly). Ordering cycles consist of at least two units, but might involve many more. They generally indicate a bug in the unit definitions, as a unit diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index f1f433bbac..8d16d6618d 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -413,6 +413,8 @@ node /org/freedesktop/systemd1 { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly u NFailedJobs = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly at TransactionsWithOrderingCycle = [...]; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly d Progress = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly as Environment = ['...', ...]; @@ -1103,6 +1105,8 @@ node /org/freedesktop/systemd1 { + + @@ -1809,6 +1813,9 @@ node /org/freedesktop/systemd1 { NFailedJobs encodes how many jobs have ever failed in total. + TransactionsWithOrderingCycle encodes IDs of transactions that encountered + ordering cycle. + Progress encodes boot progress as a floating point value between 0.0 and 1.0. This value begins at 0.0 at early-boot and ends at 1.0 when boot is finished and is based on the number of executed and queued jobs. After startup, this field is always 1.0 indicating a finished @@ -12465,6 +12472,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ DefaultRestrictSUIDSGID, RemoveSubgroupFromUnit(), and KillUnitSubgroup() were added in version 258. + TransactionsWithOrderingCycle was added in version 259. Unit Objects diff --git a/man/systemctl.xml b/man/systemctl.xml index 8fb17e284d..a290e990e5 100644 --- a/man/systemctl.xml +++ b/man/systemctl.xml @@ -225,8 +225,8 @@ Sun 2017-02-26 20:57:49 EST 2h 3min left Sun 2017-02-26 11:56:36 EST 6h ago Check whether any of the specified units is in the "failed" state. If no unit is specified, - check whether there are any failed units, which corresponds to the degraded state - returned by is-system-running. Returns an exit code 0 + check whether there are any failed units or ordering cycles, which corresponds to the degraded + state returned by is-system-running. Returns an exit code 0 if at least one has failed, non-zero otherwise. Unless is specified, this will also print the current unit or system state to standard output. diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 2610442384..ac203f2971 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -455,6 +455,35 @@ static int property_get_oom_score_adjust( return sd_bus_message_append(reply, "i", n); } +static int property_get_transactions_with_cycle( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + Manager *m = ASSERT_PTR(userdata); + int r; + + assert(bus); + assert(reply); + + r = sd_bus_message_open_container(reply, 'a', "t"); + if (r < 0) + return r; + + uint64_t *id; + SET_FOREACH(id, m->transactions_with_cycle) { + r = sd_bus_message_append_basic(reply, 't', id); + if (r < 0) + return r; + } + + return sd_bus_message_close_container(reply); +} + static int bus_get_unit_by_name(Manager *m, sd_bus_message *message, const char *name, Unit **ret_unit, sd_bus_error *error) { Unit *u; int r; @@ -2870,6 +2899,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_PROPERTY("NJobs", "u", property_get_hashmap_size, offsetof(Manager, jobs), 0), SD_BUS_PROPERTY("NInstalledJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_installed_jobs), 0), SD_BUS_PROPERTY("NFailedJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_failed_jobs), 0), + SD_BUS_PROPERTY("TransactionsWithOrderingCycle", "at", property_get_transactions_with_cycle, 0, 0), SD_BUS_PROPERTY("Progress", "d", property_get_progress, 0, 0), SD_BUS_PROPERTY("Environment", "as", property_get_environment, 0, 0), SD_BUS_PROPERTY("ConfirmSpawn", "b", bus_property_get_bool, offsetof(Manager, confirm_spawn), SD_BUS_VTABLE_PROPERTY_CONST), diff --git a/src/core/manager-serialize.c b/src/core/manager-serialize.c index 057ca9ad49..df97fde97e 100644 --- a/src/core/manager-serialize.c +++ b/src/core/manager-serialize.c @@ -92,6 +92,8 @@ int manager_serialize( _cleanup_(manager_reloading_stopp) _unused_ Manager *reloading = manager_reloading_start(m); + (void) serialize_item_format(f, "last-transaction-id", "%" PRIu64, m->last_transaction_id); + (void) serialize_item_format(f, "current-job-id", "%" PRIu32, m->current_job_id); (void) serialize_item_format(f, "n-installed-jobs", "%u", m->n_installed_jobs); (void) serialize_item_format(f, "n-failed-jobs", "%u", m->n_failed_jobs); @@ -283,7 +285,7 @@ static void manager_deserialize_gid_refs_one(Manager *m, const char *value) { int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { bool deserialize_varlink_sockets = false; - int r = 0; + int r; assert(m); assert(f); @@ -325,7 +327,15 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { if (r == 0) /* eof or end marker */ break; - if ((val = startswith(l, "current-job-id="))) { + if ((val = startswith(l, "last-transaction-id="))) { + uint64_t id; + + if (safe_atou64(val, &id) < 0) + log_notice("Failed to parse last transaction id value '%s', ignoring.", val); + else + m->last_transaction_id = MAX(m->last_transaction_id, id); + + } else if ((val = startswith(l, "current-job-id="))) { uint32_t id; if (safe_atou32(val, &id) < 0) @@ -350,22 +360,18 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { m->n_failed_jobs += n; } else if ((val = startswith(l, "taint-logged="))) { - int b; - - b = parse_boolean(val); - if (b < 0) + r = parse_boolean(val); + if (r < 0) log_notice("Failed to parse taint-logged flag '%s', ignoring.", val); else - m->taint_logged = m->taint_logged || b; + m->taint_logged = m->taint_logged || r; } else if ((val = startswith(l, "service-watchdogs="))) { - int b; - - b = parse_boolean(val); - if (b < 0) + r = parse_boolean(val); + if (r < 0) log_notice("Failed to parse service-watchdogs flag '%s', ignoring.", val); else - m->service_watchdogs = b; + m->service_watchdogs = r; } else if ((val = startswith(l, "show-status-overridden="))) { ShowStatus s; diff --git a/src/core/manager.c b/src/core/manager.c index deed863e18..6e4510939c 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1660,6 +1660,8 @@ static void manager_clear_jobs_and_units(Manager *m) { m->n_running_jobs = 0; m->n_installed_jobs = 0; m->n_failed_jobs = 0; + + m->transactions_with_cycle = set_free(m->transactions_with_cycle); } Manager* manager_free(Manager *m) { @@ -2154,14 +2156,16 @@ int manager_add_job_full( if (mode == JOB_RESTART_DEPENDENCIES && type != JOB_START) return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=restart-dependencies is only valid for start."); + tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY, ++m->last_transaction_id); + if (!tr) + return -ENOMEM; + + LOG_CONTEXT_PUSHF("TRANSACTION_ID=%" PRIu64, tr->id); + log_unit_debug(unit, "Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode)); type = job_type_collapse(type, unit); - tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY); - if (!tr) - return -ENOMEM; - r = transaction_add_job_and_dependencies( tr, type, @@ -2248,18 +2252,20 @@ int manager_add_job_by_name_and_warn(Manager *m, JobType type, const char *name, } int manager_propagate_reload(Manager *m, Unit *unit, JobMode mode, sd_bus_error *e) { - int r; _cleanup_(transaction_abort_and_freep) Transaction *tr = NULL; + int r; assert(m); assert(unit); assert(mode < _JOB_MODE_MAX); assert(mode != JOB_ISOLATE); /* Isolate is only valid for start */ - tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY); + tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY, ++m->last_transaction_id); if (!tr) return -ENOMEM; + LOG_CONTEXT_PUSHF("TRANSACTION_ID=%" PRIu64, tr->id); + /* We need an anchor job */ r = transaction_add_job_and_dependencies(tr, JOB_NOP, unit, NULL, TRANSACTION_IGNORE_REQUIREMENTS|TRANSACTION_IGNORE_ORDER, e); if (r < 0) @@ -3670,6 +3676,8 @@ void manager_reset_failed(Manager *m) { HASHMAP_FOREACH(u, m->units) unit_reset_failed(u); + + m->transactions_with_cycle = set_free(m->transactions_with_cycle); } bool manager_unit_inactive_or_pending(Manager *m, const char *name) { @@ -4618,8 +4626,8 @@ ManagerState manager_state(Manager *m) { return MANAGER_MAINTENANCE; } - /* Are there any failed units? If so, we are in degraded mode */ - if (!set_isempty(m->failed_units)) + /* Are there any failed units or ordering cycles? If so, we are in degraded mode */ + if (!set_isempty(m->failed_units) || !set_isempty(m->transactions_with_cycle)) return MANAGER_DEGRADED; return MANAGER_RUNNING; diff --git a/src/core/manager.h b/src/core/manager.h index b7ee766b05..980aaab5f6 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -236,6 +236,11 @@ typedef struct Manager { /* A set which contains all currently failed units */ Set *failed_units; + uint64_t last_transaction_id; + + /* IDs of transactions that once encountered ordering cycle */ + Set *transactions_with_cycle; + sd_event_source *run_queue_event_source; char *notify_socket; diff --git a/src/core/transaction.c b/src/core/transaction.c index 016bbb1f97..536f9d78c3 100644 --- a/src/core/transaction.c +++ b/src/core/transaction.c @@ -8,6 +8,7 @@ #include "bus-common-errors.h" #include "bus-error.h" #include "dbus-unit.h" +#include "hash-funcs.h" #include "manager.h" #include "set.h" #include "slice.h" @@ -15,6 +16,8 @@ #include "strv.h" #include "transaction.h" +#define CYCLIC_TRANSACTIONS_MAX 4096U + static bool job_matters_to_anchor(Job *job); static void transaction_unlink_job(Transaction *tr, Job *j, bool delete_dependencies); @@ -399,6 +402,16 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi LOG_MESSAGE_ID(SD_MESSAGE_UNIT_ORDERING_CYCLE_STR), LOG_ITEM("%s", strempty(unit_ids))); + if (set_size(j->manager->transactions_with_cycle) >= CYCLIC_TRANSACTIONS_MAX) + log_warning("Too many transactions with ordering cycle, suppressing record."); + else { + uint64_t *id_buf = newdup(uint64_t, &tr->id, 1); + if (!id_buf) + log_oom_warning(); + else + (void) set_ensure_consume(&j->manager->transactions_with_cycle, &uint64_hash_ops_value_free, id_buf); + } + if (delete) { const char *status; /* logging for j not k here to provide a consistent narrative */ @@ -1235,20 +1248,24 @@ int transaction_add_triggering_jobs(Transaction *tr, Unit *u) { return 0; } -Transaction* transaction_new(bool irreversible) { - Transaction *tr; +Transaction* transaction_new(bool irreversible, uint64_t id) { + _cleanup_free_ Transaction *tr = NULL; - tr = new0(Transaction, 1); + assert(id != 0); + + tr = new(Transaction, 1); if (!tr) return NULL; - tr->jobs = hashmap_new(NULL); + *tr = (Transaction) { + .jobs = hashmap_new(NULL), + .irreversible = irreversible, + .id = id, + }; if (!tr->jobs) - return mfree(tr); + return NULL; - tr->irreversible = irreversible; - - return tr; + return TAKE_PTR(tr); } Transaction* transaction_free(Transaction *tr) { diff --git a/src/core/transaction.h b/src/core/transaction.h index 4f5430d6d4..275dc5984f 100644 --- a/src/core/transaction.h +++ b/src/core/transaction.h @@ -8,9 +8,11 @@ typedef struct Transaction { Hashmap *jobs; /* Unit object => Job object list 1:1 */ Job *anchor_job; /* The job the user asked for */ bool irreversible; + + uint64_t id; } Transaction; -Transaction* transaction_new(bool irreversible); +Transaction* transaction_new(bool irreversible, uint64_t id); Transaction* transaction_free(Transaction *tr); Transaction* transaction_abort_and_free(Transaction *tr); DEFINE_TRIVIAL_CLEANUP_FUNC(Transaction*, transaction_abort_and_free); diff --git a/src/core/varlink-manager.c b/src/core/varlink-manager.c index 3414ccb9ed..951e0b4b96 100644 --- a/src/core/varlink-manager.c +++ b/src/core/varlink-manager.c @@ -112,6 +112,24 @@ static int manager_context_build_json(sd_json_variant **ret, const char *name, v JSON_BUILD_PAIR_STRING_NON_EMPTY("ControlGroup", m->cgroup_root)); } +static int transactions_with_cycle_build_json(sd_json_variant **ret, const char *name, void *userdata) { + _cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL; + const Set *ids = userdata; + int r; + + assert(ret); + + uint64_t *id; + SET_FOREACH(id, ids) { + r = sd_json_variant_append_arrayb(&v, SD_JSON_BUILD_UNSIGNED(*id)); + if (r < 0) + return r; + } + + *ret = TAKE_PTR(v); + return 0; +} + static int manager_runtime_build_json(sd_json_variant **ret, const char *name, void *userdata) { Manager *m = ASSERT_PTR(userdata); dual_timestamp watchdog_last_ping; @@ -154,6 +172,7 @@ static int manager_runtime_build_json(sd_json_variant **ret, const char *name, v SD_JSON_BUILD_PAIR_UNSIGNED("NJobs", hashmap_size(m->jobs)), SD_JSON_BUILD_PAIR_UNSIGNED("NInstalledJobs", m->n_installed_jobs), SD_JSON_BUILD_PAIR_UNSIGNED("NFailedJobs", m->n_failed_jobs), + JSON_BUILD_PAIR_CALLBACK_NON_NULL("TransactionsWithOrderingCycle", transactions_with_cycle_build_json, m->transactions_with_cycle), SD_JSON_BUILD_PAIR_REAL("Progress", manager_get_progress(m)), JSON_BUILD_PAIR_DUAL_TIMESTAMP_NON_NULL("WatchdogLastPingTimestamp", watchdog_get_last_ping_as_dual_timestamp(&watchdog_last_ping)), SD_JSON_BUILD_PAIR_STRING("SystemState", manager_state_to_string(manager_state(m))), diff --git a/src/shared/varlink-io.systemd.Manager.c b/src/shared/varlink-io.systemd.Manager.c index cdcaa53b71..a31e0e688b 100644 --- a/src/shared/varlink-io.systemd.Manager.c +++ b/src/shared/varlink-io.systemd.Manager.c @@ -155,6 +155,8 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE( SD_VARLINK_DEFINE_FIELD(NInstalledJobs, SD_VARLINK_INT, 0), SD_VARLINK_FIELD_COMMENT("The total amount of failed jobs"), SD_VARLINK_DEFINE_FIELD(NFailedJobs, SD_VARLINK_INT, 0), + SD_VARLINK_FIELD_COMMENT("IDs of transactions that encountered ordering cycle"), + SD_VARLINK_DEFINE_FIELD(TransactionsWithOrderingCycle, SD_VARLINK_INT, SD_VARLINK_ARRAY|SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("Boot progress as a floating point value between 0.0 and 1.0"), SD_VARLINK_DEFINE_FIELD(Progress, SD_VARLINK_FLOAT, 0), SD_VARLINK_FIELD_COMMENT("Timestamp when the hardware watchdog was last pinged"), diff --git a/test/units/TEST-03-JOBS.sh b/test/units/TEST-03-JOBS.sh index 719c16b986..4b8b67bd57 100755 --- a/test/units/TEST-03-JOBS.sh +++ b/test/units/TEST-03-JOBS.sh @@ -124,6 +124,13 @@ for i in {0..19}; do systemctl start "transaction-cycle$i.service" done +IDS_FILE="/tmp/TEST-03-JOBS-CYCLE-IDS-$RANDOM" +varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Manager.Describe '{}' | jq '.runtime.TransactionsWithOrderingCycle' >"$IDS_FILE" +[[ "$(jq length "$IDS_FILE")" -ge 20 ]] +for i in {0..19}; do + journalctl -b TRANSACTION_ID="$(jq -r ".[$i]" "$IDS_FILE")" --grep "cycle starting with" +done + # Test PropagatesStopTo= when restart (issue #26839) systemctl start propagatestopto-and-pullin.target systemctl --quiet is-active propagatestopto-and-pullin.target