mirror of
https://github.com/morgan9e/systemd
synced 2026-04-14 00:14:32 +09:00
core: record transactions that have seen ordering cycles and expose them via IPC (#39210)
Closes #3829.
This commit is contained in:
@@ -938,11 +938,12 @@ Defined-By: systemd
|
||||
Support: %SUPPORT_URL%
|
||||
Documentation: man:systemd(1)
|
||||
|
||||
A unit transaction was initiated that contains an ordering cycle, i.e. some
|
||||
unit that was requested to be started (either directly, or indirectly due to a
|
||||
requirement dependency such as Wants= or Requires=) is ordered before some
|
||||
other unit (via After=/Before=), but that latter unit is also ordered before
|
||||
the former by some dependency (either directly or indirectly).
|
||||
A unit transaction (with ID @TRANSACTION_ID@) was initiated that contains
|
||||
an ordering cycle, i.e. some unit that was requested to be started
|
||||
(either directly, or indirectly due to a requirement dependency such as
|
||||
Wants= or Requires=) is ordered before some other unit (via After=/Before=),
|
||||
but that latter unit is also ordered before the former by some dependency
|
||||
(either directly or indirectly).
|
||||
|
||||
Ordering cycles consist of at least two units, but might involve many
|
||||
more. They generally indicate a bug in the unit definitions, as a unit
|
||||
|
||||
@@ -413,6 +413,8 @@ node /org/freedesktop/systemd1 {
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly u NFailedJobs = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly at TransactionsWithOrderingCycle = [...];
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly d Progress = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
|
||||
readonly as Environment = ['...', ...];
|
||||
@@ -1103,6 +1105,8 @@ node /org/freedesktop/systemd1 {
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="NFailedJobs"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="TransactionsWithOrderingCycle"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="Progress"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="Environment"/>
|
||||
@@ -1809,6 +1813,9 @@ node /org/freedesktop/systemd1 {
|
||||
|
||||
<para><varname>NFailedJobs</varname> encodes how many jobs have ever failed in total.</para>
|
||||
|
||||
<para><varname>TransactionsWithOrderingCycle</varname> encodes IDs of transactions that encountered
|
||||
ordering cycle.</para>
|
||||
|
||||
<para><varname>Progress</varname> encodes boot progress as a floating point value between 0.0 and
|
||||
1.0. This value begins at 0.0 at early-boot and ends at 1.0 when boot is finished and is based on the
|
||||
number of executed and queued jobs. After startup, this field is always 1.0 indicating a finished
|
||||
@@ -12465,6 +12472,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
|
||||
<para><varname>DefaultRestrictSUIDSGID</varname>,
|
||||
<function>RemoveSubgroupFromUnit()</function>, and
|
||||
<function>KillUnitSubgroup()</function> were added in version 258.</para>
|
||||
<para><varname>TransactionsWithOrderingCycle</varname> was added in version 259.</para>
|
||||
</refsect2>
|
||||
<refsect2>
|
||||
<title>Unit Objects</title>
|
||||
|
||||
@@ -225,8 +225,8 @@ Sun 2017-02-26 20:57:49 EST 2h 3min left Sun 2017-02-26 11:56:36 EST 6h ago
|
||||
|
||||
<listitem>
|
||||
<para>Check whether any of the specified units is in the "failed" state. If no unit is specified,
|
||||
check whether there are any failed units, which corresponds to the <literal>degraded</literal> state
|
||||
returned by <command>is-system-running</command>. Returns an exit code <constant>0</constant>
|
||||
check whether there are any failed units or ordering cycles, which corresponds to the <literal>degraded</literal>
|
||||
state returned by <command>is-system-running</command>. Returns an exit code <constant>0</constant>
|
||||
if at least one has failed, non-zero otherwise. Unless <option>--quiet</option> is specified, this
|
||||
will also print the current unit or system state to standard output.</para>
|
||||
|
||||
|
||||
@@ -455,6 +455,35 @@ static int property_get_oom_score_adjust(
|
||||
return sd_bus_message_append(reply, "i", n);
|
||||
}
|
||||
|
||||
static int property_get_transactions_with_cycle(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
const char *interface,
|
||||
const char *property,
|
||||
sd_bus_message *reply,
|
||||
void *userdata,
|
||||
sd_bus_error *error) {
|
||||
|
||||
Manager *m = ASSERT_PTR(userdata);
|
||||
int r;
|
||||
|
||||
assert(bus);
|
||||
assert(reply);
|
||||
|
||||
r = sd_bus_message_open_container(reply, 'a', "t");
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
uint64_t *id;
|
||||
SET_FOREACH(id, m->transactions_with_cycle) {
|
||||
r = sd_bus_message_append_basic(reply, 't', id);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
return sd_bus_message_close_container(reply);
|
||||
}
|
||||
|
||||
static int bus_get_unit_by_name(Manager *m, sd_bus_message *message, const char *name, Unit **ret_unit, sd_bus_error *error) {
|
||||
Unit *u;
|
||||
int r;
|
||||
@@ -2870,6 +2899,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
|
||||
SD_BUS_PROPERTY("NJobs", "u", property_get_hashmap_size, offsetof(Manager, jobs), 0),
|
||||
SD_BUS_PROPERTY("NInstalledJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_installed_jobs), 0),
|
||||
SD_BUS_PROPERTY("NFailedJobs", "u", bus_property_get_unsigned, offsetof(Manager, n_failed_jobs), 0),
|
||||
SD_BUS_PROPERTY("TransactionsWithOrderingCycle", "at", property_get_transactions_with_cycle, 0, 0),
|
||||
SD_BUS_PROPERTY("Progress", "d", property_get_progress, 0, 0),
|
||||
SD_BUS_PROPERTY("Environment", "as", property_get_environment, 0, 0),
|
||||
SD_BUS_PROPERTY("ConfirmSpawn", "b", bus_property_get_bool, offsetof(Manager, confirm_spawn), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
|
||||
@@ -92,6 +92,8 @@ int manager_serialize(
|
||||
|
||||
_cleanup_(manager_reloading_stopp) _unused_ Manager *reloading = manager_reloading_start(m);
|
||||
|
||||
(void) serialize_item_format(f, "last-transaction-id", "%" PRIu64, m->last_transaction_id);
|
||||
|
||||
(void) serialize_item_format(f, "current-job-id", "%" PRIu32, m->current_job_id);
|
||||
(void) serialize_item_format(f, "n-installed-jobs", "%u", m->n_installed_jobs);
|
||||
(void) serialize_item_format(f, "n-failed-jobs", "%u", m->n_failed_jobs);
|
||||
@@ -283,7 +285,7 @@ static void manager_deserialize_gid_refs_one(Manager *m, const char *value) {
|
||||
|
||||
int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
|
||||
bool deserialize_varlink_sockets = false;
|
||||
int r = 0;
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
assert(f);
|
||||
@@ -325,7 +327,15 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
|
||||
if (r == 0) /* eof or end marker */
|
||||
break;
|
||||
|
||||
if ((val = startswith(l, "current-job-id="))) {
|
||||
if ((val = startswith(l, "last-transaction-id="))) {
|
||||
uint64_t id;
|
||||
|
||||
if (safe_atou64(val, &id) < 0)
|
||||
log_notice("Failed to parse last transaction id value '%s', ignoring.", val);
|
||||
else
|
||||
m->last_transaction_id = MAX(m->last_transaction_id, id);
|
||||
|
||||
} else if ((val = startswith(l, "current-job-id="))) {
|
||||
uint32_t id;
|
||||
|
||||
if (safe_atou32(val, &id) < 0)
|
||||
@@ -350,22 +360,18 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
|
||||
m->n_failed_jobs += n;
|
||||
|
||||
} else if ((val = startswith(l, "taint-logged="))) {
|
||||
int b;
|
||||
|
||||
b = parse_boolean(val);
|
||||
if (b < 0)
|
||||
r = parse_boolean(val);
|
||||
if (r < 0)
|
||||
log_notice("Failed to parse taint-logged flag '%s', ignoring.", val);
|
||||
else
|
||||
m->taint_logged = m->taint_logged || b;
|
||||
m->taint_logged = m->taint_logged || r;
|
||||
|
||||
} else if ((val = startswith(l, "service-watchdogs="))) {
|
||||
int b;
|
||||
|
||||
b = parse_boolean(val);
|
||||
if (b < 0)
|
||||
r = parse_boolean(val);
|
||||
if (r < 0)
|
||||
log_notice("Failed to parse service-watchdogs flag '%s', ignoring.", val);
|
||||
else
|
||||
m->service_watchdogs = b;
|
||||
m->service_watchdogs = r;
|
||||
|
||||
} else if ((val = startswith(l, "show-status-overridden="))) {
|
||||
ShowStatus s;
|
||||
|
||||
@@ -1660,6 +1660,8 @@ static void manager_clear_jobs_and_units(Manager *m) {
|
||||
m->n_running_jobs = 0;
|
||||
m->n_installed_jobs = 0;
|
||||
m->n_failed_jobs = 0;
|
||||
|
||||
m->transactions_with_cycle = set_free(m->transactions_with_cycle);
|
||||
}
|
||||
|
||||
Manager* manager_free(Manager *m) {
|
||||
@@ -2154,14 +2156,16 @@ int manager_add_job_full(
|
||||
if (mode == JOB_RESTART_DEPENDENCIES && type != JOB_START)
|
||||
return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=restart-dependencies is only valid for start.");
|
||||
|
||||
tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY, ++m->last_transaction_id);
|
||||
if (!tr)
|
||||
return -ENOMEM;
|
||||
|
||||
LOG_CONTEXT_PUSHF("TRANSACTION_ID=%" PRIu64, tr->id);
|
||||
|
||||
log_unit_debug(unit, "Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
|
||||
|
||||
type = job_type_collapse(type, unit);
|
||||
|
||||
tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY);
|
||||
if (!tr)
|
||||
return -ENOMEM;
|
||||
|
||||
r = transaction_add_job_and_dependencies(
|
||||
tr,
|
||||
type,
|
||||
@@ -2248,18 +2252,20 @@ int manager_add_job_by_name_and_warn(Manager *m, JobType type, const char *name,
|
||||
}
|
||||
|
||||
int manager_propagate_reload(Manager *m, Unit *unit, JobMode mode, sd_bus_error *e) {
|
||||
int r;
|
||||
_cleanup_(transaction_abort_and_freep) Transaction *tr = NULL;
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
assert(unit);
|
||||
assert(mode < _JOB_MODE_MAX);
|
||||
assert(mode != JOB_ISOLATE); /* Isolate is only valid for start */
|
||||
|
||||
tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY);
|
||||
tr = transaction_new(mode == JOB_REPLACE_IRREVERSIBLY, ++m->last_transaction_id);
|
||||
if (!tr)
|
||||
return -ENOMEM;
|
||||
|
||||
LOG_CONTEXT_PUSHF("TRANSACTION_ID=%" PRIu64, tr->id);
|
||||
|
||||
/* We need an anchor job */
|
||||
r = transaction_add_job_and_dependencies(tr, JOB_NOP, unit, NULL, TRANSACTION_IGNORE_REQUIREMENTS|TRANSACTION_IGNORE_ORDER, e);
|
||||
if (r < 0)
|
||||
@@ -3670,6 +3676,8 @@ void manager_reset_failed(Manager *m) {
|
||||
|
||||
HASHMAP_FOREACH(u, m->units)
|
||||
unit_reset_failed(u);
|
||||
|
||||
m->transactions_with_cycle = set_free(m->transactions_with_cycle);
|
||||
}
|
||||
|
||||
bool manager_unit_inactive_or_pending(Manager *m, const char *name) {
|
||||
@@ -4618,8 +4626,8 @@ ManagerState manager_state(Manager *m) {
|
||||
return MANAGER_MAINTENANCE;
|
||||
}
|
||||
|
||||
/* Are there any failed units? If so, we are in degraded mode */
|
||||
if (!set_isempty(m->failed_units))
|
||||
/* Are there any failed units or ordering cycles? If so, we are in degraded mode */
|
||||
if (!set_isempty(m->failed_units) || !set_isempty(m->transactions_with_cycle))
|
||||
return MANAGER_DEGRADED;
|
||||
|
||||
return MANAGER_RUNNING;
|
||||
|
||||
@@ -236,6 +236,11 @@ typedef struct Manager {
|
||||
/* A set which contains all currently failed units */
|
||||
Set *failed_units;
|
||||
|
||||
uint64_t last_transaction_id;
|
||||
|
||||
/* IDs of transactions that once encountered ordering cycle */
|
||||
Set *transactions_with_cycle;
|
||||
|
||||
sd_event_source *run_queue_event_source;
|
||||
|
||||
char *notify_socket;
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include "bus-common-errors.h"
|
||||
#include "bus-error.h"
|
||||
#include "dbus-unit.h"
|
||||
#include "hash-funcs.h"
|
||||
#include "manager.h"
|
||||
#include "set.h"
|
||||
#include "slice.h"
|
||||
@@ -15,6 +16,8 @@
|
||||
#include "strv.h"
|
||||
#include "transaction.h"
|
||||
|
||||
#define CYCLIC_TRANSACTIONS_MAX 4096U
|
||||
|
||||
static bool job_matters_to_anchor(Job *job);
|
||||
static void transaction_unlink_job(Transaction *tr, Job *j, bool delete_dependencies);
|
||||
|
||||
@@ -399,6 +402,16 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi
|
||||
LOG_MESSAGE_ID(SD_MESSAGE_UNIT_ORDERING_CYCLE_STR),
|
||||
LOG_ITEM("%s", strempty(unit_ids)));
|
||||
|
||||
if (set_size(j->manager->transactions_with_cycle) >= CYCLIC_TRANSACTIONS_MAX)
|
||||
log_warning("Too many transactions with ordering cycle, suppressing record.");
|
||||
else {
|
||||
uint64_t *id_buf = newdup(uint64_t, &tr->id, 1);
|
||||
if (!id_buf)
|
||||
log_oom_warning();
|
||||
else
|
||||
(void) set_ensure_consume(&j->manager->transactions_with_cycle, &uint64_hash_ops_value_free, id_buf);
|
||||
}
|
||||
|
||||
if (delete) {
|
||||
const char *status;
|
||||
/* logging for j not k here to provide a consistent narrative */
|
||||
@@ -1235,20 +1248,24 @@ int transaction_add_triggering_jobs(Transaction *tr, Unit *u) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
Transaction* transaction_new(bool irreversible) {
|
||||
Transaction *tr;
|
||||
Transaction* transaction_new(bool irreversible, uint64_t id) {
|
||||
_cleanup_free_ Transaction *tr = NULL;
|
||||
|
||||
tr = new0(Transaction, 1);
|
||||
assert(id != 0);
|
||||
|
||||
tr = new(Transaction, 1);
|
||||
if (!tr)
|
||||
return NULL;
|
||||
|
||||
tr->jobs = hashmap_new(NULL);
|
||||
*tr = (Transaction) {
|
||||
.jobs = hashmap_new(NULL),
|
||||
.irreversible = irreversible,
|
||||
.id = id,
|
||||
};
|
||||
if (!tr->jobs)
|
||||
return mfree(tr);
|
||||
return NULL;
|
||||
|
||||
tr->irreversible = irreversible;
|
||||
|
||||
return tr;
|
||||
return TAKE_PTR(tr);
|
||||
}
|
||||
|
||||
Transaction* transaction_free(Transaction *tr) {
|
||||
|
||||
@@ -8,9 +8,11 @@ typedef struct Transaction {
|
||||
Hashmap *jobs; /* Unit object => Job object list 1:1 */
|
||||
Job *anchor_job; /* The job the user asked for */
|
||||
bool irreversible;
|
||||
|
||||
uint64_t id;
|
||||
} Transaction;
|
||||
|
||||
Transaction* transaction_new(bool irreversible);
|
||||
Transaction* transaction_new(bool irreversible, uint64_t id);
|
||||
Transaction* transaction_free(Transaction *tr);
|
||||
Transaction* transaction_abort_and_free(Transaction *tr);
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(Transaction*, transaction_abort_and_free);
|
||||
|
||||
@@ -112,6 +112,24 @@ static int manager_context_build_json(sd_json_variant **ret, const char *name, v
|
||||
JSON_BUILD_PAIR_STRING_NON_EMPTY("ControlGroup", m->cgroup_root));
|
||||
}
|
||||
|
||||
static int transactions_with_cycle_build_json(sd_json_variant **ret, const char *name, void *userdata) {
|
||||
_cleanup_(sd_json_variant_unrefp) sd_json_variant *v = NULL;
|
||||
const Set *ids = userdata;
|
||||
int r;
|
||||
|
||||
assert(ret);
|
||||
|
||||
uint64_t *id;
|
||||
SET_FOREACH(id, ids) {
|
||||
r = sd_json_variant_append_arrayb(&v, SD_JSON_BUILD_UNSIGNED(*id));
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
*ret = TAKE_PTR(v);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int manager_runtime_build_json(sd_json_variant **ret, const char *name, void *userdata) {
|
||||
Manager *m = ASSERT_PTR(userdata);
|
||||
dual_timestamp watchdog_last_ping;
|
||||
@@ -154,6 +172,7 @@ static int manager_runtime_build_json(sd_json_variant **ret, const char *name, v
|
||||
SD_JSON_BUILD_PAIR_UNSIGNED("NJobs", hashmap_size(m->jobs)),
|
||||
SD_JSON_BUILD_PAIR_UNSIGNED("NInstalledJobs", m->n_installed_jobs),
|
||||
SD_JSON_BUILD_PAIR_UNSIGNED("NFailedJobs", m->n_failed_jobs),
|
||||
JSON_BUILD_PAIR_CALLBACK_NON_NULL("TransactionsWithOrderingCycle", transactions_with_cycle_build_json, m->transactions_with_cycle),
|
||||
SD_JSON_BUILD_PAIR_REAL("Progress", manager_get_progress(m)),
|
||||
JSON_BUILD_PAIR_DUAL_TIMESTAMP_NON_NULL("WatchdogLastPingTimestamp", watchdog_get_last_ping_as_dual_timestamp(&watchdog_last_ping)),
|
||||
SD_JSON_BUILD_PAIR_STRING("SystemState", manager_state_to_string(manager_state(m))),
|
||||
|
||||
@@ -155,6 +155,8 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE(
|
||||
SD_VARLINK_DEFINE_FIELD(NInstalledJobs, SD_VARLINK_INT, 0),
|
||||
SD_VARLINK_FIELD_COMMENT("The total amount of failed jobs"),
|
||||
SD_VARLINK_DEFINE_FIELD(NFailedJobs, SD_VARLINK_INT, 0),
|
||||
SD_VARLINK_FIELD_COMMENT("IDs of transactions that encountered ordering cycle"),
|
||||
SD_VARLINK_DEFINE_FIELD(TransactionsWithOrderingCycle, SD_VARLINK_INT, SD_VARLINK_ARRAY|SD_VARLINK_NULLABLE),
|
||||
SD_VARLINK_FIELD_COMMENT("Boot progress as a floating point value between 0.0 and 1.0"),
|
||||
SD_VARLINK_DEFINE_FIELD(Progress, SD_VARLINK_FLOAT, 0),
|
||||
SD_VARLINK_FIELD_COMMENT("Timestamp when the hardware watchdog was last pinged"),
|
||||
|
||||
@@ -124,6 +124,13 @@ for i in {0..19}; do
|
||||
systemctl start "transaction-cycle$i.service"
|
||||
done
|
||||
|
||||
IDS_FILE="/tmp/TEST-03-JOBS-CYCLE-IDS-$RANDOM"
|
||||
varlinkctl call /run/systemd/io.systemd.Manager io.systemd.Manager.Describe '{}' | jq '.runtime.TransactionsWithOrderingCycle' >"$IDS_FILE"
|
||||
[[ "$(jq length "$IDS_FILE")" -ge 20 ]]
|
||||
for i in {0..19}; do
|
||||
journalctl -b TRANSACTION_ID="$(jq -r ".[$i]" "$IDS_FILE")" --grep "cycle starting with"
|
||||
done
|
||||
|
||||
# Test PropagatesStopTo= when restart (issue #26839)
|
||||
systemctl start propagatestopto-and-pullin.target
|
||||
systemctl --quiet is-active propagatestopto-and-pullin.target
|
||||
|
||||
Reference in New Issue
Block a user