From efa736d3835583a0464b404ae88945d55a180d92 Mon Sep 17 00:00:00 2001 From: msizanoen1 Date: Wed, 7 Dec 2022 16:54:13 +0700 Subject: [PATCH 1/7] sleep: always thaw user.slice even if freezing failed `FreezeUnit` can fail even when some units did got frozen, causing some user units to be frozen. A possible symptom is `user@.service` being frozen while still being able to log in over SSH. --- src/sleep/sleep.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/sleep/sleep.c b/src/sleep/sleep.c index 6785ae2330..e74e334e33 100644 --- a/src/sleep/sleep.c +++ b/src/sleep/sleep.c @@ -374,7 +374,7 @@ static int freeze_thaw_user_slice(const char **method) { } static int execute_s2h(const SleepConfig *sleep_config) { - _unused_ _cleanup_(freeze_thaw_user_slice) const char *auto_method_thaw = NULL; + _unused_ _cleanup_(freeze_thaw_user_slice) const char *auto_method_thaw = "ThawUnit"; int r, k; assert(sleep_config); @@ -382,8 +382,6 @@ static int execute_s2h(const SleepConfig *sleep_config) { r = freeze_thaw_user_slice(&(const char*) { "FreezeUnit" }); if (r < 0) log_debug_errno(r, "Failed to freeze unit user.slice, ignoring: %m"); - else - auto_method_thaw = "ThawUnit"; /* from now on we want automatic thawing */; r = check_wakeup_type(); if (r < 0) From fcb0878f7563df9701a4d066378995c0b7ec32be Mon Sep 17 00:00:00 2001 From: msizanoen1 Date: Wed, 7 Dec 2022 16:38:05 +0700 Subject: [PATCH 2/7] core/slice: skip member units without realized cgroup during freeze or thaw This ensures that services with `RemainAfterExit` but without any process running won't cause failure during freeze. --- src/core/slice.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/slice.c b/src/core/slice.c index c453aa033e..4824a300d0 100644 --- a/src/core/slice.c +++ b/src/core/slice.c @@ -381,6 +381,9 @@ static int slice_freezer_action(Unit *s, FreezerAction action) { } UNIT_FOREACH_DEPENDENCY(member, s, UNIT_ATOM_SLICE_OF) { + if (!member->cgroup_realized) + continue; + if (action == FREEZER_FREEZE) r = UNIT_VTABLE(member)->freeze(member); else From a14137d90e5f50ad8627c85ae94731a5c9948227 Mon Sep 17 00:00:00 2001 From: msizanoen1 Date: Wed, 7 Dec 2022 16:32:05 +0700 Subject: [PATCH 3/7] core/cgroup: thaw slice of unit when thawing unit This ensures starting a new unit under a frozen slice work as expected. --- src/core/cgroup.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 1e9cb758de..9abcfe2b56 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -4175,9 +4175,23 @@ int unit_cgroup_freezer_action(Unit *u, FreezerAction action) { if (!cg_freezer_supported()) return 0; + /* Ignore all requests to thaw init.scope or -.slice and reject all requests to freeze them */ + if (unit_has_name(u, SPECIAL_ROOT_SLICE) || unit_has_name(u, SPECIAL_INIT_SCOPE)) + return action == FREEZER_FREEZE ? -EPERM : 0; + if (!u->cgroup_realized) return -EBUSY; + if (action == FREEZER_THAW) { + Unit *slice = UNIT_GET_SLICE(u); + + if (slice) { + r = unit_cgroup_freezer_action(slice, FREEZER_THAW); + if (r < 0) + return log_unit_error_errno(u, r, "Failed to thaw slice %s of unit: %m", slice->id); + } + } + target = action == FREEZER_FREEZE ? FREEZER_FROZEN : FREEZER_RUNNING; r = unit_freezer_state_kernel(u, &kernel); From 3d19e122cfe341b28dfcb58f1aac829c122da569 Mon Sep 17 00:00:00 2001 From: msizanoen1 Date: Wed, 7 Dec 2022 20:46:01 +0700 Subject: [PATCH 4/7] core/unit: allow overriding an ongoing freeze operation Sometimes a freeze operation can hang due to the presence of kernel threads inside the unit cgroup (e.g. QEMU-KVM). This ensures that the ThawUnit operation invoked by systemd-sleep at wakeup always thaws the unit. --- src/core/dbus-unit.c | 27 +++++++++++++++++------ src/core/dbus-unit.h | 2 +- src/core/unit.c | 7 +++--- src/libsystemd/sd-bus/bus-common-errors.h | 1 + 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 19a71b6cb3..0702373ab0 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -782,14 +782,15 @@ static int bus_unit_method_freezer_generic(sd_bus_message *message, void *userda if (r == 0) reply_no_delay = true; - assert(!u->pending_freezer_message); + if (u->pending_freezer_message) { + bus_unit_send_pending_freezer_message(u, true); + assert(!u->pending_freezer_message); + } - r = sd_bus_message_new_method_return(message, &u->pending_freezer_message); - if (r < 0) - return r; + u->pending_freezer_message = sd_bus_message_ref(message); if (reply_no_delay) { - r = bus_unit_send_pending_freezer_message(u); + r = bus_unit_send_pending_freezer_message(u, false); if (r < 0) return r; } @@ -1661,7 +1662,8 @@ void bus_unit_send_pending_change_signal(Unit *u, bool including_new) { bus_unit_send_change_signal(u); } -int bus_unit_send_pending_freezer_message(Unit *u) { +int bus_unit_send_pending_freezer_message(Unit *u, bool cancelled) { + _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; int r; assert(u); @@ -1669,7 +1671,18 @@ int bus_unit_send_pending_freezer_message(Unit *u) { if (!u->pending_freezer_message) return 0; - r = sd_bus_send(NULL, u->pending_freezer_message, NULL); + if (cancelled) + r = sd_bus_message_new_method_error( + u->pending_freezer_message, + &reply, + &SD_BUS_ERROR_MAKE_CONST( + BUS_ERROR_FREEZE_CANCELLED, "Freeze operation aborted")); + else + r = sd_bus_message_new_method_return(u->pending_freezer_message, &reply); + if (r < 0) + return r; + + r = sd_bus_send(NULL, reply, NULL); if (r < 0) log_warning_errno(r, "Failed to send queued message, ignoring: %m"); diff --git a/src/core/dbus-unit.h b/src/core/dbus-unit.h index 643edcd87e..6b7828e4ba 100644 --- a/src/core/dbus-unit.h +++ b/src/core/dbus-unit.h @@ -10,7 +10,7 @@ extern const sd_bus_vtable bus_unit_cgroup_vtable[]; void bus_unit_send_change_signal(Unit *u); void bus_unit_send_pending_change_signal(Unit *u, bool including_new); -int bus_unit_send_pending_freezer_message(Unit *u); +int bus_unit_send_pending_freezer_message(Unit *u, bool cancelled); void bus_unit_send_removed_signal(Unit *u); int bus_unit_method_start_generic(sd_bus_message *message, Unit *u, JobType job_type, bool reload_if_possible, sd_bus_error *error); diff --git a/src/core/unit.c b/src/core/unit.c index 29b07a6e7a..e0d54d8fe1 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -5829,7 +5829,7 @@ void unit_frozen(Unit *u) { u->freezer_state = FREEZER_FROZEN; - bus_unit_send_pending_freezer_message(u); + bus_unit_send_pending_freezer_message(u, false); } void unit_thawed(Unit *u) { @@ -5837,7 +5837,7 @@ void unit_thawed(Unit *u) { u->freezer_state = FREEZER_RUNNING; - bus_unit_send_pending_freezer_message(u); + bus_unit_send_pending_freezer_message(u, false); } static int unit_freezer_action(Unit *u, FreezerAction action) { @@ -5862,7 +5862,8 @@ static int unit_freezer_action(Unit *u, FreezerAction action) { if (s != UNIT_ACTIVE) return -EHOSTDOWN; - if (IN_SET(u->freezer_state, FREEZER_FREEZING, FREEZER_THAWING)) + if ((IN_SET(u->freezer_state, FREEZER_FREEZING, FREEZER_THAWING) && action == FREEZER_FREEZE) || + (u->freezer_state == FREEZER_THAWING && action == FREEZER_THAW)) return -EALREADY; r = method(u); diff --git a/src/libsystemd/sd-bus/bus-common-errors.h b/src/libsystemd/sd-bus/bus-common-errors.h index d4a1fb689e..40d6abc413 100644 --- a/src/libsystemd/sd-bus/bus-common-errors.h +++ b/src/libsystemd/sd-bus/bus-common-errors.h @@ -31,6 +31,7 @@ #define BUS_ERROR_NOTHING_TO_CLEAN "org.freedesktop.systemd1.NothingToClean" #define BUS_ERROR_UNIT_BUSY "org.freedesktop.systemd1.UnitBusy" #define BUS_ERROR_UNIT_INACTIVE "org.freedesktop.systemd1.UnitInactive" +#define BUS_ERROR_FREEZE_CANCELLED "org.freedesktop.systemd1.FreezeCancelled" #define BUS_ERROR_NO_SUCH_MACHINE "org.freedesktop.machine1.NoSuchMachine" #define BUS_ERROR_NO_SUCH_IMAGE "org.freedesktop.machine1.NoSuchImage" From 7fcd26978484b9a328b976799fbca038430f942f Mon Sep 17 00:00:00 2001 From: msizanoen1 Date: Wed, 7 Dec 2022 23:09:33 +0700 Subject: [PATCH 5/7] core/cgroup: ignore kernel cgroup.events when thawing The `frozen` state can be `0` while the processes are indeed frozen (see last commit). Therefore do not respect cgroup.events when checking whether thawing is necessary. --- src/core/cgroup.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 9abcfe2b56..ecc3cb32ef 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -4167,7 +4167,7 @@ int compare_job_priority(const void *a, const void *b) { int unit_cgroup_freezer_action(Unit *u, FreezerAction action) { _cleanup_free_ char *path = NULL; FreezerState target, kernel = _FREEZER_STATE_INVALID; - int r; + int r, ret; assert(u); assert(IN_SET(action, FREEZER_FREEZE, FREEZER_THAW)); @@ -4200,8 +4200,11 @@ int unit_cgroup_freezer_action(Unit *u, FreezerAction action) { if (target == kernel) { u->freezer_state = target; - return 0; - } + if (action == FREEZER_FREEZE) + return 0; + ret = 0; + } else + ret = 1; r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.freeze", &path); if (r < 0) @@ -4209,16 +4212,18 @@ int unit_cgroup_freezer_action(Unit *u, FreezerAction action) { log_unit_debug(u, "%s unit.", action == FREEZER_FREEZE ? "Freezing" : "Thawing"); - if (action == FREEZER_FREEZE) - u->freezer_state = FREEZER_FREEZING; - else - u->freezer_state = FREEZER_THAWING; + if (target != kernel) { + if (action == FREEZER_FREEZE) + u->freezer_state = FREEZER_FREEZING; + else + u->freezer_state = FREEZER_THAWING; + } r = write_string_file(path, one_zero(action == FREEZER_FREEZE), WRITE_STRING_FILE_DISABLE_BUFFER); if (r < 0) return r; - return 1; + return ret; } int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) { From 432a32117506657186e16bd8e0642bbb30326bc4 Mon Sep 17 00:00:00 2001 From: msizanoen1 Date: Wed, 7 Dec 2022 23:22:05 +0700 Subject: [PATCH 6/7] core/sleep: set timeout for freeze/thaw operation to 1.5 seconds A FreezeUnit operation can hang due to the presence of kernel threads (see last 2 commits). Keeping the default configuration will mean the system will hang for 25 seconds in suspend waiting for the response. 1.5 seconds should be sufficient for most cases. --- src/sleep/sleep.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/sleep/sleep.c b/src/sleep/sleep.c index e74e334e33..9b69a2a10d 100644 --- a/src/sleep/sleep.c +++ b/src/sleep/sleep.c @@ -366,6 +366,9 @@ static int freeze_thaw_user_slice(const char **method) { if (r < 0) return log_debug_errno(r, "Failed to open connection to systemd: %m"); + /* Wait for 1.5 seconds at maximum for freeze operation */ + (void) sd_bus_set_method_call_timeout(bus, 1500 * USEC_PER_MSEC); + r = bus_call_method(bus, bus_systemd_mgr, *method, &error, NULL, "s", SPECIAL_USER_SLICE); if (r < 0) return log_debug_errno(r, "Failed to execute operation: %s", bus_error_message(&error, r)); From af1e336589dc5db78fdfa56c6a7efb86a9196a90 Mon Sep 17 00:00:00 2001 From: msizanoen1 Date: Thu, 8 Dec 2022 02:35:32 +0100 Subject: [PATCH 7/7] core: pending_freezer_{message => invocation} Rename the field to reflect the new semantics. --- src/core/dbus-unit.c | 14 +++++++------- src/core/dbus.c | 4 ++-- src/core/unit.c | 2 +- src/core/unit.h | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 0702373ab0..1ef98da6fd 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -782,12 +782,12 @@ static int bus_unit_method_freezer_generic(sd_bus_message *message, void *userda if (r == 0) reply_no_delay = true; - if (u->pending_freezer_message) { + if (u->pending_freezer_invocation) { bus_unit_send_pending_freezer_message(u, true); - assert(!u->pending_freezer_message); + assert(!u->pending_freezer_invocation); } - u->pending_freezer_message = sd_bus_message_ref(message); + u->pending_freezer_invocation = sd_bus_message_ref(message); if (reply_no_delay) { r = bus_unit_send_pending_freezer_message(u, false); @@ -1668,17 +1668,17 @@ int bus_unit_send_pending_freezer_message(Unit *u, bool cancelled) { assert(u); - if (!u->pending_freezer_message) + if (!u->pending_freezer_invocation) return 0; if (cancelled) r = sd_bus_message_new_method_error( - u->pending_freezer_message, + u->pending_freezer_invocation, &reply, &SD_BUS_ERROR_MAKE_CONST( BUS_ERROR_FREEZE_CANCELLED, "Freeze operation aborted")); else - r = sd_bus_message_new_method_return(u->pending_freezer_message, &reply); + r = sd_bus_message_new_method_return(u->pending_freezer_invocation, &reply); if (r < 0) return r; @@ -1686,7 +1686,7 @@ int bus_unit_send_pending_freezer_message(Unit *u, bool cancelled) { if (r < 0) log_warning_errno(r, "Failed to send queued message, ignoring: %m"); - u->pending_freezer_message = sd_bus_message_unref(u->pending_freezer_message); + u->pending_freezer_invocation = sd_bus_message_unref(u->pending_freezer_invocation); return 0; } diff --git a/src/core/dbus.c b/src/core/dbus.c index 141c3ffe12..0bf2391b03 100644 --- a/src/core/dbus.c +++ b/src/core/dbus.c @@ -998,8 +998,8 @@ static void destroy_bus(Manager *m, sd_bus **bus) { u->bus_track = sd_bus_track_unref(u->bus_track); /* Get rid of pending freezer messages on this bus */ - if (u->pending_freezer_message && sd_bus_message_get_bus(u->pending_freezer_message) == *bus) - u->pending_freezer_message = sd_bus_message_unref(u->pending_freezer_message); + if (u->pending_freezer_invocation && sd_bus_message_get_bus(u->pending_freezer_invocation) == *bus) + u->pending_freezer_invocation = sd_bus_message_unref(u->pending_freezer_invocation); } /* Get rid of queued message on this bus */ diff --git a/src/core/unit.c b/src/core/unit.c index e0d54d8fe1..b052402cfb 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -687,7 +687,7 @@ Unit* unit_free(Unit *u) { u->match_bus_slot = sd_bus_slot_unref(u->match_bus_slot); u->bus_track = sd_bus_track_unref(u->bus_track); u->deserialized_refs = strv_free(u->deserialized_refs); - u->pending_freezer_message = sd_bus_message_unref(u->pending_freezer_message); + u->pending_freezer_invocation = sd_bus_message_unref(u->pending_freezer_invocation); unit_free_requires_mounts_for(u); diff --git a/src/core/unit.h b/src/core/unit.h index 3bc7de3d1c..58417ebd0e 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -239,7 +239,7 @@ typedef struct Unit { FILE *transient_file; /* Freezer state */ - sd_bus_message *pending_freezer_message; + sd_bus_message *pending_freezer_invocation; FreezerState freezer_state; /* Job timeout and action to take */