From 8db998981a4fefd0122bcf5f965726b63c9045c2 Mon Sep 17 00:00:00 2001 From: Richard Phibel Date: Tue, 23 May 2023 16:09:40 +0200 Subject: [PATCH 1/3] core: Don't GC unit if it is in cgroup_empty_queue The gc_unit_queue is dispatched before the cgroup_empty_queue. Because of this, when we enter in on_cgroup_empty_event, the unit in cgroup_empty_queue may already have been freed and we don't clean up the corresponding cgroup. With this change, we prevent the unit from being garbage collected if it is in the cgroup_empty_queue. --- src/core/unit.c | 3 ++ test/units/testsuite-19.cleanup-slice.sh | 49 ++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100755 test/units/testsuite-19.cleanup-slice.sh diff --git a/src/core/unit.c b/src/core/unit.c index 90f87a95f5..84e9185e82 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -441,6 +441,9 @@ bool unit_may_gc(Unit *u) { if (u->perpetual) return false; + if (u->in_cgroup_empty_queue) + return false; + if (sd_bus_track_count(u->bus_track) > 0) return false; diff --git a/test/units/testsuite-19.cleanup-slice.sh b/test/units/testsuite-19.cleanup-slice.sh new file mode 100755 index 0000000000..5d63160334 --- /dev/null +++ b/test/units/testsuite-19.cleanup-slice.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eux +set -o pipefail + +# shellcheck source=test/units/util.sh +. "$(dirname "$0")"/util.sh + +export SYSTEMD_LOG_LEVEL=debug + +# Create service with KillMode=none inside a slice +cat </run/systemd/system/test19cleanup.service +[Unit] +Description=Test 19 cleanup Service +[Service] +Slice=test19cleanup.slice +Type=exec +ExecStart=sleep infinity +KillMode=none +EOF +cat </run/systemd/system/test19cleanup.slice +[Unit] +Description=Test 19 cleanup Slice +EOF + +# Start service +systemctl start test19cleanup.service +assert_rc 0 systemd-cgls /test19cleanup.slice + +pid=$(systemctl show --property MainPID --value test19cleanup) +ps "$pid" + +# Stop slice +# The sleep process will not be killed because of KillMode=none +# Since there is still a process running under it, the /test19cleanup.slice cgroup won't be removed +systemctl stop test19cleanup.slice + +ps "$pid" + +# Kill sleep process manually +kill -s TERM "$pid" +while kill -0 "$pid" 2>/dev/null; do sleep 0.1; done + +timeout 30 bash -c 'while systemd-cgls /test19cleanup.slice/test19cleanup.service >& /dev/null; do sleep .5; done' +assert_rc 1 systemd-cgls /test19cleanup.slice/test19cleanup.service + +# Check that empty cgroup /test19cleanup.slice has been removed +timeout 30 bash -c 'while systemd-cgls /test19cleanup.slice >& /dev/null; do sleep .5; done' +assert_rc 1 systemd-cgls /test19cleanup.slice From 380dd177798507ced9cde580c04539f54bb980c8 Mon Sep 17 00:00:00 2001 From: Richard Phibel Date: Thu, 25 May 2023 19:49:11 +0200 Subject: [PATCH 2/3] core: Handle cgroup pruning in on_cgroup_empty_event This change removes the pruning of cgroups for FAILED/INACTIVE units from per-unit-type handlers and moves it in on_cgroup_empty_event. --- src/core/cgroup.c | 4 +++- src/core/scope.c | 5 ----- src/core/service.c | 5 ----- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 4ec5dfa587..839b1676c8 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -3142,7 +3142,9 @@ static int on_cgroup_empty_event(sd_event_source *s, void *userdata) { unit_add_to_gc_queue(u); - if (UNIT_VTABLE(u)->notify_cgroup_empty) + if (IN_SET(unit_active_state(u), UNIT_INACTIVE, UNIT_FAILED)) + unit_prune_cgroup(u); + else if (UNIT_VTABLE(u)->notify_cgroup_empty) UNIT_VTABLE(u)->notify_cgroup_empty(u); return 0; diff --git a/src/core/scope.c b/src/core/scope.c index 761eb5ea56..72253421e2 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -629,11 +629,6 @@ static void scope_notify_cgroup_empty_event(Unit *u) { if (IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL)) scope_enter_dead(s, SCOPE_SUCCESS); - - /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean - * up the cgroup earlier and should do it now. */ - if (IN_SET(s->state, SCOPE_DEAD, SCOPE_FAILED)) - unit_prune_cgroup(u); } static void scope_notify_cgroup_oom_event(Unit *u, bool managed_oom) { diff --git a/src/core/service.c b/src/core/service.c index 533d7d3771..38f8745674 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -3648,12 +3648,7 @@ static void service_notify_cgroup_empty_event(Unit *u) { /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean * up the cgroup earlier and should do it now. */ - case SERVICE_DEAD: - case SERVICE_FAILED: - case SERVICE_DEAD_BEFORE_AUTO_RESTART: - case SERVICE_FAILED_BEFORE_AUTO_RESTART: case SERVICE_AUTO_RESTART: - case SERVICE_DEAD_RESOURCES_PINNED: unit_prune_cgroup(u); break; From fc6172b1d844fb2e93cb1180810eba561aead3b8 Mon Sep 17 00:00:00 2001 From: Richard Phibel Date: Tue, 30 May 2023 00:45:09 +0200 Subject: [PATCH 3/3] Fix failing test In test-execute, only the unit was started, not the slice. Because of that the slice cgroup was pruned even if it was still needed. From what I can tell, this is because, in the test, we don't have all the mechanics that starts the slice for a service. To fix the issue the slice is started manually. --- src/test/test-execute.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/test/test-execute.c b/src/test/test-execute.c index ae6227c492..a07c837e3f 100644 --- a/src/test/test-execute.c +++ b/src/test/test-execute.c @@ -206,6 +206,17 @@ static bool is_inaccessible_available(void) { return true; } +static void start_parent_slices(Unit *unit) { + Unit *slice; + + slice = UNIT_GET_SLICE(unit); + if (slice) { + start_parent_slices(slice); + int r = unit_start(slice, NULL); + assert_se(r >= 0 || r == -EALREADY); + } +} + static void _test(const char *file, unsigned line, const char *func, Manager *m, const char *unit_name, int status_expected, int code_expected) { Unit *unit; @@ -213,6 +224,9 @@ static void _test(const char *file, unsigned line, const char *func, assert_se(unit_name); assert_se(manager_load_startable_unit_or_warn(m, unit_name, NULL, &unit) >= 0); + /* We need to start the slices as well otherwise the slice cgroups might be pruned + * in on_cgroup_empty_event. */ + start_parent_slices(unit); assert_se(unit_start(unit, NULL) >= 0); check_main_result(file, line, func, m, unit, status_expected, code_expected); }