From 6c71db763cb482c30870359dd3d188a6aa23c4da Mon Sep 17 00:00:00 2001 From: Florian Schmaus Date: Mon, 6 Nov 2023 13:15:55 +0100 Subject: [PATCH 1/3] cgroup: add support for memory.peak Linux's Control Group v2 interface exposes memory.peak, which contains the "max memory usage recorded for the cgroup and its descendants since the creation of the cgroup." This commit adds a new property "MemoryPeak" for units and makes "systemctl show" display this value if it is available. Fixes #29878. Signed-off-by: Florian Schmaus --- NEWS | 3 ++ man/org.freedesktop.systemd1.xml | 66 ++++++++++++++++++++++++++------ src/core/cgroup.c | 54 ++++++++++++++++++++++++++ src/core/cgroup.h | 1 + src/core/dbus-unit.c | 24 ++++++++++++ src/core/unit.c | 1 + src/core/unit.h | 3 ++ src/systemctl/systemctl-show.c | 10 ++++- 8 files changed, 149 insertions(+), 13 deletions(-) diff --git a/NEWS b/NEWS index dd68a04479..38b94ad42b 100644 --- a/NEWS +++ b/NEWS @@ -130,6 +130,9 @@ CHANGES WITH 255 in spe: machinectl bind and mount-image verbs will now cause the new mount to replace the old mount (if any), instead of overmounting it. + * Units now have a MemoryPeak property, which contains the value of + cgroup v2's memory.peak property. 
+ TPM2 Support + Disk Encryption & Authentication: * systemd-cryptenroll now allows specifying a PCR bank and explicit hash diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index 199ce4f14c..466d71c55b 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -2776,6 +2776,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemoryPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -3403,6 +3405,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -4035,6 +4039,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -4831,6 +4837,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemoryPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -5468,6 +5476,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -6082,6 +6092,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -6752,6 +6764,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemoryPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t 
CPUUsageNSec = ...; @@ -7317,6 +7331,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + @@ -7845,6 +7861,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + @@ -8638,6 +8656,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemoryPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -9189,6 +9209,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + @@ -9703,6 +9725,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + @@ -10355,6 +10379,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemoryPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -10532,6 +10558,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { + + @@ -10710,6 +10738,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice { + + @@ -10916,6 +10946,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryCurrent = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly t MemoryPeak = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t MemoryAvailable = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly t CPUUsageNSec = ...; @@ -11113,6 +11145,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope { + + @@ -11321,6 +11355,8 @@ node 
/org/freedesktop/systemd1/unit/session_2d1_2escope { + + @@ -11716,8 +11752,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MountImagePolicy, and ExtensionImagePolicy were added in version 254. NFTSet, - SetLoginEnvironment and - CoredumpReceive were added in version 255. + SetLoginEnvironment, + CoredumpReceive, and + MemoryPeak were added in version 255. Socket Unit Objects @@ -11743,8 +11780,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ PollLimitIntervalUSec, PollLimitBurst, NFTSet, - SetLoginEnvironment and - CoredumpReceive were added in version 255. + SetLoginEnvironment, + CoredumpReceive, and + MemoryPeak were added in version 255. Mount Unit Objects @@ -11768,8 +11806,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MountImagePolicy, and ExtensionImagePolicy were added in version 254. NFTSet, - SetLoginEnvironment and - CoredumpReceive were added in version 255. + SetLoginEnvironment, + CoredumpReceive, and + MemoryPeak were added in version 255. Swap Unit Objects @@ -11793,8 +11832,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ MountImagePolicy, and ExtensionImagePolicy were added in version 254. NFTSet, - SetLoginEnvironment and - CoredumpReceive were added in version 255. + SetLoginEnvironment, + CoredumpReceive, and + MemoryPeak were added in version 255. Slice Unit Objects @@ -11809,8 +11849,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ StartupMemoryZSwapMax, MemoryPressureWatch, and MemoryPressureThresholdUSec were added in version 254. - NFTSet and - CoredumpReceive were added in version 255. + NFTSet, + CoredumpReceive, and + MemoryPeak were added in version 255. Scope Unit Objects @@ -11826,8 +11867,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ StartupMemoryZSwapMax, MemoryPressureWatch, and MemoryPressureThresholdUSec were added in version 254. - NFTSet and - CoredumpReceive were added in version 255. 
+ NFTSet, + CoredumpReceive, and + MemoryPeak were added in version 255. Job Objects diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 535f457fa8..53339f52fe 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -4040,6 +4040,60 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) { return cg_get_attribute_as_uint64("memory", u->cgroup_path, r > 0 ? "memory.current" : "memory.usage_in_bytes", ret); } +static int unit_get_memory_peak_raw(Unit *u, uint64_t *ret) { + int r; + + assert(u); + assert(ret); + + if (!u->cgroup_path) + return -ENODATA; + + /* The root cgroup doesn't expose this information. */ + if (unit_has_host_root_cgroup(u)) + return -ENODATA; + + if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0) + return -ENODATA; + + r = cg_all_unified(); + if (r < 0) + return r; + if (!r) + return -ENODATA; + + return cg_get_attribute_as_uint64("memory", u->cgroup_path, "memory.peak", ret); +} + +int unit_get_memory_peak(Unit *u, uint64_t *ret) { + uint64_t bytes; + int r; + + assert(u); + assert(ret); + + if (!UNIT_CGROUP_BOOL(u, memory_accounting)) + return -ENODATA; + + r = unit_get_memory_peak_raw(u, &bytes); + if (r == -ENODATA && u->memory_peak_last != UINT64_MAX) { + /* If we can't get the memory peak anymore (because the cgroup was already removed, for example), + * use our cached value. 
*/ + + if (ret) + *ret = u->memory_peak_last; + return 0; + } + if (r < 0) + return r; + + u->memory_peak_last = bytes; + if (ret) + *ret = bytes; + + return 0; +} + int unit_get_tasks_current(Unit *u, uint64_t *ret) { assert(u); assert(ret); diff --git a/src/core/cgroup.h b/src/core/cgroup.h index d7cc842835..0b073672a3 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -353,6 +353,7 @@ int unit_watch_all_pids(Unit *u); int unit_synthesize_cgroup_empty_event(Unit *u); int unit_get_memory_current(Unit *u, uint64_t *ret); +int unit_get_memory_peak(Unit *u, uint64_t *ret); int unit_get_memory_available(Unit *u, uint64_t *ret); int unit_get_tasks_current(Unit *u, uint64_t *ret); int unit_get_cpu_usage(Unit *u, nsec_t *ret); diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index 2fff6e135d..b3029ec158 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -1080,6 +1080,29 @@ static int property_get_current_memory( return sd_bus_message_append(reply, "t", sz); } +static int property_get_peak_memory( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + uint64_t sz = UINT64_MAX; + Unit *u = ASSERT_PTR(userdata); + int r; + + assert(bus); + assert(reply); + + r = unit_get_memory_peak(u, &sz); + if (r < 0 && r != -ENODATA) + log_unit_warning_errno(u, r, "Failed to get memory.peak attribute: %m"); + + return sd_bus_message_append(reply, "t", sz); +} + static int property_get_available_memory( sd_bus *bus, const char *path, @@ -1536,6 +1559,7 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = { SD_BUS_PROPERTY("ControlGroup", "s", property_get_cgroup, 0, 0), SD_BUS_PROPERTY("ControlGroupId", "t", NULL, offsetof(Unit, cgroup_id), 0), SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0), + SD_BUS_PROPERTY("MemoryPeak", "t", property_get_peak_memory, 0, 0), SD_BUS_PROPERTY("MemoryAvailable", "t", property_get_available_memory, 0, 0), 
SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0), SD_BUS_PROPERTY("EffectiveCPUs", "ay", property_get_cpuset_cpus, 0, 0), diff --git a/src/core/unit.c b/src/core/unit.c index c3613ca8fb..02c5cbd102 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -114,6 +114,7 @@ Unit* unit_new(Manager *m, size_t size) { u->ref_uid = UID_INVALID; u->ref_gid = GID_INVALID; u->cpu_usage_last = NSEC_INFINITY; + u->memory_peak_last = UINT64_MAX; u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL; u->failure_action_exit_status = u->success_action_exit_status = -1; diff --git a/src/core/unit.h b/src/core/unit.h index 44d0cd2e41..4070ccc5d4 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -365,6 +365,9 @@ typedef struct Unit { nsec_t cpu_usage_base; nsec_t cpu_usage_last; /* the most recently read value */ + /* Most recently read value of memory.peak */ + uint64_t memory_peak_last; + /* The current counter of OOM kills initiated by systemd-oomd */ uint64_t managed_oom_kill_last; diff --git a/src/systemctl/systemctl-show.c b/src/systemctl/systemctl-show.c index 0cb75d5dfc..1d061abc6b 100644 --- a/src/systemctl/systemctl-show.c +++ b/src/systemctl/systemctl-show.c @@ -250,6 +250,7 @@ typedef struct UnitStatusInfo { /* CGroup */ uint64_t memory_current; + uint64_t memory_peak; uint64_t memory_min; uint64_t memory_low; uint64_t startup_memory_low; @@ -702,7 +703,8 @@ static void print_status_info( if (i->memory_current != UINT64_MAX) { printf(" Memory: %s", FORMAT_BYTES(i->memory_current)); - if (i->memory_min > 0 || + if (i->memory_peak != CGROUP_LIMIT_MAX || + i->memory_min > 0 || i->memory_low > 0 || i->startup_memory_low > 0 || i->memory_high != CGROUP_LIMIT_MAX || i->startup_memory_high != CGROUP_LIMIT_MAX || i->memory_max != CGROUP_LIMIT_MAX || i->startup_memory_max != CGROUP_LIMIT_MAX || @@ -765,6 +767,10 @@ static void print_status_info( printf("%savailable: %s", prefix, FORMAT_BYTES(i->memory_available)); prefix = " "; } + if (i->memory_peak != 
CGROUP_LIMIT_MAX) { + printf("%speak: %s", prefix, FORMAT_BYTES(i->memory_peak)); + prefix = " "; + } printf(")"); } printf("\n"); @@ -2031,6 +2037,7 @@ static int show_one( { "Where", "s", NULL, offsetof(UnitStatusInfo, where) }, { "What", "s", NULL, offsetof(UnitStatusInfo, what) }, { "MemoryCurrent", "t", NULL, offsetof(UnitStatusInfo, memory_current) }, + { "MemoryPeak", "t", NULL, offsetof(UnitStatusInfo, memory_peak) }, { "MemoryAvailable", "t", NULL, offsetof(UnitStatusInfo, memory_available) }, { "DefaultMemoryMin", "t", NULL, offsetof(UnitStatusInfo, default_memory_min) }, { "DefaultMemoryLow", "t", NULL, offsetof(UnitStatusInfo, default_memory_low) }, @@ -2087,6 +2094,7 @@ static int show_one( .memory_zswap_max = CGROUP_LIMIT_MAX, .startup_memory_zswap_max = CGROUP_LIMIT_MAX, .memory_limit = CGROUP_LIMIT_MAX, + .memory_peak = CGROUP_LIMIT_MAX, .memory_available = CGROUP_LIMIT_MAX, .cpu_usage_nsec = UINT64_MAX, .tasks_current = UINT64_MAX, From 0531bded79dcdde93f2b076f86c40d02b81a18b9 Mon Sep 17 00:00:00 2001 From: Florian Schmaus Date: Mon, 6 Nov 2023 17:24:14 +0100 Subject: [PATCH 2/3] core: include peak memory in unit_log_resources() Signed-off-by: Florian Schmaus --- src/core/unit.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/core/unit.c b/src/core/unit.c index 02c5cbd102..d7e7da40c2 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -2326,6 +2326,7 @@ static int unit_log_resources(Unit *u) { size_t n_message_parts = 0, n_iovec = 0; char* message_parts[1 + 2 + 2 + 1], *t; nsec_t nsec = NSEC_INFINITY; + uint64_t memory_peak = UINT64_MAX; int r; const char* const ip_fields[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = { [CGROUP_IP_INGRESS_BYTES] = "IP_METRIC_INGRESS_BYTES", @@ -2369,6 +2370,24 @@ static int unit_log_resources(Unit *u) { nsec > NOTICEWORTHY_CPU_NSEC); } + (void) unit_get_memory_peak(u, &memory_peak); + if (memory_peak != UINT64_MAX) { + /* Format peak memory for inclusion in the structured log message */ + if 
(asprintf(&t, "MEMORY_PEAK=%" PRIu64, memory_peak) < 0) { + r = log_oom(); + goto finish; + } + iovec[n_iovec++] = IOVEC_MAKE_STRING(t); + + /* Format peak memory for inclusion in the human language message string */ + t = strjoin(FORMAT_BYTES(memory_peak), " memory peak"); + if (!t) { + r = log_oom(); + goto finish; + } + message_parts[n_message_parts++] = t; + } + for (CGroupIOAccountingMetric k = 0; k < _CGROUP_IO_ACCOUNTING_METRIC_MAX; k++) { uint64_t value = UINT64_MAX; From 324ec6b5d2033fae4dc3e087473d27010d948f65 Mon Sep 17 00:00:00 2001 From: Florian Schmaus Date: Mon, 6 Nov 2023 17:28:41 +0100 Subject: [PATCH 3/3] run: include peak memory in output Fixes #28542. Signed-off-by: Florian Schmaus --- NEWS | 2 +- src/run/run.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 38b94ad42b..a83efb5f9c 100644 --- a/NEWS +++ b/NEWS @@ -436,7 +436,7 @@ CHANGES WITH 255 in spe: * systemd-sysupdate now accepts directories in the MatchPattern= option. * systemd-run will now output the invocation ID of the launched - transient unit. + transient unit and its peak memory usage. 
* systemd-analyze, systemd-tmpfiles, systemd-sysusers, systemd-sysctl, and systemd-binfmt gained a new --tldr option that can be used instead diff --git a/src/run/run.c b/src/run/run.c index edafd49567..1b20a8f459 100644 --- a/src/run/run.c +++ b/src/run/run.c @@ -1049,6 +1049,7 @@ typedef struct RunContext { uint64_t inactive_enter_usec; char *result; uint64_t cpu_usage_nsec; + uint64_t memory_peak; uint64_t ip_ingress_bytes; uint64_t ip_egress_bytes; uint64_t io_read_bytes; @@ -1110,6 +1111,7 @@ static int run_context_update(RunContext *c, const char *path) { { "ExecMainCode", "i", NULL, offsetof(RunContext, exit_code) }, { "ExecMainStatus", "i", NULL, offsetof(RunContext, exit_status) }, { "CPUUsageNSec", "t", NULL, offsetof(RunContext, cpu_usage_nsec) }, + { "MemoryPeak", "t", NULL, offsetof(RunContext, memory_peak) }, { "IPIngressBytes", "t", NULL, offsetof(RunContext, ip_ingress_bytes) }, { "IPEgressBytes", "t", NULL, offsetof(RunContext, ip_egress_bytes) }, { "IOReadBytes", "t", NULL, offsetof(RunContext, io_read_bytes) }, @@ -1391,6 +1393,7 @@ static int start_transient_service(sd_bus *bus) { if (arg_wait || arg_stdio != ARG_STDIO_NONE) { _cleanup_(run_context_free) RunContext c = { .cpu_usage_nsec = NSEC_INFINITY, + .memory_peak = UINT64_MAX, .ip_ingress_bytes = UINT64_MAX, .ip_egress_bytes = UINT64_MAX, .io_read_bytes = UINT64_MAX, @@ -1486,6 +1489,9 @@ static int start_transient_service(sd_bus *bus) { log_info("CPU time consumed: %s", FORMAT_TIMESPAN(DIV_ROUND_UP(c.cpu_usage_nsec, NSEC_PER_USEC), USEC_PER_MSEC)); + if (c.memory_peak != UINT64_MAX) + log_info("Memory peak: %s", FORMAT_BYTES(c.memory_peak)); + if (c.ip_ingress_bytes != UINT64_MAX) log_info("IP traffic received: %s", FORMAT_BYTES(c.ip_ingress_bytes));