diff --git a/docs/TRANSIENT-SETTINGS.md b/docs/TRANSIENT-SETTINGS.md
index e219131ce6..ebb8ba536a 100644
--- a/docs/TRANSIENT-SETTINGS.md
+++ b/docs/TRANSIENT-SETTINGS.md
@@ -281,6 +281,7 @@ All cgroup/resource control settings are available for transient units
✓ ManagedOOMSwap=
✓ ManagedOOMMemoryPressure=
✓ ManagedOOMMemoryPressureLimit=
+✓ ManagedOOMMemoryPressureDurationSec=
✓ ManagedOOMPreference=
✓ CoredumpReceive=
```
diff --git a/man/oomd.conf.xml b/man/oomd.conf.xml
index 582fb27de1..13f1f22e53 100644
--- a/man/oomd.conf.xml
+++ b/man/oomd.conf.xml
@@ -90,7 +90,8 @@
DefaultMemoryPressureDurationSec=
Sets the amount of time a unit's control group needs to have exceeded memory pressure
- limits before systemd-oomd will take action. Memory pressure limits are defined by
+ limits before systemd-oomd will take action. A unit can override this value with
+ ManagedOOMMemoryPressureDurationSec=. Memory pressure limits are defined by
DefaultMemoryPressureLimit= and ManagedOOMMemoryPressureLimit=.
Must be set to 0, or at least 1 second. Defaults to 30 seconds when unset or 0.
diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml
index 1e34ddbc85..25905de8c8 100644
--- a/man/org.freedesktop.systemd1.xml
+++ b/man/org.freedesktop.systemd1.xml
@@ -2993,6 +2993,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly u ManagedOOMMemoryPressureLimit = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t ManagedOOMMemoryPressureDurationUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s ManagedOOMPreference = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(ss) BPFProgram = [...];
@@ -4312,6 +4314,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
+
+
@@ -4849,6 +4853,11 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
method. See sd_listen_fds3
for more details on how to retrieve these file descriptors. Unlike the ExtraFileDescriptors
input property, ExtraFileDescriptorNames only contains names and not the file descriptors.
+
+ ManagedOOMMemoryPressureDurationUSec implements the destination parameter of the
+ unit file setting ManagedOOMMemoryPressureDurationSec= listed in
+ systemd.resource-control5.
+ Note the time unit is expressed in μs.
@@ -5148,6 +5157,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly u ManagedOOMMemoryPressureLimit = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t ManagedOOMMemoryPressureDurationUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s ManagedOOMPreference = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(ss) BPFProgram = [...];
@@ -6451,6 +6462,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
+
+
@@ -7145,6 +7158,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly u ManagedOOMMemoryPressureLimit = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t ManagedOOMMemoryPressureDurationUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s ManagedOOMPreference = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(ss) BPFProgram = [...];
@@ -8286,6 +8301,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
+
+
@@ -9109,6 +9126,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly u ManagedOOMMemoryPressureLimit = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t ManagedOOMMemoryPressureDurationUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s ManagedOOMPreference = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(ss) BPFProgram = [...];
@@ -10222,6 +10241,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
+
+
@@ -10898,6 +10919,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly u ManagedOOMMemoryPressureLimit = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t ManagedOOMMemoryPressureDurationUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s ManagedOOMPreference = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(ss) BPFProgram = [...];
@@ -11285,6 +11308,8 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
+
+
@@ -11309,6 +11334,11 @@ node /org/freedesktop/systemd1/unit/system_2eslice {
Properties
Most properties correspond directly with the matching settings in slice unit files.
+
+ ManagedOOMMemoryPressureDurationUSec implements the destination parameter of the
+ unit file setting ManagedOOMMemoryPressureDurationSec= listed in
+ systemd.resource-control5.
+ Note the time unit is expressed in μs.
@@ -11507,6 +11537,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly u ManagedOOMMemoryPressureLimit = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
+ readonly t ManagedOOMMemoryPressureDurationUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly s ManagedOOMPreference = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("false")
readonly a(ss) BPFProgram = [...];
@@ -11944,6 +11976,8 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
+
+
@@ -12004,6 +12038,11 @@ node /org/freedesktop/systemd1/unit/session_2d1_2escope {
the scope unit is to be shut down via a RequestStop() signal (see below). This is
set when the scope is created. If not set, the scope's processes will terminated with
SIGTERM directly.
+
+ ManagedOOMMemoryPressureDurationUSec implements the destination parameter of the
+ unit file setting ManagedOOMMemoryPressureDurationSec= listed in
+ systemd.resource-control5.
+ Note the time unit is expressed in μs.
@@ -12222,6 +12261,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
PrivateTmpEx,
ImportCredentialEx,
ExtraFileDescriptorNames,
+ ManagedOOMMemoryPressureDurationUSec,
BindLogSockets, and
PrivateUsersEx were added in version 257.
@@ -12362,6 +12402,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
EffectiveMemoryMax,
EffectiveTasksMax, and
MemoryZSwapWriteback were added in version 256.
+ ManagedOOMMemoryPressureDurationUSec was added in version 257.
Scope Unit Objects
@@ -12387,6 +12428,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
EffectiveMemoryMax,
EffectiveTasksMax, and
MemoryZSwapWriteback were added in version 256.
+ ManagedOOMMemoryPressureDurationUSec was added in version 257.
Job Objects
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index 2ffc279a35..1f16052a33 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -1535,16 +1535,35 @@ DeviceAllow=/dev/loop-control
Overrides the default memory pressure limit set by
oomd.conf5 for
- this unit (cgroup). Takes a percentage value between 0% and 100%, inclusive. This property is
- ignored unless ManagedOOMMemoryPressure=. Defaults to 0%,
+ the cgroup of this unit. Takes a percentage value between 0% and 100%, inclusive. Defaults to 0%,
which means to use the default set by
oomd.conf5.
+ This property is ignored unless ManagedOOMMemoryPressure=kill is set.
+
+ ManagedOOMMemoryPressureDurationSec=
+
+
+ Overrides the default memory pressure duration set by
+ oomd.conf5 for
+ the cgroup of this unit. The specified value supports a time unit such as ms or
+ μs, see
+ systemd.time7
+ for details on the permitted syntax. Must be set to either empty or a value of at least 1s. Defaults
+ to empty, which means to use the default set by
+ oomd.conf5.
+ This property is ignored unless ManagedOOMMemoryPressure=kill is set.
+
+
+
+
+
+
ManagedOOMPreference=none|avoid|omit
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index fb89a22d2e..47a771d51e 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -194,6 +194,9 @@ void cgroup_context_init(CGroupContext *c) {
.moom_swap = MANAGED_OOM_AUTO,
.moom_mem_pressure = MANAGED_OOM_AUTO,
.moom_preference = MANAGED_OOM_PREFERENCE_NONE,
+ /* The default duration value in oomd.conf will be used when
+ * moom_mem_pressure_duration_usec is set to infinity. */
+ .moom_mem_pressure_duration_usec = USEC_INFINITY,
.memory_pressure_watch = _CGROUP_PRESSURE_WATCH_INVALID,
.memory_pressure_threshold_usec = USEC_INFINITY,
@@ -947,6 +950,10 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
fprintf(f, "%sMemoryPressureThresholdSec: %s\n",
prefix, FORMAT_TIMESPAN(c->memory_pressure_threshold_usec, 1));
+ if (c->moom_mem_pressure_duration_usec != USEC_INFINITY)
+ fprintf(f, "%sManagedOOMMemoryPressureDurationSec: %s\n",
+ prefix, FORMAT_TIMESPAN(c->moom_mem_pressure_duration_usec, 1));
+
LIST_FOREACH(device_allow, a, c->device_allow)
/* strna() below should be redundant, for avoiding -Werror=format-overflow= error. See #30223. */
fprintf(f,
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 7525da728e..550c1ea88f 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -236,6 +236,7 @@ struct CGroupContext {
ManagedOOMMode moom_swap;
ManagedOOMMode moom_mem_pressure;
uint32_t moom_mem_pressure_limit; /* Normalized to 2^32-1 == 100% */
+ usec_t moom_mem_pressure_duration_usec;
ManagedOOMPreference moom_preference;
/* Memory pressure logic */
diff --git a/src/core/core-varlink.c b/src/core/core-varlink.c
index 0ecc8e23f1..352fd28b0d 100644
--- a/src/core/core-varlink.c
+++ b/src/core/core-varlink.c
@@ -57,7 +57,7 @@ static bool user_match_lookup_parameters(LookupParameters *p, const char *name,
}
static int build_managed_oom_json_array_element(Unit *u, const char *property, sd_json_variant **ret_v) {
- bool use_limit = false;
+ bool use_limit = false, use_duration = false;
CGroupContext *c;
const char *mode;
@@ -84,7 +84,8 @@ static int build_managed_oom_json_array_element(Unit *u, const char *property, s
mode = managed_oom_mode_to_string(c->moom_swap);
else if (streq(property, "ManagedOOMMemoryPressure")) {
mode = managed_oom_mode_to_string(c->moom_mem_pressure);
- use_limit = true;
+ use_limit = c->moom_mem_pressure_limit > 0;
+ use_duration = c->moom_mem_pressure_duration_usec != USEC_INFINITY;
} else
return -EINVAL;
@@ -92,7 +93,8 @@ static int build_managed_oom_json_array_element(Unit *u, const char *property, s
SD_JSON_BUILD_PAIR("mode", SD_JSON_BUILD_STRING(mode)),
SD_JSON_BUILD_PAIR("path", SD_JSON_BUILD_STRING(crt->cgroup_path)),
SD_JSON_BUILD_PAIR("property", SD_JSON_BUILD_STRING(property)),
- SD_JSON_BUILD_PAIR_CONDITION(use_limit, "limit", SD_JSON_BUILD_UNSIGNED(c->moom_mem_pressure_limit)));
+ SD_JSON_BUILD_PAIR_CONDITION(use_limit, "limit", SD_JSON_BUILD_UNSIGNED(c->moom_mem_pressure_limit)),
+ SD_JSON_BUILD_PAIR_CONDITION(use_duration, "duration", SD_JSON_BUILD_UNSIGNED(c->moom_mem_pressure_duration_usec)));
}
static int build_managed_oom_cgroups_json(Manager *m, sd_json_variant **ret) {
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 459fa6f774..445132a659 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -502,6 +502,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("ManagedOOMSwap", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_swap), 0),
SD_BUS_PROPERTY("ManagedOOMMemoryPressure", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_mem_pressure), 0),
SD_BUS_PROPERTY("ManagedOOMMemoryPressureLimit", "u", NULL, offsetof(CGroupContext, moom_mem_pressure_limit), 0),
+ SD_BUS_PROPERTY("ManagedOOMMemoryPressureDurationUSec", "t", bus_property_get_usec, offsetof(CGroupContext, moom_mem_pressure_duration_usec), 0),
SD_BUS_PROPERTY("ManagedOOMPreference", "s", property_get_managed_oom_preference, offsetof(CGroupContext, moom_preference), 0),
SD_BUS_PROPERTY("BPFProgram", "a(ss)", property_get_bpf_foreign_program, 0, 0),
SD_BUS_PROPERTY("SocketBindAllow", "a(iiqq)", property_get_socket_bind, offsetof(CGroupContext, socket_bind_allow), 0),
@@ -2053,6 +2054,36 @@ int bus_cgroup_set_property(
return 1;
}
+        if (streq(name, "ManagedOOMMemoryPressureDurationUSec")) { /* per-unit override of oomd's memory pressure duration */
+                uint64_t t;
+
+                if (!UNIT_VTABLE(u)->can_set_managed_oom)
+                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Cannot set %s for this unit type", name);
+
+                r = sd_bus_message_read(message, "t", &t);
+                if (r < 0)
+                        return r;
+
+                if (t < 1 * USEC_PER_SEC) /* USEC_INFINITY passes this check; it means "use the default from oomd.conf" */
+                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= must be at least 1s, got %s", name,
+                                                 FORMAT_TIMESPAN(t, USEC_PER_SEC));
+
+                if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+                        c->moom_mem_pressure_duration_usec = t; /* not memory_pressure_threshold_usec, which backs MemoryPressureThresholdSec= */
+                        if (c->moom_mem_pressure_duration_usec == USEC_INFINITY) /* write an empty assignment to reset to "unset" */
+                                unit_write_setting(u, flags, name, "ManagedOOMMemoryPressureDurationSec=");
+                        else
+                                unit_write_settingf(u, flags, name,
+                                                    "ManagedOOMMemoryPressureDurationSec=%s",
+                                                    FORMAT_TIMESPAN(c->moom_mem_pressure_duration_usec, 1));
+                }
+
+                if (c->moom_mem_pressure == MANAGED_OOM_KILL) /* only notify when memory pressure mode is "kill" */
+                        (void) manager_varlink_send_managed_oom_update(u);
+
+                return 1;
+        }
+
if (streq(name, "ManagedOOMPreference")) {
ManagedOOMPreference p;
const char *pref;
diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c
index 13e7078b1a..1b44c49238 100644
--- a/src/core/execute-serialize.c
+++ b/src/core/execute-serialize.c
@@ -328,6 +328,10 @@ static int exec_cgroup_context_serialize(const CGroupContext *c, FILE *f) {
if (r < 0)
return r;
+ r = serialize_usec(f, "exec-cgroup-context-managed-oom-memory-pressure-duration-usec", c->moom_mem_pressure_duration_usec);
+ if (r < 0)
+ return r;
+
r = serialize_item(f, "exec-cgroup-context-managed-oom-preference", managed_oom_preference_to_string(c->moom_preference));
if (r < 0)
return r;
@@ -781,6 +785,10 @@ static int exec_cgroup_context_deserialize(CGroupContext *c, FILE *f) {
c->moom_preference = managed_oom_preference_from_string(val);
if (c->moom_preference < 0)
return -EINVAL;
+ } else if ((val = startswith(l, "exec-cgroup-context-managed-oom-memory-pressure-duration-usec="))) {
+ r = deserialize_usec(val, &c->moom_mem_pressure_duration_usec);
+ if (r < 0)
+ return r;
} else if ((val = startswith(l, "exec-cgroup-context-memory-pressure-watch="))) {
c->memory_pressure_watch = cgroup_pressure_watch_from_string(val);
if (c->memory_pressure_watch < 0)
diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in
index e94b518a9d..df49633cee 100644
--- a/src/core/load-fragment-gperf.gperf.in
+++ b/src/core/load-fragment-gperf.gperf.in
@@ -253,6 +253,7 @@
{{type}}.ManagedOOMSwap, config_parse_managed_oom_mode, 0, offsetof({{type}}, cgroup_context.moom_swap)
{{type}}.ManagedOOMMemoryPressure, config_parse_managed_oom_mode, 0, offsetof({{type}}, cgroup_context.moom_mem_pressure)
{{type}}.ManagedOOMMemoryPressureLimit, config_parse_managed_oom_mem_pressure_limit, 0, offsetof({{type}}, cgroup_context.moom_mem_pressure_limit)
+{{type}}.ManagedOOMMemoryPressureDurationSec, config_parse_managed_oom_mem_pressure_duration_sec, 0, offsetof({{type}}, cgroup_context.moom_mem_pressure_duration_usec)
{{type}}.ManagedOOMPreference, config_parse_managed_oom_preference, 0, offsetof({{type}}, cgroup_context.moom_preference)
{{type}}.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0
{{type}}.BPFProgram, config_parse_bpf_foreign_program, 0, offsetof({{type}}, cgroup_context)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index ba6aad2f2b..4b702038e6 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -4121,6 +4121,44 @@ int config_parse_managed_oom_mem_pressure_limit(
return 0;
}
+int config_parse_managed_oom_mem_pressure_duration_sec(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        /* Parses a per-unit memory pressure duration into the usec_t that 'data' points to. An empty
+         * value resets the field to USEC_INFINITY; anything else must parse as a finite time span of
+         * at least 1s, otherwise a warning is logged and the assignment is ignored. */
+
+        usec_t *duration = ASSERT_PTR(data);
+        usec_t parsed;
+        UnitType type;
+        int r;
+
+        type = unit_name_to_type(unit);
+        assert(type != _UNIT_TYPE_INVALID);
+
+        if (!unit_vtable[type]->can_set_managed_oom)
+                return log_syntax(unit, LOG_WARNING, filename, line, 0, "%s= is not supported for this unit type, ignoring.", lvalue);
+
+        if (isempty(rvalue))
+                parsed = USEC_INFINITY;
+        else {
+                r = parse_sec(rvalue, &parsed);
+                if (r < 0)
+                        return log_syntax_parse_error(unit, filename, line, r, lvalue, rvalue);
+
+                /* An explicit "infinity" is rejected here; only the empty string may reset the field. */
+                if (parsed == USEC_INFINITY || parsed < 1 * USEC_PER_SEC)
+                        return log_syntax(unit, LOG_WARNING, filename, line, 0, "%s= must be at least 1s and less than infinity, ignoring: %s", lvalue, rvalue);
+        }
+
+        *duration = parsed;
+        return 0;
+}
+
int config_parse_device_allow(
const char *unit,
const char *filename,
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index c7301cec52..e8b2eaee52 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -88,6 +88,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_delegate);
CONFIG_PARSER_PROTOTYPE(config_parse_delegate_subgroup);
CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mode);
CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mem_pressure_limit);
+CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mem_pressure_duration_sec);
CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_preference);
CONFIG_PARSER_PROTOTYPE(config_parse_device_policy);
CONFIG_PARSER_PROTOTYPE(config_parse_device_allow);
diff --git a/src/oom/oomd-manager.c b/src/oom/oomd-manager.c
index 6d1b4f024b..7437a6e889 100644
--- a/src/oom/oomd-manager.c
+++ b/src/oom/oomd-manager.c
@@ -24,6 +24,7 @@ typedef struct ManagedOOMMessage {
char *path;
char *property;
uint32_t limit;
+ usec_t duration;
} ManagedOOMMessage;
static void managed_oom_message_destroy(ManagedOOMMessage *message) {
@@ -43,6 +44,7 @@ static int process_managed_oom_message(Manager *m, uid_t uid, sd_json_variant *p
{ "path", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, offsetof(ManagedOOMMessage, path), SD_JSON_MANDATORY },
{ "property", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, offsetof(ManagedOOMMessage, property), SD_JSON_MANDATORY },
{ "limit", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint32, offsetof(ManagedOOMMessage, limit), 0 },
+ { "duration", _SD_JSON_VARIANT_TYPE_INVALID, sd_json_dispatch_uint64, offsetof(ManagedOOMMessage, duration), 0 },
{},
};
@@ -55,10 +57,13 @@ static int process_managed_oom_message(Manager *m, uid_t uid, sd_json_variant *p
/* Skip malformed elements and keep processing in case the others are good */
JSON_VARIANT_ARRAY_FOREACH(c, cgroups) {
- _cleanup_(managed_oom_message_destroy) ManagedOOMMessage message = {};
+ _cleanup_(managed_oom_message_destroy) ManagedOOMMessage message = {
+ .duration = USEC_INFINITY,
+ };
OomdCGroupContext *ctx;
Hashmap *monitor_hm;
loadavg_t limit;
+ usec_t duration;
if (!sd_json_variant_is_object(c))
continue;
@@ -104,6 +109,11 @@ static int process_managed_oom_message(Manager *m, uid_t uid, sd_json_variant *p
continue;
}
+ if (streq(message.property, "ManagedOOMMemoryPressure") && message.duration != USEC_INFINITY)
+ duration = message.duration;
+ else
+ duration = m->default_mem_pressure_duration_usec;
+
r = oomd_insert_cgroup_context(NULL, monitor_hm, message.path);
if (r == -ENOMEM)
return r;
@@ -113,8 +123,10 @@ static int process_managed_oom_message(Manager *m, uid_t uid, sd_json_variant *p
/* Always update the limit in case it was changed. For non-memory pressure detection the value is
* ignored so always updating it here is not a problem. */
ctx = hashmap_get(monitor_hm, empty_to_root(message.path));
- if (ctx)
+ if (ctx) {
ctx->mem_pressure_limit = limit;
+ ctx->mem_pressure_duration_usec = duration;
+ }
}
/* Toggle wake-ups for "ManagedOOMSwap" if entries are present. */
@@ -472,7 +484,7 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
m->mem_pressure_post_action_delay_start = 0;
}
- r = oomd_pressure_above(m->monitored_mem_pressure_cgroup_contexts, m->default_mem_pressure_duration_usec, &targets);
+ r = oomd_pressure_above(m->monitored_mem_pressure_cgroup_contexts, &targets);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
@@ -494,7 +506,7 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
t->path,
LOADAVG_INT_SIDE(t->memory_pressure.avg10), LOADAVG_DECIMAL_SIDE(t->memory_pressure.avg10),
LOADAVG_INT_SIDE(t->mem_pressure_limit), LOADAVG_DECIMAL_SIDE(t->mem_pressure_limit),
- FORMAT_TIMESPAN(m->default_mem_pressure_duration_usec, USEC_PER_SEC));
+ FORMAT_TIMESPAN(t->mem_pressure_duration_usec, USEC_PER_SEC));
r = update_monitored_cgroup_contexts_candidates(
m->monitored_mem_pressure_cgroup_contexts, &m->monitored_mem_pressure_cgroup_contexts_candidates);
@@ -526,7 +538,7 @@ static int monitor_memory_pressure_contexts_handler(sd_event_source *s, uint64_t
selected, t->path,
LOADAVG_INT_SIDE(t->memory_pressure.avg10), LOADAVG_DECIMAL_SIDE(t->memory_pressure.avg10),
LOADAVG_INT_SIDE(t->mem_pressure_limit), LOADAVG_DECIMAL_SIDE(t->mem_pressure_limit),
- FORMAT_TIMESPAN(m->default_mem_pressure_duration_usec, USEC_PER_SEC));
+ FORMAT_TIMESPAN(t->mem_pressure_duration_usec, USEC_PER_SEC));
/* send dbus signal */
(void) sd_bus_emit_signal(m->bus,
diff --git a/src/oom/oomd-util.c b/src/oom/oomd-util.c
index 6307c2783e..b996787039 100644
--- a/src/oom/oomd-util.c
+++ b/src/oom/oomd-util.c
@@ -69,7 +69,7 @@ OomdCGroupContext *oomd_cgroup_context_free(OomdCGroupContext *ctx) {
return mfree(ctx);
}
-int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret) {
+int oomd_pressure_above(Hashmap *h, Set **ret) {
_cleanup_set_free_ Set *targets = NULL;
OomdCGroupContext *ctx;
char *key;
@@ -90,7 +90,7 @@ int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret) {
ctx->mem_pressure_limit_hit_start = now(CLOCK_MONOTONIC);
diff = now(CLOCK_MONOTONIC) - ctx->mem_pressure_limit_hit_start;
- if (diff >= duration) {
+ if (diff >= ctx->mem_pressure_duration_usec) {
r = set_put(targets, ctx);
if (r < 0)
return -ENOMEM;
@@ -564,6 +564,7 @@ int oomd_insert_cgroup_context(Hashmap *old_h, Hashmap *new_h, const char *path)
curr_ctx->last_pgscan = old_ctx->pgscan;
curr_ctx->mem_pressure_limit = old_ctx->mem_pressure_limit;
curr_ctx->mem_pressure_limit_hit_start = old_ctx->mem_pressure_limit_hit_start;
+ curr_ctx->mem_pressure_duration_usec = old_ctx->mem_pressure_duration_usec;
curr_ctx->last_had_mem_reclaim = old_ctx->last_had_mem_reclaim;
}
@@ -594,6 +595,7 @@ void oomd_update_cgroup_contexts_between_hashmaps(Hashmap *old_h, Hashmap *curr_
ctx->last_pgscan = old_ctx->pgscan;
ctx->mem_pressure_limit = old_ctx->mem_pressure_limit;
ctx->mem_pressure_limit_hit_start = old_ctx->mem_pressure_limit_hit_start;
+ ctx->mem_pressure_duration_usec = old_ctx->mem_pressure_duration_usec;
ctx->last_had_mem_reclaim = old_ctx->last_had_mem_reclaim;
if (oomd_pgscan_rate(ctx) > 0)
@@ -626,10 +628,12 @@ void oomd_dump_memory_pressure_cgroup_context(const OomdCGroupContext *ctx, FILE
fprintf(f,
"%sPath: %s\n"
"%s\tMemory Pressure Limit: %lu.%02lu%%\n"
+ "%s\tMemory Pressure Duration: %s\n"
"%s\tPressure: Avg10: %lu.%02lu, Avg60: %lu.%02lu, Avg300: %lu.%02lu, Total: %s\n"
"%s\tCurrent Memory Usage: %s\n",
strempty(prefix), ctx->path,
strempty(prefix), LOADAVG_INT_SIDE(ctx->mem_pressure_limit), LOADAVG_DECIMAL_SIDE(ctx->mem_pressure_limit),
+ strempty(prefix), FORMAT_TIMESPAN(ctx->mem_pressure_duration_usec, USEC_PER_SEC),
strempty(prefix),
LOADAVG_INT_SIDE(ctx->memory_pressure.avg10), LOADAVG_DECIMAL_SIDE(ctx->memory_pressure.avg10),
LOADAVG_INT_SIDE(ctx->memory_pressure.avg60), LOADAVG_DECIMAL_SIDE(ctx->memory_pressure.avg60),
diff --git a/src/oom/oomd-util.h b/src/oom/oomd-util.h
index 95a236f48f..14fe5c5eba 100644
--- a/src/oom/oomd-util.h
+++ b/src/oom/oomd-util.h
@@ -37,6 +37,7 @@ struct OomdCGroupContext {
loadavg_t mem_pressure_limit;
usec_t mem_pressure_limit_hit_start;
usec_t last_had_mem_reclaim;
+ usec_t mem_pressure_duration_usec;
};
struct OomdSystemContext {
@@ -53,12 +54,12 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(OomdCGroupContext*, oomd_cgroup_context_free);
* key: cgroup paths -> value: OomdCGroupContext. */
/* Scans all the OomdCGroupContexts in `h` and returns 1 and a set of pointers to those OomdCGroupContexts in `ret`
- * if any of them have exceeded their supplied memory pressure limits for the `duration` length of time.
+ * if any of them have exceeded their supplied memory pressure limits for the `ctx->mem_pressure_duration_usec` length of time.
* `mem_pressure_limit_hit_start` is updated accordingly for the first time the limit is exceeded, and when it returns
* below the limit.
- * Returns 0 and sets `ret` to an empty set if no entries exceeded limits for `duration`.
+ * Returns 0 and sets `ret` to an empty set if no entries exceeded limits for `ctx->mem_pressure_duration_usec`.
* Returns -ENOMEM for allocation errors. */
-int oomd_pressure_above(Hashmap *h, usec_t duration, Set **ret);
+int oomd_pressure_above(Hashmap *h, Set **ret);
/* Returns true if the amount of memory available (see proc(5)) is below the permyriad of memory specified by `threshold_permyriad`. */
bool oomd_mem_available_below(const OomdSystemContext *ctx, int threshold_permyriad);
diff --git a/src/oom/test-oomd-util.c b/src/oom/test-oomd-util.c
index 1aef6039e1..53109c160d 100644
--- a/src/oom/test-oomd-util.c
+++ b/src/oom/test-oomd-util.c
@@ -138,6 +138,7 @@ static void test_oomd_cgroup_context_acquire_and_insert(void) {
c1->pgscan = UINT64_MAX;
c1->mem_pressure_limit = 6789;
c1->mem_pressure_limit_hit_start = 42;
+ c1->mem_pressure_duration_usec = 1234;
c1->last_had_mem_reclaim = 888;
assert_se(h2 = hashmap_new(&oomd_cgroup_ctx_hash_ops));
assert_se(oomd_insert_cgroup_context(h1, h2, cgroup) == 0);
@@ -149,6 +150,7 @@ static void test_oomd_cgroup_context_acquire_and_insert(void) {
assert_se(c2->last_pgscan == UINT64_MAX);
assert_se(c2->mem_pressure_limit == 6789);
assert_se(c2->mem_pressure_limit_hit_start == 42);
+ assert_se(c2->mem_pressure_duration_usec == 1234);
assert_se(c2->last_had_mem_reclaim == 888); /* assumes the live pgscan is less than UINT64_MAX */
}
@@ -162,11 +164,13 @@ static void test_oomd_update_cgroup_contexts_between_hashmaps(void) {
{ .path = paths[0],
.mem_pressure_limit = 5,
.mem_pressure_limit_hit_start = 777,
+ .mem_pressure_duration_usec = 111,
.last_had_mem_reclaim = 888,
.pgscan = 57 },
{ .path = paths[1],
.mem_pressure_limit = 6,
.mem_pressure_limit_hit_start = 888,
+ .mem_pressure_duration_usec = 222,
.last_had_mem_reclaim = 888,
.pgscan = 42 },
};
@@ -193,6 +197,7 @@ static void test_oomd_update_cgroup_contexts_between_hashmaps(void) {
assert_se(c_old->pgscan == c_new->last_pgscan);
assert_se(c_old->mem_pressure_limit == c_new->mem_pressure_limit);
assert_se(c_old->mem_pressure_limit_hit_start == c_new->mem_pressure_limit_hit_start);
+ assert_se(c_old->mem_pressure_duration_usec == c_new->mem_pressure_duration_usec);
assert_se(c_old->last_had_mem_reclaim == c_new->last_had_mem_reclaim);
assert_se(c_old = hashmap_get(h_old, "/1.slice"));
@@ -200,6 +205,7 @@ static void test_oomd_update_cgroup_contexts_between_hashmaps(void) {
assert_se(c_old->pgscan == c_new->last_pgscan);
assert_se(c_old->mem_pressure_limit == c_new->mem_pressure_limit);
assert_se(c_old->mem_pressure_limit_hit_start == c_new->mem_pressure_limit_hit_start);
+ assert_se(c_old->mem_pressure_duration_usec == c_new->mem_pressure_duration_usec);
assert_se(c_new->last_had_mem_reclaim > c_old->last_had_mem_reclaim);
}
@@ -255,17 +261,21 @@ static void test_oomd_pressure_above(void) {
assert_se(store_loadavg_fixed_point(99, 99, &(ctx[0].memory_pressure.avg60)) == 0);
assert_se(store_loadavg_fixed_point(99, 99, &(ctx[0].memory_pressure.avg300)) == 0);
ctx[0].mem_pressure_limit = threshold;
+ /* Set memory pressure duration to 0 since we use the real system monotonic clock
+ * in oomd_pressure_above() and we want to avoid this test depending on timing. */
+ ctx[0].mem_pressure_duration_usec = 0;
/* /derp.slice */
assert_se(store_loadavg_fixed_point(1, 11, &(ctx[1].memory_pressure.avg10)) == 0);
assert_se(store_loadavg_fixed_point(1, 11, &(ctx[1].memory_pressure.avg60)) == 0);
assert_se(store_loadavg_fixed_point(1, 11, &(ctx[1].memory_pressure.avg300)) == 0);
ctx[1].mem_pressure_limit = threshold;
+ ctx[1].mem_pressure_duration_usec = 0;
/* High memory pressure */
assert_se(h1 = hashmap_new(&string_hash_ops));
assert_se(hashmap_put(h1, "/herp.slice", &ctx[0]) >= 0);
- assert_se(oomd_pressure_above(h1, 0 /* duration */, &t1) == 1);
+ assert_se(oomd_pressure_above(h1, &t1) == 1);
assert_se(set_contains(t1, &ctx[0]));
assert_se(c = hashmap_get(h1, "/herp.slice"));
assert_se(c->mem_pressure_limit_hit_start > 0);
@@ -273,14 +283,14 @@ static void test_oomd_pressure_above(void) {
/* Low memory pressure */
assert_se(h2 = hashmap_new(&string_hash_ops));
assert_se(hashmap_put(h2, "/derp.slice", &ctx[1]) >= 0);
- assert_se(oomd_pressure_above(h2, 0 /* duration */, &t2) == 0);
+ assert_se(oomd_pressure_above(h2, &t2) == 0);
assert_se(!t2);
assert_se(c = hashmap_get(h2, "/derp.slice"));
assert_se(c->mem_pressure_limit_hit_start == 0);
/* High memory pressure w/ multiple cgroups */
assert_se(hashmap_put(h1, "/derp.slice", &ctx[1]) >= 0);
- assert_se(oomd_pressure_above(h1, 0 /* duration */, &t3) == 1);
+ assert_se(oomd_pressure_above(h1, &t3) == 1);
assert_se(set_contains(t3, &ctx[0]));
assert_se(set_size(t3) == 1);
assert_se(c = hashmap_get(h1, "/herp.slice"));
diff --git a/src/shared/bus-print-properties.c b/src/shared/bus-print-properties.c
index 7da8cb1b12..5857fde5ad 100644
--- a/src/shared/bus-print-properties.c
+++ b/src/shared/bus-print-properties.c
@@ -109,6 +109,12 @@ static int bus_print_property(const char *name, const char *expected_value, sd_b
bus_print_property_value(name, expected_value, flags, FORMAT_TIMESTAMP(u));
+ /* For the managed OOM memory pressure duration, USEC_INFINITY means "unset", i.e. the default
+ * from oomd.conf is used. Without this condition we would print "infinity", which wrongly
+ * suggests that there is no limit on the memory pressure duration. */
+ else if (streq(name, "ManagedOOMMemoryPressureDurationUSec") && u == USEC_INFINITY)
+ bus_print_property_value(name, expected_value, flags, "[not set]");
+
else if (strstr(name, "USec"))
bus_print_property_value(name, expected_value, flags, FORMAT_TIMESPAN(u, 0));
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index b151826920..59e4901878 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -1008,6 +1008,11 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
if (streq(field, "NFTSet"))
return bus_append_nft_set(m, field, eq);
+ if (streq(field, "ManagedOOMMemoryPressureDurationSec"))
+ /* While infinity is disallowed in unit file, infinity is allowed in D-Bus API which
+ * means use the default memory pressure duration from oomd.conf. */
+ return bus_append_parse_sec_rename(m, field, isempty(eq) ? "infinity" : eq);
+
return 0;
}
diff --git a/src/shared/varlink-io.systemd.oom.c b/src/shared/varlink-io.systemd.oom.c
index 67beb6b780..350b933d03 100644
--- a/src/shared/varlink-io.systemd.oom.c
+++ b/src/shared/varlink-io.systemd.oom.c
@@ -12,7 +12,8 @@ SD_VARLINK_DEFINE_STRUCT_TYPE(
SD_VARLINK_DEFINE_FIELD(mode, SD_VARLINK_STRING, 0),
SD_VARLINK_DEFINE_FIELD(path, SD_VARLINK_STRING, 0),
SD_VARLINK_DEFINE_FIELD(property, SD_VARLINK_STRING, 0),
- SD_VARLINK_DEFINE_FIELD(limit, SD_VARLINK_INT, SD_VARLINK_NULLABLE));
+ SD_VARLINK_DEFINE_FIELD(limit, SD_VARLINK_INT, SD_VARLINK_NULLABLE),
+ SD_VARLINK_DEFINE_FIELD(duration, SD_VARLINK_INT, SD_VARLINK_NULLABLE));
static SD_VARLINK_DEFINE_METHOD(
ReportManagedOOMCGroups,
diff --git a/test/fuzz/fuzz-unit-file/directives-all.service b/test/fuzz/fuzz-unit-file/directives-all.service
index 1cb212bcad..a0883d0ebe 100644
--- a/test/fuzz/fuzz-unit-file/directives-all.service
+++ b/test/fuzz/fuzz-unit-file/directives-all.service
@@ -154,6 +154,7 @@ MaxConnectionsPerSource=
ManagedOOMSwap=
ManagedOOMMemoryPressure=
ManagedOOMMemoryPressureLimitPercent=
+ManagedOOMMemoryPressureDurationSec=
ManagedOOMPreference=
MemoryAccounting=
MemoryHigh=
diff --git a/test/units/TEST-55-OOMD.sh b/test/units/TEST-55-OOMD.sh
index c615e7a4b2..10b3777df6 100755
--- a/test/units/TEST-55-OOMD.sh
+++ b/test/units/TEST-55-OOMD.sh
@@ -106,7 +106,7 @@ test_basic() {
# Verify systemd-oomd is monitoring the expected units.
timeout 1m bash -xec "until oomctl | grep -q -F 'Path: $cgroup_path'; do sleep 1; done"
assert_in 'Memory Pressure Limit: 20.00%' \
- "$(oomctl | tac | sed -e '/Memory Pressure Monitored CGroups:/q' | tac | grep -A7 "Path: $cgroup_path")"
+ "$(oomctl | tac | sed -e '/Memory Pressure Monitored CGroups:/q' | tac | grep -A8 "Path: $cgroup_path")"
systemctl "$@" start TEST-55-OOMD-testbloat.service
@@ -181,6 +181,86 @@ EOF
systemctl stop TEST-55-OOMD-testmunch.service
systemctl stop TEST-55-OOMD-testchill.service
systemctl stop TEST-55-OOMD-workload.slice
+
+ # clean up overrides since test cases can be run in any order
+ # and overrides shouldn't affect other tests
+ rm -rf /run/systemd/system/TEST-55-OOMD-testbloat.service.d
+ systemctl daemon-reload
+}
+
+testcase_duration_analyze() {
+ # Verify memory pressure duration is valid if >= 1 second
+ cat </tmp/TEST-55-OOMD-valid-duration.service
+[Service]
+ExecStart=echo hello
+ManagedOOMMemoryPressureDurationSec=1s
+EOF
+
+ # Verify memory pressure duration is invalid if < 1 second
+ cat </tmp/TEST-55-OOMD-invalid-duration.service
+[Service]
+ExecStart=echo hello
+ManagedOOMMemoryPressureDurationSec=0
+EOF
+
+ systemd-analyze --recursive-errors=no verify /tmp/TEST-55-OOMD-valid-duration.service
+ (! systemd-analyze --recursive-errors=no verify /tmp/TEST-55-OOMD-invalid-duration.service)
+
+ rm -f /tmp/TEST-55-OOMD-valid-duration.service
+ rm -f /tmp/TEST-55-OOMD-invalid-duration.service
+}
+
+testcase_duration_override() {
+ # Verify memory pressure duration can be overridden to non-zero values
+ mkdir -p /run/systemd/system/TEST-55-OOMD-testmunch.service.d/
+ cat >/run/systemd/system/TEST-55-OOMD-testmunch.service.d/99-duration-test.conf </run/systemd/system/TEST-55-OOMD-testchill.service.d/99-duration-test.conf <