From eadb4f19b3fce11fa341e9497cd518341e3c920b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 10 Mar 2022 13:22:32 +0100 Subject: [PATCH 1/2] main: add 'const' on two function arguments --- src/core/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/main.c b/src/core/main.c index 81db601a2c..cffe37de2a 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -1218,7 +1218,7 @@ static void bump_file_max_and_nr_open(void) { #endif } -static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { +static int bump_rlimit_nofile(const struct rlimit *saved_rlimit) { struct rlimit new_rlimit; int r, nr; @@ -1247,7 +1247,7 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) { return 0; } -static int bump_rlimit_memlock(struct rlimit *saved_rlimit) { +static int bump_rlimit_memlock(const struct rlimit *saved_rlimit) { struct rlimit new_rlimit; uint64_t mm; int r; From 852b62507b22c0a986032a2c9fa9cc464a5b7bd2 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 10 Mar 2022 13:22:57 +0100 Subject: [PATCH 2/2] pid1,nspawn: raise default RLIMIT_MEMLOCK to 8M This mirrors a similar change in Linux kernel 5.16 (9dcc38e2813e0cd3b195940c98b181ce6ede8f20) that raised the RLIMIT_MEMLOCK to 8M. This change does two things: raise the default limit for nspawn containers (where we try to mimic closely what the kernel does), and bump it when running on old kernels which still have the lower setting. 
Fixes: #16300 See: https://lwn.net/Articles/876288/ --- man/systemd-system.conf.xml | 10 ++++++---- src/basic/def.h | 5 +++++ src/core/main.c | 7 +++++++ src/core/system.conf.in | 2 +- src/nspawn/nspawn.c | 30 +++++++++++++++--------------- 5 files changed, 34 insertions(+), 20 deletions(-) diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml index 351662b757..400e35c457 100644 --- a/man/systemd-system.conf.xml +++ b/man/systemd-system.conf.xml @@ -485,17 +485,19 @@ Most of these settings are unset, which means the resource limits are inherited from the kernel or, if invoked in a container, from the container manager. However, the following have defaults: - DefaultLimitNOFILE= defaults to 1024:&HIGH_RLIMIT_NOFILE;. + DefaultLimitNOFILE= defaults to 1024:&HIGH_RLIMIT_NOFILE;. + DefaultLimitMEMLOCK= defaults to 8M. + DefaultLimitCORE= does not have a default but it is worth mentioning that RLIMIT_CORE is set to infinity by PID 1 which is inherited by its children. - - Note that the service manager internally increases RLIMIT_MEMLOCK for - itself, however the limit is reverted to the original value for child processes forked off. + Note that the service manager internally in PID 1 bumps RLIMIT_NOFILE and + RLIMIT_MEMLOCK to higher values, however the limits are reverted to the mentioned + defaults for all child processes forked off. diff --git a/src/basic/def.h b/src/basic/def.h index eccee3d3fa..ffd462c456 100644 --- a/src/basic/def.h +++ b/src/basic/def.h @@ -57,8 +57,13 @@ #define CONF_PATHS_STRV(n) \ STRV_MAKE(CONF_PATHS(n)) +/* The limit for PID 1 itself (which is not inherited by children) */ #define HIGH_RLIMIT_MEMLOCK (1024ULL*1024ULL*64ULL) +/* Since kernel 5.16 the kernel default limit was raised to 8M. Let's adjust things on old kernels too, and + * in containers so that our children inherit that. 
*/ +#define DEFAULT_RLIMIT_MEMLOCK (1024ULL*1024ULL*8ULL) + #define PLYMOUTH_SOCKET { \ .un.sun_family = AF_UNIX, \ .un.sun_path = "\0/org/freedesktop/plymouthd", \ diff --git a/src/core/main.c b/src/core/main.c index cffe37de2a..a05c24dd03 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -2310,6 +2310,13 @@ static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) { return; } + if (arg_system) { + /* Raise the default limit to 8M also on old kernels and in containers (8M is the kernel + * default for this since kernel 5.16) */ + rl->rlim_max = MAX(rl->rlim_max, (rlim_t) DEFAULT_RLIMIT_MEMLOCK); + rl->rlim_cur = MAX(rl->rlim_cur, (rlim_t) DEFAULT_RLIMIT_MEMLOCK); + } + arg_default_rlimit[RLIMIT_MEMLOCK] = rl; } diff --git a/src/core/system.conf.in b/src/core/system.conf.in index 67e55f10a2..e132b086a6 100644 --- a/src/core/system.conf.in +++ b/src/core/system.conf.in @@ -66,7 +66,7 @@ #DefaultLimitNOFILE=1024:{{HIGH_RLIMIT_NOFILE}} #DefaultLimitAS= #DefaultLimitNPROC= -#DefaultLimitMEMLOCK= +#DefaultLimitMEMLOCK=8M #DefaultLimitLOCKS= #DefaultLimitSIGPENDING= #DefaultLimitMSGQUEUE= diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 144e58ae89..5102c16438 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -5294,25 +5294,25 @@ static int run_container( } static int initialize_rlimits(void) { - /* The default resource limits the kernel passes to PID 1, as per kernel 4.16. Let's pass our container payload + /* The default resource limits the kernel passes to PID 1, as per kernel 5.16. Let's pass our container payload * the same values as the kernel originally passed to PID 1, in order to minimize differences between host and * container execution environments. 
*/ static const struct rlimit kernel_defaults[_RLIMIT_MAX] = { - [RLIMIT_AS] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_CORE] = { 0, RLIM_INFINITY }, - [RLIMIT_CPU] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_DATA] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_FSIZE] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_MEMLOCK] = { 65536, 65536 }, - [RLIMIT_MSGQUEUE] = { 819200, 819200 }, - [RLIMIT_NICE] = { 0, 0 }, - [RLIMIT_NOFILE] = { 1024, 4096 }, - [RLIMIT_RSS] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_RTPRIO] = { 0, 0 }, - [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, - [RLIMIT_STACK] = { 8388608, RLIM_INFINITY }, + [RLIMIT_AS] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_CORE] = { 0, RLIM_INFINITY }, + [RLIMIT_CPU] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_DATA] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_FSIZE] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_MEMLOCK] = { DEFAULT_RLIMIT_MEMLOCK, DEFAULT_RLIMIT_MEMLOCK }, + [RLIMIT_MSGQUEUE] = { 819200, 819200 }, + [RLIMIT_NICE] = { 0, 0 }, + [RLIMIT_NOFILE] = { 1024, 4096 }, + [RLIMIT_RSS] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_RTPRIO] = { 0, 0 }, + [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, + [RLIMIT_STACK] = { 8388608, RLIM_INFINITY }, /* The kernel scales the default for RLIMIT_NPROC and RLIMIT_SIGPENDING based on the system's amount of * RAM. To provide best compatibility we'll read these limits off PID 1 instead of hardcoding them