pid1,nspawn: raise default RLIMIT_MEMLOCK to 8M

This mirrors a similar check in Linux kernel 5.16
(9dcc38e2813e0cd3b195940c98b181ce6ede8f20) that raised the
RLIMIT_MEMLOCK to 8M.

This change does two things: raise the default limit for nspawn
containers (where we try to mimic closely what the kernel does), and
bump it when running on old kernels which still have the lower setting.

Fixes: #16300
See: https://lwn.net/Articles/876288/
This commit is contained in:
Lennart Poettering
2022-03-10 13:22:57 +01:00
parent eadb4f19b3
commit 852b62507b
5 changed files with 34 additions and 20 deletions

View File

@@ -485,17 +485,19 @@
<para>Most of these settings are unset, which means the resource limits are inherited from the kernel or, if
invoked in a container, from the container manager. However, the following have defaults:</para>
<itemizedlist>
<listitem><para><varname>DefaultLimitNOFILE=</varname> defaults to <literal>1024:&HIGH_RLIMIT_NOFILE;</literal>.
<listitem><para><varname>DefaultLimitNOFILE=</varname> defaults to 1024:&HIGH_RLIMIT_NOFILE;.
</para></listitem>
<listitem><para><varname>DefaultLimitMEMLOCK=</varname> defaults to 8M.</para></listitem>
<listitem><para><varname>DefaultLimitCORE=</varname> does not have a default but it is worth mentioning that
<varname>RLIMIT_CORE</varname> is set to <literal>infinity</literal> by PID 1 which is inherited by its
children.</para></listitem>
<listitem><para>Note that the service manager internally increases <varname>RLIMIT_MEMLOCK</varname> for
itself, however the limit is reverted to the original value for child processes forked off.</para></listitem>
</itemizedlist>
<para>Note that the service manager internally in PID 1 bumps <varname>RLIMIT_NOFILE</varname> and
<varname>RLIMIT_MEMLOCK</varname> to higher values, however the limit is reverted to the mentioned
defaults for all child processes forked off.</para>
</listitem>
</varlistentry>

View File

@@ -57,8 +57,13 @@
#define CONF_PATHS_STRV(n) \
STRV_MAKE(CONF_PATHS(n))
/* The limit for PID 1 itself (which is not inherited to children) */
#define HIGH_RLIMIT_MEMLOCK (1024ULL*1024ULL*64ULL)
/* Since kernel 5.16 the kernel default limit was raised to 8M. Let's adjust things on old kernels too, and
* in containers so that our children inherit that. */
#define DEFAULT_RLIMIT_MEMLOCK (1024ULL*1024ULL*8ULL)
#define PLYMOUTH_SOCKET { \
.un.sun_family = AF_UNIX, \
.un.sun_path = "\0/org/freedesktop/plymouthd", \

View File

@@ -2310,6 +2310,13 @@ static void fallback_rlimit_memlock(const struct rlimit *saved_rlimit_memlock) {
return;
}
if (arg_system) {
/* Raise the default limit to 8M also on old kernels and in containers (8M is the kernel
* default for this since kernel 5.16) */
rl->rlim_max = MAX(rl->rlim_max, (rlim_t) DEFAULT_RLIMIT_MEMLOCK);
rl->rlim_cur = MAX(rl->rlim_cur, (rlim_t) DEFAULT_RLIMIT_MEMLOCK);
}
arg_default_rlimit[RLIMIT_MEMLOCK] = rl;
}

View File

@@ -66,7 +66,7 @@
#DefaultLimitNOFILE=1024:{{HIGH_RLIMIT_NOFILE}}
#DefaultLimitAS=
#DefaultLimitNPROC=
#DefaultLimitMEMLOCK=
#DefaultLimitMEMLOCK=8M
#DefaultLimitLOCKS=
#DefaultLimitSIGPENDING=
#DefaultLimitMSGQUEUE=

View File

@@ -5294,25 +5294,25 @@ static int run_container(
}
static int initialize_rlimits(void) {
/* The default resource limits the kernel passes to PID 1, as per kernel 4.16. Let's pass our container payload
/* The default resource limits the kernel passes to PID 1, as per kernel 5.16. Let's pass our container payload
* the same values as the kernel originally passed to PID 1, in order to minimize differences between host and
* container execution environments. */
static const struct rlimit kernel_defaults[_RLIMIT_MAX] = {
[RLIMIT_AS] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_CORE] = { 0, RLIM_INFINITY },
[RLIMIT_CPU] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_DATA] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_FSIZE] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_MEMLOCK] = { 65536, 65536 },
[RLIMIT_MSGQUEUE] = { 819200, 819200 },
[RLIMIT_NICE] = { 0, 0 },
[RLIMIT_NOFILE] = { 1024, 4096 },
[RLIMIT_RSS] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_RTPRIO] = { 0, 0 },
[RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_STACK] = { 8388608, RLIM_INFINITY },
[RLIMIT_AS] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_CORE] = { 0, RLIM_INFINITY },
[RLIMIT_CPU] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_DATA] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_FSIZE] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_MEMLOCK] = { DEFAULT_RLIMIT_MEMLOCK, DEFAULT_RLIMIT_MEMLOCK },
[RLIMIT_MSGQUEUE] = { 819200, 819200 },
[RLIMIT_NICE] = { 0, 0 },
[RLIMIT_NOFILE] = { 1024, 4096 },
[RLIMIT_RSS] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_RTPRIO] = { 0, 0 },
[RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY },
[RLIMIT_STACK] = { 8388608, RLIM_INFINITY },
/* The kernel scales the default for RLIMIT_NPROC and RLIMIT_SIGPENDING based on the system's amount of
* RAM. To provide best compatibility we'll read these limits off PID 1 instead of hardcoding them