diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index fa6b965101..bf4f223a43 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -2411,6 +2411,11 @@ RestrictNamespaces=~cgroup net done with the namespace specific unit setting such as PrivateNetwork= or PrivateMounts=. + Note that some namespace sandboxing options might entail a mount namespace for private API VFS instances, + such as PrivatePIDs=, ProtectControlGroups=private/strict, or + PrivateNetwork=. If any of the mentioned options are enabled and the corresponding namespace is + delegated, the mount namespace is implicitly delegated as well. + diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index c926a808e0..d9878e6088 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -4275,7 +4275,17 @@ static bool exec_namespace_is_delegated( if (context->delegate_namespaces == NAMESPACE_FLAGS_INITIAL) return params->runtime_scope == RUNTIME_SCOPE_USER; - return FLAGS_SET(context->delegate_namespaces, namespace); + if (FLAGS_SET(context->delegate_namespaces, namespace)) + return true; + + /* Various namespaces imply mountns for private procfs/sysfs/cgroupfs instances, which means when + * those are delegated mountns must be deferred too. + * + * The list should stay in sync with exec_needs_mount_namespace(). */ + if (namespace == CLONE_NEWNS) + return context->delegate_namespaces & (CLONE_NEWPID|CLONE_NEWCGROUP|CLONE_NEWNET); + + return false; } static int setup_delegated_namespaces( diff --git a/test/units/TEST-07-PID1.delegate-namespaces.sh b/test/units/TEST-07-PID1.delegate-namespaces.sh index 210635ebbc..6d8d51caff 100755 --- a/test/units/TEST-07-PID1.delegate-namespaces.sh +++ b/test/units/TEST-07-PID1.delegate-namespaces.sh @@ -40,7 +40,7 @@ testcase_pid() { # so we can't write to it when running in a container. if ! systemd-detect-virt --container; then (! 
systemd-run -p PrivateUsersEx=self -p PrivatePIDs=yes -p MountAPIVFS=yes --wait --pipe -- sh -c 'echo 5 >/proc/sys/kernel/ns_last_pid') - systemd-run -p PrivateUsersEx=self -p PrivatePIDs=yes -p MountAPIVFS=yes -p DelegateNamespaces="mnt pid" --wait --pipe -- sh -c 'echo 5 >/proc/sys/kernel/ns_last_pid' + systemd-run -p PrivateUsersEx=self -p PrivatePIDs=yes -p MountAPIVFS=yes -p DelegateNamespaces=pid --wait --pipe -- sh -c 'echo 5 >/proc/sys/kernel/ns_last_pid' fi }