diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 482dbbda80..b31e64f57c 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -2027,8 +2027,11 @@ BindReadOnlyPaths=/var/lib/systemd
often a good choice if proper user namespacing with distinct UID maps is not appropriate.
If the parameter is full, user namespacing is set up with an identity
- mapping for all UIDs/GIDs. Similar to identity, this does not provide UID/GID
- isolation, but it does provide process capability isolation.
+ mapping for all UIDs/GIDs. In addition, for system services, full allows the unit
+ to use the setgroups() system call (by setting
+ /proc/pid/setgroups to allow).
+ Similar to identity, this does not provide UID/GID isolation, but it does provide
+ process capability isolation.
If this mode is enabled, all unit processes are run without privileges in the host user
namespace (regardless if the unit's own user/group is root or not). Specifically
diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c
index 682d6449d7..da2d4abd3c 100644
--- a/src/core/exec-invoke.c
+++ b/src/core/exec-invoke.c
@@ -2077,7 +2077,7 @@ static int build_pass_environment(const ExecContext *c, char ***ret) {
return 0;
}
-static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogid, uid_t uid, gid_t gid) {
+static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogid, uid_t uid, gid_t gid, bool allow_setgroups) {
_cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
_cleanup_close_pair_ int errno_pipe[2] = EBADF_PAIR;
_cleanup_close_ int unshare_ready_fd = -EBADF;
@@ -2196,7 +2196,8 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
if (read(unshare_ready_fd, &c, sizeof(c)) < 0)
report_errno_and_exit(errno_pipe[1], -errno);
- /* Disable the setgroups() system call in the child user namespace, for good. */
+ /* Disable the setgroups() system call in the child user namespace, for good, unless PrivateUsers=full
+ * and using the system service manager. */
a = procfs_file_alloca(ppid, "setgroups");
fd = open(a, O_WRONLY|O_CLOEXEC);
if (fd < 0) {
@@ -2207,8 +2208,9 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
/* If the file is missing the kernel is too old, let's continue anyway. */
} else {
- if (write(fd, "deny\n", 5) < 0) {
- r = log_debug_errno(errno, "Failed to write \"deny\" to %s: %m", a);
+ const char *setgroups = allow_setgroups ? "allow\n" : "deny\n";
+ if (write(fd, setgroups, strlen(setgroups)) < 0) {
+ r = log_debug_errno(errno, "Failed to write '%s' to %s: %m", setgroups, a);
report_errno_and_exit(errno_pipe[1], r);
}
@@ -5007,7 +5009,9 @@ int exec_invoke(
if (pu == PRIVATE_USERS_NO)
pu = PRIVATE_USERS_SELF;
- r = setup_private_users(pu, saved_uid, saved_gid, uid, gid);
+ /* The kernel requires /proc/pid/setgroups be set to "deny" prior to writing /proc/pid/gid_map in
+ * unprivileged user namespaces. */
+ r = setup_private_users(pu, saved_uid, saved_gid, uid, gid, /* allow_setgroups= */ false);
/* If it was requested explicitly and we can't set it up, fail early. Otherwise, continue and let
* the actual requested operations fail (or silently continue). */
if (r < 0 && context->private_users != PRIVATE_USERS_NO) {
@@ -5177,7 +5181,8 @@ int exec_invoke(
* different user namespace). */
if (needs_sandboxing && !userns_set_up) {
- r = setup_private_users(context->private_users, saved_uid, saved_gid, uid, gid);
+ r = setup_private_users(context->private_users, saved_uid, saved_gid, uid, gid,
+ /* allow_setgroups= */ context->private_users == PRIVATE_USERS_FULL);
if (r < 0) {
*exit_status = EXIT_USER;
return log_exec_error_errno(context, params, r, "Failed to set up user namespacing: %m");
diff --git a/test/units/TEST-07-PID1.private-users.sh b/test/units/TEST-07-PID1.private-users.sh
index ba85248f96..e788f52a2f 100755
--- a/test/units/TEST-07-PID1.private-users.sh
+++ b/test/units/TEST-07-PID1.private-users.sh
@@ -6,9 +6,12 @@ set -o pipefail
systemd-run -p PrivateUsers=yes --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 1"'
systemd-run -p PrivateUsers=yes --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"'
+systemd-run -p PrivateUsersEx=yes --wait bash -c 'test "$(cat /proc/self/setgroups)" == "deny"'
systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 1"'
systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 1"'
+systemd-run -p PrivateUsersEx=self --wait bash -c 'test "$(cat /proc/self/setgroups)" == "deny"'
systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 65536"'
systemd-run -p PrivateUsersEx=identity --wait bash -c 'test "$(cat /proc/self/gid_map)" == " 0 0 65536"'
systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/uid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"'
systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/gid_map | tr -d "\n")" == " 0 0 1 1 1 4294967294"'
+systemd-run -p PrivateUsersEx=full --wait bash -c 'test "$(cat /proc/self/setgroups)" == "allow"'