From 3ebf0b0bd48eb1ce760fa9ab47bf1b3706f3b29e Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Wed, 9 Mar 2022 02:02:17 +0000 Subject: [PATCH 1/3] core: create parent directory for mount point of ExtensionDirectories This is used by ExtensionDirectories too, as they are bind-mounted in the propagate directory to check the extension-release files --- src/core/namespace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/namespace.c b/src/core/namespace.c index e75e003e71..e74e6ea778 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -2384,9 +2384,9 @@ int setup_namespace( if (setup_propagate) (void) mkdir_p(propagate_dir, 0600); - if (n_extension_images > 0) - /* ExtensionImages mountpoint directories will be created while parsing the mounts to create, - * so have the parent ready */ + if (n_extension_images > 0 || !strv_isempty(extension_directories)) + /* ExtensionImages/Directories mountpoint directories will be created while parsing the + * mounts to create, so have the parent ready */ (void) mkdir_p(extension_dir, 0600); /* Remount / as SLAVE so that nothing now mounted in the namespace From 4c0ab40ab8e173062db0d36a6007a047deb5abde Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Wed, 9 Mar 2022 02:08:15 +0000 Subject: [PATCH 2/3] test: set log level of user manager in TEST-43 to debug --- test/units/testsuite-43.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/units/testsuite-43.sh b/test/units/testsuite-43.sh index 9c0f93b518..3efe419377 100755 --- a/test/units/testsuite-43.sh +++ b/test/units/testsuite-43.sh @@ -15,6 +15,8 @@ runas() { runas testuser systemd-run --wait --user --unit=test-private-users \ -p PrivateUsers=yes -P echo hello +runas testuser systemctl --user log-level debug + runas testuser systemd-run --wait --user --unit=test-private-tmp-innerfile \ -p PrivateUsers=yes -p PrivateTmp=yes \ -P touch /tmp/innerfile.txt From ea63a260d43c27a6b5b5ae471a8d4617bb7be447 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Wed, 9 Mar 2022 02:07:34 +0000 Subject: [PATCH 3/3] core: support MountAPIVFS and RootDirectory in user manager The only piece missing was to somehow make /proc appear in the new user+mount namespace. It is not possible to mount a new /proc instance, not even with hidepid=invisible,subset=pid, in a user namespace unless a PID namespace is created too (and also at the same time as the other namespaces, it is not possible to mount a new /proc in a child process that creates a PID namespace forked from a parent that created a user+mount namespace, it has to happen at the same time). Use the host's /proc with a bind-mount as a fallback for this case. User session services would already run with it, so nothing is lost. --- man/systemd.exec.xml | 12 +++--------- src/core/namespace.c | 23 ++++++++++++++++++++--- test/TEST-43-PRIVATEUSER-UNPRIV/test.sh | 9 +++++++++ test/units/testsuite-43.sh | 18 ++++++++++++++++++ 4 files changed, 50 insertions(+), 12 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 38220958b4..3b57f8d2f1 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -143,9 +143,7 @@ Mounting logging sockets into root environment BindReadOnlyPaths=/dev/log /run/systemd/journal/socket /run/systemd/journal/stdout - - - + @@ -276,9 +274,7 @@ In order to allow propagating mounts at runtime in a safe manner, /run/systemd/propagate on the host will be used to set up new mounts, and /run/host/incoming/ in the private namespace - will be used as an intermediate step to store them before being moved to the final mount point. - - + will be used as an intermediate step to store them before being moved to the final mount point. @@ -364,9 +360,7 @@ InaccessiblePaths=, or under /home/ and other protected directories if ProtectHome=yes is specified. TemporaryFileSystem= with :ro or - ProtectHome=tmpfs should be used instead. - - + ProtectHome=tmpfs should be used instead. diff --git a/src/core/namespace.c b/src/core/namespace.c index e74e6ea778..77dd473a48 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -1128,9 +1128,15 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) { r = path_is_mount_point(entry_path, NULL, 0); if (r < 0) return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m"); - if (r == 0) - /* /proc is not mounted. Propagate the original error code. */ - return -EPERM; + if (r == 0) { + /* We lack permissions to mount a new instance of /proc, and it is not already + * mounted. But we can access the host's, so as a final fallback bind-mount it to + * the destination, as most likely we are inside a user manager in an unprivileged + * user namespace. */ + r = mount_nofollow_verbose(LOG_DEBUG, "/proc", entry_path, NULL, MS_BIND|MS_REC, NULL); + if (r < 0) + return -EPERM; + } } else if (r < 0) return r; @@ -2446,6 +2452,17 @@ int setup_namespace( /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ r = mount_move_root(root); + if (r == -EINVAL && root_directory) { + /* If we are using root_directory and we don't have privileges (ie: user manager in a user + * namespace) and the root_directory is already a mount point in the parent namespace, + * MS_MOVE will fail as we don't have permission to change it (with EINVAL rather than + * EPERM). Attempt to bind-mount it over itself (like we do above if it's not already a + * mount point) and try again. */ + r = mount_nofollow_verbose(LOG_DEBUG, root, root, NULL, MS_BIND|MS_REC, NULL); + if (r < 0) + goto finish; + r = mount_move_root(root); + } if (r < 0) { log_debug_errno(r, "Failed to mount root with MS_MOVE: %m"); goto finish; diff --git a/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh b/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh index bb8bc18697..dafcdb58fc 100755 --- a/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh +++ b/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh @@ -3,10 +3,19 @@ set -e TEST_DESCRIPTION="Test PrivateUsers=yes on user manager" +IMAGE_NAME="private-users" # shellcheck source=test/test-functions . "${TEST_BASE_DIR:?}/test-functions" has_user_dbus_socket || exit 0 +command -v mksquashfs >/dev/null 2>&1 || exit 0 + +test_append_files() { + ( + inst_binary unsquashfs + install_verity_minimal + ) +} do_test "$@" diff --git a/test/units/testsuite-43.sh b/test/units/testsuite-43.sh index 3efe419377..cda1fe1fda 100755 --- a/test/units/testsuite-43.sh +++ b/test/units/testsuite-43.sh @@ -68,6 +68,24 @@ runas testuser systemd-run --wait --user --unit=test-group-fail \ -P true \ && { echo 'unexpected success'; exit 1; } +# Check that with a new user namespace we can bind mount +# files and use a different root directory +runas testuser systemd-run --wait --user --unit=test-bind-mount \ + -p PrivateUsers=yes -p BindPaths=/dev/null:/etc/os-release \ + test ! -s /etc/os-release + +unsquashfs -no-xattrs -d /tmp/img /usr/share/minimal_0.raw +runas testuser systemd-run --wait --user --unit=test-root-dir \ + -p PrivateUsers=yes -p RootDirectory=/tmp/img \ + grep MARKER=1 /etc/os-release + +mkdir /tmp/img_bind +mount --bind /tmp/img /tmp/img_bind +runas testuser systemd-run --wait --user --unit=test-root-dir-bind \ + -p PrivateUsers=yes -p RootDirectory=/tmp/img_bind \ + grep MARKER=1 /etc/os-release +umount /tmp/img_bind + systemd-analyze log-level info echo OK >/testok