Support ExtensionImages=/MountImages= in user services via mountfsd and PrivateUsers=yes (#39341)

This commit is contained in:
Luca Boccassi
2025-10-16 15:49:46 +01:00
committed by GitHub
8 changed files with 109 additions and 51 deletions

View File

@@ -0,0 +1,23 @@
<?xml version="1.0"?>
<!DOCTYPE refsect1 PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
<!--
SPDX-License-Identifier: LGPL-2.1-or-later
-->
<refsect1>
<title/>
<para id="singular">When enabled for services running in per-user instances of the service manager,
this option implicitly enables <varname>PrivateUsers=</varname> (which requires unprivileged user namespace
support to be enabled in the kernel via the <literal>kernel.unprivileged_userns_clone=</literal> sysctl)
and also relies on
<citerefentry><refentrytitle>systemd-mountfsd.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>.</para>
<para id="plural">When enabled for services running in per-user instances of the service manager,
these options implicitly enable <varname>PrivateUsers=</varname> (which requires unprivileged user namespace
support to be enabled in the kernel via the <literal>kernel.unprivileged_userns_clone=</literal> sysctl)
and also rely on
<citerefentry><refentrytitle>systemd-mountfsd.service</refentrytitle><manvolnum>8</manvolnum></citerefentry>.</para>
</refsect1>

View File

@@ -201,7 +201,7 @@
<xi:include href="vpick.xml" xpointer="image"/>
<xi:include href="system-only.xml" xpointer="singular"/>
<xi:include href="system-or-user-ns-mountfsd.xml" xpointer="singular"/>
<xi:include href="version-info.xml" xpointer="v233"/></listitem>
</varlistentry>
@@ -225,7 +225,7 @@
<constant>esp</constant>, <constant>xbootldr</constant>, <constant>tmp</constant>,
<constant>var</constant>.</para>
<xi:include href="system-only.xml" xpointer="singular"/>
<xi:include href="system-or-user-ns-mountfsd.xml" xpointer="singular"/>
<xi:include href="version-info.xml" xpointer="v247"/></listitem>
</varlistentry>
@@ -523,7 +523,7 @@
<varname>PrivateDevices=</varname> below, as it may change the setting of
<varname>DevicePolicy=</varname>.</para>
<xi:include href="system-only.xml" xpointer="singular"/>
<xi:include href="system-or-user-ns-mountfsd.xml" xpointer="singular"/>
<xi:include href="version-info.xml" xpointer="v247"/></listitem>
</varlistentry>
@@ -590,7 +590,7 @@
<xi:include href="vpick.xml" xpointer="image"/>
<xi:include href="system-only.xml" xpointer="singular"/>
<xi:include href="system-or-user-ns-mountfsd.xml" xpointer="singular"/>
<xi:include href="version-info.xml" xpointer="v248"/></listitem>
</varlistentry>

View File

@@ -4491,6 +4491,9 @@ static bool exec_needs_cap_sys_admin(const ExecContext *context, const ExecParam
context->n_temporary_filesystems > 0 ||
context->root_directory ||
!strv_isempty(context->extension_directories) ||
context->root_image ||
context->n_mount_images > 0 ||
context->n_extension_images > 0 ||
context->protect_system != PROTECT_SYSTEM_NO ||
context->protect_home != PROTECT_HOME_NO ||
exec_needs_pid_namespace(context, params) ||

View File

@@ -1617,7 +1617,8 @@ static int mount_mqueuefs(const MountEntry *m) {
static int mount_image(
MountEntry *m,
const char *root_directory,
const ImagePolicy *image_policy) {
const ImagePolicy *image_policy,
RuntimeScope runtime_scope) {
_cleanup_(extension_release_data_done) ExtensionReleaseData rdata = {};
ImageClass required_class = _IMAGE_CLASS_INVALID;
@@ -1652,6 +1653,7 @@ static int mount_image(
&rdata,
required_class,
&m->verity,
runtime_scope,
/* ret_image= */ NULL);
if (r == -ENOENT && m->ignore)
return 0;
@@ -2038,10 +2040,10 @@ static int apply_one_mount(
return mount_mqueuefs(m);
case MOUNT_IMAGE:
return mount_image(m, NULL, p->mount_image_policy);
return mount_image(m, NULL, p->mount_image_policy, p->runtime_scope);
case MOUNT_EXTENSION_IMAGE:
return mount_image(m, root_directory, p->extension_image_policy);
return mount_image(m, root_directory, p->extension_image_policy, p->runtime_scope);
case MOUNT_OVERLAY:
return mount_overlay(m);

View File

@@ -60,6 +60,7 @@
#include "proc-cmdline.h"
#include "process-util.h"
#include "resize-fs.h"
#include "runtime-scope.h"
#include "signal-util.h"
#include "siphash24.h"
#include "stat-util.h"
@@ -4416,11 +4417,13 @@ int verity_dissect_and_mount(
const ExtensionReleaseData *extension_release_data,
ImageClass required_class,
VeritySettings *verity,
RuntimeScope runtime_scope,
DissectedImage **ret_image) {
_cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
_cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
_cleanup_(verity_settings_done) VeritySettings local_verity = VERITY_SETTINGS_DEFAULT;
_cleanup_close_ int userns_fd = -EBADF;
DissectImageFlags dissect_image_flags;
bool relax_extension_release_check;
int r;
@@ -4451,55 +4454,70 @@ int verity_dissect_and_mount(
DISSECT_IMAGE_ALLOW_USERSPACE_VERITY |
DISSECT_IMAGE_VERITY_SHARE;
/* Note that we don't use loop_device_make here, as the FD is most likely O_PATH which would not be
* accepted by LOOP_CONFIGURE, so just let loop_device_make_by_path reopen it as a regular FD. */
r = loop_device_make_by_path(
src_fd >= 0 ? FORMAT_PROC_FD_PATH(src_fd) : src,
/* open_flags= */ -1,
/* sector_size= */ UINT32_MAX,
verity->data_path ? 0 : LO_FLAGS_PARTSCAN,
LOCK_SH,
&loop_device);
if (r < 0)
return log_debug_errno(r, "Failed to create loop device for image: %m");
if (runtime_scope == RUNTIME_SCOPE_SYSTEM) {
/* Note that we don't use loop_device_make here, as the FD is most likely O_PATH which would not be
* accepted by LOOP_CONFIGURE, so just let loop_device_make_by_path reopen it as a regular FD. */
r = loop_device_make_by_path(
src_fd >= 0 ? FORMAT_PROC_FD_PATH(src_fd) : src,
/* open_flags= */ -1,
/* sector_size= */ UINT32_MAX,
verity->data_path ? 0 : LO_FLAGS_PARTSCAN,
LOCK_SH,
&loop_device);
if (r < 0)
return log_debug_errno(r, "Failed to create loop device for image: %m");
r = dissect_loop_device(
loop_device,
verity,
options,
image_policy,
image_filter,
dissect_image_flags,
&dissected_image);
/* No partition table? Might be a single-filesystem image, try again */
if (!verity->data_path && r == -ENOPKG)
r = dissect_loop_device(
r = dissect_loop_device(
loop_device,
verity,
options,
image_policy,
image_filter,
dissect_image_flags | DISSECT_IMAGE_NO_PARTITION_TABLE,
dissect_image_flags,
&dissected_image);
if (r < 0)
return log_debug_errno(r, "Failed to dissect image: %m");
/* No partition table? Might be a single-filesystem image, try again */
if (!verity->data_path && r == -ENOPKG)
r = dissect_loop_device(
loop_device,
verity,
options,
image_policy,
image_filter,
dissect_image_flags | DISSECT_IMAGE_NO_PARTITION_TABLE,
&dissected_image);
if (r < 0)
return log_debug_errno(r, "Failed to dissect image: %m");
r = dissected_image_load_verity_sig_partition(dissected_image, loop_device->fd, verity);
if (r < 0)
return r;
r = dissected_image_load_verity_sig_partition(dissected_image, loop_device->fd, verity);
if (r < 0)
return r;
r = dissected_image_guess_verity_roothash(dissected_image, verity);
if (r < 0)
return r;
r = dissected_image_guess_verity_roothash(dissected_image, verity);
if (r < 0)
return r;
r = dissected_image_decrypt(
dissected_image,
NULL,
verity,
image_policy,
dissect_image_flags);
if (r < 0)
return log_debug_errno(r, "Failed to decrypt dissected image: %m");
r = dissected_image_decrypt(
dissected_image,
NULL,
verity,
image_policy,
dissect_image_flags);
if (r < 0)
return log_debug_errno(r, "Failed to decrypt dissected image: %m");
} else {
userns_fd = namespace_open_by_type(NAMESPACE_USER);
if (userns_fd < 0)
return log_debug_errno(userns_fd, "Failed to open our own user namespace: %m");
r = mountfsd_mount_image(
src_fd >= 0 ? FORMAT_PROC_FD_PATH(src_fd) : src,
userns_fd,
image_policy,
dissect_image_flags,
&dissected_image);
if (r < 0)
return r;
}
if (dest) {
r = mkdir_p_label(dest, 0755);
@@ -4515,14 +4533,16 @@ int verity_dissect_and_mount(
dest,
/* uid_shift= */ UID_INVALID,
/* uid_range= */ UID_INVALID,
/* userns_fd= */ -EBADF,
userns_fd,
dissect_image_flags);
if (r < 0)
return log_debug_errno(r, "Failed to mount image: %m");
r = loop_device_flock(loop_device, LOCK_UN);
if (r < 0)
return log_debug_errno(r, "Failed to unlock loopback device: %m");
if (loop_device) {
r = loop_device_flock(loop_device, LOCK_UN);
if (r < 0)
return log_debug_errno(r, "Failed to unlock loopback device: %m");
}
/* If we got os-release values from the caller, then we need to match them with the image's
* extension-release.d/ content. Return -EINVAL if there's any mismatch.

View File

@@ -238,7 +238,7 @@ bool dissected_image_verity_sig_ready(const DissectedImage *image, PartitionDesi
int mount_image_privately_interactively(const char *path, const ImagePolicy *image_policy, DissectImageFlags flags, char **ret_directory, int *ret_dir_fd, LoopDevice **ret_loop_device);
int verity_dissect_and_mount(int src_fd, const char *src, const char *dest, const MountOptions *options, const ImagePolicy *image_policy, const ImageFilter *image_filter, const ExtensionReleaseData *required_release_data, ImageClass required_class, VeritySettings *verity, DissectedImage **ret_image);
int verity_dissect_and_mount(int src_fd, const char *src, const char *dest, const MountOptions *options, const ImagePolicy *image_policy, const ImageFilter *image_filter, const ExtensionReleaseData *required_release_data, ImageClass required_class, VeritySettings *verity, RuntimeScope runtime_scope, DissectedImage **ret_image);
int dissect_fstype_ok(const char *fstype);

View File

@@ -27,6 +27,7 @@
#include "path-util.h"
#include "pidref.h"
#include "process-util.h"
#include "runtime-scope.h"
#include "set.h"
#include "sort-util.h"
#include "stat-util.h"
@@ -1005,6 +1006,7 @@ static int mount_in_namespace_legacy(
/* extension_release_data= */ NULL,
/* required_class= */ _IMAGE_CLASS_INVALID,
/* verity= */ NULL,
RUNTIME_SCOPE_SYSTEM,
/* ret_image= */ NULL);
else
r = mount_follow_verbose(LOG_DEBUG, FORMAT_PROC_FD_PATH(chased_src_fd), mount_tmp, NULL, MS_BIND, NULL);
@@ -1227,6 +1229,7 @@ static int mount_in_namespace(
/* extension_release_data= */ NULL,
/* required_class= */ _IMAGE_CLASS_INVALID,
/* verity= */ NULL,
RUNTIME_SCOPE_SYSTEM,
&img);
if (r < 0)
return log_debug_errno(r,

View File

@@ -67,6 +67,13 @@ systemd-dissect --image-policy='root=verity+signed:=absent+unused' --mtree /var/
# This should fail before we install the key
(! systemd-dissect --image-policy='root=signed:=absent+unused' --mtree /var/tmp/unpriv.raw >/dev/null)
# If kernel support is present, unprivileged user units should be able to use verity images too
if [ "$VERITY_SIG_SUPPORTED" -eq 1 ]; then
systemd-run -M testuser@ --user --pipe --wait \
--property RootImage="$MINIMAL_IMAGE.gpt" \
test -e "/dev/mapper/${MINIMAL_IMAGE_ROOTHASH}-verity"
fi
# Install key in keychain
mkdir -p /run/verity.d
cp /tmp/test-50-unpriv-cert.crt /run/verity.d/