mountfsd: add call for creating a foreign UID owned dir in dir owned by caller

In order to fully support unpriv containers placed in directories owned
by the foreign UID range, below some unpriv user $HOME, we need a way
to actually create these hierarchies in the first place.

Let's add a method call for that. It takes a dir fd, then validates that
its ownership matches the client's identity, and then creates a subdir,
chown()ing it to the foreign UID range. It then returns an fd to the
result.

The result could then be passed to MountDirectory() in order to get a
mount which can then be populated by some code running in a dynamic
userns.
This commit is contained in:
Lennart Poettering
2025-05-26 12:16:43 +02:00
parent cb062410ec
commit 0261fe571b
3 changed files with 162 additions and 1 deletions

View File

@@ -116,4 +116,27 @@
<annotate key="org.freedesktop.policykit.imply">io.systemd.mount-file-system.mount-directory-privately</annotate>
</action>
<!-- Allow making foreign UID range owned directories. This action is checked by the
     MakeDirectory() handler when the parent directory is owned by the calling user;
     it is permitted by default for any, inactive and active sessions. -->
<action id="io.systemd.mount-file-system.make-directory">
<description gettext-domain="systemd">Allow creating directory owned by the foreign UID range</description>
<message gettext-domain="systemd">Authentication is required for an application to create $(directory) owned by the foreign UID range.</message>
<defaults>
<allow_any>yes</allow_any>
<allow_inactive>yes</allow_inactive>
<allow_active>yes</allow_active>
</defaults>
</action>
<!-- Allow making foreign UID range owned directories below a parent directory that is
     NOT owned by the calling user. Always requires administrator authentication, and
     implies the plain make-directory action. -->
<action id="io.systemd.mount-file-system.make-directory-untrusted">
<description gettext-domain="systemd">Allow creating directory owned by the foreign UID range below directory not owned by the user</description>
<message gettext-domain="systemd">Authentication is required for an application to create $(directory) owned by the foreign UID range, below a directory not owned by the user.</message>
<defaults>
<allow_any>auth_admin</allow_any>
<allow_inactive>auth_admin</allow_inactive>
<allow_active>auth_admin</allow_active>
</defaults>
<annotate key="org.freedesktop.policykit.imply">io.systemd.mount-file-system.make-directory</annotate>
</action>
</policyconfig>

View File

@@ -17,6 +17,7 @@
#include "env-util.h"
#include "errno-util.h"
#include "fd-util.h"
#include "fs-util.h"
#include "format-util.h"
#include "hashmap.h"
#include "image-policy.h"
@@ -35,6 +36,7 @@
#include "string-table.h"
#include "string-util.h"
#include "strv.h"
#include "tmpfile-util.h"
#include "time-util.h"
#include "uid-classification.h"
#include "uid-range.h"
@@ -907,6 +909,130 @@ static int vl_method_mount_directory(
SD_JSON_BUILD_PAIR("mountFileDescriptor", SD_JSON_BUILD_INTEGER(fd_idx)));
}
/* Parsed input parameters of the MakeDirectory() Varlink method. */
typedef struct MakeDirectoryParameters {
        /* Index of the parent directory fd among the fds passed along with the call */
        unsigned parent_fd_idx;

        /* File name of the directory to create below the parent (points into the JSON parameters, not owned) */
        const char *name;
} MakeDirectoryParameters;
/* Varlink handler for io.systemd.MountFileSystem.MakeDirectory.
 *
 * Takes a parent directory fd ("parentFileDescriptor", an index into the fds passed with the call)
 * and a file name ("name"). After a polkit check it creates a new directory of that name below the
 * parent, sets its mode to 0700, chown()s it to FOREIGN_UID_BASE:FOREIGN_UID_BASE and replies with
 * an fd referring to the new directory ("directoryFileDescriptor").
 *
 * The polkit action depends on the ownership of the parent directory: if it is owned by the calling
 * peer the "make-directory" action is used (allowed by default), otherwise the stricter
 * "make-directory-untrusted" action is used.
 *
 * userdata must point to the polkit registry (Hashmap**).
 *
 * Returns a negative errno-style error on failure, otherwise whatever the reply/polkit helpers return. */
static int vl_method_make_directory(
                sd_varlink *link,
                sd_json_variant *parameters,
                sd_varlink_method_flags_t flags,
                void *userdata) {

        static const sd_json_dispatch_field dispatch_table[] = {
                { "parentFileDescriptor", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint,        offsetof(MakeDirectoryParameters, parent_fd_idx), SD_JSON_MANDATORY },
                { "name",                 SD_JSON_VARIANT_STRING,   json_dispatch_const_filename, offsetof(MakeDirectoryParameters, name),          SD_JSON_MANDATORY },
                VARLINK_DISPATCH_POLKIT_FIELD,
                {}
        };

        MakeDirectoryParameters p = {
                .parent_fd_idx = UINT_MAX,
        };
        Hashmap **polkit_registry = ASSERT_PTR(userdata);
        int r;

        r = sd_varlink_dispatch(link, parameters, dispatch_table, &p);
        if (r != 0)
                return r;

        /* UINT_MAX is the "unset" marker we initialized the field with; refuse it as an index */
        if (p.parent_fd_idx == UINT_MAX)
                return sd_varlink_error_invalid_parameter_name(link, "parentFileDescriptor");

        _cleanup_close_ int parent_fd = sd_varlink_peek_dup_fd(link, p.parent_fd_idx);
        if (parent_fd < 0)
                return log_debug_errno(parent_fd, "Failed to peek parent directory fd from client: %m");

        uid_t peer_uid;
        r = sd_varlink_get_peer_uid(link, &peer_uid);
        if (r < 0)
                return log_debug_errno(r, "Failed to get client UID: %m");

        /* Note: fstat() reports its error via errno, not via 'r'. 'r' still holds the (non-negative)
         * result of the previous call here, hence we must not return it on failure. */
        struct stat parent_stat;
        if (fstat(parent_fd, &parent_stat) < 0)
                return log_debug_errno(errno, "Failed to stat parent directory fd: %m");

        r = stat_verify_directory(&parent_stat);
        if (r < 0)
                return r;

        int fl = fd_verify_safe_flags_full(parent_fd, O_DIRECTORY);
        if (fl < 0)
                return log_debug_errno(fl, "Directory file descriptor has unsafe flags set: %m");

        /* The path is used for logging and for the polkit details only; failure to determine it is
         * not fatal, we fall back to strna() output then. */
        _cleanup_free_ char *parent_path = NULL;
        (void) fd_get_path(parent_fd, &parent_path);

        _cleanup_free_ char *new_path = parent_path ? path_join(parent_path, p.name) : NULL;
        log_debug("Asked to make directory: %s", strna(new_path));

        const char *polkit_details[] = {
                "directory", strna(new_path),
                NULL,
        };

        const char *polkit_action;
        PolkitFlags polkit_flags;
        if (parent_stat.st_uid != peer_uid) {
                /* Parent is not owned by the caller: use the stricter action, no default allow */
                polkit_action = "io.systemd.mount-file-system.make-directory-untrusted";
                polkit_flags = 0;
        } else {
                polkit_action = "io.systemd.mount-file-system.make-directory";
                polkit_flags = POLKIT_DEFAULT_ALLOW;
        }

        r = varlink_verify_polkit_async_full(
                        link,
                        /* bus= */ NULL,
                        polkit_action,
                        polkit_details,
                        /* good_user= */ UID_INVALID,
                        polkit_flags,
                        polkit_registry);
        if (r <= 0) /* NOTE(review): presumably 0 means the decision is still pending, confirm against the helper */
                return r;

        /* Create the directory under a temporary name first, fix up mode and ownership, and only
         * then move it to its final name, so that the final name never shows up half-initialized. */
        _cleanup_free_ char *t = NULL;
        r = tempfn_random(p.name, "mountfsd", &t);
        if (r < 0)
                return r;

        _cleanup_close_ int fd = open_mkdir_at(parent_fd, t, O_CLOEXEC, 0700);
        if (fd < 0)
                return fd;

        r = RET_NERRNO(fchmod(fd, 0700)); /* Set mode explicitly, as paranoia regarding umask games */
        if (r < 0)
                goto fail;

        r = RET_NERRNO(fchown(fd, FOREIGN_UID_BASE, FOREIGN_UID_BASE));
        if (r < 0)
                goto fail;

        r = rename_noreplace(parent_fd, t, parent_fd, p.name);
        if (r < 0)
                goto fail;

        t = mfree(t); /* temporary filename no longer exists */

        int fd_idx = sd_varlink_push_fd(link, fd);
        if (fd_idx < 0) {
                r = fd_idx;
                goto fail;
        }

        TAKE_FD(fd); /* the fd was pushed to the link successfully, do not close it via cleanup */

        return sd_varlink_replybo(
                        link,
                        SD_JSON_BUILD_PAIR("directoryFileDescriptor", SD_JSON_BUILD_INTEGER(fd_idx)));

fail:
        /* Remove whatever we created: the temporary name if the rename did not happen yet, otherwise
         * (t was reset to NULL above) the final name. */
        (void) unlinkat(parent_fd, t ?: p.name, AT_REMOVEDIR);
        return r;
}
static int process_connection(sd_varlink_server *server, int _fd) {
_cleanup_close_ int fd = TAKE_FD(_fd); /* always take possession */
_cleanup_(sd_varlink_close_unrefp) sd_varlink *vl = NULL;
@@ -977,7 +1103,8 @@ static int run(int argc, char *argv[]) {
r = sd_varlink_server_bind_method_many(
server,
"io.systemd.MountFileSystem.MountImage", vl_method_mount_image,
"io.systemd.MountFileSystem.MountDirectory", vl_method_mount_directory);
"io.systemd.MountFileSystem.MountDirectory", vl_method_mount_directory,
"io.systemd.MountFileSystem.MakeDirectory", vl_method_make_directory);
if (r < 0)
return log_error_errno(r, "Failed to bind methods: %m");

View File

@@ -99,6 +99,15 @@ static SD_VARLINK_DEFINE_METHOD(
SD_VARLINK_FIELD_COMMENT("The freshly allocated mount file descriptor for the mount."),
SD_VARLINK_DEFINE_OUTPUT(mountFileDescriptor, SD_VARLINK_INT, 0));
/* Varlink IDL description of the MakeDirectory() method: two inputs (parent directory fd index and
 * the name of the directory to create), one output (fd index of the new directory).
 * NOTE(review): the handler's dispatch table also accepts the polkit field, but no polkit input is
 * declared here — confirm whether that is intended. */
static SD_VARLINK_DEFINE_METHOD(
                MakeDirectory,
                SD_VARLINK_FIELD_COMMENT("File descriptor of the directory to create the new directory in. Must be a regular, i.e. non-O_PATH file descriptor."),
                SD_VARLINK_DEFINE_INPUT(parentFileDescriptor, SD_VARLINK_INT, 0),
                SD_VARLINK_FIELD_COMMENT("Name of the directory to create."),
                SD_VARLINK_DEFINE_INPUT(name, SD_VARLINK_STRING, 0),
                SD_VARLINK_FIELD_COMMENT("File descriptor referencing the newly created directory."),
                SD_VARLINK_DEFINE_OUTPUT(directoryFileDescriptor, SD_VARLINK_INT, 0));
static SD_VARLINK_DEFINE_ERROR(IncompatibleImage);
static SD_VARLINK_DEFINE_ERROR(MultipleRootPartitionsFound);
static SD_VARLINK_DEFINE_ERROR(RootPartitionNotFound);
@@ -120,6 +129,8 @@ SD_VARLINK_DEFINE_INTERFACE(
&vl_method_MountImage,
SD_VARLINK_SYMBOL_COMMENT("Takes a directory file descriptor as input, returns a mount file descriptor."),
&vl_method_MountDirectory,
SD_VARLINK_SYMBOL_COMMENT("Creates an empty directory, owned by the foreign UID/GID range's root user, returns an open file descriptor to the directory. Access mode will be set to 0700."),
&vl_method_MakeDirectory,
SD_VARLINK_SYMBOL_COMMENT("Disk image is not compatible with this service."),
&vl_error_IncompatibleImage,
SD_VARLINK_SYMBOL_COMMENT("Multiple suitable root partitions found."),