mirror of
https://github.com/morgan9e/systemd
synced 2026-04-14 00:14:32 +09:00
mountfsd: make MountDirectory() work with systemd-homed
systemd-homed already applies an idmap to its mounts, hence we need to undo it before we can create our own.
This commit is contained in:
committed by
Daan De Meyer
parent
0d87de0b8e
commit
9f69ff69f7
@@ -27,6 +27,7 @@
|
||||
#include "loop-util.h"
|
||||
#include "main-func.h"
|
||||
#include "memory-util.h"
|
||||
#include "mount-util.h"
|
||||
#include "namespace-util.h"
|
||||
#include "nsresource.h"
|
||||
#include "nulstr-util.h"
|
||||
@@ -41,6 +42,7 @@
|
||||
#include "time-util.h"
|
||||
#include "uid-classification.h"
|
||||
#include "uid-range.h"
|
||||
#include "user-util.h"
|
||||
#include "varlink-io.systemd.MountFileSystem.h"
|
||||
#include "varlink-util.h"
|
||||
|
||||
@@ -637,10 +639,16 @@ static MountMapMode default_mount_map_mode(DirectoryOwnership ownership) {
|
||||
|
||||
static JSON_DISPATCH_ENUM_DEFINE(dispatch_mount_directory_mode, MountMapMode, mount_map_mode_from_string);
|
||||
|
||||
static DirectoryOwnership validate_directory_fd(int fd, uid_t peer_uid) {
|
||||
static DirectoryOwnership validate_directory_fd(
|
||||
int fd,
|
||||
uid_t peer_uid,
|
||||
uid_t *ret_current_owner_uid) {
|
||||
|
||||
int r, fl;
|
||||
|
||||
assert(fd >= 0);
|
||||
assert(uid_is_valid(peer_uid));
|
||||
assert(ret_current_owner_uid);
|
||||
|
||||
/* Checks if the specified directory fd looks sane. Returns a DirectoryOwnership that categorizes the
|
||||
* ownership situation in comparison to the peer's UID.
|
||||
@@ -665,6 +673,7 @@ static DirectoryOwnership validate_directory_fd(int fd, uid_t peer_uid) {
|
||||
return log_debug_errno(fl, "Directory file descriptor has unsafe flags set: %m");
|
||||
|
||||
if (st.st_uid == 0) {
|
||||
*ret_current_owner_uid = st.st_uid;
|
||||
if (peer_uid == 0) {
|
||||
log_debug("Directory file descriptor points to root owned directory, who is also the peer.");
|
||||
return DIRECTORY_IS_ROOT_PEER_OWNED;
|
||||
@@ -674,6 +683,7 @@ static DirectoryOwnership validate_directory_fd(int fd, uid_t peer_uid) {
|
||||
}
|
||||
if (st.st_uid == peer_uid) {
|
||||
log_debug("Directory file descriptor points to peer owned directory.");
|
||||
*ret_current_owner_uid = st.st_uid;
|
||||
return DIRECTORY_IS_PEER_OWNED;
|
||||
}
|
||||
|
||||
@@ -687,12 +697,14 @@ static DirectoryOwnership validate_directory_fd(int fd, uid_t peer_uid) {
|
||||
/* Stop iteration if we find a directory up the tree that is neither owned by the user, nor is from the foreign UID range */
|
||||
if (!uid_is_foreign(st.st_uid) || !gid_is_foreign(st.st_gid)) {
|
||||
log_debug("Directory file descriptor points to directory which itself or its parents is neither owned by foreign UID range nor by the user.");
|
||||
*ret_current_owner_uid = st.st_uid;
|
||||
return DIRECTORY_IS_OTHERWISE_OWNED;
|
||||
}
|
||||
|
||||
/* If the peer is root, then it doesn't matter if we find a parent owned by root, let's shortcut things. */
|
||||
if (peer_uid == 0) {
|
||||
log_debug("Directory file descriptor is owned by foreign UID range, and peer is root.");
|
||||
*ret_current_owner_uid = st.st_uid;
|
||||
return DIRECTORY_IS_FOREIGN_OWNED;
|
||||
}
|
||||
|
||||
@@ -708,11 +720,13 @@ static DirectoryOwnership validate_directory_fd(int fd, uid_t peer_uid) {
|
||||
/* Safety check to see if we hit the root dir */
|
||||
if (stat_inode_same(&st, &new_st)) {
|
||||
log_debug("Directory file descriptor is owned by foreign UID range, but didn't find parent directory that is owned by peer among ancestors.");
|
||||
*ret_current_owner_uid = st.st_uid;
|
||||
return DIRECTORY_IS_OTHERWISE_OWNED;
|
||||
}
|
||||
|
||||
if (new_st.st_uid == peer_uid) { /* Parent inode is owned by the peer. That's good! Everything's fine. */
|
||||
log_debug("Directory file descriptor is owned by foreign UID range, and ancestor is owned by peer.");
|
||||
*ret_current_owner_uid = st.st_uid;
|
||||
return DIRECTORY_IS_FOREIGN_OWNED;
|
||||
}
|
||||
|
||||
@@ -721,6 +735,7 @@ static DirectoryOwnership validate_directory_fd(int fd, uid_t peer_uid) {
|
||||
}
|
||||
|
||||
log_debug("Failed to find peer owned parent directory after %u levels, refusing.", n_level);
|
||||
*ret_current_owner_uid = st.st_uid;
|
||||
return DIRECTORY_IS_OTHERWISE_OWNED;
|
||||
}
|
||||
|
||||
@@ -771,7 +786,8 @@ static int vl_method_mount_directory(
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to get client UID: %m");
|
||||
|
||||
DirectoryOwnership owned_by = validate_directory_fd(directory_fd, peer_uid);
|
||||
uid_t current_owner_uid;
|
||||
DirectoryOwnership owned_by = validate_directory_fd(directory_fd, peer_uid, ¤t_owner_uid);
|
||||
if (owned_by == -EREMOTEIO)
|
||||
return sd_varlink_errorbo(link, "io.systemd.MountFileSystem.BadFileDescriptorFlags", SD_JSON_BUILD_PAIR_STRING("parameter", "directoryFileDescriptor"));
|
||||
if (owned_by < 0)
|
||||
@@ -836,10 +852,28 @@ static int vl_method_mount_directory(
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
_cleanup_close_ int mount_fd = open_tree(directory_fd, "", OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH);
|
||||
_cleanup_close_ int mount_fd = open_tree_try_drop_idmap(
|
||||
directory_fd,
|
||||
"",
|
||||
OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH);
|
||||
if (mount_fd < 0)
|
||||
return log_debug_errno(errno, "Failed to issue open_tree() of provided directory '%s': %m", strna(directory_path));
|
||||
|
||||
/* MOUNT_ATTR_IDMAP has possibly been cleared. Let's verify that the underlying data matches our expectations. */
|
||||
struct stat unmapped_st;
|
||||
if (fstat(mount_fd, &unmapped_st) < 0)
|
||||
return log_debug_errno(errno, "Failed to stat unmapped inode: %m");
|
||||
|
||||
r = stat_verify_directory(&unmapped_st);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* For now, let's simply refuse things if dropping the idmapping changed anything. For now that
|
||||
* should be good enough, because the primary usecase for this (homed) will mount the foreign UID
|
||||
* range 1:1. */
|
||||
if (unmapped_st.st_uid != current_owner_uid)
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(EPERM), "Owner UID of mount after clearing ID mapping not the same anymore, refusing.");
|
||||
|
||||
if (p.read_only > 0 && mount_setattr(
|
||||
mount_fd, "", AT_EMPTY_PATH,
|
||||
&(struct mount_attr) {
|
||||
|
||||
@@ -826,17 +826,16 @@ static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t u
|
||||
* caller's userns *without* any mount idmapping in place. To get that uid, we clone the
|
||||
* mount source tree and clear any existing idmapping and temporarily mount that tree over
|
||||
* the mount source before we stat the mount source to figure out the source uid. */
|
||||
_cleanup_close_ int fd_clone = open_tree_attr_with_fallback(
|
||||
_cleanup_close_ int fd_clone =
|
||||
idmapping == REMOUNT_IDMAPPING_NONE ?
|
||||
RET_NERRNO(open_tree(
|
||||
AT_FDCWD,
|
||||
m->source,
|
||||
open_tree_flags,
|
||||
&(struct mount_attr) {
|
||||
.attr_clr = idmapping != REMOUNT_IDMAPPING_NONE ? MOUNT_ATTR_IDMAP : 0,
|
||||
});
|
||||
if (ERRNO_IS_NEG_NOT_SUPPORTED(fd_clone))
|
||||
/* We can only clear idmapped mounts with open_tree_attr(), but there might not be one in
|
||||
* the first place, so we keep going if we get a not supported error. */
|
||||
fd_clone = open_tree(AT_FDCWD, m->source, open_tree_flags);
|
||||
open_tree_flags)) :
|
||||
open_tree_try_drop_idmap(
|
||||
AT_FDCWD,
|
||||
m->source,
|
||||
open_tree_flags);
|
||||
if (fd_clone < 0)
|
||||
return log_error_errno(errno, "Failed to clone %s: %m", m->source);
|
||||
|
||||
|
||||
@@ -1477,7 +1477,7 @@ int make_userns(uid_t uid_shift,
|
||||
return TAKE_FD(userns_fd);
|
||||
}
|
||||
|
||||
int open_tree_attr_with_fallback(int dir_fd, const char *path, unsigned int flags, struct mount_attr *attr) {
|
||||
int open_tree_attr_with_fallback(int dir_fd, const char *path, unsigned flags, struct mount_attr *attr) {
|
||||
_cleanup_close_ int fd = -EBADF;
|
||||
|
||||
assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
|
||||
@@ -1507,6 +1507,42 @@ int open_tree_attr_with_fallback(int dir_fd, const char *path, unsigned int flag
|
||||
return TAKE_FD(fd);
|
||||
}
|
||||
|
||||
int open_tree_try_drop_idmap(int dir_fd, const char *path, unsigned flags) {
|
||||
/* Tries to drop MOUNT_ATTR_IDMAP while calling open_tree_attr(), but if that doesn't work just uses
|
||||
* a regular open_tree() */
|
||||
|
||||
assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
|
||||
|
||||
if (isempty(path)) {
|
||||
path = "";
|
||||
flags |= AT_EMPTY_PATH;
|
||||
}
|
||||
|
||||
_cleanup_close_ int fd = open_tree_attr_with_fallback(
|
||||
dir_fd,
|
||||
path,
|
||||
flags,
|
||||
&(struct mount_attr) {
|
||||
.attr_clr = MOUNT_ATTR_IDMAP,
|
||||
});
|
||||
if (fd < 0) {
|
||||
if (!ERRNO_IS_NEG_NOT_SUPPORTED(fd))
|
||||
return log_debug_errno(fd, "Failed to clear idmap of directory with open_tree_attr(): %m");
|
||||
|
||||
log_debug_errno(fd, "Failed to clear idmap with open_tree_attr(), retrying open_tree() without clearing idmap: %m");
|
||||
|
||||
fd = RET_NERRNO(open_tree(dir_fd, path, flags));
|
||||
if (fd < 0)
|
||||
return log_debug_errno(fd, "Both open_tree() and open_tree_attr() failed, giving up: %m");
|
||||
|
||||
log_debug("open_tree() without clearing idmap worked.");
|
||||
return TAKE_FD(fd);
|
||||
}
|
||||
|
||||
log_debug("Successfully acquired mount fd with cleared idmap.");
|
||||
return TAKE_FD(fd);
|
||||
}
|
||||
|
||||
int remount_idmap_fd(
|
||||
char **paths,
|
||||
int userns_fd,
|
||||
|
||||
@@ -145,7 +145,8 @@ typedef enum RemountIdmapping {
|
||||
_REMOUNT_IDMAPPING_INVALID = -EINVAL,
|
||||
} RemountIdmapping;
|
||||
|
||||
int open_tree_attr_with_fallback(int dir_fd, const char *path, unsigned int flags, struct mount_attr *attr);
|
||||
int open_tree_attr_with_fallback(int dir_fd, const char *path, unsigned flags, struct mount_attr *attr);
|
||||
int open_tree_try_drop_idmap(int dir_fd, const char *path, unsigned flags);
|
||||
|
||||
int make_userns(uid_t uid_shift, uid_t uid_range, uid_t host_owner, uid_t dest_owner, RemountIdmapping idmapping);
|
||||
int remount_idmap_fd(char **p, int userns_fd, uint64_t extra_mount_attr_set);
|
||||
|
||||
@@ -29,9 +29,16 @@ ProtectHostname=yes
|
||||
RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6
|
||||
RestrictRealtime=yes
|
||||
RestrictSUIDSGID=yes
|
||||
SystemCallArchitectures=native
|
||||
SystemCallErrorNumber=EPERM
|
||||
SystemCallFilter=@system-service @mount
|
||||
|
||||
# FIXME: libseccomp (as of 2.6.0) doesn't know the open_tree_attr() system call
|
||||
# yet which we need. Thus, applying any system call filter would block this
|
||||
# system call unconditionally, which is not desirable. This should be added
|
||||
# back once libseccomp is updated. See
|
||||
# https://github.com/seccomp/libseccomp/issues/465
|
||||
|
||||
#SystemCallArchitectures=native
|
||||
#SystemCallErrorNumber=EPERM
|
||||
#SystemCallFilter=@system-service @mount
|
||||
Type=notify
|
||||
NotifyAccess=all
|
||||
FileDescriptorStoreMax=4096
|
||||
|
||||
Reference in New Issue
Block a user