nsresourced: permit differing uid/gid

This commit is contained in:
Lennart Poettering
2025-03-09 08:13:07 +01:00
parent 5c9327e353
commit 00b1f67313
4 changed files with 173 additions and 54 deletions

View File

@@ -344,10 +344,12 @@ static void manager_release_userns_by_inode(Manager *m, uint64_t inode) {
log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
"Failed to find userns for inode %" PRIu64 ", ignoring: %m", inode);
if (userns_info && uid_is_valid(userns_info->start))
log_debug("Removing user namespace mapping %" PRIu64 " for UID " UID_FMT ".", inode, userns_info->start);
else
log_debug("Removing user namespace mapping %" PRIu64 ".", inode);
if (DEBUG_LOGGING) {
if (userns_info && uid_is_valid(userns_info->start_uid))
log_debug("Removing user namespace mapping %" PRIu64 " for UID " UID_FMT ".", inode, userns_info->start_uid);
else
log_debug("Removing user namespace mapping %" PRIu64 ".", inode);
}
/* Remove the BPF rules */
manager_release_userns_bpf(m, inode);

View File

@@ -88,7 +88,7 @@ static int build_user_json(UserNamespaceInfo *userns_info, uid_t offset, sd_json
return sd_json_buildo(
ret,
SD_JSON_BUILD_PAIR("userName", SD_JSON_BUILD_STRING(name)),
SD_JSON_BUILD_PAIR("uid", SD_JSON_BUILD_UNSIGNED(userns_info->start + offset)),
SD_JSON_BUILD_PAIR("uid", SD_JSON_BUILD_UNSIGNED(userns_info->start_uid + offset)),
SD_JSON_BUILD_PAIR("gid", SD_JSON_BUILD_UNSIGNED(GID_NOBODY)),
SD_JSON_BUILD_PAIR("realName", SD_JSON_BUILD_STRING(realname)),
SD_JSON_BUILD_PAIR("homeDirectory", JSON_BUILD_CONST_STRING("/")),
@@ -155,7 +155,7 @@ static int vl_method_get_user_record(sd_varlink *link, sd_json_variant *paramete
if (offset >= userns_info->size) /* Outside of range? */
goto not_found;
if (uid_is_valid(p.uid) && p.uid != userns_info->start + offset)
if (uid_is_valid(p.uid) && p.uid != userns_info->start_uid + offset)
return sd_varlink_error(link, "io.systemd.UserDatabase.ConflictingRecordFound", NULL);
} else if (uid_is_valid(p.uid)) {
@@ -219,7 +219,7 @@ static int build_group_json(UserNamespaceInfo *userns_info, gid_t offset, sd_jso
return sd_json_buildo(
ret,
SD_JSON_BUILD_PAIR("groupName", SD_JSON_BUILD_STRING(name)),
SD_JSON_BUILD_PAIR("gid", SD_JSON_BUILD_UNSIGNED(userns_info->start + offset)),
SD_JSON_BUILD_PAIR("gid", SD_JSON_BUILD_UNSIGNED(userns_info->start_gid + offset)),
SD_JSON_BUILD_PAIR("description", SD_JSON_BUILD_STRING(description)),
SD_JSON_BUILD_PAIR("service", JSON_BUILD_CONST_STRING("io.systemd.NamespaceResource")),
SD_JSON_BUILD_PAIR("disposition", SD_JSON_BUILD_STRING(user_disposition_to_string(disposition))));
@@ -282,7 +282,7 @@ static int vl_method_get_group_record(sd_varlink *link, sd_json_variant *paramet
if (offset >= userns_info->size) /* Outside of range? */
goto not_found;
if (gid_is_valid(p.gid) && p.gid != userns_info->start + offset)
if (gid_is_valid(p.gid) && p.gid != userns_info->start_gid + offset)
return sd_varlink_error(link, "io.systemd.UserDatabase.ConflictingRecordFound", NULL);
} else if (gid_is_valid(p.gid)) {
@@ -298,9 +298,9 @@ static int vl_method_get_group_record(sd_varlink *link, sd_json_variant *paramet
start = p.gid & gidmask;
offset = p.gid - start;
r = userns_registry_load_by_start_uid(
r = userns_registry_load_by_start_gid(
/* registry_fd= */ -EBADF,
(uid_t) start,
start,
&userns_info);
if (r == -ENOENT)
goto not_found;
@@ -362,6 +362,12 @@ static int uid_is_available(
if (r > 0)
return false;
r = userns_registry_gid_exists(registry_dir_fd, (gid_t) candidate);
if (r < 0)
return r;
if (r > 0)
return false;
r = userdb_by_uid(candidate, /* match= */ NULL, USERDB_AVOID_MULTIPLEXER, /* ret_record= */ NULL);
if (r >= 0)
return false;
@@ -508,7 +514,8 @@ static int allocate_now(
if (r < 0)
return log_debug_errno(r, "Can't determine if UID range " UID_FMT " is available: %m", candidate);
if (r > 0) {
info->start = candidate;
info->start_uid = candidate;
info->start_gid = (gid_t) candidate;
/* Log the first and last UID of the range being allocated, separated so the two numbers don't run together. */
log_debug("Allocating UID range " UID_FMT "…" UID_FMT, candidate, candidate + info->size - 1);
@@ -530,10 +537,13 @@ static int write_userns(int usernsfd, const UserNamespaceInfo *userns_info) {
assert(usernsfd >= 0);
assert(userns_info);
assert(uid_is_valid(userns_info->target));
assert(uid_is_valid(userns_info->start));
assert(uid_is_valid(userns_info->target_uid));
assert(uid_is_valid(userns_info->start_uid));
assert(gid_is_valid(userns_info->target_gid));
assert(gid_is_valid(userns_info->start_gid));
assert(userns_info->size > 0);
assert(userns_info->size <= UINT32_MAX - userns_info->start);
assert(userns_info->size <= UINT32_MAX - userns_info->start_uid);
assert(userns_info->size <= UINT32_MAX - userns_info->start_gid);
efd = eventfd(0, EFD_CLOEXEC);
if (efd < 0)
@@ -572,7 +582,7 @@ static int write_userns(int usernsfd, const UserNamespaceInfo *userns_info) {
if (asprintf(&pmap, "/proc/" PID_FMT "/uid_map", pid) < 0)
return log_oom();
r = write_string_filef(pmap, 0, UID_FMT " " UID_FMT " " UID_FMT "\n", userns_info->target, userns_info->start, userns_info->size);
r = write_string_filef(pmap, 0, UID_FMT " " UID_FMT " %" PRIu32 "\n", userns_info->target_uid, userns_info->start_uid, userns_info->size);
if (r < 0)
return log_error_errno(r, "Failed to write 'uid_map' file of user namespace: %m");
@@ -580,7 +590,7 @@ static int write_userns(int usernsfd, const UserNamespaceInfo *userns_info) {
if (asprintf(&pmap, "/proc/" PID_FMT "/gid_map", pid) < 0)
return log_oom();
r = write_string_filef(pmap, 0, GID_FMT " " GID_FMT " " GID_FMT "\n", (gid_t) userns_info->target, (gid_t) userns_info->start, (gid_t) userns_info->size);
r = write_string_filef(pmap, 0, GID_FMT " " GID_FMT " %" PRIu32 "\n", userns_info->target_gid, userns_info->start_gid, userns_info->size);
if (r < 0)
return log_error_errno(r, "Failed to write 'gid_map' file of user namespace: %m");
@@ -881,7 +891,8 @@ static int vl_method_allocate_user_range(sd_varlink *link, sd_json_variant *para
userns_info->owner = peer_uid;
userns_info->userns_inode = userns_st.st_ino;
userns_info->size = p.size;
userns_info->target = p.target;
userns_info->target_uid = p.target;
userns_info->target_gid = (gid_t) p.target;
r = allocate_now(registry_dir_fd, userns_info, &lock_fd);
if (r == -EHOSTDOWN) /* The needed UID range is not delegated to us */
@@ -1238,12 +1249,14 @@ static int vl_method_add_mount_to_user_namespace(sd_varlink *link, sd_json_varia
if (r < 0)
return r;
if (userns_info->size > 0)
log_debug("Granting access to mount %i to user namespace " INO_FMT " ('%s' @ UID " UID_FMT ")",
mnt_id, userns_st.st_ino, userns_info->name, userns_info->start);
else
log_debug("Granting access to mount %i to user namespace " INO_FMT " ('%s')",
mnt_id, userns_st.st_ino, userns_info->name);
if (DEBUG_LOGGING) {
if (userns_info->size > 0)
log_debug("Granting access to mount %i to user namespace " INO_FMT " ('%s' @ UID " UID_FMT ")",
mnt_id, userns_st.st_ino, userns_info->name, userns_info->start_uid);
else
log_debug("Granting access to mount %i to user namespace " INO_FMT " ('%s')",
mnt_id, userns_st.st_ino, userns_info->name);
}
return sd_varlink_replyb(link, SD_JSON_BUILD_EMPTY_OBJECT);
}
@@ -1379,7 +1392,7 @@ static int vl_method_add_cgroup_to_user_namespace(sd_varlink *link, sd_json_vari
if (r < 0)
return r;
if (fchown(cgroup_fd, userns_info->start, userns_info->start) < 0)
if (fchown(cgroup_fd, userns_info->start_uid, userns_info->start_gid) < 0)
return log_debug_errno(errno, "Failed to change ownership of cgroup: %m");
if (fchmod(cgroup_fd, 0755) < 0)
@@ -1387,11 +1400,11 @@ static int vl_method_add_cgroup_to_user_namespace(sd_varlink *link, sd_json_vari
FOREACH_STRING(attr, "cgroup.procs", "cgroup.subtree_control", "cgroup.threads") {
(void) fchmodat(cgroup_fd, attr, 0644, AT_SYMLINK_NOFOLLOW);
(void) fchownat(cgroup_fd, attr, userns_info->start, userns_info->start, AT_SYMLINK_NOFOLLOW);
(void) fchownat(cgroup_fd, attr, userns_info->start_uid, userns_info->start_gid, AT_SYMLINK_NOFOLLOW);
}
log_debug("Granting ownership to cgroup %" PRIu64 " to userns " INO_FMT " ('%s' @ UID " UID_FMT ")",
cgroup_id, userns_st.st_ino, userns_info->name, userns_info->start);
cgroup_id, userns_st.st_ino, userns_info->name, userns_info->start_uid);
return sd_varlink_replyb(link, SD_JSON_BUILD_EMPTY_OBJECT);
}
@@ -1689,7 +1702,7 @@ static int vl_method_add_netif_to_user_namespace(sd_varlink *link, sd_json_varia
return r;
log_debug("Adding veth tunnel %s from host to userns " INO_FMT " ('%s' @ UID " UID_FMT ", interface %s).",
ifname_host, userns_st.st_ino, userns_info->name, userns_info->start, ifname_namespace);
ifname_host, userns_st.st_ino, userns_info->name, userns_info->start_uid, ifname_namespace);
return sd_varlink_replybo(
link,

View File

@@ -55,8 +55,10 @@ UserNamespaceInfo* userns_info_new(void) {
*info = (UserNamespaceInfo) {
.owner = UID_INVALID,
.start = UID_INVALID,
.target = UID_INVALID,
.start_uid = UID_INVALID,
.target_uid = UID_INVALID,
.start_gid = GID_INVALID,
.target_gid = GID_INVALID,
};
return info;
@@ -119,13 +121,15 @@ static int dispatch_cgroups_array(const char *name, sd_json_variant *variant, sd
static int userns_registry_load(int dir_fd, const char *fn, UserNamespaceInfo **ret) {
static const sd_json_dispatch_field dispatch_table[] = {
{ "owner", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uid_gid, offsetof(UserNamespaceInfo, owner), SD_JSON_MANDATORY },
{ "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, offsetof(UserNamespaceInfo, name), SD_JSON_MANDATORY },
{ "userns", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint64, offsetof(UserNamespaceInfo, userns_inode), SD_JSON_MANDATORY },
{ "start", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uid_gid, offsetof(UserNamespaceInfo, start), 0 },
{ "size", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint32, offsetof(UserNamespaceInfo, size), 0 },
{ "target", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uid_gid, offsetof(UserNamespaceInfo, target), 0 },
{ "cgroups", SD_JSON_VARIANT_ARRAY, dispatch_cgroups_array, 0, 0 },
{ "owner", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uid_gid, offsetof(UserNamespaceInfo, owner), SD_JSON_MANDATORY },
{ "name", SD_JSON_VARIANT_STRING, sd_json_dispatch_string, offsetof(UserNamespaceInfo, name), SD_JSON_MANDATORY },
{ "userns", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint64, offsetof(UserNamespaceInfo, userns_inode), SD_JSON_MANDATORY },
{ "size", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uint32, offsetof(UserNamespaceInfo, size), 0 },
{ "start", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uid_gid, offsetof(UserNamespaceInfo, start_uid), 0 },
{ "target", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uid_gid, offsetof(UserNamespaceInfo, target_uid), 0 },
{ "startGid", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uid_gid, offsetof(UserNamespaceInfo, start_gid), 0 },
{ "targetGid", SD_JSON_VARIANT_UNSIGNED, sd_json_dispatch_uid_gid, offsetof(UserNamespaceInfo, target_gid), 0 },
{ "cgroups", SD_JSON_VARIANT_ARRAY, dispatch_cgroups_array, 0, 0 },
{}
};
@@ -156,17 +160,32 @@ static int userns_registry_load(int dir_fd, const char *fn, UserNamespaceInfo **
if (userns_info->userns_inode == 0)
return -EBADMSG;
if (userns_info->start == 0)
if (userns_info->start_uid == 0 || userns_info->start_gid == 0)
return -EBADMSG;
if (userns_info->size == 0) {
if (uid_is_valid(userns_info->start) || uid_is_valid(userns_info->target))
return -EBADMSG;
} else {
if (!uid_is_valid(userns_info->start) || !uid_is_valid(userns_info->target))
if (uid_is_valid(userns_info->start_uid) || uid_is_valid(userns_info->target_uid))
return -EBADMSG;
if (userns_info->size > UINT32_MAX - userns_info->start ||
userns_info->size > UINT32_MAX - userns_info->target)
if (gid_is_valid(userns_info->start_gid) || gid_is_valid(userns_info->target_gid))
return -EBADMSG;
} else {
if (!uid_is_valid(userns_info->start_uid) || !uid_is_valid(userns_info->target_uid))
return -EBADMSG;
if (userns_info->size > UINT32_MAX - userns_info->start_uid ||
userns_info->size > UINT32_MAX - userns_info->target_uid)
return -EBADMSG;
/* Older versions of the registry didn't maintain UID/GID separately, hence copy the UID values over
 * to the GID fields if the latter are not set. Both fields must be populated before the GID range
 * check that follows: an unset (invalid) GID would make every entry with a non-zero size fail it. */
if (!gid_is_valid(userns_info->start_gid))
        userns_info->start_gid = (gid_t) userns_info->start_uid;
if (!gid_is_valid(userns_info->target_gid))
        userns_info->target_gid = (gid_t) userns_info->target_uid;
if (userns_info->size > UINT32_MAX - userns_info->start_gid ||
userns_info->size > UINT32_MAX - userns_info->target_gid)
return -EBADMSG;
}
@@ -195,6 +214,26 @@ int userns_registry_uid_exists(int dir_fd, uid_t start) {
return true;
}
int userns_registry_gid_exists(int dir_fd, gid_t start) {
_cleanup_free_ char *fn = NULL;
assert(dir_fd >= 0);
if (!gid_is_valid(start))
return -ENOENT;
if (start == 0)
return true;
if (asprintf(&fn, "g" GID_FMT ".userns", start) < 0)
return -ENOMEM;
if (faccessat(dir_fd, fn, F_OK, AT_SYMLINK_NOFOLLOW) < 0)
return errno == ENOENT ? false : -errno;
return true;
}
int userns_registry_name_exists(int dir_fd, const char *name) {
_cleanup_free_ char *fn = NULL;
@@ -254,7 +293,40 @@ int userns_registry_load_by_start_uid(int dir_fd, uid_t start, UserNamespaceInfo
if (r < 0)
return r;
if (userns_info->start != start)
if (userns_info->start_uid != start)
return -EBADMSG;
if (ret)
*ret = TAKE_PTR(userns_info);
return 0;
}
int userns_registry_load_by_start_gid(int dir_fd, gid_t start, UserNamespaceInfo **ret) {
_cleanup_(userns_info_freep) UserNamespaceInfo *userns_info = NULL;
_cleanup_close_ int registry_fd = -EBADF;
_cleanup_free_ char *fn = NULL;
int r;
if (!gid_is_valid(start))
return -ENOENT;
if (dir_fd < 0) {
registry_fd = userns_registry_open_fd();
if (registry_fd < 0)
return registry_fd;
dir_fd = registry_fd;
}
if (asprintf(&fn, "g" GID_FMT ".userns", start) < 0)
return -ENOMEM;
r = userns_registry_load(dir_fd, fn, &userns_info);
if (r < 0)
return r;
if (userns_info->start_gid != start)
return -EBADMSG;
if (ret)
@@ -366,9 +438,11 @@ int userns_registry_store(int dir_fd, UserNamespaceInfo *info) {
SD_JSON_BUILD_PAIR("owner", SD_JSON_BUILD_UNSIGNED(info->owner)),
SD_JSON_BUILD_PAIR("name", SD_JSON_BUILD_STRING(info->name)),
SD_JSON_BUILD_PAIR("userns", SD_JSON_BUILD_UNSIGNED(info->userns_inode)),
SD_JSON_BUILD_PAIR_CONDITION(uid_is_valid(info->start), "start", SD_JSON_BUILD_UNSIGNED(info->start)),
SD_JSON_BUILD_PAIR_CONDITION(uid_is_valid(info->start), "size", SD_JSON_BUILD_UNSIGNED(info->size)),
SD_JSON_BUILD_PAIR_CONDITION(uid_is_valid(info->start), "target", SD_JSON_BUILD_UNSIGNED(info->target)),
SD_JSON_BUILD_PAIR_CONDITION(info->size > 0, "size", SD_JSON_BUILD_UNSIGNED(info->size)),
SD_JSON_BUILD_PAIR_CONDITION(uid_is_valid(info->start_uid), "start", SD_JSON_BUILD_UNSIGNED(info->start_uid)),
SD_JSON_BUILD_PAIR_CONDITION(uid_is_valid(info->target_uid), "target", SD_JSON_BUILD_UNSIGNED(info->target_uid)),
SD_JSON_BUILD_PAIR_CONDITION(gid_is_valid(info->start_gid), "startGid", SD_JSON_BUILD_UNSIGNED(info->start_gid)),
SD_JSON_BUILD_PAIR_CONDITION(gid_is_valid(info->target_gid), "targetGid", SD_JSON_BUILD_UNSIGNED(info->target_gid)),
SD_JSON_BUILD_PAIR_CONDITION(!!cgroup_array, "cgroups", SD_JSON_BUILD_VARIANT(cgroup_array)));
if (r < 0)
return r;
@@ -378,7 +452,7 @@ int userns_registry_store(int dir_fd, UserNamespaceInfo *info) {
if (r < 0)
return log_debug_errno(r, "Failed to format userns JSON object: %m");
_cleanup_free_ char *reg_fn = NULL, *link1_fn = NULL, *link2_fn = NULL, *owner_fn = NULL, *uid_fn = NULL;
_cleanup_free_ char *reg_fn = NULL, *link1_fn = NULL, *link2_fn = NULL, *link3_fn = NULL, *owner_fn = NULL, *uid_fn = NULL;
if (asprintf(&reg_fn, "i%" PRIu64 ".userns", info->userns_inode) < 0)
return log_oom_debug();
@@ -398,8 +472,8 @@ int userns_registry_store(int dir_fd, UserNamespaceInfo *info) {
goto fail;
}
if (uid_is_valid(info->start)) {
if (asprintf(&link2_fn, "u" UID_FMT ".userns", info->start) < 0) {
if (uid_is_valid(info->start_uid)) {
if (asprintf(&link2_fn, "u" UID_FMT ".userns", info->start_uid) < 0) {
r = log_oom_debug();
goto fail;
}
@@ -411,6 +485,19 @@ int userns_registry_store(int dir_fd, UserNamespaceInfo *info) {
}
}
if (gid_is_valid(info->start_gid)) {
if (asprintf(&link3_fn, "g" GID_FMT ".userns", info->start_gid) < 0) {
r = log_oom_debug();
goto fail;
}
r = linkat_replace(dir_fd, reg_fn, dir_fd, link3_fn);
if (r < 0) {
log_debug_errno(r, "Failed to link userns data to '%s' in registry: %m", link3_fn);
goto fail;
}
}
if (asprintf(&uid_fn, "o" UID_FMT ".owns", info->owner) < 0) {
r = log_oom_debug();
goto fail;
@@ -441,6 +528,8 @@ fail:
(void) unlinkat(dir_fd, link1_fn, /* flags= */ 0);
if (link2_fn)
(void) unlinkat(dir_fd, link2_fn, /* flags= */ 0);
if (link3_fn)
(void) unlinkat(dir_fd, link3_fn, /* flags= */ 0);
if (owner_fn)
(void) unlinkat(dir_fd, owner_fn, /* flags= */ 0);
if (uid_fn)
@@ -476,15 +565,24 @@ int userns_registry_remove(int dir_fd, UserNamespaceInfo *info) {
RET_GATHER(ret, RET_NERRNO(unlinkat(dir_fd, link1_fn, 0)));
if (uid_is_valid(info->start)) {
if (uid_is_valid(info->start_uid)) {
_cleanup_free_ char *link2_fn = NULL;
if (asprintf(&link2_fn, "u" UID_FMT ".userns", info->start) < 0)
if (asprintf(&link2_fn, "u" UID_FMT ".userns", info->start_uid) < 0)
return log_oom_debug();
RET_GATHER(ret, RET_NERRNO(unlinkat(dir_fd, link2_fn, 0)));
}
/* Remove the "g<GID>.userns" link, if a GID range start is recorded. start_gid is a gid_t, hence
 * validate it with gid_is_valid(), matching the check used when the link is created. */
if (gid_is_valid(info->start_gid)) {
        _cleanup_free_ char *link3_fn = NULL;

        if (asprintf(&link3_fn, "g" GID_FMT ".userns", info->start_gid) < 0)
                return log_oom_debug();

        /* Collect a failure into 'ret' rather than returning immediately. */
        RET_GATHER(ret, RET_NERRNO(unlinkat(dir_fd, link3_fn, 0)));
}
_cleanup_free_ char *uid_fn = NULL;
if (asprintf(&uid_fn, "o" UID_FMT ".owns", info->owner) < 0)
return log_oom_debug();

View File

@@ -1,15 +1,19 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#pragma once
#include "macro.h"
#define USER_NAMESPACE_CGROUPS_DELEGATE_MAX 16
/* Registry record describing one user namespace. It is serialized to and parsed from JSON by the
 * registry code; the JSON key of each field is noted below. */
typedef struct UserNamespaceInfo {
/* UID owning this record (JSON "owner", mandatory); also used to name the "o<UID>.owns" registry file. */
uid_t owner;
/* Name of the user namespace (JSON "name", mandatory). */
char *name;
/* Inode number of the user namespace (JSON "userns", mandatory); the loader rejects 0. */
uint64_t userns_inode;
/* Pre-change field: a single range start that was written to both uid_map and gid_map. */
uid_t start;
/* Length of the ID range (JSON "size"), written as the third column of uid_map and gid_map.
 * The loader requires start/target to be unset when this is 0. */
uint32_t size;
/* Pre-change field: a single target that was written to both uid_map and gid_map. */
uid_t target;
/* Second column of uid_map (JSON "start"); also names the "u<UID>.userns" registry link. */
uid_t start_uid;
/* First column of uid_map (JSON "target"). */
uid_t target_uid;
/* Second column of gid_map (JSON "startGid"); also names the "g<GID>.userns" registry link. */
gid_t start_gid;
/* First column of gid_map (JSON "targetGid"). */
gid_t target_gid;
/* cgroup IDs stored under JSON "cgroups"; presumably n_cgroups is the element count — TODO confirm. */
uint64_t *cgroups;
size_t n_cgroups;
} UserNamespaceInfo;
@@ -29,6 +33,7 @@ int userns_registry_open_fd(void);
int userns_registry_lock(int dir_fd);
int userns_registry_load_by_start_uid(int dir_fd, uid_t start, UserNamespaceInfo **ret);
int userns_registry_load_by_start_gid(int dir_fd, gid_t start, UserNamespaceInfo **ret);
int userns_registry_load_by_userns_inode(int dir_fd, uint64_t userns, UserNamespaceInfo **ret);
int userns_registry_load_by_name(int dir_fd, const char *name, UserNamespaceInfo **ret);
@@ -38,5 +43,6 @@ int userns_registry_remove(int dir_fd, UserNamespaceInfo *info);
int userns_registry_inode_exists(int dir_fd, uint64_t inode);
int userns_registry_name_exists(int dir_fd, const char *name);
int userns_registry_uid_exists(int dir_fd, uid_t start);
int userns_registry_gid_exists(int dir_fd, gid_t start);
int userns_registry_per_uid(int dir_fd, uid_t owner);