user-runtime-dir: enforce /tmp/ and /dev/shm/ quota

Enforce the quota on these two tmpfs at the same place where we mount
the per-user $XDG_RUNTIME_DIR. Conceptually these are very similar,
and doing it here ensures the limits are enforced at the same place and
with the same lifecycle.
This commit is contained in:
Lennart Poettering
2025-01-10 11:34:18 +01:00
parent 9ef12bc1d7
commit b1c95fb2e9
3 changed files with 173 additions and 33 deletions

2
README
View File

@@ -61,9 +61,11 @@ REQUIREMENTS:
≥ 5.9 for close_range()
≥ 5.12 for idmapped mount
≥ 5.14 for cgroup.kill
≥ 5.14 for quotactl_fd()
≥ 6.3 for MFD_EXEC/MFD_NOEXEC_SEAL and tmpfs noswap option
≥ 6.5 for name_to_handle_at() AT_HANDLE_FID, SO_PEERPIDFD/SO_PASSPIDFD,
and MOVE_MOUNT_BENEATH
≥ 6.6 for quota support on tmpfs
≥ 6.9 for pidfs
✅ systemd utilizes several new kernel APIs, but will fall back gracefully

View File

@@ -42,12 +42,13 @@
<citerefentry><refentrytitle>systemd.special</refentrytitle><manvolnum>7</manvolnum></citerefentry> for a
list of units that form the basis of the unit hierarchies of system and user units.</para>
<para><filename>user@<replaceable>UID</replaceable>.service</filename> is accompanied by the
system unit <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename>, which
creates the user's runtime directory
<filename>/run/user/<replaceable>UID</replaceable></filename>, and then removes it when this
unit is stopped. <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename>
executes the <filename>systemd-user-runtime-dir</filename> binary to do the actual work.</para>
<para><filename>user@<replaceable>UID</replaceable>.service</filename> is accompanied by the system unit
<filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename>, which creates the user's
runtime directory <filename>/run/user/<replaceable>UID</replaceable></filename> when started, and removes
it again when the unit is stopped. It may also apply runtime quota settings on <filename>/tmp/</filename> and/or
<filename>/dev/shm/</filename> for the
user. <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename> executes the
<filename>systemd-user-runtime-dir</filename> binary to do the actual work.</para>
<para>User processes may be started by the <filename>user@.service</filename> instance, in which
case they will be part of that unit in the system hierarchy. They may also be started elsewhere,

View File

@@ -8,15 +8,20 @@
#include "bus-error.h"
#include "bus-locator.h"
#include "dev-setup.h"
#include "devnum-util.h"
#include "fd-util.h"
#include "format-util.h"
#include "fs-util.h"
#include "label-util.h"
#include "limits-util.h"
#include "main-func.h"
#include "missing_magic.h"
#include "missing_syscall.h"
#include "mkdir-label.h"
#include "mount-util.h"
#include "mountpoint-util.h"
#include "path-util.h"
#include "quota-util.h"
#include "rm-rf.h"
#include "selinux-util.h"
#include "smack-util.h"
@@ -24,6 +29,7 @@
#include "string-util.h"
#include "strv.h"
#include "user-util.h"
#include "userdb.h"
static int acquire_runtime_dir_properties(uint64_t *ret_size, uint64_t *ret_inodes) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
@@ -126,6 +132,26 @@ static int user_mkdir_runtime_path(
return 0;
}
/* Sets up the runtime directory /run/user/<UID> for the user described by 'ur'.
 *
 * Refuses (with -ENOMSG) if the record carries no valid UID or GID. The size and inode limits are
 * obtained via acquire_runtime_dir_properties(), and the actual work is then handed over to
 * user_mkdir_runtime_path(). Returns that function's result, or a negative errno-style error. */
static int do_mount(UserRecord *ur) {
        char path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
        uint64_t size, inodes;
        int r;

        assert(ur);

        if (!(uid_is_valid(ur->uid) && gid_is_valid(ur->gid)))
                return log_error_errno(SYNTHETIC_ERRNO(ENOMSG), "User '%s' lacks UID or GID, refusing.", ur->user_name);

        r = acquire_runtime_dir_properties(&size, &inodes);
        if (r < 0)
                return r;

        /* The buffer is sized for the fixed prefix plus the longest possible decimal UID. */
        xsprintf(path, "/run/user/" UID_FMT, ur->uid);

        log_debug("Will mount %s owned by "UID_FMT":"GID_FMT, path, ur->uid, ur->gid);

        return user_mkdir_runtime_path(path, ur->uid, ur->gid, size, inodes);
}
static int user_remove_runtime_path(const char *runtime_path) {
int r;
@@ -149,31 +175,6 @@ static int user_remove_runtime_path(const char *runtime_path) {
return 0;
}
/* Sets up the runtime directory /run/user/<UID> for the user named (or numerically identified) by
 * 'user'.
 *
 * The UID and GID are resolved via get_user_creds(); lookup failures are logged with a message
 * matching the error and returned to the caller. The size and inode limits come from
 * acquire_runtime_dir_properties(), and the actual work is done by user_mkdir_runtime_path(). */
static int do_mount(const char *user) {
        int r;

        uid_t uid;
        gid_t gid;
        r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
        if (r == -ESRCH)
                return log_error_errno(r, "No such user \"%s\"", user);
        if (r == -ENOMSG)
                return log_error_errno(r, "UID \"%s\" is invalid or has an invalid main group", user);
        if (r < 0)
                return log_error_errno(r, "Failed to look up user \"%s\": %m", user);

        uint64_t size, inodes;
        r = acquire_runtime_dir_properties(&size, &inodes);
        if (r < 0)
                return r;

        /* The buffer is sized for the fixed prefix plus the longest possible decimal UID. */
        char path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
        xsprintf(path, "/run/user/" UID_FMT, uid);

        log_debug("Will mount %s owned by "UID_FMT":"GID_FMT, path, uid, gid);

        return user_mkdir_runtime_path(path, uid, gid, size, inodes);
}
static int do_umount(const char *user) {
char runtime_path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
uid_t uid;
@@ -197,6 +198,126 @@ static int do_umount(const char *user) {
return user_remove_runtime_path(runtime_path);
}
/* Applies a per-UID block quota to each tmpfs file system found at the specified paths.
 *
 * paths: NULL-terminated list of directories to process. Paths that refer to a file system that
 *        was already processed in this call (same st_dev), or that are not on tmpfs, are skipped.
 * uid:   the user to set the quota for; must be valid.
 * limit: absolute upper bound for the quota, in bytes.
 * scale: relative upper bound, as a fraction of the file system size in units of 1/UINT32_MAX,
 *        i.e. 0 means "nothing" and UINT32_MAX means "no relative limit".
 *
 * The quota that is set is the smaller of the absolute and the relative limit, rounded down to
 * quota blocks (QIF_DQBLKSIZE). This is best-effort: failures on an individual path are logged
 * and the path is skipped. Returns 0 in that case too; only running out of memory is reported
 * as an error. */
static int apply_tmpfs_quota(
                char **paths,
                uid_t uid,
                uint64_t limit,
                uint32_t scale) {

        _cleanup_set_free_ Set *processed = NULL;
        int r;

        assert(uid_is_valid(uid));

        STRV_FOREACH(p, paths) {
                _cleanup_close_ int fd = open(*p, O_DIRECTORY|O_CLOEXEC);
                if (fd < 0) {
                        log_warning_errno(errno, "Failed to open '%s' in order to set quota, ignoring: %m", *p);
                        continue;
                }

                struct stat st;
                if (fstat(fd, &st) < 0) {
                        log_warning_errno(errno, "Failed to stat '%s' in order to set quota, ignoring: %m", *p);
                        continue;
                }

                /* Cover for bind mounted or symlinked /var/tmp/ + /tmp/ */
                if (set_contains(processed, DEVNUM_TO_PTR(st.st_dev))) {
                        log_debug("Not setting quota on '%s', since already processed.", *p);
                        continue;
                }

                /* Remember we already dealt with this fs, even if the subsequent operation fails, since
                 * there's no point in applying quota twice, regardless if it succeeds or not. */
                if (set_ensure_put(&processed, /* hash_ops= */ NULL, DEVNUM_TO_PTR(st.st_dev)) < 0)
                        return log_oom();

                struct statfs sfs;
                if (fstatfs(fd, &sfs) < 0) {
                        log_warning_errno(errno, "Failed to statfs '%s' in order to set quota, ignoring: %m", *p);
                        continue;
                }

                if (!is_fs_type(&sfs, TMPFS_MAGIC)) {
                        log_debug("Not setting quota on '%s', since not tmpfs.", *p);
                        continue;
                }

                /* Query the current quota first, so that we can skip the update if it already matches.
                 * ESRCH is treated as "no quota record yet", in which case we start from an all-zero
                 * request. */
                struct dqblk req;
                r = RET_NERRNO(quotactl_fd(fd, QCMD_FIXED(Q_GETQUOTA, USRQUOTA), uid, &req));
                if (r == -ESRCH)
                        zero(req);
                else if (ERRNO_IS_NEG_NOT_SUPPORTED(r)) {
                        log_debug_errno(r, "No UID quota support on %s, not setting quota: %m", *p);
                        continue;
                } else if (ERRNO_IS_NEG_PRIVILEGE(r)) {
                        log_debug_errno(r, "Lacking privileges to query UID quota on %s, not setting quota: %m", *p);
                        continue;
                } else if (r < 0) {
                        log_warning_errno(r, "Failed to query disk quota on %s for UID " UID_FMT ", ignoring: %m", *p, uid);
                        continue;
                }

                /* Translate the relative limit into bytes: 'scale' is the fraction scale/UINT32_MAX of
                 * the total file system size, hence multiply by the scale and divide by UINT32_MAX (not
                 * the other way around, which would turn e.g. 50% into 200%). The two boundary values
                 * are special-cased so that they map exactly to "nothing" and "no relative limit". */
                uint64_t v =
                        (scale == 0) ? 0 :
                        (scale == UINT32_MAX) ? UINT64_MAX :
                        (uint64_t) ((double) (sfs.f_blocks * sfs.f_frsize) * scale / UINT32_MAX);

                /* The absolute limit caps the relative one; then convert bytes to quota blocks. */
                v = MIN(v, limit);
                v /= QIF_DQBLKSIZE;

                if (FLAGS_SET(req.dqb_valid, QIF_BLIMITS) && v == req.dqb_bhardlimit) {
                        /* Shortcut things if everything is set up properly already */
                        log_debug("Configured quota on '%s' already matches the intended setting, not updating quota.", *p);
                        continue;
                }

                /* Only the block limits are updated; soft and hard limit are set to the same value. */
                req.dqb_valid = QIF_BLIMITS;
                req.dqb_bsoftlimit = req.dqb_bhardlimit = v;

                r = RET_NERRNO(quotactl_fd(fd, QCMD_FIXED(Q_SETQUOTA, USRQUOTA), uid, &req));
                if (r == -ESRCH) {
                        log_debug_errno(r, "Not setting UID quota on %s since UID quota is not supported: %m", *p);
                        continue;
                } else if (ERRNO_IS_NEG_PRIVILEGE(r)) {
                        log_debug_errno(r, "Lacking privileges to set UID quota on %s, skipping: %m", *p);
                        continue;
                } else if (r < 0) {
                        log_warning_errno(r, "Failed to set disk quota on %s for UID " UID_FMT ", ignoring: %m", *p, uid);
                        continue;
                }

                log_info("Successfully configured disk quota for UID " UID_FMT " on %s to %s", uid, *p, FORMAT_BYTES(v * QIF_DQBLKSIZE));
        }

        return 0;
}
/* Applies the tmpfs quota settings of the user record 'ur'.
 *
 * The record's tmp limit is applied to /tmp and /var/tmp, and its dev_shm limit to /dev/shm, each
 * via apply_tmpfs_quota(). Nothing is done for the root user. Refuses (with -ENOMSG) if the record
 * carries no valid UID. Returns 0 on success, or the first negative error encountered. */
static int do_tmpfs_quota(UserRecord *ur) {
        int r;

        assert(ur);

        if (user_record_is_root(ur)) {
                log_debug("Not applying tmpfs quota to root user.");
                return 0;
        }

        if (!uid_is_valid(ur->uid))
                return log_error_errno(SYNTHETIC_ERRNO(ENOMSG), "User '%s' lacks UID, refusing.", ur->user_name);

        /* /tmp and /var/tmp share one limit; the second step only runs if the first did not fail. */
        r = apply_tmpfs_quota(STRV_MAKE("/tmp", "/var/tmp"), ur->uid, ur->tmp_limit.limit, user_record_tmp_limit_scale(ur));
        if (r >= 0)
                r = apply_tmpfs_quota(STRV_MAKE("/dev/shm"), ur->uid, ur->dev_shm_limit.limit, user_record_dev_shm_limit_scale(ur));

        return r < 0 ? r : 0;
}
static int run(int argc, char *argv[]) {
int r;
@@ -218,10 +339,26 @@ static int run(int argc, char *argv[]) {
if (r < 0)
return r;
if (streq(verb, "start"))
return do_mount(user);
if (streq(verb, "start")) {
_cleanup_(user_record_unrefp) UserRecord *ur = NULL;
r = userdb_by_name(user, USERDB_PARSE_NUMERIC|USERDB_SUPPRESS_SHADOW, &ur);
if (r == -ESRCH)
return log_error_errno(r, "User '%s' does not exist: %m", user);
if (r < 0)
return log_error_errno(r, "Failed to resolve user '%s': %m", user);
/* We do two things here: mount the per-user XDG_RUNTIME_DIR, and set up tmpfs quota on /tmp/
* and /dev/shm/. */
r = 0;
RET_GATHER(r, do_mount(ur));
RET_GATHER(r, do_tmpfs_quota(ur));
return r;
}
if (streq(verb, "stop"))
return do_umount(user);
assert_not_reached();
}