user-runtime-dir: enforce /tmp/ and /dev/shm/ quota

Enforce the quota on these two tmpfs at the same place where we mount
the per-user $XDG_RUNTIME_DIR. Conceptually the two are very similar,
and doing it here makes sure the limits are enforced at the same place
and with the same lifecycle.
This commit is contained in:
Lennart Poettering
2025-01-10 11:34:18 +01:00
parent 9ef12bc1d7
commit b1c95fb2e9
3 changed files with 173 additions and 33 deletions

2
README
View File

@@ -61,9 +61,11 @@ REQUIREMENTS:
≥ 5.9 for close_range() ≥ 5.9 for close_range()
≥ 5.12 for idmapped mount ≥ 5.12 for idmapped mount
≥ 5.14 for cgroup.kill ≥ 5.14 for cgroup.kill
≥ 5.14 for quotactl_fd()
≥ 6.3 for MFD_EXEC/MFD_NOEXEC_SEAL and tmpfs noswap option ≥ 6.3 for MFD_EXEC/MFD_NOEXEC_SEAL and tmpfs noswap option
≥ 6.5 for name_to_handle_at() AT_HANDLE_FID, SO_PEERPIDFD/SO_PASSPIDFD, ≥ 6.5 for name_to_handle_at() AT_HANDLE_FID, SO_PEERPIDFD/SO_PASSPIDFD,
and MOVE_MOUNT_BENEATH and MOVE_MOUNT_BENEATH
≥ 6.6 for quota support on tmpfs
≥ 6.9 for pidfs ≥ 6.9 for pidfs
✅ systemd utilizes several new kernel APIs, but will fall back gracefully ✅ systemd utilizes several new kernel APIs, but will fall back gracefully

View File

@@ -42,12 +42,13 @@
<citerefentry><refentrytitle>systemd.special</refentrytitle><manvolnum>7</manvolnum></citerefentry> for a <citerefentry><refentrytitle>systemd.special</refentrytitle><manvolnum>7</manvolnum></citerefentry> for a
list of units that form the basis of the unit hierarchies of system and user units.</para> list of units that form the basis of the unit hierarchies of system and user units.</para>
<para><filename>user@<replaceable>UID</replaceable>.service</filename> is accompanied by the <para><filename>user@<replaceable>UID</replaceable>.service</filename> is accompanied by the system unit
system unit <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename>, which <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename>, which creates the user's
creates the user's runtime directory runtime directory <filename>/run/user/<replaceable>UID</replaceable></filename> when started, and removes
<filename>/run/user/<replaceable>UID</replaceable></filename>, and then removes it when this it when it is stopped. It also might apply runtime quota settings on <filename>/tmp/</filename> and/or
unit is stopped. <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename> <filename>/dev/shm/</filename> for the
executes the <filename>systemd-user-runtime-dir</filename> binary to do the actual work.</para> user. <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename> executes the
<filename>systemd-user-runtime-dir</filename> binary to do the actual work.</para>
<para>User processes may be started by the <filename>user@.service</filename> instance, in which <para>User processes may be started by the <filename>user@.service</filename> instance, in which
case they will be part of that unit in the system hierarchy. They may also be started elsewhere, case they will be part of that unit in the system hierarchy. They may also be started elsewhere,

View File

@@ -8,15 +8,20 @@
#include "bus-error.h" #include "bus-error.h"
#include "bus-locator.h" #include "bus-locator.h"
#include "dev-setup.h" #include "dev-setup.h"
#include "devnum-util.h"
#include "fd-util.h"
#include "format-util.h" #include "format-util.h"
#include "fs-util.h" #include "fs-util.h"
#include "label-util.h" #include "label-util.h"
#include "limits-util.h" #include "limits-util.h"
#include "main-func.h" #include "main-func.h"
#include "missing_magic.h"
#include "missing_syscall.h"
#include "mkdir-label.h" #include "mkdir-label.h"
#include "mount-util.h" #include "mount-util.h"
#include "mountpoint-util.h" #include "mountpoint-util.h"
#include "path-util.h" #include "path-util.h"
#include "quota-util.h"
#include "rm-rf.h" #include "rm-rf.h"
#include "selinux-util.h" #include "selinux-util.h"
#include "smack-util.h" #include "smack-util.h"
@@ -24,6 +29,7 @@
#include "string-util.h" #include "string-util.h"
#include "strv.h" #include "strv.h"
#include "user-util.h" #include "user-util.h"
#include "userdb.h"
static int acquire_runtime_dir_properties(uint64_t *ret_size, uint64_t *ret_inodes) { static int acquire_runtime_dir_properties(uint64_t *ret_size, uint64_t *ret_inodes) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
@@ -126,6 +132,26 @@ static int user_mkdir_runtime_path(
return 0; return 0;
} }
/* Sets up the per-user runtime directory /run/user/<UID> for the user described by 'ur'.
 *
 * Refuses with -ENOMSG if the record carries no valid UID or GID. Otherwise queries the size/inode
 * settings via acquire_runtime_dir_properties() and hands everything to user_mkdir_runtime_path(),
 * whose result is returned as-is. */
static int do_mount(UserRecord *ur) {
        char path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
        uint64_t size, inodes;
        int r;

        assert(ur);

        /* Both IDs are needed below: the UID names the directory, and both are passed on as owner. */
        if (!(uid_is_valid(ur->uid) && gid_is_valid(ur->gid)))
                return log_error_errno(SYNTHETIC_ERRNO(ENOMSG), "User '%s' lacks UID or GID, refusing.", ur->user_name);

        r = acquire_runtime_dir_properties(&size, &inodes);
        if (r < 0)
                return r;

        xsprintf(path, "/run/user/" UID_FMT, ur->uid);

        log_debug("Will mount %s owned by "UID_FMT":"GID_FMT, path, ur->uid, ur->gid);

        return user_mkdir_runtime_path(path, ur->uid, ur->gid, size, inodes);
}
static int user_remove_runtime_path(const char *runtime_path) { static int user_remove_runtime_path(const char *runtime_path) {
int r; int r;
@@ -149,31 +175,6 @@ static int user_remove_runtime_path(const char *runtime_path) {
return 0; return 0;
} }
/* NOTE(review): this appears to be the pre-change variant of do_mount() (the removed side of the
 * diff), which takes a user name string rather than a user record.
 *
 * Looks up the user's UID/GID via get_user_creds(), queries the runtime directory size/inode
 * settings, and then passes /run/user/<UID> to user_mkdir_runtime_path(). Returns a negative
 * errno-style value if the lookup or the property query fails, otherwise whatever
 * user_mkdir_runtime_path() returns. */
static int do_mount(const char *user) {
char runtime_path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
uint64_t runtime_dir_size, runtime_dir_inodes;
uid_t uid;
gid_t gid;
int r;
/* Resolve the user; the error message is chosen by error code (-ESRCH: no such user, -ENOMSG:
 * invalid UID or main group, anything else: generic lookup failure). */
r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
if (r < 0)
return log_error_errno(r,
r == -ESRCH ? "No such user \"%s\"" :
r == -ENOMSG ? "UID \"%s\" is invalid or has an invalid main group"
: "Failed to look up user \"%s\": %m",
user);
r = acquire_runtime_dir_properties(&runtime_dir_size, &runtime_dir_inodes);
if (r < 0)
return r;
/* The buffer is sized for the "/run/user/" prefix plus the decimal form of a uid_t. */
xsprintf(runtime_path, "/run/user/" UID_FMT, uid);
log_debug("Will mount %s owned by "UID_FMT":"GID_FMT, runtime_path, uid, gid);
return user_mkdir_runtime_path(runtime_path, uid, gid, runtime_dir_size, runtime_dir_inodes);
}
static int do_umount(const char *user) { static int do_umount(const char *user) {
char runtime_path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)]; char runtime_path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
uid_t uid; uid_t uid;
@@ -197,6 +198,126 @@ static int do_umount(const char *user) {
return user_remove_runtime_path(runtime_path); return user_remove_runtime_path(runtime_path);
} }
/* Applies a per-UID block quota to each tmpfs listed in 'paths'.
 *
 * paths: NULL-terminated list of directories to process. Paths that resolve to the same filesystem
 *        (same st_dev) are processed only once.
 * uid:   the user to set the quota for; must be valid.
 * limit: absolute upper bound for the quota, in bytes.
 * scale: relative upper bound, as a fraction of the file system size, where 0…UINT32_MAX maps to
 *        0…100%. UINT32_MAX means "no relative bound".
 *
 * The effective quota is the smaller of the absolute and the relative bound. This is best-effort:
 * per-path failures are logged and skipped. Returns 0, or a negative errno-style value on OOM. */
static int apply_tmpfs_quota(
                char **paths,
                uid_t uid,
                uint64_t limit,
                uint32_t scale) {

        _cleanup_set_free_ Set *processed = NULL;
        int r;

        assert(uid_is_valid(uid));

        STRV_FOREACH(p, paths) {
                _cleanup_close_ int fd = open(*p, O_DIRECTORY|O_CLOEXEC);
                if (fd < 0) {
                        log_warning_errno(errno, "Failed to open '%s' in order to set quota, ignoring: %m", *p);
                        continue;
                }

                struct stat st;
                if (fstat(fd, &st) < 0) {
                        log_warning_errno(errno, "Failed to stat '%s' in order to set quota, ignoring: %m", *p);
                        continue;
                }

                /* Cover for bind mounted or symlinked /var/tmp/ + /tmp/ */
                if (set_contains(processed, DEVNUM_TO_PTR(st.st_dev))) {
                        log_debug("Not setting quota on '%s', since already processed.", *p);
                        continue;
                }

                /* Remember we already dealt with this fs, even if the subsequent operation fails, since
                 * there's no point in applying quota twice, regardless if it succeeds or not. */
                if (set_ensure_put(&processed, /* hash_ops= */ NULL, DEVNUM_TO_PTR(st.st_dev)) < 0)
                        return log_oom();

                struct statfs sfs;
                if (fstatfs(fd, &sfs) < 0) {
                        log_warning_errno(errno, "Failed to statfs '%s' in order to set quota, ignoring: %m", *p);
                        continue;
                }

                if (!is_fs_type(&sfs, TMPFS_MAGIC)) {
                        log_debug("Not setting quota on '%s', since not tmpfs.", *p);
                        continue;
                }

                /* Read the current quota first, so that we only touch the block limits and can skip
                 * the update if nothing changes. -ESRCH is treated as "no quota record for this UID
                 * yet", in which case we start from an all-zero request. */
                struct dqblk req;
                r = RET_NERRNO(quotactl_fd(fd, QCMD_FIXED(Q_GETQUOTA, USRQUOTA), uid, &req));
                if (r == -ESRCH)
                        zero(req);
                else if (ERRNO_IS_NEG_NOT_SUPPORTED(r)) {
                        log_debug_errno(r, "No UID quota support on %s, not setting quota: %m", *p);
                        continue;
                } else if (ERRNO_IS_NEG_PRIVILEGE(r)) {
                        log_debug_errno(r, "Lacking privileges to query UID quota on %s, not setting quota: %m", *p);
                        continue;
                } else if (r < 0) {
                        log_warning_errno(r, "Failed to query disk quota on %s for UID " UID_FMT ", ignoring: %m", *p, uid);
                        continue;
                }

                /* Relative bound: file system size in bytes times scale/UINT32_MAX. The product is
                 * computed in floating point so that it cannot wrap around. Since scale < UINT32_MAX
                 * in the last branch, the result never exceeds the file system size. */
                uint64_t v =
                        (scale == 0) ? 0 :
                        (scale == UINT32_MAX) ? UINT64_MAX :
                        (uint64_t) ((double) sfs.f_blocks * (double) sfs.f_frsize * scale / UINT32_MAX);

                v = MIN(v, limit);
                v /= QIF_DQBLKSIZE; /* bytes → quota blocks */

                if (FLAGS_SET(req.dqb_valid, QIF_BLIMITS) && v == req.dqb_bhardlimit) {
                        /* Shortcut things if everything is set up properly already */
                        log_debug("Configured quota on '%s' already matches the intended setting, not updating quota.", *p);
                        continue;
                }

                /* Only the block limits are marked valid, so only those are updated. */
                req.dqb_valid = QIF_BLIMITS;
                req.dqb_bsoftlimit = req.dqb_bhardlimit = v;

                r = RET_NERRNO(quotactl_fd(fd, QCMD_FIXED(Q_SETQUOTA, USRQUOTA), uid, &req));
                if (r == -ESRCH) {
                        log_debug_errno(r, "Not setting UID quota on %s since UID quota is not supported: %m", *p);
                        continue;
                } else if (ERRNO_IS_NEG_PRIVILEGE(r)) {
                        log_debug_errno(r, "Lacking privileges to set UID quota on %s, skipping: %m", *p);
                        continue;
                } else if (r < 0) {
                        log_warning_errno(r, "Failed to set disk quota on %s for UID " UID_FMT ", ignoring: %m", *p, uid);
                        continue;
                }

                log_info("Successfully configured disk quota for UID " UID_FMT " on %s to %s", uid, *p, FORMAT_BYTES(v * QIF_DQBLKSIZE));
        }

        return 0;
}
/* Applies the tmpfs quota settings of the user record 'ur': the tmp limit to /tmp and /var/tmp,
 * and the dev-shm limit to /dev/shm.
 *
 * Does nothing (returns 0) for the root user. Returns -ENOMSG if the record has no valid UID, and
 * otherwise 0 or the first negative result of apply_tmpfs_quota(). */
static int do_tmpfs_quota(UserRecord *ur) {
        int r;

        assert(ur);

        if (user_record_is_root(ur)) {
                log_debug("Not applying tmpfs quota to root user.");
                return 0;
        }

        if (!uid_is_valid(ur->uid))
                return log_error_errno(SYNTHETIC_ERRNO(ENOMSG), "User '%s' lacks UID, refusing.", ur->user_name);

        /* /tmp and /var/tmp share one limit; apply_tmpfs_quota() handles the case where both live on
         * the same file system. */
        char **tmp_paths = STRV_MAKE("/tmp", "/var/tmp");
        r = apply_tmpfs_quota(tmp_paths, ur->uid, ur->tmp_limit.limit, user_record_tmp_limit_scale(ur));
        if (r < 0)
                return r;

        char **shm_paths = STRV_MAKE("/dev/shm");
        r = apply_tmpfs_quota(shm_paths, ur->uid, ur->dev_shm_limit.limit, user_record_dev_shm_limit_scale(ur));
        if (r < 0)
                return r;

        return 0;
}
static int run(int argc, char *argv[]) { static int run(int argc, char *argv[]) {
int r; int r;
@@ -218,10 +339,26 @@ static int run(int argc, char *argv[]) {
if (r < 0) if (r < 0)
return r; return r;
if (streq(verb, "start")) if (streq(verb, "start")) {
return do_mount(user); _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
r = userdb_by_name(user, USERDB_PARSE_NUMERIC|USERDB_SUPPRESS_SHADOW, &ur);
if (r == -ESRCH)
return log_error_errno(r, "User '%s' does not exist: %m", user);
if (r < 0)
return log_error_errno(r, "Failed to resolve user '%s': %m", user);
/* We do two things here: mount the per-user XDG_RUNTIME_DIR, and set up tmpfs quota on /tmp/
* and /dev/shm/. */
r = 0;
RET_GATHER(r, do_mount(ur));
RET_GATHER(r, do_tmpfs_quota(ur));
return r;
}
if (streq(verb, "stop")) if (streq(verb, "stop"))
return do_umount(user); return do_umount(user);
assert_not_reached(); assert_not_reached();
} }