mirror of
https://github.com/morgan9e/systemd
synced 2026-04-14 08:25:20 +09:00
896 lines
28 KiB
C
896 lines
28 KiB
C
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
|
|
|
#include <unistd.h>
|
|
|
|
#include "cgroup-setup.h"
|
|
#include "cgroup-util.h"
|
|
#include "errno-util.h"
|
|
#include "fd-util.h"
|
|
#include "fileio.h"
|
|
#include "fs-util.h"
|
|
#include "missing_magic.h"
|
|
#include "mkdir.h"
|
|
#include "parse-util.h"
|
|
#include "path-util.h"
|
|
#include "process-util.h"
|
|
#include "recurse-dir.h"
|
|
#include "stdio-util.h"
|
|
#include "string-util.h"
|
|
#include "user-util.h"
|
|
|
|
int cg_weight_parse(const char *s, uint64_t *ret) {
|
|
uint64_t u;
|
|
int r;
|
|
|
|
assert(s);
|
|
assert(ret);
|
|
|
|
if (isempty(s)) {
|
|
*ret = CGROUP_WEIGHT_INVALID;
|
|
return 0;
|
|
}
|
|
|
|
r = safe_atou64(s, &u);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
|
|
return -ERANGE;
|
|
|
|
*ret = u;
|
|
return 0;
|
|
}
|
|
|
|
int cg_cpu_weight_parse(const char *s, uint64_t *ret) {
|
|
assert(s);
|
|
assert(ret);
|
|
|
|
if (streq(s, "idle"))
|
|
return *ret = CGROUP_WEIGHT_IDLE;
|
|
|
|
return cg_weight_parse(s, ret);
|
|
}
|
|
|
|
int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
|
|
uint64_t u;
|
|
int r;
|
|
|
|
assert(s);
|
|
assert(ret);
|
|
|
|
if (isempty(s)) {
|
|
*ret = CGROUP_CPU_SHARES_INVALID;
|
|
return 0;
|
|
}
|
|
|
|
r = safe_atou64(s, &u);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
|
|
return -ERANGE;
|
|
|
|
*ret = u;
|
|
return 0;
|
|
}
|
|
|
|
int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
|
|
uint64_t u;
|
|
int r;
|
|
|
|
assert(s);
|
|
assert(ret);
|
|
|
|
if (isempty(s)) {
|
|
*ret = CGROUP_BLKIO_WEIGHT_INVALID;
|
|
return 0;
|
|
}
|
|
|
|
r = safe_atou64(s, &u);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
|
|
return -ERANGE;
|
|
|
|
*ret = u;
|
|
return 0;
|
|
}
|
|
|
|
static int trim_cb(
|
|
RecurseDirEvent event,
|
|
const char *path,
|
|
int dir_fd,
|
|
int inode_fd,
|
|
const struct dirent *de,
|
|
const struct statx *sx,
|
|
void *userdata) {
|
|
|
|
/* Failures to delete inner cgroup we ignore (but debug log in case error code is unexpected) */
|
|
if (event == RECURSE_DIR_LEAVE &&
|
|
de->d_type == DT_DIR &&
|
|
unlinkat(dir_fd, de->d_name, AT_REMOVEDIR) < 0 &&
|
|
!IN_SET(errno, ENOENT, ENOTEMPTY, EBUSY))
|
|
log_debug_errno(errno, "Failed to trim inner cgroup %s, ignoring: %m", path);
|
|
|
|
return RECURSE_DIR_CONTINUE;
|
|
}
|
|
|
|
int cg_trim(const char *controller, const char *path, bool delete_root) {
|
|
_cleanup_free_ char *fs = NULL;
|
|
int r, q;
|
|
|
|
assert(controller);
|
|
|
|
r = cg_get_path(controller, path, NULL, &fs);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
r = recurse_dir_at(
|
|
AT_FDCWD,
|
|
fs,
|
|
/* statx_mask = */ 0,
|
|
/* n_depth_max = */ UINT_MAX,
|
|
RECURSE_DIR_ENSURE_TYPE,
|
|
trim_cb,
|
|
/* userdata = */ NULL);
|
|
if (r == -ENOENT) /* non-existing is the ultimate trimming, hence no error */
|
|
r = 0;
|
|
else if (r < 0)
|
|
log_debug_errno(r, "Failed to trim subcgroups of '%s': %m", path);
|
|
|
|
/* If we shall delete the top-level cgroup, then propagate the failure to do so (except if it is
|
|
* already gone anyway). Also, let's debug log about this failure, except if the error code is an
|
|
* expected one. */
|
|
if (delete_root && !empty_or_root(path) &&
|
|
rmdir(fs) < 0 && errno != ENOENT) {
|
|
if (!IN_SET(errno, ENOTEMPTY, EBUSY))
|
|
log_debug_errno(errno, "Failed to trim cgroup '%s': %m", path);
|
|
RET_GATHER(r, -errno);
|
|
}
|
|
|
|
q = cg_hybrid_unified();
|
|
if (q < 0)
|
|
return q;
|
|
if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER))
|
|
(void) cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
|
|
|
|
return r;
|
|
}
|
|
|
|
/* Create a cgroup in the hierarchy of controller.
|
|
* Returns 0 if the group already existed, 1 on success, negative otherwise.
|
|
*/
|
|
int cg_create(const char *controller, const char *path) {
|
|
_cleanup_free_ char *fs = NULL;
|
|
int r;
|
|
|
|
assert(controller);
|
|
|
|
r = cg_get_path_and_check(controller, path, NULL, &fs);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
r = mkdir_parents(fs, 0755);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
r = RET_NERRNO(mkdir(fs, 0755));
|
|
if (r == -EEXIST)
|
|
return 0;
|
|
if (r < 0)
|
|
return r;
|
|
|
|
r = cg_hybrid_unified();
|
|
if (r < 0)
|
|
return r;
|
|
if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
|
|
r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
|
|
if (r < 0)
|
|
log_warning_errno(r, "Failed to create compat systemd cgroup '%s', ignoring: %m", path);
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
int cg_attach(const char *controller, const char *path, pid_t pid) {
|
|
_cleanup_free_ char *fs = NULL;
|
|
char c[DECIMAL_STR_MAX(pid_t) + 2];
|
|
int r;
|
|
|
|
assert(controller);
|
|
assert(path);
|
|
assert(pid >= 0);
|
|
|
|
r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
if (pid == 0)
|
|
pid = getpid_cached();
|
|
|
|
xsprintf(c, PID_FMT "\n", pid);
|
|
|
|
r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
|
|
if (r == -EOPNOTSUPP && cg_is_threaded(path) > 0)
|
|
/* When the threaded mode is used, we cannot read/write the file. Let's return recognizable error. */
|
|
return -EUCLEAN;
|
|
if (r < 0)
|
|
return r;
|
|
|
|
r = cg_hybrid_unified();
|
|
if (r < 0)
|
|
return r;
|
|
if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
|
|
r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
|
|
if (r < 0)
|
|
log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup '%s', ignoring: %m", pid, path);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int cg_fd_attach(int fd, pid_t pid) {
|
|
char c[DECIMAL_STR_MAX(pid_t) + 2];
|
|
|
|
assert(fd >= 0);
|
|
assert(pid >= 0);
|
|
|
|
if (pid == 0)
|
|
pid = getpid_cached();
|
|
|
|
xsprintf(c, PID_FMT "\n", pid);
|
|
|
|
return write_string_file_at(fd, "cgroup.procs", c, WRITE_STRING_FILE_DISABLE_BUFFER);
|
|
}
|
|
|
|
int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
|
|
int r;
|
|
|
|
assert(controller);
|
|
assert(path);
|
|
assert(pid >= 0);
|
|
|
|
r = cg_attach(controller, path, pid);
|
|
if (r < 0) {
|
|
char prefix[strlen(path) + 1];
|
|
|
|
/* This didn't work? Then let's try all prefixes of the destination */
|
|
|
|
PATH_FOREACH_PREFIX(prefix, path) {
|
|
int q;
|
|
|
|
q = cg_attach(controller, prefix, pid);
|
|
if (q >= 0)
|
|
return q;
|
|
}
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
|
|
int r, q;
|
|
|
|
/* This does not remove the cgroup on failure */
|
|
|
|
assert(pid >= 0);
|
|
|
|
r = cg_create(controller, path);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
q = cg_attach(controller, path, pid);
|
|
if (q < 0)
|
|
return q;
|
|
|
|
return r;
|
|
}
|
|
|
|
int cg_set_access(
|
|
const char *controller,
|
|
const char *path,
|
|
uid_t uid,
|
|
gid_t gid) {
|
|
|
|
struct Attribute {
|
|
const char *name;
|
|
bool fatal;
|
|
};
|
|
|
|
/* cgroup v1, aka legacy/non-unified */
|
|
static const struct Attribute legacy_attributes[] = {
|
|
{ "cgroup.procs", true },
|
|
{ "tasks", false },
|
|
{ "cgroup.clone_children", false },
|
|
{},
|
|
};
|
|
|
|
/* cgroup v2, aka unified */
|
|
static const struct Attribute unified_attributes[] = {
|
|
{ "cgroup.procs", true },
|
|
{ "cgroup.subtree_control", true },
|
|
{ "cgroup.threads", false },
|
|
{ "memory.oom.group", false },
|
|
{ "memory.reclaim", false },
|
|
{},
|
|
};
|
|
|
|
static const struct Attribute* const attributes[] = {
|
|
[false] = legacy_attributes,
|
|
[true] = unified_attributes,
|
|
};
|
|
|
|
_cleanup_free_ char *fs = NULL;
|
|
const struct Attribute *i;
|
|
int r, unified;
|
|
|
|
assert(path);
|
|
|
|
if (uid == UID_INVALID && gid == GID_INVALID)
|
|
return 0;
|
|
|
|
unified = cg_unified_controller(controller);
|
|
if (unified < 0)
|
|
return unified;
|
|
|
|
/* Configure access to the cgroup itself */
|
|
r = cg_get_path(controller, path, NULL, &fs);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
r = chmod_and_chown(fs, 0755, uid, gid);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
/* Configure access to the cgroup's attributes */
|
|
for (i = attributes[unified]; i->name; i++) {
|
|
fs = mfree(fs);
|
|
|
|
r = cg_get_path(controller, path, i->name, &fs);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
r = chmod_and_chown(fs, 0644, uid, gid);
|
|
if (r < 0) {
|
|
if (i->fatal)
|
|
return r;
|
|
|
|
log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
|
|
}
|
|
}
|
|
|
|
if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
|
|
r = cg_hybrid_unified();
|
|
if (r < 0)
|
|
return r;
|
|
if (r > 0) {
|
|
/* Always propagate access mode from unified to legacy controller */
|
|
r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
|
|
if (r < 0)
|
|
log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
struct access_callback_data {
|
|
uid_t uid;
|
|
gid_t gid;
|
|
int error;
|
|
};
|
|
|
|
static int access_callback(
|
|
RecurseDirEvent event,
|
|
const char *path,
|
|
int dir_fd,
|
|
int inode_fd,
|
|
const struct dirent *de,
|
|
const struct statx *sx,
|
|
void *userdata) {
|
|
|
|
if (!IN_SET(event, RECURSE_DIR_ENTER, RECURSE_DIR_ENTRY))
|
|
return RECURSE_DIR_CONTINUE;
|
|
|
|
struct access_callback_data *d = ASSERT_PTR(userdata);
|
|
|
|
assert(path);
|
|
assert(inode_fd >= 0);
|
|
|
|
if (fchownat(inode_fd, "", d->uid, d->gid, AT_EMPTY_PATH) < 0)
|
|
RET_GATHER(d->error, log_debug_errno(errno, "Failed to change ownership of '%s', ignoring: %m", path));
|
|
|
|
return RECURSE_DIR_CONTINUE;
|
|
}
|
|
|
|
int cg_set_access_recursive(
|
|
const char *controller,
|
|
const char *path,
|
|
uid_t uid,
|
|
gid_t gid) {
|
|
|
|
_cleanup_close_ int fd = -EBADF;
|
|
_cleanup_free_ char *fs = NULL;
|
|
int r;
|
|
|
|
assert(controller);
|
|
assert(path);
|
|
|
|
/* A recursive version of cg_set_access(). But note that this one changes ownership of *all* files,
|
|
* not just the allowlist that cg_set_access() uses. Use cg_set_access() on the cgroup you want to
|
|
* delegate, and cg_set_access_recursive() for any subcgroups you might want to create below it. */
|
|
|
|
if (!uid_is_valid(uid) && !gid_is_valid(gid))
|
|
return 0;
|
|
|
|
r = cg_get_path(controller, path, NULL, &fs);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
fd = open(fs, O_DIRECTORY|O_CLOEXEC);
|
|
if (fd < 0)
|
|
return -errno;
|
|
|
|
struct access_callback_data d = {
|
|
.uid = uid,
|
|
.gid = gid,
|
|
};
|
|
|
|
r = recurse_dir(fd,
|
|
fs,
|
|
/* statx_mask= */ 0,
|
|
/* n_depth_max= */ UINT_MAX,
|
|
RECURSE_DIR_SAME_MOUNT|RECURSE_DIR_INODE_FD|RECURSE_DIR_TOPLEVEL,
|
|
access_callback,
|
|
&d);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
assert(d.error <= 0);
|
|
return d.error;
|
|
}
|
|
|
|
int cg_migrate(
|
|
const char *cfrom,
|
|
const char *pfrom,
|
|
const char *cto,
|
|
const char *pto,
|
|
CGroupFlags flags) {
|
|
|
|
_cleanup_set_free_ Set *s = NULL;
|
|
bool done;
|
|
int r, ret = 0;
|
|
|
|
assert(cfrom);
|
|
assert(pfrom);
|
|
assert(cto);
|
|
assert(pto);
|
|
|
|
do {
|
|
_cleanup_fclose_ FILE *f = NULL;
|
|
pid_t pid;
|
|
|
|
done = true;
|
|
|
|
r = cg_enumerate_processes(cfrom, pfrom, &f);
|
|
if (r < 0)
|
|
return RET_GATHER(ret, r);
|
|
|
|
while ((r = cg_read_pid(f, &pid, flags)) > 0) {
|
|
/* Throw an error if unmappable PIDs are in output, we can't migrate those. */
|
|
if (pid == 0)
|
|
return -EREMOTE;
|
|
|
|
/* This might do weird stuff if we aren't a single-threaded program. However, we
|
|
* luckily know we are. */
|
|
if (FLAGS_SET(flags, CGROUP_IGNORE_SELF) && pid == getpid_cached())
|
|
continue;
|
|
|
|
if (set_contains(s, PID_TO_PTR(pid)))
|
|
continue;
|
|
|
|
if (pid_is_kernel_thread(pid) > 0)
|
|
continue;
|
|
|
|
r = cg_attach(cto, pto, pid);
|
|
if (r < 0) {
|
|
if (r != -ESRCH)
|
|
RET_GATHER(ret, r);
|
|
} else if (ret == 0)
|
|
ret = 1;
|
|
|
|
done = false;
|
|
|
|
r = set_ensure_put(&s, /* hash_ops = */ NULL, PID_TO_PTR(pid));
|
|
if (r < 0)
|
|
return RET_GATHER(ret, r);
|
|
}
|
|
if (r < 0)
|
|
return RET_GATHER(ret, r);
|
|
} while (!done);
|
|
|
|
return ret;
|
|
}
|
|
|
|
int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
|
|
CGroupController c;
|
|
CGroupMask done;
|
|
bool created;
|
|
int r;
|
|
|
|
/* This one will create a cgroup in our private tree, but also
|
|
* duplicate it in the trees specified in mask, and remove it
|
|
* in all others.
|
|
*
|
|
* Returns 0 if the group already existed in the systemd hierarchy,
|
|
* 1 on success, negative otherwise.
|
|
*/
|
|
|
|
/* First create the cgroup in our own hierarchy. */
|
|
r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
|
|
if (r < 0)
|
|
return r;
|
|
created = r;
|
|
|
|
/* If we are in the unified hierarchy, we are done now */
|
|
r = cg_all_unified();
|
|
if (r < 0)
|
|
return r;
|
|
if (r > 0)
|
|
return created;
|
|
|
|
supported &= CGROUP_MASK_V1;
|
|
mask = CGROUP_MASK_EXTEND_JOINED(mask);
|
|
done = 0;
|
|
|
|
/* Otherwise, do the same in the other hierarchies */
|
|
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
|
|
CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
|
|
const char *n;
|
|
|
|
if (!FLAGS_SET(supported, bit))
|
|
continue;
|
|
|
|
if (FLAGS_SET(done, bit))
|
|
continue;
|
|
|
|
n = cgroup_controller_to_string(c);
|
|
if (FLAGS_SET(mask, bit))
|
|
(void) cg_create(n, path);
|
|
|
|
done |= CGROUP_MASK_EXTEND_JOINED(bit);
|
|
}
|
|
|
|
return created;
|
|
}
|
|
|
|
int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid) {
|
|
int r;
|
|
|
|
assert(path);
|
|
assert(pid >= 0);
|
|
|
|
r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
r = cg_all_unified();
|
|
if (r < 0)
|
|
return r;
|
|
if (r > 0)
|
|
return 0;
|
|
|
|
supported &= CGROUP_MASK_V1;
|
|
CGroupMask done = 0;
|
|
|
|
for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
|
|
CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
|
|
|
|
if (!FLAGS_SET(supported, bit))
|
|
continue;
|
|
|
|
if (FLAGS_SET(done, bit))
|
|
continue;
|
|
|
|
(void) cg_attach_fallback(cgroup_controller_to_string(c), path, pid);
|
|
done |= CGROUP_MASK_EXTEND_JOINED(bit);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
|
|
int r, q;
|
|
|
|
assert(path);
|
|
|
|
r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
q = cg_all_unified();
|
|
if (q < 0)
|
|
return q;
|
|
if (q > 0)
|
|
return r;
|
|
|
|
return cg_trim_v1_controllers(supported, _CGROUP_MASK_ALL, path, delete_root);
|
|
}
|
|
|
|
int cg_enable_everywhere(
|
|
CGroupMask supported,
|
|
CGroupMask mask,
|
|
const char *p,
|
|
CGroupMask *ret_result_mask) {
|
|
|
|
_cleanup_fclose_ FILE *f = NULL;
|
|
_cleanup_free_ char *fs = NULL;
|
|
CGroupController c;
|
|
CGroupMask ret = 0;
|
|
int r;
|
|
|
|
assert(p);
|
|
|
|
if (supported == 0) {
|
|
if (ret_result_mask)
|
|
*ret_result_mask = 0;
|
|
return 0;
|
|
}
|
|
|
|
r = cg_all_unified();
|
|
if (r < 0)
|
|
return r;
|
|
if (r == 0) {
|
|
/* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
|
|
* complete success right away. (If you wonder why we return the full mask here, rather than zero: the
|
|
* caller tends to use the returned mask later on to compare if all controllers where properly joined,
|
|
* and if not requeues realization. This use is the primary purpose of the return value, hence let's
|
|
* minimize surprises here and reduce triggers for re-realization by always saying we fully
|
|
* succeeded.) */
|
|
if (ret_result_mask)
|
|
*ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
|
|
* CGROUP_MASK_V2: The 'supported' mask
|
|
* might contain pure-V1 or BPF
|
|
* controllers, and we never want to
|
|
* claim that we could enable those with
|
|
* cgroup.subtree_control */
|
|
return 0;
|
|
}
|
|
|
|
r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
|
|
if (r < 0)
|
|
return r;
|
|
|
|
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
|
|
CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
|
|
const char *n;
|
|
|
|
if (!FLAGS_SET(CGROUP_MASK_V2, bit))
|
|
continue;
|
|
|
|
if (!FLAGS_SET(supported, bit))
|
|
continue;
|
|
|
|
n = cgroup_controller_to_string(c);
|
|
{
|
|
char s[1 + strlen(n) + 1];
|
|
|
|
s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
|
|
strcpy(s + 1, n);
|
|
|
|
if (!f) {
|
|
f = fopen(fs, "we");
|
|
if (!f)
|
|
return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
|
|
}
|
|
|
|
r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
|
|
if (r < 0) {
|
|
log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
|
|
FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
|
|
clearerr(f);
|
|
|
|
/* If we can't turn off a controller, leave it on in the reported resulting mask. This
|
|
* happens for example when we attempt to turn off a controller up in the tree that is
|
|
* used down in the tree. */
|
|
if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
|
|
* only here, and not follow the same logic
|
|
* for other errors such as EINVAL or
|
|
* EOPNOTSUPP or anything else. That's
|
|
* because EBUSY indicates that the
|
|
* controllers is currently enabled and
|
|
* cannot be disabled because something down
|
|
* the hierarchy is still using it. Any other
|
|
* error most likely means something like "I
|
|
* never heard of this controller" or
|
|
* similar. In the former case it's hence
|
|
* safe to assume the controller is still on
|
|
* after the failed operation, while in the
|
|
* latter case it's safer to assume the
|
|
* controller is unknown and hence certainly
|
|
* not enabled. */
|
|
ret |= bit;
|
|
} else {
|
|
/* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
|
|
if (FLAGS_SET(mask, bit))
|
|
ret |= bit;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Let's return the precise set of controllers now enabled for the cgroup. */
|
|
if (ret_result_mask)
|
|
*ret_result_mask = ret;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int cg_migrate_recursive(
|
|
const char *cfrom,
|
|
const char *pfrom,
|
|
const char *cto,
|
|
const char *pto,
|
|
CGroupFlags flags) {
|
|
|
|
_cleanup_closedir_ DIR *d = NULL;
|
|
int r, ret = 0;
|
|
char *fn;
|
|
|
|
assert(cfrom);
|
|
assert(pfrom);
|
|
assert(cto);
|
|
assert(pto);
|
|
|
|
ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
|
|
|
|
r = cg_enumerate_subgroups(cfrom, pfrom, &d);
|
|
if (r < 0) {
|
|
if (ret >= 0 && r != -ENOENT)
|
|
return r;
|
|
|
|
return ret;
|
|
}
|
|
|
|
while ((r = cg_read_subgroup(d, &fn)) > 0) {
|
|
_cleanup_free_ char *p = NULL;
|
|
|
|
p = path_join(empty_to_root(pfrom), fn);
|
|
free(fn);
|
|
if (!p)
|
|
return -ENOMEM;
|
|
|
|
r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
|
|
if (r != 0 && ret >= 0)
|
|
ret = r;
|
|
}
|
|
|
|
if (r < 0 && ret >= 0)
|
|
ret = r;
|
|
|
|
return ret;
|
|
}
|
|
|
|
int cg_migrate_recursive_fallback(
|
|
const char *cfrom,
|
|
const char *pfrom,
|
|
const char *cto,
|
|
const char *pto,
|
|
CGroupFlags flags) {
|
|
|
|
int r;
|
|
|
|
assert(cfrom);
|
|
assert(pfrom);
|
|
assert(cto);
|
|
assert(pto);
|
|
|
|
r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
|
|
if (r < 0) {
|
|
char prefix[strlen(pto) + 1];
|
|
|
|
/* This didn't work? Then let's try all prefixes of the destination */
|
|
|
|
PATH_FOREACH_PREFIX(prefix, pto) {
|
|
int q;
|
|
|
|
q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
|
|
if (q >= 0)
|
|
return q;
|
|
}
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata) {
|
|
CGroupController c;
|
|
CGroupMask done;
|
|
int r = 0, q;
|
|
|
|
assert(to_callback);
|
|
|
|
supported &= CGROUP_MASK_V1;
|
|
mask = CGROUP_MASK_EXTEND_JOINED(mask);
|
|
done = 0;
|
|
|
|
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
|
|
CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
|
|
const char *to = NULL;
|
|
|
|
if (!FLAGS_SET(supported, bit))
|
|
continue;
|
|
|
|
if (FLAGS_SET(done, bit))
|
|
continue;
|
|
|
|
if (!FLAGS_SET(mask, bit))
|
|
continue;
|
|
|
|
to = to_callback(bit, userdata);
|
|
|
|
/* Remember first error and try continuing */
|
|
q = cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, from, cgroup_controller_to_string(c), to, 0);
|
|
r = (r < 0) ? r : q;
|
|
|
|
done |= CGROUP_MASK_EXTEND_JOINED(bit);
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root) {
|
|
CGroupController c;
|
|
CGroupMask done;
|
|
int r = 0, q;
|
|
|
|
supported &= CGROUP_MASK_V1;
|
|
mask = CGROUP_MASK_EXTEND_JOINED(mask);
|
|
done = 0;
|
|
|
|
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
|
|
CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
|
|
|
|
if (!FLAGS_SET(supported, bit))
|
|
continue;
|
|
|
|
if (FLAGS_SET(done, bit))
|
|
continue;
|
|
|
|
if (FLAGS_SET(mask, bit)) {
|
|
/* Remember first error and try continuing */
|
|
q = cg_trim(cgroup_controller_to_string(c), path, delete_root);
|
|
r = (r < 0) ? r : q;
|
|
}
|
|
done |= CGROUP_MASK_EXTEND_JOINED(bit);
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
int cg_has_legacy(void) {
|
|
struct statfs fs;
|
|
|
|
/* Checks if any legacy controller/hierarchy is mounted. */
|
|
|
|
if (statfs("/sys/fs/cgroup/", &fs) < 0) {
|
|
if (errno == ENOENT) /* sysfs not mounted? */
|
|
return false;
|
|
|
|
return log_error_errno(errno, "Failed to statfs /sys/fs/cgroup/: %m");
|
|
}
|
|
|
|
if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC) ||
|
|
is_fs_type(&fs, SYSFS_MAGIC)) /* not mounted yet */
|
|
return false;
|
|
|
|
if (is_fs_type(&fs, TMPFS_MAGIC)) {
|
|
log_info("Found tmpfs on /sys/fs/cgroup/, assuming legacy hierarchy.");
|
|
return true;
|
|
}
|
|
|
|
return log_error_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
|
|
"Unknown filesystem type %llx mounted on /sys/fs/cgroup/.",
|
|
(unsigned long long) fs.f_type);
|
|
}
|