machined: optionally track machines in cgroup subgroups

This commit is contained in:
Lennart Poettering
2025-05-21 17:23:47 +02:00
parent 7bb1147b00
commit d5feeb373c
13 changed files with 110 additions and 38 deletions

View File

@@ -506,6 +506,8 @@ node /org/freedesktop/machine1/machine/rawhide {
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s Unit = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s Subgroup = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly u Leader = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly t LeaderPIDFDId = ...;
@@ -598,6 +600,8 @@ node /org/freedesktop/machine1/machine/rawhide {
<variablelist class="dbus-property" generated="True" extra-ref="Unit"/>
<variablelist class="dbus-property" generated="True" extra-ref="Subgroup"/>
<variablelist class="dbus-property" generated="True" extra-ref="Leader"/>
<variablelist class="dbus-property" generated="True" extra-ref="LeaderPIDFDId"/>
@@ -676,6 +680,9 @@ node /org/freedesktop/machine1/machine/rawhide {
<literal>running</literal>, or <literal>closing</literal>. Note that the state machine is not considered
part of the API and states might be removed or added without this being considered API breakage.
</para>
<para><varname>Subgroup</varname> contains the sub-control-group path this machine's processes reside
in, relative to the specified unit's control group.</para>
</refsect2>
</refsect1>
@@ -717,9 +724,9 @@ $ gdbus introspect --system \
<title>Machine Objects</title>
<para><function>CopyFromWithFlags()</function> and
<function>CopyToWithFlags()</function> were added in version 252.</para>
<para><function>GetSSHInfo()</function>, <varname>VSockCID</varname>, <varname>SSHAddress</varname>
<para><function>GetSSHInfo()</function>, <varname>VSockCID</varname>, <varname>SSHAddress</varname>,
and <varname>SSHPrivateKeyPath</varname> were added in version 256.</para>
<para><varname>LeaderPIDFDId</varname> was added in version 258.</para>
<para><varname>LeaderPIDFDId</varname> and <varname>Subgroup</varname> were added in version 258.</para>
</refsect2>
</refsect1>

View File

@@ -717,6 +717,7 @@ static const sd_bus_vtable machine_vtable[] = {
SD_BUS_PROPERTY("Service", "s", NULL, offsetof(Machine, service), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Unit", "s", NULL, offsetof(Machine, unit), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Scope", "s", NULL, offsetof(Machine, unit), SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
SD_BUS_PROPERTY("Subgroup", "s", NULL, offsetof(Machine, subgroup), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Leader", "u", bus_property_get_pid, offsetof(Machine, leader.pid), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("LeaderPIDFDId", "t", bus_property_get_pidfdid, offsetof(Machine, leader), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Class", "s", property_get_class, offsetof(Machine, class), SD_BUS_VTABLE_PROPERTY_CONST),

View File

@@ -175,7 +175,7 @@ int vl_method_register(sd_varlink *link, sd_json_variant *parameters, sd_varlink
return r;
if (!machine->allocate_unit) {
r = cg_pidref_get_unit(&machine->leader, &machine->unit);
r = cg_pidref_get_unit_full(&machine->leader, &machine->unit, &machine->subgroup);
if (r < 0)
return r;
}

View File

@@ -134,13 +134,19 @@ Machine* machine_free(Machine *m) {
sd_bus_message_unref(m->create_message);
free(m->name);
free(m->scope_job);
free(m->state_file);
free(m->service);
free(m->root_directory);
free(m->unit);
free(m->subgroup);
free(m->scope_job);
free(m->netif);
free(m->ssh_address);
free(m->ssh_private_key_path);
return mfree(m);
}
@@ -156,7 +162,7 @@ int machine_save(Machine *m) {
return 0;
_cleanup_(unlink_and_freep) char *sl = NULL; /* auto-unlink! */
if (m->unit) {
if (m->unit && !m->subgroup) {
sl = strjoin("/run/systemd/machines/unit:", m->unit);
if (!sl)
return log_oom();
@@ -244,7 +250,7 @@ int machine_save(Machine *m) {
static void machine_unlink(Machine *m) {
assert(m);
if (m->unit) {
if (m->unit && !m->subgroup) {
const char *sl = strjoina("/run/systemd/machines/unit:", m->unit);
(void) unlink(sl);
}
@@ -266,6 +272,7 @@ int machine_load(Machine *m) {
r = parse_env_file(NULL, m->state_file,
"NAME", &name,
"SCOPE", &m->unit,
"SUBGROUP", &m->subgroup,
"SCOPE_JOB", &m->scope_job,
"SERVICE", &m->service,
"ROOT", &m->root_directory,
@@ -380,6 +387,7 @@ static int machine_start_scope(
assert(machine);
assert(pidref_is_set(&machine->leader));
assert(!machine->unit);
assert(!machine->subgroup);
escaped = unit_name_escape(machine->name);
if (!escaped)
@@ -476,9 +484,11 @@ static int machine_ensure_scope(Machine *m, sd_bus_message *properties, sd_bus_e
assert(m->unit);
r = hashmap_ensure_put(&m->manager->machines_by_unit, &string_hash_ops, m->unit, m);
if (r < 0)
return r;
if (!m->subgroup) {
r = hashmap_ensure_put(&m->manager->machines_by_unit, &string_hash_ops, m->unit, m);
if (r < 0)
return r;
}
return 0;
}
@@ -566,7 +576,7 @@ int machine_stop(Machine *m) {
if (!IN_SET(m->class, MACHINE_CONTAINER, MACHINE_VM))
return -EOPNOTSUPP;
if (m->unit) {
if (m->unit && !m->subgroup) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
char *job = NULL;
@@ -637,7 +647,7 @@ bool machine_may_gc(Machine *m, bool drop_not_started) {
return false;
}
if (m->unit) {
if (m->unit && !m->subgroup) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
r = manager_unit_is_active(m->manager, m->unit, &error);
@@ -683,14 +693,14 @@ int machine_kill(Machine *m, KillWhom whom, int signo) {
if (!IN_SET(m->class, MACHINE_VM, MACHINE_CONTAINER))
return -EOPNOTSUPP;
if (!m->unit)
return -ESRCH;
if (whom == KILL_LEADER) /* If we shall simply kill the leader, do so directly */
return pidref_kill(&m->leader, signo);
if (!m->unit)
return -ESRCH;
/* Otherwise, make PID 1 do it for us, for the entire cgroup */
return manager_kill_unit(m->manager, m->unit, signo, NULL);
return manager_kill_unit(m->manager, m->unit, m->subgroup, signo, /* error= */ NULL);
}
int machine_openpt(Machine *m, int flags, char **ret_peer) {
@@ -1124,8 +1134,13 @@ void machine_release_unit(Machine *m) {
m->referenced = false;
}
(void) hashmap_remove_value(m->manager->machines_by_unit, m->unit, m);
if (!m->subgroup)
(void) hashmap_remove_value(m->manager->machines_by_unit, m->unit, m);
m->unit = mfree(m->unit);
/* Also free the subgroup, because it only makes sense in the context of the unit */
m->subgroup = mfree(m->subgroup);
}
int machine_get_uid_shift(Machine *m, uid_t *ret) {

View File

@@ -45,6 +45,7 @@ typedef struct Machine {
char *root_directory;
char *unit;
char *subgroup;
char *scope_job;
PidRef leader;

View File

@@ -412,9 +412,15 @@ static int list_images(int argc, char *argv[], void *userdata) {
return show_table(table, "images");
}
static int show_unit_cgroup(sd_bus *bus, const char *unit, pid_t leader) {
static int show_unit_cgroup(
sd_bus *bus,
const char *unit,
const char *subgroup,
pid_t leader) {
_cleanup_free_ char *cgroup = NULL;
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
OutputFlags extra_flags = 0;
int r;
assert(bus);
@@ -427,8 +433,16 @@ static int show_unit_cgroup(sd_bus *bus, const char *unit, pid_t leader) {
if (isempty(cgroup))
return 0;
if (!empty_or_root(subgroup)) {
if (!path_extend(&cgroup, subgroup))
return log_oom();
/* If we have a subcgroup, then hide all processes outside of it */
extra_flags |= OUTPUT_HIDE_EXTRA;
}
unsigned c = MAX(LESS_BY(columns(), 18U), 10U);
r = unit_show_processes(bus, unit, cgroup, "\t\t ", c, get_output_flags(), &error);
r = unit_show_processes(bus, unit, cgroup, "\t\t ", c, get_output_flags() | extra_flags, &error);
if (r == -EBADR) {
if (arg_transport == BUS_TRANSPORT_REMOTE)
@@ -494,6 +508,7 @@ typedef struct MachineStatusInfo {
const char *class;
const char *service;
const char *unit;
const char *subgroup;
const char *root_directory;
pid_t leader;
struct dual_timestamp timestamp;
@@ -589,7 +604,11 @@ static void print_machine_status_info(sd_bus *bus, MachineStatusInfo *i) {
if (i->unit) {
printf("\t Unit: %s\n", i->unit);
show_unit_cgroup(bus, i->unit, i->leader);
if (!empty_or_root(i->subgroup))
printf("\tSubgroup: %s\n", i->subgroup);
show_unit_cgroup(bus, i->unit, i->subgroup, i->leader);
if (arg_transport == BUS_TRANSPORT_LOCAL)
@@ -636,6 +655,7 @@ static int show_machine_info(const char *verb, sd_bus *bus, const char *path, bo
{ "Class", "s", NULL, offsetof(MachineStatusInfo, class) },
{ "Service", "s", NULL, offsetof(MachineStatusInfo, service) },
{ "Unit", "s", NULL, offsetof(MachineStatusInfo, unit) },
{ "Subgroup", "s", NULL, offsetof(MachineStatusInfo, subgroup) },
{ "RootDirectory", "s", NULL, offsetof(MachineStatusInfo, root_directory) },
{ "Leader", "u", NULL, offsetof(MachineStatusInfo, leader) },
{ "Timestamp", "t", NULL, offsetof(MachineStatusInfo, timestamp.realtime) },

View File

@@ -22,28 +22,41 @@
#include "user-util.h"
int manager_get_machine_by_pidref(Manager *m, const PidRef *pidref, Machine **ret) {
Machine *mm;
int r;
_cleanup_(pidref_done) PidRef current = PIDREF_NULL;
Machine *mm = NULL;
assert(m);
assert(pidref_is_set(pidref));
assert(ret);
mm = hashmap_get(m->machines_by_leader, pidref);
if (!mm) {
_cleanup_free_ char *unit = NULL;
for (;;) {
/* First, compare by leader */
mm = hashmap_get(m->machines_by_leader, pidref);
if (mm)
break;
r = cg_pidref_get_unit(pidref, &unit);
if (r >= 0)
/* Then look for the unit */
_cleanup_free_ char *unit = NULL;
if (cg_pidref_get_unit(pidref, &unit) >= 0) {
mm = hashmap_get(m->machines_by_unit, unit);
}
if (!mm) {
*ret = NULL;
return 0;
if (mm)
break;
}
/* Maybe this process is in a per-user unit? If so, let's go up the process tree and check the
* parent instead; we should eventually hit PID 1 of the container tree, which we should be able
* to recognize. */
_cleanup_(pidref_done) PidRef parent = PIDREF_NULL;
if (pidref_get_ppid_as_pidref(pidref, &parent) < 0)
break;
pidref_done(&current);
current = TAKE_PIDREF(parent);
pidref = &current;
}
*ret = mm;
return 1;
return !!mm;
}
int manager_add_machine(Manager *m, const char *name, Machine **ret) {

View File

@@ -411,7 +411,7 @@ static int method_register_machine_internal(sd_bus_message *message, bool read_n
if (r == 0)
return 1; /* Will call us back */
r = cg_pidref_get_unit(&m->leader, &m->unit);
r = cg_pidref_get_unit_full(&m->leader, &m->unit, &m->subgroup);
if (r < 0) {
r = sd_bus_error_set_errnof(error, r,
"Failed to determine unit of process "PID_FMT" : %m",
@@ -1276,11 +1276,14 @@ int manager_stop_unit(Manager *manager, const char *unit, sd_bus_error *error, c
return 1;
}
int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_error *error) {
int manager_kill_unit(Manager *manager, const char *unit, const char *subgroup, int signo, sd_bus_error *reterr_error) {
assert(manager);
assert(unit);
return bus_call_method(manager->bus, bus_systemd_mgr, "KillUnit", error, NULL, "ssi", unit, "all", signo);
if (empty_or_root(subgroup))
return bus_call_method(manager->bus, bus_systemd_mgr, "KillUnit", reterr_error, NULL, "ssi", unit, "all", signo);
return bus_call_method(manager->bus, bus_systemd_mgr, "KillUnitSubgroup", reterr_error, NULL, "sssi", unit, "cgroup", subgroup, signo);
}
int manager_unit_is_active(Manager *manager, const char *unit, sd_bus_error *reterr_error) {

View File

@@ -477,6 +477,7 @@ static int list_machine_one_and_maybe_read_metadata(sd_varlink *link, Machine *m
JSON_BUILD_PAIR_STRING_NON_EMPTY("service", m->service),
JSON_BUILD_PAIR_STRING_NON_EMPTY("rootDirectory", m->root_directory),
JSON_BUILD_PAIR_STRING_NON_EMPTY("unit", m->unit),
JSON_BUILD_PAIR_STRING_NON_EMPTY("subgroup", m->subgroup),
SD_JSON_BUILD_PAIR_CONDITION(pidref_is_set(&m->leader), "leader", JSON_BUILD_PIDREF(&m->leader)),
SD_JSON_BUILD_PAIR_CONDITION(dual_timestamp_is_set(&m->timestamp), "timestamp", JSON_BUILD_DUAL_TIMESTAMP(&m->timestamp)),
JSON_BUILD_PAIR_UNSIGNED_NOT_EQUAL("vSockCid", m->vsock_cid, VMADDR_CID_ANY),

View File

@@ -10,7 +10,9 @@ typedef struct Manager {
sd_bus *bus;
Hashmap *machines;
Hashmap *machines_by_unit;
Hashmap *machines_by_unit; /* This hashmap only tracks machines where a system-level unit encapsulates
* the machine fully, and exclusively. It's not used if a machine is
* run in a cgroup further down the tree. */
Hashmap *machines_by_leader;
sd_event_source *deferred_gc_event_source;
@@ -44,7 +46,7 @@ int match_properties_changed(sd_bus_message *message, void *userdata, sd_bus_err
int match_job_removed(sd_bus_message *message, void *userdata, sd_bus_error *error);
int manager_stop_unit(Manager *manager, const char *unit, sd_bus_error *error, char **job);
int manager_kill_unit(Manager *manager, const char *unit, int signo, sd_bus_error *error);
int manager_kill_unit(Manager *manager, const char *unit, const char *subgroup, int signo, sd_bus_error *error);
int manager_unref_unit(Manager *m, const char *unit, sd_bus_error *error);
int manager_unit_is_active(Manager *manager, const char *unit, sd_bus_error *reterr_errno);
int manager_job_is_active(Manager *manager, const char *path, sd_bus_error *reterr_errno);

View File

@@ -394,7 +394,13 @@ int unit_show_processes(
if (r < 0)
goto finish;
r = dump_extra_processes(cgroups, prefix, n_columns, flags);
if (!FLAGS_SET(flags, OUTPUT_HIDE_EXTRA)) {
r = dump_extra_processes(cgroups, prefix, n_columns, flags);
if (r < 0)
goto finish;
}
r = 0;
finish:
while ((cg = hashmap_first(cgroups)))

View File

@@ -48,6 +48,7 @@ typedef enum OutputFlags {
OUTPUT_KERNEL_THREADS = 1 << 9,
OUTPUT_CGROUP_XATTRS = 1 << 10,
OUTPUT_CGROUP_ID = 1 << 11,
OUTPUT_HIDE_EXTRA = 1 << 12,
} OutputFlags;
sd_json_format_flags_t output_mode_to_json_format_flags(OutputMode m);

View File

@@ -96,7 +96,9 @@ static SD_VARLINK_DEFINE_METHOD_FULL(
SD_VARLINK_FIELD_COMMENT("OS release information of the machine. It contains an array of key value pairs read from the os-release(5) file in the image."),
SD_VARLINK_DEFINE_OUTPUT(OSRelease, SD_VARLINK_STRING, SD_VARLINK_NULLABLE|SD_VARLINK_ARRAY),
SD_VARLINK_FIELD_COMMENT("Return the base UID/GID of the machine"),
SD_VARLINK_DEFINE_OUTPUT(UIDShift, SD_VARLINK_INT, SD_VARLINK_NULLABLE));
SD_VARLINK_DEFINE_OUTPUT(UIDShift, SD_VARLINK_INT, SD_VARLINK_NULLABLE),
SD_VARLINK_FIELD_COMMENT("Subcgroup path of the machine, relative to the unit's cgroup path"),
SD_VARLINK_DEFINE_OUTPUT(Subgroup, SD_VARLINK_STRING, SD_VARLINK_NULLABLE));
static SD_VARLINK_DEFINE_ENUM_TYPE(
MachineOpenMode,