mirror of
https://github.com/morgan9e/systemd
synced 2026-04-14 00:14:32 +09:00
core: Add DelegateNamespaces= (#36532)
This commit is contained in:
@@ -3358,6 +3358,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly t RestrictNamespaces = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly t DelegateNamespaces = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly (bas) RestrictFileSystems = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly a(ssbt) BindPaths = [...];
|
||||
@@ -3963,6 +3965,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
|
||||
|
||||
<!--property RestrictNamespaces is not documented!-->
|
||||
|
||||
<!--property DelegateNamespaces is not documented!-->
|
||||
|
||||
<!--property RestrictFileSystems is not documented!-->
|
||||
|
||||
<!--property BindPaths is not documented!-->
|
||||
@@ -4685,6 +4689,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNamespaces"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="DelegateNamespaces"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="RestrictFileSystems"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="BindPaths"/>
|
||||
@@ -5559,6 +5565,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly t RestrictNamespaces = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly t DelegateNamespaces = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly (bas) RestrictFileSystems = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly a(ssbt) BindPaths = [...];
|
||||
@@ -6176,6 +6184,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
|
||||
|
||||
<!--property RestrictNamespaces is not documented!-->
|
||||
|
||||
<!--property DelegateNamespaces is not documented!-->
|
||||
|
||||
<!--property RestrictFileSystems is not documented!-->
|
||||
|
||||
<!--property BindPaths is not documented!-->
|
||||
@@ -6870,6 +6880,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNamespaces"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="DelegateNamespaces"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="RestrictFileSystems"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="BindPaths"/>
|
||||
@@ -7576,6 +7588,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly t RestrictNamespaces = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly t DelegateNamespaces = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly (bas) RestrictFileSystems = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly a(ssbt) BindPaths = [...];
|
||||
@@ -8123,6 +8137,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
|
||||
|
||||
<!--property RestrictNamespaces is not documented!-->
|
||||
|
||||
<!--property DelegateNamespaces is not documented!-->
|
||||
|
||||
<!--property RestrictFileSystems is not documented!-->
|
||||
|
||||
<!--property BindPaths is not documented!-->
|
||||
@@ -8733,6 +8749,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNamespaces"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="DelegateNamespaces"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="RestrictFileSystems"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="BindPaths"/>
|
||||
@@ -9566,6 +9584,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly t RestrictNamespaces = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly t DelegateNamespaces = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly (bas) RestrictFileSystems = ...;
|
||||
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
|
||||
readonly a(ssbt) BindPaths = [...];
|
||||
@@ -10095,6 +10115,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
|
||||
|
||||
<!--property RestrictNamespaces is not documented!-->
|
||||
|
||||
<!--property DelegateNamespaces is not documented!-->
|
||||
|
||||
<!--property RestrictFileSystems is not documented!-->
|
||||
|
||||
<!--property BindPaths is not documented!-->
|
||||
@@ -10687,6 +10709,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="RestrictNamespaces"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="DelegateNamespaces"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="RestrictFileSystems"/>
|
||||
|
||||
<variablelist class="dbus-property" generated="True" extra-ref="BindPaths"/>
|
||||
@@ -12385,7 +12409,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
|
||||
<varname>ProtectControlGroupsEx</varname>,
|
||||
<varname>PrivateUsersEx</varname>, and
|
||||
<varname>PrivatePIDs</varname> were added in version 257.</para>
|
||||
<para><varname>ProtectHostnameEx</varname> and <function>RemoveSubGroup()</function> were added in version 258.</para>
|
||||
<para><varname>ProtectHostnameEx</varname>,
|
||||
<varname>DelegateNamespaces</varname>, and
|
||||
<function>RemoveSubgroup()</function> were added in version 258.</para>
|
||||
</refsect2>
|
||||
<refsect2>
|
||||
<title>Socket Unit Objects</title>
|
||||
@@ -12429,7 +12455,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
|
||||
<varname>ManagedOOMMemoryPressureDurationUSec</varname>,
|
||||
<varname>ProtectControlGroupsEx</varname>, and
|
||||
<varname>PrivatePIDs</varname> were added in version 257.</para>
|
||||
<para><varname>ProtectHostnameEx</varname> and <function>RemoveSubgroup()</function> were added in version 258.</para>
|
||||
<para><varname>ProtectHostnameEx</varname>,
|
||||
<varname>DelegateNamespaces</varname>, and
|
||||
<function>RemoveSubgroup()</function> were added in version 258.</para>
|
||||
</refsect2>
|
||||
<refsect2>
|
||||
<title>Mount Unit Objects</title>
|
||||
@@ -12471,6 +12499,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
|
||||
<varname>ProtectControlGroupsEx</varname>, and
|
||||
<varname>PrivatePIDs</varname> were added in version 257.</para>
|
||||
<para><varname>ProtectHostnameEx</varname>,
|
||||
<varname>DelegateNamespaces</varname>,
|
||||
<function>RemoveSubgroup()</function>,
|
||||
<varname>ReloadResult</varname>, and
|
||||
<varname>CleanResult</varname> were added in version 258.</para>
|
||||
@@ -12514,7 +12543,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
|
||||
<varname>ManagedOOMMemoryPressureDurationUSec</varname>,
|
||||
<varname>ProtectControlGroupsEx</varname>, and
|
||||
<varname>PrivatePIDs</varname> were added in version 257.</para>
|
||||
<para><varname>ProtectHostnameEx</varname> and <function>RemoveSubgroup()</function> were added in version 258.</para>
|
||||
<para><varname>ProtectHostnameEx</varname>,
|
||||
<varname>DelegateNamespaces</varname>, and
|
||||
<function>RemoveSubgroup()</function> were added in version 258.</para>
|
||||
</refsect2>
|
||||
<refsect2>
|
||||
<title>Slice Unit Objects</title>
|
||||
|
||||
@@ -2375,6 +2375,43 @@ RestrictNamespaces=~cgroup net</programlisting>
|
||||
<xi:include href="version-info.xml" xpointer="v233"/></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>DelegateNamespaces=</varname></term>
|
||||
|
||||
<listitem><para>Delegates ownership of the given namespace types to the user namespace of the
|
||||
processes of this unit. For details about Linux namespaces, see <citerefentry
|
||||
project='man-pages'><refentrytitle>namespaces</refentrytitle><manvolnum>7</manvolnum></citerefentry>.
|
||||
Takes either a boolean argument or a space-separated list of namespace type identifiers. If false
|
||||
(the default), the unit's processes' user namespace will not have ownership over any namespaces
|
||||
created during setup of the unit's sandboxed environment. If true, ownership of all namespace types
|
||||
(except for user namespaces, where the concept doesn't apply) created during setup of the unit's
|
||||
sandboxed environment is delegated to the unit's processes' user namespace. Otherwise, a
|
||||
space-separated list of namespace type identifiers must be specified, consisting of any combination
|
||||
of: <constant>cgroup</constant>, <constant>ipc</constant>, <constant>net</constant>,
|
||||
<constant>mnt</constant>, <constant>pid</constant>, and <constant>uts</constant>. All namespaces of
|
||||
the listed types will be owned by the unit's processes' user namespace if they are created during
|
||||
setup of the unit's sandboxed environment (allow-listing). By prepending the list with a single tilde
|
||||
character (<literal>~</literal>) the effect may be inverted: all namespaces of types not listed and
|
||||
created during setup of the unit's sandboxed environment will be owned by the unit's processes' user
|
||||
namespace (deny-listing). If the empty string is assigned, the default namespace ownership is
|
||||
applied, which is equivalent to false. This option may appear more than once, in which case the
|
||||
namespace types are merged by <constant>OR</constant>, or by <constant>AND</constant> if the lines
|
||||
are prefixed with <literal>~</literal> (see the examples for <varname>RestrictNamespaces=</varname>
|
||||
above). Internally, this setting controls the order in which namespaces are unshared by systemd:
|
||||
delegated namespaces are unshared after the user namespace is unshared, so that they are owned by the
|
||||
unit's processes' user namespace, while all other namespaces are unshared before the user namespace is
|
||||
unshared.</para>
|
||||
|
||||
<para>Delegating any namespace with <varname>DelegateNamespaces=</varname> implies
|
||||
<varname>PrivateUsers=self</varname> unless <varname>PrivateUsers=</varname> is explicitly enabled
|
||||
already by the unit. Delegating a namespace does not imply that the namespace is unshared; that is
|
||||
done with the namespace-specific unit settings, such as <varname>PrivateNetwork=</varname> or
|
||||
<varname>PrivateMounts=</varname>.</para>
|
||||
|
||||
<xi:include href="version-info.xml" xpointer="v258"/></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>LockPersonality=</varname></term>
|
||||
|
||||
|
||||
@@ -1263,6 +1263,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
|
||||
SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("RestrictSUIDSGID", "b", bus_property_get_bool, offsetof(ExecContext, restrict_suid_sgid), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("RestrictNamespaces", "t", bus_property_get_ulong, offsetof(ExecContext, restrict_namespaces), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("DelegateNamespaces", "t", bus_property_get_ulong, offsetof(ExecContext, delegate_namespaces), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("RestrictFileSystems", "(bas)", property_get_restrict_filesystems, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("BindPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
@@ -2194,6 +2195,9 @@ int bus_exec_context_set_transient_property(
|
||||
if (streq(name, "RestrictNamespaces"))
|
||||
return bus_set_transient_namespace_flag(u, name, &c->restrict_namespaces, message, flags, error);
|
||||
|
||||
if (streq(name, "DelegateNamespaces"))
|
||||
return bus_set_transient_namespace_flag(u, name, &c->delegate_namespaces, message, flags, error);
|
||||
|
||||
if (streq(name, "RestrictFileSystems")) {
|
||||
int allow_list;
|
||||
_cleanup_strv_free_ char **l = NULL;
|
||||
|
||||
@@ -4210,7 +4210,206 @@ static bool exec_context_need_unprivileged_private_users(
|
||||
!strv_isempty(context->read_only_paths) ||
|
||||
!strv_isempty(context->inaccessible_paths) ||
|
||||
!strv_isempty(context->exec_paths) ||
|
||||
!strv_isempty(context->no_exec_paths);
|
||||
!strv_isempty(context->no_exec_paths) ||
|
||||
context->delegate_namespaces != NAMESPACE_FLAGS_INITIAL;
|
||||
}
|
||||
|
||||
static PrivateUsers exec_context_get_effective_private_users(
|
||||
const ExecContext *context,
|
||||
const ExecParameters *params) {
|
||||
|
||||
assert(context);
|
||||
assert(params);
|
||||
|
||||
if (context->private_users != PRIVATE_USERS_NO)
|
||||
return context->private_users;
|
||||
|
||||
if (exec_context_need_unprivileged_private_users(context, params))
|
||||
return PRIVATE_USERS_SELF;
|
||||
|
||||
/* If any namespace is delegated with DelegateNamespaces=, always set up a user namespace. */
|
||||
if (context->delegate_namespaces != NAMESPACE_FLAGS_INITIAL)
|
||||
return PRIVATE_USERS_SELF;
|
||||
|
||||
return PRIVATE_USERS_NO;
|
||||
}
|
||||
|
||||
static bool exec_namespace_is_delegated(
|
||||
const ExecContext *context,
|
||||
const ExecParameters *params,
|
||||
unsigned long namespace) {
|
||||
|
||||
assert(context);
|
||||
assert(params);
|
||||
assert(namespace != CLONE_NEWUSER);
|
||||
|
||||
/* If we need unprivileged private users, we've already unshared a user namespace by the time we call
|
||||
* setup_delegated_namespaces() for the first time so let's make sure we do all other namespace
|
||||
* unsharing in the first call to setup_delegated_namespaces() by returning false here. */
|
||||
if (exec_context_need_unprivileged_private_users(context, params))
|
||||
return false;
|
||||
|
||||
if (context->delegate_namespaces == NAMESPACE_FLAGS_INITIAL)
|
||||
return false;
|
||||
|
||||
return FLAGS_SET(context->delegate_namespaces, namespace);
|
||||
}
|
||||
|
||||
static int setup_delegated_namespaces(
|
||||
const ExecContext *context,
|
||||
ExecParameters *params,
|
||||
ExecRuntime *runtime,
|
||||
bool delegate,
|
||||
const char *memory_pressure_path,
|
||||
uid_t uid,
|
||||
uid_t gid,
|
||||
const ExecCommand *command,
|
||||
bool needs_sandboxing,
|
||||
bool has_cap_sys_admin,
|
||||
int *reterr_exit_status) {
|
||||
|
||||
int r;
|
||||
|
||||
/* This function is called twice, once before unsharing the user namespace, and once after unsharing
|
||||
* the user namespace. When called before unsharing the user namespace, "delegate" is set to "false".
|
||||
* When called after unsharing the user namespace, "delegate" is set to "true". The net effect is
|
||||
* that all namespaces that should not be delegated are unshared when this function is called the
|
||||
* first time and all namespaces that should be delegated are unshared when this function is called
|
||||
* the second time. */
|
||||
|
||||
assert(context);
|
||||
assert(params);
|
||||
assert(reterr_exit_status);
|
||||
|
||||
if (exec_needs_network_namespace(context) &&
|
||||
exec_namespace_is_delegated(context, params, CLONE_NEWNET) == delegate &&
|
||||
runtime && runtime->shared && runtime->shared->netns_storage_socket[0] >= 0) {
|
||||
|
||||
/* Try to enable network namespacing if network namespacing is available and we have
|
||||
* CAP_NET_ADMIN in the current user namespace (either the system manager one or the unit's
|
||||
* own user namespace). We need CAP_NET_ADMIN to be able to configure the loopback device in
|
||||
* the new network namespace. And if we don't have that, then we could only create a network
|
||||
* namespace without the ability to set up "lo". Hence gracefully skip things then. */
|
||||
if (ns_type_supported(NAMESPACE_NET) && have_effective_cap(CAP_NET_ADMIN) > 0) {
|
||||
r = setup_shareable_ns(runtime->shared->netns_storage_socket, CLONE_NEWNET);
|
||||
if (ERRNO_IS_NEG_PRIVILEGE(r))
|
||||
log_exec_notice_errno(context, params, r,
|
||||
"PrivateNetwork=yes is configured, but network namespace setup not permitted, proceeding without: %m");
|
||||
else if (r < 0) {
|
||||
*reterr_exit_status = EXIT_NETWORK;
|
||||
return log_exec_error_errno(context, params, r, "Failed to set up network namespacing: %m");
|
||||
} else
|
||||
log_exec_debug(context, params, "Set up %snetwork namespace", delegate ? "delegated " : "");
|
||||
} else if (context->network_namespace_path) {
|
||||
*reterr_exit_status = EXIT_NETWORK;
|
||||
return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(EOPNOTSUPP),
|
||||
"NetworkNamespacePath= is not supported, refusing.");
|
||||
} else
|
||||
log_exec_notice(context, params, "PrivateNetwork=yes is configured, but the kernel does not support or we lack privileges for network namespace, proceeding without.");
|
||||
}
|
||||
|
||||
if (exec_needs_ipc_namespace(context) &&
|
||||
exec_namespace_is_delegated(context, params, CLONE_NEWIPC) == delegate &&
|
||||
runtime && runtime->shared && runtime->shared->ipcns_storage_socket[0] >= 0) {
|
||||
|
||||
if (ns_type_supported(NAMESPACE_IPC)) {
|
||||
r = setup_shareable_ns(runtime->shared->ipcns_storage_socket, CLONE_NEWIPC);
|
||||
if (ERRNO_IS_NEG_PRIVILEGE(r))
|
||||
log_exec_warning_errno(context, params, r,
|
||||
"PrivateIPC=yes is configured, but IPC namespace setup failed, ignoring: %m");
|
||||
else if (r < 0) {
|
||||
*reterr_exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, r, "Failed to set up IPC namespacing: %m");
|
||||
} else
|
||||
log_exec_debug(context, params, "Set up %sIPC namespace", delegate ? "delegated " : "");
|
||||
} else if (context->ipc_namespace_path) {
|
||||
*reterr_exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(EOPNOTSUPP),
|
||||
"IPCNamespacePath= is not supported, refusing.");
|
||||
} else
|
||||
log_exec_warning(context, params, "PrivateIPC=yes is configured, but the kernel does not support IPC namespaces, ignoring.");
|
||||
}
|
||||
|
||||
if (needs_sandboxing && exec_needs_cgroup_namespace(context, params) &&
|
||||
exec_namespace_is_delegated(context, params, CLONE_NEWCGROUP) == delegate) {
|
||||
if (unshare(CLONE_NEWCGROUP) < 0) {
|
||||
*reterr_exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, errno, "Failed to set up cgroup namespacing: %m");
|
||||
}
|
||||
|
||||
log_exec_debug(context, params, "Set up %scgroup namespace", delegate ? "delegated " : "");
|
||||
}
|
||||
|
||||
/* Unshare a new PID namespace before setting up mounts to ensure /proc/ is mounted with only processes in PID namespace visible.
|
||||
* Note PrivatePIDs=yes implies MountAPIVFS=yes so we'll always ensure procfs is remounted. */
|
||||
if (needs_sandboxing && exec_needs_pid_namespace(context) &&
|
||||
exec_namespace_is_delegated(context, params, CLONE_NEWPID) == delegate) {
|
||||
if (params->pidref_transport_fd < 0) {
|
||||
*reterr_exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ENOTCONN), "PidRef socket is not set up: %m");
|
||||
}
|
||||
|
||||
/* If we had CAP_SYS_ADMIN prior to joining the user namespace, then we are privileged and don't need
|
||||
* to check if we can mount /proc/.
|
||||
*
|
||||
* We need to check prior to entering the user namespace because if we're running unprivileged or in a
|
||||
* system without CAP_SYS_ADMIN, then we can have CAP_SYS_ADMIN in the current user namespace but not
|
||||
* once we unshare a mount namespace. */
|
||||
if (!has_cap_sys_admin) {
|
||||
r = can_mount_proc(context, params);
|
||||
if (r < 0) {
|
||||
*reterr_exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, r, "Failed to detect if /proc/ can be remounted: %m");
|
||||
}
|
||||
if (r == 0) {
|
||||
*reterr_exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(EPERM),
|
||||
"PrivatePIDs=yes is configured, but /proc/ cannot be re-mounted due to lack of privileges, refusing.");
|
||||
}
|
||||
}
|
||||
|
||||
r = setup_private_pids(context, params);
|
||||
if (r < 0) {
|
||||
*reterr_exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, r, "Failed to set up pid namespace: %m");
|
||||
}
|
||||
|
||||
log_exec_debug(context, params, "Set up %spid namespace", delegate ? "delegated " : "");
|
||||
}
|
||||
|
||||
/* If PrivatePIDs= yes is configured, we're now running as pid 1 in a pid namespace! */
|
||||
|
||||
if (exec_needs_mount_namespace(context, params, runtime) &&
|
||||
exec_namespace_is_delegated(context, params, CLONE_NEWNS) == delegate) {
|
||||
_cleanup_free_ char *error_path = NULL;
|
||||
|
||||
r = apply_mount_namespace(command->flags,
|
||||
context,
|
||||
params,
|
||||
runtime,
|
||||
memory_pressure_path,
|
||||
needs_sandboxing,
|
||||
&error_path,
|
||||
uid,
|
||||
gid);
|
||||
if (r < 0) {
|
||||
*reterr_exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, r, "Failed to set up mount namespacing%s%s: %m",
|
||||
error_path ? ": " : "", strempty(error_path));
|
||||
}
|
||||
|
||||
log_exec_debug(context, params, "Set up %smount namespace", delegate ? "delegated " : "");
|
||||
}
|
||||
|
||||
if (needs_sandboxing && exec_namespace_is_delegated(context, params, CLONE_NEWUTS) == delegate) {
|
||||
r = apply_protect_hostname(context, params, reterr_exit_status);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
log_exec_debug(context, params, "Set up %sUTS namespace", delegate ? "delegated " : "");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool exec_context_shall_confirm_spawn(const ExecContext *context) {
|
||||
@@ -4409,7 +4608,6 @@ int exec_invoke(
|
||||
char **final_argv = NULL;
|
||||
dev_t journal_stream_dev = 0;
|
||||
ino_t journal_stream_ino = 0;
|
||||
bool userns_set_up = false;
|
||||
bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
|
||||
needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
|
||||
needs_mount_namespace; /* Do we need to set up a mount namespace for this kernel? */
|
||||
@@ -4877,6 +5075,19 @@ int exec_invoke(
|
||||
}
|
||||
}
|
||||
|
||||
if (context->memory_ksm >= 0)
|
||||
if (prctl(PR_SET_MEMORY_MERGE, context->memory_ksm, 0, 0, 0) < 0) {
|
||||
if (ERRNO_IS_NOT_SUPPORTED(errno))
|
||||
log_exec_debug_errno(context,
|
||||
params,
|
||||
errno,
|
||||
"KSM support not available, ignoring.");
|
||||
else {
|
||||
*exit_status = EXIT_KSM;
|
||||
return log_exec_error_errno(context, params, errno, "Failed to set KSM: %m");
|
||||
}
|
||||
}
|
||||
|
||||
#if ENABLE_UTMP
|
||||
if (context->utmp_id) {
|
||||
_cleanup_free_ char *username_alloc = NULL;
|
||||
@@ -5131,9 +5342,7 @@ int exec_invoke(
|
||||
/* If we're unprivileged, set up the user namespace first to enable use of the other namespaces.
|
||||
* Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to
|
||||
* set up all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */
|
||||
PrivateUsers pu = context->private_users;
|
||||
if (pu == PRIVATE_USERS_NO)
|
||||
pu = PRIVATE_USERS_SELF;
|
||||
PrivateUsers pu = exec_context_get_effective_private_users(context, params);
|
||||
|
||||
/* The kernel requires /proc/pid/setgroups be set to "deny" prior to writing /proc/pid/gid_map in
|
||||
* unprivileged user namespaces. */
|
||||
@@ -5148,130 +5357,25 @@ int exec_invoke(
|
||||
log_exec_info_errno(context, params, r, "Failed to set up user namespacing for unprivileged user, ignoring: %m");
|
||||
else {
|
||||
assert(r > 0);
|
||||
userns_set_up = true;
|
||||
log_debug("Set up unprivileged user namespace");
|
||||
}
|
||||
}
|
||||
|
||||
if (exec_needs_network_namespace(context) && runtime && runtime->shared && runtime->shared->netns_storage_socket[0] >= 0) {
|
||||
|
||||
/* Try to enable network namespacing if network namespacing is available and we have
|
||||
* CAP_NET_ADMIN. We need CAP_NET_ADMIN to be able to configure the loopback device in the
|
||||
* new network namespace. And if we don't have that, then we could only create a network
|
||||
* namespace without the ability to set up "lo". Hence gracefully skip things then. */
|
||||
if (ns_type_supported(NAMESPACE_NET) && have_effective_cap(CAP_NET_ADMIN) > 0) {
|
||||
r = setup_shareable_ns(runtime->shared->netns_storage_socket, CLONE_NEWNET);
|
||||
if (ERRNO_IS_NEG_PRIVILEGE(r))
|
||||
log_exec_notice_errno(context, params, r,
|
||||
"PrivateNetwork=yes is configured, but network namespace setup not permitted, proceeding without: %m");
|
||||
else if (r < 0) {
|
||||
*exit_status = EXIT_NETWORK;
|
||||
return log_exec_error_errno(context, params, r, "Failed to set up network namespacing: %m");
|
||||
}
|
||||
} else if (context->network_namespace_path) {
|
||||
*exit_status = EXIT_NETWORK;
|
||||
return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(EOPNOTSUPP),
|
||||
"NetworkNamespacePath= is not supported, refusing.");
|
||||
} else
|
||||
log_exec_notice(context, params, "PrivateNetwork=yes is configured, but the kernel does not support or we lack privileges for network namespace, proceeding without.");
|
||||
}
|
||||
|
||||
if (exec_needs_ipc_namespace(context) && runtime && runtime->shared && runtime->shared->ipcns_storage_socket[0] >= 0) {
|
||||
|
||||
if (ns_type_supported(NAMESPACE_IPC)) {
|
||||
r = setup_shareable_ns(runtime->shared->ipcns_storage_socket, CLONE_NEWIPC);
|
||||
if (ERRNO_IS_NEG_PRIVILEGE(r))
|
||||
log_exec_warning_errno(context, params, r,
|
||||
"PrivateIPC=yes is configured, but IPC namespace setup failed, ignoring: %m");
|
||||
else if (r < 0) {
|
||||
*exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, r, "Failed to set up IPC namespacing: %m");
|
||||
}
|
||||
} else if (context->ipc_namespace_path) {
|
||||
*exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(EOPNOTSUPP),
|
||||
"IPCNamespacePath= is not supported, refusing.");
|
||||
} else
|
||||
log_exec_warning(context, params, "PrivateIPC=yes is configured, but the kernel does not support IPC namespaces, ignoring.");
|
||||
}
|
||||
|
||||
if (needs_sandboxing && exec_needs_cgroup_namespace(context, params)) {
|
||||
if (unshare(CLONE_NEWCGROUP) < 0) {
|
||||
*exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, errno, "Failed to set up cgroup namespacing: %m");
|
||||
}
|
||||
}
|
||||
|
||||
/* Unshare a new PID namespace before setting up mounts to ensure /proc/ is mounted with only processes in PID namespace visible.
|
||||
* Note PrivatePIDs=yes implies MountAPIVFS=yes so we'll always ensure procfs is remounted. */
|
||||
if (needs_sandboxing && exec_needs_pid_namespace(context)) {
|
||||
if (params->pidref_transport_fd < 0) {
|
||||
*exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ENOTCONN), "PidRef socket is not set up: %m");
|
||||
}
|
||||
|
||||
/* If we had CAP_SYS_ADMIN prior to joining the user namespace, then we are privileged and don't need
|
||||
* to check if we can mount /proc/.
|
||||
*
|
||||
* We need to check prior to entering the user namespace because if we're running unprivileged or in a
|
||||
* system without CAP_SYS_ADMIN, then we can have CAP_SYS_ADMIN in the current user namespace but not
|
||||
* once we unshare a mount namespace. */
|
||||
r = has_cap_sys_admin ? 1 : can_mount_proc(context, params);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, r, "Failed to detect if /proc/ can be remounted: %m");
|
||||
}
|
||||
if (r == 0) {
|
||||
*exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(EPERM),
|
||||
"PrivatePIDs=yes is configured, but /proc/ cannot be re-mounted due to lack of privileges, refusing.");
|
||||
}
|
||||
|
||||
r = setup_private_pids(context, params);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, r, "Failed to set up pid namespace: %m");
|
||||
}
|
||||
}
|
||||
|
||||
/* If PrivatePIDs= yes is configured, we're now running as pid 1 in a pid namespace! */
|
||||
|
||||
if (needs_mount_namespace) {
|
||||
_cleanup_free_ char *error_path = NULL;
|
||||
|
||||
r = apply_mount_namespace(command->flags,
|
||||
context,
|
||||
params,
|
||||
runtime,
|
||||
memory_pressure_path,
|
||||
needs_sandboxing,
|
||||
&error_path,
|
||||
uid,
|
||||
gid);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_NAMESPACE;
|
||||
return log_exec_error_errno(context, params, r, "Failed to set up mount namespacing%s%s: %m",
|
||||
error_path ? ": " : "", strempty(error_path));
|
||||
}
|
||||
}
|
||||
|
||||
if (needs_sandboxing) {
|
||||
r = apply_protect_hostname(context, params, exit_status);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (context->memory_ksm >= 0)
|
||||
if (prctl(PR_SET_MEMORY_MERGE, context->memory_ksm, 0, 0, 0) < 0) {
|
||||
if (ERRNO_IS_NOT_SUPPORTED(errno))
|
||||
log_exec_debug_errno(context,
|
||||
params,
|
||||
errno,
|
||||
"KSM support not available, ignoring.");
|
||||
else {
|
||||
*exit_status = EXIT_KSM;
|
||||
return log_exec_error_errno(context, params, errno, "Failed to set KSM: %m");
|
||||
}
|
||||
}
|
||||
/* Call setup_delegated_namespaces() the first time to unshare all non-delegated namespaces. */
|
||||
r = setup_delegated_namespaces(
|
||||
context,
|
||||
params,
|
||||
runtime,
|
||||
/* delegate= */ false,
|
||||
memory_pressure_path,
|
||||
uid,
|
||||
gid,
|
||||
command,
|
||||
needs_sandboxing,
|
||||
has_cap_sys_admin,
|
||||
exit_status);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* Drop groups as early as possible.
|
||||
* This needs to be done after PrivateDevices=yes setup as device nodes should be owned by the host's root.
|
||||
@@ -5305,15 +5409,35 @@ int exec_invoke(
|
||||
* case of mount namespaces being less privileged when the mount point list is copied from a
|
||||
* different user namespace). */
|
||||
|
||||
if (needs_sandboxing && !userns_set_up) {
|
||||
r = setup_private_users(context->private_users, saved_uid, saved_gid, uid, gid,
|
||||
/* allow_setgroups= */ context->private_users == PRIVATE_USERS_FULL);
|
||||
if (needs_sandboxing && !exec_context_need_unprivileged_private_users(context, params)) {
|
||||
PrivateUsers pu = exec_context_get_effective_private_users(context, params);
|
||||
|
||||
r = setup_private_users(pu, saved_uid, saved_gid, uid, gid,
|
||||
/* allow_setgroups= */ pu == PRIVATE_USERS_FULL);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_USER;
|
||||
return log_exec_error_errno(context, params, r, "Failed to set up user namespacing: %m");
|
||||
}
|
||||
|
||||
log_debug("Set up privileged user namespace");
|
||||
}
|
||||
|
||||
/* Call setup_delegated_namespaces() the second time to unshare all delegated namespaces. */
|
||||
r = setup_delegated_namespaces(
|
||||
context,
|
||||
params,
|
||||
runtime,
|
||||
/* delegate= */ true,
|
||||
memory_pressure_path,
|
||||
uid,
|
||||
gid,
|
||||
command,
|
||||
needs_sandboxing,
|
||||
has_cap_sys_admin,
|
||||
exit_status);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* Now that the mount namespace has been set up and privileges adjusted, let's look for the thing we
|
||||
* shall execute. */
|
||||
|
||||
|
||||
@@ -2474,6 +2474,12 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
|
||||
return r;
|
||||
}
|
||||
|
||||
if (c->delegate_namespaces != NAMESPACE_FLAGS_INITIAL) {
|
||||
r = serialize_item_format(f, "exec-context-delegate-namespaces", "%lu", c->delegate_namespaces);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
#if HAVE_LIBBPF
|
||||
if (exec_context_restrict_filesystems_set(c)) {
|
||||
char *fs;
|
||||
@@ -3536,6 +3542,10 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
|
||||
r = safe_atolu(val, &c->restrict_namespaces);
|
||||
if (r < 0)
|
||||
return r;
|
||||
} else if ((val = startswith(l, "exec-context-delegate-namespaces="))) {
|
||||
r = safe_atolu(val, &c->delegate_namespaces);
|
||||
if (r < 0)
|
||||
return r;
|
||||
} else if ((val = startswith(l, "exec-context-restrict-filesystems="))) {
|
||||
r = set_ensure_allocated(&c->restrict_filesystems, &string_hash_ops);
|
||||
if (r < 0)
|
||||
|
||||
@@ -634,6 +634,7 @@ void exec_context_init(ExecContext *c) {
|
||||
.timeout_clean_usec = USEC_INFINITY,
|
||||
.capability_bounding_set = CAP_MASK_UNSET,
|
||||
.restrict_namespaces = NAMESPACE_FLAGS_INITIAL,
|
||||
.delegate_namespaces = NAMESPACE_FLAGS_INITIAL,
|
||||
.log_level_max = -1,
|
||||
#if HAVE_SECCOMP
|
||||
.syscall_errno = SECCOMP_ERROR_NUMBER_KILL,
|
||||
|
||||
@@ -350,6 +350,7 @@ struct ExecContext {
|
||||
unsigned long personality;
|
||||
|
||||
unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
|
||||
unsigned long delegate_namespaces; /* The CLONE_NEWxyz flags delegated to the unit's processes */
|
||||
|
||||
Set *restrict_filesystems;
|
||||
bool restrict_filesystems_allow_list:1;
|
||||
|
||||
@@ -73,7 +73,8 @@
|
||||
{{type}}.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof({{type}}, exec_context)
|
||||
{{type}}.SystemCallLog, config_parse_syscall_log, 0, offsetof({{type}}, exec_context)
|
||||
{{type}}.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof({{type}}, exec_context.memory_deny_write_execute)
|
||||
{{type}}.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof({{type}}, exec_context)
|
||||
{{type}}.RestrictNamespaces, config_parse_namespace_flags, 0, offsetof({{type}}, exec_context.restrict_namespaces)
|
||||
{{type}}.DelegateNamespaces, config_parse_namespace_flags, 0, offsetof({{type}}, exec_context.delegate_namespaces)
|
||||
{{type}}.RestrictRealtime, config_parse_bool, 0, offsetof({{type}}, exec_context.restrict_realtime)
|
||||
{{type}}.RestrictSUIDSGID, config_parse_bool, 0, offsetof({{type}}, exec_context.restrict_suid_sgid)
|
||||
{{type}}.RestrictAddressFamilies, config_parse_address_families, 0, offsetof({{type}}, exec_context)
|
||||
|
||||
@@ -3566,7 +3566,7 @@ int config_parse_address_families(
|
||||
}
|
||||
}
|
||||
|
||||
int config_parse_restrict_namespaces(
|
||||
int config_parse_namespace_flags(
|
||||
const char *unit,
|
||||
const char *filename,
|
||||
unsigned line,
|
||||
@@ -3578,24 +3578,25 @@ int config_parse_restrict_namespaces(
|
||||
void *data,
|
||||
void *userdata) {
|
||||
|
||||
ExecContext *c = data;
|
||||
unsigned long flags;
|
||||
unsigned long *flags = data;
|
||||
unsigned long all = UPDATE_FLAG(NAMESPACE_FLAGS_ALL, CLONE_NEWUSER, !streq(lvalue, "DelegateNamespaces"));
|
||||
unsigned long f;
|
||||
bool invert = false;
|
||||
int r;
|
||||
|
||||
if (isempty(rvalue)) {
|
||||
/* Reset to the default. */
|
||||
c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
|
||||
*flags = NAMESPACE_FLAGS_INITIAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Boolean parameter ignores the previous settings */
|
||||
r = parse_boolean(rvalue);
|
||||
if (r > 0) {
|
||||
c->restrict_namespaces = 0;
|
||||
*flags = 0;
|
||||
return 0;
|
||||
} else if (r == 0) {
|
||||
c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
|
||||
*flags = all;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3605,18 +3606,25 @@ int config_parse_restrict_namespaces(
|
||||
}
|
||||
|
||||
/* Not a boolean argument, in this case it's a list of namespace types. */
|
||||
r = namespace_flags_from_string(rvalue, &flags);
|
||||
r = namespace_flags_from_string(rvalue, &f);
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse namespace type string, ignoring: %s", rvalue);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (c->restrict_namespaces == NAMESPACE_FLAGS_INITIAL)
|
||||
if (*flags == NAMESPACE_FLAGS_INITIAL)
|
||||
/* Initial assignment. Just set the value. */
|
||||
c->restrict_namespaces = invert ? (~flags) & NAMESPACE_FLAGS_ALL : flags;
|
||||
f = invert ? (~f) & all : f;
|
||||
else
|
||||
/* Merge the value with the previous one. */
|
||||
SET_FLAG(c->restrict_namespaces, flags, !invert);
|
||||
f = UPDATE_FLAG(*flags, f, !invert);
|
||||
|
||||
if (FLAGS_SET(f, CLONE_NEWUSER) && streq(lvalue, "DelegateNamespaces")) {
|
||||
log_syntax(unit, LOG_WARNING, filename, line, r, "The user namespace cannot be delegated with DelegateNamespaces=, ignoring: %s", rvalue);
|
||||
return 0;
|
||||
}
|
||||
|
||||
*flags = f;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -6359,7 +6367,7 @@ void unit_dump_config_items(FILE *f) {
|
||||
{ config_parse_syscall_errno, "ERRNO" },
|
||||
{ config_parse_syscall_log, "SYSCALLS" },
|
||||
{ config_parse_address_families, "FAMILIES" },
|
||||
{ config_parse_restrict_namespaces, "NAMESPACES" },
|
||||
{ config_parse_namespace_flags, "NAMESPACES" },
|
||||
#endif
|
||||
{ config_parse_restrict_filesystems, "FILESYSTEMS" },
|
||||
{ config_parse_cpu_shares, "SHARES" },
|
||||
|
||||
@@ -127,7 +127,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_working_directory);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_fdname);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_user_group_compat);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_user_group_strv_compat);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_restrict_namespaces);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_namespace_flags);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_restrict_filesystems);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_bind_paths);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_exec_keyring_mode);
|
||||
|
||||
@@ -1667,7 +1667,8 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (streq(field, "RestrictNamespaces")) {
|
||||
if (STR_IN_SET(field, "RestrictNamespaces",
|
||||
"DelegateNamespaces")) {
|
||||
bool invert = false;
|
||||
unsigned long flags;
|
||||
|
||||
|
||||
80
test/units/TEST-07-PID1.delegate-namespaces.sh
Executable file
80
test/units/TEST-07-PID1.delegate-namespaces.sh
Executable file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env bash
|
||||
# SPDX-License-Identifier: LGPL-2.1-or-later
|
||||
# shellcheck disable=SC2016
|
||||
set -eux
|
||||
set -o pipefail
|
||||
|
||||
# shellcheck source=test/units/test-control.sh
|
||||
. "$(dirname "$0")"/test-control.sh
|
||||
# shellcheck source=test/units/util.sh
|
||||
. "$(dirname "$0")"/util.sh
|
||||
|
||||
testcase_mount() {
|
||||
(! systemd-run -p PrivateUsersEx=self -p PrivateMounts=yes --wait --pipe -- mount --bind /usr /home)
|
||||
systemd-run -p PrivateUsersEx=self -p PrivateMounts=yes -p DelegateNamespaces=mnt --wait --pipe -- mount --bind /usr /home
|
||||
}
|
||||
|
||||
# Verify delegation of the network namespace.
#
# Without DelegateNamespaces=net, the unit's processes must not be able to
# create network interfaces inside the private network namespace. With it,
# creating a veth pair must succeed.
testcase_network() {
    # Not delegated: creating a veth pair is expected to fail.
    (! systemd-run -p PrivateUsersEx=self -p PrivateNetwork=yes --wait --pipe -- ip link add veth1 type veth peer name veth2)
    # Delegated: the same operation is expected to succeed. This must use PrivateNetwork=yes together with
    # DelegateNamespaces=net so that it mirrors the negative case above and actually exercises the network
    # namespace rather than the mount namespace.
    systemd-run -p PrivateUsersEx=self -p PrivateNetwork=yes -p DelegateNamespaces=net --wait --pipe -- ip link add veth1 type veth peer name veth2
}
|
||||
|
||||
testcase_cgroup() {
|
||||
(! systemd-run -p PrivateUsersEx=self -p ProtectControlGroupsEx=private --wait --pipe -- sh -c 'echo 0 >/sys/fs/cgroup/cgroup.pressure')
|
||||
systemd-run -p PrivateUsersEx=self -p ProtectControlGroupsEx=private -p DelegateNamespaces=cgroup --wait --pipe -- sh -c 'echo 0 >/sys/fs/cgroup/cgroup.pressure'
|
||||
}
|
||||
|
||||
testcase_pid() {
|
||||
(! systemd-run -p PrivateUsersEx=self -p PrivatePIDs=yes --wait --pipe -- sh -c 'echo 5 >/proc/sys/kernel/ns_last_pid')
|
||||
systemd-run -p PrivateUsersEx=self -p PrivatePIDs=yes -p DelegateNamespaces=pid --wait --pipe -- sh -c 'echo 5 >/proc/sys/kernel/ns_last_pid'
|
||||
}
|
||||
|
||||
testcase_uts() {
|
||||
(! systemd-run -p PrivateUsersEx=self -p ProtectHostnameEx=private --wait --pipe -- hostname abc)
|
||||
systemd-run -p PrivateUsersEx=self -p ProtectHostnameEx=private -p DelegateNamespaces=uts --wait --pipe -- hostname abc
|
||||
}
|
||||
|
||||
testcase_implied_private_users_self() {
|
||||
# If PrivateUsers= is not explicitly set, PrivateUsers=self is implied.
|
||||
systemd-run -p PrivateMounts=yes -p DelegateNamespaces=mnt --wait --pipe -- mount --bind /usr /home
|
||||
# If PrivateUsers= is explicitly set, it is not overridden.
|
||||
systemd-run -p PrivateUsersEx=identity -p PrivateMounts=yes -p DelegateNamespaces=mnt --wait --pipe -- mount --bind /usr /home
|
||||
systemd-run -p PrivateUsersEx=identity -p PrivateMounts=yes -p DelegateNamespaces=mnt --wait bash -c 'test "$(cat /proc/self/uid_map)" == " 0 0 65536"'
|
||||
}
|
||||
|
||||
# Run a command with DelegateNamespaces=yes combined with a broad set of other
# sandboxing settings, to check that delegation coexists with them.
#
# The minimal test image is unpacked into a scratch directory that is used as
# RootDirectory= of the transient unit; the scratch directory is removed
# afterwards.
testcase_multiple_features() {
    unsquashfs -no-xattrs -d /tmp/TEST-07-PID1-delegate-namespaces-root /usr/share/minimal_0.raw

    # Every continued line keeps a space before the trailing backslash so that adjacent arguments can
    # never be fused together, regardless of how the following line is indented.
    systemd-run \
        -p PrivatePIDs=yes \
        -p RootDirectory=/tmp/TEST-07-PID1-delegate-namespaces-root \
        -p ProcSubset=pid \
        -p BindReadOnlyPaths=/usr/share \
        -p NoNewPrivileges=yes \
        -p ProtectSystem=strict \
        -p User=testuser \
        -p Group=testuser \
        -p RuntimeDirectory=abc \
        -p StateDirectory=qed \
        -p InaccessiblePaths=/usr/include \
        -p TemporaryFileSystem=/home \
        -p PrivateTmp=yes \
        -p PrivateDevices=yes \
        -p PrivateNetwork=yes \
        -p PrivateUsersEx=self \
        -p PrivateIPC=yes \
        -p ProtectHostname=yes \
        -p ProtectClock=yes \
        -p ProtectKernelTunables=yes \
        -p ProtectKernelModules=yes \
        -p ProtectKernelLogs=yes \
        -p ProtectControlGroupsEx=private \
        -p LockPersonality=yes \
        -p Environment=ABC=QED \
        -p DelegateNamespaces=yes \
        --wait \
        --pipe \
        grep MARKER=1 /etc/os-release

    rm -rf /tmp/TEST-07-PID1-delegate-namespaces-root
}
|
||||
Reference in New Issue
Block a user