diff --git a/man/systemd-run.xml b/man/systemd-run.xml
index 7d4b7011c5..d18b80faa8 100644
--- a/man/systemd-run.xml
+++ b/man/systemd-run.xml
@@ -291,6 +291,32 @@
+
+
+
+ Runs the service process with the specified root directory. Also see
+ RootDirectory= in
+ systemd.exec(5).
+
+ Note that the path is looked up inside the file system namespace that systemd-run is running
+ in, which might be different than the file system namespace the manager process is running in. Use
+ the RootDirectory= property directly if you want the path to be looked up in the
+ manager process's file system namespace.
+
+
+
+
+
+
+
+
+
+ Similar to --root-directory=, but uses the root directory of the
+ systemd-run process as the root directory to execute the service in.
+
+
+
+
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
index b3c3c32cea..2ff6272bd4 100644
--- a/src/core/dbus-service.c
+++ b/src/core/dbus-service.c
@@ -799,6 +799,9 @@ static int bus_service_set_transient_property(
return 1;
}
+ if (streq(name, "RootDirectoryFileDescriptor"))
+ return bus_set_transient_exec_context_fd(u, &s->root_directory_fd, &s->exec_context.root_directory_as_fd, message, flags, error);
+
return 0;
}
diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c
index 14e15cb8a6..92d6223beb 100644
--- a/src/core/exec-invoke.c
+++ b/src/core/exec-invoke.c
@@ -3470,7 +3470,7 @@ static bool insist_on_sandboxing(
if (context->n_temporary_filesystems > 0)
return true;
- if (root_dir || root_image)
+ if (root_dir || root_image || context->root_directory_as_fd)
return true;
if (context->n_mount_images > 0)
@@ -3506,6 +3506,7 @@ static int setup_ephemeral(
int r;
assert(context);
+ assert(!context->root_directory_as_fd);
assert(runtime);
assert(root_image);
assert(root_directory);
@@ -3645,6 +3646,7 @@ static int pick_versions(
int r;
assert(context);
+ assert(!context->root_directory_as_fd);
assert(params);
assert(ret_root_image);
assert(ret_root_directory);
@@ -3733,7 +3735,7 @@ static int apply_mount_namespace(
CLEANUP_ARRAY(bind_mounts, n_bind_mounts, bind_mount_free_many);
- if (params->flags & EXEC_APPLY_CHROOT) {
+ if (params->flags & EXEC_APPLY_CHROOT && !context->root_directory_as_fd) {
r = pick_versions(
context,
params,
@@ -3855,6 +3857,7 @@ static int apply_mount_namespace(
.root_directory = root_dir,
.root_image = root_image,
+ .root_directory_fd = params->flags & EXEC_APPLY_CHROOT ? params->root_directory_fd : -EBADF,
.root_image_options = context->root_image_options,
.root_image_policy = context->root_image_policy ?: &image_policy_service,
@@ -4495,6 +4498,7 @@ static bool exec_needs_cap_sys_admin(const ExecContext *context, const ExecParam
context->n_bind_mounts > 0 ||
context->n_temporary_filesystems > 0 ||
context->root_directory ||
+ context->root_directory_as_fd ||
!strv_isempty(context->extension_directories) ||
context->root_image ||
context->n_mount_images > 0 ||
@@ -5136,6 +5140,12 @@ int exec_invoke(
}
#endif
+ r = add_shifted_fd(&keep_fds, &n_keep_fds, &params->root_directory_fd);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_error_errno(r, "Failed to collect shifted fd: %m");
+ }
+
r = close_remaining_fds(params, runtime, socket_fd, keep_fds, n_keep_fds);
if (r < 0) {
*exit_status = EXIT_FDS;
diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c
index 8033ecdb8e..575d34ff24 100644
--- a/src/core/execute-serialize.c
+++ b/src/core/execute-serialize.c
@@ -1177,6 +1177,10 @@ static int exec_parameters_serialize(const ExecParameters *p, const ExecContext
if (r < 0)
return r;
+ r = serialize_fd(f, fds, "exec-parameters-root-directory-fd", p->root_directory_fd);
+ if (r < 0)
+ return r;
+
r = serialize_fd(f, fds, "exec-parameters-exec-fd", p->exec_fd);
if (r < 0)
return r;
@@ -1422,6 +1426,16 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
continue;
close_and_replace(p->stderr_fd, fd);
+
+ } else if ((val = startswith(l, "exec-parameters-root-directory-fd="))) {
+ int fd;
+
+ fd = deserialize_fd(fds, val);
+ if (fd < 0)
+ continue;
+
+ close_and_replace(p->root_directory_fd, fd);
+
} else if ((val = startswith(l, "exec-parameters-exec-fd="))) {
int fd;
@@ -1994,6 +2008,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
if (r < 0)
return r;
+ r = serialize_bool_elide(f, "exec-context-root-directory-as-fd", c->root_directory_as_fd);
+ if (r < 0)
+ return r;
+
switch (c->std_input) {
case EXEC_INPUT_NAMED_FD:
r = serialize_item(f, "exec-context-std-input-fd-name", c->stdio_fdname[STDIN_FILENO]);
@@ -3000,6 +3018,11 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
if (r < 0)
return r;
c->stdio_as_fds = r;
+ } else if ((val = startswith(l, "exec-context-root-directory-as-fd="))) {
+ r = parse_boolean(val);
+ if (r < 0)
+ return r;
+ c->root_directory_as_fd = r;
} else if ((val = startswith(l, "exec-context-std-input-fd-name="))) {
r = free_and_strdup(&c->stdio_fdname[STDIN_FILENO], val);
if (r < 0)
diff --git a/src/core/execute.c b/src/core/execute.c
index 5b01758733..5d4c26934d 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -267,6 +267,9 @@ bool exec_needs_mount_namespace(
if (context->root_image)
return true;
+ if (context->root_directory_as_fd)
+ return true;
+
if (!strv_isempty(context->read_write_paths) ||
!strv_isempty(context->read_only_paths) ||
!strv_isempty(context->inaccessible_paths) ||
@@ -354,7 +357,7 @@ const char* exec_get_private_notify_socket_path(const ExecContext *context, cons
if (!needs_sandboxing)
return NULL;
- if (!context->root_directory && !context->root_image)
+ if (!context->root_directory && !context->root_image && !context->root_directory_as_fd)
return NULL;
if (!exec_context_get_effective_mount_apivfs(context))
@@ -2045,9 +2048,9 @@ bool exec_context_restrict_filesystems_set(const ExecContext *c) {
bool exec_context_with_rootfs(const ExecContext *c) {
assert(c);
- /* Checks if RootDirectory= or RootImage= are used */
+ /* Checks if RootDirectory=, RootImage= or RootDirectoryFileDescriptor= are used */
- return !empty_or_root(c->root_directory) || c->root_image;
+ return !empty_or_root(c->root_directory) || c->root_image || c->root_directory_as_fd;
}
int exec_context_has_vpicked_extensions(const ExecContext *context) {
@@ -2846,6 +2849,7 @@ void exec_params_deep_clear(ExecParameters *p) {
p->stdin_fd = safe_close(p->stdin_fd);
p->stdout_fd = safe_close(p->stdout_fd);
p->stderr_fd = safe_close(p->stderr_fd);
+ p->root_directory_fd = safe_close(p->root_directory_fd);
p->notify_socket = mfree(p->notify_socket);
diff --git a/src/core/execute.h b/src/core/execute.h
index 2c79a37d54..1ce78af6af 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -218,6 +218,7 @@ typedef struct ExecContext {
/* At least one of stdin/stdout/stderr was initialized from an fd passed in. This boolean survives
* the fds being closed. This only makes sense for transient units. */
bool stdio_as_fds;
+ bool root_directory_as_fd;
char *stdio_fdname[3];
char *stdio_file[3];
@@ -418,6 +419,7 @@ typedef struct ExecParameters {
int stdin_fd;
int stdout_fd;
int stderr_fd;
+ int root_directory_fd;
/* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done. */
int exec_fd;
@@ -449,6 +451,7 @@ typedef struct ExecParameters {
.stdin_fd = -EBADF, \
.stdout_fd = -EBADF, \
.stderr_fd = -EBADF, \
+ .root_directory_fd = -EBADF, \
.exec_fd = -EBADF, \
.bpf_restrict_fs_map_fd = -EBADF, \
.user_lookup_fd = -EBADF, \
diff --git a/src/core/fuzz-execute-serialize.c b/src/core/fuzz-execute-serialize.c
index 1e72918a39..8b8267f3c7 100644
--- a/src/core/fuzz-execute-serialize.c
+++ b/src/core/fuzz-execute-serialize.c
@@ -58,6 +58,7 @@ static void exec_fuzz_one(FILE *f, FDSet *fdset) {
params.stdin_fd = -EBADF;
params.stdout_fd = -EBADF;
params.stderr_fd = -EBADF;
+ params.root_directory_fd = -EBADF;
params.exec_fd = -EBADF;
params.user_lookup_fd = -EBADF;
params.bpf_restrict_fs_map_fd = -EBADF;
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 6cf4f8b0a1..0e12a16592 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -1359,7 +1359,7 @@ static int mount_private_dev(const MountEntry *m, const NamespaceParameters *p)
/* We assume /run/systemd/journal/ is available if not changing root, which isn't entirely accurate
* but shouldn't matter, as either way the user would get ENOENT when accessing /dev/log */
- if ((!p->root_image && !p->root_directory) || p->bind_log_sockets) {
+ if ((!p->root_image && !p->root_directory && p->root_directory_fd < 0) || p->bind_log_sockets) {
const char *devlog = strjoina(temporary_mount, "/dev/log");
if (symlink("/run/systemd/journal/dev-log", devlog) < 0)
log_debug_errno(errno,
@@ -2948,7 +2948,18 @@ int setup_namespace(const NamespaceParameters *p, char **reterr_path) {
if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
return log_debug_errno(errno, "Failed to remount '/' as SLAVE: %m");
- if (p->root_image) {
+ if (p->root_directory_fd >= 0) {
+
+ if (move_mount(p->root_directory_fd, "", AT_FDCWD, root, MOVE_MOUNT_F_EMPTY_PATH) < 0)
+ return log_debug_errno(errno, "Failed to move detached mount to '%s': %m", root);
+
+ /* We just remounted / as slave, but that didn't affect the detached mount that we just
+ * mounted, so remount that one as slave recursive as well now. */
+
+ if (mount(NULL, root, NULL, MS_SLAVE|MS_REC, NULL) < 0)
+ return log_debug_errno(errno, "Failed to remount '%s' as SLAVE: %m", root);
+
+ } else if (p->root_image) {
/* A root image is specified, mount it to the right place */
r = dissected_image_mount(
dissected_image,
@@ -2992,7 +3003,7 @@ int setup_namespace(const NamespaceParameters *p, char **reterr_path) {
}
/* Try to set up the new root directory before mounting anything else there. */
- if (p->root_image || p->root_directory)
+ if (p->root_image || p->root_directory || p->root_directory_fd >= 0)
(void) base_filesystem_create(root, UID_INVALID, GID_INVALID);
/* Now make the magic happen */
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 66a88ae3c1..86c09ec231 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -128,6 +128,7 @@ typedef struct MountImage {
typedef struct NamespaceParameters {
RuntimeScope runtime_scope;
+ int root_directory_fd;
const char *root_directory;
const char *root_image;
const MountOptions *root_image_options;
diff --git a/src/core/service.c b/src/core/service.c
index 3d7861f377..f45d0c4801 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -165,6 +165,7 @@ static void service_init(Unit *u) {
s->type = _SERVICE_TYPE_INVALID;
s->socket_fd = -EBADF;
s->stdin_fd = s->stdout_fd = s->stderr_fd = -EBADF;
+ s->root_directory_fd = -EBADF;
s->guess_main_pid = true;
s->main_pid = PIDREF_NULL;
s->control_pid = PIDREF_NULL;
@@ -542,6 +543,7 @@ static void service_done(Unit *u) {
service_release_stdio_fd(s);
service_release_fd_store(s);
service_release_extra_fds(s);
+ s->root_directory_fd = asynchronous_close(s->root_directory_fd);
s->mount_request = sd_bus_message_unref(s->mount_request);
}
@@ -1108,6 +1110,9 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
f,
prefix);
+ if (s->root_directory_fd >= 0)
+ (void) service_dump_fd(s->root_directory_fd, "Root Directory File Descriptor", "", f, prefix);
+
if (s->open_files)
LIST_FOREACH(open_files, of, s->open_files) {
_cleanup_free_ char *ofs = NULL;
@@ -1925,6 +1930,7 @@ static int service_spawn_internal(
exec_params.stdin_fd = s->stdin_fd;
exec_params.stdout_fd = s->stdout_fd;
exec_params.stderr_fd = s->stderr_fd;
+ exec_params.root_directory_fd = s->root_directory_fd;
r = exec_spawn(UNIT(s),
c,
@@ -2834,6 +2840,7 @@ static void service_enter_refresh_extensions(Service *s) {
.n_extension_images = s->exec_context.n_extension_images,
.extension_directories = s->exec_context.extension_directories,
.extension_image_policy = s->exec_context.extension_image_policy,
+ .root_directory_fd = -EBADF,
};
/* Only reload confext, and not sysext as they also typically contain the executable(s) used
@@ -3226,13 +3233,19 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
r = serialize_fd(f, fds, "stdin-fd", s->stdin_fd);
if (r < 0)
return r;
+
r = serialize_fd(f, fds, "stdout-fd", s->stdout_fd);
if (r < 0)
return r;
+
r = serialize_fd(f, fds, "stderr-fd", s->stderr_fd);
if (r < 0)
return r;
+ r = serialize_fd(f, fds, "root-directory-fd", s->root_directory_fd);
+ if (r < 0)
+ return r;
+
if (s->exec_fd_event_source) {
r = serialize_fd(f, fds, "exec-fd", sd_event_source_get_io_fd(s->exec_fd_event_source));
if (r < 0)
@@ -3637,6 +3650,13 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
if (s->stderr_fd >= 0)
s->exec_context.stdio_as_fds = true;
+ } else if (streq(key, "root-directory-fd")) {
+
+ asynchronous_close(s->root_directory_fd);
+ s->root_directory_fd = deserialize_fd(fds, value);
+ if (s->root_directory_fd >= 0)
+ s->exec_context.root_directory_as_fd = true;
+
} else if (streq(key, "exec-fd")) {
_cleanup_close_ int fd = -EBADF;
@@ -5589,6 +5609,7 @@ static void service_release_resources(Unit *u) {
service_release_socket_fd(s);
service_release_stdio_fd(s);
service_release_extra_fds(s);
+ s->root_directory_fd = asynchronous_close(s->root_directory_fd);
if (s->fd_store_preserve_mode != EXEC_PRESERVE_YES)
service_release_fd_store(s);
@@ -5622,7 +5643,10 @@ int service_determine_exec_selinux_label(Service *s, char **ret) {
return -ENODATA;
_cleanup_free_ char *path = NULL;
- r = chase(c->path, s->exec_context.root_directory, CHASE_PREFIX_ROOT|CHASE_TRIGGER_AUTOFS, &path, NULL);
+ if (s->exec_context.root_directory_as_fd)
+ r = chaseat(s->root_directory_fd, c->path, CHASE_AT_RESOLVE_IN_ROOT|CHASE_TRIGGER_AUTOFS, &path, NULL);
+ else
+ r = chase(c->path, s->exec_context.root_directory, CHASE_PREFIX_ROOT|CHASE_TRIGGER_AUTOFS, &path, NULL);
if (r < 0) {
log_unit_debug_errno(UNIT(s), r, "Failed to resolve service binary '%s', ignoring.", c->path);
return -ENODATA;
diff --git a/src/core/service.h b/src/core/service.h
index c81b5b7637..b69f3008de 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -224,6 +224,9 @@ typedef struct Service {
int stdout_fd;
int stderr_fd;
+ /* File descriptor received from RootDirectoryFileDescriptor= */
+ int root_directory_fd;
+
/* If service spawned from transient unit, extra file descriptors can be passed via dbus API */
ServiceExtraFD *extra_fds;
size_t n_extra_fds;
diff --git a/src/run/run.c b/src/run/run.c
index b5030b9cb7..47757d19d0 100644
--- a/src/run/run.c
+++ b/src/run/run.c
@@ -4,6 +4,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -106,6 +107,7 @@ static bool arg_quiet = false;
static bool arg_verbose = false;
static bool arg_aggressive_gc = false;
static char *arg_working_directory = NULL;
+static char *arg_root_directory = NULL;
static bool arg_shell = false;
static JobMode arg_job_mode = JOB_FAIL;
static char **arg_cmdline = NULL;
@@ -168,6 +170,8 @@ static int help(void) {
" --nice=NICE Nice level\n"
" --working-directory=PATH Set working directory\n"
" -d --same-dir Inherit working directory from caller\n"
+ " --root-directory=PATH Set root directory\n"
+ " -R --same-root-dir Inherit root directory from caller\n"
" -E --setenv=NAME[=VALUE] Set environment variable\n"
" -t --pty Run service on pseudo TTY as STDIN/STDOUT/\n"
" STDERR\n"
@@ -326,6 +330,7 @@ static int parse_argv(int argc, char *argv[]) {
ARG_NO_ASK_PASSWORD,
ARG_WAIT,
ARG_WORKING_DIRECTORY,
+ ARG_ROOT_DIRECTORY,
ARG_SHELL,
ARG_JOB_MODE,
ARG_IGNORE_FAILURE,
@@ -379,6 +384,8 @@ static int parse_argv(int argc, char *argv[]) {
{ "collect", no_argument, NULL, 'G' },
{ "working-directory", required_argument, NULL, ARG_WORKING_DIRECTORY },
{ "same-dir", no_argument, NULL, 'd' },
+ { "root-directory", required_argument, NULL, ARG_ROOT_DIRECTORY },
+ { "same-root-dir", no_argument, NULL, 'R' },
{ "shell", no_argument, NULL, 'S' },
{ "job-mode", required_argument, NULL, ARG_JOB_MODE },
{ "ignore-failure", no_argument, NULL, ARG_IGNORE_FAILURE },
@@ -388,7 +395,7 @@ static int parse_argv(int argc, char *argv[]) {
{},
};
- bool with_trigger = false;
+ bool with_trigger = false, same_dir = false;
int r, c;
assert(argc >= 0);
@@ -653,6 +660,7 @@ static int parse_argv(int argc, char *argv[]) {
if (r < 0)
return r;
+ same_dir = false;
break;
case 'd': {
@@ -666,9 +674,25 @@ static int parse_argv(int argc, char *argv[]) {
arg_working_directory = mfree(arg_working_directory);
else
free_and_replace(arg_working_directory, p);
+
+ same_dir = true;
break;
}
+ case ARG_ROOT_DIRECTORY:
+ r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_root_directory);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case 'R':
+ r = free_and_strdup_warn(&arg_root_directory, "/");
+ if (r < 0)
+ return r;
+
+ break;
+
case 'G':
arg_aggressive_gc = true;
break;
@@ -842,6 +866,10 @@ static int parse_argv(int argc, char *argv[]) {
"--wait may not be combined with --scope.");
}
+ if (same_dir && arg_root_directory && !path_equal(arg_root_directory, "/"))
+ return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
+ "--same-dir cannot be used with a root directory other than '/'");
+
return 1;
}
@@ -1406,6 +1434,16 @@ static int transient_service_set_properties(sd_bus_message *m, const char *pty_p
return bus_log_create_error(r);
}
+ if (arg_root_directory) {
+ _cleanup_close_ int fd = open_tree(AT_FDCWD, arg_root_directory, OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_RECURSIVE);
+ if (fd < 0)
+ return log_error_errno(errno, "Failed to clone mount tree at '%s': %m", arg_root_directory);
+
+ r = sd_bus_message_append(m, "(sv)", "RootDirectoryFileDescriptor", "h", fd);
+ if (r < 0)
+ return bus_log_create_error(r);
+ }
+
if (pty_path) {
r = sd_bus_message_append(m, "(sv)(sv)(sv)(sv)",
"TTYPath", "s", pty_path,
diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c
index 7b66fb472d..c8b4b181ba 100644
--- a/src/test/test-namespace.c
+++ b/src/test/test-namespace.c
@@ -201,6 +201,7 @@ TEST(protect_kernel_logs) {
static const NamespaceParameters p = {
.runtime_scope = RUNTIME_SCOPE_SYSTEM,
.protect_kernel_logs = true,
+ .root_directory_fd = -EBADF,
};
pid_t pid;
diff --git a/src/test/test-ns.c b/src/test/test-ns.c
index 245bf34582..c6d6f2e423 100644
--- a/src/test/test-ns.c
+++ b/src/test/test-ns.c
@@ -81,6 +81,7 @@ int main(int argc, char *argv[]) {
.runtime_scope = RUNTIME_SCOPE_SYSTEM,
.root_directory = root_directory,
+ .root_directory_fd = -EBADF,
.read_write_paths = (char**) writable,
.read_only_paths = (char**) readonly,
diff --git a/test/units/TEST-50-DISSECT.dissect.sh b/test/units/TEST-50-DISSECT.dissect.sh
index 7daf7236ee..f6e5c07bc9 100755
--- a/test/units/TEST-50-DISSECT.dissect.sh
+++ b/test/units/TEST-50-DISSECT.dissect.sh
@@ -890,6 +890,9 @@ systemctl stop test-root-ephemeral
timeout 10 bash -c 'until test -z "$(ls -A /var/lib/systemd/ephemeral-trees)"; do sleep .5; done'
test ! -f /tmp/img/abc
+# Test RootDirectoryFileDescriptor=
+systemd-run --wait --pipe --root-directory=/tmp/img -- grep -q 'MARKER=1' /usr/lib/os-release
+
systemd-dissect --mtree /tmp/img >/dev/null
systemd-dissect --list /tmp/img >/dev/null