From 16498617443da94533ef9ae28be0ffaace40c526 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 9 Mar 2018 14:49:15 +0100 Subject: [PATCH 1/2] core: fall back to bind-mounts for PrivateDevices= execution environments In environments where CAP_MKNOD is not available or inside user namespaces it is still desirable to enable services to use PrivateDevices= . So fall back to using bind-mounts on EPERM. --- src/core/namespace.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/src/core/namespace.c b/src/core/namespace.c index 0e9c7b8fb4..9195037a36 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -578,7 +578,7 @@ static void drop_outside_root(const char *root_directory, MountEntry *m, unsigne *n = t - m; } -static int clone_device_node(const char *d, const char *temporary_mount) { +static int clone_device_node(const char *d, const char *temporary_mount, bool *make_devnode) { const char *dn; struct stat st; int r; @@ -598,11 +598,35 @@ static int clone_device_node(const char *d, const char *temporary_mount) { dn = strjoina(temporary_mount, d); - mac_selinux_create_file_prepare(d, st.st_mode); - r = mknod(dn, st.st_mode, st.st_rdev); + if (*make_devnode) { + mac_selinux_create_file_prepare(d, st.st_mode); + r = mknod(dn, st.st_mode, st.st_rdev); + mac_selinux_create_file_clear(); + + if (r == 0) + return 1; + if (errno != EPERM) + return log_debug_errno(errno, "mknod failed for %s: %m", d); + + *make_devnode = false; + } + + /* We're about to fallback to bind-mounting the device + * node. So create a dummy bind-mount target. 
*/ + mac_selinux_create_file_prepare(d, 0); + r = mknod(dn, S_IFREG, 0); mac_selinux_create_file_clear(); - if (r < 0) - return log_debug_errno(errno, "mknod failed for %s: %m", d); + + if (r < 0 && errno != EEXIST) + return log_debug_errno(errno, "mknod fallback failed for %s: %m", d); + + /* Fall back to bind-mounting: + * The assumption here is that all used device nodes carry standard + * properties. Specifically, the device nodes we bind-mount should + * either be owned by root:root or root:tty (e.g. /dev/tty, /dev/ptmx) + * and should not carry ACLs. */ + if (mount(d, dn, NULL, MS_BIND, NULL) < 0) + return log_debug_errno(errno, "mount failed for %s: %m", d); return 1; } @@ -618,6 +642,7 @@ static int mount_private_dev(MountEntry *m) { char temporary_mount[] = "/tmp/namespace-dev-XXXXXX"; const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL; + bool can_mknod = true; _cleanup_umask_ mode_t u; int r; @@ -658,7 +683,7 @@ static int mount_private_dev(MountEntry *m) { goto fail; } } else { - r = clone_device_node("/dev/ptmx", temporary_mount); + r = clone_device_node("/dev/ptmx", temporary_mount, &can_mknod); if (r < 0) goto fail; if (r == 0) { @@ -687,7 +712,7 @@ static int mount_private_dev(MountEntry *m) { (void) symlink("/run/systemd/journal/dev-log", devlog); NULSTR_FOREACH(d, devnodes) { - r = clone_device_node(d, temporary_mount); + r = clone_device_node(d, temporary_mount, &can_mknod); if (r < 0) goto fail; } From af984e137e7f53ca3e2fd885b03a25e17fdd0fad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 12 Apr 2018 17:48:22 +0200 Subject: [PATCH 2/2] core/namespace: rework the return semantics of clone_device_node yet again Returning 0 on not-found/wrong-type is confusing. Let's return -ENXIO in that case instead, and explicitly ignore it in the call site where we want to do that. 
I think this is clearer and less likely to be used erroneously in case another call site is added. Cf. 152c475f95c7b9b20d293 and 98b1d2b8d9ea27087a598. --- src/core/namespace.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/core/namespace.c b/src/core/namespace.c index 9195037a36..9ed63390bb 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -585,7 +585,7 @@ static int clone_device_node(const char *d, const char *temporary_mount, bool *m if (stat(d, &st) < 0) { if (errno == ENOENT) - return 0; + return -ENXIO; return -errno; } @@ -594,7 +594,7 @@ static int clone_device_node(const char *d, const char *temporary_mount, bool *m return -EINVAL; if (st.st_rdev == 0) - return 0; + return -ENXIO; dn = strjoina(temporary_mount, d); @@ -604,7 +604,7 @@ static int clone_device_node(const char *d, const char *temporary_mount, bool *m mac_selinux_create_file_clear(); if (r == 0) - return 1; + return 0; if (errno != EPERM) return log_debug_errno(errno, "mknod failed for %s: %m", d); @@ -628,7 +628,7 @@ static int clone_device_node(const char *d, const char *temporary_mount, bool *m if (mount(d, dn, NULL, MS_BIND, NULL) < 0) return log_debug_errno(errno, "mount failed for %s: %m", d); - return 1; + return 0; } static int mount_private_dev(MountEntry *m) { @@ -686,10 +686,6 @@ static int mount_private_dev(MountEntry *m) { r = clone_device_node("/dev/ptmx", temporary_mount, &can_mknod); if (r < 0) goto fail; - if (r == 0) { - r = -ENXIO; - goto fail; - } } devshm = strjoina(temporary_mount, "/dev/shm"); @@ -713,7 +709,8 @@ static int mount_private_dev(MountEntry *m) { NULSTR_FOREACH(d, devnodes) { r = clone_device_node(d, temporary_mount, &can_mknod); - if (r < 0) + /* ENXIO means the *source* is not a device file, skip creation in that case */ + if (r < 0 && r != -ENXIO) goto fail; }