Files
systemd/src/basic/util.c
Lennart Poettering d9ab2bcf05 util: when determining the amount of memory on this system, take cgroup limit into account
When determining the amount of RAM in the system, let's make sure we also read
the root-level cgroup memory limit into account. This isn't particularly useful
on the host, but in containers it makes sure that whatever memory the container
got assigned is actually used for RAM size calculations.
2016-06-14 19:50:38 +02:00

836 lines
24 KiB
C

/***
This file is part of systemd.
Copyright 2010 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <alloca.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/statfs.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <unistd.h>
#include "alloc-util.h"
#include "build.h"
#include "cgroup-util.h"
#include "def.h"
#include "dirent-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "formats-util.h"
#include "hashmap.h"
#include "hostname-util.h"
#include "log.h"
#include "macro.h"
#include "missing.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "stat-util.h"
#include "string-util.h"
#include "strv.h"
#include "time-util.h"
#include "umask-util.h"
#include "user-util.h"
#include "util.h"
/* Put this test here for a lack of better place */
assert_cc(EAGAIN == EWOULDBLOCK);
int saved_argc = 0;
char **saved_argv = NULL;
static int saved_in_initrd = -1;
size_t page_size(void) {
static thread_local size_t pgsz = 0;
long r;
if (_likely_(pgsz > 0))
return pgsz;
r = sysconf(_SC_PAGESIZE);
assert(r > 0);
pgsz = (size_t) r;
return pgsz;
}
static int do_execute(char **directories, usec_t timeout, char *argv[]) {
_cleanup_hashmap_free_free_ Hashmap *pids = NULL;
_cleanup_set_free_free_ Set *seen = NULL;
char **directory;
/* We fork this all off from a child process so that we can
* somewhat cleanly make use of SIGALRM to set a time limit */
(void) reset_all_signal_handlers();
(void) reset_signal_mask();
assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);
pids = hashmap_new(NULL);
if (!pids)
return log_oom();
seen = set_new(&string_hash_ops);
if (!seen)
return log_oom();
STRV_FOREACH(directory, directories) {
_cleanup_closedir_ DIR *d;
struct dirent *de;
d = opendir(*directory);
if (!d) {
if (errno == ENOENT)
continue;
return log_error_errno(errno, "Failed to open directory %s: %m", *directory);
}
FOREACH_DIRENT(de, d, break) {
_cleanup_free_ char *path = NULL;
pid_t pid;
int r;
if (!dirent_is_file(de))
continue;
if (set_contains(seen, de->d_name)) {
log_debug("%1$s/%2$s skipped (%2$s was already seen).", *directory, de->d_name);
continue;
}
r = set_put_strdup(seen, de->d_name);
if (r < 0)
return log_oom();
path = strjoin(*directory, "/", de->d_name, NULL);
if (!path)
return log_oom();
if (null_or_empty_path(path)) {
log_debug("%s is empty (a mask).", path);
continue;
}
pid = fork();
if (pid < 0) {
log_error_errno(errno, "Failed to fork: %m");
continue;
} else if (pid == 0) {
char *_argv[2];
assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);
if (!argv) {
_argv[0] = path;
_argv[1] = NULL;
argv = _argv;
} else
argv[0] = path;
execv(path, argv);
return log_error_errno(errno, "Failed to execute %s: %m", path);
}
log_debug("Spawned %s as " PID_FMT ".", path, pid);
r = hashmap_put(pids, PID_TO_PTR(pid), path);
if (r < 0)
return log_oom();
path = NULL;
}
}
/* Abort execution of this process after the timout. We simply
* rely on SIGALRM as default action terminating the process,
* and turn on alarm(). */
if (timeout != USEC_INFINITY)
alarm((timeout + USEC_PER_SEC - 1) / USEC_PER_SEC);
while (!hashmap_isempty(pids)) {
_cleanup_free_ char *path = NULL;
pid_t pid;
pid = PTR_TO_PID(hashmap_first_key(pids));
assert(pid > 0);
path = hashmap_remove(pids, PID_TO_PTR(pid));
assert(path);
wait_for_terminate_and_warn(path, pid, true);
}
return 0;
}
void execute_directories(const char* const* directories, usec_t timeout, char *argv[]) {
pid_t executor_pid;
int r;
char *name;
char **dirs = (char**) directories;
assert(!strv_isempty(dirs));
name = basename(dirs[0]);
assert(!isempty(name));
/* Executes all binaries in the directories in parallel and waits
* for them to finish. Optionally a timeout is applied. If a file
* with the same name exists in more than one directory, the
* earliest one wins. */
executor_pid = fork();
if (executor_pid < 0) {
log_error_errno(errno, "Failed to fork: %m");
return;
} else if (executor_pid == 0) {
r = do_execute(dirs, timeout, argv);
_exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
}
wait_for_terminate_and_warn(name, executor_pid, true);
}
bool plymouth_running(void) {
return access("/run/plymouth/pid", F_OK) >= 0;
}
bool display_is_local(const char *display) {
assert(display);
return
display[0] == ':' &&
display[1] >= '0' &&
display[1] <= '9';
}
int socket_from_display(const char *display, char **path) {
size_t k;
char *f, *c;
assert(display);
assert(path);
if (!display_is_local(display))
return -EINVAL;
k = strspn(display+1, "0123456789");
f = new(char, strlen("/tmp/.X11-unix/X") + k + 1);
if (!f)
return -ENOMEM;
c = stpcpy(f, "/tmp/.X11-unix/X");
memcpy(c, display+1, k);
c[k] = 0;
*path = f;
return 0;
}
int block_get_whole_disk(dev_t d, dev_t *ret) {
char *p, *s;
int r;
unsigned n, m;
assert(ret);
/* If it has a queue this is good enough for us */
if (asprintf(&p, "/sys/dev/block/%u:%u/queue", major(d), minor(d)) < 0)
return -ENOMEM;
r = access(p, F_OK);
free(p);
if (r >= 0) {
*ret = d;
return 0;
}
/* If it is a partition find the originating device */
if (asprintf(&p, "/sys/dev/block/%u:%u/partition", major(d), minor(d)) < 0)
return -ENOMEM;
r = access(p, F_OK);
free(p);
if (r < 0)
return -ENOENT;
/* Get parent dev_t */
if (asprintf(&p, "/sys/dev/block/%u:%u/../dev", major(d), minor(d)) < 0)
return -ENOMEM;
r = read_one_line_file(p, &s);
free(p);
if (r < 0)
return r;
r = sscanf(s, "%u:%u", &m, &n);
free(s);
if (r != 2)
return -EINVAL;
/* Only return this if it is really good enough for us. */
if (asprintf(&p, "/sys/dev/block/%u:%u/queue", m, n) < 0)
return -ENOMEM;
r = access(p, F_OK);
free(p);
if (r >= 0) {
*ret = makedev(m, n);
return 0;
}
return -ENOENT;
}
bool kexec_loaded(void) {
bool loaded = false;
char *s;
if (read_one_line_file("/sys/kernel/kexec_loaded", &s) >= 0) {
if (s[0] == '1')
loaded = true;
free(s);
}
return loaded;
}
int prot_from_flags(int flags) {
switch (flags & O_ACCMODE) {
case O_RDONLY:
return PROT_READ;
case O_WRONLY:
return PROT_WRITE;
case O_RDWR:
return PROT_READ|PROT_WRITE;
default:
return -EINVAL;
}
}
int fork_agent(pid_t *pid, const int except[], unsigned n_except, const char *path, ...) {
bool stdout_is_tty, stderr_is_tty;
pid_t parent_pid, agent_pid;
sigset_t ss, saved_ss;
unsigned n, i;
va_list ap;
char **l;
assert(pid);
assert(path);
/* Spawns a temporary TTY agent, making sure it goes away when
* we go away */
parent_pid = getpid();
/* First we temporarily block all signals, so that the new
* child has them blocked initially. This way, we can be sure
* that SIGTERMs are not lost we might send to the agent. */
assert_se(sigfillset(&ss) >= 0);
assert_se(sigprocmask(SIG_SETMASK, &ss, &saved_ss) >= 0);
agent_pid = fork();
if (agent_pid < 0) {
assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
return -errno;
}
if (agent_pid != 0) {
assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
*pid = agent_pid;
return 0;
}
/* In the child:
*
* Make sure the agent goes away when the parent dies */
if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
_exit(EXIT_FAILURE);
/* Make sure we actually can kill the agent, if we need to, in
* case somebody invoked us from a shell script that trapped
* SIGTERM or so... */
(void) reset_all_signal_handlers();
(void) reset_signal_mask();
/* Check whether our parent died before we were able
* to set the death signal and unblock the signals */
if (getppid() != parent_pid)
_exit(EXIT_SUCCESS);
/* Don't leak fds to the agent */
close_all_fds(except, n_except);
stdout_is_tty = isatty(STDOUT_FILENO);
stderr_is_tty = isatty(STDERR_FILENO);
if (!stdout_is_tty || !stderr_is_tty) {
int fd;
/* Detach from stdout/stderr. and reopen
* /dev/tty for them. This is important to
* ensure that when systemctl is started via
* popen() or a similar call that expects to
* read EOF we actually do generate EOF and
* not delay this indefinitely by because we
* keep an unused copy of stdin around. */
fd = open("/dev/tty", O_WRONLY);
if (fd < 0) {
log_error_errno(errno, "Failed to open /dev/tty: %m");
_exit(EXIT_FAILURE);
}
if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
_exit(EXIT_FAILURE);
}
if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
_exit(EXIT_FAILURE);
}
if (fd > STDERR_FILENO)
close(fd);
}
/* Count arguments */
va_start(ap, path);
for (n = 0; va_arg(ap, char*); n++)
;
va_end(ap);
/* Allocate strv */
l = alloca(sizeof(char *) * (n + 1));
/* Fill in arguments */
va_start(ap, path);
for (i = 0; i <= n; i++)
l[i] = va_arg(ap, char*);
va_end(ap);
execv(path, l);
_exit(EXIT_FAILURE);
}
bool in_initrd(void) {
struct statfs s;
if (saved_in_initrd >= 0)
return saved_in_initrd;
/* We make two checks here:
*
* 1. the flag file /etc/initrd-release must exist
* 2. the root file system must be a memory file system
*
* The second check is extra paranoia, since misdetecting an
* initrd can have bad bad consequences due the initrd
* emptying when transititioning to the main systemd.
*/
saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
statfs("/", &s) >= 0 &&
is_temporary_fs(&s);
return saved_in_initrd;
}
void in_initrd_force(bool value) {
saved_in_initrd = value;
}
/* hey glibc, APIs with callbacks without a user pointer are so useless */
void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
int (*compar) (const void *, const void *, void *), void *arg) {
size_t l, u, idx;
const void *p;
int comparison;
l = 0;
u = nmemb;
while (l < u) {
idx = (l + u) / 2;
p = (void *)(((const char *) base) + (idx * size));
comparison = compar(key, p, arg);
if (comparison < 0)
u = idx;
else if (comparison > 0)
l = idx + 1;
else
return (void *)p;
}
return NULL;
}
int on_ac_power(void) {
bool found_offline = false, found_online = false;
_cleanup_closedir_ DIR *d = NULL;
d = opendir("/sys/class/power_supply");
if (!d)
return errno == ENOENT ? true : -errno;
for (;;) {
struct dirent *de;
_cleanup_close_ int fd = -1, device = -1;
char contents[6];
ssize_t n;
errno = 0;
de = readdir(d);
if (!de && errno > 0)
return -errno;
if (!de)
break;
if (hidden_or_backup_file(de->d_name))
continue;
device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
if (device < 0) {
if (errno == ENOENT || errno == ENOTDIR)
continue;
return -errno;
}
fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
if (fd < 0) {
if (errno == ENOENT)
continue;
return -errno;
}
n = read(fd, contents, sizeof(contents));
if (n < 0)
return -errno;
if (n != 6 || memcmp(contents, "Mains\n", 6))
continue;
safe_close(fd);
fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
if (fd < 0) {
if (errno == ENOENT)
continue;
return -errno;
}
n = read(fd, contents, sizeof(contents));
if (n < 0)
return -errno;
if (n != 2 || contents[1] != '\n')
return -EIO;
if (contents[0] == '1') {
found_online = true;
break;
} else if (contents[0] == '0')
found_offline = true;
else
return -EIO;
}
return found_online || !found_offline;
}
bool id128_is_valid(const char *s) {
size_t i, l;
l = strlen(s);
if (l == 32) {
/* Simple formatted 128bit hex string */
for (i = 0; i < l; i++) {
char c = s[i];
if (!(c >= '0' && c <= '9') &&
!(c >= 'a' && c <= 'z') &&
!(c >= 'A' && c <= 'Z'))
return false;
}
} else if (l == 36) {
/* Formatted UUID */
for (i = 0; i < l; i++) {
char c = s[i];
if ((i == 8 || i == 13 || i == 18 || i == 23)) {
if (c != '-')
return false;
} else {
if (!(c >= '0' && c <= '9') &&
!(c >= 'a' && c <= 'z') &&
!(c >= 'A' && c <= 'Z'))
return false;
}
}
} else
return false;
return true;
}
int container_get_leader(const char *machine, pid_t *pid) {
_cleanup_free_ char *s = NULL, *class = NULL;
const char *p;
pid_t leader;
int r;
assert(machine);
assert(pid);
if (!machine_name_is_valid(machine))
return -EINVAL;
p = strjoina("/run/systemd/machines/", machine);
r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
if (r == -ENOENT)
return -EHOSTDOWN;
if (r < 0)
return r;
if (!s)
return -EIO;
if (!streq_ptr(class, "container"))
return -EIO;
r = parse_pid(s, &leader);
if (r < 0)
return r;
if (leader <= 1)
return -EIO;
*pid = leader;
return 0;
}
int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
_cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
int rfd = -1;
assert(pid >= 0);
if (mntns_fd) {
const char *mntns;
mntns = procfs_file_alloca(pid, "ns/mnt");
mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
if (mntnsfd < 0)
return -errno;
}
if (pidns_fd) {
const char *pidns;
pidns = procfs_file_alloca(pid, "ns/pid");
pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
if (pidnsfd < 0)
return -errno;
}
if (netns_fd) {
const char *netns;
netns = procfs_file_alloca(pid, "ns/net");
netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
if (netnsfd < 0)
return -errno;
}
if (userns_fd) {
const char *userns;
userns = procfs_file_alloca(pid, "ns/user");
usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
if (usernsfd < 0 && errno != ENOENT)
return -errno;
}
if (root_fd) {
const char *root;
root = procfs_file_alloca(pid, "root");
rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
if (rfd < 0)
return -errno;
}
if (pidns_fd)
*pidns_fd = pidnsfd;
if (mntns_fd)
*mntns_fd = mntnsfd;
if (netns_fd)
*netns_fd = netnsfd;
if (userns_fd)
*userns_fd = usernsfd;
if (root_fd)
*root_fd = rfd;
pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
return 0;
}
int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
if (userns_fd >= 0) {
/* Can't setns to your own userns, since then you could
* escalate from non-root to root in your own namespace, so
* check if namespaces equal before attempting to enter. */
_cleanup_free_ char *userns_fd_path = NULL;
int r;
if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
return -ENOMEM;
r = files_same(userns_fd_path, "/proc/self/ns/user");
if (r < 0)
return r;
if (r)
userns_fd = -1;
}
if (pidns_fd >= 0)
if (setns(pidns_fd, CLONE_NEWPID) < 0)
return -errno;
if (mntns_fd >= 0)
if (setns(mntns_fd, CLONE_NEWNS) < 0)
return -errno;
if (netns_fd >= 0)
if (setns(netns_fd, CLONE_NEWNET) < 0)
return -errno;
if (userns_fd >= 0)
if (setns(userns_fd, CLONE_NEWUSER) < 0)
return -errno;
if (root_fd >= 0) {
if (fchdir(root_fd) < 0)
return -errno;
if (chroot(".") < 0)
return -errno;
}
return reset_uid_gid();
}
uint64_t physical_memory(void) {
_cleanup_free_ char *root = NULL, *value = NULL;
uint64_t mem, lim;
size_t ps;
long sc;
/* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
* memory.
*
* In order to support containers nicely that have a configured memory limit we'll take the minimum of the
* physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
sc = sysconf(_SC_PHYS_PAGES);
assert(sc > 0);
ps = page_size();
mem = (uint64_t) sc * (uint64_t) ps;
if (cg_get_root_path(&root) < 0)
return mem;
if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value))
return mem;
if (safe_atou64(value, &lim) < 0)
return mem;
/* Make sure the limit is a multiple of our own page size */
lim /= ps;
lim *= ps;
return MIN(mem, lim);
}
int update_reboot_parameter_and_warn(const char *param) {
int r;
if (isempty(param)) {
if (unlink("/run/systemd/reboot-param") < 0) {
if (errno == ENOENT)
return 0;
return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
}
return 0;
}
RUN_WITH_UMASK(0022) {
r = write_string_file("/run/systemd/reboot-param", param, WRITE_STRING_FILE_CREATE);
if (r < 0)
return log_warning_errno(r, "Failed to write reboot parameter file: %m");
}
return 0;
}
int version(void) {
puts(PACKAGE_STRING "\n"
SYSTEMD_FEATURES);
return 0;
}