From 4d2b9abbe11131d25aea4966a9c25a06703c6183 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Sun, 20 Apr 2025 12:14:23 +0900 Subject: [PATCH 1/8] core: replace cgroup_bpf_supported() with dlopen_bpf_full() After 3988e2489aaf30034e09918890f688780c154af7, the function is a simple wrapper of bpf_dlopen() with logging. Let's introduce dlopen_bpf_full() that takes log level, and replace cgroup_bpf_supported() with it. --- src/core/bpf-restrict-fs.c | 3 +-- src/core/bpf-restrict-ifaces.c | 3 +-- src/core/bpf-socket-bind.c | 3 +-- src/core/bpf-util.c | 24 ------------------------ src/core/bpf-util.h | 5 ----- src/core/meson.build | 6 ------ src/shared/bpf-dlopen.c | 23 ++++++++++++++--------- src/shared/bpf-dlopen.h | 8 +++++++- 8 files changed, 24 insertions(+), 51 deletions(-) delete mode 100644 src/core/bpf-util.c delete mode 100644 src/core/bpf-util.h diff --git a/src/core/bpf-restrict-fs.c b/src/core/bpf-restrict-fs.c index 3f2ea2b9e6..4e16371739 100644 --- a/src/core/bpf-restrict-fs.c +++ b/src/core/bpf-restrict-fs.c @@ -27,7 +27,6 @@ /* libbpf, clang and llc compile time dependencies are satisfied */ #include "bpf-dlopen.h" #include "bpf-link.h" -#include "bpf-util.h" #include "bpf/restrict_fs/restrict-fs-skel.h" #define CGROUP_HASH_SIZE_MAX 2048 @@ -102,7 +101,7 @@ bool bpf_restrict_fs_supported(bool initialize) { if (!initialize) return false; - if (!cgroup_bpf_supported()) + if (dlopen_bpf_full(LOG_WARNING) < 0) return (supported = false); r = lsm_supported("bpf"); diff --git a/src/core/bpf-restrict-ifaces.c b/src/core/bpf-restrict-ifaces.c index a162efbcdc..e5f9a3c53e 100644 --- a/src/core/bpf-restrict-ifaces.c +++ b/src/core/bpf-restrict-ifaces.c @@ -10,7 +10,6 @@ #include "bpf-dlopen.h" #include "bpf-link.h" -#include "bpf-util.h" #include "bpf/restrict_ifaces/restrict-ifaces-skel.h" static struct restrict_ifaces_bpf *restrict_ifaces_bpf_free(struct restrict_ifaces_bpf *obj) { @@ -81,7 +80,7 @@ int bpf_restrict_ifaces_supported(void) { if (supported >= 0) return supported; - if (!cgroup_bpf_supported()) + if (dlopen_bpf_full(LOG_WARNING) < 0) return (supported = false); if (!compat_libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_CGROUP_SKB, /*opts=*/NULL)) { diff --git a/src/core/bpf-socket-bind.c b/src/core/bpf-socket-bind.c index c751cde8ba..374e684f04 100644 --- a/src/core/bpf-socket-bind.c +++ b/src/core/bpf-socket-bind.c @@ -12,7 +12,6 @@ /* libbpf, clang, llvm and bpftool compile time dependencies are satisfied */ #include "bpf-dlopen.h" #include "bpf-link.h" -#include "bpf-util.h" #include "bpf/socket_bind/socket-bind-api.bpf.h" #include "bpf/socket_bind/socket-bind-skel.h" @@ -127,7 +126,7 @@ int bpf_socket_bind_supported(void) { _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = NULL; int r; - if (!cgroup_bpf_supported()) + if (dlopen_bpf_full(LOG_WARNING) < 0) return false; if (!compat_libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, /*opts=*/NULL)) { diff --git a/src/core/bpf-util.c b/src/core/bpf-util.c deleted file mode 100644 index 2c07a402cb..0000000000 --- a/src/core/bpf-util.c +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1-or-later */ - -#include "bpf-dlopen.h" -#include "bpf-util.h" -#include "cgroup-util.h" -#include "initrd-util.h" -#include "log.h" - -bool cgroup_bpf_supported(void) { - static int supported = -1; - int r; - - if (supported >= 0) - return supported; - - r = dlopen_bpf(); - if (r < 0) { - log_full_errno(in_initrd() ? LOG_DEBUG : LOG_INFO, - r, "Failed to open libbpf, cgroup BPF features disabled: %m"); - return (supported = false); - } - - return (supported = true); -} diff --git a/src/core/bpf-util.h b/src/core/bpf-util.h deleted file mode 100644 index a6c55cd7e5..0000000000 --- a/src/core/bpf-util.h +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1-or-later */ - -#include - -bool cgroup_bpf_supported(void); diff --git a/src/core/meson.build b/src/core/meson.build index 7f40b026c1..0a135171fc 100644 --- a/src/core/meson.build +++ b/src/core/meson.build @@ -65,12 +65,6 @@ libcore_sources = files( 'varlink.c', ) -if conf.get('BPF_FRAMEWORK') == 1 - libcore_sources += files( - 'bpf-util.c', - ) -endif - subdir('bpf/socket_bind') subdir('bpf/restrict_fs') subdir('bpf/restrict_ifaces') diff --git a/src/shared/bpf-dlopen.c b/src/shared/bpf-dlopen.c index c5abd2ef3b..debde5c734 100644 --- a/src/shared/bpf-dlopen.c +++ b/src/shared/bpf-dlopen.c @@ -72,10 +72,14 @@ static int bpf_print_func(enum libbpf_print_level level, const char *fmt, va_lis return log_internalv(LOG_DEBUG, errno, NULL, 0, NULL, fmt, ap); } -int dlopen_bpf(void) { +int dlopen_bpf_full(int log_level) { + static int cached = 0; void *dl; int r; + if (cached != 0) + return cached; + ELF_NOTE_DLOPEN("bpf", "Support firewalling and sandboxing with BPF", ELF_NOTE_DLOPEN_PRIORITY_SUGGESTED, @@ -91,8 +95,9 @@ int dlopen_bpf(void) { * list for both files, and when we assume 1.0+ is present we can remove this dlopen */ dl = dlopen("libbpf.so.0", RTLD_NOW|RTLD_NODELETE); if (!dl) - return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), - "neither libbpf.so.1 nor libbpf.so.0 are installed: %s", dlerror()); + return cached = log_full_errno(log_level, SYNTHETIC_ERRNO(EOPNOTSUPP), + "Neither libbpf.so.1 nor libbpf.so.0 are installed, cgroup BPF features disabled: %s", + dlerror()); log_debug("Loaded 'libbpf.so.0' via dlopen()"); @@ -129,7 +134,7 @@ int dlopen_bpf(void) { ); } if (r < 0) - return r; + return cached = log_full_errno(log_level, r, "Failed to load libbpf symbols, cgroup BPF features disabled: %m"); r = dlsym_many_or_warn( dl, LOG_DEBUG, @@ -171,14 +176,14 @@ int dlopen_bpf(void) { DLSYM_ARG(ring_buffer__new), DLSYM_ARG(ring_buffer__poll)); if (r < 0) - return r; + return cached = log_full_errno(log_level, r, "Failed to load libbpf symbols, cgroup BPF features disabled: %m"); /* We set the print helper unconditionally. Otherwise libbpf will emit not useful log messages. */ (void) sym_libbpf_set_print(bpf_print_func); REENABLE_WARNING; - return r; + return cached = true; } int bpf_get_error_translated(const void *ptr) { @@ -200,8 +205,8 @@ int bpf_get_error_translated(const void *ptr) { #else -int dlopen_bpf(void) { - return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), - "libbpf support is not compiled in."); +int dlopen_bpf_full(int log_level) { + return log_once_errno(log_level, SYNTHETIC_ERRNO(EOPNOTSUPP), + "libbpf support is not compiled in, cgroup BPF features disabled."); } #endif diff --git a/src/shared/bpf-dlopen.h b/src/shared/bpf-dlopen.h index 04cba7c3b1..bb78312363 100644 --- a/src/shared/bpf-dlopen.h +++ b/src/shared/bpf-dlopen.h @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #pragma once +#include +#include + #if HAVE_LIBBPF #include @@ -48,4 +51,7 @@ int bpf_get_error_translated(const void *ptr); #endif -int dlopen_bpf(void); +int dlopen_bpf_full(int log_level); +static inline int dlopen_bpf(void) { + return dlopen_bpf_full(LOG_DEBUG); +} From fb9991962d38a54decd75269ebe5d52677c1792a Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 30 Apr 2025 05:13:26 +0900 Subject: [PATCH 2/8] core/bpf: drop unnecessary check for probing program type BPF_PROG_TYPE_CGROUP_SKB is supported since kernel v4.10 (0e33661de493db325435d565a4a722120ae4cbf3) and BPF_PROG_TYPE_CGROUP_SOCK_ADDR is supported since kernel v4.17 (4fbac77d2d092b475dda9eea66da674369665427). Our baseline on the kernel is v5.4. The check is not necessary. --- src/core/bpf-restrict-ifaces.c | 5 ----- src/core/bpf-socket-bind.c | 5 ----- 2 files changed, 10 deletions(-) diff --git a/src/core/bpf-restrict-ifaces.c b/src/core/bpf-restrict-ifaces.c index e5f9a3c53e..e97682a19c 100644 --- a/src/core/bpf-restrict-ifaces.c +++ b/src/core/bpf-restrict-ifaces.c @@ -83,11 +83,6 @@ int bpf_restrict_ifaces_supported(void) { if (dlopen_bpf_full(LOG_WARNING) < 0) return (supported = false); - if (!compat_libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_CGROUP_SKB, /*opts=*/NULL)) { - log_debug("restrict-interfaces: BPF program type cgroup_skb is not supported"); - return (supported = false); - } - r = prepare_restrict_ifaces_bpf(NULL, true, NULL, &obj); if (r < 0) { log_debug_errno(r, "restrict-interfaces: Failed to load BPF object: %m"); diff --git a/src/core/bpf-socket-bind.c b/src/core/bpf-socket-bind.c index 374e684f04..ce5f5c2f10 100644 --- a/src/core/bpf-socket-bind.c +++ b/src/core/bpf-socket-bind.c @@ -129,11 +129,6 @@ int bpf_socket_bind_supported(void) { if (dlopen_bpf_full(LOG_WARNING) < 0) return false; - if (!compat_libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, /*opts=*/NULL)) { - log_debug("bpf-socket-bind: BPF program type cgroup_sock_addr is not supported"); - return false; - } - r = prepare_socket_bind_bpf(/*unit=*/NULL, /*allow_rules=*/NULL, /*deny_rules=*/NULL, &obj); if (r < 0) { log_debug_errno(r, "bpf-socket-bind: socket bind filtering is not supported: %m"); From df80d728bef287a308d94f8db056848decae4c89 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 30 Apr 2025 05:20:06 +0900 Subject: [PATCH 3/8] bpf-compat: drop unused compat_libbpf_probe_bpf_prog_type() --- src/shared/bpf-compat.h | 9 --------- src/shared/bpf-dlopen.c | 10 ++-------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/src/shared/bpf-compat.h b/src/shared/bpf-compat.h index 27857e9d72..7c2c6a2db2 100644 --- a/src/shared/bpf-compat.h +++ b/src/shared/bpf-compat.h @@ -26,12 +26,10 @@ struct bpf_map_create_opts; * - before the compat static inline helpers that use them. * When removing this file move these back to bpf-dlopen.h */ extern int (*sym_bpf_map_create)(enum bpf_map_type, const char *, __u32, __u32, __u32, const struct bpf_map_create_opts *); -extern int (*sym_libbpf_probe_bpf_prog_type)(enum bpf_prog_type, const void *); extern struct bpf_map* (*sym_bpf_object__next_map)(const struct bpf_object *obj, const struct bpf_map *map); /* compat symbols removed in libbpf 1.0 */ extern int (*sym_bpf_create_map)(enum bpf_map_type, int key_size, int value_size, int max_entries, __u32 map_flags); -extern bool (*sym_bpf_probe_prog_type)(enum bpf_prog_type, __u32); /* helpers to use the available variant behind new API */ static inline int compat_bpf_map_create(enum bpf_map_type map_type, @@ -47,10 +45,3 @@ static inline int compat_bpf_map_create(enum bpf_map_type map_type, return sym_bpf_create_map(map_type, key_size, value_size, max_entries, 0 /* opts->map_flags, but opts is always NULL for us so skip build dependency on the type */); } - -static inline int compat_libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) { - if (sym_libbpf_probe_bpf_prog_type) - return sym_libbpf_probe_bpf_prog_type(prog_type, opts); - - return sym_bpf_probe_prog_type(prog_type, 0); -} diff --git a/src/shared/bpf-dlopen.c b/src/shared/bpf-dlopen.c index debde5c734..5a3b0b6cb3 100644 --- a/src/shared/bpf-dlopen.c +++ b/src/shared/bpf-dlopen.c @@ -51,12 +51,10 @@ DLSYM_PROTOTYPE(ring_buffer__poll) = NULL; /* new symbols available from libbpf 0.7.0 */ int (*sym_bpf_map_create)(enum bpf_map_type, const char *, __u32, __u32, __u32, const struct bpf_map_create_opts *); -int (*sym_libbpf_probe_bpf_prog_type)(enum bpf_prog_type, const void *); struct bpf_map* (*sym_bpf_object__next_map)(const struct bpf_object *obj, const struct bpf_map *map); /* compat symbols removed in libbpf 1.0 */ int (*sym_bpf_create_map)(enum bpf_map_type, int key_size, int value_size, int max_entries, __u32 map_flags); -bool (*sym_bpf_probe_prog_type)(enum bpf_prog_type, __u32); _printf_(2,0) static int bpf_print_func(enum libbpf_print_level level, const char *fmt, va_list ap) { @@ -106,11 +104,9 @@ int dlopen_bpf_full(int log_level) { dl, LOG_DEBUG, #if MODERN_LIBBPF /* Don't exist anymore in new libbpf, hence cannot type check them */ - DLSYM_ARG_FORCE(bpf_create_map), - DLSYM_ARG_FORCE(bpf_probe_prog_type) + DLSYM_ARG_FORCE(bpf_create_map) #else - DLSYM_ARG(bpf_create_map), - DLSYM_ARG(bpf_probe_prog_type) + DLSYM_ARG(bpf_create_map) #endif ); @@ -123,12 +119,10 @@ int dlopen_bpf_full(int log_level) { dl, LOG_DEBUG, #if MODERN_LIBBPF DLSYM_ARG(bpf_map_create), - DLSYM_ARG(libbpf_probe_bpf_prog_type), DLSYM_ARG(bpf_object__next_map) #else /* These symbols did not exist in old libbpf, hence we cannot type check them */ DLSYM_ARG_FORCE(bpf_map_create), - DLSYM_ARG_FORCE(libbpf_probe_bpf_prog_type), DLSYM_ARG_FORCE(bpf_object__next_map) #endif ); From 71be1f3875fc1b4a2a16779ab35f27ac5a02e212 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Thu, 17 Apr 2025 02:05:09 +0900 Subject: [PATCH 4/8] bpf-program: introduce bpf_program_supported() helper function It checks if the kernel is built with CONFIG_CGROUP_BPF. It is currently unused, but will be used later. --- src/shared/bpf-program.c | 47 ++++++++++++++++++++++++++++++++++++++++ src/shared/bpf-program.h | 2 ++ 2 files changed, 49 insertions(+) diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c index 96ce5e9006..83af54ec68 100644 --- a/src/shared/bpf-program.c +++ b/src/shared/bpf-program.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #include +#include #include #include #include @@ -42,6 +43,52 @@ DEFINE_STRING_TABLE_LOOKUP(bpf_cgroup_attach_type, int); DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(bpf_program_hash_ops, void, trivial_hash_func, trivial_compare_func, bpf_program_free); +int bpf_program_supported(void) { + static int cached = 0; + + if (cached != 0) + return cached; + + /* Currently, we only use the following three types: + * - BPF_PROG_TYPE_CGROUP_SKB, supported since kernel v4.10 (0e33661de493db325435d565a4a722120ae4cbf3), + * - BPF_PROG_TYPE_CGROUP_DEVICE, supported since kernel v4.15 (ebc614f687369f9df99828572b1d85a7c2de3d92), + * - BPF_PROG_TYPE_CGROUP_SOCK_ADDR, supported since kernel v4.17 (4fbac77d2d092b475dda9eea66da674369665427). + * Hence, as our baseline on the kernel is v5.4, it is not necessary to check if we can create BPF + * programs of hthese types. + * + * However, unfortunately the kernel allows us to create BPF_PROG_TYPE_CGROUP_SKB (maybe also other types) + * programs even when CONFIG_CGROUP_BPF is turned off at kernel compilation time. This sucks of course: + * why does it allow us to create a cgroup BPF program if we can't do a thing with it later? + * + * We detect this case by issuing the BPF_PROG_DETACH bpf() call with invalid file descriptors: if + * CONFIG_CGROUP_BPF is turned off, then the call will fail early with EINVAL. If it is turned on the + * parameters are validated however, and that'll fail with EBADF then. + * + * The check seems also important when we are running with sanitizers. With sanitizers (at least with + * LLVM v20), the following check and other bpf() calls fails even if the kernel supports BPF. To + * avoid unexpected fail when running with sanitizers, let's explicitly check if bpf() syscall works. */ + + /* Clang and GCC (>=15) do not 0-pad with structured initialization, causing the kernel to reject the + * bpf_attr as invalid. See: https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65 + * Hence, we cannot use structured initialization here, and need to clear the structure with zero + * explicitly before use. */ + union bpf_attr attr; + zero(attr); + attr.attach_type = BPF_CGROUP_INET_EGRESS; /* since kernel v4.10 (0e33661de493db325435d565a4a722120ae4cbf3) */ + attr.target_fd = -EBADF; + attr.attach_bpf_fd = -EBADF; + + if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0) { + if (errno == EBADF) /* YAY! */ + return cached = true; + + return cached = log_debug_errno(errno, "Didn't get EBADF from invalid BPF_PROG_DETACH call: %m"); + } + + return cached = log_debug_errno(SYNTHETIC_ERRNO(EBADE), + "Wut? Kernel accepted our invalid BPF_PROG_DETACH call? Something is weird, assuming BPF is broken and hence not supported."); +} + BPFProgram *bpf_program_free(BPFProgram *p) { if (!p) return NULL; diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h index 904f2b941f..e820ed8196 100644 --- a/src/shared/bpf-program.h +++ b/src/shared/bpf-program.h @@ -33,6 +33,8 @@ struct BPFProgram { uint32_t attached_flags; }; +int bpf_program_supported(void); + int bpf_program_new(uint32_t prog_type, const char *prog_name, BPFProgram **ret); int bpf_program_new_from_bpffs_path(const char *path, BPFProgram **ret); BPFProgram *bpf_program_free(BPFProgram *p); From 22e2f0642897cfa7ba975527f5394bd7fcdf639b Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Thu, 17 Apr 2025 03:10:38 +0900 Subject: [PATCH 5/8] core/cgroup: foreign bpf programs needs to pass bpf_program_supported() As CONFIG_CGROUP_BPF may be disabled on the kernel or we are running on sanitizers. See comments in bpf_program_supported(). Follow-up for 3fcb98cbff0a5be8bf7c5deda6c1f7e8a31699bd. --- src/core/cgroup.c | 5 +++-- src/test/test-bpf-foreign-programs.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 2a74380a28..d58820df83 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -3273,8 +3273,9 @@ static int cg_bpf_mask_supported(CGroupMask *ret) { if (r > 0) mask |= CGROUP_MASK_BPF_DEVICES; - /* BPF pinned prog (always supported by cgroup v2) */ - mask |= CGROUP_MASK_BPF_FOREIGN; + /* BPF pinned prog */ + if (bpf_program_supported() > 0) + mask |= CGROUP_MASK_BPF_FOREIGN; /* BPF-based bind{4|6} hooks */ r = bpf_socket_bind_supported(); diff --git a/src/test/test-bpf-foreign-programs.c b/src/test/test-bpf-foreign-programs.c index 658746afa0..3128b26b8e 100644 --- a/src/test/test-bpf-foreign-programs.c +++ b/src/test/test-bpf-foreign-programs.c @@ -279,8 +279,9 @@ int main(int argc, char *argv[]) { if (detect_container() > 0) return log_tests_skipped("test-bpf fails inside LXC and Docker containers: https://github.com/systemd/systemd/issues/9666"); - if (getuid() != 0) - return log_tests_skipped("not running as root"); + r = bpf_program_supported(); + if (r < 0) + return log_tests_skipped_errno(r, "not running as root"); ASSERT_OK(getrlimit(RLIMIT_MEMLOCK, &rl)); rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE); From ec3c5cfac74e8361a3b0153cc9e8cfdbbcbde0c6 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Thu, 17 Apr 2025 02:39:08 +0900 Subject: [PATCH 6/8] core/bpf-firewall: replace bpf_firewall_supported() with bpf_program_supported() Note, BPF_PROG_TYPE_CGROUP_SKB is supported since kernel v4.10, and BPF_F_ALLOW_MULTI and program name is supported since kernel v4.15. As our baseline on the kernel is v5.4, we can assume that the type, flag, and naming is supported when bpf_program_supported() succeeds. --- src/core/bpf-firewall.c | 192 +++++------------------------------ src/core/bpf-firewall.h | 8 -- src/core/cgroup.c | 14 +-- src/core/dbus-cgroup.c | 17 ++-- src/core/load-fragment.c | 11 +- src/core/socket.c | 24 ++--- src/test/test-bpf-firewall.c | 70 +++++-------- 7 files changed, 71 insertions(+), 265 deletions(-) diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c index a1e557c420..428c0c9781 100644 --- a/src/core/bpf-firewall.c +++ b/src/core/bpf-firewall.c @@ -542,7 +542,7 @@ int bpf_firewall_compile(Unit *u) { bool ip_allow_any = false, ip_deny_any = false; CGroupContext *cc; CGroupRuntime *crt; - int r, supported; + int r; assert(u); @@ -554,27 +554,12 @@ int bpf_firewall_compile(Unit *u) { if (!crt) return -ENOMEM; - supported = bpf_firewall_supported(); - if (supported < 0) - return supported; - if (supported == BPF_FIREWALL_UNSUPPORTED) + if (bpf_program_supported() <= 0) return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "bpf-firewall: BPF firewalling not supported, proceeding without."); - if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE) - /* If BPF_F_ALLOW_MULTI is not supported we don't support any BPF magic on inner nodes (i.e. on slice - * units), since that would mean leaf nodes couldn't do any BPF anymore at all. Under the assumption - * that BPF is more interesting on leaf nodes we hence avoid it on inner nodes in that case. This is - * consistent with old systemd behaviour from before v238, where BPF wasn't supported in inner nodes at - * all, either. */ - return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), - "bpf-firewall: BPF_F_ALLOW_MULTI is not supported, not doing BPF firewall on slice units."); - /* If BPF_F_ALLOW_MULTI flag is supported program name is also supported (both were added to v4.15 - * kernel). */ - if (supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI) { - ingress_name = "sd_fw_ingress"; - egress_name = "sd_fw_egress"; - } + ingress_name = "sd_fw_ingress"; + egress_name = "sd_fw_egress"; /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs themselves, * but we reuse the accounting maps. That way the firewall in effect always maps to the actual @@ -646,7 +631,7 @@ static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set int bpf_firewall_load_custom(Unit *u) { CGroupContext *cc; CGroupRuntime *crt; - int r, supported; + int r; assert(u); @@ -660,13 +645,9 @@ int bpf_firewall_load_custom(Unit *u) { if (!(cc->ip_filters_ingress || cc->ip_filters_egress)) return 0; - supported = bpf_firewall_supported(); - if (supported < 0) - return supported; - - if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI) + if (bpf_program_supported() <= 0) return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), - "bpf-firewall: BPF_F_ALLOW_MULTI not supported, cannot attach custom BPF programs."); + "bpf-firewall: BPF firewalling not supported, cannot attach custom BPF programs."); r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &crt->ip_bpf_custom_ingress); if (r < 0) @@ -702,8 +683,7 @@ int bpf_firewall_install(Unit *u) { _cleanup_free_ char *path = NULL; CGroupContext *cc; CGroupRuntime *crt; - int r, supported; - uint32_t flags; + int r; assert(u); @@ -718,43 +698,23 @@ int bpf_firewall_install(Unit *u) { if (!crt->cgroup_realized) return -EINVAL; - supported = bpf_firewall_supported(); - if (supported < 0) - return supported; - if (supported == BPF_FIREWALL_UNSUPPORTED) + if (bpf_program_supported() <= 0) return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "bpf-firewall: BPF firewalling not supported, proceeding without."); - if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE) - return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), - "bpf-firewall: BPF_F_ALLOW_MULTI not supported, not doing BPF firewall on slice units."); - if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && - (!set_isempty(crt->ip_bpf_custom_ingress) || !set_isempty(crt->ip_bpf_custom_egress))) - return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), - "bpf-firewall: BPF_F_ALLOW_MULTI not supported, cannot attach custom BPF programs."); r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, NULL, &path); if (r < 0) return log_unit_error_errno(u, r, "bpf-firewall: Failed to determine cgroup path: %m"); - flags = supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI ? BPF_F_ALLOW_MULTI : 0; - - if (FLAGS_SET(flags, BPF_F_ALLOW_MULTI)) { - /* If we have BPF_F_ALLOW_MULTI, then let's clear the fields, but destroy the programs only - * after attaching the new programs, so that there's no time window where neither program is - * attached. (There will be a program where both are attached, but that's OK, since this is a - * security feature where we rather want to lock down too much than too little */ - ip_bpf_egress_uninstall = TAKE_PTR(crt->ip_bpf_egress_installed); - ip_bpf_ingress_uninstall = TAKE_PTR(crt->ip_bpf_ingress_installed); - } else { - /* If we don't have BPF_F_ALLOW_MULTI then unref the old BPF programs (which will implicitly - * detach them) right before attaching the new program, to minimize the time window when we - * don't account for IP traffic. */ - crt->ip_bpf_egress_installed = bpf_program_free(crt->ip_bpf_egress_installed); - crt->ip_bpf_ingress_installed = bpf_program_free(crt->ip_bpf_ingress_installed); - } + /* Let's clear the fields, but destroy the programs only after attaching the new programs, so that + * there's no time window where neither program is attached. (There will be a program where both are + * attached, but that's OK, since this is a security feature where we rather want to lock down too + * much than too little. */ + ip_bpf_egress_uninstall = TAKE_PTR(crt->ip_bpf_egress_installed); + ip_bpf_ingress_uninstall = TAKE_PTR(crt->ip_bpf_ingress_installed); if (crt->ip_bpf_egress) { - r = bpf_program_cgroup_attach(crt->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags); + r = bpf_program_cgroup_attach(crt->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, BPF_F_ALLOW_MULTI); if (r < 0) return log_unit_error_errno(u, r, "bpf-firewall: Attaching egress BPF program to cgroup %s failed: %m", path); @@ -764,7 +724,7 @@ int bpf_firewall_install(Unit *u) { } if (crt->ip_bpf_ingress) { - r = bpf_program_cgroup_attach(crt->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags); + r = bpf_program_cgroup_attach(crt->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, BPF_F_ALLOW_MULTI); if (r < 0) return log_unit_error_errno(u, r, "bpf-firewall: Attaching ingress BPF program to cgroup %s failed: %m", path); @@ -830,118 +790,9 @@ int bpf_firewall_reset_accounting(int map_fd) { return bpf_map_update_element(map_fd, &key, &value); } -static int bpf_firewall_unsupported_reason = 0; - -int bpf_firewall_supported(void) { - const struct bpf_insn trivial[] = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN() - }; - - _cleanup_(bpf_program_freep) BPFProgram *program = NULL; - static int supported = -1; - union bpf_attr attr; - int r; - - /* Checks whether BPF firewalling is supported. For this, we check the following things: - * - * - the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_SKB programs, which we require - * - the BPF implementation in the kernel supports the BPF_PROG_DETACH call, which we require - */ - if (supported >= 0) - return supported; - - /* prog_name is NULL since it is supported only starting from v4.15 kernel. */ - r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, NULL, &program); - if (r < 0) { - bpf_firewall_unsupported_reason = - log_debug_errno(r, "bpf-firewall: Can't allocate CGROUP SKB BPF program, BPF firewalling is not supported: %m"); - return supported = BPF_FIREWALL_UNSUPPORTED; - } - - r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial)); - if (r < 0) { - bpf_firewall_unsupported_reason = - log_debug_errno(r, "bpf-firewall: Can't add trivial instructions to CGROUP SKB BPF program, BPF firewalling is not supported: %m"); - return supported = BPF_FIREWALL_UNSUPPORTED; - } - - r = bpf_program_load_kernel(program, NULL, 0); - if (r < 0) { - bpf_firewall_unsupported_reason = - log_debug_errno(r, "bpf-firewall: Can't load kernel CGROUP SKB BPF program, BPF firewalling is not supported: %m"); - return supported = BPF_FIREWALL_UNSUPPORTED; - } - - /* Unfortunately the kernel allows us to create BPF_PROG_TYPE_CGROUP_SKB programs even when CONFIG_CGROUP_BPF - * is turned off at kernel compilation time. This sucks of course: why does it allow us to create a cgroup BPF - * program if we can't do a thing with it later? - * - * We detect this case by issuing the BPF_PROG_DETACH bpf() call with invalid file descriptors: if - * CONFIG_CGROUP_BPF is turned off, then the call will fail early with EINVAL. If it is turned on the - * parameters are validated however, and that'll fail with EBADF then. */ - - // FIXME: Clang doesn't 0-pad with structured initialization, causing - // the kernel to reject the bpf_attr as invalid. See: - // https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65 - // Ideally it should behave like GCC, so that we can remove these workarounds. - zero(attr); - attr.attach_type = BPF_CGROUP_INET_EGRESS; - attr.target_fd = -EBADF; - attr.attach_bpf_fd = -EBADF; - - if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0) { - if (errno != EBADF) { - bpf_firewall_unsupported_reason = - log_debug_errno(errno, "bpf-firewall: Didn't get EBADF from BPF_PROG_DETACH, BPF firewalling is not supported: %m"); - return supported = BPF_FIREWALL_UNSUPPORTED; - } - - /* YAY! */ - } else { - bpf_firewall_unsupported_reason = - log_debug_errno(SYNTHETIC_ERRNO(EBADE), - "bpf-firewall: Wut? Kernel accepted our invalid BPF_PROG_DETACH call? " - "Something is weird, assuming BPF firewalling is broken and hence not supported."); - return supported = BPF_FIREWALL_UNSUPPORTED; - } - - /* So now we know that the BPF program is generally available, let's see if BPF_F_ALLOW_MULTI is also supported - * (which was added in kernel 4.15). We use a similar logic as before, but this time we use the BPF_PROG_ATTACH - * bpf() call and the BPF_F_ALLOW_MULTI flags value. Since the flags are checked early in the system call we'll - * get EINVAL if it's not supported, and EBADF as before if it is available. - * Use probe result as the indicator that program name is also supported since they both were - * added in kernel 4.15. */ - - zero(attr); - attr.attach_type = BPF_CGROUP_INET_EGRESS; - attr.target_fd = -EBADF; - attr.attach_bpf_fd = -EBADF; - attr.attach_flags = BPF_F_ALLOW_MULTI; - - if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0) { - if (errno == EBADF) { - log_debug_errno(errno, "bpf-firewall: Got EBADF when using BPF_F_ALLOW_MULTI, which indicates it is supported. Yay!"); - return supported = BPF_FIREWALL_SUPPORTED_WITH_MULTI; - } - - if (errno == EINVAL) - log_debug_errno(errno, "bpf-firewall: Got EINVAL error when using BPF_F_ALLOW_MULTI, which indicates it's not supported."); - else - log_debug_errno(errno, "bpf-firewall: Got unexpected error when using BPF_F_ALLOW_MULTI, assuming it's not supported: %m"); - - return supported = BPF_FIREWALL_SUPPORTED; - } else { - bpf_firewall_unsupported_reason = - log_debug_errno(SYNTHETIC_ERRNO(EBADE), - "bpf-firewall: Wut? Kernel accepted our invalid BPF_PROG_ATTACH+BPF_F_ALLOW_MULTI call? " - "Something is weird, assuming BPF firewalling is broken and hence not supported."); - return supported = BPF_FIREWALL_UNSUPPORTED; - } -} - void emit_bpf_firewall_warning(Unit *u) { static bool warned = false; + int r; assert(u); assert(u->manager); @@ -949,9 +800,12 @@ void emit_bpf_firewall_warning(Unit *u) { if (warned || MANAGER_IS_TEST_RUN(u->manager)) return; - bool quiet = ERRNO_IS_PRIVILEGE(bpf_firewall_unsupported_reason) && detect_container() > 0; + r = bpf_program_supported(); + assert(r < 0); - log_unit_full_errno(u, quiet ? LOG_DEBUG : LOG_WARNING, bpf_firewall_unsupported_reason, + bool quiet = ERRNO_IS_NEG_PRIVILEGE(r) && detect_container() > 0; + + log_unit_full_errno(u, quiet ? LOG_DEBUG : LOG_WARNING, r, "unit configures an IP firewall, but %s.\n" "(This warning is only shown for the first unit using IP firewalling.)", getuid() != 0 ? "not running as root" : diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h index f7b72d0ea1..27cbdf857b 100644 --- a/src/core/bpf-firewall.h +++ b/src/core/bpf-firewall.h @@ -6,14 +6,6 @@ #include "cgroup.h" #include "unit.h" -enum { - BPF_FIREWALL_UNSUPPORTED = 0, - BPF_FIREWALL_SUPPORTED = 1, - BPF_FIREWALL_SUPPORTED_WITH_MULTI = 2, -}; - -int bpf_firewall_supported(void); - int bpf_firewall_compile(Unit *u); int bpf_firewall_install(Unit *u); int bpf_firewall_load_custom(Unit *u); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index d58820df83..896db2d647 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -3259,12 +3259,10 @@ static int cg_bpf_mask_supported(CGroupMask *ret) { CGroupMask mask = 0; int r; - /* BPF-based firewall */ - r = bpf_firewall_supported(); - if (r < 0) - return r; - if (r > 0) - mask |= CGROUP_MASK_BPF_FIREWALL; + /* BPF-based firewall and pinned foreign prog */ + if (bpf_program_supported() > 0) + mask |= CGROUP_MASK_BPF_FIREWALL | + CGROUP_MASK_BPF_FOREIGN; /* BPF-based device access control */ r = bpf_devices_supported(); @@ -3273,10 +3271,6 @@ static int cg_bpf_mask_supported(CGroupMask *ret) { if (r > 0) mask |= CGROUP_MASK_BPF_DEVICES; - /* BPF pinned prog */ - if (bpf_program_supported() > 0) - mask |= CGROUP_MASK_BPF_FOREIGN; - /* BPF-based bind{4|6} hooks */ r = bpf_socket_bind_supported(); if (r < 0) diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index e73a3731f3..713bd30265 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -635,18 +635,13 @@ static int bus_cgroup_set_transient_property( unit_write_setting(u, flags, name, buf); - if (*filters) { - r = bpf_firewall_supported(); - if (r < 0) - return r; - if (r != BPF_FIREWALL_SUPPORTED_WITH_MULTI) { - static bool warned = false; + if (*filters && bpf_program_supported() <= 0) { + static bool warned = false; - log_full(warned ? LOG_DEBUG : LOG_WARNING, - "Transient unit %s configures an IP firewall with BPF, but the local system does not support BPF/cgroup firewalling with multiple filters.\n" - "Starting this unit will fail! (This warning is only shown for the first started transient unit using IP firewalling.)", u->id); - warned = true; - } + log_full(warned ? LOG_DEBUG : LOG_WARNING, + "Transient unit %s configures an IP firewall with BPF, but the local system does not support BPF/cgroup firewalling with multiple filters.\n" + "Starting this unit will fail! (This warning is only shown for the first started transient unit using IP firewalling.)", u->id); + warned = true; } } diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index e718dbc76b..93270de82a 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -5712,15 +5712,12 @@ int config_parse_ip_filter_bpf_progs( if (r < 0) return log_oom(); - r = bpf_firewall_supported(); - if (r < 0) - return r; - if (r != BPF_FIREWALL_SUPPORTED_WITH_MULTI) { + if (bpf_program_supported() <= 0) { static bool warned = false; - log_full(warned ? LOG_DEBUG : LOG_WARNING, - "File %s:%u configures an IP firewall with BPF programs (%s=%s), but the local system does not support BPF/cgroup based firewalling with multiple filters.\n" - "Starting this unit will fail! (This warning is only shown for the first loaded unit using IP firewalling.)", filename, line, lvalue, rvalue); + log_syntax(unit, warned ? LOG_DEBUG : LOG_WARNING, filename, line, 0, + "Configures an IP firewall with BPF programs (%s=%s), but the local system does not support BPF/cgroup based firewalling with multiple filters. " + "Starting this unit will fail! (This warning is only shown for the first loaded unit using IP firewalling.)", lvalue, rvalue); warned = true; } diff --git a/src/core/socket.c b/src/core/socket.c index 0de430b97e..af67ac3d06 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -1477,9 +1477,7 @@ static int socket_address_listen_do( log_unit_error_errno(u, error, fmt, strna(_t)); \ }) -static int fork_needed(const SocketAddress *address, Socket *s) { - int r; - +static bool fork_needed(const SocketAddress *address, Socket *s) { assert(address); assert(s); @@ -1493,13 +1491,9 @@ static int fork_needed(const SocketAddress *address, Socket *s) { if (nft_set->source == NFT_SET_SOURCE_CGROUP) return true; - if (IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6)) { - r = bpf_firewall_supported(); - if (r < 0) - return r; - if (r != BPF_FIREWALL_UNSUPPORTED) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */ - return true; - } + if (IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6) && + bpf_program_supported() > 0) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */ + return true; return exec_needs_network_namespace(&s->exec_context); } @@ -1521,10 +1515,7 @@ static int socket_address_listen_in_cgroup( * the socket is actually properly attached to the unit's cgroup for the purpose of BPF filtering and * such. */ - r = fork_needed(address, s); - if (r < 0) - return r; - if (r == 0) { + if (!fork_needed(address, s)) { /* Shortcut things... */ fd = socket_address_listen_do(s, address, label); if (fd < 0) @@ -3030,10 +3021,7 @@ static int socket_accept_in_cgroup(Socket *s, SocketPort *p, int fd) { if (!IN_SET(p->address.sockaddr.sa.sa_family, AF_INET, AF_INET6)) goto shortcut; - r = bpf_firewall_supported(); - if (r < 0) - return r; - if (r == BPF_FIREWALL_UNSUPPORTED) + if (bpf_program_supported() <= 0) goto shortcut; if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0) diff --git a/src/test/test-bpf-firewall.c b/src/test/test-bpf-firewall.c index 10bfa52e26..8ba3a888ec 100644 --- a/src/test/test-bpf-firewall.c +++ b/src/test/test-bpf-firewall.c @@ -31,7 +31,6 @@ int main(int argc, char *argv[]) { struct rlimit rl; int r; union bpf_attr attr; - bool test_custom_filter = false; const char *test_prog = "/sys/fs/bpf/test-dropper"; test_setup_logging(LOG_DEBUG); @@ -39,6 +38,11 @@ int main(int argc, char *argv[]) { if (detect_container() > 0) return log_tests_skipped("test-bpf-firewall fails inside LXC and Docker containers: https://github.com/systemd/systemd/issues/9666"); + r = bpf_program_supported(); + if (r < 0) + return log_tests_skipped_errno(r, "BPF firewalling not supported"); + ASSERT_TRUE(r); + ASSERT_OK(getrlimit(RLIMIT_MEMLOCK, &rl)); rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE); (void) setrlimit(RLIMIT_MEMLOCK, &rl); @@ -65,34 +69,17 @@ int main(int argc, char *argv[]) { r = bpf_program_add_instructions(p, exit_insn, ELEMENTSOF(exit_insn)); ASSERT_EQ(r, 0); - r = bpf_firewall_supported(); - if (r == BPF_FIREWALL_UNSUPPORTED) - return log_tests_skipped("BPF firewalling not supported"); - ASSERT_GT(r, 0); - - if (r == BPF_FIREWALL_SUPPORTED_WITH_MULTI) { - log_notice("BPF firewalling with BPF_F_ALLOW_MULTI supported. Yay!"); - test_custom_filter = true; - } else - log_notice("BPF firewalling (though without BPF_F_ALLOW_MULTI) supported. Good."); - r = bpf_program_load_kernel(p, log_buf, ELEMENTSOF(log_buf)); ASSERT_OK(r); - if (test_custom_filter) { - zero(attr); - attr.pathname = PTR_TO_UINT64(test_prog); - attr.bpf_fd = p->kernel_fd; - attr.file_flags = 0; + zero(attr); + attr.pathname = PTR_TO_UINT64(test_prog); + attr.bpf_fd = p->kernel_fd; + attr.file_flags = 0; - (void) unlink(test_prog); + (void) unlink(test_prog); - r = bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); - if (r < 0) { - log_warning_errno(errno, "BPF object pinning failed, will not run custom filter test: %m"); - test_custom_filter = false; - } - } + ASSERT_OK(bpf(BPF_OBJ_PIN, &attr, sizeof(attr))); p = bpf_program_free(p); @@ -192,31 +179,30 @@ int main(int argc, char *argv[]) { assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.code != CLD_EXITED || SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.status != EXIT_SUCCESS); - if (test_custom_filter) { - assert_se(u = unit_new(m, sizeof(Service))); - assert_se(unit_add_name(u, "custom-filter.service") == 0); - assert_se(cc = unit_get_cgroup_context(u)); - u->perpetual = true; + /* testing custom filter */ + assert_se(u = unit_new(m, sizeof(Service))); + assert_se(unit_add_name(u, "custom-filter.service") == 0); + assert_se(cc = unit_get_cgroup_context(u)); + u->perpetual = true; - cc->ip_accounting = true; + cc->ip_accounting = true; - assert_se(config_parse_ip_filter_bpf_progs(u->id, "filename", 1, "Service", 1, "IPIngressFilterPath", 0, test_prog, &cc->ip_filters_ingress, u) == 0); - assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "-/bin/ping -c 1 127.0.0.1 -W 5", SERVICE(u)->exec_command, u) == 0); + assert_se(config_parse_ip_filter_bpf_progs(u->id, "filename", 1, "Service", 1, "IPIngressFilterPath", 0, test_prog, &cc->ip_filters_ingress, u) == 0); + assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "-/bin/ping -c 1 127.0.0.1 -W 5", SERVICE(u)->exec_command, u) == 0); - SERVICE(u)->type = SERVICE_ONESHOT; - u->load_state = UNIT_LOADED; + SERVICE(u)->type = SERVICE_ONESHOT; + u->load_state = UNIT_LOADED; - ASSERT_OK(unit_start(u, NULL)); + ASSERT_OK(unit_start(u, NULL)); - while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED)) - assert_se(sd_event_run(m->event, UINT64_MAX) >= 0); + while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED)) + assert_se(sd_event_run(m->event, UINT64_MAX) >= 0); - assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code != CLD_EXITED || - SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status != EXIT_SUCCESS); + assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code != CLD_EXITED || + SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status != EXIT_SUCCESS); - (void) unlink(test_prog); - assert_se(SERVICE(u)->state == SERVICE_DEAD); - } + (void) unlink(test_prog); + assert_se(SERVICE(u)->state == SERVICE_DEAD); return 0; } From ad446c8ceb97c03971f06fd43e97720afe33be5a Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Tue, 8 Apr 2025 17:26:58 +0900 Subject: [PATCH 7/8] core/bpf-devices: use bpf_program_supported() Note, BPF_PROG_TYPE_CGROUP_DEVICE is supported since kernel v4.15. As our baseline on the kernel is v5.4, we can assume the bpf type is always supported. --- src/core/bpf-devices.c | 45 ------------------------------------- src/core/bpf-devices.h | 1 - src/core/cgroup.c | 10 ++------- src/test/test-bpf-devices.c | 10 ++++----- 4 files changed, 7 insertions(+), 59 deletions(-) diff --git a/src/core/bpf-devices.c b/src/core/bpf-devices.c index adaa929364..4417ab58e9 100644 --- a/src/core/bpf-devices.c +++ b/src/core/bpf-devices.c @@ -252,51 +252,6 @@ int bpf_devices_apply_policy( return 0; } -int bpf_devices_supported(void) { - const struct bpf_insn trivial[] = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN() - }; - - _cleanup_(bpf_program_freep) BPFProgram *program = NULL; - static int supported = -1; - int r; - - /* Checks whether BPF device controller is supported. For this, we check two things: - * - * a) whether we are privileged - * b) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require - */ - - if (supported >= 0) - return supported; - - if (geteuid() != 0) { - log_debug("Not enough privileges, BPF device control is not supported."); - return supported = 0; - } - - r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &program); - if (r < 0) { - log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m"); - return supported = 0; - } - - r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial)); - if (r < 0) { - log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m"); - return supported = 0; - } - - r = bpf_program_load_kernel(program, NULL, 0); - if (r < 0) { - log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m"); - return supported = 0; - } - - return supported = 1; -} - static int allow_list_device_pattern( BPFProgram *prog, const char *path, diff --git a/src/core/bpf-devices.h b/src/core/bpf-devices.h index 5660e1a03a..a1d261548d 100644 --- a/src/core/bpf-devices.h +++ b/src/core/bpf-devices.h @@ -15,7 +15,6 @@ int bpf_devices_apply_policy( const char *cgroup_path, BPFProgram **prog_installed); -int bpf_devices_supported(void); int bpf_devices_allow_list_device(BPFProgram *prog, const char *path, const char *node, CGroupDevicePermissions p); int bpf_devices_allow_list_major(BPFProgram *prog, const char *path, const char *name, char type, CGroupDevicePermissions p); int bpf_devices_allow_list_static(BPFProgram *prog, const char *path); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 896db2d647..dbab38c3c0 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -3259,18 +3259,12 @@ static int cg_bpf_mask_supported(CGroupMask *ret) { CGroupMask mask = 0; int r; - /* BPF-based firewall and pinned foreign prog */ + /* BPF-based firewall, device access control, and pinned foreign prog */ if (bpf_program_supported() > 0) mask |= CGROUP_MASK_BPF_FIREWALL | + CGROUP_MASK_BPF_DEVICES | CGROUP_MASK_BPF_FOREIGN; - /* BPF-based device access control */ - r = bpf_devices_supported(); - if (r < 0) - return r; - if (r > 0) - mask |= CGROUP_MASK_BPF_DEVICES; - /* BPF-based bind{4|6} hooks */ r = bpf_socket_bind_supported(); if (r < 0) diff --git a/src/test/test-bpf-devices.c b/src/test/test-bpf-devices.c index 8b0b744892..1642673e75 100644 --- a/src/test/test-bpf-devices.c +++ b/src/test/test-bpf-devices.c @@ -257,6 +257,11 @@ int main(int argc, char *argv[]) { test_setup_logging(LOG_DEBUG); + r = bpf_program_supported(); + if (r < 0) + return log_tests_skipped_errno(r, "BPF device filter not supported"); + ASSERT_TRUE(r); + ASSERT_OK(getrlimit(RLIMIT_MEMLOCK, &rl)); rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE); (void) setrlimit(RLIMIT_MEMLOCK, &rl); @@ -274,11 +279,6 @@ int main(int argc, char *argv[]) { if (r < 0) return log_tests_skipped_errno(r, "Failed to prepare cgroup subtree"); - r = bpf_devices_supported(); - if (r == 0) - return log_tests_skipped("BPF device filter not supported"); - ASSERT_EQ(r, 1); - r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup, NULL, &controller_path); ASSERT_OK(r); From 7ded1bcf049d93f8e0c89b4894d2c0558cafc026 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Thu, 17 Apr 2025 03:03:53 +0900 Subject: [PATCH 8/8] test: allow to allocate test scope even running with unprivileged user --- src/shared/tests.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/shared/tests.c b/src/shared/tests.c index 06f26a2222..03ec3ba15e 100644 --- a/src/shared/tests.c +++ b/src/shared/tests.c @@ -219,7 +219,10 @@ static int allocate_scope(void) { return 0; } - r = sd_bus_default_system(&bus); + if (geteuid() == 0) + r = sd_bus_default_system(&bus); + else + r = sd_bus_default_user(&bus); if (r < 0) return log_error_errno(r, "Failed to connect to system bus: %m");