core/bpf: drop old kernels support (#37151)

2026-04-14 08:25:20 +09:00 · 2025-05-10 03:13:42 +09:00
parent b112a676d5 7ded1bcf04
commit ef12eadfc8
23 changed files with 159 additions and 403 deletions
--- a/src/core/bpf-devices.c
+++ b/src/core/bpf-devices.c
@@ -252,51 +252,6 @@ int bpf_devices_apply_policy(
        return 0;
 }

-int bpf_devices_supported(void) {
-        const struct bpf_insn trivial[] = {
-                BPF_MOV64_IMM(BPF_REG_0, 1),
-                BPF_EXIT_INSN()
-        };
-
-        _cleanup_(bpf_program_freep) BPFProgram *program = NULL;
-        static int supported = -1;
-        int r;
-
-        /* Checks whether BPF device controller is supported. For this, we check two things:
-         *
-         * a) whether we are privileged
-         * b) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
-         */
-
-        if (supported >= 0)
-                return supported;
-
-        if (geteuid() != 0) {
-                log_debug("Not enough privileges, BPF device control is not supported.");
-                return supported = 0;
-        }
-
-        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &program);
-        if (r < 0) {
-                log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
-                return supported = 0;
-        }
-
-        r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
-        if (r < 0) {
-                log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
-                return supported = 0;
-        }
-
-        r = bpf_program_load_kernel(program, NULL, 0);
-        if (r < 0) {
-                log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
-                return supported = 0;
-        }
-
-        return supported = 1;
-}
-
 static int allow_list_device_pattern(
                BPFProgram *prog,
                const char *path,
--- a/src/core/bpf-devices.h
+++ b/src/core/bpf-devices.h
@@ -15,7 +15,6 @@ int bpf_devices_apply_policy(
                const char *cgroup_path,
                BPFProgram **prog_installed);

-int bpf_devices_supported(void);
 int bpf_devices_allow_list_device(BPFProgram *prog, const char *path, const char *node, CGroupDevicePermissions p);
 int bpf_devices_allow_list_major(BPFProgram *prog, const char *path, const char *name, char type, CGroupDevicePermissions p);
 int bpf_devices_allow_list_static(BPFProgram *prog, const char *path);
--- a/src/core/bpf-firewall.c
+++ b/src/core/bpf-firewall.c
@@ -542,7 +542,7 @@ int bpf_firewall_compile(Unit *u) {
        bool ip_allow_any = false, ip_deny_any = false;
        CGroupContext *cc;
        CGroupRuntime *crt;
-        int r, supported;
+        int r;

        assert(u);

@@ -554,27 +554,12 @@ int bpf_firewall_compile(Unit *u) {
        if (!crt)
                return -ENOMEM;

-        supported = bpf_firewall_supported();
-        if (supported < 0)
-                return supported;
-        if (supported == BPF_FIREWALL_UNSUPPORTED)
+        if (bpf_program_supported() <= 0)
                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
                                            "bpf-firewall: BPF firewalling not supported, proceeding without.");
-        if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE)
-                /* If BPF_F_ALLOW_MULTI is not supported we don't support any BPF magic on inner nodes (i.e. on slice
-                 * units), since that would mean leaf nodes couldn't do any BPF anymore at all. Under the assumption
-                 * that BPF is more interesting on leaf nodes we hence avoid it on inner nodes in that case. This is
-                 * consistent with old systemd behaviour from before v238, where BPF wasn't supported in inner nodes at
-                 * all, either. */
-                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
-                                            "bpf-firewall: BPF_F_ALLOW_MULTI is not supported, not doing BPF firewall on slice units.");

-        /* If BPF_F_ALLOW_MULTI flag is supported program name is also supported (both were added to v4.15
-         * kernel). */
-        if (supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
-                ingress_name = "sd_fw_ingress";
-                egress_name = "sd_fw_egress";
-        }
+        ingress_name = "sd_fw_ingress";
+        egress_name = "sd_fw_egress";

        /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs themselves,
         * but we reuse the accounting maps. That way the firewall in effect always maps to the actual
@@ -646,7 +631,7 @@ static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set
 int bpf_firewall_load_custom(Unit *u) {
        CGroupContext *cc;
        CGroupRuntime *crt;
-        int r, supported;
+        int r;

        assert(u);

@@ -660,13 +645,9 @@ int bpf_firewall_load_custom(Unit *u) {
        if (!(cc->ip_filters_ingress || cc->ip_filters_egress))
                return 0;

-        supported = bpf_firewall_supported();
-        if (supported < 0)
-                return supported;
-
-        if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI)
+        if (bpf_program_supported() <= 0)
                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
-                                            "bpf-firewall: BPF_F_ALLOW_MULTI not supported, cannot attach custom BPF programs.");
+                                            "bpf-firewall: BPF firewalling not supported, cannot attach custom BPF programs.");

        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &crt->ip_bpf_custom_ingress);
        if (r < 0)
@@ -702,8 +683,7 @@ int bpf_firewall_install(Unit *u) {
        _cleanup_free_ char *path = NULL;
        CGroupContext *cc;
        CGroupRuntime *crt;
-        int r, supported;
-        uint32_t flags;
+        int r;

        assert(u);

@@ -718,43 +698,23 @@ int bpf_firewall_install(Unit *u) {
        if (!crt->cgroup_realized)
                return -EINVAL;

-        supported = bpf_firewall_supported();
-        if (supported < 0)
-                return supported;
-        if (supported == BPF_FIREWALL_UNSUPPORTED)
+        if (bpf_program_supported() <= 0)
                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
                                            "bpf-firewall: BPF firewalling not supported, proceeding without.");
-        if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE)
-                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
-                                            "bpf-firewall: BPF_F_ALLOW_MULTI not supported, not doing BPF firewall on slice units.");
-        if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
-            (!set_isempty(crt->ip_bpf_custom_ingress) || !set_isempty(crt->ip_bpf_custom_egress)))
-                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
-                                            "bpf-firewall: BPF_F_ALLOW_MULTI not supported, cannot attach custom BPF programs.");

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, NULL, &path);
        if (r < 0)
                return log_unit_error_errno(u, r, "bpf-firewall: Failed to determine cgroup path: %m");

-        flags = supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI ? BPF_F_ALLOW_MULTI : 0;
-
-        if (FLAGS_SET(flags, BPF_F_ALLOW_MULTI)) {
-                /* If we have BPF_F_ALLOW_MULTI, then let's clear the fields, but destroy the programs only
-                 * after attaching the new programs, so that there's no time window where neither program is
-                 * attached. (There will be a program where both are attached, but that's OK, since this is a
-                 * security feature where we rather want to lock down too much than too little */
-                ip_bpf_egress_uninstall = TAKE_PTR(crt->ip_bpf_egress_installed);
-                ip_bpf_ingress_uninstall = TAKE_PTR(crt->ip_bpf_ingress_installed);
-        } else {
-                /* If we don't have BPF_F_ALLOW_MULTI then unref the old BPF programs (which will implicitly
-                 * detach them) right before attaching the new program, to minimize the time window when we
-                 * don't account for IP traffic. */
-                crt->ip_bpf_egress_installed = bpf_program_free(crt->ip_bpf_egress_installed);
-                crt->ip_bpf_ingress_installed = bpf_program_free(crt->ip_bpf_ingress_installed);
-        }
+        /* Let's clear the fields, but destroy the programs only after attaching the new programs, so that
+         * there's no time window where neither program is attached. (There will be a program where both are
+         * attached, but that's OK, since this is a security feature where we rather want to lock down too
+         * much than too little. */
+        ip_bpf_egress_uninstall = TAKE_PTR(crt->ip_bpf_egress_installed);
+        ip_bpf_ingress_uninstall = TAKE_PTR(crt->ip_bpf_ingress_installed);

        if (crt->ip_bpf_egress) {
-                r = bpf_program_cgroup_attach(crt->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
+                r = bpf_program_cgroup_attach(crt->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, BPF_F_ALLOW_MULTI);
                if (r < 0)
                        return log_unit_error_errno(u, r,
                                "bpf-firewall: Attaching egress BPF program to cgroup %s failed: %m", path);
@@ -764,7 +724,7 @@ int bpf_firewall_install(Unit *u) {
        }

        if (crt->ip_bpf_ingress) {
-                r = bpf_program_cgroup_attach(crt->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
+                r = bpf_program_cgroup_attach(crt->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, BPF_F_ALLOW_MULTI);
                if (r < 0)
                        return log_unit_error_errno(u, r,
                                "bpf-firewall: Attaching ingress BPF program to cgroup %s failed: %m", path);
@@ -830,118 +790,9 @@ int bpf_firewall_reset_accounting(int map_fd) {
        return bpf_map_update_element(map_fd, &key, &value);
 }

-static int bpf_firewall_unsupported_reason = 0;
-
-int bpf_firewall_supported(void) {
-        const struct bpf_insn trivial[] = {
-                BPF_MOV64_IMM(BPF_REG_0, 1),
-                BPF_EXIT_INSN()
-        };
-
-        _cleanup_(bpf_program_freep) BPFProgram *program = NULL;
-        static int supported = -1;
-        union bpf_attr attr;
-        int r;
-
-        /* Checks whether BPF firewalling is supported. For this, we check the following things:
-         *
-         * - the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_SKB programs, which we require
-         * - the BPF implementation in the kernel supports the BPF_PROG_DETACH call, which we require
-         */
-        if (supported >= 0)
-                return supported;
-
-        /* prog_name is NULL since it is supported only starting from v4.15 kernel. */
-        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, NULL, &program);
-        if (r < 0) {
-                bpf_firewall_unsupported_reason =
-                        log_debug_errno(r, "bpf-firewall: Can't allocate CGROUP SKB BPF program, BPF firewalling is not supported: %m");
-                return supported = BPF_FIREWALL_UNSUPPORTED;
-        }
-
-        r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
-        if (r < 0) {
-                bpf_firewall_unsupported_reason =
-                        log_debug_errno(r, "bpf-firewall: Can't add trivial instructions to CGROUP SKB BPF program, BPF firewalling is not supported: %m");
-                return supported = BPF_FIREWALL_UNSUPPORTED;
-        }
-
-        r = bpf_program_load_kernel(program, NULL, 0);
-        if (r < 0) {
-                bpf_firewall_unsupported_reason =
-                        log_debug_errno(r, "bpf-firewall: Can't load kernel CGROUP SKB BPF program, BPF firewalling is not supported: %m");
-                return supported = BPF_FIREWALL_UNSUPPORTED;
-        }
-
-        /* Unfortunately the kernel allows us to create BPF_PROG_TYPE_CGROUP_SKB programs even when CONFIG_CGROUP_BPF
-         * is turned off at kernel compilation time. This sucks of course: why does it allow us to create a cgroup BPF
-         * program if we can't do a thing with it later?
-         *
-         * We detect this case by issuing the BPF_PROG_DETACH bpf() call with invalid file descriptors: if
-         * CONFIG_CGROUP_BPF is turned off, then the call will fail early with EINVAL. If it is turned on the
-         * parameters are validated however, and that'll fail with EBADF then. */
-
-        // FIXME: Clang doesn't 0-pad with structured initialization, causing
-        // the kernel to reject the bpf_attr as invalid. See:
-        // https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65
-        // Ideally it should behave like GCC, so that we can remove these workarounds.
-        zero(attr);
-        attr.attach_type = BPF_CGROUP_INET_EGRESS;
-        attr.target_fd = -EBADF;
-        attr.attach_bpf_fd = -EBADF;
-
-        if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0) {
-                if (errno != EBADF) {
-                        bpf_firewall_unsupported_reason =
-                                log_debug_errno(errno, "bpf-firewall: Didn't get EBADF from BPF_PROG_DETACH, BPF firewalling is not supported: %m");
-                        return supported = BPF_FIREWALL_UNSUPPORTED;
-                }
-
-                /* YAY! */
-        } else {
-                bpf_firewall_unsupported_reason =
-                        log_debug_errno(SYNTHETIC_ERRNO(EBADE),
-                                        "bpf-firewall: Wut? Kernel accepted our invalid BPF_PROG_DETACH call? "
-                                        "Something is weird, assuming BPF firewalling is broken and hence not supported.");
-                return supported = BPF_FIREWALL_UNSUPPORTED;
-        }
-
-        /* So now we know that the BPF program is generally available, let's see if BPF_F_ALLOW_MULTI is also supported
-         * (which was added in kernel 4.15). We use a similar logic as before, but this time we use the BPF_PROG_ATTACH
-         * bpf() call and the BPF_F_ALLOW_MULTI flags value. Since the flags are checked early in the system call we'll
-         * get EINVAL if it's not supported, and EBADF as before if it is available.
-         * Use probe result as the indicator that program name is also supported since they both were
-         * added in kernel 4.15. */
-
-        zero(attr);
-        attr.attach_type = BPF_CGROUP_INET_EGRESS;
-        attr.target_fd = -EBADF;
-        attr.attach_bpf_fd = -EBADF;
-        attr.attach_flags = BPF_F_ALLOW_MULTI;
-
-        if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0) {
-                if (errno == EBADF) {
-                        log_debug_errno(errno, "bpf-firewall: Got EBADF when using BPF_F_ALLOW_MULTI, which indicates it is supported. Yay!");
-                        return supported = BPF_FIREWALL_SUPPORTED_WITH_MULTI;
-                }
-
-                if (errno == EINVAL)
-                        log_debug_errno(errno, "bpf-firewall: Got EINVAL error when using BPF_F_ALLOW_MULTI, which indicates it's not supported.");
-                else
-                        log_debug_errno(errno, "bpf-firewall: Got unexpected error when using BPF_F_ALLOW_MULTI, assuming it's not supported: %m");
-
-                return supported = BPF_FIREWALL_SUPPORTED;
-        } else {
-                bpf_firewall_unsupported_reason =
-                        log_debug_errno(SYNTHETIC_ERRNO(EBADE),
-                                        "bpf-firewall: Wut? Kernel accepted our invalid BPF_PROG_ATTACH+BPF_F_ALLOW_MULTI call? "
-                                        "Something is weird, assuming BPF firewalling is broken and hence not supported.");
-                return supported = BPF_FIREWALL_UNSUPPORTED;
-        }
-}
-
 void emit_bpf_firewall_warning(Unit *u) {
        static bool warned = false;
+        int r;

        assert(u);
        assert(u->manager);
@@ -949,9 +800,12 @@ void emit_bpf_firewall_warning(Unit *u) {
        if (warned || MANAGER_IS_TEST_RUN(u->manager))
                return;

-        bool quiet = ERRNO_IS_PRIVILEGE(bpf_firewall_unsupported_reason) && detect_container() > 0;
+        r = bpf_program_supported();
+        assert(r < 0);

-        log_unit_full_errno(u, quiet ? LOG_DEBUG : LOG_WARNING, bpf_firewall_unsupported_reason,
+        bool quiet = ERRNO_IS_NEG_PRIVILEGE(r) && detect_container() > 0;
+
+        log_unit_full_errno(u, quiet ? LOG_DEBUG : LOG_WARNING, r,
                            "unit configures an IP firewall, but %s.\n"
                            "(This warning is only shown for the first unit using IP firewalling.)",
                            getuid() != 0 ? "not running as root" :
--- a/src/core/bpf-firewall.h
+++ b/src/core/bpf-firewall.h
@@ -6,14 +6,6 @@
 #include "cgroup.h"
 #include "unit.h"

-enum {
-        BPF_FIREWALL_UNSUPPORTED          = 0,
-        BPF_FIREWALL_SUPPORTED            = 1,
-        BPF_FIREWALL_SUPPORTED_WITH_MULTI = 2,
-};
-
-int bpf_firewall_supported(void);
-
 int bpf_firewall_compile(Unit *u);
 int bpf_firewall_install(Unit *u);
 int bpf_firewall_load_custom(Unit *u);
--- a/src/core/bpf-restrict-fs.c
+++ b/src/core/bpf-restrict-fs.c
@@ -27,7 +27,6 @@
 /* libbpf, clang and llc compile time dependencies are satisfied */
 #include "bpf-dlopen.h"
 #include "bpf-link.h"
-#include "bpf-util.h"
 #include "bpf/restrict_fs/restrict-fs-skel.h"

 #define CGROUP_HASH_SIZE_MAX 2048
@@ -102,7 +101,7 @@ bool bpf_restrict_fs_supported(bool initialize) {
        if (!initialize)
                return false;

-        if (!cgroup_bpf_supported())
+        if (dlopen_bpf_full(LOG_WARNING) < 0)
                return (supported = false);

        r = lsm_supported("bpf");
--- a/src/core/bpf-restrict-ifaces.c
+++ b/src/core/bpf-restrict-ifaces.c
@@ -10,7 +10,6 @@

 #include "bpf-dlopen.h"
 #include "bpf-link.h"
-#include "bpf-util.h"
 #include "bpf/restrict_ifaces/restrict-ifaces-skel.h"

 static struct restrict_ifaces_bpf *restrict_ifaces_bpf_free(struct restrict_ifaces_bpf *obj) {
@@ -81,14 +80,9 @@ int bpf_restrict_ifaces_supported(void) {
        if (supported >= 0)
                return supported;

-        if (!cgroup_bpf_supported())
+        if (dlopen_bpf_full(LOG_WARNING) < 0)
                return (supported = false);

-        if (!compat_libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_CGROUP_SKB, /*opts=*/NULL)) {
-                log_debug("restrict-interfaces: BPF program type cgroup_skb is not supported");
-                return (supported = false);
-        }
-
        r = prepare_restrict_ifaces_bpf(NULL, true, NULL, &obj);
        if (r < 0) {
                log_debug_errno(r, "restrict-interfaces: Failed to load BPF object: %m");
--- a/src/core/bpf-socket-bind.c
+++ b/src/core/bpf-socket-bind.c
@@ -12,7 +12,6 @@
 /* libbpf, clang, llvm and bpftool compile time dependencies are satisfied */
 #include "bpf-dlopen.h"
 #include "bpf-link.h"
-#include "bpf-util.h"
 #include "bpf/socket_bind/socket-bind-api.bpf.h"
 #include "bpf/socket_bind/socket-bind-skel.h"

@@ -127,14 +126,9 @@ int bpf_socket_bind_supported(void) {
        _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = NULL;
        int r;

-        if (!cgroup_bpf_supported())
+        if (dlopen_bpf_full(LOG_WARNING) < 0)
                return false;

-        if (!compat_libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, /*opts=*/NULL)) {
-                log_debug("bpf-socket-bind: BPF program type cgroup_sock_addr is not supported");
-                return false;
-        }
-
        r = prepare_socket_bind_bpf(/*unit=*/NULL, /*allow_rules=*/NULL, /*deny_rules=*/NULL, &obj);
        if (r < 0) {
                log_debug_errno(r, "bpf-socket-bind: socket bind filtering is not supported: %m");
--- a/src/core/bpf-util.c
+++ b/src/core/bpf-util.c
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1-or-later */
-
-#include "bpf-dlopen.h"
-#include "bpf-util.h"
-#include "cgroup-util.h"
-#include "initrd-util.h"
-#include "log.h"
-
-bool cgroup_bpf_supported(void) {
-        static int supported = -1;
-        int r;
-
-        if (supported >= 0)
-                return supported;
-
-        r = dlopen_bpf();
-        if (r < 0) {
-                log_full_errno(in_initrd() ? LOG_DEBUG : LOG_INFO,
-                               r, "Failed to open libbpf, cgroup BPF features disabled: %m");
-                return (supported = false);
-        }
-
-        return (supported = true);
-}
--- a/src/core/bpf-util.h
+++ b/src/core/bpf-util.h
@@ -1,5 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1-or-later */
-
-#include <stdbool.h>
-
-bool cgroup_bpf_supported(void);
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -3259,22 +3259,11 @@ static int cg_bpf_mask_supported(CGroupMask *ret) {
        CGroupMask mask = 0;
        int r;

-        /* BPF-based firewall */
-        r = bpf_firewall_supported();
-        if (r < 0)
-                return r;
-        if (r > 0)
-                mask |= CGROUP_MASK_BPF_FIREWALL;
-
-        /* BPF-based device access control */
-        r = bpf_devices_supported();
-        if (r < 0)
-                return r;
-        if (r > 0)
-                mask |= CGROUP_MASK_BPF_DEVICES;
-
-        /* BPF pinned prog (always supported by cgroup v2) */
-        mask |= CGROUP_MASK_BPF_FOREIGN;
+        /* BPF-based firewall, device access control, and pinned foreign prog */
+        if (bpf_program_supported() > 0)
+                mask |= CGROUP_MASK_BPF_FIREWALL |
+                        CGROUP_MASK_BPF_DEVICES |
+                        CGROUP_MASK_BPF_FOREIGN;

        /* BPF-based bind{4|6} hooks */
        r = bpf_socket_bind_supported();
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -635,18 +635,13 @@ static int bus_cgroup_set_transient_property(

                        unit_write_setting(u, flags, name, buf);

-                        if (*filters) {
-                                r = bpf_firewall_supported();
-                                if (r < 0)
-                                        return r;
-                                if (r != BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
-                                        static bool warned = false;
+                        if (*filters && bpf_program_supported() <= 0) {
+                                static bool warned = false;

-                                        log_full(warned ? LOG_DEBUG : LOG_WARNING,
-                                                 "Transient unit %s configures an IP firewall with BPF, but the local system does not support BPF/cgroup firewalling with multiple filters.\n"
-                                                 "Starting this unit will fail! (This warning is only shown for the first started transient unit using IP firewalling.)", u->id);
-                                        warned = true;
-                                }
+                                log_full(warned ? LOG_DEBUG : LOG_WARNING,
+                                         "Transient unit %s configures an IP firewall with BPF, but the local system does not support BPF/cgroup firewalling with multiple filters.\n"
+                                         "Starting this unit will fail! (This warning is only shown for the first started transient unit using IP firewalling.)", u->id);
+                                warned = true;
                        }
                }

--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -5712,15 +5712,12 @@ int config_parse_ip_filter_bpf_progs(
        if (r < 0)
                return log_oom();

-        r = bpf_firewall_supported();
-        if (r < 0)
-                return r;
-        if (r != BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
+        if (bpf_program_supported() <= 0) {
                static bool warned = false;

-                log_full(warned ? LOG_DEBUG : LOG_WARNING,
-                         "File %s:%u configures an IP firewall with BPF programs (%s=%s), but the local system does not support BPF/cgroup based firewalling with multiple filters.\n"
-                         "Starting this unit will fail! (This warning is only shown for the first loaded unit using IP firewalling.)", filename, line, lvalue, rvalue);
+                log_syntax(unit, warned ? LOG_DEBUG : LOG_WARNING, filename, line, 0,
+                           "Configures an IP firewall with BPF programs (%s=%s), but the local system does not support BPF/cgroup based firewalling with multiple filters. "
+                           "Starting this unit will fail! (This warning is only shown for the first loaded unit using IP firewalling.)", lvalue, rvalue);

                warned = true;
        }
--- a/src/core/meson.build
+++ b/src/core/meson.build
@@ -65,12 +65,6 @@ libcore_sources = files(
        'varlink.c',
 )

-if conf.get('BPF_FRAMEWORK') == 1
-        libcore_sources += files(
-                'bpf-util.c',
-        )
-endif
-
 subdir('bpf/socket_bind')
 subdir('bpf/restrict_fs')
 subdir('bpf/restrict_ifaces')
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -1477,9 +1477,7 @@ static int socket_address_listen_do(
                log_unit_error_errno(u, error, fmt, strna(_t));  \
        })

-static int fork_needed(const SocketAddress *address, Socket *s) {
-        int r;
-
+static bool fork_needed(const SocketAddress *address, Socket *s) {
        assert(address);
        assert(s);

@@ -1493,13 +1491,9 @@ static int fork_needed(const SocketAddress *address, Socket *s) {
                        if (nft_set->source == NFT_SET_SOURCE_CGROUP)
                                return true;

-        if (IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6)) {
-                r = bpf_firewall_supported();
-                if (r < 0)
-                        return r;
-                if (r != BPF_FIREWALL_UNSUPPORTED) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */
-                        return true;
-        }
+        if (IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6) &&
+            bpf_program_supported() > 0) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */
+                return true;

        return exec_needs_network_namespace(&s->exec_context);
 }
@@ -1521,10 +1515,7 @@ static int socket_address_listen_in_cgroup(
         * the socket is actually properly attached to the unit's cgroup for the purpose of BPF filtering and
         * such. */

-        r = fork_needed(address, s);
-        if (r < 0)
-                return r;
-        if (r == 0) {
+        if (!fork_needed(address, s)) {
                /* Shortcut things... */
                fd = socket_address_listen_do(s, address, label);
                if (fd < 0)
@@ -3030,10 +3021,7 @@ static int socket_accept_in_cgroup(Socket *s, SocketPort *p, int fd) {
        if (!IN_SET(p->address.sockaddr.sa.sa_family, AF_INET, AF_INET6))
                goto shortcut;

-        r = bpf_firewall_supported();
-        if (r < 0)
-                return r;
-        if (r == BPF_FIREWALL_UNSUPPORTED)
+        if (bpf_program_supported() <= 0)
                goto shortcut;

        if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
--- a/src/shared/bpf-compat.h
+++ b/src/shared/bpf-compat.h
@@ -26,12 +26,10 @@ struct bpf_map_create_opts;
 *  - before the compat static inline helpers that use them.
 * When removing this file move these back to bpf-dlopen.h */
 extern int (*sym_bpf_map_create)(enum bpf_map_type,  const char *, __u32, __u32, __u32, const struct bpf_map_create_opts *);
-extern int (*sym_libbpf_probe_bpf_prog_type)(enum bpf_prog_type, const void *);
 extern struct bpf_map* (*sym_bpf_object__next_map)(const struct bpf_object *obj, const struct bpf_map *map);

 /* compat symbols removed in libbpf 1.0 */
 extern int (*sym_bpf_create_map)(enum bpf_map_type, int key_size, int value_size, int max_entries, __u32 map_flags);
-extern bool (*sym_bpf_probe_prog_type)(enum bpf_prog_type, __u32);

 /* helpers to use the available variant behind new API */
 static inline int compat_bpf_map_create(enum bpf_map_type map_type,
@@ -47,10 +45,3 @@ static inline int compat_bpf_map_create(enum bpf_map_type map_type,
        return sym_bpf_create_map(map_type, key_size, value_size, max_entries,
                                  0 /* opts->map_flags, but opts is always NULL for us so skip build dependency on the type */);
 }
-
-static inline int compat_libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) {
-        if (sym_libbpf_probe_bpf_prog_type)
-                return sym_libbpf_probe_bpf_prog_type(prog_type, opts);
-
-        return sym_bpf_probe_prog_type(prog_type, 0);
-}
--- a/src/shared/bpf-dlopen.c
+++ b/src/shared/bpf-dlopen.c
@@ -51,12 +51,10 @@ DLSYM_PROTOTYPE(ring_buffer__poll) = NULL;

 /* new symbols available from libbpf 0.7.0 */
 int (*sym_bpf_map_create)(enum bpf_map_type,  const char *, __u32, __u32, __u32, const struct bpf_map_create_opts *);
-int (*sym_libbpf_probe_bpf_prog_type)(enum bpf_prog_type, const void *);
 struct bpf_map* (*sym_bpf_object__next_map)(const struct bpf_object *obj, const struct bpf_map *map);

 /* compat symbols removed in libbpf 1.0 */
 int (*sym_bpf_create_map)(enum bpf_map_type,  int key_size, int value_size, int max_entries, __u32 map_flags);
-bool (*sym_bpf_probe_prog_type)(enum bpf_prog_type, __u32);

 _printf_(2,0)
 static int bpf_print_func(enum libbpf_print_level level, const char *fmt, va_list ap) {
@@ -72,10 +70,14 @@ static int bpf_print_func(enum libbpf_print_level level, const char *fmt, va_lis
        return log_internalv(LOG_DEBUG, errno, NULL, 0, NULL, fmt, ap);
 }

-int dlopen_bpf(void) {
+int dlopen_bpf_full(int log_level) {
+        static int cached = 0;
        void *dl;
        int r;

+        if (cached != 0)
+                return cached;
+
        ELF_NOTE_DLOPEN("bpf",
                        "Support firewalling and sandboxing with BPF",
                        ELF_NOTE_DLOPEN_PRIORITY_SUGGESTED,
@@ -91,8 +93,9 @@ int dlopen_bpf(void) {
                 * list for both files, and when we assume 1.0+ is present we can remove this dlopen */
                dl = dlopen("libbpf.so.0", RTLD_NOW|RTLD_NODELETE);
                if (!dl)
-                        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
-                                               "neither libbpf.so.1 nor libbpf.so.0 are installed: %s", dlerror());
+                        return cached = log_full_errno(log_level, SYNTHETIC_ERRNO(EOPNOTSUPP),
+                                                       "Neither libbpf.so.1 nor libbpf.so.0 are installed, cgroup BPF features disabled: %s",
+                                                       dlerror());

                log_debug("Loaded 'libbpf.so.0' via dlopen()");

@@ -101,11 +104,9 @@ int dlopen_bpf(void) {
                                dl, LOG_DEBUG,
 #if MODERN_LIBBPF
                                /* Don't exist anymore in new libbpf, hence cannot type check them */
-                                DLSYM_ARG_FORCE(bpf_create_map),
-                                DLSYM_ARG_FORCE(bpf_probe_prog_type)
+                                DLSYM_ARG_FORCE(bpf_create_map)
 #else
-                                DLSYM_ARG(bpf_create_map),
-                                DLSYM_ARG(bpf_probe_prog_type)
+                                DLSYM_ARG(bpf_create_map)
 #endif
                );

@@ -118,18 +119,16 @@ int dlopen_bpf(void) {
                                dl, LOG_DEBUG,
 #if MODERN_LIBBPF
                                DLSYM_ARG(bpf_map_create),
-                                DLSYM_ARG(libbpf_probe_bpf_prog_type),
                                DLSYM_ARG(bpf_object__next_map)
 #else
                                /* These symbols did not exist in old libbpf, hence we cannot type check them */
                                DLSYM_ARG_FORCE(bpf_map_create),
-                                DLSYM_ARG_FORCE(libbpf_probe_bpf_prog_type),
                                DLSYM_ARG_FORCE(bpf_object__next_map)
 #endif
                );
        }
        if (r < 0)
-                return r;
+                return cached = log_full_errno(log_level, r, "Failed to load libbpf symbols, cgroup BPF features disabled: %m");

        r = dlsym_many_or_warn(
                        dl, LOG_DEBUG,
@@ -171,14 +170,14 @@ int dlopen_bpf(void) {
                        DLSYM_ARG(ring_buffer__new),
                        DLSYM_ARG(ring_buffer__poll));
        if (r < 0)
-                return r;
+                return cached = log_full_errno(log_level, r, "Failed to load libbpf symbols, cgroup BPF features disabled: %m");

        /* We set the print helper unconditionally. Otherwise libbpf will emit not useful log messages. */
        (void) sym_libbpf_set_print(bpf_print_func);

        REENABLE_WARNING;

-        return r;
+        return cached = true;
 }

 int bpf_get_error_translated(const void *ptr) {
@@ -200,8 +199,8 @@ int bpf_get_error_translated(const void *ptr) {

 #else

-int dlopen_bpf(void) {
-        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
-                               "libbpf support is not compiled in.");
+int dlopen_bpf_full(int log_level) {
+        return log_once_errno(log_level, SYNTHETIC_ERRNO(EOPNOTSUPP),
+                              "libbpf support is not compiled in, cgroup BPF features disabled.");
 }
 #endif
--- a/src/shared/bpf-dlopen.h
+++ b/src/shared/bpf-dlopen.h
@@ -1,6 +1,9 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 #pragma once

+#include <stdbool.h>
+#include <syslog.h>
+
 #if HAVE_LIBBPF

 #include <bpf/bpf.h>
@@ -48,4 +51,7 @@ int bpf_get_error_translated(const void *ptr);

 #endif

-int dlopen_bpf(void);
+int dlopen_bpf_full(int log_level);
+static inline int dlopen_bpf(void) {
+        return dlopen_bpf_full(LOG_DEBUG);
+}
--- a/src/shared/bpf-program.c
+++ b/src/shared/bpf-program.c
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */

 #include <fcntl.h>
+#include <linux/bpf_insn.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
@@ -42,6 +43,52 @@ DEFINE_STRING_TABLE_LOOKUP(bpf_cgroup_attach_type, int);

 DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(bpf_program_hash_ops, void, trivial_hash_func, trivial_compare_func, bpf_program_free);

+int bpf_program_supported(void) {
+        static int cached = 0;
+
+        if (cached != 0)
+                return cached;
+
+        /* Currently, we only use the following three types:
+         * - BPF_PROG_TYPE_CGROUP_SKB, supported since kernel v4.10 (0e33661de493db325435d565a4a722120ae4cbf3),
+         * - BPF_PROG_TYPE_CGROUP_DEVICE, supported since kernel v4.15 (ebc614f687369f9df99828572b1d85a7c2de3d92),
+         * - BPF_PROG_TYPE_CGROUP_SOCK_ADDR, supported since kernel v4.17 (4fbac77d2d092b475dda9eea66da674369665427).
+         * Hence, as our baseline on the kernel is v5.4, it is not necessary to check if we can create BPF
+         * programs of hthese types.
+         *
+         * However, unfortunately the kernel allows us to create BPF_PROG_TYPE_CGROUP_SKB (maybe also other types)
+         * programs even when CONFIG_CGROUP_BPF is turned off at kernel compilation time. This sucks of course:
+         * why does it allow us to create a cgroup BPF program if we can't do a thing with it later?
+         *
+         * We detect this case by issuing the BPF_PROG_DETACH bpf() call with invalid file descriptors: if
+         * CONFIG_CGROUP_BPF is turned off, then the call will fail early with EINVAL. If it is turned on the
+         * parameters are validated however, and that'll fail with EBADF then.
+         *
+         * The check seems also important when we are running with sanitizers. With sanitizers (at least with
+         * LLVM v20), the following check and other bpf() calls fails even if the kernel supports BPF. To
+         * avoid unexpected fail when running with sanitizers, let's explicitly check if bpf() syscall works. */
+
+        /* Clang and GCC (>=15) do not 0-pad with structured initialization, causing the kernel to reject the
+         * bpf_attr as invalid. See: https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65
+         * Hence, we cannot use structured initialization here, and need to clear the structure with zero
+         * explicitly before use. */
+        union bpf_attr attr;
+        zero(attr);
+        attr.attach_type = BPF_CGROUP_INET_EGRESS; /* since kernel v4.10 (0e33661de493db325435d565a4a722120ae4cbf3) */
+        attr.target_fd = -EBADF;
+        attr.attach_bpf_fd = -EBADF;
+
+        if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0) {
+                if (errno == EBADF) /* YAY! */
+                        return cached = true;
+
+                return cached = log_debug_errno(errno, "Didn't get EBADF from invalid BPF_PROG_DETACH call: %m");
+        }
+
+        return cached = log_debug_errno(SYNTHETIC_ERRNO(EBADE),
+                                        "Wut? Kernel accepted our invalid BPF_PROG_DETACH call? Something is weird, assuming BPF is broken and hence not supported.");
+}
+
 BPFProgram *bpf_program_free(BPFProgram *p) {
        if (!p)
                return NULL;
--- a/src/shared/bpf-program.h
+++ b/src/shared/bpf-program.h
@@ -33,6 +33,8 @@ struct BPFProgram {
        uint32_t attached_flags;
 };

+int bpf_program_supported(void);
+
 int bpf_program_new(uint32_t prog_type, const char *prog_name, BPFProgram **ret);
 int bpf_program_new_from_bpffs_path(const char *path, BPFProgram **ret);
 BPFProgram *bpf_program_free(BPFProgram *p);
--- a/src/shared/tests.c
+++ b/src/shared/tests.c
@@ -219,7 +219,10 @@ static int allocate_scope(void) {
                return 0;
        }

-        r = sd_bus_default_system(&bus);
+        if (geteuid() == 0)
+                r = sd_bus_default_system(&bus);
+        else
+                r = sd_bus_default_user(&bus);
        if (r < 0)
                return log_error_errno(r, "Failed to connect to system bus: %m");

--- a/src/test/test-bpf-devices.c
+++ b/src/test/test-bpf-devices.c
@@ -257,6 +257,11 @@ int main(int argc, char *argv[]) {

        test_setup_logging(LOG_DEBUG);

+        r = bpf_program_supported();
+        if (r < 0)
+                return log_tests_skipped_errno(r, "BPF device filter not supported");
+        ASSERT_TRUE(r);
+
        ASSERT_OK(getrlimit(RLIMIT_MEMLOCK, &rl));
        rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE);
        (void) setrlimit(RLIMIT_MEMLOCK, &rl);
@@ -274,11 +279,6 @@ int main(int argc, char *argv[]) {
        if (r < 0)
                return log_tests_skipped_errno(r, "Failed to prepare cgroup subtree");

-        r = bpf_devices_supported();
-        if (r == 0)
-                return log_tests_skipped("BPF device filter not supported");
-        ASSERT_EQ(r, 1);
-
        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup, NULL, &controller_path);
        ASSERT_OK(r);

--- a/src/test/test-bpf-firewall.c
+++ b/src/test/test-bpf-firewall.c
@@ -31,7 +31,6 @@ int main(int argc, char *argv[]) {
        struct rlimit rl;
        int r;
        union bpf_attr attr;
-        bool test_custom_filter = false;
        const char *test_prog = "/sys/fs/bpf/test-dropper";

        test_setup_logging(LOG_DEBUG);
@@ -39,6 +38,11 @@ int main(int argc, char *argv[]) {
        if (detect_container() > 0)
                return log_tests_skipped("test-bpf-firewall fails inside LXC and Docker containers: https://github.com/systemd/systemd/issues/9666");

+        r = bpf_program_supported();
+        if (r < 0)
+                return log_tests_skipped_errno(r, "BPF firewalling not supported");
+        ASSERT_TRUE(r);
+
        ASSERT_OK(getrlimit(RLIMIT_MEMLOCK, &rl));
        rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE);
        (void) setrlimit(RLIMIT_MEMLOCK, &rl);
@@ -65,34 +69,17 @@ int main(int argc, char *argv[]) {
        r = bpf_program_add_instructions(p, exit_insn, ELEMENTSOF(exit_insn));
        ASSERT_EQ(r, 0);

-        r = bpf_firewall_supported();
-        if (r == BPF_FIREWALL_UNSUPPORTED)
-                return log_tests_skipped("BPF firewalling not supported");
-        ASSERT_GT(r, 0);
-
-        if (r == BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
-                log_notice("BPF firewalling with BPF_F_ALLOW_MULTI supported. Yay!");
-                test_custom_filter = true;
-        } else
-                log_notice("BPF firewalling (though without BPF_F_ALLOW_MULTI) supported. Good.");
-
        r = bpf_program_load_kernel(p, log_buf, ELEMENTSOF(log_buf));
        ASSERT_OK(r);

-        if (test_custom_filter) {
-                zero(attr);
-                attr.pathname = PTR_TO_UINT64(test_prog);
-                attr.bpf_fd = p->kernel_fd;
-                attr.file_flags = 0;
+        zero(attr);
+        attr.pathname = PTR_TO_UINT64(test_prog);
+        attr.bpf_fd = p->kernel_fd;
+        attr.file_flags = 0;

-                (void) unlink(test_prog);
+        (void) unlink(test_prog);

-                r = bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
-                if (r < 0) {
-                        log_warning_errno(errno, "BPF object pinning failed, will not run custom filter test: %m");
-                        test_custom_filter = false;
-                }
-        }
+        ASSERT_OK(bpf(BPF_OBJ_PIN, &attr, sizeof(attr)));

        p = bpf_program_free(p);

@@ -192,31 +179,30 @@ int main(int argc, char *argv[]) {
        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.code != CLD_EXITED ||
                  SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.status != EXIT_SUCCESS);

-        if (test_custom_filter) {
-                assert_se(u = unit_new(m, sizeof(Service)));
-                assert_se(unit_add_name(u, "custom-filter.service") == 0);
-                assert_se(cc = unit_get_cgroup_context(u));
-                u->perpetual = true;
+        /* testing custom filter */
+        assert_se(u = unit_new(m, sizeof(Service)));
+        assert_se(unit_add_name(u, "custom-filter.service") == 0);
+        assert_se(cc = unit_get_cgroup_context(u));
+        u->perpetual = true;

-                cc->ip_accounting = true;
+        cc->ip_accounting = true;

-                assert_se(config_parse_ip_filter_bpf_progs(u->id, "filename", 1, "Service", 1, "IPIngressFilterPath", 0, test_prog, &cc->ip_filters_ingress, u) == 0);
-                assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "-/bin/ping -c 1 127.0.0.1 -W 5", SERVICE(u)->exec_command, u) == 0);
+        assert_se(config_parse_ip_filter_bpf_progs(u->id, "filename", 1, "Service", 1, "IPIngressFilterPath", 0, test_prog, &cc->ip_filters_ingress, u) == 0);
+        assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "-/bin/ping -c 1 127.0.0.1 -W 5", SERVICE(u)->exec_command, u) == 0);

-                SERVICE(u)->type = SERVICE_ONESHOT;
-                u->load_state = UNIT_LOADED;
+        SERVICE(u)->type = SERVICE_ONESHOT;
+        u->load_state = UNIT_LOADED;

-                ASSERT_OK(unit_start(u, NULL));
+        ASSERT_OK(unit_start(u, NULL));

-                while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED))
-                        assert_se(sd_event_run(m->event, UINT64_MAX) >= 0);
+        while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED))
+                assert_se(sd_event_run(m->event, UINT64_MAX) >= 0);

-                assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code != CLD_EXITED ||
-                          SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status != EXIT_SUCCESS);
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code != CLD_EXITED ||
+                  SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status != EXIT_SUCCESS);

-                (void) unlink(test_prog);
-                assert_se(SERVICE(u)->state == SERVICE_DEAD);
-        }
+        (void) unlink(test_prog);
+        assert_se(SERVICE(u)->state == SERVICE_DEAD);

        return 0;
 }
--- a/src/test/test-bpf-foreign-programs.c
+++ b/src/test/test-bpf-foreign-programs.c
@@ -279,8 +279,9 @@ int main(int argc, char *argv[]) {
        if (detect_container() > 0)
                return log_tests_skipped("test-bpf fails inside LXC and Docker containers: https://github.com/systemd/systemd/issues/9666");

-        if (getuid() != 0)
-                return log_tests_skipped("not running as root");
+        r = bpf_program_supported();
+        if (r < 0)
+                return log_tests_skipped_errno(r, "not running as root");

        ASSERT_OK(getrlimit(RLIMIT_MEMLOCK, &rl));
        rl.rlim_cur = rl.rlim_max = MAX(rl.rlim_max, CAN_MEMLOCK_SIZE);