sd-netlink: read error message for NFNL_MSG_BATCH_BEGIN (#39967)

Before:
```
$ unshare --user --map-users=0:$(id -u):1 --map-groups=0:$(id -g):1 build/test-firewall-util
/* test_v6 */
src/test/test-firewall-util.c:34: Assertion failed: "r = fw_nftables_add_masquerade(nfnl, true, AF_INET6, &u1, 128)" failed with unexpected error: -110/ETIMEDOUT
Aborted (core dumped)
```

After:
```
$ unshare --user --map-users=0:$(id -u):1 --map-groups=0:$(id -g):1 /var/build/test-firewall-util
/* test_v6 */
test-firewall-util: Failed to add IPv6 masquerade, skipping tests: Operation not permitted
/* test_v4 */
test-firewall-util: Failed to add IPv4 masquerade, skipping tests: Operation not permitted
```
This commit is contained in:
Zbigniew Jędrzejewski-Szmek
2025-12-05 12:12:03 +01:00
committed by GitHub
7 changed files with 160 additions and 67 deletions

View File

@@ -55,6 +55,11 @@ typedef struct sd_netlink_slot {
};
} sd_netlink_slot;
/* One entry of sd_netlink.ignored_serials: identifies a message serial whose incoming reply is
 * dropped instead of being queued. */
typedef struct NetlinkIgnoredSerial {
        /* Serial number of the message whose reply we are no longer interested in. */
        uint32_t serial;

        /* Timestamp in CLOCK_MONOTONIC. Entries whose timestamp lies in the past are trimmed;
         * USEC_INFINITY entries are never trimmed. */
        usec_t timeout_usec;
} NetlinkIgnoredSerial;
typedef struct sd_netlink {
unsigned n_ref;
@@ -78,6 +83,7 @@ typedef struct sd_netlink {
bool processing:1;
uint32_t serial;
Hashmap *ignored_serials;
struct Prioq *reply_callbacks_prioq;
Hashmap *reply_callbacks;
@@ -181,8 +187,7 @@ int sd_nfnl_call_batch(
sd_netlink *nfnl,
sd_netlink_message **messages,
size_t n_messages,
uint64_t usec,
sd_netlink_message ***ret_messages);
uint64_t usec);
int sd_nfnl_message_new(
sd_netlink *nfnl,
sd_netlink_message **ret,

View File

@@ -7,7 +7,6 @@
#include "sd-netlink.h"
#include "alloc-util.h"
#include "errno-util.h"
#include "iovec-util.h"
#include "netlink-internal.h"
#include "netlink-util.h"
@@ -119,7 +118,7 @@ int sd_nfnl_send_batch(
return -ENOMEM;
if (ret_serials) {
serials = new(uint32_t, n_messages);
serials = new(uint32_t, n_messages + 2);
if (!serials)
return -ENOMEM;
}
@@ -133,6 +132,9 @@ int sd_nfnl_send_batch(
return r;
netlink_seal_message(nfnl, batch_begin);
if (serials)
serials[c] = message_get_serial(batch_begin);
iovs[c++] = IOVEC_MAKE(batch_begin->hdr, batch_begin->hdr->nlmsg_len);
for (size_t i = 0; i < n_messages; i++) {
@@ -147,7 +149,7 @@ int sd_nfnl_send_batch(
netlink_seal_message(nfnl, messages[i]);
if (serials)
serials[i] = message_get_serial(messages[i]);
serials[c] = message_get_serial(messages[i]);
/* It seems that the kernel accepts an arbitrary number. Let's set the lower 16 bits of the
* serial of the first message. */
@@ -161,6 +163,9 @@ int sd_nfnl_send_batch(
return r;
netlink_seal_message(nfnl, batch_end);
if (serials)
serials[c] = message_get_serial(batch_end);
iovs[c++] = IOVEC_MAKE(batch_end->hdr, batch_end->hdr->nlmsg_len);
assert(c == n_messages + 2);
@@ -178,10 +183,8 @@ int sd_nfnl_call_batch(
sd_netlink *nfnl,
sd_netlink_message **messages,
size_t n_messages,
uint64_t usec,
sd_netlink_message ***ret_messages) {
uint64_t usec) {
_cleanup_free_ sd_netlink_message **replies = NULL;
_cleanup_free_ uint32_t *serials = NULL;
int r;
@@ -190,26 +193,40 @@ int sd_nfnl_call_batch(
assert_return(messages, -EINVAL);
assert_return(n_messages > 0, -EINVAL);
if (ret_messages) {
replies = new0(sd_netlink_message*, n_messages);
if (!replies)
return -ENOMEM;
}
r = sd_nfnl_send_batch(nfnl, messages, n_messages, &serials);
if (r < 0)
return r;
for (size_t i = 0; i < n_messages; i++)
RET_GATHER(r,
sd_netlink_read(nfnl, serials[i], usec, ret_messages ? replies + i : NULL));
if (r < 0)
return r;
for (size_t i = 1; i <= n_messages; i++) {
/* If we have received an error, kernel may not send replies for later messages. Let's ignore
* remaining replies. */
if (r < 0) {
(void) sd_netlink_ignore_serial(nfnl, serials[i], usec);
continue;
}
if (ret_messages)
*ret_messages = TAKE_PTR(replies);
r = sd_netlink_read(nfnl, serials[i], usec, /* ret= */ NULL);
if (r != -ETIMEDOUT)
continue;
return 0;
/* The kernel returns some errors, e.g. when unprivileged, in the reply to BATCH_BEGIN. Hence, if we
* have not received any replies for the batch body, try to read an error from the reply to the
* batch begin. Note, since v6.10 (bf2ac490d28c21a349e9eef81edc45320fca4a3c), we can expect
* that the kernel always replies to the batch begin and end. When we bump the kernel baseline,
* we can read the reply to the batch begin first. */
int k = sd_netlink_read(nfnl, serials[0], usec, /* ret= */ NULL);
if (k < 0)
r = k;
serials[0] = 0; /* indicates that we have read the reply. */
}
/* Ignore replies for batch begin and end if we have not read them. */
if (serials[0] != 0)
(void) sd_netlink_ignore_serial(nfnl, serials[0], usec);
(void) sd_netlink_ignore_serial(nfnl, serials[n_messages + 1], usec);
return r;
}
int sd_nfnl_nft_message_new_basechain(

View File

@@ -222,6 +222,16 @@ static int netlink_queue_received_message(sd_netlink *nl, sd_netlink_message *m)
assert(nl);
assert(m);
serial = message_get_serial(m);
if (serial != 0) {
NetlinkIgnoredSerial *s = hashmap_remove(nl->ignored_serials, UINT32_TO_PTR(serial));
if (s) {
/* We are not interested in the message anymore. */
free(s);
return 0;
}
}
if (ordered_set_size(nl->rqueue) >= NETLINK_RQUEUE_MAX)
return log_debug_errno(SYNTHETIC_ERRNO(ENOBUFS),
"sd-netlink: exhausted the read queue size (%d)", NETLINK_RQUEUE_MAX);
@@ -235,7 +245,6 @@ static int netlink_queue_received_message(sd_netlink *nl, sd_netlink_message *m)
if (sd_netlink_message_is_broadcast(m))
return 0;
serial = message_get_serial(m);
if (serial == 0)
return 0;

View File

@@ -126,6 +126,8 @@ static sd_netlink *netlink_free(sd_netlink *nl) {
assert(nl);
hashmap_free(nl->ignored_serials);
ordered_set_free(nl->rqueue);
hashmap_free(nl->rqueue_by_serial);
hashmap_free(nl->rqueue_partial_by_serial);
@@ -152,6 +154,96 @@ static sd_netlink *netlink_free(sd_netlink *nl) {
DEFINE_TRIVIAL_REF_UNREF_FUNC(sd_netlink, sd_netlink, netlink_free);
/* Returns the current time of the given clock for this netlink object.
 *
 * If an event loop is attached and sd_event_now() returns a positive value, the timestamp it
 * stored is returned. In every other case (no event loop, zero or negative result) the time is
 * queried directly with now(). */
static usec_t netlink_now(sd_netlink *nl, clock_t clock) {
        usec_t t;

        assert(nl);

        if (!nl->event)
                return now(clock);

        if (sd_event_now(nl->event, clock, &t) <= 0)
                return now(clock);

        return t;
}
/* Converts a relative timeout into an absolute CLOCK_MONOTONIC timestamp.
 *
 * A timespan of 0 selects the default timeout. The default is determined once and cached in
 * function-static storage: it starts as NETLINK_DEFAULT_TIMEOUT_USEC and may be overridden through
 * $SYSTEMD_NETLINK_DEFAULT_TIMEOUT. A value that fails to parse is logged at debug level and
 * otherwise ignored.
 *
 * Returns the current time (see netlink_now()) plus the effective timespan, combined with
 * usec_add(). */
static usec_t timespan_to_timestamp(sd_netlink *nl, usec_t usec) {
        static bool fallback_initialized = false;
        static usec_t fallback;

        assert(nl);

        /* An explicit timespan was given, no need to look at the default. */
        if (usec != 0)
                return usec_add(netlink_now(nl, CLOCK_MONOTONIC), usec);

        if (!fallback_initialized) {
                /* Mark as initialized first, so that the environment is consulted only once. */
                fallback_initialized = true;
                fallback = NETLINK_DEFAULT_TIMEOUT_USEC;

                const char *env = secure_getenv("SYSTEMD_NETLINK_DEFAULT_TIMEOUT");
                if (env) {
                        int r = parse_sec(env, &fallback);
                        if (r < 0)
                                log_debug_errno(r, "sd-netlink: Failed to parse $SYSTEMD_NETLINK_DEFAULT_TIMEOUT environment variable, ignoring: %m");
                }
        }

        return usec_add(netlink_now(nl, CLOCK_MONOTONIC), fallback);
}
/* Drops all expired entries from nl->ignored_serials.
 *
 * An entry is expired when its timeout timestamp is earlier than the current CLOCK_MONOTONIC time.
 * Entries with a timeout of USEC_INFINITY are always kept. The clock is queried lazily, i.e. only
 * once the first entry with a finite timeout is encountered. */
static void netlink_trim_ignored_serials(sd_netlink *nl) {
        usec_t current = 0; /* 0 means "not queried yet" */
        NetlinkIgnoredSerial *entry;

        assert(nl);

        HASHMAP_FOREACH(entry, nl->ignored_serials) {
                if (entry->timeout_usec != USEC_INFINITY) {
                        if (current == 0)
                                current = netlink_now(nl, CLOCK_MONOTONIC);

                        if (current > entry->timeout_usec) {
                                /* Look the entry up by its own serial, and release what the hashmap hands back. */
                                NetlinkIgnoredSerial *expired = hashmap_remove(nl->ignored_serials, UINT32_TO_PTR(entry->serial));
                                free(expired);
                        }
                }
        }
}
/* Registers a serial whose reply shall be ignored.
 *
 * nl           - the netlink object; must not be NULL and must not be used across a PID change.
 * serial       - the message serial to ignore; must not be 0.
 * timeout_usec - relative timeout; converted to an absolute timestamp by timespan_to_timestamp(),
 *                where 0 selects the default timeout.
 *
 * If the serial is already registered only its timeout is refreshed. Otherwise expired entries are
 * trimmed first and a new entry is inserted.
 *
 * Returns 0 on success, -EINVAL or -ECHILD on invalid arguments, -ENOMEM if the entry cannot be
 * allocated, or the negative error returned by hashmap_ensure_put(). */
int sd_netlink_ignore_serial(sd_netlink *nl, uint32_t serial, uint64_t timeout_usec) {
        _cleanup_free_ NetlinkIgnoredSerial *entry = NULL;
        NetlinkIgnoredSerial *known;
        usec_t deadline;
        int r;

        assert_return(nl, -EINVAL);
        assert_return(!netlink_pid_changed(nl), -ECHILD);
        assert_return(serial != 0, -EINVAL);

        deadline = timespan_to_timestamp(nl, timeout_usec);

        /* Already being ignored? Then just refresh the deadline. */
        known = hashmap_get(nl->ignored_serials, UINT32_TO_PTR(serial));
        if (known) {
                known->timeout_usec = deadline;
                return 0;
        }

        /* Trim expired entries before adding a new one. */
        netlink_trim_ignored_serials(nl);

        entry = new(NetlinkIgnoredSerial, 1);
        if (!entry)
                return -ENOMEM;

        entry->serial = serial;
        entry->timeout_usec = deadline;

        r = hashmap_ensure_put(&nl->ignored_serials, &trivial_hash_ops_value_free, UINT32_TO_PTR(serial), entry);
        if (r < 0)
                return r;

        /* Disarm the cleanup handler for the entry that was just put into the hashmap. */
        TAKE_PTR(entry);
        return 0;
}
int sd_netlink_send(
sd_netlink *nl,
sd_netlink_message *message,
@@ -204,7 +296,6 @@ static int process_timeout(sd_netlink *nl) {
_cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
struct reply_callback *c;
sd_netlink_slot *slot;
usec_t n;
int r;
assert(nl);
@@ -213,8 +304,7 @@ static int process_timeout(sd_netlink *nl) {
if (!c)
return 0;
n = now(CLOCK_MONOTONIC);
if (c->timeout > n)
if (c->timeout > netlink_now(nl, CLOCK_MONOTONIC))
return 0;
r = message_new_synthetic_error(nl, -ETIMEDOUT, c->serial, &m);
@@ -337,6 +427,8 @@ static int process_running(sd_netlink *nl, sd_netlink_message **ret) {
assert(nl);
netlink_trim_ignored_serials(nl);
r = process_timeout(nl);
if (r != 0)
goto null_message;
@@ -384,32 +476,6 @@ int sd_netlink_process(sd_netlink *nl, sd_netlink_message **ret) {
return r;
}
/* Converts a relative timeout into an absolute CLOCK_MONOTONIC timestamp.
 *
 * A timespan of 0 selects the default timeout, which is determined once and cached in
 * function-static storage: NETLINK_DEFAULT_TIMEOUT_USEC, optionally overridden through
 * $SYSTEMD_NETLINK_DEFAULT_TIMEOUT. A value that fails to parse is logged at debug level and
 * otherwise ignored.
 *
 * Returns now(CLOCK_MONOTONIC) plus the effective timespan, combined with usec_add(). */
static usec_t timespan_to_timestamp(usec_t usec) {
        static bool cached = false;
        static usec_t cached_default;

        /* An explicit timespan was given, the default is not needed. */
        if (usec != 0)
                return usec_add(now(CLOCK_MONOTONIC), usec);

        if (!cached) {
                /* Mark as cached first, so that the environment is consulted only once. */
                cached = true;
                cached_default = NETLINK_DEFAULT_TIMEOUT_USEC;

                const char *value = secure_getenv("SYSTEMD_NETLINK_DEFAULT_TIMEOUT");
                if (value) {
                        int r = parse_sec(value, &cached_default);
                        if (r < 0)
                                log_debug_errno(r, "sd-netlink: Failed to parse $SYSTEMD_NETLINK_DEFAULT_TIMEOUT environment variable, ignoring: %m");
                }
        }

        return usec_add(now(CLOCK_MONOTONIC), cached_default);
}
static int netlink_poll(sd_netlink *nl, bool need_more, usec_t timeout_usec) {
usec_t m = USEC_INFINITY;
int r, e;
@@ -434,7 +500,7 @@ static int netlink_poll(sd_netlink *nl, bool need_more, usec_t timeout_usec) {
if (r < 0)
return r;
m = usec_sub_unsigned(until, now(CLOCK_MONOTONIC));
m = usec_sub_unsigned(until, netlink_now(nl, CLOCK_MONOTONIC));
}
r = fd_wait_for_event(nl->fd, e, MIN(m, timeout_usec));
@@ -508,7 +574,7 @@ int sd_netlink_call_async(
return r;
slot->reply_callback.callback = callback;
slot->reply_callback.timeout = timespan_to_timestamp(usec);
slot->reply_callback.timeout = timespan_to_timestamp(nl, usec);
k = sd_netlink_send(nl, m, &slot->reply_callback.serial);
if (k < 0)
@@ -549,7 +615,7 @@ int sd_netlink_read(
assert_return(nl, -EINVAL);
assert_return(!netlink_pid_changed(nl), -ECHILD);
usec = timespan_to_timestamp(timeout);
usec = timespan_to_timestamp(nl, timeout);
for (;;) {
_cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
@@ -591,7 +657,7 @@ int sd_netlink_read(
if (usec != USEC_INFINITY) {
usec_t n;
n = now(CLOCK_MONOTONIC);
n = netlink_now(nl, CLOCK_MONOTONIC);
if (n >= usec)
return -ETIMEDOUT;

View File

@@ -807,7 +807,7 @@ static int fw_nftables_init_family(sd_netlink *nfnl, int family) {
return r;
assert(msgcnt < ELEMENTSOF(messages));
r = sd_nfnl_call_batch(nfnl, messages, msgcnt, NFNL_DEFAULT_TIMEOUT_USECS, NULL);
r = sd_nfnl_call_batch(nfnl, messages, msgcnt, NFNL_DEFAULT_TIMEOUT_USECS);
if (r < 0 && r != -EEXIST)
return r;
@@ -919,7 +919,7 @@ int nft_set_element_modify_iprange(
if (r < 0)
return r;
return sd_nfnl_call_batch(nfnl, &m, 1, NFNL_DEFAULT_TIMEOUT_USECS, NULL);
return sd_nfnl_call_batch(nfnl, &m, 1, NFNL_DEFAULT_TIMEOUT_USECS);
}
int nft_set_element_modify_ip(
@@ -959,7 +959,7 @@ int nft_set_element_modify_ip(
if (r < 0)
return r;
return sd_nfnl_call_batch(nfnl, &m, 1, NFNL_DEFAULT_TIMEOUT_USECS, NULL);
return sd_nfnl_call_batch(nfnl, &m, 1, NFNL_DEFAULT_TIMEOUT_USECS);
}
int nft_set_element_modify_any(
@@ -987,7 +987,7 @@ int nft_set_element_modify_any(
if (r < 0)
return r;
return sd_nfnl_call_batch(nfnl, &m, 1, NFNL_DEFAULT_TIMEOUT_USECS, NULL);
return sd_nfnl_call_batch(nfnl, &m, 1, NFNL_DEFAULT_TIMEOUT_USECS);
}
static int af_to_nfproto(int af) {
@@ -1124,7 +1124,7 @@ static int fw_nftables_add_local_dnat_internal(
return r;
assert(msgcnt < ELEMENTSOF(messages));
r = sd_nfnl_call_batch(nfnl, messages, msgcnt, NFNL_DEFAULT_TIMEOUT_USECS, NULL);
r = sd_nfnl_call_batch(nfnl, messages, msgcnt, NFNL_DEFAULT_TIMEOUT_USECS);
if (r == -EOVERFLOW && af == AF_INET6) {
/* The current implementation of DNAT in systemd requires kernel's
* fdb9c405e35bdc6e305b9b4e20ebc141ed14fc81 (v5.8), and the older kernel returns

View File

@@ -56,6 +56,8 @@ int sd_netlink_call_async(sd_netlink *nl, sd_netlink_slot **ret_slot, sd_netlink
int sd_netlink_call(sd_netlink *nl, sd_netlink_message *message, uint64_t timeout, sd_netlink_message **ret);
int sd_netlink_read(sd_netlink *nl, uint32_t serial, uint64_t timeout, sd_netlink_message **ret);
int sd_netlink_ignore_serial(sd_netlink *nl, uint32_t serial, uint64_t timeout_usec);
int sd_netlink_get_events(sd_netlink *nl);
int sd_netlink_get_timeout(sd_netlink *nl, uint64_t *ret);
int sd_netlink_process(sd_netlink *nl, sd_netlink_message **ret);

View File

@@ -1,8 +1,5 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <stdlib.h>
#include <unistd.h>
#include "sd-netlink.h"
#include "firewall-util.h"
@@ -79,9 +76,6 @@ TEST(v4) {
static int intro(void) {
int r;
if (getuid() != 0)
return log_tests_skipped("not root");
ASSERT_OK_ERRNO(setenv("SYSTEMD_FIREWALL_UTIL_NFT_TABLE_NAME", "io.systemd-test.nat", /* overwrite = */ true));
ASSERT_OK_ERRNO(setenv("SYSTEMD_FIREWALL_UTIL_DNAT_MAP_NAME", "test_map_port_ipport", /* overwrite = */ true));