From 8e24558e611e2ef66552b1da0b62b7ee1220e255 Mon Sep 17 00:00:00 2001 From: Tim Small Date: Sun, 27 Apr 2025 12:47:53 +0100 Subject: [PATCH 1/5] man/network: clarify SR-IOV section description and usage Document effect of the SR-IOV section in .link vs .network files and restructure the SR-IOV section introduction for clarity. --- man/systemd.link.xml | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/man/systemd.link.xml b/man/systemd.link.xml index 217352f979..86507d3e3a 100644 --- a/man/systemd.link.xml +++ b/man/systemd.link.xml @@ -1269,11 +1269,18 @@ [SR-IOV] Section Options - The [SR-IOV] section accepts the following keys. Specify several [SR-IOV] sections to - configure several SR-IOVs. SR-IOV provides the ability to partition a single physical PCI resource - into virtual PCI functions which can then be injected into a VM. In the case of network VFs, SR-IOV - improves north-south network performance (that is, traffic with endpoints outside the host machine) - by allowing traffic to bypass the host machine’s network stack. + SR-IOV provides the ability to partition a single physical PCI resource into virtual PCI + functions which can then be e.g. injected into a VM. In the case of network VFs, SR-IOV reduces + latency and CPU utilisation for north-south network traffic (that is, traffic with endpoints + outside the host machine), by allowing traffic to bypass the host machine’s network stack. + + + The presence of an [SR-IOV] section in a .link file will cause the creation and + configuration of the specified virtual function. Within a .network file, the specified virtual + function will be configured, but must already exist. Specify several [SR-IOV] sections to + configure several SR-IOVs. + + The [SR-IOV] section accepts the following keys. From 45114e399921697613407236631a23613bd81681 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Mon, 28 Apr 2025 10:59:16 +0900 Subject: [PATCH 2/5] network: ignore error in configuring SR-IOV VFs The configuration can easily fail when the target virtual function does not exist, and there is nothing networkd can do in such case. Also, it is overkill to make the physical interface entered to the failed state in such case. Let's warn but ignore the failure. --- src/network/networkd-sriov.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/network/networkd-sriov.c b/src/network/networkd-sriov.c index 78d8cefe7c..b1a5830d45 100644 --- a/src/network/networkd-sriov.c +++ b/src/network/networkd-sriov.c @@ -16,11 +16,8 @@ static int sr_iov_handler(sd_netlink *rtnl, sd_netlink_message *m, Request *req, assert(link); r = sd_netlink_message_get_errno(m); - if (r < 0 && r != -EEXIST) { - log_link_message_error_errno(link, m, r, "Could not set up SR-IOV"); - link_enter_failed(link); - return 1; - } + if (r < 0) + log_link_message_warning_errno(link, m, r, "Failed to set up SR-IOV virtual function, ignoring"); if (link->sr_iov_messages == 0) { log_link_debug(link, "SR-IOV configured"); From 9cb6017e6f9b58385c6325117573a5b815986d8c Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Mon, 28 Apr 2025 11:11:52 +0900 Subject: [PATCH 3/5] network,udev: reword log messages in setting SR-IOV VFs --- src/network/networkd-sriov.c | 10 +++++----- src/udev/net/link-config.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/network/networkd-sriov.c b/src/network/networkd-sriov.c index b1a5830d45..3552d213c2 100644 --- a/src/network/networkd-sriov.c +++ b/src/network/networkd-sriov.c @@ -20,7 +20,7 @@ static int sr_iov_handler(sd_netlink *rtnl, sd_netlink_message *m, Request *req, log_link_message_warning_errno(link, m, r, "Failed to set up SR-IOV virtual function, ignoring"); if (link->sr_iov_messages == 0) { - log_link_debug(link, "SR-IOV configured"); + log_link_debug(link, "Applied settings for SR-IOV virtual functions."); link->sr_iov_configured = true; link_check_ready(link); } @@ -39,7 +39,7 @@ static int sr_iov_configure(SRIOV *sr_iov, Link *link, Request *req) { assert(link->ifindex > 0); assert(req); - log_link_debug(link, "Setting SR-IOV virtual function %"PRIu32".", sr_iov->vf); + log_link_debug(link, "Setting up SR-IOV virtual function %"PRIu32".", sr_iov->vf); r = sd_rtnl_message_new_link(link->manager->rtnl, &m, RTM_SETLINK, link->ifindex); if (r < 0) @@ -65,7 +65,7 @@ static int sr_iov_process_request(Request *req, Link *link, SRIOV *sr_iov) { r = sr_iov_configure(sr_iov, link, req); if (r < 0) return log_link_warning_errno(link, r, - "Failed to configure SR-IOV virtual function %"PRIu32": %m", + "Failed to set up SR-IOV virtual function %"PRIu32": %m", sr_iov->vf); return 1; @@ -91,7 +91,7 @@ int link_request_sr_iov_vfs(Link *link) { NULL); if (r < 0) return log_link_warning_errno(link, r, - "Failed to request SR-IOV virtual function %"PRIu32": %m", + "Failed to request to set up SR-IOV virtual function %"PRIu32": %m", sr_iov->vf); } @@ -99,7 +99,7 @@ int link_request_sr_iov_vfs(Link *link) { link->sr_iov_configured = true; link_check_ready(link); } else - log_link_debug(link, "Configuring SR-IOV"); + log_link_debug(link, "Setting up SR-IOV virtual functions."); return 0; } diff --git a/src/udev/net/link-config.c b/src/udev/net/link-config.c index 1e15bc4b8a..6c61ff743c 100644 --- a/src/udev/net/link-config.c +++ b/src/udev/net/link-config.c @@ -923,7 +923,7 @@ static int link_apply_sr_iov_config(Link *link) { r = sr_iov_configure(link, &link->event->rtnl, sr_iov); if (r < 0) log_link_warning_errno(link, r, - "Failed to configure SR-IOV virtual function %"PRIu32", ignoring: %m", + "Failed to set up SR-IOV virtual function %"PRIu32", ignoring: %m", sr_iov->vf); } From c8b3f1d47d0d21a1f18667a781c6dc106d32025d Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 30 Apr 2025 14:47:40 +0900 Subject: [PATCH 4/5] netif-sriov: align table --- src/shared/netif-sriov.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/shared/netif-sriov.h b/src/shared/netif-sriov.h index cd79980b4e..a7ebb5f3cb 100644 --- a/src/shared/netif-sriov.h +++ b/src/shared/netif-sriov.h @@ -11,9 +11,9 @@ #include "hashmap.h" typedef enum SRIOVLinkState { - SR_IOV_LINK_STATE_AUTO = IFLA_VF_LINK_STATE_AUTO, - SR_IOV_LINK_STATE_ENABLE = IFLA_VF_LINK_STATE_ENABLE, - SR_IOV_LINK_STATE_DISABLE = IFLA_VF_LINK_STATE_DISABLE, + SR_IOV_LINK_STATE_AUTO = IFLA_VF_LINK_STATE_AUTO, + SR_IOV_LINK_STATE_ENABLE = IFLA_VF_LINK_STATE_ENABLE, + SR_IOV_LINK_STATE_DISABLE = IFLA_VF_LINK_STATE_DISABLE, _SR_IOV_LINK_STATE_MAX, _SR_IOV_LINK_STATE_INVALID = -EINVAL, } SRIOVLinkState; From 371005ac98902ae93ac0f7804b45a2f2f03b0aa9 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 30 Apr 2025 15:37:28 +0900 Subject: [PATCH 5/5] network,udev: configure SR-IOV VF attribute one-by-one When a [SR-IOV] section has no setting, e.g. ```ini [SR-IOV] VirtualFunction=0 ``` then the kernel previously replied -EINVAL, as we send a rtnl message with an empty IFLA_VF_INFO container. See See do_setvfinfo() in net/core/rtnetlink.c of the kernel. When a [SR-IOV] section that has an unsupported settings by the interface driver, then previously the kernel partially applied settings and returned -EOPNOTSUPP. E.f. ```ini [SR-IOV] VirtualFunction=0 LinkState=auto Trust=true MACAddress=02:01:00:3e:61:34 ``` and the interface does not support configuring the link state, then the MAC address is assigned, but the trust is not applied: ``` enp3s0f0: Failed to configure SR-IOV virtual function 0, ignoring: Operation not supported vf 0 link/ether 02:01:00:3e:61:34 brd ff:ff:ff:ff:ff:ff, spoof checking on, link-state auto, trust off ``` To fix such issues, this makes networkd/udevd send each attribute for VF one-by-one. Fixes #37257 and #37275. --- src/network/networkd-queue.c | 7 ++- src/network/networkd-queue.h | 9 +++- src/network/networkd-sriov.c | 41 ++++++++++------ src/shared/netif-sriov.c | 91 ++++++++++++++++++++++++++++-------- src/shared/netif-sriov.h | 16 ++++++- src/udev/net/link-config.c | 35 ++++++++------ 6 files changed, 148 insertions(+), 51 deletions(-) diff --git a/src/network/networkd-queue.c b/src/network/networkd-queue.c index 7b5060a049..361aa7eec2 100644 --- a/src/network/networkd-queue.c +++ b/src/network/networkd-queue.c @@ -384,7 +384,12 @@ static const char *const request_type_table[_REQUEST_TYPE_MAX] = { [REQUEST_TYPE_SET_LINK_MAC] = "MAC address", [REQUEST_TYPE_SET_LINK_MASTER] = "master interface", [REQUEST_TYPE_SET_LINK_MTU] = "MTU", - [REQUEST_TYPE_SRIOV] = "SR-IOV", + [REQUEST_TYPE_SRIOV_VF_MAC] = "SR-IOV VF MAC address", + [REQUEST_TYPE_SRIOV_VF_SPOOFCHK] = "SR-IOV VF spoof check", + [REQUEST_TYPE_SRIOV_VF_RSS_QUERY_EN] = "SR-IOV VF RSS query", + [REQUEST_TYPE_SRIOV_VF_TRUST] = "SR-IOV VF trust", + [REQUEST_TYPE_SRIOV_VF_LINK_STATE] = "SR-IOV VF link state", + [REQUEST_TYPE_SRIOV_VF_VLAN_LIST] = "SR-IOV VF vlan list", [REQUEST_TYPE_TC_QDISC] = "QDisc", [REQUEST_TYPE_TC_CLASS] = "TClass", [REQUEST_TYPE_UP_DOWN] = "bring link up or down", diff --git a/src/network/networkd-queue.h b/src/network/networkd-queue.h index 8600ffe869..00a514266d 100644 --- a/src/network/networkd-queue.h +++ b/src/network/networkd-queue.h @@ -5,6 +5,7 @@ #include "alloc-util.h" #include "hash-funcs.h" +#include "netif-sriov.h" typedef struct Link Link; typedef struct NetDev NetDev; @@ -44,7 +45,13 @@ typedef enum RequestType { REQUEST_TYPE_SET_LINK_MAC, /* Setting MAC address. */ REQUEST_TYPE_SET_LINK_MASTER, /* Setting IFLA_MASTER. */ REQUEST_TYPE_SET_LINK_MTU, /* Setting MTU. */ - REQUEST_TYPE_SRIOV, + _REQUEST_TYPE_SRIOV_BASE, + REQUEST_TYPE_SRIOV_VF_MAC = _REQUEST_TYPE_SRIOV_BASE + SR_IOV_VF_MAC, + REQUEST_TYPE_SRIOV_VF_SPOOFCHK = _REQUEST_TYPE_SRIOV_BASE + SR_IOV_VF_SPOOFCHK, + REQUEST_TYPE_SRIOV_VF_RSS_QUERY_EN = _REQUEST_TYPE_SRIOV_BASE + SR_IOV_VF_RSS_QUERY_EN, + REQUEST_TYPE_SRIOV_VF_TRUST = _REQUEST_TYPE_SRIOV_BASE + SR_IOV_VF_TRUST, + REQUEST_TYPE_SRIOV_VF_LINK_STATE = _REQUEST_TYPE_SRIOV_BASE + SR_IOV_VF_LINK_STATE, + REQUEST_TYPE_SRIOV_VF_VLAN_LIST = _REQUEST_TYPE_SRIOV_BASE + SR_IOV_VF_VLAN_LIST, REQUEST_TYPE_TC_CLASS, REQUEST_TYPE_TC_QDISC, REQUEST_TYPE_UP_DOWN, diff --git a/src/network/networkd-sriov.c b/src/network/networkd-sriov.c index 3552d213c2..44b5c6ecf3 100644 --- a/src/network/networkd-sriov.c +++ b/src/network/networkd-sriov.c @@ -39,13 +39,18 @@ static int sr_iov_configure(SRIOV *sr_iov, Link *link, Request *req) { assert(link->ifindex > 0); assert(req); - log_link_debug(link, "Setting up SR-IOV virtual function %"PRIu32".", sr_iov->vf); + SRIOVAttribute attr = req->type - _REQUEST_TYPE_SRIOV_BASE; + assert(attr >= 0); + assert(attr < _SR_IOV_ATTRIBUTE_MAX); + + log_link_debug(link, "Setting up %s for SR-IOV virtual function %"PRIu32".", + sr_iov_attribute_to_string(attr), sr_iov->vf); r = sd_rtnl_message_new_link(link->manager->rtnl, &m, RTM_SETLINK, link->ifindex); if (r < 0) return r; - r = sr_iov_set_netlink_message(sr_iov, m); + r = sr_iov_set_netlink_message(sr_iov, attr, m); if (r < 0) return r; @@ -81,18 +86,26 @@ int link_request_sr_iov_vfs(Link *link) { link->sr_iov_configured = false; ORDERED_HASHMAP_FOREACH(sr_iov, link->network->sr_iov_by_section) { - r = link_queue_request_safe(link, REQUEST_TYPE_SRIOV, - sr_iov, NULL, - sr_iov_hash_func, - sr_iov_compare_func, - sr_iov_process_request, - &link->sr_iov_messages, - sr_iov_handler, - NULL); - if (r < 0) - return log_link_warning_errno(link, r, - "Failed to request to set up SR-IOV virtual function %"PRIu32": %m", - sr_iov->vf); + for (SRIOVAttribute attr = 0; attr < _SR_IOV_ATTRIBUTE_MAX; attr++) { + if (!sr_iov_has_config(sr_iov, attr)) + continue; + + r = link_queue_request_safe( + link, + _REQUEST_TYPE_SRIOV_BASE + attr, + sr_iov, + NULL, + sr_iov_hash_func, + sr_iov_compare_func, + sr_iov_process_request, + &link->sr_iov_messages, + sr_iov_handler, + NULL); + if (r < 0) + return log_link_warning_errno(link, r, + "Failed to request to set up SR-IOV virtual function %"PRIu32": %m", + sr_iov->vf); + } } if (link->sr_iov_messages == 0) { diff --git a/src/shared/netif-sriov.c b/src/shared/netif-sriov.c index fc851689b0..4889422c6d 100644 --- a/src/shared/netif-sriov.c +++ b/src/shared/netif-sriov.c @@ -7,6 +7,7 @@ #include "parse-util.h" #include "set.h" #include "stdio-util.h" +#include "string-table.h" #include "string-util.h" static SRIOV* sr_iov_free(SRIOV *sr_iov) { @@ -107,7 +108,46 @@ DEFINE_PRIVATE_HASH_OPS( sr_iov_hash_func, sr_iov_compare_func); -int sr_iov_set_netlink_message(SRIOV *sr_iov, sd_netlink_message *req) { +static const char * const sr_iov_attribute_table[_SR_IOV_ATTRIBUTE_MAX] = { + [SR_IOV_VF_MAC] = "MAC address", + [SR_IOV_VF_SPOOFCHK] = "spoof check", + [SR_IOV_VF_RSS_QUERY_EN] = "RSS query", + [SR_IOV_VF_TRUST] = "trust", + [SR_IOV_VF_LINK_STATE] = "link state", + [SR_IOV_VF_VLAN_LIST] = "vlan list", +}; + +DEFINE_STRING_TABLE_LOOKUP_TO_STRING(sr_iov_attribute, SRIOVAttribute); + +bool sr_iov_has_config(SRIOV *sr_iov, SRIOVAttribute attr) { + assert(sr_iov); + + switch (attr) { + case SR_IOV_VF_MAC: + return !ether_addr_is_null(&sr_iov->mac); + + case SR_IOV_VF_SPOOFCHK: + return sr_iov->vf_spoof_check_setting >= 0; + + case SR_IOV_VF_RSS_QUERY_EN: + return sr_iov->query_rss >= 0; + + case SR_IOV_VF_TRUST: + return sr_iov->trust >= 0; + + case SR_IOV_VF_LINK_STATE: + return sr_iov->link_state >= 0; + + case SR_IOV_VF_VLAN_LIST: + return sr_iov->vlan > 0; + + default: + assert_not_reached(); + } +} + +int sr_iov_set_netlink_message(SRIOV *sr_iov, SRIOVAttribute attr, sd_netlink_message *req) { + int r; assert(sr_iov); @@ -121,62 +161,71 @@ int sr_iov_set_netlink_message(SRIOV *sr_iov, sd_netlink_message *req) { if (r < 0) return r; - if (!ether_addr_is_null(&sr_iov->mac)) { + switch (attr) { + case SR_IOV_VF_MAC: { + if (ether_addr_is_null(&sr_iov->mac)) + return -ENODATA; + struct ifla_vf_mac ivm = { .vf = sr_iov->vf, }; memcpy(ivm.mac, &sr_iov->mac, ETH_ALEN); r = sd_netlink_message_append_data(req, IFLA_VF_MAC, &ivm, sizeof(struct ifla_vf_mac)); - if (r < 0) - return r; + break; } + case SR_IOV_VF_SPOOFCHK: { + if (sr_iov->vf_spoof_check_setting < 0) + return -ENODATA; - if (sr_iov->vf_spoof_check_setting >= 0) { struct ifla_vf_spoofchk ivs = { .vf = sr_iov->vf, .setting = sr_iov->vf_spoof_check_setting, }; r = sd_netlink_message_append_data(req, IFLA_VF_SPOOFCHK, &ivs, sizeof(struct ifla_vf_spoofchk)); - if (r < 0) - return r; + break; } + case SR_IOV_VF_RSS_QUERY_EN: { + if (sr_iov->query_rss < 0) + return -ENODATA; - if (sr_iov->query_rss >= 0) { struct ifla_vf_rss_query_en ivs = { .vf = sr_iov->vf, .setting = sr_iov->query_rss, }; r = sd_netlink_message_append_data(req, IFLA_VF_RSS_QUERY_EN, &ivs, sizeof(struct ifla_vf_rss_query_en)); - if (r < 0) - return r; + break; } + case SR_IOV_VF_TRUST: { + if (sr_iov->trust < 0) + return -ENODATA; - if (sr_iov->trust >= 0) { struct ifla_vf_trust ivt = { .vf = sr_iov->vf, .setting = sr_iov->trust, }; r = sd_netlink_message_append_data(req, IFLA_VF_TRUST, &ivt, sizeof(struct ifla_vf_trust)); - if (r < 0) - return r; + break; } + case SR_IOV_VF_LINK_STATE: { + if (sr_iov->link_state < 0) + return -ENODATA; - if (sr_iov->link_state >= 0) { struct ifla_vf_link_state ivl = { .vf = sr_iov->vf, .link_state = sr_iov->link_state, }; r = sd_netlink_message_append_data(req, IFLA_VF_LINK_STATE, &ivl, sizeof(struct ifla_vf_link_state)); - if (r < 0) - return r; + break; } + case SR_IOV_VF_VLAN_LIST: { + if (sr_iov->vlan <= 0) + return -ENODATA; - if (sr_iov->vlan > 0) { /* Because of padding, first the buffer must be initialized with 0. */ struct ifla_vf_vlan_info ivvi = {}; ivvi.vf = sr_iov->vf; @@ -193,9 +242,13 @@ int sr_iov_set_netlink_message(SRIOV *sr_iov, sd_netlink_message *req) { return r; r = sd_netlink_message_close_container(req); - if (r < 0) - return r; + break; } + default: + assert_not_reached(); + } + if (r < 0) + return r; r = sd_netlink_message_close_container(req); if (r < 0) diff --git a/src/shared/netif-sriov.h b/src/shared/netif-sriov.h index a7ebb5f3cb..49e05fa285 100644 --- a/src/shared/netif-sriov.h +++ b/src/shared/netif-sriov.h @@ -10,6 +10,17 @@ #include "ether-addr-util.h" #include "hashmap.h" +typedef enum SRIOVAttribute { + SR_IOV_VF_MAC, + SR_IOV_VF_SPOOFCHK, + SR_IOV_VF_RSS_QUERY_EN, + SR_IOV_VF_TRUST, + SR_IOV_VF_LINK_STATE, + SR_IOV_VF_VLAN_LIST, + _SR_IOV_ATTRIBUTE_MAX, + _SR_IOV_ATTRIBUTE_INVALID = -EINVAL, +} SRIOVAttribute; + typedef enum SRIOVLinkState { SR_IOV_LINK_STATE_AUTO = IFLA_VF_LINK_STATE_AUTO, SR_IOV_LINK_STATE_ENABLE = IFLA_VF_LINK_STATE_ENABLE, @@ -33,9 +44,12 @@ typedef struct SRIOV { struct ether_addr mac; } SRIOV; +const char* sr_iov_attribute_to_string(SRIOVAttribute a) _const_; + void sr_iov_hash_func(const SRIOV *sr_iov, struct siphash *state); int sr_iov_compare_func(const SRIOV *s1, const SRIOV *s2); -int sr_iov_set_netlink_message(SRIOV *sr_iov, sd_netlink_message *req); +bool sr_iov_has_config(SRIOV *sr_iov, SRIOVAttribute attr); +int sr_iov_set_netlink_message(SRIOV *sr_iov, SRIOVAttribute attr, sd_netlink_message *req); int sr_iov_get_num_vfs(sd_device *device, uint32_t *ret); int sr_iov_set_num_vfs(sd_device *device, uint32_t num_vfs, OrderedHashmap *sr_iov_by_section); int sr_iov_drop_invalid_sections(uint32_t num_vfs, OrderedHashmap *sr_iov_by_section); diff --git a/src/udev/net/link-config.c b/src/udev/net/link-config.c index 6c61ff743c..5c67cf2dc6 100644 --- a/src/udev/net/link-config.c +++ b/src/udev/net/link-config.c @@ -855,31 +855,36 @@ static int link_generate_alternative_names(Link *link) { } static int sr_iov_configure(Link *link, sd_netlink **rtnl, SRIOV *sr_iov) { - _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL; int r; assert(link); assert(rtnl); assert(link->ifindex > 0); - if (!*rtnl) { - r = sd_netlink_open(rtnl); + for (SRIOVAttribute attr = 0; attr < _SR_IOV_ATTRIBUTE_MAX; attr++) { + if (!sr_iov_has_config(sr_iov, attr)) + continue; + + if (!*rtnl) { + r = sd_netlink_open(rtnl); + if (r < 0) + return r; + } + + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL; + r = sd_rtnl_message_new_link(*rtnl, &req, RTM_SETLINK, link->ifindex); + if (r < 0) + return r; + + r = sr_iov_set_netlink_message(sr_iov, attr, req); + if (r < 0) + return r; + + r = sd_netlink_call(*rtnl, req, 0, NULL); if (r < 0) return r; } - r = sd_rtnl_message_new_link(*rtnl, &req, RTM_SETLINK, link->ifindex); - if (r < 0) - return r; - - r = sr_iov_set_netlink_message(sr_iov, req); - if (r < 0) - return r; - - r = sd_netlink_call(*rtnl, req, 0, NULL); - if (r < 0) - return r; - return 0; }