diff --git a/man/systemd.link.xml b/man/systemd.link.xml
index 3e98e4ddba..7c0a84dff8 100644
--- a/man/systemd.link.xml
+++ b/man/systemd.link.xml
@@ -967,6 +967,21 @@
 
 
 
+
+        ReceivePacketSteeringCPUMask=
+
+          Configures the Receive Packet Steering (RPS) list of CPUs to which RPS may forward traffic.
+          Takes a list of CPU indices or ranges separated by either whitespace or commas. Alternatively,
+          takes the special value all, which includes all available CPUs in the mask.
+          CPU ranges are specified by the lower and upper CPU indices separated by a dash (e.g. 2-6).
+          This option may be specified more than once, in which case the specified CPU affinity masks are merged.
+          If an empty string is assigned, the mask is reset, and all prior assignments have no effect.
+          Defaults to unset, in which case the RPS CPU list is left unchanged. To disable RPS when it was
+          previously enabled, use the special value disable.
+
+
+
+
 
         ReceiveVLANCTAGHardwareAcceleration=
 
diff --git a/src/shared/cpu-set-util.c b/src/shared/cpu-set-util.c
index d096576cd6..1112de1333 100644
--- a/src/shared/cpu-set-util.c
+++ b/src/shared/cpu-set-util.c
@@ -11,6 +11,7 @@
 #include "errno-util.h"
 #include "extract-word.h"
 #include "fd-util.h"
+#include "hexdecoct.h"
 #include "log.h"
 #include "macro.h"
 #include "memory-util.h"
@@ -82,6 +83,63 @@ char *cpu_set_to_range_string(const CPUSet *set) {
         return TAKE_PTR(str) ?: strdup("");
 }
 
+char* cpu_set_to_mask_string(const CPUSet *a) {
+        _cleanup_free_ char *str = NULL;
+        size_t len = 0;
+        bool found_nonzero = false;
+
+        assert(a);
+
+        /* Return CPU set in hexadecimal bitmap mask, e.g.
+         * CPU 0 -> "1"
+         * CPU 1 -> "2"
+         * CPU 0,1 -> "3"
+         * CPU 0-3 -> "f"
+         * CPU 0-7 -> "ff"
+         * CPU 4-7 -> "f0"
+         * CPU 7 -> "80"
+         * None -> "0"
+         *
+         * When there are more than 32 CPUs, separate every 32 CPUs by comma, e.g.
+         * CPU 0-47 -> "ffff,ffffffff"
+         * CPU 0-63 -> "ffffffff,ffffffff"
+         * CPU 0-71 -> "ff,ffffffff,ffffffff" */
+
+        for (ssize_t i = a->allocated * 8; i >= 0; i -= 4) {
+                uint8_t m = 0;
+
+                for (size_t j = 0; j < 4; j++)
+                        if (CPU_ISSET_S(i + j, a->allocated, a->set))
+                                m |= 1U << j;
+
+                if (!found_nonzero)
+                        found_nonzero = m > 0;
+
+                if (!found_nonzero && m == 0)
+                        /* Skip leading zeros */
+                        continue;
+
+                if (!GREEDY_REALLOC(str, len + 3))
+                        return NULL;
+
+                str[len++] = hexchar(m);
+                if (i >= 4 && i % 32 == 0)
+                        /* Separate by comma for each 32 CPUs. */
+                        str[len++] = ',';
+                str[len] = 0;
+        }
+
+        return TAKE_PTR(str) ?: strdup("0");
+}
+
+CPUSet* cpu_set_free(CPUSet *c) {
+        if (!c)
+                return c;
+
+        cpu_set_reset(c);
+        return mfree(c);
+}
+
 int cpu_set_realloc(CPUSet *cpu_set, unsigned ncpus) {
         size_t need;
 
@@ -290,3 +348,22 @@ int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set) {
         *set = TAKE_STRUCT(s);
         return 0;
 }
+
+int cpu_mask_add_all(CPUSet *mask) {
+        long m;
+        int r;
+
+        assert(mask);
+
+        m = sysconf(_SC_NPROCESSORS_ONLN);
+        if (m < 0)
+                return -errno;
+
+        for (unsigned i = 0; i < (unsigned) m; i++) {
+                r = cpu_set_add(mask, i);
+                if (r < 0)
+                        return r;
+        }
+
+        return 0;
+}
diff --git a/src/shared/cpu-set-util.h b/src/shared/cpu-set-util.h
index 3c63a58826..618fe1b0a3 100644
--- a/src/shared/cpu-set-util.h
+++ b/src/shared/cpu-set-util.h
@@ -19,11 +19,15 @@ static inline void cpu_set_reset(CPUSet *a) {
         *a = (CPUSet) {};
 }
 
+CPUSet* cpu_set_free(CPUSet *c);
+DEFINE_TRIVIAL_CLEANUP_FUNC(CPUSet*, cpu_set_free);
+
 int cpu_set_add_all(CPUSet *a, const CPUSet *b);
 int cpu_set_add(CPUSet *a, unsigned cpu);
 
 char* cpu_set_to_string(const CPUSet *a);
 char *cpu_set_to_range_string(const CPUSet *a);
+char* cpu_set_to_mask_string(const CPUSet *a);
 int cpu_set_realloc(CPUSet *cpu_set, unsigned ncpus);
 
 int parse_cpu_set_full(
@@ -50,3 +54,4 @@ int cpu_set_to_dbus(const CPUSet *set, uint8_t **ret, size_t *allocated);
 int cpu_set_from_dbus(const uint8_t *bits, size_t size, CPUSet *set);
 
 int cpus_in_affinity_mask(void);
+int cpu_mask_add_all(CPUSet *mask);
diff --git a/src/test/test-cpu-set-util.c b/src/test/test-cpu-set-util.c
index a0660f579e..0c2304e2c0 100644
--- a/src/test/test-cpu-set-util.c
+++ b/src/test/test-cpu-set-util.c
@@ -25,6 +25,10 @@ TEST(parse_cpu_set) {
         log_info("cpu_set_to_range_string: %s", str);
         assert_se(streq(str, "0"));
         str = mfree(str);
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        assert_se(streq(str, "1"));
+        str = mfree(str);
         cpu_set_reset(&c);
 
         /* Simple range (from CPUAffinity example) */
@@ -43,6 +47,10 @@ TEST(parse_cpu_set) {
         log_info("cpu_set_to_range_string: %s", str);
         assert_se(streq(str, "1-2 4"));
         str = mfree(str);
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        assert_se(streq(str, "16"));
+        str = mfree(str);
         cpu_set_reset(&c);
 
         /* A more interesting range */
@@ -61,6 +69,10 @@ TEST(parse_cpu_set) {
         log_info("cpu_set_to_range_string: %s", str);
         assert_se(streq(str, "0-3 8-11"));
         str = mfree(str);
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        assert_se(streq(str, "f0f"));
+        str = mfree(str);
         cpu_set_reset(&c);
 
         /* Quoted strings */
@@ -76,6 +88,10 @@ TEST(parse_cpu_set) {
         log_info("cpu_set_to_range_string: %s", str);
         assert_se(streq(str, "8-11"));
         str = mfree(str);
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        assert_se(streq(str, "f00"));
+        str = mfree(str);
         cpu_set_reset(&c);
 
         /* Use commas as separators */
@@ -106,6 +122,10 @@ TEST(parse_cpu_set) {
         log_info("cpu_set_to_range_string: %s", str);
         assert_se(streq(str, "0-7 63"));
         str = mfree(str);
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        assert_se(streq(str, "80000000,000000ff"));
+        str = mfree(str);
         cpu_set_reset(&c);
 
         /* Ranges */
@@ -120,6 +140,28 @@ TEST(parse_cpu_set) {
         log_info("cpu_set_to_string: %s", str);
         str = mfree(str);
         cpu_set_reset(&c);
+        assert_se(parse_cpu_set_full("36-39,44-47", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+        assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+        assert_se(CPU_COUNT_S(c.allocated, c.set) == 8);
+        for (cpu = 36; cpu < 40; cpu++)
+                assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+        for (cpu = 44; cpu < 48; cpu++)
+                assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        assert_se(streq(str, "f0f0,00000000"));
+        str = mfree(str);
+        cpu_set_reset(&c);
+        assert_se(parse_cpu_set_full("64-71", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
+        assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
+        assert_se(CPU_COUNT_S(c.allocated, c.set) == 8);
+        for (cpu = 64; cpu < 72; cpu++)
+                assert_se(CPU_ISSET_S(cpu, c.allocated, c.set));
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        assert_se(streq(str, "ff,00000000,00000000"));
+        str = mfree(str);
+        cpu_set_reset(&c);
 
         /* Ranges with trailing comma, space */
         assert_se(parse_cpu_set_full("0-3 8-11, ", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
@@ -136,12 +178,20 @@ TEST(parse_cpu_set) {
         log_info("cpu_set_to_range_string: %s", str);
         assert_se(streq(str, "0-3 8-11"));
         str = mfree(str);
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        assert_se(streq(str, "f0f"));
+        str = mfree(str);
         cpu_set_reset(&c);
 
         /* Negative range (returns empty cpu_set) */
         assert_se(parse_cpu_set_full("3-0", &c, true, NULL, "fake", 1, "CPUAffinity") >= 0);
         assert_se(c.allocated >= DIV_ROUND_UP(sizeof(__cpu_mask), 8));
         assert_se(CPU_COUNT_S(c.allocated, c.set) == 0);
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        assert_se(streq(str, "0"));
+        str = mfree(str);
         cpu_set_reset(&c);
 
         /* Overlapping ranges */
@@ -157,6 +207,10 @@ TEST(parse_cpu_set) {
         log_info("cpu_set_to_range_string: %s", str);
         assert_se(streq(str, "0-11"));
         str = mfree(str);
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        assert_se(streq(str, "fff"));
+        str = mfree(str);
         cpu_set_reset(&c);
 
         /* Mix ranges and individual CPUs */
@@ -174,6 +228,10 @@ TEST(parse_cpu_set) {
         log_info("cpu_set_to_range_string: %s", str);
         assert_se(streq(str, "0 2 4-11"));
         str = mfree(str);
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        assert_se(streq(str, "ff5"));
+        str = mfree(str);
         cpu_set_reset(&c);
 
         /* Garbage */
@@ -190,6 +248,10 @@ TEST(parse_cpu_set) {
         assert_se(parse_cpu_set_full("", &c, true, NULL, "fake", 1, "CPUAffinity") == 0);
         assert_se(!c.set);                /* empty string returns NULL */
         assert_se(c.allocated == 0);
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        assert_se(streq(str, "0"));
+        str = mfree(str);
 
         /* Runaway quoted string */
         assert_se(parse_cpu_set_full("0 1 2 3 \"4 5 6 7 ", &c, true, NULL, "fake", 1, "CPUAffinity") == -EINVAL);
@@ -206,6 +268,23 @@ TEST(parse_cpu_set) {
         log_info("cpu_set_to_range_string: %s", str);
         assert_se(streq(str, "8000-8191"));
         str = mfree(str);
+        assert_se(str = cpu_set_to_mask_string(&c));
+        log_info("cpu_set_to_mask_string: %s", str);
+        for (size_t i = 0; i < strlen(str); i++) {
+                if (i < 54) {
+                        if (i >= 8 && (i + 1) % 9 == 0)
+                                assert_se(str[i] == ',');
+                        else
+                                assert_se(str[i] == 'f');
+                }
+                else {
+                        if (i >= 8 && (i + 1) % 9 == 0)
+                                assert_se(str[i] == ',');
+                        else
+                                assert_se(str[i] == '0');
+                }
+        }
+        str = mfree(str);
         cpu_set_reset(&c);
 }
 
diff --git a/src/udev/net/link-config-gperf.gperf b/src/udev/net/link-config-gperf.gperf
index 42d7cc7ee2..b77759d094 100644
--- a/src/udev/net/link-config-gperf.gperf
+++ b/src/udev/net/link-config-gperf.gperf
@@ -108,6 +108,7 @@ Link.RxMaxCoalescedHighFrames,             config_parse_coalesce_u32,
 Link.TxCoalesceHighSec,                    config_parse_coalesce_sec,   0, offsetof(LinkConfig, coalesce.tx_coalesce_usecs_high)
 Link.TxMaxCoalescedHighFrames,             config_parse_coalesce_u32,   0, offsetof(LinkConfig, coalesce.tx_max_coalesced_frames_high)
 Link.CoalescePacketRateSampleIntervalSec,  config_parse_coalesce_sec,   0, offsetof(LinkConfig, coalesce.rate_sample_interval)
+Link.ReceivePacketSteeringCPUMask,         config_parse_rps_cpu_mask,   0, offsetof(LinkConfig, rps_cpu_mask)
 Link.MDI,                                  config_parse_mdi,            0, offsetof(LinkConfig, mdi)
 Link.SR-IOVVirtualFunctions,               config_parse_sr_iov_num_vfs, 0, offsetof(LinkConfig, sr_iov_num_vfs)
 SR-IOV.VirtualFunction,                    config_parse_sr_iov_uint32,  0, offsetof(LinkConfig, sr_iov_by_section)
diff --git a/src/udev/net/link-config.c b/src/udev/net/link-config.c
index a8b2cc23a2..8eee527bbf 100644
--- a/src/udev/net/link-config.c
+++ b/src/udev/net/link-config.c
@@ -73,6 +73,7 @@ static LinkConfig* link_config_free(LinkConfig *config) {
         free(config->alias);
         free(config->wol_password_file);
         erase_and_free(config->wol_password);
+        cpu_set_free(config->rps_cpu_mask);
 
         ordered_hashmap_free_with_destructor(config->sr_iov_by_section, sr_iov_free);
 
@@ -937,6 +938,49 @@ static int link_apply_sr_iov_config(Link *link, sd_netlink **rtnl) {
         return 0;
 }
 
+static int link_apply_rps_cpu_mask(Link *link) {
+        _cleanup_free_ char *mask_str = NULL;
+        LinkConfig *config;
+        int r;
+
+        assert(link);
+        config = ASSERT_PTR(link->config);
+
+        /* Skip if the config is not specified. */
+        if (!config->rps_cpu_mask)
+                return 0;
+
+        mask_str = cpu_set_to_mask_string(config->rps_cpu_mask);
+        if (!mask_str)
+                return log_oom();
+
+        log_link_debug(link, "Applying RPS CPU mask: %s", mask_str);
+
+        /* Currently, this will set CPU mask to all rx queue of matched device. */
+        FOREACH_DEVICE_SYSATTR(link->device, attr) {
+                const char *c;
+
+                c = path_startswith(attr, "queues/");
+                if (!c)
+                        continue;
+
+                c = startswith(c, "rx-");
+                if (!c)
+                        continue;
+
+                c += strcspn(c, "/");
+
+                if (!path_equal(c, "/rps_cpus"))
+                        continue;
+
+                r = sd_device_set_sysattr_value(link->device, attr, mask_str);
+                if (r < 0)
+                        log_link_warning_errno(link, r, "Failed to write %s sysfs attribute, ignoring: %m", attr);
+        }
+
+        return 0;
+}
+
 static int link_apply_udev_properties(Link *link, bool test) {
         LinkConfig *config;
         sd_device *device;
@@ -1024,6 +1068,10 @@ int link_apply_config(LinkConfigContext *ctx, sd_netlink **rtnl, Link *link, boo
         if (r < 0)
                 return r;
 
+        r = link_apply_rps_cpu_mask(link);
+        if (r < 0)
+                return r;
+
         return 0;
 }
 
@@ -1314,6 +1362,70 @@ int config_parse_wol_password(
         return 0;
 }
 
+/* Parses ReceivePacketSteeringCPUMask=. 'data' points to the CPUSet* that accumulates the mask:
+ *   - an empty value frees the set, so that the setting counts as unset again;
+ *   - "disable" stores an empty but non-NULL set, which is later written out as the mask "0";
+ *   - "all" adds the CPUs provided by cpu_mask_add_all();
+ *   - anything else is parsed as a CPU list and merged into the set collected so far.
+ * Input that fails to parse is ignored (returns 0); on allocation failure returns log_oom(). */
+int config_parse_rps_cpu_mask(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        _cleanup_(cpu_set_freep) CPUSet *allocated = NULL;
+        CPUSet *mask, **rps_cpu_mask = ASSERT_PTR(data);
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+
+        if (isempty(rvalue)) {
+                *rps_cpu_mask = cpu_set_free(*rps_cpu_mask);
+                return 0;
+        }
+
+        if (*rps_cpu_mask)
+                mask = *rps_cpu_mask;
+        else {
+                allocated = new0(CPUSet, 1);
+                if (!allocated)
+                        return log_oom();
+
+                mask = allocated;
+        }
+
+        if (streq(rvalue, "disable"))
+                /* Do not return early: a freshly allocated set still has to be stored below, otherwise
+                 * the setting stays NULL and link_apply_rps_cpu_mask() skips it as unconfigured. */
+                cpu_set_reset(mask);
+        else if (streq(rvalue, "all")) {
+                r = cpu_mask_add_all(mask);
+                if (r < 0) {
+                        log_syntax(unit, LOG_WARNING, filename, line, r,
+                                   "Failed to create CPU affinity mask representing \"all\" cpus, ignoring: %m");
+                        return 0;
+                }
+        } else {
+                r = parse_cpu_set_extend(rvalue, mask, /* warn= */ true, unit, filename, line, lvalue);
+                if (r < 0)
+                        return 0;
+        }
+
+        if (allocated)
+                *rps_cpu_mask = TAKE_PTR(allocated);
+
+        return 0;
+}
+
 static const char* const mac_address_policy_table[_MAC_ADDRESS_POLICY_MAX] = {
         [MAC_ADDRESS_POLICY_PERSISTENT] = "persistent",
         [MAC_ADDRESS_POLICY_RANDOM] = "random",
diff --git a/src/udev/net/link-config.h b/src/udev/net/link-config.h
index 98cadc212e..f6abff89e8 100644
--- a/src/udev/net/link-config.h
+++ b/src/udev/net/link-config.h
@@ -6,6 +6,7 @@
 
 #include "condition.h"
 #include "conf-parser.h"
+#include "cpu-set-util.h"
 #include "ethtool-util.h"
 #include "hashmap.h"
 #include "list.h"
@@ -84,6 +85,7 @@ struct LinkConfig {
         int autoneg_flow_control;
         netdev_coalesce_param coalesce;
         uint8_t mdi;
+        CPUSet *rps_cpu_mask;
 
         uint32_t sr_iov_num_vfs;
         OrderedHashmap *sr_iov_by_section;
@@ -121,3 +123,4 @@ CONFIG_PARSER_PROTOTYPE(config_parse_wol_password);
 CONFIG_PARSER_PROTOTYPE(config_parse_mac_address_policy);
 CONFIG_PARSER_PROTOTYPE(config_parse_name_policy);
 CONFIG_PARSER_PROTOTYPE(config_parse_alternative_names_policy);
+CONFIG_PARSER_PROTOTYPE(config_parse_rps_cpu_mask);