From 239c9a2e5fd9cd6d0163886fa8cd667cfd946432 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Thu, 31 Jul 2025 20:18:54 +0900 Subject: [PATCH 1/5] udev/node: split out link_search_and_update() and reduce indentation No functional change, just refactoring and preparation for later change. --- src/udev/udev-node.c | 110 +++++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 52 deletions(-) diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c index 75e8b09e89..1266b7afe5 100644 --- a/src/udev/udev-node.c +++ b/src/udev/udev-node.c @@ -213,6 +213,24 @@ static int stack_directory_find_prioritized_devnode(sd_device *dev, int dirfd, b return !!*ret; } +static int link_search_and_update(sd_device *dev, const char *slink, int dirfd, bool add) { + int r; + + assert(dev); + assert(slink); + assert(dirfd >= 0); + + _cleanup_free_ char *devnode = NULL; + r = stack_directory_find_prioritized_devnode(dev, dirfd, add, &devnode); + if (r < 0) + return log_device_debug_errno(dev, r, "Failed to determine device node with the highest priority for '%s': %m", slink); + if (r > 0) + return node_create_symlink(dev, devnode, slink); + + log_device_debug(dev, "No reference left for '%s', removing", slink); + return node_remove_symlink(dev, slink); +} + static int stack_directory_update(sd_device *dev, int fd, bool add) { const char *id; int r; @@ -420,7 +438,7 @@ static int link_update(sd_device *dev, const char *slink, bool add) { _cleanup_(release_lock_file) LockFile lockfile = LOCK_FILE_INIT; /* #3 */ _cleanup_(rmdir_and_freep) char *dirpath = NULL; /* #2 */ _cleanup_close_ int dirfd = -EBADF; /* #1 */ - _cleanup_free_ char *current_id = NULL, *devnode = NULL; + _cleanup_free_ char *current_id = NULL; int r, current_prio; assert(dev); @@ -438,64 +456,52 @@ static int link_update(sd_device *dev, const char *slink, bool add) { if (r < 0) return log_device_debug_errno(dev, r, "Failed to update stack directory for '%s': %m", slink); - if (current_id) { - const char *id; - - r = sd_device_get_device_id(dev, &id); - if (r < 0) - return log_device_debug_errno(dev, r, "Failed to get device id: %m"); - - if (add) { - int prio; - - r = device_get_devlink_priority(dev, &prio); - if (r < 0) - return log_device_debug_errno(dev, r, "Failed to get devlink priority: %m"); - - if (streq(current_id, id)) { - if (current_prio <= prio) - /* The devlink is ours and already exists, and the new priority is - * equal or higher than the previous. Hence, it is not necessary to - * recreate it. */ - return 0; - - /* The devlink priority is downgraded. Another device may have a higher - * priority now. Let's find the device node with the highest priority. */ - } else { - if (current_prio > prio) - /* The devlink with a higher priority already exists and is owned by - * another device. Hence, it is not necessary to recreate it. */ - return 0; - - /* This device has the equal or a higher priority than the current. Let's - * create the devlink to our device node. */ - return node_create_symlink(dev, /* devnode = */ NULL, slink); - } - - } else { - if (!streq(current_id, id)) - /* The devlink already exists and is owned by another device. Hence, it is - * not necessary to recreate it. */ - return 0; - - /* The current devlink is ours, and the target device will be removed. Hence, we need - * to search the device that has the highest priority. and update the devlink. */ - } - } else { + if (!current_id) /* The requested devlink does not exist, or the target device does not exist and the devlink * points to a non-existing device. Let's search the device that has the highest priority, * and update the devlink. */ - ; + return link_search_and_update(dev, slink, dirfd, add); + + const char *id; + r = sd_device_get_device_id(dev, &id); + if (r < 0) + return log_device_debug_errno(dev, r, "Failed to get device id: %m"); + + if (!add) { + if (!streq(current_id, id)) + /* The devlink already exists and is owned by another device. Hence, it is + * not necessary to recreate it. */ + return 0; + + /* The current devlink is ours, and the target device will be removed. Hence, we need + * to search the device that has the highest priority. and update the devlink. */ + return link_search_and_update(dev, slink, dirfd, add); } - r = stack_directory_find_prioritized_devnode(dev, dirfd, add, &devnode); + int prio; + r = device_get_devlink_priority(dev, &prio); if (r < 0) - return log_device_debug_errno(dev, r, "Failed to determine device node with the highest priority for '%s': %m", slink); - if (r > 0) - return node_create_symlink(dev, devnode, slink); + return log_device_debug_errno(dev, r, "Failed to get devlink priority: %m"); - log_device_debug(dev, "No reference left for '%s', removing", slink); - return node_remove_symlink(dev, slink); + if (streq(current_id, id)) { + if (current_prio <= prio) + /* The devlink is ours and already exists, and the new priority is equal or higher + * than the previous. Hence, it is not necessary to recreate it. */ + return 0; + + /* The devlink priority is downgraded. Another device may have a higher priority now. Let's + * find the device node with the highest priority. */ + return link_search_and_update(dev, slink, dirfd, add); + } + + if (current_prio > prio) + /* The devlink with a higher priority already exists and is owned by another device. Hence, + * it is not necessary to recreate it. */ + return 0; + + /* This device has the equal or a higher priority than the current. Let's create the devlink to our + * device node. */ + return node_create_symlink(dev, /* devnode = */ NULL, slink); } static int device_get_devpath_by_devnum(sd_device *dev, char **ret) { From 453e1375d0606a212433e137c7f204279a8ecc35 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Fri, 1 Aug 2025 02:06:08 +0900 Subject: [PATCH 2/5] udev/node: check the target device node of devlink on removal If the removal of the devlink is requested due to this is a 'remove' event, it is trivial that the devlink will not be owned by this device anymore. Let's read the devlink and if it points to our device node, then we need to update the devlink. If it points to another device node, then it is already owned by another device, hence we should not touch it and keep it as is. Fixes #37823. --- src/udev/udev-node.c | 72 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/src/udev/udev-node.c b/src/udev/udev-node.c index 1266b7afe5..412775b38b 100644 --- a/src/udev/udev-node.c +++ b/src/udev/udev-node.c @@ -448,14 +448,69 @@ static int link_update(sd_device *dev, const char *slink, bool add) { if (r < 0) return r; - r = node_get_current(slink, dirfd, ¤t_id, add ? ¤t_prio : NULL); - if (r < 0 && !ERRNO_IS_DEVICE_ABSENT_OR_EMPTY(r)) - return log_device_debug_errno(dev, r, "Failed to get the current device node priority for '%s': %m", slink); + if (add) { + r = node_get_current(slink, dirfd, ¤t_id, ¤t_prio); + if (r < 0 && !ERRNO_IS_DEVICE_ABSENT_OR_EMPTY(r)) + return log_device_debug_errno(dev, r, "Failed to get the current device node priority for '%s': %m", slink); + } r = stack_directory_update(dev, dirfd, add); if (r < 0) return log_device_debug_errno(dev, r, "Failed to update stack directory for '%s': %m", slink); + if (!add) { + _cleanup_free_ char *target = NULL; + + /* Especially on 'remove' event, the device ID obtained by node_get_current() may not be + * reliable, as the device node and/or device number may be reused. Hence, let's read the + * devlink here and if it points to our device node, then we need to update the devlink. If + * it points to another device node, then it is already owned by another device, hence we + * should not touch it and keep it as is. */ + + r = readlink_malloc(slink, &target); + if (r < 0) { + if (r != -ENOENT) + log_device_debug_errno(dev, r, "Failed to read symbolic link '%s', ignoring: %m", slink); + + /* The devlink does not exist. Let's find the most suitable owner, and create the + * devlink. This is typically not necessary and does nothing, but for safety in the + * case that the devlink is removed manually. */ + return link_search_and_update(dev, slink, dirfd, add); + } + + const char *node; + r = sd_device_get_devname(dev, &node); + if (r < 0) + return log_device_debug_errno(dev, r, "Failed to get device node: %m"); + + if (streq(node, target)) /* owned by us, needs to update. */ + return link_search_and_update(dev, slink, dirfd, add); + + /* The devlink does not point to our device node. For extra safety, let's validate the + * current devlink, in case that the devlink is manually modified by user. */ + + if (!path_startswith(target, "/dev/")) { + log_device_debug(dev, "Symbolic link '%s' points to '%s' which is outside of '/dev/', updating it.", slink, target); + return link_search_and_update(dev, slink, dirfd, add); + } + + struct stat st; + if (lstat(target, &st) < 0) { + if (errno != ENOENT) + log_device_debug_errno(dev, errno, "Failed to stat '%s', ignoring: %m", target); + + /* The current target device is also already removed? Let's update. */ + return link_search_and_update(dev, slink, dirfd, add); + } + + if (!IN_SET(st.st_mode & S_IFMT, S_IFBLK, S_IFCHR)) { + log_device_debug(dev, "Symbolic link '%s' points to '%s' which is not a device node, updating it.", slink, target); + return link_search_and_update(dev, slink, dirfd, add); + } + + return 0; /* the devlink is owned by another device, and we should keep it as is. */ + } + if (!current_id) /* The requested devlink does not exist, or the target device does not exist and the devlink * points to a non-existing device. Let's search the device that has the highest priority, @@ -467,17 +522,6 @@ static int link_update(sd_device *dev, const char *slink, bool add) { if (r < 0) return log_device_debug_errno(dev, r, "Failed to get device id: %m"); - if (!add) { - if (!streq(current_id, id)) - /* The devlink already exists and is owned by another device. Hence, it is - * not necessary to recreate it. */ - return 0; - - /* The current devlink is ours, and the target device will be removed. Hence, we need - * to search the device that has the highest priority. and update the devlink. */ - return link_search_and_update(dev, slink, dirfd, add); - } - int prio; r = device_get_devlink_priority(dev, &prio); if (r < 0) From 453cbbe47b4e268f239f75d7c19f2ddef495bd81 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Fri, 1 Aug 2025 03:35:55 +0900 Subject: [PATCH 3/5] TEST-64-UDEV-STORAGE: several fixlets for check_device_units() To suppress the following warnings in case check_device_unit() failed e.g. when the device is already removed: ``` sed: couldn't write 130 items to stdout: Broken pipe awk: write failure (Broken pipe) awk: close failed on file "/dev/stdout" (Broken pipe) ``` --- test/units/TEST-64-UDEV-STORAGE.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/units/TEST-64-UDEV-STORAGE.sh b/test/units/TEST-64-UDEV-STORAGE.sh index af93532625..d489159eb7 100755 --- a/test/units/TEST-64-UDEV-STORAGE.sh +++ b/test/units/TEST-64-UDEV-STORAGE.sh @@ -117,7 +117,7 @@ check_device_unit() {( fi done - read -r -a links < <(udevadm info "$syspath" | sed -ne '/SYSTEMD_ALIAS=/ { s/^E: SYSTEMD_ALIAS=//; p }' 2>/dev/null) + read -r -a links < <(udevadm info -q property --property SYSTEMD_ALIAS --value "$syspath" 2>/dev/null) for link in "${links[@]}"; do if [[ "$link" == "$path" ]]; then # SYSTEMD_ALIAS= are absolute return 0 @@ -131,7 +131,7 @@ check_device_unit() {( check_device_units() {( set +x - local log_level path paths + local log_level path paths unit units log_level="${1?}" shift @@ -143,12 +143,13 @@ check_device_units() {( fi done - while read -r unit _; do + read -r -a units < <(systemctl list-units --all --type=device --no-legend dev-* | awk '$1 !~ /dev-tty.+/ && $4 == "plugged" { print $1 }' | sed -e 's/\.device$//') + for unit in "${units[@]}"; do path=$(systemd-escape --path --unescape "$unit") if ! check_device_unit "$log_level" "$path"; then return 1 fi - done < <(systemctl list-units --all --type=device --no-legend dev-* | awk '$1 !~ /dev-tty.+/ && $4 == "plugged" { print $1 }' | sed -e 's/\.device$//') + done return 0 )} From 35e4cfa83db1348157f0dd33a3cfc880c369b932 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 30 Jul 2025 21:43:06 +0900 Subject: [PATCH 4/5] TEST-64-UDEV-STORAGE: several cleanups - drop unused variables, - adjust number of partitions, interations, and timeout, - clear partitions on each test case finished, - check if unnecessary devlinks are removed, - several coding style cleanups. --- test/units/TEST-64-UDEV-STORAGE.sh | 95 ++++++++++++++++++------------ 1 file changed, 56 insertions(+), 39 deletions(-) diff --git a/test/units/TEST-64-UDEV-STORAGE.sh b/test/units/TEST-64-UDEV-STORAGE.sh index d489159eb7..856f44595d 100755 --- a/test/units/TEST-64-UDEV-STORAGE.sh +++ b/test/units/TEST-64-UDEV-STORAGE.sh @@ -381,9 +381,8 @@ EOF } testcase_simultaneous_events_1() { - local disk expected i iterations key link num_part part partscript rule target timeout - local -a devices symlinks - local -A running + local disk expected i iterations link num_part part partscript rule target timeout + local -a devices symlinks running if [[ -v ASAN_OPTIONS || "$(systemd-detect-virt -v)" == "qemu" ]]; then num_part=2 @@ -438,18 +437,20 @@ EOF # On unpatched udev versions the delete-recreate cycle may trigger a race # leading to dead symlinks in /dev/disk/ for ((i = 1; i <= iterations; i++)); do + running=() for disk in {0..9}; do if ((disk % 2 == i % 2)); then udevadm lock --timeout=30 --device="${devices[$disk]}" sfdisk -q --delete "${devices[$disk]}" & else udevadm lock --timeout=30 --device="${devices[$disk]}" sfdisk -q -X gpt "${devices[$disk]}" <"$partscript" & fi - running[$disk]=$! + + # shellcheck disable=SC2190 + running+=( "$!" ) done - for key in "${!running[@]}"; do - wait "${running[$key]}" - unset "running[$key]" + for j in "${running[@]}"; do + wait "$j" done if ((i % 10 <= 1)); then @@ -473,28 +474,36 @@ EOF done helper_check_device_units - rm -f "$rule" "$partscript" + # Cleanup and check if unnecessary devlinks are removed. + for disk in {0..9}; do + udevadm lock --timeout="$timeout" --device="${devices[$disk]}" sfdisk -q --delete "${devices[$disk]}" || : + done + udevadm settle --timeout="$timeout" + for ((part = 1; part <= num_part; part++)); do + udevadm wait --timeout=10 --removed "/dev/disk/by-partlabel/test${part}" + done + + rm -f "$rule" "$partscript" udevadm control --reload } testcase_simultaneous_events_2() { - local disk expected i iterations key link num_part part script_dir target timeout - local -a devices symlinks - local -A running + local disk i iterations link num_part part script_dir target timeout + local -a devices running script_dir="$(mktemp --directory "/tmp/test-udev-storage.script.XXXXXXXXXX")" # shellcheck disable=SC2064 trap "rm -rf '$script_dir'" RETURN if [[ -v ASAN_OPTIONS || "$(systemd-detect-virt -v)" == "qemu" ]]; then - num_part=20 - iterations=1 - timeout=2400 - else - num_part=100 - iterations=3 + num_part=10 + iterations=2 timeout=300 + else + num_part=40 + iterations=5 + timeout=200 fi for disk in {0..9}; do @@ -514,32 +523,40 @@ $(for ((part = 1; part <= num_part; part++)); do printf 'name="testlabel-%d", si EOF done + ls -l /dev/disk/by-partlabel + echo "## $iterations iterations start: $(date '+%H:%M:%S.%N')" - for ((i = 1; i <= iterations; i++)); do + running=() + for disk in "${devices[@]}"; do + udevadm lock --timeout=30 --device="$disk" \ + bash -c "for ((i = 1; i <= $iterations; i++)); do sfdisk -q --delete $disk; sfdisk -q -X gpt $disk <$script_dir/partscript-\$i; done" & - for disk in {0..9}; do - udevadm lock --timeout="$timeout" --device="${devices[$disk]}" sfdisk -q --delete "${devices[$disk]}" & - running[$disk]=$! - done - - for key in "${!running[@]}"; do - wait "${running[$key]}" - unset "running[$key]" - done - - for disk in {0..9}; do - udevadm lock --timeout="$timeout" --device="${devices[$disk]}" sfdisk -q -X gpt "${devices[$disk]}" <"$script_dir/partscript-$i" & - running[$disk]=$! - done - - for key in "${!running[@]}"; do - wait "${running[$key]}" - unset "running[$key]" - done - - udevadm wait --settle --timeout="$timeout" "${devices[@]}" "/dev/disk/by-partlabel/testlabel-$i" + # shellcheck disable=SC2190 + running+=( "$!" ) done + + for i in "${running[@]}"; do + wait "$i" + done + + udevadm settle --timeout="$timeout" echo "## $iterations iterations end: $(date '+%H:%M:%S.%N')" + + ls -l /dev/disk/by-partlabel + + # Check if unnecessary devlinks are removed. + for ((i = 1; i < iterations; i++)); do + udevadm wait --timeout=10 --removed "/dev/disk/by-partlabel/testlabel-$i" + done + + helper_check_device_units + + # Cleanup + for disk in "${devices[@]}"; do + udevadm lock --timeout=30 --device="$disk" sfdisk -q --delete "$disk" + done + udevadm settle --timeout="$timeout" + udevadm wait --timeout=10 --removed "/dev/disk/by-partlabel/testlabel-$iterations" } testcase_simultaneous_events() { From fdb86005755ab0e6764696e1c862ec25bf1bca60 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 30 Jul 2025 21:44:04 +0900 Subject: [PATCH 5/5] TEST-64-UDEV-STORAGE: add stress tests for creating/removing device node symlinks For issue #37823. --- test/units/TEST-64-UDEV-STORAGE.sh | 52 ++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/test/units/TEST-64-UDEV-STORAGE.sh b/test/units/TEST-64-UDEV-STORAGE.sh index 856f44595d..b9769b0ff1 100755 --- a/test/units/TEST-64-UDEV-STORAGE.sh +++ b/test/units/TEST-64-UDEV-STORAGE.sh @@ -559,9 +559,61 @@ EOF udevadm wait --timeout=10 --removed "/dev/disk/by-partlabel/testlabel-$iterations" } +testcase_simultaneous_events_3() { + local device i iterations link num_part part script_dir target timeout + + # for issue #37823 + + script_dir="$(mktemp --directory "/tmp/test-udev-storage.script.XXXXXXXXXX")" + # shellcheck disable=SC2064 + trap "rm -rf '$script_dir'" RETURN + + num_part=5 + iterations=30 + if [[ -v ASAN_OPTIONS || "$(systemd-detect-virt -v)" == "qemu" ]]; then + timeout=120 + else + timeout=60 + fi + + link="/dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_deadbeeftest0" + device="$(readlink -f "$link")" + if [[ ! -b "$device" ]]; then + echo "ERROR: failed to find the test SCSI block device $link" + return 1 + fi + + for ((i = 1; i <= iterations; i++)); do + cat >"$script_dir/partscript-$i" <