From 6056663a14863da81ef6dad3258e514382c31adc Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 8 Oct 2024 11:51:48 +0200
Subject: [PATCH 1/2] fd-util: introduce fd_validate() helper

It just uses F_GETFD to validate an fd. it's a bit easier to read
though, and handles the < 0 case internally.
---
 src/basic/fd-util.c                  | 10 ++++++++++
 src/basic/fd-util.h                  |  1 +
 src/libsystemd/sd-daemon/sd-daemon.c |  5 ++++-
 src/test/test-fd-util.c              | 23 ++++++++++++++++++-----
 src/test/test-fdset.c                | 25 +++++++++----------------
 5 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/src/basic/fd-util.c b/src/basic/fd-util.c
index 88b76c51d0..3f8c5b92d3 100644
--- a/src/basic/fd-util.c
+++ b/src/basic/fd-util.c
@@ -511,6 +511,16 @@ int pack_fds(int fds[], size_t n_fds) {
         return 0;
 }
 
+int fd_validate(int fd) {
+        if (fd < 0)
+                return -EBADF;
+
+        if (fcntl(fd, F_GETFD) < 0)
+                return -errno;
+
+        return 0;
+}
+
 int same_fd(int a, int b) {
         struct stat sta, stb;
         pid_t pid;
diff --git a/src/basic/fd-util.h b/src/basic/fd-util.h
index 3a56d2cbbf..93b254c680 100644
--- a/src/basic/fd-util.h
+++ b/src/basic/fd-util.h
@@ -80,6 +80,7 @@ int close_all_fds_without_malloc(const int except[], size_t n_except);
 
 int pack_fds(int fds[], size_t n);
 
+int fd_validate(int fd);
 int same_fd(int a, int b);
 
 void cmsg_close_all(struct msghdr *mh);
diff --git a/src/libsystemd/sd-daemon/sd-daemon.c b/src/libsystemd/sd-daemon/sd-daemon.c
index b5469b5d98..1e28a45fde 100644
--- a/src/libsystemd/sd-daemon/sd-daemon.c
+++ b/src/libsystemd/sd-daemon/sd-daemon.c
@@ -400,9 +400,12 @@ _public_ int sd_is_socket_unix(int fd, int type, int listening, const char *path
 
 _public_ int sd_is_mq(int fd, const char *path) {
         struct mq_attr attr;
+        int r;
 
         /* Check that the fd is valid */
-        assert_return(fcntl(fd, F_GETFD) >= 0, -errno);
+        r = fd_validate(fd);
+        if (r < 0)
+                return r;
 
         if (mq_getattr(fd, &attr) < 0) {
                 if (errno == EBADF)
diff --git a/src/test/test-fd-util.c b/src/test/test-fd-util.c
index f2b65d492a..e49a5dde45 100644
--- a/src/test/test-fd-util.c
+++ b/src/test/test-fd-util.c
@@ -40,9 +40,9 @@ TEST(close_many) {
 
         close_many(fds, 2);
 
-        assert_se(fcntl(fds[0], F_GETFD) == -1);
-        assert_se(fcntl(fds[1], F_GETFD) == -1);
-        assert_se(fcntl(fds[2], F_GETFD) >= 0);
+        assert_se(fd_validate(fds[0]) == -EBADF);
+        assert_se(fd_validate(fds[1]) == -EBADF);
+        assert_se(fd_validate(fds[2]) >= 0);
 
         safe_close(fds[2]);
 }
@@ -57,6 +57,19 @@ TEST(close_nointr) {
         assert_se(close_nointr(fd) < 0);
 }
 
+TEST(fd_validate) {
+        assert_se(fd_validate(-EINVAL) == -EBADF);
+        assert_se(fd_validate(-EBADF) == -EBADF);
+
+        _cleanup_close_ int b = -EBADF;
+        assert_se((b = open("/dev/null", O_RDONLY|O_CLOEXEC)) >= 0);
+
+        assert_se(fd_validate(b) == 0);
+        safe_close(b);
+        assert_se(fd_validate(b) == -EBADF);
+        TAKE_FD(b);
+}
+
 TEST(same_fd) {
         _cleanup_close_pair_ int p[2];
         _cleanup_close_ int a, b, c;
@@ -222,9 +235,9 @@ static size_t validate_fds(
                         continue;
 
                 if (opened)
-                        assert_se(fcntl(fds[i], F_GETFD) >= 0);
+                        assert_se(fd_validate(fds[i]) >= 0);
                 else
-                        assert_se(fcntl(fds[i], F_GETFD) < 0 && errno == EBADF);
+                        assert_se(fd_validate(fds[i]) == -EBADF);
 
                 c++;
         }
diff --git a/src/test/test-fdset.c b/src/test/test-fdset.c
index cfbd8e270a..7aeaf5f129 100644
--- a/src/test/test-fdset.c
+++ b/src/test/test-fdset.c
@@ -23,8 +23,7 @@ TEST(fdset_new_fill) {
         assert_se(fdset_new_fill(/* filter_cloexec= */ -1, &fdset) >= 0);
         assert_se(fdset_contains(fdset, fd));
         fdset = fdset_free(fdset);
-        assert_se(fcntl(fd, F_GETFD) < 0);
-        assert_se(errno == EBADF);
+        assert_se(fd_validate(fd) == -EBADF);
 
         fd = open("/dev/null", O_CLOEXEC|O_RDONLY);
         assert_se(fd >= 0);
@@ -32,13 +31,12 @@ TEST(fdset_new_fill) {
         assert_se(fdset_new_fill(/* filter_cloexec= */ 0, &fdset) >= 0);
         assert_se(!fdset_contains(fdset, fd));
         fdset = fdset_free(fdset);
-        assert_se(fcntl(fd, F_GETFD) >= 0);
+        assert_se(fd_validate(fd) >= 0);
 
         assert_se(fdset_new_fill(/* filter_cloexec= */ 1, &fdset) >= 0);
         assert_se(fdset_contains(fdset, fd));
         fdset = fdset_free(fdset);
-        assert_se(fcntl(fd, F_GETFD) < 0);
-        assert_se(errno == EBADF);
+        assert_se(fd_validate(fd) == -EBADF);
 
         fd = open("/dev/null", O_RDONLY);
         assert_se(fd >= 0);
@@ -46,7 +44,7 @@ TEST(fdset_new_fill) {
         assert_se(fdset_new_fill(/* filter_cloexec= */ 1, &fdset) >= 0);
         assert_se(!fdset_contains(fdset, fd));
         fdset = fdset_free(fdset);
-        assert_se(fcntl(fd, F_GETFD) >= 0);
+        assert_se(fd_validate(fd) >= 0);
 
         assert_se(fdset_new_fill(/* filter_cloexec= */ 0, &fdset) >= 0);
         assert_se(fdset_contains(fdset, fd));
@@ -54,8 +52,7 @@ TEST(fdset_new_fill) {
         assert_se(flags >= 0);
         assert_se(FLAGS_SET(flags, FD_CLOEXEC));
         fdset = fdset_free(fdset);
-        assert_se(fcntl(fd, F_GETFD) < 0);
-        assert_se(errno == EBADF);
+        assert_se(fd_validate(fd) == -EBADF);
 
         log_open();
 }
@@ -102,10 +99,8 @@ TEST(fdset_cloexec) {
 }
 
 TEST(fdset_close_others) {
-        int fd = -EBADF;
-        int copyfd = -EBADF;
+        int fd = -EBADF, copyfd = -EBADF;
         _cleanup_fdset_free_ FDSet *fdset = NULL;
-        int flags = -1;
         _cleanup_(unlink_tempfilep) char name[] = "/tmp/test-fdset_close_others.XXXXXX";
 
         fd = mkostemp_safe(name);
@@ -121,15 +116,13 @@ TEST(fdset_close_others) {
         log_close();
         assert_se(fdset_close_others(fdset) >= 0);
 
-        flags = fcntl(fd, F_GETFD);
-        assert_se(flags < 0);
+        assert_se(fd_validate(fd) == -EBADF);
 
         /* Open log again after checking that fd is invalid, since reopening the log might make fd a valid
          * file descriptor again. */
         (void) log_open();
 
-        flags = fcntl(copyfd, F_GETFD);
-        assert_se(flags >= 0);
+        assert_se(fd_validate(copyfd) >= 0);
 }
 
 TEST(fdset_remove) {
@@ -146,7 +139,7 @@ TEST(fdset_remove) {
         assert_se(fdset_remove(fdset, fd) >= 0);
         assert_se(!fdset_contains(fdset, fd));
 
-        assert_se(fcntl(fd, F_GETFD) >= 0);
+        assert_se(fd_validate(fd) >= 0);
 }
 
 TEST(fdset_iterate) {

From e7f905347526dd17543dd54561f56a047e6ff9f4 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 8 Oct 2024 10:01:22 +0200
Subject: [PATCH 2/2] fd-util: use F_DUPFD_QUERY for same_fd()

Catch up with the nice little toys the kernel fs developers have added
for us. Preferably, let's make use of the new F_DUPFD_QUERY fcntl() call
that checks whether two fds are just duplicates of each other
(duplicates as in dup(), not as in open() of the same inode, i.e.
whether they share a single file offset and so on).

This API is much nicer, since it is a core kernel feature, unlike the
kcmp() call we so far used, which is part of the (optional)
checkpoint/restore stuff.

F_DUPFD_QUERY is available since kernel 6.10.
---
 src/basic/fd-util.c       | 52 +++++++++++++++++++++++++++++++--------
 src/basic/missing_fcntl.h |  4 +++
 src/test/test-fd-util.c   | 18 +++++++++++++-
 3 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/src/basic/fd-util.c b/src/basic/fd-util.c
index 3f8c5b92d3..c112f8dbad 100644
--- a/src/basic/fd-util.c
+++ b/src/basic/fd-util.c
@@ -530,25 +530,57 @@ int same_fd(int a, int b) {
         assert(b >= 0);
 
         /* Compares two file descriptors. Note that semantics are quite different depending on whether we
-         * have kcmp() or we don't. If we have kcmp() this will only return true for dup()ed file
-         * descriptors, but not otherwise. If we don't have kcmp() this will also return true for two fds of
-         * the same file, created by separate open() calls. Since we use this call mostly for filtering out
-         * duplicates in the fd store this difference hopefully doesn't matter too much. */
+         * have F_DUPFD_QUERY/kcmp() or we don't. If we have F_DUPFD_QUERY/kcmp() this will only return true
+         * for dup()ed file descriptors, but not otherwise. If we don't have F_DUPFD_QUERY/kcmp() this will
+         * also return true for two fds of the same file, created by separate open() calls. Since we use this
+         * call mostly for filtering out duplicates in the fd store this difference hopefully doesn't matter
+         * too much.
+         *
+         * Guarantees that if either of the passed fds is not allocated we'll return -EBADF. */
+
+        if (a == b) {
+                /* Let's validate that the fd is valid */
+                r = fd_validate(a);
+                if (r < 0)
+                        return r;
 
-        if (a == b)
                 return true;
+        }
+
+        /* Try to use F_DUPFD_QUERY if we have it first, as it is the nicest API */
+        r = fcntl(a, F_DUPFD_QUERY, b);
+        if (r > 0)
+                return true;
+        if (r == 0) {
+                /* The kernel will return 0 in case the first fd is allocated, but the 2nd is not. (Which is different in the kcmp() case) Explicitly validate it hence. */
+                r = fd_validate(b);
+                if (r < 0)
+                        return r;
+
+                return false;
+        }
+        /* On old kernels (< 6.10) that do not support F_DUPFD_QUERY this will return EINVAL for regular fds, and EBADF on O_PATH fds. Confusing. */
+        if (errno == EBADF) {
+                /* EBADF could mean two things: the first fd is not valid, or it is valid and is O_PATH and
+                 * F_DUPFD_QUERY is not supported. Let's validate the fd explicitly, to distinguish this
+                 * case. */
+                r = fd_validate(a);
+                if (r < 0)
+                        return r;
+
+                /* If the fd is valid, but we got EBADF, then let's try kcmp(). */
+        } else if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno) && errno != EINVAL)
+                return -errno;
 
         /* Try to use kcmp() if we have it. */
         pid = getpid_cached();
         r = kcmp(pid, pid, KCMP_FILE, a, b);
-        if (r == 0)
-                return true;
-        if (r > 0)
-                return false;
+        if (r >= 0)
+                return !r;
         if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                 return -errno;
 
-        /* We don't have kcmp(), use fstat() instead. */
+        /* We have neither F_DUPFD_QUERY nor kcmp(), use fstat() instead. */
         if (fstat(a, &sta) < 0)
                 return -errno;
 
diff --git a/src/basic/missing_fcntl.h b/src/basic/missing_fcntl.h
index 1248eb7e4d..a6188879c1 100644
--- a/src/basic/missing_fcntl.h
+++ b/src/basic/missing_fcntl.h
@@ -7,6 +7,10 @@
 #define F_LINUX_SPECIFIC_BASE 1024
 #endif
 
+#ifndef F_DUPFD_QUERY
+#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
+#endif
+
 #ifndef F_SETPIPE_SZ
 #define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
 #endif
diff --git a/src/test/test-fd-util.c b/src/test/test-fd-util.c
index e49a5dde45..20cf7b7627 100644
--- a/src/test/test-fd-util.c
+++ b/src/test/test-fd-util.c
@@ -72,12 +72,14 @@ TEST(fd_validate) {
 
 TEST(same_fd) {
         _cleanup_close_pair_ int p[2];
-        _cleanup_close_ int a, b, c;
+        _cleanup_close_ int a, b, c, d, e;
 
         assert_se(pipe2(p, O_CLOEXEC) >= 0);
         assert_se((a = fcntl(p[0], F_DUPFD, 3)) >= 0);
         assert_se((b = open("/dev/null", O_RDONLY|O_CLOEXEC)) >= 0);
         assert_se((c = fcntl(a, F_DUPFD, 3)) >= 0);
+        assert_se((d = open("/dev/null", O_RDONLY|O_CLOEXEC|O_PATH)) >= 0); /* O_PATH changes error returns in F_DUPFD_QUERY, let's test explicitly */
+        assert_se((e = fcntl(d, F_DUPFD, 3)) >= 0);
 
         assert_se(same_fd(p[0], p[0]) > 0);
         assert_se(same_fd(p[1], p[1]) > 0);
@@ -102,6 +104,20 @@ TEST(same_fd) {
 
         assert_se(same_fd(a, b) == 0);
         assert_se(same_fd(b, a) == 0);
+
+        assert_se(same_fd(a, d) == 0);
+        assert_se(same_fd(d, a) == 0);
+        assert_se(same_fd(d, d) > 0);
+        assert_se(same_fd(d, e) > 0);
+        assert_se(same_fd(e, d) > 0);
+
+        /* Let's now compare with a valid fd nr, that is definitely closed, and verify it returns the right error code */
+        safe_close(d);
+        assert_se(same_fd(d, d) == -EBADF);
+        assert_se(same_fd(e, d) == -EBADF);
+        assert_se(same_fd(d, e) == -EBADF);
+        assert_se(same_fd(e, e) > 0);
+        TAKE_FD(d);
 }
 
 TEST(open_serialization_fd) {