From 99e6f682d3627c2c06d99bb3c3756e448e8d9bd1 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 3 Jan 2024 04:27:59 +0900 Subject: [PATCH 1/6] sd-journal: cache last entry offset and journal file state When the offset of the last entry object (or last object for journal files generated by an old journald) is not changed, the timestamps should be updated by journal_file_read_tail_timestamp() are unchanged. So, we can drop to call fstat() in the function. As, the journal header is always mapped, so we can read the offset and journal file state without calling fstat. Still, when the last entry offset is changed, we may need to call fstat() to read the entry object. But, hopefully the number of fstat() call can be reduced. --- src/libsystemd/sd-journal/journal-file.c | 1 - src/libsystemd/sd-journal/journal-file.h | 3 ++- src/libsystemd/sd-journal/sd-journal.c | 18 +++++++++++------- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index 64bf8ef9af..8039b2aaf9 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -2244,7 +2244,6 @@ static int journal_file_link_entry( f->header->tail_entry_monotonic = o->entry.monotonic; if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_offset)) f->header->tail_entry_offset = htole64(offset); - f->newest_mtime = 0; /* we have a new tail entry now, explicitly invalidate newest boot id/timestamp info */ /* Link up the items */ for (uint64_t i = 0; i < n_items; i++) { diff --git a/src/libsystemd/sd-journal/journal-file.h b/src/libsystemd/sd-journal/journal-file.h index 81fafb9bec..6b378a2039 100644 --- a/src/libsystemd/sd-journal/journal-file.h +++ b/src/libsystemd/sd-journal/journal-file.h @@ -129,7 +129,8 @@ typedef struct JournalFile { uint64_t newest_monotonic_usec; uint64_t newest_realtime_usec; unsigned newest_boot_id_prioq_idx; - usec_t newest_mtime; + uint64_t newest_entry_offset; + uint8_t newest_state; } JournalFile; typedef enum JournalFileFlags { diff --git a/src/libsystemd/sd-journal/sd-journal.c b/src/libsystemd/sd-journal/sd-journal.c index 5a8c8a8579..d7136d250e 100644 --- a/src/libsystemd/sd-journal/sd-journal.c +++ b/src/libsystemd/sd-journal/sd-journal.c @@ -2428,11 +2428,10 @@ static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) { /* Tries to read the timestamp of the most recently written entry. */ - r = journal_file_fstat(f); - if (r < 0) - return r; - if (f->newest_mtime == timespec_load(&f->last_stat.st_mtim)) - return 0; /* mtime didn't change since last time, don't bother */ + if (f->header->state == f->newest_state && + f->header->state == STATE_ARCHIVED && + f->newest_entry_offset != 0) + return 0; /* We have already read archived file. */ if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_offset)) { offset = le64toh(READ_NOW(f->header->tail_entry_offset)); @@ -2443,6 +2442,8 @@ static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) { } if (offset == 0) return -ENODATA; /* not a single object/entry, hence no tail timestamp */ + if (offset == f->newest_entry_offset) + return 0; /* No new entry is added after we read last time. */ /* Move to the last object in the journal file, in the hope it is an entry (which it usually will * be). If we lack the "tail_entry_offset" field in the header, we specify the type as OBJECT_UNUSED @@ -2452,6 +2453,7 @@ static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) { if (r < 0) { log_debug_errno(r, "Failed to move to last object in journal file, ignoring: %m"); o = NULL; + offset = 0; } if (o && o->object.type == OBJECT_ENTRY) { /* Yay, last object is an entry, let's use the data. */ @@ -2469,10 +2471,11 @@ static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) { mo = le64toh(f->header->tail_entry_monotonic); rt = le64toh(f->header->tail_entry_realtime); id = f->header->tail_entry_boot_id; + offset = UINT64_MAX; } else { /* Otherwise let's find the last entry manually (this possibly means traversing the * chain of entry arrays, till the end */ - r = journal_file_next_entry(f, 0, DIRECTION_UP, &o, NULL); + r = journal_file_next_entry(f, 0, DIRECTION_UP, &o, offset == 0 ? &offset : NULL); if (r < 0) return r; if (r == 0) @@ -2494,7 +2497,8 @@ static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) { f->newest_monotonic_usec = mo; f->newest_realtime_usec = rt; f->newest_machine_id = f->header->machine_id; - f->newest_mtime = timespec_load(&f->last_stat.st_mtim); + f->newest_entry_offset = offset; + f->newest_state = f->header->state; r = journal_file_reshuffle_newest_by_boot_id(j, f); if (r < 0) From 51b2bcf8356b847e7ab778adfad930691713c7ef Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 3 Jan 2024 04:28:03 +0900 Subject: [PATCH 2/6] sd-journal: make journal_file_read_tail_timestamp() notify to the caller that some new journal entries added Tiny optimization for journal_file_find_newest_for_boot_id(). --- src/libsystemd/sd-journal/sd-journal.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/libsystemd/sd-journal/sd-journal.c b/src/libsystemd/sd-journal/sd-journal.c index d7136d250e..f082f3f6f9 100644 --- a/src/libsystemd/sd-journal/sd-journal.c +++ b/src/libsystemd/sd-journal/sd-journal.c @@ -449,6 +449,11 @@ static int journal_file_find_newest_for_boot_id( r = journal_file_read_tail_timestamp(j, f); if (r < 0) return log_debug_errno(r, "Failed to read tail timestamp while trying to find newest journal file for boot ID %s.", SD_ID128_TO_STRING(id)); + if (r == 0) { + /* No new entry found. */ + *ret = f; + return 0; + } /* Refreshing the timestamp we read might have reshuffled the prioq, hence let's check the * prioq again and only use the information once we reached an equilibrium or hit a limit */ @@ -2490,6 +2495,17 @@ static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) { if (mo > rt) /* monotonic clock is further ahead than realtime? that's weird, refuse to use the data */ return -ENODATA; + if (offset == f->newest_entry_offset) { + /* Cached data and the current one should be equivalent. */ + if (!sd_id128_equal(f->newest_machine_id, f->header->machine_id) || + !sd_id128_equal(f->newest_boot_id, id) || + f->newest_monotonic_usec != mo || + f->newest_realtime_usec != rt) + return -EBADMSG; + + return 0; /* No new entry is added after we read last time. */ + } + if (!sd_id128_equal(f->newest_boot_id, id)) journal_file_unlink_newest_by_boot_id(j, f); @@ -2504,7 +2520,7 @@ static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) { if (r < 0) return r; - return 0; + return 1; /* Updated. */ } _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) { From bd1af1d5fb1db9a77e8738ec108bf60ff0cb15f1 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 3 Jan 2024 04:28:06 +0900 Subject: [PATCH 3/6] sd-journal: introduce SD_JOURNAL_ASSUME_IMMUTABLE flag The flag can be used when it is not necessary to follow journal file update. Co-authored-by: Costa Tsaousis --- src/libsystemd/sd-journal/sd-journal.c | 33 +++++++++++++++++++++----- src/systemd/sd-journal.h | 1 + 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/libsystemd/sd-journal/sd-journal.c b/src/libsystemd/sd-journal/sd-journal.c index f082f3f6f9..d0193f124b 100644 --- a/src/libsystemd/sd-journal/sd-journal.c +++ b/src/libsystemd/sd-journal/sd-journal.c @@ -2092,7 +2092,8 @@ static sd_journal *journal_new(int flags, const char *path, const char *namespac SD_JOURNAL_SYSTEM | \ SD_JOURNAL_CURRENT_USER | \ SD_JOURNAL_ALL_NAMESPACES | \ - SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE) + SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE | \ + SD_JOURNAL_ASSUME_IMMUTABLE) _public_ int sd_journal_open_namespace(sd_journal **ret, const char *namespace, int flags) { _cleanup_(sd_journal_closep) sd_journal *j = NULL; @@ -2118,7 +2119,9 @@ _public_ int sd_journal_open(sd_journal **ret, int flags) { } #define OPEN_CONTAINER_ALLOWED_FLAGS \ - (SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_SYSTEM) + (SD_JOURNAL_LOCAL_ONLY | \ + SD_JOURNAL_SYSTEM | \ + SD_JOURNAL_ASSUME_IMMUTABLE) _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) { _cleanup_free_ char *root = NULL, *class = NULL; @@ -2162,7 +2165,9 @@ _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, in #define OPEN_DIRECTORY_ALLOWED_FLAGS \ (SD_JOURNAL_OS_ROOT | \ - SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER ) + SD_JOURNAL_SYSTEM | \ + SD_JOURNAL_CURRENT_USER | \ + SD_JOURNAL_ASSUME_IMMUTABLE) _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) { _cleanup_(sd_journal_closep) sd_journal *j = NULL; @@ -2187,12 +2192,15 @@ _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int f return 0; } +#define OPEN_FILES_ALLOWED_FLAGS \ + (SD_JOURNAL_ASSUME_IMMUTABLE) + _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) { _cleanup_(sd_journal_closep) sd_journal *j = NULL; int r; assert_return(ret, -EINVAL); - assert_return(flags == 0, -EINVAL); + assert_return((flags & ~OPEN_FILES_ALLOWED_FLAGS) == 0, -EINVAL); j = journal_new(flags, NULL, NULL); if (!j) @@ -2214,7 +2222,8 @@ _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int fla (SD_JOURNAL_OS_ROOT | \ SD_JOURNAL_SYSTEM | \ SD_JOURNAL_CURRENT_USER | \ - SD_JOURNAL_TAKE_DIRECTORY_FD) + SD_JOURNAL_TAKE_DIRECTORY_FD | \ + SD_JOURNAL_ASSUME_IMMUTABLE) _public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) { _cleanup_(sd_journal_closep) sd_journal *j = NULL; @@ -2252,6 +2261,9 @@ _public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) { return 0; } +#define OPEN_FILES_FD_ALLOWED_FLAGS \ + (SD_JOURNAL_ASSUME_IMMUTABLE) + _public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags) { JournalFile *f; _cleanup_(sd_journal_closep) sd_journal *j = NULL; @@ -2259,7 +2271,7 @@ _public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fd assert_return(ret, -EINVAL); assert_return(n_fds > 0, -EBADF); - assert_return(flags == 0, -EINVAL); + assert_return((flags & ~OPEN_FILES_FD_ALLOWED_FLAGS) == 0, -EINVAL); j = journal_new(flags, NULL, NULL); if (!j) @@ -2433,6 +2445,9 @@ static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) { /* Tries to read the timestamp of the most recently written entry. */ + if (FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE) && f->newest_entry_offset != 0) + return 0; /* We have already read the file, and we assume that the file is immutable. */ + if (f->header->state == f->newest_state && f->header->state == STATE_ARCHIVED && f->newest_entry_offset != 0) @@ -2791,6 +2806,7 @@ _public_ int sd_journal_get_fd(sd_journal *j) { assert_return(j, -EINVAL); assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH); if (j->no_inotify) return -EMEDIUMTYPE; @@ -2817,6 +2833,7 @@ _public_ int sd_journal_get_events(sd_journal *j) { assert_return(j, -EINVAL); assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH); fd = sd_journal_get_fd(j); if (fd < 0) @@ -2830,6 +2847,7 @@ _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) { assert_return(j, -EINVAL); assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH); assert_return(timeout_usec, -EINVAL); fd = sd_journal_get_fd(j); @@ -2957,6 +2975,8 @@ _public_ int sd_journal_process(sd_journal *j) { if (j->inotify_fd < 0) /* We have no inotify fd yet? Then there's noting to process. */ return 0; + assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH); + j->last_process_usec = now(CLOCK_MONOTONIC); j->last_invalidate_counter = j->current_invalidate_counter; @@ -2985,6 +3005,7 @@ _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) { assert_return(j, -EINVAL); assert_return(!journal_origin_changed(j), -ECHILD); + assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH); if (j->inotify_fd < 0) { JournalFile *f; diff --git a/src/systemd/sd-journal.h b/src/systemd/sd-journal.h index 7d2d75dd89..e4a67f048b 100644 --- a/src/systemd/sd-journal.h +++ b/src/systemd/sd-journal.h @@ -72,6 +72,7 @@ enum { SD_JOURNAL_ALL_NAMESPACES = 1 << 5, /* Show all namespaces, not just the default or specified one */ SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE = 1 << 6, /* Show default namespace in addition to specified one */ SD_JOURNAL_TAKE_DIRECTORY_FD = 1 << 7, /* sd_journal_open_directory_fd() will take ownership of the provided file descriptor. */ + SD_JOURNAL_ASSUME_IMMUTABLE = 1 << 8, /* Assume the opened journal files are immutable. Journal entries added later may be ignored. */ SD_JOURNAL_SYSTEM_ONLY _sd_deprecated_ = SD_JOURNAL_SYSTEM /* old name */ }; From f7f062bf9f194dc14a4ffc6e7fff9d8bfdea2147 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 3 Jan 2024 04:28:08 +0900 Subject: [PATCH 4/6] tree-wide: set SD_JOURNAL_ASSUME_IMMUTABLE where appropriate Co-authored-by: Costa Tsaousis --- src/coredump/coredumpctl.c | 8 ++--- src/journal-remote/fuzz-journal-remote.c | 2 +- src/journal-remote/journal-upload.c | 2 +- src/journal/bsod.c | 2 +- src/journal/journalctl.c | 16 ++++++---- src/libsystemd/sd-journal/test-journal-enum.c | 2 +- .../sd-journal/test-journal-flush.c | 6 ++-- src/libsystemd/sd-journal/test-journal-init.c | 4 +-- .../sd-journal/test-journal-interleaving.c | 32 +++++++++---------- .../sd-journal/test-journal-match.c | 2 +- .../sd-journal/test-journal-stream.c | 2 +- src/network/networkctl.c | 2 +- src/shared/journal-util.c | 4 +-- src/shared/journal-util.h | 2 +- src/shared/logs-show.c | 5 ++- 15 files changed, 49 insertions(+), 42 deletions(-) diff --git a/src/coredump/coredumpctl.c b/src/coredump/coredumpctl.c index 90b2fe4a7a..53769f8d4c 100644 --- a/src/coredump/coredumpctl.c +++ b/src/coredump/coredumpctl.c @@ -128,19 +128,19 @@ static int acquire_journal(sd_journal **ret, char **matches) { assert(ret); if (arg_directory) { - r = sd_journal_open_directory(&j, arg_directory, 0); + r = sd_journal_open_directory(&j, arg_directory, SD_JOURNAL_ASSUME_IMMUTABLE); if (r < 0) return log_error_errno(r, "Failed to open journals in directory: %s: %m", arg_directory); } else if (arg_root) { - r = sd_journal_open_directory(&j, arg_root, SD_JOURNAL_OS_ROOT); + r = sd_journal_open_directory(&j, arg_root, SD_JOURNAL_OS_ROOT | SD_JOURNAL_ASSUME_IMMUTABLE); if (r < 0) return log_error_errno(r, "Failed to open journals in root directory: %s: %m", arg_root); } else if (arg_file) { - r = sd_journal_open_files(&j, (const char**)arg_file, 0); + r = sd_journal_open_files(&j, (const char**)arg_file, SD_JOURNAL_ASSUME_IMMUTABLE); if (r < 0) return log_error_errno(r, "Failed to open journal files: %m"); } else { - r = sd_journal_open(&j, arg_all ? 0 : SD_JOURNAL_LOCAL_ONLY); + r = sd_journal_open(&j, arg_all ? 0 : SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_ASSUME_IMMUTABLE); if (r < 0) return log_error_errno(r, "Failed to open journal: %m"); } diff --git a/src/journal-remote/fuzz-journal-remote.c b/src/journal-remote/fuzz-journal-remote.c index 557100bb96..774389dee3 100644 --- a/src/journal-remote/fuzz-journal-remote.c +++ b/src/journal-remote/fuzz-journal-remote.c @@ -67,7 +67,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { /* Out */ - r = sd_journal_open_files(&j, (const char**) STRV_MAKE(name), 0); + r = sd_journal_open_files(&j, (const char**) STRV_MAKE(name), SD_JOURNAL_ASSUME_IMMUTABLE); if (r < 0) { log_error_errno(r, "sd_journal_open_files([\"%s\"]) failed: %m", name); assert_se(IN_SET(r, -ENOMEM, -EMFILE, -ENFILE, -ENODATA)); diff --git a/src/journal-remote/journal-upload.c b/src/journal-remote/journal-upload.c index db74355842..97b5f929ab 100644 --- a/src/journal-remote/journal-upload.c +++ b/src/journal-remote/journal-upload.c @@ -777,7 +777,7 @@ static int open_journal(sd_journal **j) { else if (arg_file) r = sd_journal_open_files(j, (const char**) arg_file, 0); else if (arg_machine) - r = journal_open_machine(j, arg_machine); + r = journal_open_machine(j, arg_machine, 0); else r = sd_journal_open_namespace(j, arg_namespace, (arg_merge ? 0 : SD_JOURNAL_LOCAL_ONLY) | arg_namespace_flags | arg_journal_type); diff --git a/src/journal/bsod.c b/src/journal/bsod.c index 059e255ea4..32525437df 100644 --- a/src/journal/bsod.c +++ b/src/journal/bsod.c @@ -60,7 +60,7 @@ static int acquire_first_emergency_log_message(char **ret) { assert(ret); - r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY); + r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_ASSUME_IMMUTABLE); if (r < 0) return log_error_errno(r, "Failed to open journal: %m"); diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index d3ec22e224..ce07f1d59e 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -130,6 +130,7 @@ static const char *arg_field = NULL; static bool arg_catalog = false; static bool arg_reverse = false; static int arg_journal_type = 0; +static int arg_journal_additional_open_flags = 0; static int arg_namespace_flags = 0; static char *arg_root = NULL; static char *arg_image = NULL; @@ -1153,6 +1154,9 @@ static int parse_argv(int argc, char *argv[]) { arg_reverse = true; } + if (!arg_follow) + arg_journal_additional_open_flags = SD_JOURNAL_ASSUME_IMMUTABLE; + return 1; } @@ -2413,21 +2417,21 @@ static int run(int argc, char *argv[]) { } if (arg_directory) - r = sd_journal_open_directory(&j, arg_directory, arg_journal_type); + r = sd_journal_open_directory(&j, arg_directory, arg_journal_type | arg_journal_additional_open_flags); else if (arg_root) - r = sd_journal_open_directory(&j, arg_root, arg_journal_type | SD_JOURNAL_OS_ROOT); + r = sd_journal_open_directory(&j, arg_root, arg_journal_type | arg_journal_additional_open_flags | SD_JOURNAL_OS_ROOT); else if (arg_file_stdin) - r = sd_journal_open_files_fd(&j, (int[]) { STDIN_FILENO }, 1, 0); + r = sd_journal_open_files_fd(&j, (int[]) { STDIN_FILENO }, 1, arg_journal_additional_open_flags); else if (arg_file) - r = sd_journal_open_files(&j, (const char**) arg_file, 0); + r = sd_journal_open_files(&j, (const char**) arg_file, arg_journal_additional_open_flags); else if (arg_machine) - r = journal_open_machine(&j, arg_machine); + r = journal_open_machine(&j, arg_machine, arg_journal_additional_open_flags); else r = sd_journal_open_namespace( &j, arg_namespace, (arg_merge ? 0 : SD_JOURNAL_LOCAL_ONLY) | - arg_namespace_flags | arg_journal_type); + arg_namespace_flags | arg_journal_type | arg_journal_additional_open_flags); if (r < 0) return log_error_errno(r, "Failed to open %s: %m", arg_directory ?: arg_file ? "files" : "journal"); diff --git a/src/libsystemd/sd-journal/test-journal-enum.c b/src/libsystemd/sd-journal/test-journal-enum.c index 03fe8e2b30..93d4b8f3c7 100644 --- a/src/libsystemd/sd-journal/test-journal-enum.c +++ b/src/libsystemd/sd-journal/test-journal-enum.c @@ -15,7 +15,7 @@ int main(int argc, char *argv[]) { test_setup_logging(LOG_DEBUG); - assert_se(sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY) >= 0); + assert_se(sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_ASSUME_IMMUTABLE) >= 0); assert_se(sd_journal_add_match(j, "_TRANSPORT=syslog", 0) >= 0); assert_se(sd_journal_add_match(j, "_UID=0", 0) >= 0); diff --git a/src/libsystemd/sd-journal/test-journal-flush.c b/src/libsystemd/sd-journal/test-journal-flush.c index 3f0783571c..95c2757f17 100644 --- a/src/libsystemd/sd-journal/test-journal-flush.c +++ b/src/libsystemd/sd-journal/test-journal-flush.c @@ -36,9 +36,9 @@ static void test_journal_flush_one(int argc, char *argv[]) { assert_se(r >= 0); if (argc > 1) - r = sd_journal_open_files(&j, (const char **) strv_skip(argv, 1), 0); + r = sd_journal_open_files(&j, (const char **) strv_skip(argv, 1), SD_JOURNAL_ASSUME_IMMUTABLE); else - r = sd_journal_open(&j, 0); + r = sd_journal_open(&j, SD_JOURNAL_ASSUME_IMMUTABLE); assert_se(r == 0); sd_journal_set_data_threshold(j, 0); @@ -75,7 +75,7 @@ static void test_journal_flush_one(int argc, char *argv[]) { /* Open the new journal before archiving and offlining the file. */ sd_journal_close(j); - assert_se(sd_journal_open_directory(&j, dn, 0) >= 0); + assert_se(sd_journal_open_directory(&j, dn, SD_JOURNAL_ASSUME_IMMUTABLE) >= 0); /* Read the online journal. */ assert_se(sd_journal_seek_tail(j) >= 0); diff --git a/src/libsystemd/sd-journal/test-journal-init.c b/src/libsystemd/sd-journal/test-journal-init.c index 5fe4b7a2d6..ef66efdabf 100644 --- a/src/libsystemd/sd-journal/test-journal-init.c +++ b/src/libsystemd/sd-journal/test-journal-init.c @@ -31,12 +31,12 @@ int main(int argc, char *argv[]) { (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL); for (i = 0; i < I; i++) { - r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY); + r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_ASSUME_IMMUTABLE); assert_se(r == 0); sd_journal_close(j); - r = sd_journal_open_directory(&j, t, 0); + r = sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE); assert_se(r == 0); assert_se(sd_journal_seek_head(j) == 0); diff --git a/src/libsystemd/sd-journal/test-journal-interleaving.c b/src/libsystemd/sd-journal/test-journal-interleaving.c index 93c2c4aaca..688fea392b 100644 --- a/src/libsystemd/sd-journal/test-journal-interleaving.c +++ b/src/libsystemd/sd-journal/test-journal-interleaving.c @@ -260,14 +260,14 @@ static void test_skip_one(void (*setup)(void)) { setup(); /* Seek to head, iterate down. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_head(j)); assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */ test_check_numbers_down(j, 9); sd_journal_close(j); /* Seek to head, iterate down. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_head(j)); assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */ assert_se(sd_journal_previous(j) == 0); /* no-op */ @@ -275,7 +275,7 @@ static void test_skip_one(void (*setup)(void)) { sd_journal_close(j); /* Seek to head twice, iterate down. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_head(j)); assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */ assert_ret(sd_journal_seek_head(j)); @@ -284,7 +284,7 @@ static void test_skip_one(void (*setup)(void)) { sd_journal_close(j); /* Seek to head, move to previous, then iterate down. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_head(j)); assert_se(sd_journal_previous(j) == 0); /* no-op */ assert_se(sd_journal_next(j) == 1); /* pointing to the first entry */ @@ -292,7 +292,7 @@ static void test_skip_one(void (*setup)(void)) { sd_journal_close(j); /* Seek to head, walk several steps, then iterate down. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_head(j)); assert_se(sd_journal_previous(j) == 0); /* no-op */ assert_se(sd_journal_previous(j) == 0); /* no-op */ @@ -304,14 +304,14 @@ static void test_skip_one(void (*setup)(void)) { sd_journal_close(j); /* Seek to tail, iterate up. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_tail(j)); assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry */ test_check_numbers_up(j, 9); sd_journal_close(j); /* Seek to tail twice, iterate up. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_tail(j)); assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry */ assert_ret(sd_journal_seek_tail(j)); @@ -320,7 +320,7 @@ static void test_skip_one(void (*setup)(void)) { sd_journal_close(j); /* Seek to tail, move to next, then iterate up. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_tail(j)); assert_se(sd_journal_next(j) == 0); /* no-op */ assert_se(sd_journal_previous(j) == 1); /* pointing to the last entry */ @@ -328,7 +328,7 @@ static void test_skip_one(void (*setup)(void)) { sd_journal_close(j); /* Seek to tail, walk several steps, then iterate up. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_tail(j)); assert_se(sd_journal_next(j) == 0); /* no-op */ assert_se(sd_journal_next(j) == 0); /* no-op */ @@ -340,14 +340,14 @@ static void test_skip_one(void (*setup)(void)) { sd_journal_close(j); /* Seek to tail, skip to head, iterate down. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_tail(j)); assert_se(sd_journal_previous_skip(j, 9) == 9); /* pointing to the first entry. */ test_check_numbers_down(j, 9); sd_journal_close(j); /* Seek to tail, skip to head in a more complex way, then iterate down. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_tail(j)); assert_se(sd_journal_next(j) == 0); assert_se(sd_journal_previous_skip(j, 4) == 4); @@ -366,14 +366,14 @@ static void test_skip_one(void (*setup)(void)) { sd_journal_close(j); /* Seek to head, skip to tail, iterate up. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_head(j)); assert_se(sd_journal_next_skip(j, 9) == 9); test_check_numbers_up(j, 9); sd_journal_close(j); /* Seek to head, skip to tail in a more complex way, then iterate up. */ - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_ret(sd_journal_seek_head(j)); assert_se(sd_journal_previous(j) == 0); assert_se(sd_journal_next_skip(j, 4) == 4); @@ -409,14 +409,14 @@ static void test_boot_id_one(void (*setup)(void), size_t n_boots_expected) { setup(); - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_se(journal_get_boots(j, &boots, &n_boots) >= 0); assert_se(boots); assert_se(n_boots == n_boots_expected); sd_journal_close(j); FOREACH_ARRAY(b, boots, n_boots) { - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_se(journal_find_boot_by_id(j, b->id) == 1); sd_journal_close(j); } @@ -424,7 +424,7 @@ static void test_boot_id_one(void (*setup)(void), size_t n_boots_expected) { for (int i = - (int) n_boots + 1; i <= (int) n_boots; i++) { sd_id128_t id; - assert_ret(sd_journal_open_directory(&j, t, 0)); + assert_ret(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE)); assert_se(journal_find_boot_by_offset(j, i, &id) == 1); if (i <= 0) assert_se(sd_id128_equal(id, boots[n_boots + i - 1].id)); diff --git a/src/libsystemd/sd-journal/test-journal-match.c b/src/libsystemd/sd-journal/test-journal-match.c index 571a88c1ac..e56b27dde3 100644 --- a/src/libsystemd/sd-journal/test-journal-match.c +++ b/src/libsystemd/sd-journal/test-journal-match.c @@ -16,7 +16,7 @@ int main(int argc, char *argv[]) { test_setup_logging(LOG_DEBUG); - assert_se(sd_journal_open(&j, 0) >= 0); + assert_se(sd_journal_open(&j, SD_JOURNAL_ASSUME_IMMUTABLE) >= 0); assert_se(sd_journal_add_match(j, "foobar", 0) < 0); assert_se(sd_journal_add_match(j, "foobar=waldo", 0) < 0); diff --git a/src/libsystemd/sd-journal/test-journal-stream.c b/src/libsystemd/sd-journal/test-journal-stream.c index 3a370ef311..800001cb93 100644 --- a/src/libsystemd/sd-journal/test-journal-stream.c +++ b/src/libsystemd/sd-journal/test-journal-stream.c @@ -119,7 +119,7 @@ static void run_test(void) { (void) journal_file_offline_close(two); (void) journal_file_offline_close(three); - assert_se(sd_journal_open_directory(&j, t, 0) >= 0); + assert_se(sd_journal_open_directory(&j, t, SD_JOURNAL_ASSUME_IMMUTABLE) >= 0); assert_se(sd_journal_add_match(j, "MAGIC=quux", 0) >= 0); SD_JOURNAL_FOREACH_BACKWARDS(j) { diff --git a/src/network/networkctl.c b/src/network/networkctl.c index 6f5100c605..61fa0ed556 100644 --- a/src/network/networkctl.c +++ b/src/network/networkctl.c @@ -1597,7 +1597,7 @@ static int show_logs(const LinkInfo *info) { if (arg_lines == 0) return 0; - r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY); + r = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_ASSUME_IMMUTABLE); if (r < 0) return log_error_errno(r, "Failed to open journal: %m"); diff --git a/src/shared/journal-util.c b/src/shared/journal-util.c index d73d7c47d0..ab70f4da0d 100644 --- a/src/shared/journal-util.c +++ b/src/shared/journal-util.c @@ -145,7 +145,7 @@ int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_use return r; } -int journal_open_machine(sd_journal **ret, const char *machine) { +int journal_open_machine(sd_journal **ret, const char *machine, int flags) { _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL; _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL; _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; @@ -178,7 +178,7 @@ int journal_open_machine(sd_journal **ret, const char *machine) { if (machine_fd < 0) return log_error_errno(errno, "Failed to duplicate file descriptor: %m"); - r = sd_journal_open_directory_fd(&j, machine_fd, SD_JOURNAL_OS_ROOT | SD_JOURNAL_TAKE_DIRECTORY_FD); + r = sd_journal_open_directory_fd(&j, machine_fd, SD_JOURNAL_OS_ROOT | SD_JOURNAL_TAKE_DIRECTORY_FD | flags); if (r < 0) return log_error_errno(r, "Failed to open journal in machine '%s': %m", machine); diff --git a/src/shared/journal-util.h b/src/shared/journal-util.h index afad249c90..5bd8e340b2 100644 --- a/src/shared/journal-util.h +++ b/src/shared/journal-util.h @@ -8,4 +8,4 @@ int journal_access_blocked(sd_journal *j); int journal_access_check_and_warn(sd_journal *j, bool quiet, bool want_other_users); -int journal_open_machine(sd_journal **ret, const char *machine); +int journal_open_machine(sd_journal **ret, const char *machine, int flags); diff --git a/src/shared/logs-show.c b/src/shared/logs-show.c index 1f0279cde3..18780ad30a 100644 --- a/src/shared/logs-show.c +++ b/src/shared/logs-show.c @@ -1801,7 +1801,10 @@ int show_journal_by_unit( if (how_many <= 0) return 0; - r = sd_journal_open_namespace(&j, log_namespace, journal_open_flags | SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE); + r = sd_journal_open_namespace(&j, log_namespace, + journal_open_flags | + SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE | + SD_JOURNAL_ASSUME_IMMUTABLE); if (r < 0) return log_error_errno(r, "Failed to open journal: %m"); From cb2b0326ee375eb8de3aee4cc800596c5be84d57 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 3 Jan 2024 04:28:11 +0900 Subject: [PATCH 5/6] sd-journal: drop to use Hashmap to manage journal files per boot ID As reported at https://github.com/systemd/systemd/pull/30209#issuecomment-1831344431, using hashmap in frequently called function reduces performance. Let's replace it with a single array and bsearch. Replaces #29366. Co-authored-by: Costa Tsaousis --- src/libsystemd/sd-journal/journal-internal.h | 12 +- src/libsystemd/sd-journal/sd-journal.c | 189 ++++++++++--------- 2 files changed, 111 insertions(+), 90 deletions(-) diff --git a/src/libsystemd/sd-journal/journal-internal.h b/src/libsystemd/sd-journal/journal-internal.h index e57c5208c1..0293389a78 100644 --- a/src/libsystemd/sd-journal/journal-internal.h +++ b/src/libsystemd/sd-journal/journal-internal.h @@ -12,7 +12,7 @@ #include "journal-def.h" #include "journal-file.h" #include "list.h" -#include "set.h" +#include "prioq.h" #define JOURNAL_FILES_MAX 7168u @@ -69,6 +69,11 @@ struct Directory { unsigned last_seen_generation; }; +typedef struct NewestByBootId { + sd_id128_t boot_id; + Prioq *prioq; /* JournalFile objects ordered by monotonic timestamp of last update. */ +} NewestByBootId; + struct sd_journal { int toplevel_fd; @@ -79,7 +84,10 @@ struct sd_journal { OrderedHashmap *files; IteratedCache *files_cache; MMapCache *mmap; - Hashmap *newest_by_boot_id; /* key: boot_id, value: prioq, ordered by monotonic timestamp of last update */ + + /* a bisectable array of NewestByBootId, ordered by boot id. */ + NewestByBootId *newest_by_boot_id; + size_t n_newest_by_boot_id; Location current_location; diff --git a/src/libsystemd/sd-journal/sd-journal.c b/src/libsystemd/sd-journal/sd-journal.c index d0193f124b..1c2e273841 100644 --- a/src/libsystemd/sd-journal/sd-journal.c +++ b/src/libsystemd/sd-journal/sd-journal.c @@ -38,6 +38,7 @@ #include "prioq.h" #include "process-util.h" #include "replace-var.h" +#include "sort-util.h" #include "stat-util.h" #include "stdio-util.h" #include "string-util.h" @@ -413,6 +414,99 @@ _public_ void sd_journal_flush_matches(sd_journal *j) { detach_location(j); } +static int newest_by_boot_id_compare(const NewestByBootId *a, const NewestByBootId *b) { + return id128_compare_func(&a->boot_id, &b->boot_id); +} + +static void journal_file_unlink_newest_by_boot_id(sd_journal *j, JournalFile *f) { + NewestByBootId *found; + + assert(j); + assert(f); + + if (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL) /* not linked currently, hence this is a NOP */ + return; + + found = typesafe_bsearch(&(NewestByBootId) { .boot_id = f->newest_boot_id }, + j->newest_by_boot_id, j->n_newest_by_boot_id, newest_by_boot_id_compare); + assert(found); + + assert_se(prioq_remove(found->prioq, f, &f->newest_boot_id_prioq_idx) > 0); + f->newest_boot_id_prioq_idx = PRIOQ_IDX_NULL; + + /* The prioq may be empty, but that should not cause any issue. Let's keep it. */ +} + +static void journal_clear_newest_by_boot_id(sd_journal *j) { + FOREACH_ARRAY(i, j->newest_by_boot_id, j->n_newest_by_boot_id) { + JournalFile *f; + + while ((f = prioq_peek(i->prioq))) + journal_file_unlink_newest_by_boot_id(j, f); + + prioq_free(i->prioq); + } + + j->newest_by_boot_id = mfree(j->newest_by_boot_id); + j->n_newest_by_boot_id = 0; +} + +static int journal_file_newest_monotonic_compare(const void *a, const void *b) { + const JournalFile *x = a, *y = b; + + return -CMP(x->newest_monotonic_usec, y->newest_monotonic_usec); /* Invert order, we want newest first! */ +} + +static int journal_file_reshuffle_newest_by_boot_id(sd_journal *j, JournalFile *f) { + NewestByBootId *found; + int r; + + assert(j); + assert(f); + + found = typesafe_bsearch(&(NewestByBootId) { .boot_id = f->newest_boot_id }, + j->newest_by_boot_id, j->n_newest_by_boot_id, newest_by_boot_id_compare); + if (found) { + /* There's already a priority queue for this boot ID */ + + if (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL) { + r = prioq_put(found->prioq, f, &f->newest_boot_id_prioq_idx); /* Insert if we aren't in there yet */ + if (r < 0) + return r; + } else + prioq_reshuffle(found->prioq, f, &f->newest_boot_id_prioq_idx); /* Reshuffle otherwise */ + + } else { + _cleanup_(prioq_freep) Prioq *q = NULL; + + /* No priority queue yet, then allocate one */ + + assert(f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL); /* we can't be a member either */ + + q = prioq_new(journal_file_newest_monotonic_compare); + if (!q) + return -ENOMEM; + + r = prioq_put(q, f, &f->newest_boot_id_prioq_idx); + if (r < 0) + return r; + + if (!GREEDY_REALLOC(j->newest_by_boot_id, j->n_newest_by_boot_id + 1)) { + f->newest_boot_id_prioq_idx = PRIOQ_IDX_NULL; + return -ENOMEM; + } + + j->newest_by_boot_id[j->n_newest_by_boot_id++] = (NewestByBootId) { + .boot_id = f->newest_boot_id, + .prioq = TAKE_PTR(q), + }; + + typesafe_qsort(j->newest_by_boot_id, j->n_newest_by_boot_id, newest_by_boot_id_compare); + } + + return 0; +} + static int journal_file_find_newest_for_boot_id( sd_journal *j, sd_id128_t id, @@ -427,16 +521,17 @@ static int journal_file_find_newest_for_boot_id( /* Before we use it, let's refresh the timestamp from the header, and reshuffle our prioq * accordingly. We do this only a bunch of times, to not be caught in some update loop. */ for (unsigned n_tries = 0;; n_tries++) { + NewestByBootId *found; JournalFile *f; - Prioq *q; - q = hashmap_get(j->newest_by_boot_id, &id); - if (!q) + found = typesafe_bsearch(&(NewestByBootId) { .boot_id = id }, + j->newest_by_boot_id, j->n_newest_by_boot_id, newest_by_boot_id_compare); + + f = found ? prioq_peek(found->prioq) : NULL; + if (!f) return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), "Requested delta for boot ID %s, but we have no information about that boot ID.", SD_ID128_TO_STRING(id)); - assert_se(f = prioq_peek(q)); /* we delete hashmap entries once the prioq is empty, so this must hold */ - if (f == prev || n_tries >= 5) { /* This was already the best answer in the previous run, or we tried too often, use it */ *ret = f; @@ -2315,14 +2410,10 @@ fail: } _public_ void sd_journal_close(sd_journal *j) { - Prioq *p; - if (!j || journal_origin_changed(j)) return; - while ((p = hashmap_first(j->newest_by_boot_id))) - journal_file_unlink_newest_by_boot_id(j, prioq_peek(p)); - hashmap_free(j->newest_by_boot_id); + journal_clear_newest_by_boot_id(j); sd_journal_flush_matches(j); @@ -2354,84 +2445,6 @@ _public_ void sd_journal_close(sd_journal *j) { free(j); } -static void journal_file_unlink_newest_by_boot_id(sd_journal *j, JournalFile *f) { - JournalFile *nf; - Prioq *p; - - assert(j); - assert(f); - - if (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL) /* not linked currently, hence this is a NOP */ - return; - - assert_se(p = hashmap_get(j->newest_by_boot_id, &f->newest_boot_id)); - assert_se(prioq_remove(p, f, &f->newest_boot_id_prioq_idx) > 0); - - nf = prioq_peek(p); - if (nf) - /* There's still a member in the prioq? Then make sure the hashmap key now points to its - * .newest_boot_id field (and not ours!). Not we only replace the memory of the key here, the - * value of the key (and the data associated with it) remain the same. */ - assert_se(hashmap_replace(j->newest_by_boot_id, &nf->newest_boot_id, p) >= 0); - else { - assert_se(hashmap_remove(j->newest_by_boot_id, &f->newest_boot_id) == p); - prioq_free(p); - } - - f->newest_boot_id_prioq_idx = PRIOQ_IDX_NULL; -} - -static int journal_file_newest_monotonic_compare(const void *a, const void *b) { - const JournalFile *x = a, *y = b; - - return -CMP(x->newest_monotonic_usec, y->newest_monotonic_usec); /* Invert order, we want newest first! */ -} - -static int journal_file_reshuffle_newest_by_boot_id(sd_journal *j, JournalFile *f) { - Prioq *p; - int r; - - assert(j); - assert(f); - - p = hashmap_get(j->newest_by_boot_id, &f->newest_boot_id); - if (p) { - /* There's already a priority queue for this boot ID */ - - if (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL) { - r = prioq_put(p, f, &f->newest_boot_id_prioq_idx); /* Insert if we aren't in there yet */ - if (r < 0) - return r; - } else - prioq_reshuffle(p, f, &f->newest_boot_id_prioq_idx); /* Reshuffle otherwise */ - - } else { - _cleanup_(prioq_freep) Prioq *q = NULL; - - /* No priority queue yet, then allocate one */ - - assert(f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL); /* we can't be a member either */ - - q = prioq_new(journal_file_newest_monotonic_compare); - if (!q) - return -ENOMEM; - - r = prioq_put(q, f, &f->newest_boot_id_prioq_idx); - if (r < 0) - return r; - - r = hashmap_ensure_put(&j->newest_by_boot_id, &id128_hash_ops, &f->newest_boot_id, q); - if (r < 0) { - f->newest_boot_id_prioq_idx = PRIOQ_IDX_NULL; - return r; - } - - TAKE_PTR(q); - } - - return 0; -} - static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) { uint64_t offset, mo, rt; sd_id128_t id; From 63d2c7554e12dca3cde77431acc7d32ec8a3ba8c Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 3 Jan 2024 04:28:25 +0900 Subject: [PATCH 6/6] logs-show: get timestamp and boot ID only when necessary Previously, get_display_timestamp() is unconditionally called even if we will show logs in e.g. json format. This drops unnecessary call of get_display_timestamp(). This also makes journal fields in each entry parsed only once in output_short(). Still output_verbose() twice though. This should improve performance of dumping journals. Replaces #29365. Co-authored-by: Costa Tsaousis --- src/shared/logs-show.c | 253 ++++++++++++++++++++--------------------- 1 file changed, 121 insertions(+), 132 deletions(-) diff --git a/src/shared/logs-show.c b/src/shared/logs-show.c index 18780ad30a..598c8ed12a 100644 --- a/src/shared/logs-show.c +++ b/src/shared/logs-show.c @@ -453,6 +453,39 @@ static int output_timestamp_realtime( return (int) strlen(buf); } +static void parse_display_timestamp( + sd_journal *j, + const char *realtime, + const char *monotonic, + dual_timestamp *ret_display_ts, + sd_id128_t *ret_boot_id) { + + bool realtime_good = false, monotonic_good = false, boot_id_good = false; + + assert(j); + assert(ret_display_ts); + assert(ret_boot_id); + + if (realtime) + realtime_good = safe_atou64(realtime, &ret_display_ts->realtime) >= 0; + if (!realtime_good || !VALID_REALTIME(ret_display_ts->realtime)) + realtime_good = sd_journal_get_realtime_usec(j, &ret_display_ts->realtime) >= 0; + if (!realtime_good) + ret_display_ts->realtime = USEC_INFINITY; + + if (monotonic) + monotonic_good = safe_atou64(monotonic, &ret_display_ts->monotonic) >= 0; + if (!monotonic_good || !VALID_MONOTONIC(ret_display_ts->monotonic)) + monotonic_good = boot_id_good = sd_journal_get_monotonic_usec(j, &ret_display_ts->monotonic, ret_boot_id) >= 0; + if (!monotonic_good) + ret_display_ts->monotonic = USEC_INFINITY; + + if (!boot_id_good) + boot_id_good = sd_journal_get_monotonic_usec(j, NULL, ret_boot_id) >= 0; + if (!boot_id_good) + *ret_boot_id = SD_ID128_NULL; +} + static int output_short( FILE *f, sd_journal *j, @@ -461,42 +494,43 @@ static int output_short( OutputFlags flags, const Set *output_fields, const size_t highlight[2], - const dual_timestamp *display_ts, - const sd_id128_t *boot_id, - const dual_timestamp *previous_display_ts, - const sd_id128_t *previous_boot_id) { + dual_timestamp *previous_display_ts, /* in and out */ + sd_id128_t *previous_boot_id) { /* in and out */ int r; const void *data; size_t length, n = 0; _cleanup_free_ char *hostname = NULL, *identifier = NULL, *comm = NULL, *pid = NULL, *fake_pid = NULL, *message = NULL, *priority = NULL, *transport = NULL, - *config_file = NULL, *unit = NULL, *user_unit = NULL, *documentation_url = NULL; + *config_file = NULL, *unit = NULL, *user_unit = NULL, *documentation_url = NULL, + *realtime = NULL, *monotonic = NULL; size_t hostname_len = 0, identifier_len = 0, comm_len = 0, pid_len = 0, fake_pid_len = 0, message_len = 0, priority_len = 0, transport_len = 0, config_file_len = 0, unit_len = 0, user_unit_len = 0, documentation_url_len = 0; + dual_timestamp display_ts; + sd_id128_t boot_id; int p = LOG_INFO; bool ellipsized = false, audit; const ParseFieldVec fields[] = { - PARSE_FIELD_VEC_ENTRY("_PID=", &pid, &pid_len), - PARSE_FIELD_VEC_ENTRY("_COMM=", &comm, &comm_len), - PARSE_FIELD_VEC_ENTRY("MESSAGE=", &message, &message_len), - PARSE_FIELD_VEC_ENTRY("PRIORITY=", &priority, &priority_len), - PARSE_FIELD_VEC_ENTRY("_TRANSPORT=", &transport, &transport_len), - PARSE_FIELD_VEC_ENTRY("_HOSTNAME=", &hostname, &hostname_len), - PARSE_FIELD_VEC_ENTRY("SYSLOG_PID=", &fake_pid, &fake_pid_len), - PARSE_FIELD_VEC_ENTRY("SYSLOG_IDENTIFIER=", &identifier, &identifier_len), - PARSE_FIELD_VEC_ENTRY("CONFIG_FILE=", &config_file, &config_file_len), - PARSE_FIELD_VEC_ENTRY("_SYSTEMD_UNIT=", &unit, &unit_len), - PARSE_FIELD_VEC_ENTRY("_SYSTEMD_USER_UNIT=", &user_unit, &user_unit_len), - PARSE_FIELD_VEC_ENTRY("DOCUMENTATION=", &documentation_url, &documentation_url_len), + PARSE_FIELD_VEC_ENTRY("_PID=", &pid, &pid_len ), + PARSE_FIELD_VEC_ENTRY("_COMM=", &comm, &comm_len ), + PARSE_FIELD_VEC_ENTRY("MESSAGE=", &message, &message_len ), + PARSE_FIELD_VEC_ENTRY("PRIORITY=", &priority, &priority_len ), + PARSE_FIELD_VEC_ENTRY("_TRANSPORT=", &transport, &transport_len ), + PARSE_FIELD_VEC_ENTRY("_HOSTNAME=", &hostname, &hostname_len ), + PARSE_FIELD_VEC_ENTRY("SYSLOG_PID=", &fake_pid, &fake_pid_len ), + PARSE_FIELD_VEC_ENTRY("SYSLOG_IDENTIFIER=", &identifier, &identifier_len ), + PARSE_FIELD_VEC_ENTRY("CONFIG_FILE=", &config_file, &config_file_len ), + PARSE_FIELD_VEC_ENTRY("_SYSTEMD_UNIT=", &unit, &unit_len ), + PARSE_FIELD_VEC_ENTRY("_SYSTEMD_USER_UNIT=", &user_unit, &user_unit_len ), + PARSE_FIELD_VEC_ENTRY("DOCUMENTATION=", &documentation_url, &documentation_url_len), + PARSE_FIELD_VEC_ENTRY("_SOURCE_REALTIME_TIMESTAMP=", &realtime, NULL ), + PARSE_FIELD_VEC_ENTRY("_SOURCE_MONOTONIC_TIMESTAMP=", &monotonic, NULL ), }; size_t highlight_shifted[] = {highlight ? highlight[0] : 0, highlight ? highlight[1] : 0}; assert(f); assert(j); - assert(display_ts); - assert(boot_id); assert(previous_display_ts); assert(previous_boot_id); @@ -526,6 +560,8 @@ static int output_short( if (identifier && set_contains(j->exclude_syslog_identifiers, identifier)) return 0; + parse_display_timestamp(j, realtime, monotonic, &display_ts, &boot_id); + if (!(flags & OUTPUT_SHOW_ALL)) strip_tab_ansi(&message, &message_len, highlight_shifted); @@ -538,9 +574,9 @@ static int output_short( audit = streq_ptr(transport, "audit"); if (IN_SET(mode, OUTPUT_SHORT_MONOTONIC, OUTPUT_SHORT_DELTA)) - r = output_timestamp_monotonic(f, mode, display_ts, boot_id, previous_display_ts, previous_boot_id); + r = output_timestamp_monotonic(f, mode, &display_ts, &boot_id, previous_display_ts, previous_boot_id); else - r = output_timestamp_realtime(f, j, mode, flags, display_ts); + r = output_timestamp_realtime(f, j, mode, flags, &display_ts); if (r < 0) return r; n += r; @@ -661,9 +697,51 @@ static int output_short( if (flags & OUTPUT_CATALOG) (void) print_catalog(f, j); + *previous_display_ts = display_ts; + *previous_boot_id = boot_id; + return ellipsized; } +static int get_display_timestamp( + sd_journal *j, + dual_timestamp *ret_display_ts, + sd_id128_t *ret_boot_id) { + + const void *data; + _cleanup_free_ char *realtime = NULL, *monotonic = NULL; + size_t length; + const ParseFieldVec message_fields[] = { + PARSE_FIELD_VEC_ENTRY("_SOURCE_REALTIME_TIMESTAMP=", &realtime, NULL), + PARSE_FIELD_VEC_ENTRY("_SOURCE_MONOTONIC_TIMESTAMP=", &monotonic, NULL), + }; + int r; + + assert(j); + assert(ret_display_ts); + assert(ret_boot_id); + + JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) { + r = parse_fieldv(data, length, message_fields, ELEMENTSOF(message_fields)); + if (r < 0) + return r; + + if (realtime && monotonic) + break; + } + if (r < 0) + return r; + + (void) parse_display_timestamp(j, realtime, monotonic, ret_display_ts, ret_boot_id); + + /* Restart all data before */ + sd_journal_restart_data(j); + sd_journal_restart_unique(j); + sd_journal_restart_fields(j); + + return 0; +} + static int output_verbose( FILE *f, sd_journal *j, @@ -672,35 +750,39 @@ static int output_verbose( OutputFlags flags, const Set *output_fields, const size_t highlight[2], - const dual_timestamp *display_ts, - const sd_id128_t *boot_id, - const dual_timestamp *previous_display_ts, - const sd_id128_t *previous_boot_id) { + dual_timestamp *previous_display_ts, /* unused */ + sd_id128_t *previous_boot_id) { /* unused */ const void *data; size_t length; _cleanup_free_ char *cursor = NULL; char buf[FORMAT_TIMESTAMP_MAX + 7]; + dual_timestamp display_ts; + sd_id128_t boot_id; const char *timestamp; int r; assert(f); assert(j); - assert(display_ts); - assert(boot_id); - assert(previous_display_ts); - assert(previous_boot_id); (void) sd_journal_set_data_threshold(j, 0); - if (!VALID_REALTIME(display_ts->realtime)) + r = get_display_timestamp(j, &display_ts, &boot_id); + if (IN_SET(r, -EBADMSG, -EADDRNOTAVAIL)) { + log_debug_errno(r, "Skipping message we can't read: %m"); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to get journal fields: %m"); + + if (!VALID_REALTIME(display_ts.realtime)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No valid realtime timestamp available"); r = sd_journal_get_cursor(j, &cursor); if (r < 0) return log_error_errno(r, "Failed to get cursor: %m"); - timestamp = format_timestamp_style(buf, sizeof buf, display_ts->realtime, + timestamp = format_timestamp_style(buf, sizeof buf, display_ts.realtime, flags & OUTPUT_UTC ? TIMESTAMP_US_UTC : TIMESTAMP_US); fprintf(f, "%s%s%s %s[%s]%s\n", timestamp && (flags & OUTPUT_COLOR) ? ANSI_UNDERLINE : "", @@ -789,10 +871,8 @@ static int output_export( OutputFlags flags, const Set *output_fields, const size_t highlight[2], - const dual_timestamp *display_ts, - const sd_id128_t *boot_id, - const dual_timestamp *previous_display_ts, - const sd_id128_t *previous_boot_id) { + dual_timestamp *previous_display_ts, /* unused */ + sd_id128_t *previous_boot_id) { /* unused */ sd_id128_t journal_boot_id, seqnum_id; _cleanup_free_ char *cursor = NULL; @@ -803,10 +883,6 @@ static int output_export( int r; assert(j); - assert(display_ts); - assert(boot_id); - assert(previous_display_ts); - assert(previous_boot_id); (void) sd_journal_set_data_threshold(j, 0); @@ -1058,10 +1134,8 @@ static int output_json( OutputFlags flags, const Set *output_fields, const size_t highlight[2], - const dual_timestamp *display_ts, - const sd_id128_t *boot_id, - const dual_timestamp *previous_display_ts, - const sd_id128_t *previous_boot_id) { + dual_timestamp *previous_display_ts, /* unused */ + sd_id128_t *previous_boot_id) { /* unused */ char usecbuf[CONST_MAX(DECIMAL_STR_MAX(usec_t), DECIMAL_STR_MAX(uint64_t))]; _cleanup_(json_variant_unrefp) JsonVariant *object = NULL; @@ -1076,10 +1150,6 @@ static int output_json( int r; assert(j); - assert(display_ts); - assert(boot_id); - assert(previous_display_ts); - assert(previous_boot_id); (void) sd_journal_set_data_threshold(j, flags & OUTPUT_SHOW_ALL ? 0 : JSON_THRESHOLD); @@ -1241,20 +1311,14 @@ static int output_cat( OutputFlags flags, const Set *output_fields, const size_t highlight[2], - const dual_timestamp *display_ts, - const sd_id128_t *boot_id, - const dual_timestamp *previous_display_ts, - const sd_id128_t *previous_boot_id) { + dual_timestamp *previous_display_ts, /* unused */ + sd_id128_t *previous_boot_id) { /* unused */ int r, prio = LOG_INFO; const char *field; assert(j); assert(f); - assert(display_ts); - assert(boot_id); - assert(previous_display_ts); - assert(previous_boot_id); (void) sd_journal_set_data_threshold(j, 0); @@ -1294,63 +1358,6 @@ static int output_cat( return 0; } -static int get_display_timestamp( - sd_journal *j, - dual_timestamp *ret_display_ts, - sd_id128_t *ret_boot_id) { - - const void *data; - _cleanup_free_ char *realtime = NULL, *monotonic = NULL; - size_t length = 0, realtime_len = 0, monotonic_len = 0; - const ParseFieldVec message_fields[] = { - PARSE_FIELD_VEC_ENTRY("_SOURCE_REALTIME_TIMESTAMP=", &realtime, &realtime_len), - PARSE_FIELD_VEC_ENTRY("_SOURCE_MONOTONIC_TIMESTAMP=", &monotonic, &monotonic_len), - }; - int r; - bool realtime_good = false, monotonic_good = false, boot_id_good = false; - - assert(j); - assert(ret_display_ts); - assert(ret_boot_id); - - JOURNAL_FOREACH_DATA_RETVAL(j, data, length, r) { - r = parse_fieldv(data, length, message_fields, ELEMENTSOF(message_fields)); - if (r < 0) - return r; - - if (realtime && monotonic) - break; - } - if (r < 0) - return r; - - if (realtime) - realtime_good = safe_atou64(realtime, &ret_display_ts->realtime) >= 0; - if (!realtime_good || !VALID_REALTIME(ret_display_ts->realtime)) - realtime_good = sd_journal_get_realtime_usec(j, &ret_display_ts->realtime) >= 0; - if (!realtime_good) - ret_display_ts->realtime = USEC_INFINITY; - - if (monotonic) - monotonic_good = safe_atou64(monotonic, &ret_display_ts->monotonic) >= 0; - if (!monotonic_good || !VALID_MONOTONIC(ret_display_ts->monotonic)) - monotonic_good = boot_id_good = sd_journal_get_monotonic_usec(j, &ret_display_ts->monotonic, ret_boot_id) >= 0; - if (!monotonic_good) - ret_display_ts->monotonic = USEC_INFINITY; - - if (!boot_id_good) - boot_id_good = sd_journal_get_monotonic_usec(j, NULL, ret_boot_id) >= 0; - if (!boot_id_good) - *ret_boot_id = SD_ID128_NULL; - - /* Restart all data before */ - sd_journal_restart_data(j); - sd_journal_restart_unique(j); - sd_journal_restart_fields(j); - - return 0; -} - typedef int (*output_func_t)( FILE *f, sd_journal *j, @@ -1359,10 +1366,8 @@ typedef int (*output_func_t)( OutputFlags flags, const Set *output_fields, const size_t highlight[2], - const dual_timestamp *display_ts, - const sd_id128_t *boot_id, - const dual_timestamp *previous_display_ts, - const sd_id128_t *previous_boot_id); + dual_timestamp *previous_display_ts, + sd_id128_t *previous_boot_id); static output_func_t output_funcs[_OUTPUT_MODE_MAX] = { @@ -1396,8 +1401,6 @@ int show_journal_entry( dual_timestamp *previous_display_ts, sd_id128_t *previous_boot_id) { - dual_timestamp display_ts = DUAL_TIMESTAMP_NULL; - sd_id128_t boot_id = SD_ID128_NULL; int r; assert(mode >= 0); @@ -1408,14 +1411,6 @@ int show_journal_entry( if (n_columns <= 0) n_columns = columns(); - r = get_display_timestamp(j, &display_ts, &boot_id); - if (IN_SET(r, -EBADMSG, -EADDRNOTAVAIL)) { - log_debug_errno(r, "Skipping message we can't read: %m"); - return 0; - } - if (r < 0) - return log_error_errno(r, "Failed to get journal fields: %m"); - r = output_funcs[mode]( f, j, @@ -1424,15 +1419,9 @@ int show_journal_entry( flags, output_fields, highlight, - &display_ts, - &boot_id, previous_display_ts, previous_boot_id); - /* Store timestamp and boot ID for next iteration */ - *previous_display_ts = display_ts; - *previous_boot_id = boot_id; - if (ellipsized && r > 0) *ellipsized = true;