utf8: add helper that determines length in bytes of last UTF-8 character in string

This commit is contained in:
Lennart Poettering
2025-02-05 10:44:19 +01:00
parent 428146dc89
commit 104a6b8c39
3 changed files with 37 additions and 0 deletions

View File

@@ -609,3 +609,26 @@ size_t utf8_console_width(const char *str) {
return n;
}
/* Returns the size in bytes of the final UTF-8 codepoint found in the first n bytes of s.
 *
 * If n is SIZE_MAX the length is taken from strlen(s). An empty string yields 0. The string is walked
 * from its start, one codepoint at a time; wherever utf8_encoded_valid_unichar() reports a non-positive
 * result the position is consumed as a single byte, i.e. invalid UTF-8 counts as 1-byte wide. */
size_t utf8_last_length(const char *s, size_t n) {
        size_t width = 0; /* size of the most recently consumed codepoint; stays 0 for empty input */

        if (n == SIZE_MAX)
                n = strlen(s);

        while (n > 0) {
                int k = utf8_encoded_valid_unichar(s, n);

                /* Anything that does not decode is stepped over byte by byte.
                 * NOTE(review): relies on the helper never reporting more than n bytes, otherwise the
                 * subtraction below would wrap — confirm against its implementation. */
                width = k > 0 ? (size_t) k : 1;

                s += width;
                n -= width;
        }

        return width;
}

View File

@@ -62,3 +62,5 @@ static inline char32_t utf16_surrogate_pair_to_unichar(char16_t lead, char16_t t
size_t utf8_n_codepoints(const char *str);
int utf8_char_console_width(const char *str);
size_t utf8_console_width(const char *str);
/* Returns the size in bytes of the last UTF-8 codepoint within the first n bytes of s, or 0 if that range
 * is empty. Pass SIZE_MAX as n to have the length determined via strlen(s). Invalid UTF-8 sequences are
 * counted as single bytes. */
size_t utf8_last_length(const char *s, size_t n);

View File

@@ -227,6 +227,18 @@ TEST(utf8_to_utf16) {
}
}
/* Checks utf8_last_length() with both an explicit byte count and SIZE_MAX (length taken from the string
 * itself), for empty, ASCII and non-ASCII input. */
TEST(utf8_last_length) {
/* Empty string: there is no last character, so the result is zero. */
ASSERT_EQ(utf8_last_length("", 0), 0U);
ASSERT_EQ(utf8_last_length("", SIZE_MAX), 0U);
/* A single ASCII character occupies one byte. */
ASSERT_EQ(utf8_last_length("a", 1), 1U);
ASSERT_EQ(utf8_last_length("a", SIZE_MAX), 1U);
/* A single non-ASCII character: the expected size is its full encoded length, obtained via strlen()
 * rather than hard-coded. */
ASSERT_EQ(utf8_last_length("ä", SIZE_MAX), strlen("ä"));
ASSERT_EQ(utf8_last_length("👊", SIZE_MAX), strlen("👊"));
/* Longer strings: only the size of the trailing character is reported, regardless of the prefix. */
ASSERT_EQ(utf8_last_length("koffa", SIZE_MAX), 1U);
ASSERT_EQ(utf8_last_length("koffä", SIZE_MAX), strlen("ä"));
ASSERT_EQ(utf8_last_length("koff👊", SIZE_MAX), strlen("👊"));
}
static int intro(void) {
log_show_color(true);
return EXIT_SUCCESS;