mirror of
https://github.com/morgan9e/systemd
synced 2026-04-14 00:14:32 +09:00
utf8: add helper that determines length in bytes of last UTF-8 character in string
This commit is contained in:
@@ -609,3 +609,26 @@ size_t utf8_console_width(const char *str) {
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Returns the size in bytes of the final UTF-8 codepoint of the string s, or zero if the string is
 * empty. If n is SIZE_MAX the length of s is taken with strlen(), otherwise exactly n bytes are
 * examined. Sequences that do not validate as UTF-8 are counted as characters of one byte each. */
size_t utf8_last_length(const char *s, size_t n) {
        size_t remaining, width = 0;
        const char *p = s;

        remaining = n == SIZE_MAX ? strlen(s) : n;

        /* Walk forward one character at a time, remembering the width of the character just
         * consumed. Once the input is exhausted, that is the width of the last character. */
        while (remaining > 0) {
                int k = utf8_encoded_valid_unichar(p, remaining);

                width = k > 0 ? (size_t) k : 1; /* invalid UTF-8 counts as a single byte */

                p += width;
                remaining -= width;
        }

        return width;
}
|
||||
|
||||
@@ -62,3 +62,5 @@ static inline char32_t utf16_surrogate_pair_to_unichar(char16_t lead, char16_t t
|
||||
size_t utf8_n_codepoints(const char *str);
|
||||
int utf8_char_console_width(const char *str);
|
||||
size_t utf8_console_width(const char *str);
|
||||
|
||||
size_t utf8_last_length(const char *s, size_t n);
|
||||
|
||||
@@ -227,6 +227,18 @@ TEST(utf8_to_utf16) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(utf8_last_length) {
|
||||
ASSERT_EQ(utf8_last_length("", 0), 0U);
|
||||
ASSERT_EQ(utf8_last_length("", SIZE_MAX), 0U);
|
||||
ASSERT_EQ(utf8_last_length("a", 1), 1U);
|
||||
ASSERT_EQ(utf8_last_length("a", SIZE_MAX), 1U);
|
||||
ASSERT_EQ(utf8_last_length("ä", SIZE_MAX), strlen("ä"));
|
||||
ASSERT_EQ(utf8_last_length("👊", SIZE_MAX), strlen("👊"));
|
||||
ASSERT_EQ(utf8_last_length("koffa", SIZE_MAX), 1U);
|
||||
ASSERT_EQ(utf8_last_length("koffä", SIZE_MAX), strlen("ä"));
|
||||
ASSERT_EQ(utf8_last_length("koff👊", SIZE_MAX), strlen("👊"));
|
||||
}
|
||||
|
||||
static int intro(void) {
|
||||
log_show_color(true);
|
||||
return EXIT_SUCCESS;
|
||||
|
||||
Reference in New Issue
Block a user