string-util: add a function to determine levenshtein distance of two strings

This commit is contained in:
Lennart Poettering
2023-08-21 13:38:03 +02:00
parent 1126841553
commit 7ef5b0a4d8
3 changed files with 90 additions and 0 deletions

View File

@@ -1446,3 +1446,67 @@ bool version_is_valid_versionspec(const char *s) {
return true;
}
ssize_t strlevenshtein(const char *x, const char *y) {
_cleanup_free_ size_t *t0 = NULL, *t1 = NULL, *t2 = NULL;
size_t xl, yl;
/* This is inspired from the Linux kernel's Levenshtein implementation */
if (streq_ptr(x, y))
return 0;
xl = strlen_ptr(x);
if (xl > SSIZE_MAX)
return -E2BIG;
yl = strlen_ptr(y);
if (yl > SSIZE_MAX)
return -E2BIG;
if (isempty(x))
return yl;
if (isempty(y))
return xl;
t0 = new0(size_t, yl + 1);
if (!t0)
return -ENOMEM;
t1 = new0(size_t, yl + 1);
if (!t1)
return -ENOMEM;
t2 = new0(size_t, yl + 1);
if (!t2)
return -ENOMEM;
for (size_t i = 0; i <= yl; i++)
t1[i] = i;
for (size_t i = 0; i < xl; i++) {
t2[0] = i + 1;
for (size_t j = 0; j < yl; j++) {
/* Substitution */
t2[j+1] = t1[j] + (x[i] != y[j]);
/* Swap */
if (i > 0 && j > 0 && x[i-1] == y[j] && x[i] == y[j-1] && t2[j+1] > t0[j-1] + 1)
t2[j+1] = t0[j-1] + 1;
/* Deletion */
if (t2[j+1] > t1[j+1] + 1)
t2[j+1] = t1[j+1] + 1;
/* Insertion */
if (t2[j+1] > t2[j] + 1)
t2[j+1] = t2[j] + 1;
}
size_t *dummy = t0;
t0 = t1;
t1 = t2;
t2 = dummy;
}
return t1[yl];
}