diff options
Diffstat (limited to 'src/string-utils.c')
-rw-r--r-- | src/string-utils.c | 53 |
1 files changed, 44 insertions, 9 deletions
diff --git a/src/string-utils.c b/src/string-utils.c index f2cd45e..b284760 100644 --- a/src/string-utils.c +++ b/src/string-utils.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -55,13 +55,20 @@ teco_string_echo(const gchar *str, gsize len) return ret; } -/** @memberof teco_string_t */ +/** + * Get character coordinates for a given byte index. + * + * The given string must be valid UTF-8. + * + * @memberof teco_string_t + */ void -teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column) +teco_string_get_coord(const gchar *str, gsize off, guint *pos, guint *line, guint *column) { + *pos = 0; *line = *column = 1; - for (guint i = 0; i < pos; i++) { + for (guint i = 0; i < off; i = g_utf8_next_char(str+i) - str) { switch (str[i]) { case '\r': if (str[i+1] == '\n') @@ -75,10 +82,21 @@ teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column) (*column)++; break; } + (*pos)++; } } -/** @memberof teco_string_t */ +/** + * Get the length of the prefix common to two strings. + * Works with UTF-8 and single-byte encodings. + * + * @param a Left string. + * @param b Right string. + * @param b_len Length of right string. + * @return Length of the common prefix in bytes. + * + * @memberof teco_string_t + */ gsize teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len) { @@ -91,15 +109,32 @@ teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len) return len; } -/** @memberof teco_string_t */ +/** + * Get the length of the prefix common to two UTF-8 strings + * without considering case. + * + * The UTF-8 strings must be validated, which should be the case + * for help labels and short Q-Register names. + * + * @param a Left UTF-8 string. + * @param b Right UTF-8 string. + * @param b_len Length of right UTF-8 string. + * @return Length of the common prefix in bytes. + * + * @memberof teco_string_t + */ gsize teco_string_casediff(const teco_string_t *a, const gchar *b, gsize b_len) { gsize len = 0; - while (len < a->len && len < b_len && - g_ascii_tolower(a->data[len]) == g_ascii_tolower(b[len])) - len++; + while (len < a->len && len < b_len) { + gunichar a_chr = g_utf8_get_char(a->data+len); + gunichar b_chr = g_utf8_get_char(b+len); + if (g_unichar_tolower(a_chr) != g_unichar_tolower(b_chr)) + break; + len = g_utf8_next_char(b+len) - b; + } return len; } |