about | summary | refs | log | tree | commit | diff | homepage
path: root/src/string-utils.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/string-utils.c')
-rw-r--r-- src/string-utils.c 53
1 file changed, 44 insertions, 9 deletions
diff --git a/src/string-utils.c b/src/string-utils.c
index f2cd45e..b284760 100644
--- a/src/string-utils.c
+++ b/src/string-utils.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -55,13 +55,20 @@ teco_string_echo(const gchar *str, gsize len)
return ret;
}
-/** @memberof teco_string_t */
+/**
+ * Get character coordinates for a given byte index.
+ *
+ * The given string must be valid UTF-8.
+ *
+ * @memberof teco_string_t
+ */
void
-teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column)
+teco_string_get_coord(const gchar *str, gsize off, guint *pos, guint *line, guint *column)
{
+ *pos = 0;
*line = *column = 1;
- for (guint i = 0; i < pos; i++) {
+ for (guint i = 0; i < off; i = g_utf8_next_char(str+i) - str) {
switch (str[i]) {
case '\r':
if (str[i+1] == '\n')
@@ -75,10 +82,21 @@ teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column)
(*column)++;
break;
}
+ (*pos)++;
}
}
-/** @memberof teco_string_t */
+/**
+ * Get the length of the prefix common to two strings.
+ * Works with UTF-8 and single-byte encodings.
+ *
+ * @param a Left string.
+ * @param b Right string.
+ * @param b_len Length of right string.
+ * @return Length of the common prefix in bytes.
+ *
+ * @memberof teco_string_t
+ */
gsize
teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len)
{
@@ -91,15 +109,32 @@ teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len)
return len;
}
-/** @memberof teco_string_t */
+/**
+ * Get the length of the prefix common to two UTF-8 strings
+ * without considering case.
+ *
+ * The UTF-8 strings must be validated, which should be the case
+ * for help labels and short Q-Register names.
+ *
+ * @param a Left UTF-8 string.
+ * @param b Right UTF-8 string.
+ * @param b_len Length of right UTF-8 string.
+ * @return Length of the common prefix in bytes.
+ *
+ * @memberof teco_string_t
+ */
gsize
teco_string_casediff(const teco_string_t *a, const gchar *b, gsize b_len)
{
gsize len = 0;
- while (len < a->len && len < b_len &&
- g_ascii_tolower(a->data[len]) == g_ascii_tolower(b[len]))
- len++;
+ while (len < a->len && len < b_len) {
+ gunichar a_chr = g_utf8_get_char(a->data+len);
+ gunichar b_chr = g_utf8_get_char(b+len);
+ if (g_unichar_tolower(a_chr) != g_unichar_tolower(b_chr))
+ break;
+ len = g_utf8_next_char(b+len) - b;
+ }
return len;
}