aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/string-utils.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/string-utils.h')
-rw-r--r--src/string-utils.h52
1 files changed, 46 insertions, 6 deletions
diff --git a/src/string-utils.h b/src/string-utils.h
index 26b660b..ebe25d5 100644
--- a/src/string-utils.h
+++ b/src/string-utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -26,15 +26,25 @@
/**
* Upper-case SciTECO command character.
*
- * There are implementations in glib (g_ascii_toupper) and libc,
+ * There are implementations in glib (g_ascii_toupper() and g_unichar_toupper()) and libc,
* but this implementation is sufficient for all letters used by SciTECO commands.
*/
-static inline gchar
-teco_ascii_toupper(gchar chr)
+static inline gunichar
+teco_ascii_toupper(gunichar chr)
{
/*
 * Only values in the range 'a'..'z' are mapped: clearing bit 0x20
 * turns an ASCII lower-case letter into its upper-case counterpart.
 * Every other value is returned unchanged.
 */
return chr >= 'a' && chr <= 'z' ? chr & ~0x20 : chr;
}
+/**
+ * Remove the element at index `i` from a NULL-terminated string vector.
+ *
+ * All following elements, including the terminating NULL pointer,
+ * are moved down by one slot.
+ * The vector itself is not reallocated and the removed string is
+ * not freed by this function.
+ *
+ * @param strv NULL-terminated array of string pointers.
+ * @param i Index of the element to remove.
+ *          It must not be larger than the number of elements in `strv`.
+ * @return The pointer that was stored at index `i`.
+ *         This is NULL if `i` refers to the terminator, in which case
+ *         the vector is left untouched.
+ */
+static inline gchar *
+teco_strv_remove(gchar **strv, guint i)
+{
+	gchar *ret = strv[i];
+
+	/*
+	 * Nothing to remove at the terminator.
+	 * Shifting from here would read beyond the end of the vector.
+	 */
+	if (!ret)
+		return NULL;
+
+	/* shift the tail down, copying the terminating NULL last */
+	for (; strv[i]; i++)
+		strv[i] = strv[i+1];
+
+	return ret;
+}
+
/**
* An 8-bit clean null-terminated string.
*
@@ -42,6 +52,7 @@ teco_ascii_toupper(gchar chr)
* and the allocation length is not stored.
* Just like GString, teco_string_t are always null-terminated but at the
* same time 8-bit clean (can contain null-characters).
+ * It may or may not contain UTF-8 byte sequences.
*
* The API is designed such that teco_string_t operations operate on plain
* (null-terminated) C strings, a single character or character array as well as
@@ -51,6 +62,12 @@ teco_ascii_toupper(gchar chr)
* A target teco_string_t::data is always null-terminated and thus safe to pass
* to functions expecting traditional null-terminated C strings if you can
* guarantee that it contains no null-character other than the trailing one.
+ *
+ * @warning For consistency with C idioms the underlying character type is
+ * `char`, which might be signed!
+ * Accessing individual characters may yield signed integers and that sign
+ * might be preserved when upcasting to a larger signed integer.
+ * In this case you should always cast to `guchar` first.
*/
typedef struct {
/**
@@ -58,7 +75,7 @@ typedef struct {
* The pointer is guaranteed to be non-NULL after initialization.
*/
gchar *data;
- /** Length of `data` without the trailing null-byte. */
+ /** Length of `data` in bytes, not counting the trailing null-byte. */
gsize len;
} teco_string_t;
@@ -112,6 +129,16 @@ teco_string_append_c(teco_string_t *str, gchar chr)
teco_string_append(str, &chr, sizeof(chr));
}
+/**
+ * Append a single Unicode code point, encoded as UTF-8.
+ *
+ * The buffer is always reallocated, so `target->data` may change.
+ * The result is null-terminated.
+ *
+ * @param target String to append to.
+ * @param chr Code point to encode and append.
+ *
+ * @memberof teco_string_t
+ */
+static inline void
+teco_string_append_wc(teco_string_t *target, gunichar chr)
+{
+	gchar *tail;
+
+	/*
+	 * 4 bytes should be enough for any UTF-8 sequence, but the
+	 * documentation of g_unichar_to_utf8() asks for 6 bytes.
+	 * One more byte is reserved for the trailing null.
+	 */
+	target->data = g_realloc(target->data, target->len + 6 + 1);
+
+	tail = target->data + target->len;
+	tail += g_unichar_to_utf8(chr, tail);
+	*tail = '\0';
+
+	target->len = tail - target->data;
+}
+
/**
* @fixme Should this also realloc str->data?
*
@@ -135,7 +162,7 @@ void undo__teco_string_truncate(teco_string_t *, gsize);
gchar *teco_string_echo(const gchar *str, gsize len);
-void teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column);
+void teco_string_get_coord(const gchar *str, gsize off, guint *pos, guint *line, guint *column);
typedef gsize (*teco_string_diff_t)(const teco_string_t *a, const gchar *b, gsize b_len);
gsize teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len);
@@ -170,6 +197,19 @@ teco_string_rindex(const teco_string_t *str, gchar chr)
const gchar *teco_string_last_occurrence(const teco_string_t *str, const gchar *chars);
+/**
+ * Check whether a string consists exclusively of valid UTF-8.
+ *
+ * Embedded null bytes are accepted as well.
+ * g_utf8_validate() fails at a null byte when given an explicit length,
+ * so validation is restarted behind every null byte until either the
+ * end of the string or a real encoding error is reached.
+ *
+ * @param str String to validate.
+ * @return TRUE if all `str->len` bytes are valid UTF-8 or null bytes.
+ *
+ * @note there is g_utf8_validate_len() in Glib 2.60
+ */
+static inline gboolean
+teco_string_validate_utf8(const teco_string_t *str)
+{
+	const gchar *const end = str->data + str->len;
+	const gchar *cur = str->data;
+
+	for (;;) {
+		/* on return, cur points to where validation stopped */
+		if (g_utf8_validate(cur, end - cur, &cur))
+			break;
+		/* stopped at something other than a null byte: invalid */
+		if (*cur)
+			break;
+		/* skip the embedded null byte and carry on */
+		cur++;
+	}
+
+	return cur == end;
+}
+
/** @memberof teco_string_t */
static inline void
teco_string_clear(teco_string_t *str)