diff options
Diffstat (limited to 'src/string-utils.h')
-rw-r--r-- | src/string-utils.h | 52 |
1 files changed, 46 insertions, 6 deletions
diff --git a/src/string-utils.h b/src/string-utils.h
index 26b660b..ebe25d5 100644
--- a/src/string-utils.h
+++ b/src/string-utils.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -26,15 +26,25 @@
 /**
  * Upper-case SciTECO command character.
  *
- * There are implementations in glib (g_ascii_toupper) and libc,
+ * There are implementations in glib (g_ascii_toupper() and g_unichar_toupper()) and libc,
  * but this implementation is sufficient for all letters used by SciTECO commands.
  */
-static inline gchar
-teco_ascii_toupper(gchar chr)
+static inline gunichar
+teco_ascii_toupper(gunichar chr)
 {
 	return chr >= 'a' && chr <= 'z' ? chr & ~0x20 : chr;
 }
 
+static inline gchar *
+teco_strv_remove(gchar **strv, guint i) /* Unlinks element i from a NULL-terminated vector and returns it; the element itself is not freed here. */
+{
+	gchar *ret = strv[i];
+	do /* shift every later entry, including the terminating NULL, down by one slot */
+		strv[i] = strv[i+1];
+	while (strv[++i]); /* NOTE(review): strv[i+1] is read before any test, so i presumably has to index a non-NULL entry - confirm with callers */
+	return ret;
+}
+
 /**
  * An 8-bit clean null-terminated string.
  *
@@ -42,6 +52,7 @@ teco_ascii_toupper(gchar chr)
  * and the allocation length is not stored.
  * Just like GString, teco_string_t are always null-terminated but at the
  * same time 8-bit clean (can contain null-characters).
+ * It may or may not contain UTF-8 byte sequences.
  *
  * The API is designed such that teco_string_t operations operate on plain
  * (null-terminated) C strings, a single character or character array as well as
@@ -51,6 +62,12 @@ teco_ascii_toupper(gchar chr)
  * A target teco_string_t::data is always null-terminated and thus safe to pass
  * to functions expecting traditional null-terminated C strings if you can
  * guarantee that it contains no null-character other than the trailing one.
+ *
+ * @warning For consistency with C idioms the underlying character type is
+ * `char`, which might be signed!
+ * Accessing individual characters may yield signed integers and that sign
+ * might be preserved when upcasting to a larger signed integer.
+ * In this case you should always cast to `guchar` first.
  */
 typedef struct {
 	/**
@@ -58,7 +75,7 @@ typedef struct {
 	 * The pointer is guaranteed to be non-NULL after initialization.
 	 */
 	gchar *data;
-	/** Length of `data` without the trailing null-byte. */
+	/** Length of `data` without the trailing null-byte in bytes. */
 	gsize len;
 } teco_string_t;
 
@@ -112,6 +129,16 @@ teco_string_append_c(teco_string_t *str, gchar chr)
 	teco_string_append(str, &chr, sizeof(chr));
 }
 
+/** Append the code point chr to target, encoded as UTF-8. @memberof teco_string_t */
+static inline void
+teco_string_append_wc(teco_string_t *target, gunichar chr)
+{
+	/* 4 bytes should be enough, but we better follow the documentation */
+	target->data = g_realloc(target->data, target->len + 6 + 1); /* grow for the worst case of 6 encoded bytes plus the trailing null byte */
+	target->len += g_unichar_to_utf8(chr, target->data+target->len); /* encode in place at the old end; the return value is the number of bytes written */
+	target->data[target->len] = '\0'; /* keep the string null-terminated */
+}
+
 /**
  * @fixme Should this also realloc str->data?
  *
@@ -135,7 +162,7 @@ void undo__teco_string_truncate(teco_string_t *, gsize);
 
 gchar *teco_string_echo(const gchar *str, gsize len);
 
-void teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column);
+void teco_string_get_coord(const gchar *str, gsize off, guint *pos, guint *line, guint *column);
 
 typedef gsize (*teco_string_diff_t)(const teco_string_t *a, const gchar *b, gsize b_len);
 gsize teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len);
@@ -170,6 +197,19 @@ teco_string_rindex(const teco_string_t *str, gchar chr)
 
 const gchar *teco_string_last_occurrence(const teco_string_t *str, const gchar *chars);
 
+/**
+ * Validate whether string consists exclusively of valid UTF-8, but accept null bytes.
+ * @note there is g_utf8_validate_len() in Glib 2.60
+ */
+static inline gboolean
+teco_string_validate_utf8(const teco_string_t *str)
+{
+	const gchar *p = str->data;
+	while (!g_utf8_validate(p, str->len - (p - str->data), &p) && !*p) /* validation stopped at a null byte: step over it and resume with the remaining bytes */
+		p++;
+	return p - str->data == str->len; /* TRUE only if p advanced across all str->len bytes */
+}
+
 /** @memberof teco_string_t */
 static inline void
 teco_string_clear(teco_string_t *str) |