fixed searches in single-byte encoded documents

* While code is guaranteed to be valid UTF-8, the same cannot be said about the result of string building.
* The search pattern can end up containing invalid UTF-8 byte sequences even when searching in UTF-8 buffers, e.g. if ^EQq inserts garbage. There are currently no checks for this.
* When searching in a raw buffer, it must be possible to search for arbitrary bytes (^EUq). Since teco_pattern2regexp() always expected clean UTF-8 input, it would sometimes skip over too many bytes and could even crash.
* Instead, teco_pattern2regexp() now takes the <S> target codepage into account.
author: Robin Haberkorn <robin.haberkorn@googlemail.com> 2024-09-11 14:30:24 +0200
committer: Robin Haberkorn <robin.haberkorn@googlemail.com> 2024-09-11 16:14:27 +0200
commit: 2a050759ab621b87d0782cc8235907a1757b46cc (patch)
tree: cde0c666146f833fc948a13c22056061f62d2619 /src/parser.c
parent: 68578072bfaf6054a96bb6bcedfccb6e56a508fe (diff)
download: sciteco-2a050759ab621b87d0782cc8235907a1757b46cc.tar.gz
1 files changed, 5 insertions, 11 deletions
diff --git a/src/parser.c b/src/parser.c
index 321803a..3c37f81 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -158,17 +158,11 @@ gboolean
 teco_execute_macro(const gchar *macro, gsize macro_len,
                    teco_qreg_table_t *qreg_table_locals, GError **error)
 {
-	/*
-	 * Validate UTF-8, but accept null bytes.
-	 * NOTE: there is g_utf8_validate_len() in Glib 2.60
-	 */
-	const gchar *p = macro;
-	while (!g_utf8_validate(p, macro_len - (p - macro), &p) && !*p)
-		p++;
-	if (p - macro < macro_len) {
-		g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
-		            "Invalid UTF-8 byte sequence at %" G_GSIZE_FORMAT,
-		            p - macro);
+	const teco_string_t str = {(gchar *)macro, macro_len};
+
+	if (!teco_string_validate_utf8(&str)) {
+		g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
+		                    "Invalid UTF-8 byte sequence in macro");
 		return FALSE;
 	}
author	Robin Haberkorn <robin.haberkorn@googlemail.com>	2024-09-11 14:30:24 +0200
committer	Robin Haberkorn <robin.haberkorn@googlemail.com>	2024-09-11 16:14:27 +0200
commit	2a050759ab621b87d0782cc8235907a1757b46cc (patch)
tree	cde0c666146f833fc948a13c22056061f62d2619 /src/parser.c
parent	68578072bfaf6054a96bb6bcedfccb6e56a508fe (diff)
download	sciteco-2a050759ab621b87d0782cc8235907a1757b46cc.tar.gz