From 6cd143f86d3c39be633c1dcf5a4165f2f06a04f6 Mon Sep 17 00:00:00 2001
From: Robin Haberkorn <robin.haberkorn@googlemail.com>
Date: Sun, 22 Dec 2024 18:16:17 +0300
Subject: fixed lexing (syntax highlighting) of the null-character (^@) in
 SciTECO code

* Apparently g_utf8_get_char_validated() sometimes(!) returns -2 for null-characters,
  so it was considered an invalid byte sequence.
* What's strange and so far unexplained is that other uses of the function, such as those
  behind nA and nQq, did not cause problems and returned 0 for null-bytes.
* This also fixes syntax highlighting of .teco_session files which use the null-byte as the
  string terminator.
  (.teco_session files are not highlighted automatically, though.)
---
 src/lexer.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'src/lexer.c')

diff --git a/src/lexer.c b/src/lexer.c
index 4fbc313..ff43c1b 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -104,8 +104,12 @@ teco_lexer_step(teco_view_t *view, teco_machine_main_t *machine,
 
 	teco_style_t style = SCE_SCITECO_DEFAULT;
 
-	gint32 chr = g_utf8_get_char_validated(macro+machine->macro_pc,
-	                                       max_len-machine->macro_pc);
+	/*
+	 * g_utf8_get_char_validated() sometimes(?) returns -2 for "\0".
+	 */
+	gint32 chr = macro[machine->macro_pc]
+			? g_utf8_get_char_validated(macro+machine->macro_pc,
+			                            max_len-machine->macro_pc) : 0;
 	if (chr < 0) {
 		/*
 		 * Invalid UTF-8 byte sequence:
-- 
cgit v1.2.3