aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2024-12-22 18:16:17 +0300
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2024-12-22 19:33:48 +0300
commit6cd143f86d3c39be633c1dcf5a4165f2f06a04f6 (patch)
treef922eb2d979042c7a89da1a3dfc8851c90776d2d
parentf4fd6de46a8d66032efd6fa86a5674edd0ec839f (diff)
downloadsciteco-6cd143f86d3c39be633c1dcf5a4165f2f06a04f6.tar.gz
fixed lexing (syntax highlighting) of the null-character (^@) in SciTECO code
* Apparently g_utf8_get_char_validated() sometimes(!) returns -2 for null-characters, so they were considered invalid byte sequences. * What's strange and unexplained is that other uses of the function, such as those behind nA and nQq, did not cause problems and returned 0 for null-bytes. * This also fixes syntax highlighting of .teco_session files, which use the null-byte as the string terminator. (.teco_session files are not highlighted automatically, though.)
-rw-r--r--src/lexer.c8
-rw-r--r--src/qreg.c2
-rw-r--r--src/view.c2
3 files changed, 8 insertions, 4 deletions
diff --git a/src/lexer.c b/src/lexer.c
index 4fbc313..ff43c1b 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -104,8 +104,12 @@ teco_lexer_step(teco_view_t *view, teco_machine_main_t *machine,
teco_style_t style = SCE_SCITECO_DEFAULT;
- gint32 chr = g_utf8_get_char_validated(macro+machine->macro_pc,
- max_len-machine->macro_pc);
+ /*
+ * g_utf8_get_char_validated() sometimes(?) returns -2 for "\0".
+ */
+ gint32 chr = macro[machine->macro_pc]
+ ? g_utf8_get_char_validated(macro+machine->macro_pc,
+ max_len-machine->macro_pc) : 0;
if (chr < 0) {
/*
* Invalid UTF-8 byte sequence:
diff --git a/src/qreg.c b/src/qreg.c
index 91ee630..271e7cb 100644
--- a/src/qreg.c
+++ b/src/qreg.c
@@ -539,7 +539,7 @@ teco_qreg_external_get_character(teco_qreg_t *qreg, teco_int_t position,
* The sign bit in UCS-4/UTF-32 is unused, so this will even
* suffice if TECO_INTEGER == 32.
*/
- *chr = (gint32)g_utf8_get_char_validated(p, -1);
+ *chr = *p ? (gint32)g_utf8_get_char_validated(p, -1) : 0;
return TRUE;
}
diff --git a/src/view.c b/src/view.c
index 1f09ce6..b8c72a5 100644
--- a/src/view.c
+++ b/src/view.c
@@ -657,7 +657,7 @@ teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len)
* The sign bit in UCS-4/UTF-32 is unused, so this will even
* suffice if TECO_INTEGER == 32.
*/
- return (gint32)g_utf8_get_char_validated(buf, -1);
+ return *buf ? (gint32)g_utf8_get_char_validated(buf, -1) : 0;
}
void