From 6cd143f86d3c39be633c1dcf5a4165f2f06a04f6 Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Sun, 22 Dec 2024 18:16:17 +0300 Subject: fixed lexing (syntax highlighting) of the null-character (^@) in SciTECO code * Apparently g_utf8_get_char_validated() sometimes(!) returns -2 for null-characters, so it was considered an invalid byte sequence. * What's strange and unexplainable is that other uses of the function, as are behind nA and nQq, did not cause problems and returned 0 for null-bytes. * This also fixes syntax highlighting of .teco_session files which use the null-byte as the string terminator. (.teco_session files are not highlighted automatically, though.) --- src/lexer.c | 8 ++++++-- src/qreg.c | 2 +- src/view.c | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index 4fbc313..ff43c1b 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -104,8 +104,12 @@ teco_lexer_step(teco_view_t *view, teco_machine_main_t *machine, teco_style_t style = SCE_SCITECO_DEFAULT; - gint32 chr = g_utf8_get_char_validated(macro+machine->macro_pc, - max_len-machine->macro_pc); + /* + * g_utf8_get_char_validated() sometimes(?) returns -2 for "\0". + */ + gint32 chr = macro[machine->macro_pc] + ? g_utf8_get_char_validated(macro+machine->macro_pc, + max_len-machine->macro_pc) : 0; if (chr < 0) { /* * Invalid UTF-8 byte sequence: diff --git a/src/qreg.c b/src/qreg.c index 91ee630..271e7cb 100644 --- a/src/qreg.c +++ b/src/qreg.c @@ -539,7 +539,7 @@ teco_qreg_external_get_character(teco_qreg_t *qreg, teco_int_t position, * The sign bit in UCS-4/UTF-32 is unused, so this will even * suffice if TECO_INTEGER == 32. */ - *chr = (gint32)g_utf8_get_char_validated(p, -1); + *chr = *p ? 
(gint32)g_utf8_get_char_validated(p, -1) : 0; return TRUE; } diff --git a/src/view.c b/src/view.c index 1f09ce6..b8c72a5 100644 --- a/src/view.c +++ b/src/view.c @@ -657,7 +657,7 @@ teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len) * The sign bit in UCS-4/UTF-32 is unused, so this will even * suffice if TECO_INTEGER == 32. */ - return (gint32)g_utf8_get_char_validated(buf, -1); + return *buf ? (gint32)g_utf8_get_char_validated(buf, -1) : 0; } void -- cgit v1.2.3