From e46352bc614cf9777ca76deb47330fb408bc1a23 Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Sat, 2 Aug 2025 13:16:16 +0300 Subject: fixed serious bug with certain alternative string termination chars in commands with multiple string arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * When `@`-modifying a command with several string arguments and choosing `{` as the alternative string termination character, the parser would get totally confused. Any sequence of `{` would be ignored and only the first non-`{` would become the termination character. Consequently you also couldn't choose a new terminator after the closing `}`. So even a documented code example from sciteco(7) wouldn't work. The same was true when using $ (escape) or ^A as the alternative termination character. * We can now correctly parse e.g. `@FR{foo}{bar}` or `@FR$foo$bar$` (even though the latter one is quite pointless). * This has probably been broken forever (it was broken even before v2.0). * Whitespace is now ignored in front of alternative termination characters as in TECO-64, so we can also write `@S /foo/` or even ``` @^Um { !* blabla *! } ``` I wanted to disallow whitespace termination characters, so the alternative would have been to throw an error. The new implementation at least adds some functionality. * Avoid redundancies when parsing no-op characters via teco_is_noop(). I assume that this is inlined and drawn into any jump table that would be generated for the switch-statement in teco_state_start_input(). * Alternative termination characters are still case-folded, even if they are Unicode glyphs, so `@IЖfooж` would work and insert `foo`. This should perhaps be restricted to ANSI characters? 
--- src/parser.c | 49 ++++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c index 347c1a6..6d4cd60 100644 --- a/src/parser.c +++ b/src/parser.c @@ -996,6 +996,11 @@ teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gch for (guint i = 0; i < len; ) { gunichar chr = g_utf8_get_char(str+i); + /* + * NOTE: We support both `[` and `{`, so this works for autocompleting + * long Q-register specifications as well. + * This may therefore insert unnecessary ^Q, but they won't hurt. + */ if (g_unichar_toupper(chr) == ctx->escape_char || (ctx->escape_char == '[' && chr == ']') || (ctx->escape_char == '{' && chr == '}')) @@ -1032,34 +1037,28 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e teco_state_t *current = ctx->parent.current; /* - * String termination handling + * Ignore whitespace immediately after @-modified commands. + * This is inspired by TECO-64. + * The alternative would have been to throw an error, + * as allowing whitespace escape_chars is harmful. */ - if (ctx->flags.modifier_at) { - if (current->expectstring.last) - /* also clears the "@" modifier flag */ - teco_machine_main_eval_at(ctx); + if (ctx->flags.modifier_at && teco_is_noop(chr)) + return current; + /* + * String termination handling + */ + if (teco_machine_main_eval_at(ctx)) { /* - * FIXME: Exclude setting at least whitespace characters as the - * new string escape character to avoid accidental errors? - * * FIXME: Should we perhaps restrict case folding escape characters * to the ANSI range (teco_ascii_toupper())? - * This would be faster than case folding each and every character + * This would be faster than case folding almost all characters * of a string argument to check against the escape char. - * - * FIXME: This has undesired effects if you try to use one of - * of these characters with multiple string arguments. 
*/ - switch (ctx->expectstring.machine.escape_char) { - case TECO_CTL_KEY('A'): - case '\e': - case '{': - if (ctx->parent.must_undo) - teco_undo_gunichar(ctx->expectstring.machine.escape_char); - ctx->expectstring.machine.escape_char = g_unichar_toupper(chr); - return current; - } + if (ctx->parent.must_undo) + teco_undo_gunichar(ctx->expectstring.machine.escape_char); + ctx->expectstring.machine.escape_char = g_unichar_toupper(chr); + return current; } /* @@ -1113,6 +1112,14 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e if (ctx->parent.must_undo) teco_undo_gunichar(ctx->expectstring.machine.escape_char); ctx->expectstring.machine.escape_char = '\e'; + } else if (ctx->expectstring.machine.escape_char == '{') { + /* + * Makes sure that after all but the last string argument, + * the escape character is reset, as in @FR{foo}{bar}. + */ + if (ctx->parent.must_undo) + teco_undo_flags(ctx->flags); + ctx->flags.modifier_at = TRUE; } ctx->expectstring.nesting = 1; -- cgit v1.2.3