From e46352bc614cf9777ca76deb47330fb408bc1a23 Mon Sep 17 00:00:00 2001
From: Robin Haberkorn <robin.haberkorn@googlemail.com>
Date: Sat, 2 Aug 2025 13:16:16 +0300
Subject: fixed serious bug with certain alternative string termination chars
 in commands with multiple string arguments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* When `@`-modifying a command with several string arguments and choosing `{` as the alternative
  string termination character, the parser would get totally confused.
  Any sequence of `{` would be ignored and only the first non-`{` would become the termination character.
  Consequently you also couldn't choose a new terminator after the closing `}`.
  So even a documented code example from sciteco(7) wouldn't work.
  The same was true when using $ (escape) or ^A as the alternative termination character.
* We can now correctly parse e.g. `@FR{foo}{bar}` or `@FR$foo$bar$` (even though the
  latter one is quite pointless).
* This bug has probably existed forever (it was already present before v2.0).
* Whitespace is now ignored in front of alternative termination characters as in TECO-64, so
  we can also write `@S /foo/` or even
  ```
  @^Um
  {
    !* blabla *!
  }
  ```
  I wanted to disallow whitespace termination characters, so the alternative would have been
  to throw an error.
  The new implementation at least adds some functionality.
  * Avoid redundancies when parsing no-op characters via teco_is_noop().
    I assume that this is inlined and drawn into any jump-table that would be
    generated for the switch-statement in teco_state_start_input().
 * Alternative termination characters are still case-folded, even if they are Unicode glyphs,
   so `@IЖfooж` would work and insert `foo`.
   This should perhaps be restricted to ANSI characters?
---
 src/parser.c | 49 ++++++++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 21 deletions(-)

(limited to 'src/parser.c')

diff --git a/src/parser.c b/src/parser.c
index 347c1a6..6d4cd60 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -996,6 +996,11 @@ teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gch
 	for (guint i = 0; i < len; ) {
 		gunichar chr = g_utf8_get_char(str+i);
 
+		/*
+		 * NOTE: We support both `[` and `{`, so this works for autocompleting
+		 * long Q-register specifications as well.
+		 * This may therefore insert unnecessary ^Q, but they won't hurt.
+		 */
 		if (g_unichar_toupper(chr) == ctx->escape_char ||
 		    (ctx->escape_char == '[' && chr == ']') ||
 		    (ctx->escape_char == '{' && chr == '}'))
@@ -1032,34 +1037,28 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e
 	teco_state_t *current = ctx->parent.current;
 
 	/*
-	 * String termination handling
+	 * Ignore whitespace immediately after @-modified commands.
+	 * This is inspired by TECO-64.
+	 * The alternative would have been to throw an error,
+	 * as allowing whitespace escape_chars is harmful.
 	 */
-	if (ctx->flags.modifier_at) {
-		if (current->expectstring.last)
-			/* also clears the "@" modifier flag */
-			teco_machine_main_eval_at(ctx);
+	if (ctx->flags.modifier_at && teco_is_noop(chr))
+		return current;
 
+	/*
+	 * String termination handling
+	 */
+	if (teco_machine_main_eval_at(ctx)) {
 		/*
-		 * FIXME: Exclude setting at least whitespace characters as the
-		 * new string escape character to avoid accidental errors?
-		 *
 		 * FIXME: Should we perhaps restrict case folding escape characters
 		 * to the ANSI range (teco_ascii_toupper())?
-		 * This would be faster than case folding each and every character
+		 * This would be faster than case folding almost all characters
 		 * of a string argument to check against the escape char.
-		 *
-		 * FIXME: This has undesired effects if you try to use one of
-		 * of these characters with multiple string arguments.
 		 */
-		switch (ctx->expectstring.machine.escape_char) {
-		case TECO_CTL_KEY('A'):
-		case '\e':
-		case '{':
-			if (ctx->parent.must_undo)
-				teco_undo_gunichar(ctx->expectstring.machine.escape_char);
-			ctx->expectstring.machine.escape_char = g_unichar_toupper(chr);
-			return current;
-		}
+		if (ctx->parent.must_undo)
+			teco_undo_gunichar(ctx->expectstring.machine.escape_char);
+		ctx->expectstring.machine.escape_char = g_unichar_toupper(chr);
+		return current;
 	}
 
 	/*
@@ -1113,6 +1112,14 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e
 			if (ctx->parent.must_undo)
 				teco_undo_gunichar(ctx->expectstring.machine.escape_char);
 			ctx->expectstring.machine.escape_char = '\e';
+		} else if (ctx->expectstring.machine.escape_char == '{') {
+			/*
+			 * Makes sure that after all but the last string argument,
+			 * the escape character is reset, as in @FR{foo}{bar}.
+			 */
+			if (ctx->parent.must_undo)
+				teco_undo_flags(ctx->flags);
+			ctx->flags.modifier_at = TRUE;
 		}
 		ctx->expectstring.nesting = 1;
 
-- 
cgit v1.2.3