From e46352bc614cf9777ca76deb47330fb408bc1a23 Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Sat, 2 Aug 2025 13:16:16 +0300 Subject: fixed serious bug with certain alternative string termination chars in commands with multiple string arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * When `@`-modifying a command with several string arguments and choosing `{` as the alternative string termination character, the parser would get totally confused. Any sequence of `{` would be ignored and only the first non-`{` would become the termination character. Consequently you also couldn't choose a new terminator after the closing `}`. So even a documented code example from sciteco(7) wouldn't work. The same was true when using $ (escape) or ^A as the alternative termination character. * We can now correctly parse e.g. `@FR{foo}{bar}` or `@FR$foo$bar$` (even though the latter one is quite pointless). * This has probably been broken forever (it was already broken before v2.0). * Whitespace is now ignored in front of alternative termination characters as in TECO-64, so we can also write `@S /foo/` or even ``` @^Um { !* blabla *! } ``` I wanted to disallow whitespace termination characters, so the alternative would have been to throw an error. The new implementation at least adds some functionality. * Avoid redundancies when parsing no-op characters via teco_is_noop(). I assume that this is inlined and drawn into any jump table that would be generated for the switch-statement in teco_state_start_input(). * Alternative termination characters are still case-folded, even if they are Unicode glyphs, so `@IЖfooж` would work and insert `foo`. This should perhaps be restricted to ANSI characters? 
--- src/cmdline.c | 4 ++-- src/core-commands.c | 11 ++++------- src/core-commands.h | 8 ++++++-- src/goto-commands.c | 3 +++ src/parser.c | 49 ++++++++++++++++++++++++++++--------------------- src/parser.h | 2 ++ 6 files changed, 45 insertions(+), 32 deletions(-) (limited to 'src') diff --git a/src/cmdline.c b/src/cmdline.c index 1f12c7b..089bd7a 100644 --- a/src/cmdline.c +++ b/src/cmdline.c @@ -531,7 +531,7 @@ teco_state_command_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *pa while (ctx->parent.current->is_start && teco_cmdline.effective_len < teco_cmdline.str.len && - strchr(TECO_NOOPS, teco_cmdline.str.data[teco_cmdline.effective_len])) + teco_is_noop(teco_cmdline.str.data[teco_cmdline.effective_len])) if (!teco_cmdline_rubin(error)) return FALSE; @@ -541,7 +541,7 @@ teco_state_command_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *pa /* rubout command */ while (ctx->parent.current->is_start && teco_cmdline.effective_len > 0 && - strchr(TECO_NOOPS, teco_cmdline.str.data[teco_cmdline.effective_len-1])) + teco_is_noop(teco_cmdline.str.data[teco_cmdline.effective_len-1])) teco_cmdline_rubout(); do diff --git a/src/core-commands.c b/src/core-commands.c index c71ee95..f384272 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -722,24 +722,21 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) ['T'] = {&teco_state_start, teco_state_start_typeout} }; - switch (chr) { /* - * No-ops (same as TECO_NOOPS): + * Non-operational commands. * These are explicitly not handled in teco_state_control, * so that we can potentially reuse the upcaret notations like ^J. 
*/ - case ' ': - case '\f': - case '\r': - case '\n': - case '\v': + if (teco_is_noop(chr)) { if (ctx->flags.modifier_at || (ctx->flags.mode == TECO_MODE_NORMAL && ctx->flags.modifier_colon)) { teco_error_modifier_set(error, chr); return NULL; } return &teco_state_start; + } + switch (chr) { /*$ 0 1 2 3 4 5 6 7 8 9 digit number * [n]0|1|2|3|4|5|6|7|8|9 -> n*Radix+X -- Append digit * diff --git a/src/core-commands.h b/src/core-commands.h index bf73b8c..cb28dce 100644 --- a/src/core-commands.h +++ b/src/core-commands.h @@ -22,8 +22,12 @@ #include "parser.h" #include "string-utils.h" -/** non-operational characters in teco_state_start */ -#define TECO_NOOPS " \f\r\n\v" +/** Check whether c is a non-operational command in teco_state_start */ +static inline gboolean +teco_is_noop(gunichar c) +{ + return c == ' ' || c == '\f' || c == '\r' || c == '\n' || c == '\v'; +} gboolean teco_get_range_args(const gchar *cmd, gsize *from_ret, gsize *len_ret, GError **error); diff --git a/src/goto-commands.c b/src/goto-commands.c index 97c58d0..d95886d 100644 --- a/src/goto-commands.c +++ b/src/goto-commands.c @@ -218,6 +218,9 @@ teco_state_blockcomment_input(teco_machine_main_t *ctx, gunichar chr, GError **e TECO_DEFINE_STATE_COMMENT(teco_state_blockcomment); +/* + * `!!` line comments are inspired by TECO-64. + */ static teco_state_t * teco_state_eolcomment_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { diff --git a/src/parser.c b/src/parser.c index 347c1a6..6d4cd60 100644 --- a/src/parser.c +++ b/src/parser.c @@ -996,6 +996,11 @@ teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gch for (guint i = 0; i < len; ) { gunichar chr = g_utf8_get_char(str+i); + /* + * NOTE: We support both `[` and `{`, so this works for autocompleting + * long Q-register specifications as well. + * This may therefore insert unnecessary ^Q, but they won't hurt. 
+ */ if (g_unichar_toupper(chr) == ctx->escape_char || (ctx->escape_char == '[' && chr == ']') || (ctx->escape_char == '{' && chr == '}')) @@ -1032,34 +1037,28 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e teco_state_t *current = ctx->parent.current; /* - * String termination handling + * Ignore whitespace immediately after @-modified commands. + * This is inspired by TECO-64. + * The alternative would have been to throw an error, + * as allowing whitespace escape_chars is harmful. */ - if (ctx->flags.modifier_at) { - if (current->expectstring.last) - /* also clears the "@" modifier flag */ - teco_machine_main_eval_at(ctx); + if (ctx->flags.modifier_at && teco_is_noop(chr)) + return current; + /* + * String termination handling + */ + if (teco_machine_main_eval_at(ctx)) { /* - * FIXME: Exclude setting at least whitespace characters as the - * new string escape character to avoid accidental errors? - * * FIXME: Should we perhaps restrict case folding escape characters * to the ANSI range (teco_ascii_toupper())? - * This would be faster than case folding each and every character + * This would be faster than case folding almost all characters * of a string argument to check against the escape char. - * - * FIXME: This has undesired effects if you try to use one of - * of these characters with multiple string arguments. 
*/ - switch (ctx->expectstring.machine.escape_char) { - case TECO_CTL_KEY('A'): - case '\e': - case '{': - if (ctx->parent.must_undo) - teco_undo_gunichar(ctx->expectstring.machine.escape_char); - ctx->expectstring.machine.escape_char = g_unichar_toupper(chr); - return current; - } + if (ctx->parent.must_undo) + teco_undo_gunichar(ctx->expectstring.machine.escape_char); + ctx->expectstring.machine.escape_char = g_unichar_toupper(chr); + return current; } /* @@ -1113,6 +1112,14 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e if (ctx->parent.must_undo) teco_undo_gunichar(ctx->expectstring.machine.escape_char); ctx->expectstring.machine.escape_char = '\e'; + } else if (ctx->expectstring.machine.escape_char == '{') { + /* + * Makes sure that after all but the last string argument, + * the escape character is reset, as in @FR{foo}{bar}. + */ + if (ctx->parent.must_undo) + teco_undo_flags(ctx->flags); + ctx->flags.modifier_at = TRUE; } ctx->expectstring.nesting = 1; diff --git a/src/parser.h b/src/parser.h index a1583d2..095f523 100644 --- a/src/parser.h +++ b/src/parser.h @@ -75,7 +75,9 @@ void undo__remove_index__teco_loop_stack(guint); * FIXME: Maybe use TECO_DECLARE_VTABLE_METHOD()? */ typedef const struct { + /** whether string building characters are enabled by default */ guint string_building : 1; + /** whether this string argument is the last of the command */ guint last : 1; /** -- cgit v1.2.3