aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author: Robin Haberkorn <robin.haberkorn@googlemail.com> 2025-08-02 13:16:16 +0300
committer: Robin Haberkorn <robin.haberkorn@googlemail.com> 2025-08-02 13:16:16 +0300
commit: e46352bc614cf9777ca76deb47330fb408bc1a23 (patch)
tree: 2e900970b9eebbeb9bab12bef451a51a7f09ed13 /src
parent: 963cd2db9b266f7521374adacb664ca8ec43d36b (diff)
download: sciteco-e46352bc614cf9777ca76deb47330fb408bc1a23.tar.gz
fixed serious bug with certain alternative string termination chars in commands with multiple string arguments
* When `@`-modifying a command with several string arguments and choosing `{` as the alternative string termination character, the parser would get totally confused. Any sequence of `{` would be ignored and only the first non-`{` would become the termination character. Consequently you also couldn't choose a new terminator after the closing `}`. So even a documented code example from sciteco(7) wouldn't work. The same was true when using $ (escape) or ^A as the alternative termination character.
* We can now correctly parse e.g. `@FR{foo}{bar}` or `@FR$foo$bar$` (even though the latter one is quite pointless).
* This has probably been broken forever (it has been broken even before v2.0).
* Whitespace is now ignored in front of alternative termination characters as in TECO-64, so we can also write `@S /foo/` or even ``` @^Um { !* blabla *! } ``` I wanted to disallow whitespace termination characters, so the alternative would have been to throw an error. The new implementation at least adds some functionality.
* Avoid redundancies when parsing no-op characters via teco_is_noop(). I assume that this is inlined and drawn into any jump-table that would be generated for the switch-statement in teco_state_start_input().
* Alternative termination characters are still case-folded, even if they are Unicode glyphs, so `@IЖfooж` would work and insert `foo`. This should perhaps be restricted to ANSI characters?
Diffstat (limited to 'src')
-rw-r--r-- src/cmdline.c | 4
-rw-r--r-- src/core-commands.c | 11
-rw-r--r-- src/core-commands.h | 8
-rw-r--r-- src/goto-commands.c | 3
-rw-r--r-- src/parser.c | 49
-rw-r--r-- src/parser.h | 2
6 files changed, 45 insertions, 32 deletions
diff --git a/src/cmdline.c b/src/cmdline.c
index 1f12c7b..089bd7a 100644
--- a/src/cmdline.c
+++ b/src/cmdline.c
@@ -531,7 +531,7 @@ teco_state_command_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *pa
while (ctx->parent.current->is_start &&
teco_cmdline.effective_len < teco_cmdline.str.len &&
- strchr(TECO_NOOPS, teco_cmdline.str.data[teco_cmdline.effective_len]))
+ teco_is_noop(teco_cmdline.str.data[teco_cmdline.effective_len]))
if (!teco_cmdline_rubin(error))
return FALSE;
@@ -541,7 +541,7 @@ teco_state_command_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *pa
/* rubout command */
while (ctx->parent.current->is_start &&
teco_cmdline.effective_len > 0 &&
- strchr(TECO_NOOPS, teco_cmdline.str.data[teco_cmdline.effective_len-1]))
+ teco_is_noop(teco_cmdline.str.data[teco_cmdline.effective_len-1]))
teco_cmdline_rubout();
do
diff --git a/src/core-commands.c b/src/core-commands.c
index c71ee95..f384272 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -722,24 +722,21 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
['T'] = {&teco_state_start, teco_state_start_typeout}
};
- switch (chr) {
/*
- * No-ops (same as TECO_NOOPS):
+ * Non-operational commands.
* These are explicitly not handled in teco_state_control,
* so that we can potentially reuse the upcaret notations like ^J.
*/
- case ' ':
- case '\f':
- case '\r':
- case '\n':
- case '\v':
+ if (teco_is_noop(chr)) {
if (ctx->flags.modifier_at ||
(ctx->flags.mode == TECO_MODE_NORMAL && ctx->flags.modifier_colon)) {
teco_error_modifier_set(error, chr);
return NULL;
}
return &teco_state_start;
+ }
+ switch (chr) {
/*$ 0 1 2 3 4 5 6 7 8 9 digit number
* [n]0|1|2|3|4|5|6|7|8|9 -> n*Radix+X -- Append digit
*
diff --git a/src/core-commands.h b/src/core-commands.h
index bf73b8c..cb28dce 100644
--- a/src/core-commands.h
+++ b/src/core-commands.h
@@ -22,8 +22,12 @@
#include "parser.h"
#include "string-utils.h"
-/** non-operational characters in teco_state_start */
-#define TECO_NOOPS " \f\r\n\v"
+/** Check whether c is a non-operational command in teco_state_start */
+static inline gboolean
+teco_is_noop(gunichar c)
+{
+ return c == ' ' || c == '\f' || c == '\r' || c == '\n' || c == '\v';
+}
gboolean teco_get_range_args(const gchar *cmd, gsize *from_ret, gsize *len_ret, GError **error);
diff --git a/src/goto-commands.c b/src/goto-commands.c
index 97c58d0..d95886d 100644
--- a/src/goto-commands.c
+++ b/src/goto-commands.c
@@ -218,6 +218,9 @@ teco_state_blockcomment_input(teco_machine_main_t *ctx, gunichar chr, GError **e
TECO_DEFINE_STATE_COMMENT(teco_state_blockcomment);
+/*
+ * `!!` line comments are inspired by TECO-64.
+ */
static teco_state_t *
teco_state_eolcomment_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
diff --git a/src/parser.c b/src/parser.c
index 347c1a6..6d4cd60 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -996,6 +996,11 @@ teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gch
for (guint i = 0; i < len; ) {
gunichar chr = g_utf8_get_char(str+i);
+ /*
+ * NOTE: We support both `[` and `{`, so this works for autocompleting
+ * long Q-register specifications as well.
+ * This may therefore insert unnecessary ^Q, but they won't hurt.
+ */
if (g_unichar_toupper(chr) == ctx->escape_char ||
(ctx->escape_char == '[' && chr == ']') ||
(ctx->escape_char == '{' && chr == '}'))
@@ -1032,34 +1037,28 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e
teco_state_t *current = ctx->parent.current;
/*
- * String termination handling
+ * Ignore whitespace immediately after @-modified commands.
+ * This is inspired by TECO-64.
+ * The alternative would have been to throw an error,
+ * as allowing whitespace escape_chars is harmful.
*/
- if (ctx->flags.modifier_at) {
- if (current->expectstring.last)
- /* also clears the "@" modifier flag */
- teco_machine_main_eval_at(ctx);
+ if (ctx->flags.modifier_at && teco_is_noop(chr))
+ return current;
+ /*
+ * String termination handling
+ */
+ if (teco_machine_main_eval_at(ctx)) {
/*
- * FIXME: Exclude setting at least whitespace characters as the
- * new string escape character to avoid accidental errors?
- *
* FIXME: Should we perhaps restrict case folding escape characters
* to the ANSI range (teco_ascii_toupper())?
- * This would be faster than case folding each and every character
+ * This would be faster than case folding almost all characters
* of a string argument to check against the escape char.
- *
- * FIXME: This has undesired effects if you try to use one of
- * of these characters with multiple string arguments.
*/
- switch (ctx->expectstring.machine.escape_char) {
- case TECO_CTL_KEY('A'):
- case '\e':
- case '{':
- if (ctx->parent.must_undo)
- teco_undo_gunichar(ctx->expectstring.machine.escape_char);
- ctx->expectstring.machine.escape_char = g_unichar_toupper(chr);
- return current;
- }
+ if (ctx->parent.must_undo)
+ teco_undo_gunichar(ctx->expectstring.machine.escape_char);
+ ctx->expectstring.machine.escape_char = g_unichar_toupper(chr);
+ return current;
}
/*
@@ -1113,6 +1112,14 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e
if (ctx->parent.must_undo)
teco_undo_gunichar(ctx->expectstring.machine.escape_char);
ctx->expectstring.machine.escape_char = '\e';
+ } else if (ctx->expectstring.machine.escape_char == '{') {
+ /*
+ * Makes sure that after all but the last string argument,
+ * the escape character is reset, as in @FR{foo}{bar}.
+ */
+ if (ctx->parent.must_undo)
+ teco_undo_flags(ctx->flags);
+ ctx->flags.modifier_at = TRUE;
}
ctx->expectstring.nesting = 1;
diff --git a/src/parser.h b/src/parser.h
index a1583d2..095f523 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -75,7 +75,9 @@ void undo__remove_index__teco_loop_stack(guint);
* FIXME: Maybe use TECO_DECLARE_VTABLE_METHOD()?
*/
typedef const struct {
+ /** whether string building characters are enabled by default */
guint string_building : 1;
+ /** whether this string argument is the last of the command */
guint last : 1;
/**