diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2025-04-08 23:26:38 +0300 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2025-04-09 00:33:40 +0300 |
commit | 7c0e4fbb1d1f0d19d11c7417c55a305654ab1c83 (patch) | |
tree | 35a99cefee7b63510f6765be4193b5a069c1eec2 | |
parent | a7e66807871c70a99909fcf78335309ae1505055 (diff) | |
download | sciteco-7c0e4fbb1d1f0d19d11c7417c55a305654ab1c83.tar.gz |
tightened rules for specifying modifiers
* Instead of separate stand-alone commands, they are now allowed only immediately
in front of the commands that accept them.
* The order is still insignificant if both `@` and `:` are accepted.
* The number of colon modifiers is now also checked.
We basically get this for free.
* `@` has syntactic significance, so it could not be set conditionally anyway.
Still, it was possible to provoke bugs where `@` was interpreted conditionally
as in `@ 2<I/foo/$>`.
* Even when not causing bugs, a mistyped `@` would often influence the
__next__ command, causing unexpected behavior, for instance when
typing `@(233C)W`.
* While it was theoretically possible to set `:` conditionally, it could also
be "passed through" accidentally to some command where it wasn't expected as in
`:Ifoo$ C`.
I do not know of any really useful application or idiom of a conditionally set `:`.
Should a useful application turn up after all, `:'` and `:|` could
easily be re-allowed, though.
* I was considering introducing a common parser state for modified commands,
but that would have been tricky and would have introduced a lot of redundant command lists.
So instead, we now simply check for excess modifiers everywhere.
To simplify this task, teco_machine_main_transition_t now contains flags
signaling whether the transition is allowed with `@` or `:` modifiers set.
It currently only has to be checked in the start state, after `E` and `F`.
-rw-r--r-- | doc/sciteco.7.template | 8 | ||||
-rw-r--r-- | src/core-commands.c | 176 | ||||
-rw-r--r-- | src/error.h | 4 | ||||
-rw-r--r-- | src/parser.c | 7 | ||||
-rw-r--r-- | src/parser.h | 19 | ||||
-rw-r--r-- | tests/testsuite.at | 15 |
6 files changed, 170 insertions, 59 deletions
diff --git a/doc/sciteco.7.template b/doc/sciteco.7.template index 1fe1dba..b43b4d2 100644 --- a/doc/sciteco.7.template +++ b/doc/sciteco.7.template @@ -1364,7 +1364,7 @@ are handled interactively. .SCITECO_TOPIC modifiers . A command's behaviour or syntax may be influenced by so called -modifiers written in front of commands. +modifiers written immediately in front of commands. Their specific influence of a modifier always depends on the concrete command following it. When specifying more than one modifier, their order is insignificant. @@ -1383,8 +1383,8 @@ return 0 instead. .LP .SCITECO_TOPIC :: Two colons (\fB::\fP) can sometimes further modify a command's behavior \(em -currently it is used by the \fB::S\fP search comparison command. -On all other commands it will behave like a single colon. +currently it is used by the \fB::S\fP search comparison command +and a few related search-and-replace operations. .LP .SCITECO_TOPIC @ at When put in front of a command with string arguments, @@ -1422,6 +1422,8 @@ escape character itself. The at (\fB@\fP) modifier may also sometimes be supported by commands, that do not accept string arguments. For instance, \fB@X\fIq\fR cuts text into Q-Register \fIq\fP. +In front of word movement and deletion commands (e.g. \fBW\fP), +it toggles the order of word vs. non-word characters that are skipped or deleted. \# But there is no common semantics for @ on regular commands, yet. \# We may some day add @Mq/.../ for passing string arguments to macro calls, \# but it will be yet another special case. 
diff --git a/src/core-commands.c b/src/core-commands.c index 7845dc2..b182f2e 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -620,24 +620,36 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) */ ['$'] = {&teco_state_escape}, ['!'] = {&teco_state_label}, - ['O'] = {&teco_state_goto}, - ['^'] = {&teco_state_control}, - ['F'] = {&teco_state_fcommand}, + ['O'] = {&teco_state_goto, + .modifier_at = TRUE}, + ['^'] = {&teco_state_control, + .modifier_at = TRUE, .modifier_colon = 2}, + ['F'] = {&teco_state_fcommand, + .modifier_at = TRUE, .modifier_colon = 2}, ['"'] = {&teco_state_condcommand}, - ['E'] = {&teco_state_ecommand}, - ['I'] = {&teco_state_insert_building}, - ['?'] = {&teco_state_help}, - ['S'] = {&teco_state_search}, - ['N'] = {&teco_state_search_all}, + ['E'] = {&teco_state_ecommand, + .modifier_at = TRUE, .modifier_colon = 2}, + ['I'] = {&teco_state_insert_building, + .modifier_at = TRUE}, + ['?'] = {&teco_state_help, + .modifier_at = TRUE}, + ['S'] = {&teco_state_search, + .modifier_at = TRUE, .modifier_colon = 2}, + ['N'] = {&teco_state_search_all, + .modifier_at = TRUE, .modifier_colon = 1}, ['['] = {&teco_state_pushqreg}, [']'] = {&teco_state_popqreg}, ['G'] = {&teco_state_getqregstring}, - ['Q'] = {&teco_state_queryqreg}, - ['U'] = {&teco_state_setqreginteger}, + ['Q'] = {&teco_state_queryqreg, + .modifier_colon = 1}, + ['U'] = {&teco_state_setqreginteger, + .modifier_at = TRUE, .modifier_colon = 1}, ['%'] = {&teco_state_increaseqreg}, - ['M'] = {&teco_state_macro}, - ['X'] = {&teco_state_copytoqreg}, + ['M'] = {&teco_state_macro, + .modifier_colon = 1}, + ['X'] = {&teco_state_copytoqreg, + .modifier_at = TRUE, .modifier_colon = 1}, /* * Arithmetics @@ -660,9 +672,12 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) /* * Control Structures (loops) */ - ['<'] = {&teco_state_start, teco_state_start_loop_open}, - ['>'] = {&teco_state_start, teco_state_start_loop_close}, - [';'] = 
{&teco_state_start, teco_state_start_break}, + ['<'] = {&teco_state_start, teco_state_start_loop_open, + .modifier_colon = 1}, + ['>'] = {&teco_state_start, teco_state_start_loop_close, + .modifier_colon = 1}, + [';'] = {&teco_state_start, teco_state_start_break, + .modifier_colon = 1}, /* * Command-line Editing @@ -673,13 +688,20 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) /* * Commands */ - ['J'] = {&teco_state_start, teco_state_start_jump}, - ['C'] = {&teco_state_start, teco_state_start_move}, - ['R'] = {&teco_state_start, teco_state_start_reverse}, - ['L'] = {&teco_state_start, teco_state_start_line}, - ['B'] = {&teco_state_start, teco_state_start_back}, - ['K'] = {&teco_state_start, teco_state_start_kill_lines}, - ['D'] = {&teco_state_start, teco_state_start_delete_chars}, + ['J'] = {&teco_state_start, teco_state_start_jump, + .modifier_colon = 1}, + ['C'] = {&teco_state_start, teco_state_start_move, + .modifier_colon = 1}, + ['R'] = {&teco_state_start, teco_state_start_reverse, + .modifier_colon = 1}, + ['L'] = {&teco_state_start, teco_state_start_line, + .modifier_colon = 1}, + ['B'] = {&teco_state_start, teco_state_start_back, + .modifier_colon = 1}, + ['K'] = {&teco_state_start, teco_state_start_kill_lines, + .modifier_colon = 1}, + ['D'] = {&teco_state_start, teco_state_start_delete_chars, + .modifier_colon = 1}, ['='] = {&teco_state_start, teco_state_start_print}, ['A'] = {&teco_state_start, teco_state_start_get} }; @@ -695,6 +717,11 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) case '\r': case '\n': case '\v': + if (ctx->modifier_at || + (ctx->mode == TECO_MODE_NORMAL && ctx->modifier_colon)) { + teco_error_modifier_set(error, chr); + return NULL; + } return &teco_state_start; /*$ 0 1 2 3 4 5 6 7 8 9 digit number @@ -714,6 +741,11 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) * current radix - this may be changed in the future. */ case '0' ... 
'9': + if (ctx->modifier_at || + (ctx->mode == TECO_MODE_NORMAL && ctx->modifier_colon)) { + teco_error_modifier_set(error, chr); + return NULL; + } if (ctx->mode == TECO_MODE_NORMAL) teco_expressions_add_digit(chr, ctx->qreg_table_locals->radix); return &teco_state_start; @@ -731,6 +763,11 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) break; case '<': + if (ctx->modifier_at) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_MODIFIER, + "Unexpected modifier on loop start"); + return NULL; + } if (ctx->mode != TECO_MODE_PARSE_ONLY_LOOP) break; if (ctx->parent.must_undo) @@ -739,6 +776,11 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) return &teco_state_start; case '>': + if (ctx->modifier_at) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_MODIFIER, + "Unexpected modifier on loop end"); + return NULL; + } if (ctx->mode != TECO_MODE_PARSE_ONLY_LOOP) break; if (!ctx->nest_level) { @@ -756,6 +798,11 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) * Control Structures (conditionals) */ case '|': + if (ctx->modifier_at || + (ctx->mode == TECO_MODE_NORMAL && ctx->modifier_colon)) { + teco_error_modifier_set(error, '|'); + return NULL; + } if (ctx->parent.must_undo) teco_undo_guint(ctx->__flags); if (ctx->mode == TECO_MODE_PARSE_ONLY_COND && !ctx->nest_level) @@ -766,6 +813,11 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) return &teco_state_start; case '\'': + if (ctx->modifier_at || + (ctx->mode == TECO_MODE_NORMAL && ctx->modifier_colon)) { + teco_error_modifier_set(error, '\''); + return NULL; + } switch (ctx->mode) { case TECO_MODE_PARSE_ONLY_COND: case TECO_MODE_PARSE_ONLY_COND_FORCE: @@ -788,6 +840,11 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) * Word movement and deletion commands. * These are not in the transitions table, so we can * evaluate the @-modifier. 
+ * + * All of these commands support both : and @-modifiers. + * + * FIXME: This will currently accept two colons as well, + * but should accept only one colon modifier. */ case 'w': case 'W': return teco_state_start_words(ctx, "W", 1, error); @@ -978,17 +1035,24 @@ teco_state_fcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error /* * Simple transitions */ - ['K'] = {&teco_state_search_kill}, - ['D'] = {&teco_state_search_delete}, - ['S'] = {&teco_state_replace}, - ['R'] = {&teco_state_replace_default}, - ['G'] = {&teco_state_changedir}, + ['K'] = {&teco_state_search_kill, + .modifier_at = TRUE, .modifier_colon = 1}, + ['D'] = {&teco_state_search_delete, + .modifier_at = TRUE, .modifier_colon = 2}, + ['S'] = {&teco_state_replace, + .modifier_at = TRUE, .modifier_colon = 2}, + ['R'] = {&teco_state_replace_default, + .modifier_at = TRUE, .modifier_colon = 2}, + ['G'] = {&teco_state_changedir, + .modifier_at = TRUE}, /* * Loop Flow Control */ - ['<'] = {&teco_state_start, teco_state_fcommand_loop_start}, - ['>'] = {&teco_state_start, teco_state_fcommand_loop_end}, + ['<'] = {&teco_state_start, teco_state_fcommand_loop_start, + .modifier_colon = 1}, + ['>'] = {&teco_state_start, teco_state_fcommand_loop_end, + .modifier_colon = 1}, /* * Conditional Flow Control @@ -1513,8 +1577,10 @@ teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error) /* * Simple transitions */ - ['I'] = {&teco_state_insert_indent}, - ['U'] = {&teco_state_ctlucommand}, + ['I'] = {&teco_state_insert_indent, + .modifier_at = TRUE}, + ['U'] = {&teco_state_ctlucommand, + .modifier_at = TRUE, .modifier_colon = 1}, ['^'] = {&teco_state_ascii}, ['['] = {&teco_state_escape}, @@ -1533,8 +1599,10 @@ teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error) ['O'] = {&teco_state_start, teco_state_control_octal}, ['D'] = {&teco_state_start, teco_state_control_decimal}, ['R'] = {&teco_state_start, teco_state_control_radix}, - ['Q'] = 
{&teco_state_start, teco_state_control_lines2glyphs}, - ['E'] = {&teco_state_start, teco_state_control_glyphs2bytes}, + ['Q'] = {&teco_state_start, teco_state_control_lines2glyphs, + .modifier_colon = 1}, + ['E'] = {&teco_state_start, teco_state_control_glyphs2bytes, + .modifier_colon = 1}, ['X'] = {&teco_state_start, teco_state_control_search_mode}, ['Y'] = {&teco_state_start, teco_state_control_last_range}, ['S'] = {&teco_state_start, teco_state_control_last_length} @@ -2447,27 +2515,41 @@ teco_state_ecommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error /* * Simple Transitions */ - ['%'] = {&teco_state_epctcommand}, - ['B'] = {&teco_state_edit_file}, - ['C'] = {&teco_state_execute}, - ['G'] = {&teco_state_egcommand}, + ['%'] = {&teco_state_epctcommand, + .modifier_at = TRUE}, + ['B'] = {&teco_state_edit_file, + .modifier_at = TRUE}, + ['C'] = {&teco_state_execute, + .modifier_at = TRUE, .modifier_colon = 1}, + ['G'] = {&teco_state_egcommand, + .modifier_at = TRUE, .modifier_colon = 1}, ['I'] = {&teco_state_insert_nobuilding}, - ['M'] = {&teco_state_macrofile}, - ['N'] = {&teco_state_glob_pattern}, - ['S'] = {&teco_state_scintilla_symbols}, - ['Q'] = {&teco_state_eqcommand}, - ['U'] = {&teco_state_eucommand}, - ['W'] = {&teco_state_save_file}, + ['M'] = {&teco_state_macrofile, + .modifier_at = TRUE, .modifier_colon = 1}, + ['N'] = {&teco_state_glob_pattern, + .modifier_at = TRUE, .modifier_colon = 1}, + ['S'] = {&teco_state_scintilla_symbols, + .modifier_at = TRUE}, + ['Q'] = {&teco_state_eqcommand, + .modifier_at = TRUE}, + ['U'] = {&teco_state_eucommand, + .modifier_at = TRUE, .modifier_colon = 1}, + ['W'] = {&teco_state_save_file, + .modifier_at = TRUE}, /* * Commands */ - ['F'] = {&teco_state_start, teco_state_ecommand_close}, + ['F'] = {&teco_state_start, teco_state_ecommand_close, + .modifier_colon = 1}, ['D'] = {&teco_state_start, teco_state_ecommand_flags}, ['J'] = {&teco_state_start, teco_state_ecommand_properties}, - ['L'] = 
{&teco_state_start, teco_state_ecommand_eol}, - ['E'] = {&teco_state_start, teco_state_ecommand_encoding}, - ['X'] = {&teco_state_start, teco_state_ecommand_exit} + ['L'] = {&teco_state_start, teco_state_ecommand_eol, + .modifier_colon = 1}, + ['E'] = {&teco_state_start, teco_state_ecommand_encoding, + .modifier_colon = 1}, + ['X'] = {&teco_state_start, teco_state_ecommand_exit, + .modifier_colon = 1}, }; /* diff --git a/src/error.h b/src/error.h index b672024..3c81257 100644 --- a/src/error.h +++ b/src/error.h @@ -77,10 +77,10 @@ teco_error_syntax_set(GError **error, gunichar chr) } static inline void -teco_error_modifier_set(GError **error, gchar modifier) +teco_error_modifier_set(GError **error, gchar chr) { g_set_error(error, TECO_ERROR, TECO_ERROR_MODIFIER, - "Excess \"%c\"-modifier", modifier); + "Unexpected modifier on <%c>", chr); } static inline void diff --git a/src/parser.c b/src/parser.c index f4f7595..044a741 100644 --- a/src/parser.c +++ b/src/parser.c @@ -355,6 +355,13 @@ teco_machine_main_transition_input(teco_machine_main_t *ctx, return NULL; } + if ((ctx->modifier_at && !transitions[chr].modifier_at) || + (ctx->mode == TECO_MODE_NORMAL && + ctx->modifier_colon > transitions[chr].modifier_colon)) { + teco_error_modifier_set(error, chr); + return NULL; + } + if (ctx->mode == TECO_MODE_NORMAL && transitions[chr].transition_cb) { /* * NOTE: We could also just let transition_cb return a boolean... diff --git a/src/parser.h b/src/parser.h index fe8e764..050467c 100644 --- a/src/parser.h +++ b/src/parser.h @@ -544,8 +544,27 @@ gboolean teco_execute_macro(const gchar *macro, gsize macro_len, gboolean teco_execute_file(const gchar *filename, teco_qreg_table_t *qreg_table_locals, GError **error); typedef const struct { + /** next state after receiving the input character */ teco_state_t *next; + /** + * Optional function to call during the state transition. + * + * It is called only in normal execution mode. 
+ */ void (*transition_cb)(teco_machine_main_t *ctx, GError **error); + /** + * Maximum number of `:` modifiers, that \b can be set on the input character. + * + * Colon modifiers are completely ignored in parse-only modes. + */ + guint modifier_colon : 2; + /** + * TRUE if `@`-modifier \b can be set on the input character. + * + * Since `@` has syntactic significance, + * it is checked even in parse-only mode. + */ + bool modifier_at : 1; } teco_machine_main_transition_t; /* diff --git a/tests/testsuite.at b/tests/testsuite.at index 20869f4..20a76c5 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -59,6 +59,14 @@ AT_CHECK([$SCITECO -e "(1-6*5-1)+30\"N(0/0)'"], 0, ignore, ignore) AT_CHECK([$SCITECO -e "(1-6*5-1*2*2)+33\"N(0/0)'"], 0, ignore, ignore) AT_CLEANUP +AT_SETUP([Modifiers]) +AT_CHECK([$SCITECO -e '@:W$ :@W$'], 0, ignore, ignore) +# Detect invalid modifiers +AT_CHECK([$SCITECO -e '@J'], 1, ignore, ignore) +AT_CHECK([$SCITECO -e ': '], 1, ignore, ignore) +AT_CHECK([$SCITECO -e '::C$'], 1, ignore, ignore) +AT_CLEANUP + AT_SETUP([Closing loops at the correct macro level]) AT_CHECK([$SCITECO -e '@^Ua{>} <Ma'], 1, ignore, ignore) AT_CLEANUP @@ -352,13 +360,6 @@ AT_CHECK([$SCITECO -e "| (0/0) '"], 1, ignore, ignore) AT_XFAIL_IF(true) AT_CLEANUP -AT_SETUP([Out-of-place modifiers]) -# NOTE: `J` does not currently support @-modifiers. -# This should be a syntax error. -AT_CHECK([$SCITECO -e "@I/foo^J/ @J Xa"], 1, ignore, ignore) -AT_XFAIL_IF(true) -AT_CLEANUP - # NOTE: This bug depends on specific build options of Glib's # PCRE which is not predictable. # It segfaults at least on Ubuntu 20.04 (libpcre3 v2:8.39). |