diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-12-24 13:29:32 +0300 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-12-24 13:29:32 +0300 |
commit | ef897b418a4487196e1dbc18a97046f8f0aea2e8 (patch) | |
tree | 264b026de6ce805f1bf371d1b01371c4ba1aed0c /src | |
parent | e5d1253d363a209ecd1288278808e38ac87b34d9 (diff) | |
download | sciteco-ef897b418a4487196e1dbc18a97046f8f0aea2e8.tar.gz |
introduced true block and EOL comments
* Comments following the previous !* ... *! convention are now true block comments,
i.e. they are parsed faster, don't spam the goto table and allow
embedding of exclamation marks - only "*!" terminates the comment.
* It is therefore now forbidden to have goto labels beginning with "*".
* Also support "!!" to introduce EOL comments (like C++'s //).
This disallows empty labels, but they weren't useful anyway.
This is the shortest way to begin a comment.
* All comment labels have been converted to true comments, to ensure
that syntax highlighting works correctly.
EOL comments are used for single line commented-out code, since it's
easiest to uncomment - you don't have to jump to the line end.
This is a pure convention / coding style.
Other people might do it differently.
* It's of course still possible to abuse goto labels as comments
as TECO did for ages.
* In lexing / syntax highlighting, labels and comments are highlighted differently.
* When syntax highlighting, a single "!" will first be highlighted as a label
since it is still ambiguous at that point. Once you type the second character (* or !),
the first character is retroactively styled as a comment as well.
Diffstat (limited to 'src')
-rw-r--r-- | src/goto-commands.c | 60 | ||||
-rw-r--r-- | src/lexer.c | 51 |
2 files changed, 85 insertions, 26 deletions
diff --git a/src/goto-commands.c b/src/goto-commands.c index 2144fb0..2035277 100644 --- a/src/goto-commands.c +++ b/src/goto-commands.c @@ -33,6 +33,9 @@ #include "goto.h" #include "goto-commands.h" +TECO_DECLARE_STATE(teco_state_blockcomment); +TECO_DECLARE_STATE(teco_state_eolcomment); + teco_string_t teco_goto_skip_label = {NULL, 0}; static gboolean @@ -46,16 +49,18 @@ teco_state_label_initial(teco_machine_main_t *ctx, GError **error) * NOTE: The comma is theoretically not allowed in a label * (see <O> syntax), but is accepted anyway since labels * are historically used as comments. - * - * TODO: Add support for "true" comments of the form !* ... *! - * This would be almost trivial to implement, but if we don't - * want any (even temporary) overhead for comments at all, we need - * to add a new parser state. - * I'm unsure whether !-signs should be allowed within comments. + * SciTECO has true block and EOL comments, though as well. */ static teco_state_t * teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { + if (!ctx->goto_label.len) { + switch (chr) { + case '*': return &teco_state_blockcomment; /* `!*` */ + case '!': return &teco_state_eolcomment; /* `!!` */ + } + } + if (chr == '!') { /* * NOTE: If the label already existed, its PC will be restored @@ -84,6 +89,12 @@ teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error) return &teco_state_start; } + /* + * The goto label is collected in parse-only mode as well + * since we could jump into a currently dead branch later. + * + * FIXME: Theoretically, we could avoid that at least in TECO_MODE_LEXING. 
+ */ if (ctx->parent.must_undo) undo__teco_string_truncate(&ctx->goto_label, ctx->goto_label.len); teco_string_append_wc(&ctx->goto_label, chr); @@ -171,3 +182,40 @@ gboolean teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine TECO_DEFINE_STATE_EXPECTSTRING(teco_state_goto, .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t)teco_state_goto_process_edit_cmd ); + +/* + * True comments: + * They don't add entries to the goto table. + * + * NOTE: This still needs some special handling in the Scintilla lexer + * (for syntax highlighting) since comments always start with `!`. + */ +#define TECO_DEFINE_STATE_COMMENT(NAME, ...) \ + TECO_DEFINE_STATE(NAME, \ + .style = SCE_SCITECO_COMMENT, \ + ##__VA_ARGS__ \ + ) + +static teco_state_t * +teco_state_blockcomment_star_input(teco_machine_main_t *ctx, gunichar chr, GError **error) +{ + return chr == '!' ? &teco_state_start : &teco_state_blockcomment; +} + +TECO_DEFINE_STATE_COMMENT(teco_state_blockcomment_star); + +static teco_state_t * +teco_state_blockcomment_input(teco_machine_main_t *ctx, gunichar chr, GError **error) +{ + return chr == '*' ? &teco_state_blockcomment_star : &teco_state_blockcomment; +} + +TECO_DEFINE_STATE_COMMENT(teco_state_blockcomment); + +static teco_state_t * +teco_state_eolcomment_input(teco_machine_main_t *ctx, gunichar chr, GError **error) +{ + return chr == '\n' ? &teco_state_start : &teco_state_eolcomment; +} + +TECO_DEFINE_STATE_COMMENT(teco_state_eolcomment); diff --git a/src/lexer.c b/src/lexer.c index ff43c1b..c0c7847 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -19,6 +19,8 @@ #include "config.h" #endif +#include <string.h> + #include <glib.h> #include "sciteco.h" @@ -39,20 +41,12 @@ teco_lexer_getstyle(teco_view_t *view, teco_machine_main_t *machine, * FIXME: What about ^* and ^/? * They are currently highlighted as commands. 
*/ - if (machine->parent.current->keymacro_mask & TECO_KEYMACRO_MASK_START) { - switch (chr) { - case '0'...'9': + if (machine->parent.current->keymacro_mask & TECO_KEYMACRO_MASK_START && + chr <= 0xFF) { + if (g_ascii_isdigit(chr)) style = SCE_SCITECO_NUMBER; - break; - case '+': - case '-': - case '*': - case '/': - case '#': - case '&': + else if (strchr("+-*/#&", chr)) style = SCE_SCITECO_OPERATOR; - break; - } } /* @@ -64,10 +58,7 @@ teco_lexer_getstyle(teco_view_t *view, teco_machine_main_t *machine, /* * Probably a syntax error, so the erroneous symbol * is highlighted and we reset the parser's state machine. - */ - style = SCE_SCITECO_INVALID; - - /* + * * FIXME: Perhaps we should simply reset the state to teco_state_start? */ gsize macro_pc = machine->macro_pc; @@ -75,9 +66,22 @@ teco_lexer_getstyle(teco_view_t *view, teco_machine_main_t *machine, teco_machine_main_init(machine, NULL, FALSE); machine->mode = TECO_MODE_LEXING; machine->macro_pc = macro_pc; - } else if (machine->parent.current->style == SCE_SCITECO_LABEL) { - /* don't highlight the leading `!` as SCE_SCITECO_COMMAND */ - style = SCE_SCITECO_LABEL; + + return SCE_SCITECO_INVALID; + } + + /* + * Don't highlight the leading `!` in comments as SCE_SCITECO_COMMAND. + * True comments also begin with `!`, so make sure they are highlighted + * already from the second character. + * This is then extended back by one character in teco_lexer_step(). 
+ */ + switch (machine->parent.current->style) { + case SCE_SCITECO_COMMENT: + case SCE_SCITECO_LABEL: + return machine->parent.current->style; + default: + break; } return style; @@ -100,7 +104,7 @@ teco_lexer_step(teco_view_t *view, teco_machine_main_t *machine, return; } - gsize old_pc = machine->macro_pc; + gssize old_pc = machine->macro_pc; teco_style_t style = SCE_SCITECO_DEFAULT; @@ -140,6 +144,13 @@ teco_lexer_step(teco_view_t *view, teco_machine_main_t *machine, *cur_col += machine->macro_pc - old_pc; + /* + * True comments begin with `!*` or `!!`, but only the second character gets + * the correct style by default, so we extend it backwards. + */ + if (style == SCE_SCITECO_COMMENT) + old_pc--; + teco_view_ssm(view, SCI_STARTSTYLING, start+old_pc, 0); teco_view_ssm(view, SCI_SETSTYLING, machine->macro_pc-old_pc, style); |