aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2024-12-24 13:29:32 +0300
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2024-12-24 13:29:32 +0300
commitef897b418a4487196e1dbc18a97046f8f0aea2e8 (patch)
tree264b026de6ce805f1bf371d1b01371c4ba1aed0c /src
parente5d1253d363a209ecd1288278808e38ac87b34d9 (diff)
downloadsciteco-ef897b418a4487196e1dbc18a97046f8f0aea2e8.tar.gz
introduced true block and EOL comments
* Sequences of the form !* ... *!, previously only a convention, are now true block comments, i.e. they are parsed faster, don't spam the goto table and allow embedding of exclamation marks - only "*!" terminates the comment. * It is therefore now forbidden to have goto labels beginning with "*". * Also support "!!" to introduce EOL comments (like C++'s //). This disallows empty labels, but they weren't useful anyway. This is the shortest way to begin a comment. * All comment labels have been converted to true comments to ensure that syntax highlighting works correctly. EOL comments are used for single-line commented-out code, since they are the easiest to uncomment - you don't have to jump to the end of the line. This is a pure convention / coding style. Other people might do it differently. * It's of course still possible to abuse goto labels as comments, as TECO did for ages. * In lexing / syntax highlighting, labels and comments are highlighted differently. * When syntax highlighting, a single "!" will first be highlighted as a label since it's still ambiguous at that point. Once you type the second character (* or !), the first character is retroactively styled as a comment as well.
Diffstat (limited to 'src')
-rw-r--r--src/goto-commands.c60
-rw-r--r--src/lexer.c51
2 files changed, 85 insertions, 26 deletions
diff --git a/src/goto-commands.c b/src/goto-commands.c
index 2144fb0..2035277 100644
--- a/src/goto-commands.c
+++ b/src/goto-commands.c
@@ -33,6 +33,9 @@
#include "goto.h"
#include "goto-commands.h"
+TECO_DECLARE_STATE(teco_state_blockcomment);
+TECO_DECLARE_STATE(teco_state_eolcomment);
+
teco_string_t teco_goto_skip_label = {NULL, 0};
static gboolean
@@ -46,16 +49,18 @@ teco_state_label_initial(teco_machine_main_t *ctx, GError **error)
* NOTE: The comma is theoretically not allowed in a label
* (see <O> syntax), but is accepted anyway since labels
* are historically used as comments.
- *
- * TODO: Add support for "true" comments of the form !* ... *!
- * This would be almost trivial to implement, but if we don't
- * want any (even temporary) overhead for comments at all, we need
- * to add a new parser state.
- * I'm unsure whether !-signs should be allowed within comments.
+ * SciTECO also has true block and EOL comments, though.
*/
static teco_state_t *
teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
+ if (!ctx->goto_label.len) {
+ switch (chr) {
+ case '*': return &teco_state_blockcomment; /* `!*` */
+ case '!': return &teco_state_eolcomment; /* `!!` */
+ }
+ }
+
if (chr == '!') {
/*
* NOTE: If the label already existed, its PC will be restored
@@ -84,6 +89,12 @@ teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
return &teco_state_start;
}
+ /*
+ * The goto label is collected in parse-only mode as well
+ * since we could jump into a currently dead branch later.
+ *
+ * FIXME: Theoretically, we could avoid that at least in TECO_MODE_LEXING.
+ */
if (ctx->parent.must_undo)
undo__teco_string_truncate(&ctx->goto_label, ctx->goto_label.len);
teco_string_append_wc(&ctx->goto_label, chr);
@@ -171,3 +182,40 @@ gboolean teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine
TECO_DEFINE_STATE_EXPECTSTRING(teco_state_goto,
.process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t)teco_state_goto_process_edit_cmd
);
+
+/*
+ * True comments:
+ * They don't add entries to the goto table.
+ *
+ * NOTE: This still needs some special handling in the Scintilla lexer
+ * (for syntax highlighting) since comments always start with `!`.
+ *
+ * TECO_DEFINE_STATE_COMMENT(NAME, ...) expands to TECO_DEFINE_STATE(NAME, ...)
+ * with the state's style preset to SCE_SCITECO_COMMENT.
+ * Any further arguments are passed through after the style initializer.
+ */
+#define TECO_DEFINE_STATE_COMMENT(NAME, ...) \
+ TECO_DEFINE_STATE(NAME, \
+ .style = SCE_SCITECO_COMMENT, \
+ ##__VA_ARGS__ \
+ )
+
+/* Forward declaration: the input callback below refers to its own state. */
+TECO_DECLARE_STATE(teco_state_blockcomment_star);
+
+/*
+ * State after a `*` has been seen within a block comment.
+ *
+ * `!` completes the `*!` terminator and returns to the start state.
+ * Another `*` keeps us in this state, so that a run of asterisks
+ * directly before the `!` (as in `**!`) still terminates the comment.
+ * Any other character continues the comment body.
+ */
+static teco_state_t *
+teco_state_blockcomment_star_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
+{
+	switch (chr) {
+	case '!': return &teco_state_start;
+	case '*': return &teco_state_blockcomment_star;
+	default:  return &teco_state_blockcomment;
+	}
+}
+
+TECO_DEFINE_STATE_COMMENT(teco_state_blockcomment_star);
+
+/*
+ * Body of a `!* ... *!` block comment.
+ * Every character is skipped; an asterisk might begin the
+ * `*!` terminator and is therefore handled in a separate state.
+ */
+static teco_state_t *
+teco_state_blockcomment_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
+{
+	if (chr == '*')
+		return &teco_state_blockcomment_star;
+
+	return &teco_state_blockcomment;
+}
+
+TECO_DEFINE_STATE_COMMENT(teco_state_blockcomment);
+
+/*
+ * Body of a `!!` end-of-line comment.
+ * Everything up to and including the next line feed is skipped.
+ */
+static teco_state_t *
+teco_state_eolcomment_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
+{
+	if (chr != '\n')
+		return &teco_state_eolcomment;
+
+	return &teco_state_start;
+}
+
+TECO_DEFINE_STATE_COMMENT(teco_state_eolcomment);
diff --git a/src/lexer.c b/src/lexer.c
index ff43c1b..c0c7847 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -19,6 +19,8 @@
#include "config.h"
#endif
+#include <string.h>
+
#include <glib.h>
#include "sciteco.h"
@@ -39,20 +41,12 @@ teco_lexer_getstyle(teco_view_t *view, teco_machine_main_t *machine,
* FIXME: What about ^* and ^/?
* They are currently highlighted as commands.
*/
- if (machine->parent.current->keymacro_mask & TECO_KEYMACRO_MASK_START) {
- switch (chr) {
- case '0'...'9':
+ if (machine->parent.current->keymacro_mask & TECO_KEYMACRO_MASK_START &&
+ chr <= 0xFF) {
+ if (g_ascii_isdigit(chr))
style = SCE_SCITECO_NUMBER;
- break;
- case '+':
- case '-':
- case '*':
- case '/':
- case '#':
- case '&':
+ else if (strchr("+-*/#&", chr))
style = SCE_SCITECO_OPERATOR;
- break;
- }
}
/*
@@ -64,10 +58,7 @@ teco_lexer_getstyle(teco_view_t *view, teco_machine_main_t *machine,
/*
* Probably a syntax error, so the erroneous symbol
* is highlighted and we reset the parser's state machine.
- */
- style = SCE_SCITECO_INVALID;
-
- /*
+ *
* FIXME: Perhaps we should simply reset the state to teco_state_start?
*/
gsize macro_pc = machine->macro_pc;
@@ -75,9 +66,22 @@ teco_lexer_getstyle(teco_view_t *view, teco_machine_main_t *machine,
teco_machine_main_init(machine, NULL, FALSE);
machine->mode = TECO_MODE_LEXING;
machine->macro_pc = macro_pc;
- } else if (machine->parent.current->style == SCE_SCITECO_LABEL) {
- /* don't highlight the leading `!` as SCE_SCITECO_COMMAND */
- style = SCE_SCITECO_LABEL;
+
+ return SCE_SCITECO_INVALID;
+ }
+
+ /*
+ * Don't highlight the leading `!` in comments as SCE_SCITECO_COMMAND.
+ * True comments also begin with `!`, so make sure they are highlighted
+ * already from the second character.
+ * This is then extended back by one character in teco_lexer_step().
+ */
+ switch (machine->parent.current->style) {
+ case SCE_SCITECO_COMMENT:
+ case SCE_SCITECO_LABEL:
+ return machine->parent.current->style;
+ default:
+ break;
}
return style;
@@ -100,7 +104,7 @@ teco_lexer_step(teco_view_t *view, teco_machine_main_t *machine,
return;
}
- gsize old_pc = machine->macro_pc;
+ gssize old_pc = machine->macro_pc;
teco_style_t style = SCE_SCITECO_DEFAULT;
@@ -140,6 +144,13 @@ teco_lexer_step(teco_view_t *view, teco_machine_main_t *machine,
*cur_col += machine->macro_pc - old_pc;
+ /*
+ * True comments begin with `!*` or `!!`, but only the second character gets
+ * the correct style by default, so we extend it backwards.
+ */
+ if (style == SCE_SCITECO_COMMENT)
+ old_pc--;
+
teco_view_ssm(view, SCI_STARTSTYLING, start+old_pc, 0);
teco_view_ssm(view, SCI_SETSTYLING, machine->macro_pc-old_pc, style);