aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am1
-rw-r--r--src/core-commands.c20
-rw-r--r--src/goto-commands.c4
-rw-r--r--src/interface-curses/interface.c2
-rw-r--r--src/interface-gtk/interface.c2
-rw-r--r--src/interface.c8
-rw-r--r--src/interface.h1
-rw-r--r--src/lexer.c235
-rw-r--r--src/lexer.h36
-rw-r--r--src/parser.h13
-rw-r--r--src/qreg-commands.h2
-rw-r--r--src/symbols.c11
-rw-r--r--src/view.c13
-rw-r--r--src/view.h2
14 files changed, 329 insertions, 21 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 1e2056e..055cde7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -52,6 +52,7 @@ libsciteco_base_la_SOURCES = main.c sciteco.h list.h \
help.c help.h \
rb3str.c rb3str.h \
symbols.c symbols.h \
+ lexer.c lexer.h \
view.c view.h \
interface.c interface.h
# NOTE: We cannot link in Scintilla (static library) into
diff --git a/src/core-commands.c b/src/core-commands.c
index 2f473ce..52b577d 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -31,6 +31,7 @@
#include "expressions.h"
#include "ring.h"
#include "parser.h"
+#include "lexer.h"
#include "symbols.h"
#include "search.h"
#include "spawn.h"
@@ -1293,7 +1294,8 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_start,
.end_of_macro_cb = NULL, /* Allowed at the end of a macro! */
.is_start = TRUE,
- .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE
+ .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE,
+ .style = SCE_SCITECO_COMMAND
);
/*$ F<
@@ -1450,7 +1452,9 @@ teco_state_fcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error
teco_ascii_toupper(chr), error);
}
-TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_fcommand);
+TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_fcommand,
+ .style = SCE_SCITECO_COMMAND
+);
static void
teco_undo_change_dir_action(gchar **dir, gboolean run)
@@ -1657,7 +1661,9 @@ teco_state_condcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **er
return &teco_state_start;
}
-TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_condcommand);
+TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_condcommand,
+ .style = SCE_SCITECO_OPERATOR
+);
/*$ ^_ negate
* n^_ -> ~n -- Binary negation
@@ -2055,7 +2061,9 @@ teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
teco_ascii_toupper(chr), error);
}
-TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_control);
+TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_control,
+ .style = SCE_SCITECO_COMMAND
+);
static teco_state_t *
teco_state_ascii_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
@@ -2956,7 +2964,9 @@ teco_state_ecommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error
teco_ascii_toupper(chr), error);
}
-TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_ecommand);
+TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_ecommand,
+ .style = SCE_SCITECO_COMMAND
+);
gboolean
teco_state_insert_initial(teco_machine_main_t *ctx, GError **error)
diff --git a/src/goto-commands.c b/src/goto-commands.c
index a8a9689..2144fb0 100644
--- a/src/goto-commands.c
+++ b/src/goto-commands.c
@@ -27,6 +27,7 @@
#include "string-utils.h"
#include "expressions.h"
#include "parser.h"
+#include "lexer.h"
#include "core-commands.h"
#include "undo.h"
#include "goto.h"
@@ -90,7 +91,8 @@ teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
}
TECO_DEFINE_STATE(teco_state_label,
- .initial_cb = (teco_state_initial_cb_t)teco_state_label_initial
+ .initial_cb = (teco_state_initial_cb_t)teco_state_label_initial,
+ .style = SCE_SCITECO_LABEL
);
static teco_state_t *
diff --git a/src/interface-curses/interface.c b/src/interface-curses/interface.c
index 3cff2d8..b9ca516 100644
--- a/src/interface-curses/interface.c
+++ b/src/interface-curses/interface.c
@@ -276,7 +276,7 @@ teco_xterm_version(void)
static void
teco_view_scintilla_notify(void *sci, int iMessage, SCNotification *notify, void *user_data)
{
- teco_interface_process_notify(notify);
+ teco_view_process_notify((teco_view_t *)sci, notify);
}
teco_view_t *
diff --git a/src/interface-gtk/interface.c b/src/interface-gtk/interface.c
index 829310a..0dbd2ba 100644
--- a/src/interface-gtk/interface.c
+++ b/src/interface-gtk/interface.c
@@ -112,7 +112,7 @@ static void
teco_view_scintilla_notify(ScintillaObject *sci, gint iMessage,
SCNotification *notify, gpointer user_data)
{
- teco_interface_process_notify(notify);
+ teco_view_process_notify((teco_view_t *)sci, notify);
}
teco_view_t *
diff --git a/src/interface.c b/src/interface.c
index 2e2d64e..2973dd2 100644
--- a/src/interface.c
+++ b/src/interface.c
@@ -110,11 +110,3 @@ teco_interface_stdio_vmsg(teco_msg_t type, const gchar *fmt, va_list ap)
g_vfprintf(stream, fmt, ap);
fputc('\n', stream);
}
-
-void
-teco_interface_process_notify(SCNotification *notify)
-{
-#ifdef DEBUG
- g_printf("SCINTILLA NOTIFY: code=%d\n", notify->nmhdr.code);
-#endif
-}
diff --git a/src/interface.h b/src/interface.h
index 32db6b5..80da8d9 100644
--- a/src/interface.h
+++ b/src/interface.h
@@ -149,7 +149,6 @@ gboolean teco_interface_event_loop(GError **error);
*/
/** @protected */
void teco_interface_stdio_vmsg(teco_msg_t type, const gchar *fmt, va_list ap);
-void teco_interface_process_notify(SCNotification *notify);
/** @pure */
void teco_interface_cleanup(void);
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..4fbc313
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2012-2024 Robin Haberkorn
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <glib.h>
+
+#include "sciteco.h"
+#include "view.h"
+#include "parser.h"
+#include "lexer.h"
+
+/**
+ * Determine the Scintilla style for a single character of SciTECO code.
+ *
+ * The character is fed into the given parser state machine, so every
+ * call advances the machine or, if the input fails, re-initializes it.
+ *
+ * @param view Not referenced by this function.
+ * @param machine The parser state machine that `chr` is fed into.
+ * @param chr The codepoint to style.
+ * @return The style of the state the machine was in before consuming
+ *   `chr`, unless overridden: SCE_SCITECO_NUMBER or SCE_SCITECO_OPERATOR
+ *   for digits and operators while the state's keymacro_mask contains
+ *   TECO_KEYMACRO_MASK_START, SCE_SCITECO_INVALID if teco_machine_input()
+ *   fails and SCE_SCITECO_LABEL if the state after consuming `chr`
+ *   has the label style.
+ */
+static teco_style_t
+teco_lexer_getstyle(teco_view_t *view, teco_machine_main_t *machine,
+ gunichar chr)
+{
+ /* by default, use the style of the state *before* chr is consumed */
+ teco_style_t style = machine->parent.current->style;
+
+ /*
+ * FIXME: At least this special workaround for numbers might be
+ * unnecessary once we get a special parser state for parsing numbers.
+ *
+ * FIXME: What about ^* and ^/?
+ * They are currently highlighted as commands.
+ */
+ if (machine->parent.current->keymacro_mask & TECO_KEYMACRO_MASK_START) {
+ switch (chr) {
+ case '0'...'9':
+ style = SCE_SCITECO_NUMBER;
+ break;
+ case '+':
+ case '-':
+ case '*':
+ case '/':
+ case '#':
+ case '&':
+ style = SCE_SCITECO_OPERATOR;
+ break;
+ }
+ }
+
+ /*
+ * FIXME: Perhaps as an optional lexer property, we should support
+ * styling commands with SCE_SCITECO_DEFAULT or SCE_SCITECO_COMMAND
+ * in alternating order, so you can discern chains of commands.
+ */
+ /* a NULL error pointer is passed, so error details are discarded */
+ if (!teco_machine_input(&machine->parent, chr, NULL)) {
+ /*
+ * Probably a syntax error, so the erroneous symbol
+ * is highlighted and we reset the parser's state machine.
+ */
+ style = SCE_SCITECO_INVALID;
+
+ /*
+ * FIXME: Perhaps we should simply reset the state to teco_state_start?
+ */
+ /* keep the program counter across the re-initialization */
+ gsize macro_pc = machine->macro_pc;
+ teco_machine_main_clear(machine);
+ teco_machine_main_init(machine, NULL, FALSE);
+ machine->mode = TECO_MODE_LEXING;
+ machine->macro_pc = macro_pc;
+ } else if (machine->parent.current->style == SCE_SCITECO_LABEL) {
+ /* don't highlight the leading `!` as SCE_SCITECO_COMMAND */
+ style = SCE_SCITECO_LABEL;
+ }
+
+ return style;
+}
+
+/**
+ * Lex and style a single character (or the hash-bang line) of SciTECO code.
+ *
+ * @param view The Scintilla view the styling messages are sent to.
+ * @param machine Parser state machine. Its macro_pc is used as the byte
+ *   offset into `macro` of the next character and is advanced here.
+ * @param macrodef_machine Second state machine, fed with the character
+ *   instead when the string escape character of `machine` is `{`
+ *   (before or after consuming it) and the lexer.sciteco.macrodef
+ *   property is non-zero.
+ * @param macro Pointer to the bytes to lex.
+ * @param start Document position in bytes that macro[0] corresponds to.
+ * @param max_len Number of bytes available in `macro`.
+ * @param cur_line Current line number, incremented after a newline.
+ * @param cur_col Current column in bytes, updated by this function.
+ * @param safe_col Last column on the current line at which the parser
+ *   was in a clean state or -1 if there is none; updated by this function.
+ */
+static void
+teco_lexer_step(teco_view_t *view, teco_machine_main_t *machine,
+ teco_machine_main_t *macrodef_machine,
+ const gchar *macro, gsize start, gsize max_len,
+ guint *cur_line, guint *cur_col, gint *safe_col)
+{
+ /*
+ * NOTE(review): only a leading `#` is tested, not `#!` -- confirm
+ * that this is intended.
+ */
+ if (*cur_line == 0 && *cur_col == 0 && *macro == '#') {
+ /* hash-bang line */
+ /*
+ * NOTE(review): macro_pc is set to a document position here;
+ * presumably macro[0] is document position 0 whenever line and
+ * column are both 0 -- TODO confirm.
+ */
+ machine->macro_pc = teco_view_ssm(view, SCI_POSITIONFROMLINE, 1, 0);
+ teco_view_ssm(view, SCI_STARTSTYLING, 0, 0);
+ teco_view_ssm(view, SCI_SETSTYLING, machine->macro_pc, SCE_SCITECO_COMMENT);
+ teco_view_ssm(view, SCI_SETLINESTATE, 0, -1);
+ (*cur_line)++;
+ *safe_col = 0;
+ return;
+ }
+
+ gsize old_pc = machine->macro_pc;
+
+ teco_style_t style = SCE_SCITECO_DEFAULT;
+
+ gint32 chr = g_utf8_get_char_validated(macro+machine->macro_pc,
+ max_len-machine->macro_pc);
+ if (chr < 0) {
+ /*
+ * Invalid UTF-8 byte sequence:
+ * A source file could contain all sorts of data garbage or
+ * you could manually M[lexer.set.sciteco] on an ANSI-encoded file.
+ */
+ /* skip and flag exactly one byte */
+ machine->macro_pc++;
+ style = SCE_SCITECO_INVALID;
+ } else {
+ machine->macro_pc = g_utf8_next_char(macro+machine->macro_pc) - macro;
+
+ /* the escape char as it was before chr is consumed */
+ gunichar escape_char = machine->expectstring.machine.escape_char;
+ style = teco_lexer_getstyle(view, machine, chr);
+
+ /*
+ * Optionally style @^Uq{ ... } contents like macro definitions.
+ * The curly braces will be styled like regular commands.
+ *
+ * FIXME: This will not work with nested macro definitions.
+ * FIXME: This cannot currently be disabled since SCI_SETPROPERTY
+ * cannot be accessed with ES.
+ * We could only map it to an ED flag.
+ */
+ if ((escape_char == '{' || machine->expectstring.machine.escape_char == '{') &&
+ teco_view_ssm(view, SCI_GETPROPERTYINT, (uptr_t)"lexer.sciteco.macrodef", TRUE))
+ style = teco_lexer_getstyle(view, macrodef_machine, chr);
+ }
+
+ *cur_col += machine->macro_pc - old_pc;
+
+ /* style exactly the bytes consumed in this step */
+ teco_view_ssm(view, SCI_STARTSTYLING, start+old_pc, 0);
+ teco_view_ssm(view, SCI_SETSTYLING, machine->macro_pc-old_pc, style);
+
+ if (chr == '\n') {
+ /* update line state to the last column with a clean start state */
+ teco_view_ssm(view, SCI_SETLINESTATE, *cur_line, *safe_col);
+ (*cur_line)++;
+ *cur_col = 0;
+ *safe_col = -1; /* no clean state by default */
+ }
+
+ if (style != SCE_SCITECO_INVALID &&
+ machine->parent.current->keymacro_mask & TECO_KEYMACRO_MASK_START &&
+ !machine->modifier_at)
+ /* clean parser state */
+ *safe_col = *cur_col;
+}
+
+/**
+ * Style SciTECO source code, i.e. perform syntax highlighting
+ * for the SciTECO language.
+ *
+ * Lexing restarts at the closest position before the end of the already
+ * styled region for which the line state records a clean parser state.
+ * teco_undo_enabled is cleared for the duration of the call and
+ * restored afterwards.
+ *
+ * @param view The Scintilla view to operate on.
+ * @param end The position in bytes where to stop styling.
+ */
+void
+teco_lexer_style(teco_view_t *view, gsize end)
+{
+ /* should always be TRUE */
+ gboolean old_undo_enabled = teco_undo_enabled;
+ teco_undo_enabled = FALSE;
+
+ gsize start = teco_view_ssm(view, SCI_GETENDSTYLED, 0, 0);
+ guint start_line = teco_view_ssm(view, SCI_LINEFROMPOSITION, start, 0);
+ gint start_col = 0;
+
+ /*
+ * The line state stores the last character (column) in bytes,
+ * that starts from a fresh parser state.
+ * It's -1 if the line does not have a clean parser state.
+ * Therefore we search for the first line before `start` that has a
+ * known clean parser state.
+ */
+ if (start_line > 0) {
+ do
+ start_line--;
+ while ((start_col = teco_view_ssm(view, SCI_GETLINESTATE, start_line, 0)) < 0 &&
+ start_line > 0);
+ /* fall back to the beginning of the first line */
+ start_col = MAX(start_col, 0);
+ }
+ start = teco_view_ssm(view, SCI_POSITIONFROMLINE, start_line, 0) + start_col;
+ g_assert(end > start);
+
+ g_auto(teco_machine_main_t) machine;
+ teco_machine_main_init(&machine, NULL, FALSE);
+ machine.mode = TECO_MODE_LEXING;
+
+ /* for lexing the contents of @^Uq{...} */
+ g_auto(teco_machine_main_t) macrodef_machine;
+ teco_machine_main_init(&macrodef_machine, NULL, FALSE);
+ macrodef_machine.mode = TECO_MODE_LEXING;
+
+ g_assert(start_col >= 0);
+ guint col = start_col;
+
+ /*
+ * NOTE: We could have also used teco_view_get_character(),
+ * but this will use much less Scintilla messages without
+ * removing dot.
+ */
+ const gchar *macro;
+ sptr_t gap = teco_view_ssm(view, SCI_GETGAPPOSITION, 0, 0);
+ if (start < gap && gap < end) {
+ /*
+ * SCI_GETRANGEPOINTER takes a length, not an end position.
+ * Requesting only the bytes in front of the gap keeps the
+ * range contiguous, so the gap does not have to be moved.
+ */
+ macro = (const gchar *)teco_view_ssm(view, SCI_GETRANGEPOINTER, start, gap-start);
+ while (machine.macro_pc < gap-start)
+ teco_lexer_step(view, &machine, &macrodef_machine,
+ macro, start, gap-start,
+ &start_line, &col, &start_col);
+ /*
+ * This might have lexed more than gap-start bytes
+ * (e.g. a hash-bang line)
+ */
+ start += machine.macro_pc;
+ }
+
+ /* lex the remaining bytes; macro_pc is relative to the new start */
+ macro = (const gchar *)teco_view_ssm(view, SCI_GETRANGEPOINTER, start, end-start);
+ machine.macro_pc = 0;
+ while (machine.macro_pc < end-start)
+ teco_lexer_step(view, &machine, &macrodef_machine,
+ macro, start, end-start,
+ &start_line, &col, &start_col);
+
+ /* set line state on the very last line */
+ teco_view_ssm(view, SCI_SETLINESTATE, start_line, start_col);
+
+ teco_undo_enabled = old_undo_enabled;
+}
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..87b0d0f
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2012-2024 Robin Haberkorn
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <glib.h>
+
+#include "view.h"
+
+/** Scintilla style ids for lexing SciTECO code */
+typedef enum {
+ SCE_SCITECO_DEFAULT = 0, /**< states that do not set a style explicitly */
+ SCE_SCITECO_COMMAND = 1, /**< regular command characters */
+ SCE_SCITECO_OPERATOR = 2, /**< arithmetic operators and conditional commands */
+ SCE_SCITECO_QREG = 3, /**< states expecting a Q-Register */
+ SCE_SCITECO_STRING = 4, /**< states expecting a string argument */
+ SCE_SCITECO_NUMBER = 5, /**< digits in a start state */
+ SCE_SCITECO_LABEL = 6, /**< labels including the leading `!` */
+ SCE_SCITECO_COMMENT = 7, /**< used by the lexer for the hash-bang line */
+ SCE_SCITECO_INVALID = 8 /**< syntax errors and invalid UTF-8 bytes */
+} teco_style_t;
+
+void teco_lexer_style(teco_view_t *view, gsize end);
diff --git a/src/parser.h b/src/parser.h
index 20f73fb..7ca5ab3 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -27,6 +27,7 @@
#include "goto.h"
#include "undo.h"
#include "qreg.h"
+#include "lexer.h"
/*
* Forward Declarations
@@ -203,6 +204,12 @@ struct teco_state_t {
teco_keymacro_mask_t keymacro_mask : 8;
/**
+ * Scintilla style to apply to all input characters in this state
+ * when syntax highlighting SciTECO code.
+ */
+ teco_style_t style : 8;
+
+ /**
* Additional state-dependent callbacks and settings.
* This wastes some bytes compared to other techniques for extending teco_state_t
* but this is acceptable since there is only a limited number of constant instances.
@@ -241,6 +248,7 @@ gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent
.process_edit_cmd_cb = teco_state_process_edit_cmd, \
.is_start = FALSE, \
.keymacro_mask = TECO_KEYMACRO_MASK_DEFAULT, \
+ .style = SCE_SCITECO_DEFAULT, \
##__VA_ARGS__ \
}
@@ -441,7 +449,9 @@ typedef enum {
/** Parse, but don't execute until reaching end of conditional or its else-clause */
TECO_MODE_PARSE_ONLY_COND,
/** Parse, but don't execute until reaching the very end of conditional */
- TECO_MODE_PARSE_ONLY_COND_FORCE
+ TECO_MODE_PARSE_ONLY_COND_FORCE,
+ /** Parse, but don't execute until end of macro (for Scintilla lexing) */
+ TECO_MODE_LEXING
} teco_mode_t;
/** @extends teco_machine_t */
@@ -568,6 +578,7 @@ gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco
.process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \
teco_state_expectstring_process_edit_cmd, \
.keymacro_mask = TECO_KEYMACRO_MASK_STRING, \
+ .style = SCE_SCITECO_STRING, \
.expectstring.string_building = TRUE, \
.expectstring.last = TRUE, \
.expectstring.process_cb = NULL, /* do nothing */ \
diff --git a/src/qreg-commands.h b/src/qreg-commands.h
index 27a6a5c..d999587 100644
--- a/src/qreg-commands.h
+++ b/src/qreg-commands.h
@@ -20,6 +20,7 @@
#include "sciteco.h"
#include "parser.h"
+#include "lexer.h"
#include "qreg.h"
static inline void
@@ -55,6 +56,7 @@ gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_m
.initial_cb = (teco_state_initial_cb_t)teco_state_expectqreg_initial, \
.process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \
teco_state_expectqreg_process_edit_cmd, \
+ .style = SCE_SCITECO_QREG, \
.expectqreg.type = TECO_QREG_REQUIRED, \
.expectqreg.got_register_cb = NAME##_got_register, /* always required */ \
##__VA_ARGS__ \
diff --git a/src/symbols.c b/src/symbols.c
index 944d01d..798b89c 100644
--- a/src/symbols.c
+++ b/src/symbols.c
@@ -321,8 +321,13 @@ teco_state_scintilla_lparam_done(teco_machine_main_t *ctx, const teco_string_t *
sptr_t lParam = 0;
+ if (ctx->scintilla.iMessage == SCI_SETILEXER &&
+ !teco_string_cmp(str, "sciteco", 7)) {
+ /* perform lexing in the container (see teco_lexer_style()) */
+ lParam = 0;
+ }
#ifdef HAVE_LEXILLA
- if (ctx->scintilla.iMessage == SCI_SETILEXER) {
+ else if (ctx->scintilla.iMessage == SCI_SETILEXER) {
if (teco_string_contains(str, '\0')) {
g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
"Lexer name must not contain null-byte.");
@@ -336,9 +341,9 @@ teco_state_scintilla_lparam_done(teco_machine_main_t *ctx, const teco_string_t *
"Lexilla lexer \"%s\" not found.", lexer);
return NULL;
}
- } else
+ }
#endif
- if (str->len > 0) {
+ else if (str->len > 0) {
/*
* NOTE: There may even be messages that read strings
* with embedded nulls.
diff --git a/src/view.c b/src/view.c
index 7cdc987..f14c658 100644
--- a/src/view.c
+++ b/src/view.c
@@ -46,6 +46,7 @@
#include "qreg.h"
#include "eol.h"
#include "memory.h"
+#include "lexer.h"
#include "view.h"
/** @memberof teco_view_t */
@@ -636,3 +637,15 @@ teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len)
*/
return (gint32)g_utf8_get_char_validated(buf, -1);
}
+
+/**
+ * Process a Scintilla notification on behalf of a view.
+ *
+ * If DEBUG is defined, the notification code is printed.
+ * An SCN_STYLENEEDED notification results in styling the view
+ * with teco_lexer_style() up to the position reported in the notification.
+ * All other notifications are otherwise ignored here.
+ *
+ * @param ctx The view that is passed on to teco_lexer_style().
+ * @param notify The Scintilla notification to process.
+ */
+void
+teco_view_process_notify(teco_view_t *ctx, SCNotification *notify)
+{
+#ifdef DEBUG
+ g_printf("SCINTILLA NOTIFY: code=%d\n", notify->nmhdr.code);
+#endif
+
+ if (notify->nmhdr.code == SCN_STYLENEEDED)
+ /* Lexing in the container: only used for SciTECO */
+ teco_lexer_style(ctx, notify->position);
+}
diff --git a/src/view.h b/src/view.h
index 8f54fdd..eebafbf 100644
--- a/src/view.h
+++ b/src/view.h
@@ -83,3 +83,5 @@ teco_int_t teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos);
gssize teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n);
teco_int_t teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len);
+
+void teco_view_process_notify(teco_view_t *ctx, SCNotification *notify);