aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am1
-rw-r--r--src/core-commands.c25
-rw-r--r--src/doc.c3
-rw-r--r--src/error.h9
-rw-r--r--src/goto-commands.c64
-rw-r--r--src/interface-curses/interface.c30
-rw-r--r--src/interface-gtk/interface.c2
-rw-r--r--src/interface.c8
-rw-r--r--src/interface.h1
-rw-r--r--src/lexer.c250
-rw-r--r--src/lexer.h36
-rw-r--r--src/parser.h13
-rw-r--r--src/qreg-commands.h2
-rw-r--r--src/qreg.c14
-rw-r--r--src/search.c12
-rw-r--r--src/symbols.c108
-rw-r--r--src/view.c66
-rw-r--r--src/view.h2
18 files changed, 576 insertions, 70 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 1e2056e..055cde7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -52,6 +52,7 @@ libsciteco_base_la_SOURCES = main.c sciteco.h list.h \
help.c help.h \
rb3str.c rb3str.h \
symbols.c symbols.h \
+ lexer.c lexer.h \
view.c view.h \
interface.c interface.h
# NOTE: We cannot link in Scintilla (static library) into
diff --git a/src/core-commands.c b/src/core-commands.c
index 2f473ce..4ee0c5c 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -31,6 +31,7 @@
#include "expressions.h"
#include "ring.h"
#include "parser.h"
+#include "lexer.h"
#include "symbols.h"
#include "search.h"
#include "spawn.h"
@@ -1293,7 +1294,8 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_start,
.end_of_macro_cb = NULL, /* Allowed at the end of a macro! */
.is_start = TRUE,
- .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE
+ .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE,
+ .style = SCE_SCITECO_COMMAND
);
/*$ F<
@@ -1450,7 +1452,9 @@ teco_state_fcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error
teco_ascii_toupper(chr), error);
}
-TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_fcommand);
+TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_fcommand,
+ .style = SCE_SCITECO_COMMAND
+);
static void
teco_undo_change_dir_action(gchar **dir, gboolean run)
@@ -1657,7 +1661,9 @@ teco_state_condcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **er
return &teco_state_start;
}
-TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_condcommand);
+TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_condcommand,
+ .style = SCE_SCITECO_OPERATOR
+);
/*$ ^_ negate
* n^_ -> ~n -- Binary negation
@@ -1984,6 +1990,8 @@ teco_state_control_last_range(teco_machine_main_t *ctx, GError **error)
*
* A common idiom \(lq^SC\(rq can be used for jumping to the
* beginning of the matched pattern or inserted string.
+ * Since the result is always negative, you can use \(lq^SR\(rq
+ * to skip the matched pattern after \fBFK\fP.
*/
static void
teco_state_control_last_length(teco_machine_main_t *ctx, GError **error)
@@ -2055,7 +2063,9 @@ teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
teco_ascii_toupper(chr), error);
}
-TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_control);
+TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_control,
+ .style = SCE_SCITECO_COMMAND
+);
static teco_state_t *
teco_state_ascii_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
@@ -2188,7 +2198,8 @@ TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_escape,
* when it comes to function key macro masking.
*/
.is_start = TRUE,
- .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE
+ .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE,
+ .style = SCE_SCITECO_COMMAND
);
/*$ EF close
@@ -2956,7 +2967,9 @@ teco_state_ecommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error
teco_ascii_toupper(chr), error);
}
-TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_ecommand);
+TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_ecommand,
+ .style = SCE_SCITECO_COMMAND
+);
gboolean
teco_state_insert_initial(teco_machine_main_t *ctx, GError **error)
diff --git a/src/doc.c b/src/doc.c
index a1ebe2c..019603a 100644
--- a/src/doc.c
+++ b/src/doc.c
@@ -176,7 +176,8 @@ teco_doc_undo_set_string(teco_doc_t *ctx)
* @param str Pointer to a variable to hold the return string.
* It can be NULL if you are interested only in the string's length.
* Strings must be freed via g_free().
- * @param len Where to store the string's length (mandatory).
+ * @param len Where to store the string's length or NULL
+ * if that information is not necessary.
* @param codepage Where to store the document's codepage or NULL
* if that information is not necessary.
*
diff --git a/src/error.h b/src/error.h
index c51f528..021f759 100644
--- a/src/error.h
+++ b/src/error.h
@@ -17,6 +17,7 @@
#pragma once
#include <glib.h>
+#include <gmodule.h>
#include "sciteco.h"
#include "string-utils.h"
@@ -53,6 +54,7 @@ typedef enum {
TECO_ERROR_MEMLIMIT,
TECO_ERROR_CLIPBOARD,
TECO_ERROR_WIN32,
+ TECO_ERROR_MODULE,
/** Interrupt current operation */
TECO_ERROR_INTERRUPTED,
@@ -165,6 +167,13 @@ teco_error_win32_set(GError **error, const gchar *prefix, gint err)
#endif
static inline void
+teco_error_module_set(GError **error, const gchar *prefix)
+{
+ g_set_error(error, TECO_ERROR, TECO_ERROR_MODULE, "%s: %s",
+ prefix, g_module_error());
+}
+
+static inline void
teco_error_interrupted_set(GError **error)
{
g_set_error_literal(error, TECO_ERROR, TECO_ERROR_INTERRUPTED, "Interrupted");
diff --git a/src/goto-commands.c b/src/goto-commands.c
index a8a9689..2035277 100644
--- a/src/goto-commands.c
+++ b/src/goto-commands.c
@@ -27,11 +27,15 @@
#include "string-utils.h"
#include "expressions.h"
#include "parser.h"
+#include "lexer.h"
#include "core-commands.h"
#include "undo.h"
#include "goto.h"
#include "goto-commands.h"
+TECO_DECLARE_STATE(teco_state_blockcomment);
+TECO_DECLARE_STATE(teco_state_eolcomment);
+
teco_string_t teco_goto_skip_label = {NULL, 0};
static gboolean
@@ -45,16 +49,18 @@ teco_state_label_initial(teco_machine_main_t *ctx, GError **error)
* NOTE: The comma is theoretically not allowed in a label
* (see <O> syntax), but is accepted anyway since labels
* are historically used as comments.
- *
- * TODO: Add support for "true" comments of the form !* ... *!
- * This would be almost trivial to implement, but if we don't
- * want any (even temporary) overhead for comments at all, we need
- * to add a new parser state.
- * I'm unsure whether !-signs should be allowed within comments.
+ * SciTECO has true block and EOL comments as well, though.
*/
static teco_state_t *
teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
+ if (!ctx->goto_label.len) {
+ switch (chr) {
+ case '*': return &teco_state_blockcomment; /* `!*` */
+ case '!': return &teco_state_eolcomment; /* `!!` */
+ }
+ }
+
if (chr == '!') {
/*
* NOTE: If the label already existed, its PC will be restored
@@ -83,6 +89,12 @@ teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
return &teco_state_start;
}
+ /*
+ * The goto label is collected in parse-only mode as well
+ * since we could jump into a currently dead branch later.
+ *
+ * FIXME: Theoretically, we could avoid that at least in TECO_MODE_LEXING.
+ */
if (ctx->parent.must_undo)
undo__teco_string_truncate(&ctx->goto_label, ctx->goto_label.len);
teco_string_append_wc(&ctx->goto_label, chr);
@@ -90,7 +102,8 @@ teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
}
TECO_DEFINE_STATE(teco_state_label,
- .initial_cb = (teco_state_initial_cb_t)teco_state_label_initial
+ .initial_cb = (teco_state_initial_cb_t)teco_state_label_initial,
+ .style = SCE_SCITECO_LABEL
);
static teco_state_t *
@@ -169,3 +182,40 @@ gboolean teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine
TECO_DEFINE_STATE_EXPECTSTRING(teco_state_goto,
.process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t)teco_state_goto_process_edit_cmd
);
+
+/*
+ * True comments:
+ * They don't add entries to the goto table.
+ *
+ * NOTE: This still needs some special handling in the Scintilla lexer
+ * (for syntax highlighting) since comments always start with `!`.
+ *
+ * TECO_DEFINE_STATE_COMMENT() defines the parser state NAME via
+ * TECO_DEFINE_STATE() with its style preset to SCE_SCITECO_COMMENT.
+ * Any further designated initializers given after NAME are appended
+ * behind the style initializer.
+ */
+#define TECO_DEFINE_STATE_COMMENT(NAME, ...) \
+ TECO_DEFINE_STATE(NAME, \
+ .style = SCE_SCITECO_COMMENT, \
+ ##__VA_ARGS__ \
+ )
+
+TECO_DECLARE_STATE(teco_state_blockcomment_star);
+
+/*
+ * State after an asterisk has been seen within a block comment.
+ *
+ * An exclamation mark completes the `*!` terminator and returns to the
+ * start state.
+ * Another asterisk keeps us in this state, so that comments ending in
+ * a run of asterisks (e.g. `**!`) are terminated as well.
+ * Any other character continues the block comment.
+ */
+static teco_state_t *
+teco_state_blockcomment_star_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
+{
+	switch (chr) {
+	case '!': return &teco_state_start;		/* `*!` */
+	case '*': return &teco_state_blockcomment_star;	/* may still be followed by `!` */
+	}
+
+	return &teco_state_blockcomment;
+}
+
+TECO_DEFINE_STATE_COMMENT(teco_state_blockcomment_star);
+
+/*
+ * Within a block comment: all characters are skipped.
+ * Only an asterisk is special since it could begin the comment's terminator.
+ */
+static teco_state_t *
+teco_state_blockcomment_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
+{
+	if (chr == '*')
+		return &teco_state_blockcomment_star;
+
+	return &teco_state_blockcomment;
+}
+
+TECO_DEFINE_STATE_COMMENT(teco_state_blockcomment);
+
+/*
+ * Within an end-of-line comment: all characters up to and including
+ * the next line feed are skipped.
+ */
+static teco_state_t *
+teco_state_eolcomment_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
+{
+	if (chr == '\n')
+		return &teco_state_start;
+
+	return &teco_state_eolcomment;
+}
+
+TECO_DEFINE_STATE_COMMENT(teco_state_eolcomment);
diff --git a/src/interface-curses/interface.c b/src/interface-curses/interface.c
index 5984bcb..f713bc1 100644
--- a/src/interface-curses/interface.c
+++ b/src/interface-curses/interface.c
@@ -276,7 +276,7 @@ teco_xterm_version(void)
static void
teco_view_scintilla_notify(void *sci, int iMessage, SCNotification *notify, void *user_data)
{
- teco_interface_process_notify(notify);
+ teco_view_process_notify((teco_view_t *)sci, notify);
}
teco_view_t *
@@ -575,13 +575,13 @@ teco_interface_init_screen(void)
if (isatty(1)) {
teco_interface.stdout_orig = dup(1);
g_assert(teco_interface.stdout_orig >= 0);
- FILE *stdout_new = g_freopen("/dev/null", "a+", stdout);
+ G_GNUC_UNUSED FILE *stdout_new = g_freopen("/dev/null", "a+", stdout);
g_assert(stdout_new != NULL);
}
if (isatty(2)) {
teco_interface.stderr_orig = dup(2);
g_assert(teco_interface.stderr_orig >= 0);
- FILE *stderr_new = g_freopen("/dev/null", "a+", stderr);
+ G_GNUC_UNUSED FILE *stderr_new = g_freopen("/dev/null", "a+", stderr);
g_assert(stderr_new != NULL);
}
}
@@ -1706,15 +1706,15 @@ teco_interface_event_loop_iter(void)
return;
#ifdef __PDCURSES__
- /*
- * Especially PDCurses/WinGUI likes to report two keypresses,
- * e.g. for CTRL+Shift+6 (CTRL+^).
- * Make sure we don't filter out AltGr, which may be reported as CTRL+ALT.
- */
- if ((PDC_get_key_modifiers() &
- (PDC_KEY_MODIFIER_CONTROL | PDC_KEY_MODIFIER_ALT)) == PDC_KEY_MODIFIER_CONTROL &&
- !TECO_IS_CTL(key))
- return;
+ /*
+ * Especially PDCurses/WinGUI likes to report two keypresses,
+ * e.g. for CTRL+Shift+6 (CTRL+^).
+ * Make sure we don't filter out AltGr, which may be reported as CTRL+ALT.
+ */
+ if ((PDC_get_key_modifiers() &
+ (PDC_KEY_MODIFIER_CONTROL | PDC_KEY_MODIFIER_ALT)) == PDC_KEY_MODIFIER_CONTROL &&
+ !TECO_IS_CTL(key))
+ return;
#endif
/*
@@ -1723,10 +1723,10 @@ teco_interface_event_loop_iter(void)
*/
keybuf[keybuf_i++] = key;
gsize len = keybuf_i;
- gunichar cp = g_utf8_get_char_validated(keybuf, len);
- if (keybuf_i >= sizeof(keybuf) || cp != (gunichar)-2)
+ gint32 cp = *keybuf ? g_utf8_get_char_validated(keybuf, len) : 0;
+ if (keybuf_i >= sizeof(keybuf) || cp != -2)
keybuf_i = 0;
- if ((gint32)cp < 0)
+ if (cp < 0)
/* incomplete or invalid */
return;
switch (teco_cmdline_keymacro(keybuf, len, error)) {
diff --git a/src/interface-gtk/interface.c b/src/interface-gtk/interface.c
index 829310a..0dbd2ba 100644
--- a/src/interface-gtk/interface.c
+++ b/src/interface-gtk/interface.c
@@ -112,7 +112,7 @@ static void
teco_view_scintilla_notify(ScintillaObject *sci, gint iMessage,
SCNotification *notify, gpointer user_data)
{
- teco_interface_process_notify(notify);
+ teco_view_process_notify((teco_view_t *)sci, notify);
}
teco_view_t *
diff --git a/src/interface.c b/src/interface.c
index 2e2d64e..2973dd2 100644
--- a/src/interface.c
+++ b/src/interface.c
@@ -110,11 +110,3 @@ teco_interface_stdio_vmsg(teco_msg_t type, const gchar *fmt, va_list ap)
g_vfprintf(stream, fmt, ap);
fputc('\n', stream);
}
-
-void
-teco_interface_process_notify(SCNotification *notify)
-{
-#ifdef DEBUG
- g_printf("SCINTILLA NOTIFY: code=%d\n", notify->nmhdr.code);
-#endif
-}
diff --git a/src/interface.h b/src/interface.h
index 32db6b5..80da8d9 100644
--- a/src/interface.h
+++ b/src/interface.h
@@ -149,7 +149,6 @@ gboolean teco_interface_event_loop(GError **error);
*/
/** @protected */
void teco_interface_stdio_vmsg(teco_msg_t type, const gchar *fmt, va_list ap);
-void teco_interface_process_notify(SCNotification *notify);
/** @pure */
void teco_interface_cleanup(void);
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..c0c7847
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (C) 2012-2024 Robin Haberkorn
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string.h>
+
+#include <glib.h>
+
+#include "sciteco.h"
+#include "view.h"
+#include "parser.h"
+#include "lexer.h"
+
+/**
+ * Feed a single character into the lexing parser and determine
+ * the Scintilla style to apply to it.
+ *
+ * By default, this is the style of the parser state that was active
+ * before processing the character.
+ * In start states, digits and arithmetic operators get dedicated styles.
+ * If the resulting state is a label or comment state, its style is
+ * returned instead.
+ *
+ * @param view The view being styled (currently unused).
+ * @param machine The parser state machine to advance.
+ *   It is reset (preserving its macro_pc) when the character is rejected.
+ * @param chr The codepoint to process.
+ * @return The style for chr or SCE_SCITECO_INVALID if the parser
+ *   did not accept it.
+ */
+static teco_style_t
+teco_lexer_getstyle(teco_view_t *view, teco_machine_main_t *machine,
+                    gunichar chr)
+{
+	teco_style_t style = machine->parent.current->style;
+
+	/*
+	 * FIXME: At least this special workaround for numbers might be
+	 * unnecessary once we get a special parser state for parsing numbers.
+	 *
+	 * FIXME: What about ^* and ^/?
+	 * They are currently highlighted as commands.
+	 */
+	if (machine->parent.current->keymacro_mask & TECO_KEYMACRO_MASK_START &&
+	    chr <= 0xFF) {
+		/*
+		 * NOTE: strchr() also matches the string's terminating null
+		 * character, so null bytes must be excluded explicitly.
+		 */
+		if (g_ascii_isdigit(chr))
+			style = SCE_SCITECO_NUMBER;
+		else if (chr && strchr("+-*/#&", chr))
+			style = SCE_SCITECO_OPERATOR;
+	}
+
+	/*
+	 * FIXME: Perhaps as an optional lexer property, we should support
+	 * styling commands with SCE_SCITECO_DEFAULT or SCE_SCITECO_COMMAND
+	 * in alternating order, so you can discern chains of commands.
+	 */
+	if (!teco_machine_input(&machine->parent, chr, NULL)) {
+		/*
+		 * Probably a syntax error, so the erroneous symbol
+		 * is highlighted and we reset the parser's state machine.
+		 *
+		 * FIXME: Perhaps we should simply reset the state to teco_state_start?
+		 */
+		gsize macro_pc = machine->macro_pc;
+		teco_machine_main_clear(machine);
+		teco_machine_main_init(machine, NULL, FALSE);
+		machine->mode = TECO_MODE_LEXING;
+		machine->macro_pc = macro_pc;
+
+		return SCE_SCITECO_INVALID;
+	}
+
+	/*
+	 * Don't highlight the leading `!` in comments as SCE_SCITECO_COMMAND.
+	 * True comments also begin with `!`, so make sure they are highlighted
+	 * already from the second character.
+	 * This is then extended back by one character in teco_lexer_step().
+	 */
+	switch (machine->parent.current->style) {
+	case SCE_SCITECO_COMMENT:
+	case SCE_SCITECO_LABEL:
+		return machine->parent.current->style;
+	default:
+		break;
+	}
+
+	return style;
+}
+
+static void
+teco_lexer_step(teco_view_t *view, teco_machine_main_t *machine,
+ teco_machine_main_t *macrodef_machine,
+ const gchar *macro, gsize start, gsize max_len,
+ guint *cur_line, guint *cur_col, gint *safe_col)
+{
+ if (*cur_line == 0 && *cur_col == 0 && *macro == '#') {
+ /*
+ * hash-bang line:
+ * A `#` at the very beginning of the first line.
+ * All of line 0 is styled as a comment, its line state is set to -1
+ * and the program counter is moved to the beginning of line 1,
+ * which is considered to begin with a clean parser state.
+ */
+ machine->macro_pc = teco_view_ssm(view, SCI_POSITIONFROMLINE, 1, 0);
+ teco_view_ssm(view, SCI_STARTSTYLING, 0, 0);
+ teco_view_ssm(view, SCI_SETSTYLING, machine->macro_pc, SCE_SCITECO_COMMENT);
+ teco_view_ssm(view, SCI_SETLINESTATE, 0, -1);
+ (*cur_line)++;
+ *safe_col = 0;
+ return;
+ }
+
+ gssize old_pc = machine->macro_pc;
+
+ teco_style_t style = SCE_SCITECO_DEFAULT;
+
+ /*
+ * According to the GLib documentation, g_utf8_get_char_validated()
+ * returns (gunichar)-2 when a positive maximum length is given and
+ * the sequence contains a null byte.
+ * Null bytes are therefore passed on as codepoint 0 explicitly.
+ */
+ gint32 chr = macro[machine->macro_pc]
+ ? g_utf8_get_char_validated(macro+machine->macro_pc,
+ max_len-machine->macro_pc) : 0;
+ if (chr < 0) {
+ /*
+ * Invalid UTF-8 byte sequence:
+ * A source file could contain all sorts of data garbage or
+ * you could manually M[lexer.set.sciteco] on an ANSI-encoded file.
+ * Only a single byte is consumed and styled as invalid.
+ */
+ machine->macro_pc++;
+ style = SCE_SCITECO_INVALID;
+ } else {
+ machine->macro_pc = g_utf8_next_char(macro+machine->macro_pc) - macro;
+
+ gunichar escape_char = machine->expectstring.machine.escape_char;
+ style = teco_lexer_getstyle(view, machine, chr);
+
+ /*
+ * Optionally style @^Uq{ ... } contents like macro definitions.
+ * The curly braces will be styled like regular commands.
+ *
+ * NOTE(review): The escape character is checked both as it was
+ * before and as it is after processing chr -- presumably so that
+ * both delimiters are covered; confirm.
+ *
+ * FIXME: This will not work with nested macro definitions.
+ * FIXME: This cannot currently be disabled since SCI_SETPROPERTY
+ * cannot be accessed with ES.
+ * We could only map it to an ED flag.
+ */
+ if ((escape_char == '{' || machine->expectstring.machine.escape_char == '{') &&
+ teco_view_ssm(view, SCI_GETPROPERTYINT, (uptr_t)"lexer.sciteco.macrodef", TRUE))
+ style = teco_lexer_getstyle(view, macrodef_machine, chr);
+ }
+
+ *cur_col += machine->macro_pc - old_pc;
+
+ /*
+ * True comments begin with `!*` or `!!`, but only the second character gets
+ * the correct style by default, so we extend it backwards.
+ */
+ if (style == SCE_SCITECO_COMMENT)
+ old_pc--;
+
+ teco_view_ssm(view, SCI_STARTSTYLING, start+old_pc, 0);
+ teco_view_ssm(view, SCI_SETSTYLING, machine->macro_pc-old_pc, style);
+
+ if (chr == '\n') {
+ /* update line state to the last column with a clean start state */
+ teco_view_ssm(view, SCI_SETLINESTATE, *cur_line, *safe_col);
+ (*cur_line)++;
+ *cur_col = 0;
+ *safe_col = -1; /* no clean state by default */
+ }
+
+ if (style != SCE_SCITECO_INVALID &&
+ machine->parent.current->keymacro_mask & TECO_KEYMACRO_MASK_START &&
+ !machine->modifier_at)
+ /*
+ * clean parser state:
+ * the character was valid, we are in a start state and
+ * `modifier_at` is not set
+ */
+ *safe_col = *cur_col;
+}
+
+/**
+ * Style SciTECO source code, i.e. perform syntax highlighting
+ * for the SciTECO language.
+ *
+ * Lexing does not necessarily begin at the end of the already styled text.
+ * Instead it resumes at an earlier position that is known from the
+ * line states to begin with a clean parser state.
+ * teco_undo_enabled is cleared while lexing and restored afterwards.
+ *
+ * @param view The Scintilla view to operate on.
+ * @param end The position in bytes where to stop styling.
+ *   It must be greater than the position where lexing resumes (asserted).
+ */
+void
+teco_lexer_style(teco_view_t *view, gsize end)
+{
+ /* should always be TRUE */
+ gboolean old_undo_enabled = teco_undo_enabled;
+ teco_undo_enabled = FALSE;
+
+ gsize start = teco_view_ssm(view, SCI_GETENDSTYLED, 0, 0);
+ guint start_line = teco_view_ssm(view, SCI_LINEFROMPOSITION, start, 0);
+ gint start_col = 0;
+
+ /*
+ * The line state stores the last character (column) in bytes,
+ * that starts from a fresh parser state.
+ * It's -1 if the line does not have a clean parser state.
+ * Therefore we search for the first line before `start` that has a
+ * known clean parser state.
+ * If there is none, lexing begins at the beginning of the document.
+ */
+ if (start_line > 0) {
+ do
+ start_line--;
+ while ((start_col = teco_view_ssm(view, SCI_GETLINESTATE, start_line, 0)) < 0 &&
+ start_line > 0);
+ start_col = MAX(start_col, 0);
+ }
+ start = teco_view_ssm(view, SCI_POSITIONFROMLINE, start_line, 0) + start_col;
+ g_assert(end > start);
+
+ g_auto(teco_machine_main_t) machine;
+ teco_machine_main_init(&machine, NULL, FALSE);
+ machine.mode = TECO_MODE_LEXING;
+
+ /* for lexing the contents of @^Uq{...} */
+ g_auto(teco_machine_main_t) macrodef_machine;
+ teco_machine_main_init(&macrodef_machine, NULL, FALSE);
+ macrodef_machine.mode = TECO_MODE_LEXING;
+
+ g_assert(start_col >= 0);
+ guint col = start_col;
+
+ /*
+ * NOTE: We could have also used teco_view_get_character(),
+ * but this will use far fewer Scintilla messages without
+ * removing dot.
+ * If the gap lies within the range to style, the text before
+ * and after the gap is lexed separately.
+ */
+ const gchar *macro;
+ sptr_t gap = teco_view_ssm(view, SCI_GETGAPPOSITION, 0, 0);
+ if (start < gap && gap < end) {
+ macro = (const gchar *)teco_view_ssm(view, SCI_GETRANGEPOINTER, start, gap);
+ while (machine.macro_pc < gap-start)
+ teco_lexer_step(view, &machine, &macrodef_machine,
+ macro, start, gap-start,
+ &start_line, &col, &start_col);
+ /*
+ * This might have lexed more than gap-start bytes
+ * (e.g. a hash-bang line)
+ *
+ * NOTE(review): Should this ever move `start` beyond `end`,
+ * the unsigned `end-start` below would wrap around --
+ * confirm that this cannot happen.
+ */
+ start += machine.macro_pc;
+ }
+
+ macro = (const gchar *)teco_view_ssm(view, SCI_GETRANGEPOINTER, start, end-start);
+ machine.macro_pc = 0;
+ while (machine.macro_pc < end-start)
+ teco_lexer_step(view, &machine, &macrodef_machine,
+ macro, start, end-start,
+ &start_line, &col, &start_col);
+
+ /* set line state on the very last line */
+ teco_view_ssm(view, SCI_SETLINESTATE, start_line, start_col);
+
+ teco_undo_enabled = old_undo_enabled;
+}
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..87b0d0f
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2012-2024 Robin Haberkorn
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <glib.h>
+
+#include "view.h"
+
+/**
+ * Scintilla style ids for lexing SciTECO code.
+ *
+ * Parser states store one of these ids in the 8-bit wide
+ * `style` bitfield of teco_state_t, so new ids should fit into 8 bits.
+ */
+typedef enum {
+ SCE_SCITECO_DEFAULT = 0,
+ SCE_SCITECO_COMMAND = 1,
+ SCE_SCITECO_OPERATOR = 2,
+ SCE_SCITECO_QREG = 3,
+ SCE_SCITECO_STRING = 4,
+ SCE_SCITECO_NUMBER = 5,
+ SCE_SCITECO_LABEL = 6,
+ SCE_SCITECO_COMMENT = 7,
+ SCE_SCITECO_INVALID = 8
+} teco_style_t;
+
+void teco_lexer_style(teco_view_t *view, gsize end);
diff --git a/src/parser.h b/src/parser.h
index 20f73fb..7ca5ab3 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -27,6 +27,7 @@
#include "goto.h"
#include "undo.h"
#include "qreg.h"
+#include "lexer.h"
/*
* Forward Declarations
@@ -203,6 +204,12 @@ struct teco_state_t {
teco_keymacro_mask_t keymacro_mask : 8;
/**
+ * Scintilla style to apply to all input characters in this state
+ * when syntax highlighting SciTECO code.
+ */
+ teco_style_t style : 8;
+
+ /**
* Additional state-dependent callbacks and settings.
* This wastes some bytes compared to other techniques for extending teco_state_t
* but this is acceptable since there is only a limited number of constant instances.
@@ -241,6 +248,7 @@ gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent
.process_edit_cmd_cb = teco_state_process_edit_cmd, \
.is_start = FALSE, \
.keymacro_mask = TECO_KEYMACRO_MASK_DEFAULT, \
+ .style = SCE_SCITECO_DEFAULT, \
##__VA_ARGS__ \
}
@@ -441,7 +449,9 @@ typedef enum {
/** Parse, but don't execute until reaching end of conditional or its else-clause */
TECO_MODE_PARSE_ONLY_COND,
/** Parse, but don't execute until reaching the very end of conditional */
- TECO_MODE_PARSE_ONLY_COND_FORCE
+ TECO_MODE_PARSE_ONLY_COND_FORCE,
+ /** Parse, but don't execute until end of macro (for Scintilla lexing) */
+ TECO_MODE_LEXING
} teco_mode_t;
/** @extends teco_machine_t */
@@ -568,6 +578,7 @@ gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco
.process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \
teco_state_expectstring_process_edit_cmd, \
.keymacro_mask = TECO_KEYMACRO_MASK_STRING, \
+ .style = SCE_SCITECO_STRING, \
.expectstring.string_building = TRUE, \
.expectstring.last = TRUE, \
.expectstring.process_cb = NULL, /* do nothing */ \
diff --git a/src/qreg-commands.h b/src/qreg-commands.h
index 27a6a5c..d999587 100644
--- a/src/qreg-commands.h
+++ b/src/qreg-commands.h
@@ -20,6 +20,7 @@
#include "sciteco.h"
#include "parser.h"
+#include "lexer.h"
#include "qreg.h"
static inline void
@@ -55,6 +56,7 @@ gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_m
.initial_cb = (teco_state_initial_cb_t)teco_state_expectqreg_initial, \
.process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \
teco_state_expectqreg_process_edit_cmd, \
+ .style = SCE_SCITECO_QREG, \
.expectqreg.type = TECO_QREG_REQUIRED, \
.expectqreg.got_register_cb = NAME##_got_register, /* always required */ \
##__VA_ARGS__ \
diff --git a/src/qreg.c b/src/qreg.c
index 061b685..271e7cb 100644
--- a/src/qreg.c
+++ b/src/qreg.c
@@ -539,7 +539,7 @@ teco_qreg_external_get_character(teco_qreg_t *qreg, teco_int_t position,
* The sign bit in UCS-4/UTF-32 is unused, so this will even
* suffice if TECO_INTEGER == 32.
*/
- *chr = (gint32)g_utf8_get_char_validated(p, -1);
+ *chr = *p ? (gint32)g_utf8_get_char_validated(p, -1) : 0;
return TRUE;
}
@@ -665,9 +665,6 @@ teco_qreg_bufferinfo_append_string(teco_qreg_t *qreg, const gchar *str, gsize le
return FALSE;
}
-/*
- * NOTE: The `string` component is currently unused on the "*" register.
- */
static gboolean
teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
guint *codepage, GError **error)
@@ -684,7 +681,8 @@ teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
/*
* NOTE: teco_file_normalize_path() does not change the size of the string.
*/
- *len = teco_ring_current->filename ? strlen(teco_ring_current->filename) : 0;
+ if (len)
+ *len = teco_ring_current->filename ? strlen(teco_ring_current->filename) : 0;
if (codepage)
*codepage = teco_default_codepage();
return TRUE;
@@ -775,7 +773,8 @@ teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
* the return value for str == NULL is still correct.
*/
gchar *dir = g_get_current_dir();
- *len = strlen(dir);
+ if (len)
+ *len = strlen(dir);
if (str)
*str = teco_file_normalize_path(dir);
else
@@ -919,11 +918,12 @@ teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
&str_converted.len, error) == G_IO_STATUS_ERROR)
return FALSE;
+ if (len)
+ *len = str_converted.len;
if (str)
*str = str_converted.data;
else
teco_string_clear(&str_converted);
- *len = str_converted.len;
if (codepage)
*codepage = teco_default_codepage();
diff --git a/src/search.c b/src/search.c
index e05a6b9..1945f5c 100644
--- a/src/search.c
+++ b/src/search.c
@@ -1075,18 +1075,24 @@ teco_state_search_kill_done(teco_machine_main_t *ctx, const teco_string_t *str,
if (teco_search_parameters.dot < dot) {
/* kill forwards */
sptr_t anchor = teco_interface_ssm(SCI_GETANCHOR, 0, 0);
+ gsize len = anchor - teco_search_parameters.dot;
if (teco_current_doc_must_undo())
undo__teco_interface_ssm(SCI_GOTOPOS, dot, 0);
teco_interface_ssm(SCI_GOTOPOS, anchor, 0);
- teco_interface_ssm(SCI_DELETERANGE, teco_search_parameters.dot,
- anchor - teco_search_parameters.dot);
+ teco_interface_ssm(SCI_DELETERANGE, teco_search_parameters.dot, len);
/* NOTE: An undo action is not always created. */
if (teco_current_doc_must_undo() &&
teco_search_parameters.dot != anchor)
undo__teco_interface_ssm(SCI_UNDO, 0, 0);
+
+ /* fix up ranges (^Y) */
+ for (guint i = 0; i < teco_ranges_count; i++) {
+ teco_ranges[i].from -= len;
+ teco_ranges[i].to -= len;
+ }
} else {
/* kill backwards */
teco_interface_ssm(SCI_DELETERANGE, dot, teco_search_parameters.dot - dot);
@@ -1113,7 +1119,7 @@ teco_state_search_kill_done(teco_machine_main_t *ctx, const teco_string_t *str,
* from,to:FK[pattern]$ -> Success|Failure
*
* \fBFK\fP searches for <pattern> just like the regular search
- * command (\fBS\fP) but when found deletes all text from dot
+ * command (\fBS\fP) but when found, deletes all text from dot
* up to but not including the found text instance.
* When searching backwards the characters beginning after
* the occurrence of <pattern> up to dot are deleted.
diff --git a/src/symbols.c b/src/symbols.c
index 944d01d..7198639 100644
--- a/src/symbols.c
+++ b/src/symbols.c
@@ -23,6 +23,7 @@
#include <string.h>
#include <glib.h>
+#include <gmodule.h>
#include <Scintilla.h>
#ifdef HAVE_LEXILLA
@@ -288,6 +289,27 @@ gboolean teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx,
* Lexilla lexer name as a string argument for the \fBSCI_SETILEXER\fP
* message, i.e. in order to load a Lexilla lexer
* (this works similar to the old \fBSCI_SETLEXERLANGUAGE\fP message).
+ * If the lexer name contains a null-byte, the second string
+ * argument is split into two:
+ * Up until the null-byte, the path of an external lexer library
+ * (shared library or DLL) is expected,
+ * that implements the Lexilla protocol.
+ * The \(lq.so\(rq or \(lq.dll\(rq extension is optional.
+ * The concrete lexer name is the remainder of the string after
+ * the null-byte.
+ * This allows you to use lexers from external lexer libraries
+ * like Scintillua.
+ * When detecting Scintillua, \*(ST will automatically pass down
+ * the \fBSCITECO_SCINTILLUA_LEXERS\fP environment variable as
+ * the \(lqscintillua.lexers\(rq library property for specifying
+ * the location of Scintillua's Lua lexer files.
+ *
+ * In order to facilitate the use of Scintillua lexers, the semantics
+ * of \fBSCI_NAMEOFSTYLE\fP have also been changed.
+ * Instead of returning the name for a given style id, it now
+ * returns the style id when given the name of a style in the
+ * second string argument of \fBES\fP, i.e. it allows you
+ * to look up style ids by name.
*
* .BR Warning :
* Almost all Scintilla messages may be dispatched using
@@ -321,24 +343,94 @@ teco_state_scintilla_lparam_done(teco_machine_main_t *ctx, const teco_string_t *
sptr_t lParam = 0;
-#ifdef HAVE_LEXILLA
- if (ctx->scintilla.iMessage == SCI_SETILEXER) {
+ if (ctx->scintilla.iMessage == SCI_NAMEOFSTYLE) {
+ /*
+ * FIXME: This customized version of SCI_NAMEOFSTYLE could be avoided
+ * if we had a way to call Scintilla messages that return strings into
+ * Q-Registers.
+ */
if (teco_string_contains(str, '\0')) {
g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
- "Lexer name must not contain null-byte.");
+ "Style name must not contain null-byte.");
return NULL;
}
- const gchar *lexer = str->data ? : "";
- lParam = (sptr_t)CreateLexer(lexer);
+ /*
+ * FIXME: Should we cache the name to style id?
+ */
+ guint count = teco_interface_ssm(SCI_GETNAMEDSTYLES, 0, 0);
+ for (guint id = 0; id < count; id++) {
+ gchar style[128] = "";
+ teco_interface_ssm(SCI_NAMEOFSTYLE, id, (sptr_t)style);
+ if (!teco_string_cmp(str, style, strlen(style))) {
+ teco_expressions_push(id);
+ return &teco_state_start;
+ }
+ }
+
+ g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED,
+ "Style name \"%s\" not found.", str->data ? : "");
+ return NULL;
+ }
+#ifdef HAVE_LEXILLA
+ else if (ctx->scintilla.iMessage == SCI_SETILEXER) {
+ CreateLexerFn CreateLexerFn = CreateLexer;
+
+ const gchar *lexer = memchr(str->data ? : "", '\0', str->len);
+ if (lexer) {
+ /* external lexer */
+ lexer++;
+
+ /*
+ * NOTE: The same module can be opened multiple times.
+ * They are internally reference counted.
+ */
+ GModule *module = g_module_open(str->data, G_MODULE_BIND_LAZY);
+ if (!module) {
+ teco_error_module_set(error, "Error opening lexer module");
+ return NULL;
+ }
+
+ GetNameSpaceFn GetNameSpaceFn;
+ SetLibraryPropertyFn SetLibraryPropertyFn;
+
+ if (!g_module_symbol(module, LEXILLA_GETNAMESPACE, (gpointer *)&GetNameSpaceFn) ||
+ !g_module_symbol(module, LEXILLA_SETLIBRARYPROPERTY, (gpointer *)&SetLibraryPropertyFn) ||
+ !g_module_symbol(module, LEXILLA_CREATELEXER, (gpointer *)&CreateLexerFn)) {
+ teco_error_module_set(error, "Cannot find lexer function");
+ return NULL;
+ }
+
+ if (!g_strcmp0(GetNameSpaceFn(), "scintillua")) {
+ /*
+ * Scintillua's lexer directory must be configured before calling CreateLexer().
+ *
+ * FIXME: In Scintillua distributions, the lexers are usually contained in the
+ * same directory as the prebuilt shared libraries.
+ * Perhaps we should default scintillua.lexers to the dirname in str->data?
+ */
+ teco_qreg_t *reg = teco_qreg_table_find(&teco_qreg_table_globals, "$SCITECO_SCINTILLUA_LEXERS", 26);
+ if (reg) {
+ teco_string_t dir;
+ if (!reg->vtable->get_string(reg, &dir.data, &dir.len, NULL, error))
+ return NULL;
+ SetLibraryPropertyFn("scintillua.lexers", dir.data ? : "");
+ }
+ }
+ } else {
+ /* Lexilla lexer */
+ lexer = str->data ? : "";
+ }
+
+ lParam = (sptr_t)CreateLexerFn(lexer);
if (!lParam) {
g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED,
- "Lexilla lexer \"%s\" not found.", lexer);
+ "Lexer \"%s\" not found.", lexer);
return NULL;
}
- } else
+ }
#endif
- if (str->len > 0) {
+ else if (str->len > 0) {
/*
* NOTE: There may even be messages that read strings
* with embedded nulls.
diff --git a/src/view.c b/src/view.c
index 7cdc987..b8c72a5 100644
--- a/src/view.c
+++ b/src/view.c
@@ -46,6 +46,7 @@
#include "qreg.h"
#include "eol.h"
#include "memory.h"
+#include "lexer.h"
#include "view.h"
/** @memberof teco_view_t */
@@ -205,9 +206,24 @@ teco_view_set_representations(teco_view_t *ctx)
gboolean
teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **error)
{
+ gboolean ret = TRUE;
+
g_auto(teco_eol_reader_t) reader;
teco_eol_reader_init_gio(&reader, channel);
+ /*
+ * Temporarily disable the line character index.
+ * This tremendously speeds up reading UTF-8 documents.
+ * The reason is that UTF-8 consistency checks are rather
+ * costly. Also, when reading in chunks of 1024 bytes,
+ * we can very well add incomplete UTF-8 sequences,
+ * resulting in unnecessary recalculations of the line index.
+ *
+ * NOTE: The index is released on ctx itself, i.e. the view
+ * whose codepage is queried and which receives the text,
+ * rather than on whatever view teco_interface_ssm() addresses.
+ */
+ guint cp = teco_view_get_codepage(ctx);
+ if (cp == SC_CP_UTF8)
+ teco_view_ssm(ctx, SCI_RELEASELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
+
teco_view_ssm(ctx, SCI_BEGINUNDOACTION, 0, 0);
teco_view_ssm(ctx, SCI_CLEARALL, 0, 0);
@@ -221,8 +237,9 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro
struct stat stat_buf = {.st_size = 0};
if (!fstat(g_io_channel_unix_get_fd(channel), &stat_buf) &&
stat_buf.st_size > 0) {
- if (!teco_memory_check(stat_buf.st_size, error))
- goto error;
+ ret = teco_memory_check(stat_buf.st_size, error);
+ if (!ret)
+ goto cleanup;
teco_view_ssm(ctx, SCI_ALLOCATE, stat_buf.st_size, 0);
}
@@ -234,8 +251,10 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro
teco_string_t str;
GIOStatus rc = teco_eol_reader_convert(&reader, &str.data, &str.len, error);
- if (rc == G_IO_STATUS_ERROR)
- goto error;
+ if (rc == G_IO_STATUS_ERROR) {
+ ret = FALSE;
+ goto cleanup;
+ }
if (rc == G_IO_STATUS_EOF)
break;
@@ -245,12 +264,14 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro
* Even if we checked initially, knowing the file size,
* Scintilla could allocate much more bytes.
*/
- if (!teco_memory_check(0, error))
- goto error;
+ ret = teco_memory_check(0, error);
+ if (!ret)
+ goto cleanup;
if (G_UNLIKELY(teco_interface_is_interrupted())) {
teco_error_interrupted_set(error);
- goto error;
+ ret = FALSE;
+ goto cleanup;
}
}
@@ -271,12 +292,14 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro
teco_interface_msg(TECO_MSG_WARNING,
"Inconsistent EOL styles normalized");
+cleanup:
teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0);
- return TRUE;
-error:
- teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0);
- return FALSE;
+ /* Re-create the line character index released above, on the same view. */
+ if (cp == SC_CP_UTF8)
+ teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
+
+ return ret;
}
/**
@@ -634,5 +657,24 @@ teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len)
* The sign bit in UCS-4/UTF-32 is unused, so this will even
* suffice if TECO_INTEGER == 32.
*/
- return (gint32)g_utf8_get_char_validated(buf, -1);
+ return *buf ? (gint32)g_utf8_get_char_validated(buf, -1) : 0;
+}
+
+void
+teco_view_process_notify(teco_view_t *ctx, SCNotification *notify)
+{
+#ifdef DEBUG
+ g_printf("SCINTILLA NOTIFY: code=%d\n", notify->nmhdr.code);
+#endif
+
+ /*
+ * The only notification acted upon here is SCN_STYLENEEDED;
+ * every other notification code falls through without effect
+ * (apart from the DEBUG trace above).
+ *
+ * Lexing in the container: only used for SciTECO.
+ *
+ * The "identifier" is abused to enable/disable lexing:
+ * teco_lexer_style() is called with notify->position only while
+ * SCI_GETIDENTIFIER reports a non-zero value for this view.
+ * It could be extended later on for several internal lexers.
+ * The alternative would be an ILexer5 wrapper, written in C++.
+ */
+ if (notify->nmhdr.code == SCN_STYLENEEDED &&
+ teco_view_ssm(ctx, SCI_GETIDENTIFIER, 0, 0) != 0)
+ teco_lexer_style(ctx, notify->position);
}
diff --git a/src/view.h b/src/view.h
index 8f54fdd..eebafbf 100644
--- a/src/view.h
+++ b/src/view.h
@@ -83,3 +83,5 @@ teco_int_t teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos);
gssize teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n);
teco_int_t teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len);
+
+void teco_view_process_notify(teco_view_t *ctx, SCNotification *notify);