diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 1 | ||||
-rw-r--r-- | src/core-commands.c | 25 | ||||
-rw-r--r-- | src/doc.c | 3 | ||||
-rw-r--r-- | src/error.h | 9 | ||||
-rw-r--r-- | src/goto-commands.c | 64 | ||||
-rw-r--r-- | src/interface-curses/interface.c | 30 | ||||
-rw-r--r-- | src/interface-gtk/interface.c | 2 | ||||
-rw-r--r-- | src/interface.c | 8 | ||||
-rw-r--r-- | src/interface.h | 1 | ||||
-rw-r--r-- | src/lexer.c | 250 | ||||
-rw-r--r-- | src/lexer.h | 36 | ||||
-rw-r--r-- | src/parser.h | 13 | ||||
-rw-r--r-- | src/qreg-commands.h | 2 | ||||
-rw-r--r-- | src/qreg.c | 14 | ||||
-rw-r--r-- | src/search.c | 12 | ||||
-rw-r--r-- | src/symbols.c | 108 | ||||
-rw-r--r-- | src/view.c | 66 | ||||
-rw-r--r-- | src/view.h | 2 |
18 files changed, 576 insertions, 70 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 1e2056e..055cde7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -52,6 +52,7 @@ libsciteco_base_la_SOURCES = main.c sciteco.h list.h \ help.c help.h \ rb3str.c rb3str.h \ symbols.c symbols.h \ + lexer.c lexer.h \ view.c view.h \ interface.c interface.h # NOTE: We cannot link in Scintilla (static library) into diff --git a/src/core-commands.c b/src/core-commands.c index 2f473ce..4ee0c5c 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -31,6 +31,7 @@ #include "expressions.h" #include "ring.h" #include "parser.h" +#include "lexer.h" #include "symbols.h" #include "search.h" #include "spawn.h" @@ -1293,7 +1294,8 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_start, .end_of_macro_cb = NULL, /* Allowed at the end of a macro! */ .is_start = TRUE, - .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE + .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE, + .style = SCE_SCITECO_COMMAND ); /*$ F< @@ -1450,7 +1452,9 @@ teco_state_fcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error teco_ascii_toupper(chr), error); } -TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_fcommand); +TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_fcommand, + .style = SCE_SCITECO_COMMAND +); static void teco_undo_change_dir_action(gchar **dir, gboolean run) @@ -1657,7 +1661,9 @@ teco_state_condcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **er return &teco_state_start; } -TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_condcommand); +TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_condcommand, + .style = SCE_SCITECO_OPERATOR +); /*$ ^_ negate * n^_ -> ~n -- Binary negation @@ -1984,6 +1990,8 @@ teco_state_control_last_range(teco_machine_main_t *ctx, GError **error) * * A common idiom \(lq^SC\(rq can be used for jumping to the * beginning of the matched pattern or inserted string. 
+ * Since the result is always negative, you can use \(lq^SR\(rq + * to skip the matched pattern after \fBFK\fP. */ static void teco_state_control_last_length(teco_machine_main_t *ctx, GError **error) @@ -2055,7 +2063,9 @@ teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error) teco_ascii_toupper(chr), error); } -TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_control); +TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_control, + .style = SCE_SCITECO_COMMAND +); static teco_state_t * teco_state_ascii_input(teco_machine_main_t *ctx, gunichar chr, GError **error) @@ -2188,7 +2198,8 @@ TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_escape, * when it comes to function key macro masking. */ .is_start = TRUE, - .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE + .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE, + .style = SCE_SCITECO_COMMAND ); /*$ EF close @@ -2956,7 +2967,9 @@ teco_state_ecommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error teco_ascii_toupper(chr), error); } -TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_ecommand); +TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_ecommand, + .style = SCE_SCITECO_COMMAND +); gboolean teco_state_insert_initial(teco_machine_main_t *ctx, GError **error) @@ -176,7 +176,8 @@ teco_doc_undo_set_string(teco_doc_t *ctx) * @param str Pointer to a variable to hold the return string. * It can be NULL if you are interested only in the string's length. * Strings must be freed via g_free(). - * @param len Where to store the string's length (mandatory). + * @param len Where to store the string's length or NULL + * if that information is not necessary. * @param codepage Where to store the document's codepage or NULL * if that information is not necessary. 
* diff --git a/src/error.h b/src/error.h index c51f528..021f759 100644 --- a/src/error.h +++ b/src/error.h @@ -17,6 +17,7 @@ #pragma once #include <glib.h> +#include <gmodule.h> #include "sciteco.h" #include "string-utils.h" @@ -53,6 +54,7 @@ typedef enum { TECO_ERROR_MEMLIMIT, TECO_ERROR_CLIPBOARD, TECO_ERROR_WIN32, + TECO_ERROR_MODULE, /** Interrupt current operation */ TECO_ERROR_INTERRUPTED, @@ -165,6 +167,13 @@ teco_error_win32_set(GError **error, const gchar *prefix, gint err) #endif static inline void +teco_error_module_set(GError **error, const gchar *prefix) +{ + g_set_error(error, TECO_ERROR, TECO_ERROR_MODULE, "%s: %s", + prefix, g_module_error()); +} + +static inline void teco_error_interrupted_set(GError **error) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_INTERRUPTED, "Interrupted"); diff --git a/src/goto-commands.c b/src/goto-commands.c index a8a9689..2035277 100644 --- a/src/goto-commands.c +++ b/src/goto-commands.c @@ -27,11 +27,15 @@ #include "string-utils.h" #include "expressions.h" #include "parser.h" +#include "lexer.h" #include "core-commands.h" #include "undo.h" #include "goto.h" #include "goto-commands.h" +TECO_DECLARE_STATE(teco_state_blockcomment); +TECO_DECLARE_STATE(teco_state_eolcomment); + teco_string_t teco_goto_skip_label = {NULL, 0}; static gboolean @@ -45,16 +49,18 @@ teco_state_label_initial(teco_machine_main_t *ctx, GError **error) * NOTE: The comma is theoretically not allowed in a label * (see <O> syntax), but is accepted anyway since labels * are historically used as comments. - * - * TODO: Add support for "true" comments of the form !* ... *! - * This would be almost trivial to implement, but if we don't - * want any (even temporary) overhead for comments at all, we need - * to add a new parser state. - * I'm unsure whether !-signs should be allowed within comments. + * SciTECO has true block and EOL comments, though as well. 
*/ static teco_state_t * teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { + if (!ctx->goto_label.len) { + switch (chr) { + case '*': return &teco_state_blockcomment; /* `!*` */ + case '!': return &teco_state_eolcomment; /* `!!` */ + } + } + if (chr == '!') { /* * NOTE: If the label already existed, its PC will be restored @@ -83,6 +89,12 @@ teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error) return &teco_state_start; } + /* + * The goto label is collected in parse-only mode as well + * since we could jump into a currently dead branch later. + * + * FIXME: Theoretically, we could avoid that at least in TECO_MODE_LEXING. + */ if (ctx->parent.must_undo) undo__teco_string_truncate(&ctx->goto_label, ctx->goto_label.len); teco_string_append_wc(&ctx->goto_label, chr); @@ -90,7 +102,8 @@ teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error) } TECO_DEFINE_STATE(teco_state_label, - .initial_cb = (teco_state_initial_cb_t)teco_state_label_initial + .initial_cb = (teco_state_initial_cb_t)teco_state_label_initial, + .style = SCE_SCITECO_LABEL ); static teco_state_t * @@ -169,3 +182,40 @@ gboolean teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine TECO_DEFINE_STATE_EXPECTSTRING(teco_state_goto, .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t)teco_state_goto_process_edit_cmd ); + +/* + * True comments: + * They don't add entries to the goto table. + * + * NOTE: This still needs some special handling in the Scintilla lexer + * (for syntax highlighting) since comments always start with `!`. + */ +#define TECO_DEFINE_STATE_COMMENT(NAME, ...) \ + TECO_DEFINE_STATE(NAME, \ + .style = SCE_SCITECO_COMMENT, \ + ##__VA_ARGS__ \ + ) + +static teco_state_t * +teco_state_blockcomment_star_input(teco_machine_main_t *ctx, gunichar chr, GError **error) +{ + return chr == '!' ? 
&teco_state_start : &teco_state_blockcomment; +} + +TECO_DEFINE_STATE_COMMENT(teco_state_blockcomment_star); + +static teco_state_t * +teco_state_blockcomment_input(teco_machine_main_t *ctx, gunichar chr, GError **error) +{ + return chr == '*' ? &teco_state_blockcomment_star : &teco_state_blockcomment; +} + +TECO_DEFINE_STATE_COMMENT(teco_state_blockcomment); + +static teco_state_t * +teco_state_eolcomment_input(teco_machine_main_t *ctx, gunichar chr, GError **error) +{ + return chr == '\n' ? &teco_state_start : &teco_state_eolcomment; +} + +TECO_DEFINE_STATE_COMMENT(teco_state_eolcomment); diff --git a/src/interface-curses/interface.c b/src/interface-curses/interface.c index 5984bcb..f713bc1 100644 --- a/src/interface-curses/interface.c +++ b/src/interface-curses/interface.c @@ -276,7 +276,7 @@ teco_xterm_version(void) static void teco_view_scintilla_notify(void *sci, int iMessage, SCNotification *notify, void *user_data) { - teco_interface_process_notify(notify); + teco_view_process_notify((teco_view_t *)sci, notify); } teco_view_t * @@ -575,13 +575,13 @@ teco_interface_init_screen(void) if (isatty(1)) { teco_interface.stdout_orig = dup(1); g_assert(teco_interface.stdout_orig >= 0); - FILE *stdout_new = g_freopen("/dev/null", "a+", stdout); + G_GNUC_UNUSED FILE *stdout_new = g_freopen("/dev/null", "a+", stdout); g_assert(stdout_new != NULL); } if (isatty(2)) { teco_interface.stderr_orig = dup(2); g_assert(teco_interface.stderr_orig >= 0); - FILE *stderr_new = g_freopen("/dev/null", "a+", stderr); + G_GNUC_UNUSED FILE *stderr_new = g_freopen("/dev/null", "a+", stderr); g_assert(stderr_new != NULL); } } @@ -1706,15 +1706,15 @@ teco_interface_event_loop_iter(void) return; #ifdef __PDCURSES__ - /* - * Especially PDCurses/WinGUI likes to report two keypresses, - * e.g. for CTRL+Shift+6 (CTRL+^). - * Make sure we don't filter out AltGr, which may be reported as CTRL+ALT. 
- */ - if ((PDC_get_key_modifiers() & - (PDC_KEY_MODIFIER_CONTROL | PDC_KEY_MODIFIER_ALT)) == PDC_KEY_MODIFIER_CONTROL && - !TECO_IS_CTL(key)) - return; + /* + * Especially PDCurses/WinGUI likes to report two keypresses, + * e.g. for CTRL+Shift+6 (CTRL+^). + * Make sure we don't filter out AltGr, which may be reported as CTRL+ALT. + */ + if ((PDC_get_key_modifiers() & + (PDC_KEY_MODIFIER_CONTROL | PDC_KEY_MODIFIER_ALT)) == PDC_KEY_MODIFIER_CONTROL && + !TECO_IS_CTL(key)) + return; #endif /* @@ -1723,10 +1723,10 @@ teco_interface_event_loop_iter(void) */ keybuf[keybuf_i++] = key; gsize len = keybuf_i; - gunichar cp = g_utf8_get_char_validated(keybuf, len); - if (keybuf_i >= sizeof(keybuf) || cp != (gunichar)-2) + gint32 cp = *keybuf ? g_utf8_get_char_validated(keybuf, len) : 0; + if (keybuf_i >= sizeof(keybuf) || cp != -2) keybuf_i = 0; - if ((gint32)cp < 0) + if (cp < 0) /* incomplete or invalid */ return; switch (teco_cmdline_keymacro(keybuf, len, error)) { diff --git a/src/interface-gtk/interface.c b/src/interface-gtk/interface.c index 829310a..0dbd2ba 100644 --- a/src/interface-gtk/interface.c +++ b/src/interface-gtk/interface.c @@ -112,7 +112,7 @@ static void teco_view_scintilla_notify(ScintillaObject *sci, gint iMessage, SCNotification *notify, gpointer user_data) { - teco_interface_process_notify(notify); + teco_view_process_notify((teco_view_t *)sci, notify); } teco_view_t * diff --git a/src/interface.c b/src/interface.c index 2e2d64e..2973dd2 100644 --- a/src/interface.c +++ b/src/interface.c @@ -110,11 +110,3 @@ teco_interface_stdio_vmsg(teco_msg_t type, const gchar *fmt, va_list ap) g_vfprintf(stream, fmt, ap); fputc('\n', stream); } - -void -teco_interface_process_notify(SCNotification *notify) -{ -#ifdef DEBUG - g_printf("SCINTILLA NOTIFY: code=%d\n", notify->nmhdr.code); -#endif -} diff --git a/src/interface.h b/src/interface.h index 32db6b5..80da8d9 100644 --- a/src/interface.h +++ b/src/interface.h @@ -149,7 +149,6 @@ gboolean 
teco_interface_event_loop(GError **error); */ /** @protected */ void teco_interface_stdio_vmsg(teco_msg_t type, const gchar *fmt, va_list ap); -void teco_interface_process_notify(SCNotification *notify); /** @pure */ void teco_interface_cleanup(void); diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..c0c7847 --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,250 @@ +/* + * Copyright (C) 2012-2024 Robin Haberkorn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <string.h> + +#include <glib.h> + +#include "sciteco.h" +#include "view.h" +#include "parser.h" +#include "lexer.h" + +static teco_style_t +teco_lexer_getstyle(teco_view_t *view, teco_machine_main_t *machine, + gunichar chr) +{ + teco_style_t style = machine->parent.current->style; + + /* + * FIXME: At least this special workaround for numbers might be + * unnecessary once we get a special parser state for parsing numbers. + * + * FIXME: What about ^* and ^/? + * They are currently highlighted as commands. 
+ */ + if (machine->parent.current->keymacro_mask & TECO_KEYMACRO_MASK_START && + chr <= 0xFF) { + if (g_ascii_isdigit(chr)) + style = SCE_SCITECO_NUMBER; + else if (strchr("+-*/#&", chr)) + style = SCE_SCITECO_OPERATOR; + } + + /* + * FIXME: Perhaps as an optional lexer property, we should support + * styling commands with SCE_SCITECO_DEFAULT or SCE_SCITECO_COMMAND + * in alternating order, so you can discern chains of commands. + */ + if (!teco_machine_input(&machine->parent, chr, NULL)) { + /* + * Probably a syntax error, so the erroneous symbol + * is highlighted and we reset the parser's state machine. + * + * FIXME: Perhaps we should simply reset the state to teco_state_start? + */ + gsize macro_pc = machine->macro_pc; + teco_machine_main_clear(machine); + teco_machine_main_init(machine, NULL, FALSE); + machine->mode = TECO_MODE_LEXING; + machine->macro_pc = macro_pc; + + return SCE_SCITECO_INVALID; + } + + /* + * Don't highlight the leading `!` in comments as SCE_SCITECO_COMMAND. + * True comments also begin with `!`, so make sure they are highlighted + * already from the second character. + * This is then extended back by one character in teco_lexer_step(). 
+ */ + switch (machine->parent.current->style) { + case SCE_SCITECO_COMMENT: + case SCE_SCITECO_LABEL: + return machine->parent.current->style; + default: + break; + } + + return style; +} + +static void +teco_lexer_step(teco_view_t *view, teco_machine_main_t *machine, + teco_machine_main_t *macrodef_machine, + const gchar *macro, gsize start, gsize max_len, + guint *cur_line, guint *cur_col, gint *safe_col) +{ + if (*cur_line == 0 && *cur_col == 0 && *macro == '#') { + /* hash-bang line */ + machine->macro_pc = teco_view_ssm(view, SCI_POSITIONFROMLINE, 1, 0); + teco_view_ssm(view, SCI_STARTSTYLING, 0, 0); + teco_view_ssm(view, SCI_SETSTYLING, machine->macro_pc, SCE_SCITECO_COMMENT); + teco_view_ssm(view, SCI_SETLINESTATE, 0, -1); + (*cur_line)++; + *safe_col = 0; + return; + } + + gssize old_pc = machine->macro_pc; + + teco_style_t style = SCE_SCITECO_DEFAULT; + + /* + * g_utf8_get_char_validated() sometimes(?) returns -2 for "\0". + */ + gint32 chr = macro[machine->macro_pc] + ? g_utf8_get_char_validated(macro+machine->macro_pc, + max_len-machine->macro_pc) : 0; + if (chr < 0) { + /* + * Invalid UTF-8 byte sequence: + * A source file could contain all sorts of data garbage or + * you could manually M[lexer.set.sciteco] on an ANSI-encoded file. + */ + machine->macro_pc++; + style = SCE_SCITECO_INVALID; + } else { + machine->macro_pc = g_utf8_next_char(macro+machine->macro_pc) - macro; + + gunichar escape_char = machine->expectstring.machine.escape_char; + style = teco_lexer_getstyle(view, machine, chr); + + /* + * Optionally style @^Uq{ ... } contents like macro definitions. + * The curly braces will be styled like regular commands. + * + * FIXME: This will not work with nested macro definitions. + * FIXME: This cannot currently be disabled since SCI_SETPROPERTY + * cannot be accessed with ES. + * We could only map it to an ED flag. 
+ */ + if ((escape_char == '{' || machine->expectstring.machine.escape_char == '{') && + teco_view_ssm(view, SCI_GETPROPERTYINT, (uptr_t)"lexer.sciteco.macrodef", TRUE)) + style = teco_lexer_getstyle(view, macrodef_machine, chr); + } + + *cur_col += machine->macro_pc - old_pc; + + /* + * True comments begin with `!*` or `!!`, but only the second character gets + * the correct style by default, so we extend it backwards. + */ + if (style == SCE_SCITECO_COMMENT) + old_pc--; + + teco_view_ssm(view, SCI_STARTSTYLING, start+old_pc, 0); + teco_view_ssm(view, SCI_SETSTYLING, machine->macro_pc-old_pc, style); + + if (chr == '\n') { + /* update line state to the last column with a clean start state */ + teco_view_ssm(view, SCI_SETLINESTATE, *cur_line, *safe_col); + (*cur_line)++; + *cur_col = 0; + *safe_col = -1; /* no clean state by default */ + } + + if (style != SCE_SCITECO_INVALID && + machine->parent.current->keymacro_mask & TECO_KEYMACRO_MASK_START && + !machine->modifier_at) + /* clean parser state */ + *safe_col = *cur_col; +} + +/** + * Style SciTECO source code, i.e. perform syntax highlighting + * for the SciTECO language. + * + * @para view The Scintilla view to operate on. + * @para end The position in bytes where to stop styling. + */ +void +teco_lexer_style(teco_view_t *view, gsize end) +{ + /* should always be TRUE */ + gboolean old_undo_enabled = teco_undo_enabled; + teco_undo_enabled = FALSE; + + gsize start = teco_view_ssm(view, SCI_GETENDSTYLED, 0, 0); + guint start_line = teco_view_ssm(view, SCI_LINEFROMPOSITION, start, 0); + gint start_col = 0; + + /* + * The line state stores the laster character (column) in bytes, + * that starts from a fresh parser state. + * It's -1 if the line does not have a clean parser state. + * Therefore we search for the first line before `start` that has a + * known clean parser state. 
+ */ + if (start_line > 0) { + do + start_line--; + while ((start_col = teco_view_ssm(view, SCI_GETLINESTATE, start_line, 0)) < 0 && + start_line > 0); + start_col = MAX(start_col, 0); + } + start = teco_view_ssm(view, SCI_POSITIONFROMLINE, start_line, 0) + start_col; + g_assert(end > start); + + g_auto(teco_machine_main_t) machine; + teco_machine_main_init(&machine, NULL, FALSE); + machine.mode = TECO_MODE_LEXING; + + /* for lexing the contents of @^Uq{...} */ + g_auto(teco_machine_main_t) macrodef_machine; + teco_machine_main_init(&macrodef_machine, NULL, FALSE); + macrodef_machine.mode = TECO_MODE_LEXING; + + g_assert(start_col >= 0); + guint col = start_col; + + /* + * NOTE: We could have also used teco_view_get_character(), + * but this will use much less Scintilla messages without + * removing dot. + */ + const gchar *macro; + sptr_t gap = teco_view_ssm(view, SCI_GETGAPPOSITION, 0, 0); + if (start < gap && gap < end) { + macro = (const gchar *)teco_view_ssm(view, SCI_GETRANGEPOINTER, start, gap); + while (machine.macro_pc < gap-start) + teco_lexer_step(view, &machine, &macrodef_machine, + macro, start, gap-start, + &start_line, &col, &start_col); + /* + * This might have lexed more than gap-start bytes + * (e.g. 
a hash-bang line) + */ + start += machine.macro_pc; + } + + macro = (const gchar *)teco_view_ssm(view, SCI_GETRANGEPOINTER, start, end-start); + machine.macro_pc = 0; + while (machine.macro_pc < end-start) + teco_lexer_step(view, &machine, &macrodef_machine, + macro, start, end-start, + &start_line, &col, &start_col); + + /* set line state on the very last line */ + teco_view_ssm(view, SCI_SETLINESTATE, start_line, start_col); + + teco_undo_enabled = old_undo_enabled; +} diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..87b0d0f --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2012-2024 Robin Haberkorn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#pragma once + +#include <glib.h> + +#include "view.h" + +/** Scintilla style ids for lexing SciTECO code */ +typedef enum { + SCE_SCITECO_DEFAULT = 0, + SCE_SCITECO_COMMAND = 1, + SCE_SCITECO_OPERATOR = 2, + SCE_SCITECO_QREG = 3, + SCE_SCITECO_STRING = 4, + SCE_SCITECO_NUMBER = 5, + SCE_SCITECO_LABEL = 6, + SCE_SCITECO_COMMENT = 7, + SCE_SCITECO_INVALID = 8 +} teco_style_t; + +void teco_lexer_style(teco_view_t *view, gsize end); diff --git a/src/parser.h b/src/parser.h index 20f73fb..7ca5ab3 100644 --- a/src/parser.h +++ b/src/parser.h @@ -27,6 +27,7 @@ #include "goto.h" #include "undo.h" #include "qreg.h" +#include "lexer.h" /* * Forward Declarations @@ -203,6 +204,12 @@ struct teco_state_t { teco_keymacro_mask_t keymacro_mask : 8; /** + * Scintilla style to apply to all input characters in this state + * when syntax highlighting SciTECO code. + */ + teco_style_t style : 8; + + /** * Additional state-dependent callbacks and settings. * This wastes some bytes compared to other techniques for extending teco_state_t * but this is acceptable since there is only a limited number of constant instances. 
@@ -241,6 +248,7 @@ gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent .process_edit_cmd_cb = teco_state_process_edit_cmd, \ .is_start = FALSE, \ .keymacro_mask = TECO_KEYMACRO_MASK_DEFAULT, \ + .style = SCE_SCITECO_DEFAULT, \ ##__VA_ARGS__ \ } @@ -441,7 +449,9 @@ typedef enum { /** Parse, but don't execute until reaching end of conditional or its else-clause */ TECO_MODE_PARSE_ONLY_COND, /** Parse, but don't execute until reaching the very end of conditional */ - TECO_MODE_PARSE_ONLY_COND_FORCE + TECO_MODE_PARSE_ONLY_COND_FORCE, + /** Parse, but don't execute until end of macro (for Scintilla lexing) */ + TECO_MODE_LEXING } teco_mode_t; /** @extends teco_machine_t */ @@ -568,6 +578,7 @@ gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \ teco_state_expectstring_process_edit_cmd, \ .keymacro_mask = TECO_KEYMACRO_MASK_STRING, \ + .style = SCE_SCITECO_STRING, \ .expectstring.string_building = TRUE, \ .expectstring.last = TRUE, \ .expectstring.process_cb = NULL, /* do nothing */ \ diff --git a/src/qreg-commands.h b/src/qreg-commands.h index 27a6a5c..d999587 100644 --- a/src/qreg-commands.h +++ b/src/qreg-commands.h @@ -20,6 +20,7 @@ #include "sciteco.h" #include "parser.h" +#include "lexer.h" #include "qreg.h" static inline void @@ -55,6 +56,7 @@ gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_m .initial_cb = (teco_state_initial_cb_t)teco_state_expectqreg_initial, \ .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \ teco_state_expectqreg_process_edit_cmd, \ + .style = SCE_SCITECO_QREG, \ .expectqreg.type = TECO_QREG_REQUIRED, \ .expectqreg.got_register_cb = NAME##_got_register, /* always required */ \ ##__VA_ARGS__ \ @@ -539,7 +539,7 @@ teco_qreg_external_get_character(teco_qreg_t *qreg, teco_int_t position, * The sign bit in UCS-4/UTF-32 is unused, so this will even * suffice if TECO_INTEGER == 32. 
*/ - *chr = (gint32)g_utf8_get_char_validated(p, -1); + *chr = *p ? (gint32)g_utf8_get_char_validated(p, -1) : 0; return TRUE; } @@ -665,9 +665,6 @@ teco_qreg_bufferinfo_append_string(teco_qreg_t *qreg, const gchar *str, gsize le return FALSE; } -/* - * NOTE: The `string` component is currently unused on the "*" register. - */ static gboolean teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, guint *codepage, GError **error) @@ -684,7 +681,8 @@ teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, /* * NOTE: teco_file_normalize_path() does not change the size of the string. */ - *len = teco_ring_current->filename ? strlen(teco_ring_current->filename) : 0; + if (len) + *len = teco_ring_current->filename ? strlen(teco_ring_current->filename) : 0; if (codepage) *codepage = teco_default_codepage(); return TRUE; @@ -775,7 +773,8 @@ teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, * the return value for str == NULL is still correct. 
*/ gchar *dir = g_get_current_dir(); - *len = strlen(dir); + if (len) + *len = strlen(dir); if (str) *str = teco_file_normalize_path(dir); else @@ -919,11 +918,12 @@ teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, &str_converted.len, error) == G_IO_STATUS_ERROR) return FALSE; + if (len) + *len = str_converted.len; if (str) *str = str_converted.data; else teco_string_clear(&str_converted); - *len = str_converted.len; if (codepage) *codepage = teco_default_codepage(); diff --git a/src/search.c b/src/search.c index e05a6b9..1945f5c 100644 --- a/src/search.c +++ b/src/search.c @@ -1075,18 +1075,24 @@ teco_state_search_kill_done(teco_machine_main_t *ctx, const teco_string_t *str, if (teco_search_parameters.dot < dot) { /* kill forwards */ sptr_t anchor = teco_interface_ssm(SCI_GETANCHOR, 0, 0); + gsize len = anchor - teco_search_parameters.dot; if (teco_current_doc_must_undo()) undo__teco_interface_ssm(SCI_GOTOPOS, dot, 0); teco_interface_ssm(SCI_GOTOPOS, anchor, 0); - teco_interface_ssm(SCI_DELETERANGE, teco_search_parameters.dot, - anchor - teco_search_parameters.dot); + teco_interface_ssm(SCI_DELETERANGE, teco_search_parameters.dot, len); /* NOTE: An undo action is not always created. */ if (teco_current_doc_must_undo() && teco_search_parameters.dot != anchor) undo__teco_interface_ssm(SCI_UNDO, 0, 0); + + /* fix up ranges (^Y) */ + for (guint i = 0; i < teco_ranges_count; i++) { + teco_ranges[i].from -= len; + teco_ranges[i].to -= len; + } } else { /* kill backwards */ teco_interface_ssm(SCI_DELETERANGE, dot, teco_search_parameters.dot - dot); @@ -1113,7 +1119,7 @@ teco_state_search_kill_done(teco_machine_main_t *ctx, const teco_string_t *str, * from,to:FK[pattern]$ -> Success|Failure * * \fBFK\fP searches for <pattern> just like the regular search - * command (\fBS\fP) but when found deletes all text from dot + * command (\fBS\fP) but when found, deletes all text from dot * up to but not including the found text instance. 
* When searching backwards the characters beginning after * the occurrence of <pattern> up to dot are deleted. diff --git a/src/symbols.c b/src/symbols.c index 944d01d..7198639 100644 --- a/src/symbols.c +++ b/src/symbols.c @@ -23,6 +23,7 @@ #include <string.h> #include <glib.h> +#include <gmodule.h> #include <Scintilla.h> #ifdef HAVE_LEXILLA @@ -288,6 +289,27 @@ gboolean teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, * Lexilla lexer name as a string argument for the \fBSCI_SETILEXER\fP * message, i.e. in order to load a Lexilla lexer * (this works similar to the old \fBSCI_SETLEXERLANGUAGE\fP message). + * If the lexer name contains a null-byte, the second string + * argument is split into two: + * Up until the null-byte, the path of an external lexer library + * (shared library or DLL) is expected, + * that implements the Lexilla protocol. + * The \(lq.so\(rq or \(lq.dll\(rq extension is optional. + * The concrete lexer name is the remaining of the string after + * the null-byte. + * This allows you to use lexers from external lexer libraries + * like Scintillua. + * When detecting Scintillua, \*(ST will automatically pass down + * the \fBSCITECO_SCINTILLUA_LEXERS\fP environment variable as + * the \(lqscintillua.lexers\(rq library property for specifying + * the location of Scintillua's Lua lexer files. + * + * In order to facilitate the use of Scintillua lexers, the semantics + * of \fBSCI_NAMEOFSTYLE\fP have also been changed. + * Instead of returning the name for a given style id, it now + * returns the style id when given the name of a style in the + * second string argument of \fBES\fP, i.e. it allows you + * to look up style ids by name. 
* * .BR Warning : * Almost all Scintilla messages may be dispatched using @@ -321,24 +343,94 @@ teco_state_scintilla_lparam_done(teco_machine_main_t *ctx, const teco_string_t * sptr_t lParam = 0; -#ifdef HAVE_LEXILLA - if (ctx->scintilla.iMessage == SCI_SETILEXER) { + if (ctx->scintilla.iMessage == SCI_NAMEOFSTYLE) { + /* + * FIXME: This customized version of SCI_NAMEOFSTYLE could be avoided + * if we had a way to call Scintilla messages that return strings into + * Q-Registers. + */ if (teco_string_contains(str, '\0')) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, - "Lexer name must not contain null-byte."); + "Style name must not contain null-byte."); return NULL; } - const gchar *lexer = str->data ? : ""; - lParam = (sptr_t)CreateLexer(lexer); + /* + * FIXME: Should we cache the name to style id? + */ + guint count = teco_interface_ssm(SCI_GETNAMEDSTYLES, 0, 0); + for (guint id = 0; id < count; id++) { + gchar style[128] = ""; + teco_interface_ssm(SCI_NAMEOFSTYLE, id, (sptr_t)style); + if (!teco_string_cmp(str, style, strlen(style))) { + teco_expressions_push(id); + return &teco_state_start; + } + } + + g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, + "Style name \"%s\" not found.", str->data ? : ""); + return NULL; + } +#ifdef HAVE_LEXILLA + else if (ctx->scintilla.iMessage == SCI_SETILEXER) { + CreateLexerFn CreateLexerFn = CreateLexer; + + const gchar *lexer = memchr(str->data ? : "", '\0', str->len); + if (lexer) { + /* external lexer */ + lexer++; + + /* + * NOTE: The same module can be opened multiple times. + * They are internally reference counted. 
+ */ + GModule *module = g_module_open(str->data, G_MODULE_BIND_LAZY); + if (!module) { + teco_error_module_set(error, "Error opening lexer module"); + return NULL; + } + + GetNameSpaceFn GetNameSpaceFn; + SetLibraryPropertyFn SetLibraryPropertyFn; + + if (!g_module_symbol(module, LEXILLA_GETNAMESPACE, (gpointer *)&GetNameSpaceFn) || + !g_module_symbol(module, LEXILLA_SETLIBRARYPROPERTY, (gpointer *)&SetLibraryPropertyFn) || + !g_module_symbol(module, LEXILLA_CREATELEXER, (gpointer *)&CreateLexerFn)) { + teco_error_module_set(error, "Cannot find lexer function"); + return NULL; + } + + if (!g_strcmp0(GetNameSpaceFn(), "scintillua")) { + /* + * Scintillua's lexer directory must be configured before calling CreateLexer(). + * + * FIXME: In Scintillua distributions, the lexers are usually contained in the + * same directory as the prebuilt shared libraries. + * Perhaps we should default scintillua.lexers to the dirname in str->data? + */ + teco_qreg_t *reg = teco_qreg_table_find(&teco_qreg_table_globals, "$SCITECO_SCINTILLUA_LEXERS", 26); + if (reg) { + teco_string_t dir; + if (!reg->vtable->get_string(reg, &dir.data, &dir.len, NULL, error)) + return NULL; + SetLibraryPropertyFn("scintillua.lexers", dir.data ? : ""); + } + } + } else { + /* Lexilla lexer */ + lexer = str->data ? : ""; + } + + lParam = (sptr_t)CreateLexerFn(lexer); if (!lParam) { g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, - "Lexilla lexer \"%s\" not found.", lexer); + "Lexer \"%s\" not found.", lexer); return NULL; } - } else + } #endif - if (str->len > 0) { + else if (str->len > 0) { /* * NOTE: There may even be messages that read strings * with embedded nulls. 
@@ -46,6 +46,7 @@ #include "qreg.h" #include "eol.h" #include "memory.h" +#include "lexer.h" #include "view.h" /** @memberof teco_view_t */ @@ -205,9 +206,24 @@ teco_view_set_representations(teco_view_t *ctx) gboolean teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **error) { + gboolean ret = TRUE; + g_auto(teco_eol_reader_t) reader; teco_eol_reader_init_gio(&reader, channel); + /* + * Temporarily disable the line character index. + * This tremendously speeds up reading UTF-8 documents. + * The reason is, that UTF-8 consistency checks are rather + * costly. Also, when reading in chunks of 1024 bytes, + * we can very well add incomplete UTF-8 sequences, + * resulting in unnecessary recalculations of the line index. + */ + guint cp = teco_view_get_codepage(ctx); + if (cp == SC_CP_UTF8) + teco_interface_ssm(SCI_RELEASELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + teco_view_ssm(ctx, SCI_BEGINUNDOACTION, 0, 0); teco_view_ssm(ctx, SCI_CLEARALL, 0, 0); @@ -221,8 +237,9 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro struct stat stat_buf = {.st_size = 0}; if (!fstat(g_io_channel_unix_get_fd(channel), &stat_buf) && stat_buf.st_size > 0) { - if (!teco_memory_check(stat_buf.st_size, error)) - goto error; + ret = teco_memory_check(stat_buf.st_size, error); + if (!ret) + goto cleanup; teco_view_ssm(ctx, SCI_ALLOCATE, stat_buf.st_size, 0); } @@ -234,8 +251,10 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro teco_string_t str; GIOStatus rc = teco_eol_reader_convert(&reader, &str.data, &str.len, error); - if (rc == G_IO_STATUS_ERROR) - goto error; + if (rc == G_IO_STATUS_ERROR) { + ret = FALSE; + goto cleanup; + } if (rc == G_IO_STATUS_EOF) break; @@ -245,12 +264,14 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro * Even if we checked initially, knowing the file size, * Scintilla could allocate much more bytes. 
*/ - if (!teco_memory_check(0, error)) - goto error; + ret = teco_memory_check(0, error); + if (!ret) + goto cleanup; if (G_UNLIKELY(teco_interface_is_interrupted())) { teco_error_interrupted_set(error); - goto error; + ret = FALSE; + goto cleanup; } } @@ -271,12 +292,14 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro teco_interface_msg(TECO_MSG_WARNING, "Inconsistent EOL styles normalized"); +cleanup: teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0); - return TRUE; -error: - teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0); - return FALSE; + if (cp == SC_CP_UTF8) + teco_interface_ssm(SCI_ALLOCATELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + + return ret; } /** @@ -634,5 +657,24 @@ teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len) * The sign bit in UCS-4/UTF-32 is unused, so this will even * suffice if TECO_INTEGER == 32. */ - return (gint32)g_utf8_get_char_validated(buf, -1); + return *buf ? (gint32)g_utf8_get_char_validated(buf, -1) : 0; +} + +void +teco_view_process_notify(teco_view_t *ctx, SCNotification *notify) +{ +#ifdef DEBUG + g_printf("SCINTILLA NOTIFY: code=%d\n", notify->nmhdr.code); +#endif + + /* + * Lexing in the container: only used for SciTECO. + * + * The "identifier" is abused to enable/disable lexing. + * It could be extended later on for several internal lexers. + * The alternative would be an ILexer5 wrapper, written in C++. + */ + if (notify->nmhdr.code == SCN_STYLENEEDED && + teco_view_ssm(ctx, SCI_GETIDENTIFIER, 0, 0) != 0) + teco_lexer_style(ctx, notify->position); } @@ -83,3 +83,5 @@ teco_int_t teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos); gssize teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n); teco_int_t teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len); + +void teco_view_process_notify(teco_view_t *ctx, SCNotification *notify); |