diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 1 | ||||
-rw-r--r-- | src/core-commands.c | 20 | ||||
-rw-r--r-- | src/goto-commands.c | 4 | ||||
-rw-r--r-- | src/interface-curses/interface.c | 2 | ||||
-rw-r--r-- | src/interface-gtk/interface.c | 2 | ||||
-rw-r--r-- | src/interface.c | 8 | ||||
-rw-r--r-- | src/interface.h | 1 | ||||
-rw-r--r-- | src/lexer.c | 235 | ||||
-rw-r--r-- | src/lexer.h | 36 | ||||
-rw-r--r-- | src/parser.h | 13 | ||||
-rw-r--r-- | src/qreg-commands.h | 2 | ||||
-rw-r--r-- | src/symbols.c | 11 | ||||
-rw-r--r-- | src/view.c | 13 | ||||
-rw-r--r-- | src/view.h | 2 |
14 files changed, 329 insertions, 21 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 1e2056e..055cde7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -52,6 +52,7 @@ libsciteco_base_la_SOURCES = main.c sciteco.h list.h \ help.c help.h \ rb3str.c rb3str.h \ symbols.c symbols.h \ + lexer.c lexer.h \ view.c view.h \ interface.c interface.h # NOTE: We cannot link in Scintilla (static library) into diff --git a/src/core-commands.c b/src/core-commands.c index 2f473ce..52b577d 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -31,6 +31,7 @@ #include "expressions.h" #include "ring.h" #include "parser.h" +#include "lexer.h" #include "symbols.h" #include "search.h" #include "spawn.h" @@ -1293,7 +1294,8 @@ teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_start, .end_of_macro_cb = NULL, /* Allowed at the end of a macro! */ .is_start = TRUE, - .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE + .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE, + .style = SCE_SCITECO_COMMAND ); /*$ F< @@ -1450,7 +1452,9 @@ teco_state_fcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error teco_ascii_toupper(chr), error); } -TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_fcommand); +TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_fcommand, + .style = SCE_SCITECO_COMMAND +); static void teco_undo_change_dir_action(gchar **dir, gboolean run) @@ -1657,7 +1661,9 @@ teco_state_condcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **er return &teco_state_start; } -TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_condcommand); +TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_condcommand, + .style = SCE_SCITECO_OPERATOR +); /*$ ^_ negate * n^_ -> ~n -- Binary negation @@ -2055,7 +2061,9 @@ teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error) teco_ascii_toupper(chr), error); } -TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_control); 
+TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_control, + .style = SCE_SCITECO_COMMAND +); static teco_state_t * teco_state_ascii_input(teco_machine_main_t *ctx, gunichar chr, GError **error) @@ -2956,7 +2964,9 @@ teco_state_ecommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error teco_ascii_toupper(chr), error); } -TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_ecommand); +TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_ecommand, + .style = SCE_SCITECO_COMMAND +); gboolean teco_state_insert_initial(teco_machine_main_t *ctx, GError **error) diff --git a/src/goto-commands.c b/src/goto-commands.c index a8a9689..2144fb0 100644 --- a/src/goto-commands.c +++ b/src/goto-commands.c @@ -27,6 +27,7 @@ #include "string-utils.h" #include "expressions.h" #include "parser.h" +#include "lexer.h" #include "core-commands.h" #include "undo.h" #include "goto.h" @@ -90,7 +91,8 @@ teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error) } TECO_DEFINE_STATE(teco_state_label, - .initial_cb = (teco_state_initial_cb_t)teco_state_label_initial + .initial_cb = (teco_state_initial_cb_t)teco_state_label_initial, + .style = SCE_SCITECO_LABEL ); static teco_state_t * diff --git a/src/interface-curses/interface.c b/src/interface-curses/interface.c index 3cff2d8..b9ca516 100644 --- a/src/interface-curses/interface.c +++ b/src/interface-curses/interface.c @@ -276,7 +276,7 @@ teco_xterm_version(void) static void teco_view_scintilla_notify(void *sci, int iMessage, SCNotification *notify, void *user_data) { - teco_interface_process_notify(notify); + teco_view_process_notify((teco_view_t *)sci, notify); } teco_view_t * diff --git a/src/interface-gtk/interface.c b/src/interface-gtk/interface.c index 829310a..0dbd2ba 100644 --- a/src/interface-gtk/interface.c +++ b/src/interface-gtk/interface.c @@ -112,7 +112,7 @@ static void teco_view_scintilla_notify(ScintillaObject *sci, gint iMessage, SCNotification *notify, gpointer user_data) { - teco_interface_process_notify(notify); 
+ teco_view_process_notify((teco_view_t *)sci, notify); } teco_view_t * diff --git a/src/interface.c b/src/interface.c index 2e2d64e..2973dd2 100644 --- a/src/interface.c +++ b/src/interface.c @@ -110,11 +110,3 @@ teco_interface_stdio_vmsg(teco_msg_t type, const gchar *fmt, va_list ap) g_vfprintf(stream, fmt, ap); fputc('\n', stream); } - -void -teco_interface_process_notify(SCNotification *notify) -{ -#ifdef DEBUG - g_printf("SCINTILLA NOTIFY: code=%d\n", notify->nmhdr.code); -#endif -} diff --git a/src/interface.h b/src/interface.h index 32db6b5..80da8d9 100644 --- a/src/interface.h +++ b/src/interface.h @@ -149,7 +149,6 @@ gboolean teco_interface_event_loop(GError **error); */ /** @protected */ void teco_interface_stdio_vmsg(teco_msg_t type, const gchar *fmt, va_list ap); -void teco_interface_process_notify(SCNotification *notify); /** @pure */ void teco_interface_cleanup(void); diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..4fbc313 --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2012-2024 Robin Haberkorn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <glib.h> + +#include "sciteco.h" +#include "view.h" +#include "parser.h" +#include "lexer.h" + +static teco_style_t +teco_lexer_getstyle(teco_view_t *view, teco_machine_main_t *machine, + gunichar chr) +{ + teco_style_t style = machine->parent.current->style; + + /* + * FIXME: At least this special workaround for numbers might be + * unnecessary once we get a special parser state for parsing numbers. + * + * FIXME: What about ^* and ^/? + * They are currently highlighted as commands. + */ + if (machine->parent.current->keymacro_mask & TECO_KEYMACRO_MASK_START) { + switch (chr) { + case '0'...'9': + style = SCE_SCITECO_NUMBER; + break; + case '+': + case '-': + case '*': + case '/': + case '#': + case '&': + style = SCE_SCITECO_OPERATOR; + break; + } + } + + /* + * FIXME: Perhaps as an optional lexer property, we should support + * styling commands with SCE_SCITECO_DEFAULT or SCE_SCITECO_COMMAND + * in alternating order, so you can discern chains of commands. + */ + if (!teco_machine_input(&machine->parent, chr, NULL)) { + /* + * Probably a syntax error, so the erroneous symbol + * is highlighted and we reset the parser's state machine. + */ + style = SCE_SCITECO_INVALID; + + /* + * FIXME: Perhaps we should simply reset the state to teco_state_start? 
+ */ + gsize macro_pc = machine->macro_pc; + teco_machine_main_clear(machine); + teco_machine_main_init(machine, NULL, FALSE); + machine->mode = TECO_MODE_LEXING; + machine->macro_pc = macro_pc; + } else if (machine->parent.current->style == SCE_SCITECO_LABEL) { + /* don't highlight the leading `!` as SCE_SCITECO_COMMAND */ + style = SCE_SCITECO_LABEL; + } + + return style; +} + +static void +teco_lexer_step(teco_view_t *view, teco_machine_main_t *machine, + teco_machine_main_t *macrodef_machine, + const gchar *macro, gsize start, gsize max_len, + guint *cur_line, guint *cur_col, gint *safe_col) +{ + if (*cur_line == 0 && *cur_col == 0 && *macro == '#') { + /* hash-bang line */ + machine->macro_pc = teco_view_ssm(view, SCI_POSITIONFROMLINE, 1, 0); + teco_view_ssm(view, SCI_STARTSTYLING, 0, 0); + teco_view_ssm(view, SCI_SETSTYLING, machine->macro_pc, SCE_SCITECO_COMMENT); + teco_view_ssm(view, SCI_SETLINESTATE, 0, -1); + (*cur_line)++; + *safe_col = 0; + return; + } + + gsize old_pc = machine->macro_pc; + + teco_style_t style = SCE_SCITECO_DEFAULT; + + gint32 chr = g_utf8_get_char_validated(macro+machine->macro_pc, + max_len-machine->macro_pc); + if (chr < 0) { + /* + * Invalid UTF-8 byte sequence: + * A source file could contain all sorts of data garbage or + * you could manually M[lexer.set.sciteco] on an ANSI-encoded file. + */ + machine->macro_pc++; + style = SCE_SCITECO_INVALID; + } else { + machine->macro_pc = g_utf8_next_char(macro+machine->macro_pc) - macro; + + gunichar escape_char = machine->expectstring.machine.escape_char; + style = teco_lexer_getstyle(view, machine, chr); + + /* + * Optionally style @^Uq{ ... } contents like macro definitions. + * The curly braces will be styled like regular commands. + * + * FIXME: This will not work with nested macro definitions. + * FIXME: This cannot currently be disabled since SCI_SETPROPERTY + * cannot be accessed with ES. + * We could only map it to an ED flag. 
+ */ + if ((escape_char == '{' || machine->expectstring.machine.escape_char == '{') && + teco_view_ssm(view, SCI_GETPROPERTYINT, (uptr_t)"lexer.sciteco.macrodef", TRUE)) + style = teco_lexer_getstyle(view, macrodef_machine, chr); + } + + *cur_col += machine->macro_pc - old_pc; + + teco_view_ssm(view, SCI_STARTSTYLING, start+old_pc, 0); + teco_view_ssm(view, SCI_SETSTYLING, machine->macro_pc-old_pc, style); + + if (chr == '\n') { + /* update line state to the last column with a clean start state */ + teco_view_ssm(view, SCI_SETLINESTATE, *cur_line, *safe_col); + (*cur_line)++; + *cur_col = 0; + *safe_col = -1; /* no clean state by default */ + } + + if (style != SCE_SCITECO_INVALID && + machine->parent.current->keymacro_mask & TECO_KEYMACRO_MASK_START && + !machine->modifier_at) + /* clean parser state */ + *safe_col = *cur_col; +} + +/** + * Style SciTECO source code, i.e. perform syntax highlighting + * for the SciTECO language. + * + * @param view The Scintilla view to operate on. + * @param end The position in bytes where to stop styling. + */ +void +teco_lexer_style(teco_view_t *view, gsize end) +{ + /* should always be TRUE */ + gboolean old_undo_enabled = teco_undo_enabled; + teco_undo_enabled = FALSE; + + gsize start = teco_view_ssm(view, SCI_GETENDSTYLED, 0, 0); + guint start_line = teco_view_ssm(view, SCI_LINEFROMPOSITION, start, 0); + gint start_col = 0; + + /* + * The line state stores the last character (column) in bytes, + * that starts from a fresh parser state. + * It's -1 if the line does not have a clean parser state. + * Therefore we search for the first line before `start` that has a + * known clean parser state. 
+ */ + if (start_line > 0) { + do + start_line--; + while ((start_col = teco_view_ssm(view, SCI_GETLINESTATE, start_line, 0)) < 0 && + start_line > 0); + start_col = MAX(start_col, 0); + } + start = teco_view_ssm(view, SCI_POSITIONFROMLINE, start_line, 0) + start_col; + g_assert(end > start); + + g_auto(teco_machine_main_t) machine; + teco_machine_main_init(&machine, NULL, FALSE); + machine.mode = TECO_MODE_LEXING; + + /* for lexing the contents of @^Uq{...} */ + g_auto(teco_machine_main_t) macrodef_machine; + teco_machine_main_init(&macrodef_machine, NULL, FALSE); + macrodef_machine.mode = TECO_MODE_LEXING; + + g_assert(start_col >= 0); + guint col = start_col; + + /* + * NOTE: We could have also used teco_view_get_character(), + * but this will use much less Scintilla messages without + * removing dot. + */ + const gchar *macro; + sptr_t gap = teco_view_ssm(view, SCI_GETGAPPOSITION, 0, 0); + if (start < gap && gap < end) { + macro = (const gchar *)teco_view_ssm(view, SCI_GETRANGEPOINTER, start, gap); + while (machine.macro_pc < gap-start) + teco_lexer_step(view, &machine, &macrodef_machine, + macro, start, gap-start, + &start_line, &col, &start_col); + /* + * This might have lexed more than gap-start bytes + * (e.g. 
a hash-bang line) + */ + start += machine.macro_pc; + } + + macro = (const gchar *)teco_view_ssm(view, SCI_GETRANGEPOINTER, start, end-start); + machine.macro_pc = 0; + while (machine.macro_pc < end-start) + teco_lexer_step(view, &machine, &macrodef_machine, + macro, start, end-start, + &start_line, &col, &start_col); + + /* set line state on the very last line */ + teco_view_ssm(view, SCI_SETLINESTATE, start_line, start_col); + + teco_undo_enabled = old_undo_enabled; +} diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..87b0d0f --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2012-2024 Robin Haberkorn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#pragma once + +#include <glib.h> + +#include "view.h" + +/** Scintilla style ids for lexing SciTECO code */ +typedef enum { + SCE_SCITECO_DEFAULT = 0, + SCE_SCITECO_COMMAND = 1, + SCE_SCITECO_OPERATOR = 2, + SCE_SCITECO_QREG = 3, + SCE_SCITECO_STRING = 4, + SCE_SCITECO_NUMBER = 5, + SCE_SCITECO_LABEL = 6, + SCE_SCITECO_COMMENT = 7, + SCE_SCITECO_INVALID = 8 +} teco_style_t; + +void teco_lexer_style(teco_view_t *view, gsize end); diff --git a/src/parser.h b/src/parser.h index 20f73fb..7ca5ab3 100644 --- a/src/parser.h +++ b/src/parser.h @@ -27,6 +27,7 @@ #include "goto.h" #include "undo.h" #include "qreg.h" +#include "lexer.h" /* * Forward Declarations @@ -203,6 +204,12 @@ struct teco_state_t { teco_keymacro_mask_t keymacro_mask : 8; /** + * Scintilla style to apply to all input characters in this state + * when syntax highlighting SciTECO code. + */ + teco_style_t style : 8; + + /** * Additional state-dependent callbacks and settings. * This wastes some bytes compared to other techniques for extending teco_state_t * but this is acceptable since there is only a limited number of constant instances. 
@@ -241,6 +248,7 @@ gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent .process_edit_cmd_cb = teco_state_process_edit_cmd, \ .is_start = FALSE, \ .keymacro_mask = TECO_KEYMACRO_MASK_DEFAULT, \ + .style = SCE_SCITECO_DEFAULT, \ ##__VA_ARGS__ \ } @@ -441,7 +449,9 @@ typedef enum { /** Parse, but don't execute until reaching end of conditional or its else-clause */ TECO_MODE_PARSE_ONLY_COND, /** Parse, but don't execute until reaching the very end of conditional */ - TECO_MODE_PARSE_ONLY_COND_FORCE + TECO_MODE_PARSE_ONLY_COND_FORCE, + /** Parse, but don't execute until end of macro (for Scintilla lexing) */ + TECO_MODE_LEXING } teco_mode_t; /** @extends teco_machine_t */ @@ -568,6 +578,7 @@ gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \ teco_state_expectstring_process_edit_cmd, \ .keymacro_mask = TECO_KEYMACRO_MASK_STRING, \ + .style = SCE_SCITECO_STRING, \ .expectstring.string_building = TRUE, \ .expectstring.last = TRUE, \ .expectstring.process_cb = NULL, /* do nothing */ \ diff --git a/src/qreg-commands.h b/src/qreg-commands.h index 27a6a5c..d999587 100644 --- a/src/qreg-commands.h +++ b/src/qreg-commands.h @@ -20,6 +20,7 @@ #include "sciteco.h" #include "parser.h" +#include "lexer.h" #include "qreg.h" static inline void @@ -55,6 +56,7 @@ gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_m .initial_cb = (teco_state_initial_cb_t)teco_state_expectqreg_initial, \ .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \ teco_state_expectqreg_process_edit_cmd, \ + .style = SCE_SCITECO_QREG, \ .expectqreg.type = TECO_QREG_REQUIRED, \ .expectqreg.got_register_cb = NAME##_got_register, /* always required */ \ ##__VA_ARGS__ \ diff --git a/src/symbols.c b/src/symbols.c index 944d01d..798b89c 100644 --- a/src/symbols.c +++ b/src/symbols.c @@ -321,8 +321,13 @@ teco_state_scintilla_lparam_done(teco_machine_main_t *ctx, const 
teco_string_t * sptr_t lParam = 0; + if (ctx->scintilla.iMessage == SCI_SETILEXER && + !teco_string_cmp(str, "sciteco", 7)) { + /* perform lexing in the container (see teco_lexer_style()) */ + lParam = 0; + } #ifdef HAVE_LEXILLA - if (ctx->scintilla.iMessage == SCI_SETILEXER) { + else if (ctx->scintilla.iMessage == SCI_SETILEXER) { if (teco_string_contains(str, '\0')) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, "Lexer name must not contain null-byte."); @@ -336,9 +341,9 @@ teco_state_scintilla_lparam_done(teco_machine_main_t *ctx, const teco_string_t * "Lexilla lexer \"%s\" not found.", lexer); return NULL; } - } else + } #endif - if (str->len > 0) { + else if (str->len > 0) { /* * NOTE: There may even be messages that read strings * with embedded nulls. @@ -46,6 +46,7 @@ #include "qreg.h" #include "eol.h" #include "memory.h" +#include "lexer.h" #include "view.h" /** @memberof teco_view_t */ @@ -636,3 +637,15 @@ teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len) */ return (gint32)g_utf8_get_char_validated(buf, -1); } + +void +teco_view_process_notify(teco_view_t *ctx, SCNotification *notify) +{ +#ifdef DEBUG + g_printf("SCINTILLA NOTIFY: code=%d\n", notify->nmhdr.code); +#endif + + if (notify->nmhdr.code == SCN_STYLENEEDED) + /* Lexing in the container: only used for SciTECO */ + teco_lexer_style(ctx, notify->position); +} @@ -83,3 +83,5 @@ teco_int_t teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos); gssize teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n); teco_int_t teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len); + +void teco_view_process_notify(teco_view_t *ctx, SCNotification *notify); |