aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am9
-rw-r--r--src/cmdline.c382
-rw-r--r--src/cmdline.h41
-rw-r--r--src/core-commands.c527
-rw-r--r--src/core-commands.h4
-rw-r--r--src/doc.c116
-rw-r--r--src/doc.h10
-rw-r--r--src/eol.c2
-rw-r--r--src/eol.h2
-rw-r--r--src/error.c14
-rw-r--r--src/error.h32
-rw-r--r--src/expressions.c39
-rw-r--r--src/expressions.h4
-rw-r--r--src/file-utils.c28
-rw-r--r--src/file-utils.h2
-rw-r--r--src/glob.c39
-rw-r--r--src/glob.h17
-rw-r--r--src/goto-commands.c14
-rw-r--r--src/goto-commands.h2
-rw-r--r--src/goto.c22
-rw-r--r--src/goto.h10
-rw-r--r--src/help.c14
-rw-r--r--src/help.h2
-rw-r--r--src/interface-curses/Makefile.am3
-rw-r--r--src/interface-curses/curses-icons.c398
-rw-r--r--src/interface-curses/curses-icons.h28
-rw-r--r--src/interface-curses/curses-info-popup.c23
-rw-r--r--src/interface-curses/curses-info-popup.h2
-rw-r--r--src/interface-curses/curses-utils.c72
-rw-r--r--src/interface-curses/curses-utils.h17
-rw-r--r--src/interface-curses/interface.c246
-rw-r--r--src/interface-gtk/gtk-info-popup.c2
-rw-r--r--src/interface-gtk/gtk-info-popup.h2
-rw-r--r--src/interface-gtk/gtk-label.c2
-rw-r--r--src/interface-gtk/gtk-label.h2
-rw-r--r--src/interface-gtk/interface.c301
-rw-r--r--src/interface.c2
-rw-r--r--src/interface.h38
-rw-r--r--src/list.h2
-rw-r--r--src/main.c62
-rw-r--r--src/memory.c31
-rw-r--r--src/memory.h2
-rw-r--r--src/parser.c341
-rw-r--r--src/parser.h100
-rw-r--r--src/qreg-commands.c128
-rw-r--r--src/qreg-commands.h8
-rw-r--r--src/qreg.c594
-rw-r--r--src/qreg.h31
-rw-r--r--src/rb3str.c10
-rw-r--r--src/rb3str.h4
-rw-r--r--src/ring.c6
-rw-r--r--src/ring.h2
-rw-r--r--src/sciteco.h28
-rw-r--r--src/search.c211
-rw-r--r--src/search.h2
-rw-r--r--src/spawn.c96
-rw-r--r--src/spawn.h2
-rw-r--r--src/string-utils.c53
-rw-r--r--src/string-utils.h52
-rwxr-xr-xsrc/symbols-extract.tes7
-rw-r--r--src/symbols.c4
-rw-r--r--src/symbols.h2
-rw-r--r--src/undo.c6
-rw-r--r--src/undo.h8
-rw-r--r--src/view.c205
-rw-r--r--src/view.h15
66 files changed, 3286 insertions, 1196 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index b9aca8a..5b2572e 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -67,10 +67,9 @@ noinst_PROGRAMS = sciteco-minimal
sciteco_minimal_SOURCES =
symbols-scintilla.c symbols-scilexer.c : sciteco-minimal$(EXEEXT)
endif
-sciteco_minimal_LDADD = libsciteco-base.la \
- @SCINTILLA_PATH@/bin/scintilla.a
+sciteco_minimal_LDADD = libsciteco-base.la $(LIBSCINTILLA)
if LEXILLA
-sciteco_minimal_LDADD += @LEXILLA_PATH@/bin/liblexilla.a
+sciteco_minimal_LDADD += $(LIBLEXILLA)
endif
# Scintilla is unfortunately still written in C++, so we must force
# Automake to use the C++ linker when linking the binaries.
@@ -99,10 +98,10 @@ CLEANFILES = $(BUILT_SOURCES) \
symbols-scintilla.c : @SCINTILLA_PATH@/include/Scintilla.h \
symbols-extract.tes
- $(SCITECO_MINIMAL) -m -- @srcdir@/symbols-extract.tes \
+ $(SCITECO_MINIMAL) -8m -- @srcdir@/symbols-extract.tes \
-p "SCI_" -n teco_symbol_list_scintilla $@ $<
symbols-scilexer.c : @LEXILLA_PATH@/include/SciLexer.h \
symbols-extract.tes
- $(SCITECO_MINIMAL) -m -- @srcdir@/symbols-extract.tes \
+ $(SCITECO_MINIMAL) -8m -- @srcdir@/symbols-extract.tes \
-p "SCE_" -n teco_symbol_list_scilexer $@ $<
diff --git a/src/cmdline.c b/src/cmdline.c
index 58d48b4..816816c 100644
--- a/src/cmdline.c
+++ b/src/cmdline.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -52,6 +52,7 @@
#include "eol.h"
#include "error.h"
#include "qreg.h"
+#include "glob.h"
#include "cmdline.h"
#if defined(HAVE_MALLOC_TRIM) && !defined(HAVE_DECL_MALLOC_TRIM)
@@ -81,12 +82,12 @@ static teco_string_t teco_last_cmdline = {NULL, 0};
* @param error A GError.
* @return FALSE to throw a GError
*/
-gboolean
+static gboolean
teco_cmdline_insert(const gchar *data, gsize len, GError **error)
{
const teco_string_t src = {(gchar *)data, len};
- teco_string_t old_cmdline = {NULL, 0};
- guint repl_pc = 0;
+ g_auto(teco_string_t) old_cmdline = {NULL, 0};
+ gsize repl_pc = 0;
teco_cmdline.machine.macro_pc = teco_cmdline.pc = teco_cmdline.effective_len;
@@ -109,8 +110,6 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error)
/*
* Parse/execute characters, one at a time so
* undo tokens get emitted for the corresponding characters.
- *
- * FIXME: The inner loop should be factored out.
*/
while (teco_cmdline.pc < teco_cmdline.effective_len) {
g_autoptr(GError) tmp_error = NULL;
@@ -125,7 +124,8 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error)
teco_qreg_t *cmdline_reg = teco_qreg_table_find(&teco_qreg_table_globals, "\e", 1);
teco_string_t new_cmdline;
- if (!cmdline_reg->vtable->get_string(cmdline_reg, &new_cmdline.data, &new_cmdline.len, error))
+ if (!cmdline_reg->vtable->get_string(cmdline_reg, &new_cmdline.data, &new_cmdline.len,
+ NULL, error))
return FALSE;
/*
@@ -160,6 +160,7 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error)
teco_string_clear(&teco_cmdline.str);
teco_cmdline.str = old_cmdline;
+ memset(&old_cmdline, 0, sizeof(old_cmdline));
teco_cmdline.machine.macro_pc = teco_cmdline.pc = repl_pc;
/* rubout cmdline replacement command */
@@ -179,55 +180,65 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error)
return TRUE;
}
+static gboolean
+teco_cmdline_rubin(GError **error)
+{
+ if (!teco_cmdline.str.len)
+ return TRUE;
+
+ const gchar *start, *end, *next;
+ start = teco_cmdline.str.data+teco_cmdline.effective_len;
+ end = teco_cmdline.str.data+teco_cmdline.str.len;
+ next = g_utf8_find_next_char(start, end) ? : end;
+ return teco_cmdline_insert(start, next-start, error);
+}
+
+/**
+ * Process key press or expansion of key macro.
+ *
+ * Should be called only with the results of a single keypress.
+ * They are considered a unit and in case of errors, we
+ * rubout the entire sequence (unless there was a $$ return in the
+ * middle).
+ *
+ * @param data Key presses in UTF-8.
+ * @param len Length of data.
+ * @param error A GError.
+ * @return FALSE if error was set.
+ * If TRUE was returned, there could still have been an error,
+ * but it has already been handled.
+ */
gboolean
-teco_cmdline_keypress_c(gchar key, GError **error)
+teco_cmdline_keypress(const gchar *data, gsize len, GError **error)
{
+ const teco_string_t str = {(gchar *)data, len};
teco_machine_t *machine = &teco_cmdline.machine.parent;
- g_autoptr(GError) tmp_error = NULL;
+
+ if (!teco_string_validate_utf8(&str)) {
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
+ "Invalid UTF-8 sequence");
+ return FALSE;
+ }
/*
- * Cleanup messages,etc...
+ * Cleanup messages, etc...
*/
teco_interface_msg_clear();
- /*
- * Process immediate editing commands, inserting
- * characters as necessary into the command line.
- */
- if (!machine->current->process_edit_cmd_cb(machine, NULL, key, &tmp_error)) {
- if (g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN)) {
- /*
- * Return from top-level macro, results
- * in command line termination.
- * The return "arguments" are currently
- * ignored.
- */
- g_assert(machine->current == &teco_state_start);
+ gsize start_pc = teco_cmdline.effective_len;
- teco_interface_popup_clear();
+ for (guint i = 0; i < len; i = g_utf8_next_char(data+i) - data) {
+ gunichar chr = g_utf8_get_char(data+i);
+ g_autoptr(GError) tmp_error = NULL;
- if (teco_quit_requested) {
- /* cought by user interface */
- g_set_error_literal(error, TECO_ERROR, TECO_ERROR_QUIT, "");
- return FALSE;
- }
+ /*
+ * Process immediate editing commands, inserting
+ * characters as necessary into the command line.
+ */
+ if (machine->current->process_edit_cmd_cb(machine, NULL, chr, &tmp_error))
+ continue;
- teco_undo_clear();
- /* also empties all Scintilla undo buffers */
- teco_ring_set_scintilla_undo(TRUE);
- teco_view_set_scintilla_undo(teco_qreg_view, TRUE);
- /*
- * FIXME: Reset main machine?
- */
- teco_goto_table_clear(&teco_cmdline.machine.goto_table);
- teco_expressions_clear();
- g_array_remove_range(teco_loop_stack, 0, teco_loop_stack->len);
-
- teco_string_clear(&teco_last_cmdline);
- teco_last_cmdline = teco_cmdline.str;
- memset(&teco_cmdline.str, 0, sizeof(teco_cmdline.str));
- teco_cmdline.effective_len = 0;
- } else {
+ if (!g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN)) {
/*
* NOTE: Error message already displayed in
* teco_cmdline_insert().
@@ -237,29 +248,76 @@ teco_cmdline_keypress_c(gchar key, GError **error)
* is thrown. They must be executed so
* as if the character had never been
* inserted.
+ * Actually we rub out the entire command line
+ * up until the insertion point.
*/
- teco_undo_pop(teco_cmdline.pc);
- teco_cmdline.effective_len = teco_cmdline.pc;
+ teco_undo_pop(start_pc);
+ teco_cmdline.effective_len = start_pc;
/* program counter could be messed up */
teco_cmdline.machine.macro_pc = teco_cmdline.effective_len;
- }
#ifdef HAVE_MALLOC_TRIM
+ /*
+ * Undo stacks can grow very large - sometimes large enough to
+ * make the system swap and become unresponsive.
+ * This shrinks the program break after lots of memory has
+ * been freed, reducing the virtual memory size and aiding
+ * in recovering from swapping issues.
+ *
+ * This is particularly important with some memory limiting backends
+ * after hitting the memory limit, as otherwise the program's resident
+ * size won't shrink and it would be impossible to recover.
+ */
+ if (g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_MEMLIMIT))
+ malloc_trim(0);
+#endif
+
+ break;
+ }
+
/*
- * Undo stacks can grow very large - sometimes large enough to
- * make the system swap and become unresponsive.
- * This shrinks the program break after lots of memory has
- * been freed, reducing the virtual memory size and aiding
- * in recovering from swapping issues.
- *
- * This is particularily important with some memory limiting backends
- * after hitting the memory limit* as otherwise the program's resident
- * size won't shrink and it would be impossible to recover.
+ * Return from top-level macro, results
+ * in command line termination.
+ * The return "arguments" are currently
+ * ignored.
+ */
+ g_assert(machine->current == &teco_state_start);
+
+ teco_interface_popup_clear();
+
+ if (teco_quit_requested) {
+ /* caught by user interface */
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_QUIT, "");
+ return FALSE;
+ }
+
+ teco_undo_clear();
+ /* also empties all Scintilla undo buffers */
+ teco_ring_set_scintilla_undo(TRUE);
+ teco_view_set_scintilla_undo(teco_qreg_view, TRUE);
+ /*
+ * FIXME: Reset main machine?
*/
- if (g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN) ||
- g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_MEMLIMIT))
- malloc_trim(0);
+ teco_goto_table_clear(&teco_cmdline.machine.goto_table);
+ teco_expressions_clear();
+ g_array_remove_range(teco_loop_stack, 0, teco_loop_stack->len);
+
+ teco_string_clear(&teco_last_cmdline);
+ teco_last_cmdline = teco_cmdline.str;
+ memset(&teco_cmdline.str, 0, sizeof(teco_cmdline.str));
+ teco_cmdline.effective_len = 0;
+
+#ifdef HAVE_MALLOC_TRIM
+ /* see above */
+ malloc_trim(0);
#endif
+
+ /*
+ * Continue with the other keys,
+ * but we obviously can't rub out beyond the return if any
+ * error occurs later on.
+ */
+ start_pc = 0;
}
/*
@@ -269,33 +327,40 @@ teco_cmdline_keypress_c(gchar key, GError **error)
return TRUE;
}
-gboolean
-teco_cmdline_fnmacro(const gchar *name, GError **error)
+teco_keymacro_status_t
+teco_cmdline_keymacro(const gchar *name, gssize name_len, GError **error)
{
g_assert(name != NULL);
+ if (name_len < 0)
+ name_len = strlen(name);
+
/*
* NOTE: It should be safe to allocate on the stack since
* there are only a limited number of possible function key macros.
*/
- gchar macro_name[1 + strlen(name)];
- macro_name[0] = TECO_CTL_KEY('F');
- memcpy(macro_name+1, name, sizeof(macro_name)-1);
+ gchar macro_name[1 + name_len];
+ macro_name[0] = TECO_CTL_KEY('K');
+ memcpy(macro_name+1, name, name_len);
- teco_qreg_t *macro_reg;
-
- if (teco_ed & TECO_ED_FNKEYS &&
- (macro_reg = teco_qreg_table_find(&teco_qreg_table_globals, macro_name, sizeof(macro_name)))) {
+ teco_qreg_t *macro_reg = teco_qreg_table_find(&teco_qreg_table_globals, macro_name, sizeof(macro_name));
+ if (macro_reg) {
teco_int_t macro_mask;
if (!macro_reg->vtable->get_integer(macro_reg, &macro_mask, error))
- return FALSE;
+ return TECO_KEYMACRO_ERROR;
- if (macro_mask & teco_cmdline.machine.parent.current->fnmacro_mask)
- return TRUE;
+ /*
+ * FIXME: This does not work with Q-Register specs embedded into string arguments.
+ * There should be a keymacro_mask_cb() instead.
+ */
+ if (!((teco_cmdline.machine.parent.current->keymacro_mask |
+ teco_cmdline.machine.expectstring.machine.parent.current->keymacro_mask) & ~macro_mask))
+ return TECO_KEYMACRO_UNDEFINED;
g_auto(teco_string_t) macro_str = {NULL, 0};
- return macro_reg->vtable->get_string(macro_reg, &macro_str.data, &macro_str.len, error) &&
- teco_cmdline_keypress(macro_str.data, macro_str.len, error);
+ return macro_reg->vtable->get_string(macro_reg, &macro_str.data, &macro_str.len, NULL, error) &&
+ teco_cmdline_keypress(macro_str.data, macro_str.len, error)
+ ? TECO_KEYMACRO_SUCCESS : TECO_KEYMACRO_ERROR;
}
/*
@@ -303,28 +368,34 @@ teco_cmdline_fnmacro(const gchar *name, GError **error)
* except "CLOSE" which quits the application
* (this may loose unsaved data but is better than
* not doing anything if the user closes the window).
- * NOTE: Doing the check here is less efficient than
- * doing it in the UI implementations, but defines
- * the default actions centrally.
- * Also, fnmacros are only handled after key presses.
*/
- if (!strcmp(name, "CLOSE")) {
+ if (name_len == 5 && !strncmp(name, "CLOSE", name_len)) {
g_set_error_literal(error, TECO_ERROR, TECO_ERROR_QUIT, "");
- return FALSE;
+ return TECO_KEYMACRO_ERROR;
}
- return TRUE;
+ return TECO_KEYMACRO_UNDEFINED;
+}
+
+static void
+teco_cmdline_rubout(void)
+{
+ const gchar *p;
+ p = g_utf8_find_prev_char(teco_cmdline.str.data,
+ teco_cmdline.str.data+teco_cmdline.effective_len);
+ if (p) {
+ teco_cmdline.effective_len = p - teco_cmdline.str.data;
+ teco_undo_pop(teco_cmdline.effective_len);
+ }
}
-#ifndef NDEBUG
-static void __attribute__((destructor))
+static void TECO_DEBUG_CLEANUP
teco_cmdline_cleanup(void)
{
teco_machine_main_clear(&teco_cmdline.machine);
teco_string_clear(&teco_cmdline.str);
teco_string_clear(&teco_last_cmdline);
}
-#endif
/*
* Commandline key processing.
@@ -337,7 +408,7 @@ teco_cmdline_cleanup(void)
*/
gboolean
-teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
switch (key) {
case '\n': /* insert EOL sequence */
@@ -407,23 +478,30 @@ teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gch
}
teco_interface_popup_clear();
- return teco_cmdline_insert(&key, sizeof(key), error);
+
+ gchar buf[6];
+ gsize len = g_unichar_to_utf8(key, buf);
+ return teco_cmdline_insert(buf, len, error);
}
gboolean
-teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
+ /*
+ * Auto case folding is for syntactic characters,
+ * so this could be done by working only with a-z and A-Z.
+ * However, it's also not speed critical.
+ */
if (teco_ed & TECO_ED_AUTOCASEFOLD)
- /* will not modify non-letter keys */
- key = g_ascii_islower(key) ? g_ascii_toupper(key)
- : g_ascii_tolower(key);
+ key = g_unichar_islower(key) ? g_unichar_toupper(key)
+ : g_unichar_tolower(key);
return teco_state_process_edit_cmd(ctx, parent_ctx, key, error);
}
gboolean
teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
- gchar key, GError **error)
+ gunichar key, GError **error)
{
teco_state_t *current = ctx->parent.current;
@@ -460,20 +538,15 @@ teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *
* get the default behaviour of teco_state_process_edit_cmd().
* This may not be a real-life issue serious enough to maintain
* a result string even in parse-only mode.
- *
- * FIXME: Does not properly rubout string-building commands at the
- * start of the string argument -- ctx->result->len is not
- * a valid indicator of argument emptyness.
- * Since it chains to teco_state_process_edit_cmd() we will instead
- * rubout the entire command.
*/
if (ctx->result && ctx->result->len > 0) {
gboolean is_wordchar = teco_string_contains(&wchars, teco_cmdline.str.data[teco_cmdline.effective_len-1]);
teco_cmdline_rubout();
if (ctx->parent.current != current) {
/* rub out string building command */
- while (ctx->result->len > 0 && ctx->parent.current != current)
+ do
teco_cmdline_rubout();
+ while (ctx->parent.current != current);
return TRUE;
}
@@ -572,8 +645,29 @@ teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *
}
gboolean
+teco_state_stringbuilding_escaped_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
+ gunichar key, GError **error)
+{
+ /*
+ * Allow insertion of characters that would otherwise be interpreted as
+ * immediate editing commands after ^Q/^R.
+ */
+ switch (key) {
+ //case TECO_CTL_KEY('G'):
+ case TECO_CTL_KEY('W'):
+ case TECO_CTL_KEY('U'):
+ teco_interface_popup_clear();
+
+ gchar c = key;
+ return teco_cmdline_insert(&c, sizeof(c), error);
+ }
+
+ return teco_state_process_edit_cmd(parent_ctx, NULL, key, error);
+}
+
+gboolean
teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
- gchar chr, GError **error)
+ gunichar chr, GError **error)
{
g_assert(ctx->machine_qregspec != NULL);
/* We downcast since teco_machine_qregspec_t is private in qreg.c */
@@ -582,7 +676,7 @@ teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *c
}
gboolean
-teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -590,7 +684,7 @@ teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_
}
gboolean
-teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -626,7 +720,7 @@ teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *par
}
gboolean
-teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -696,8 +790,8 @@ teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t
gboolean unambiguous = teco_file_auto_complete(ctx->expectstring.string.data, G_FILE_TEST_EXISTS, &new_chars);
teco_machine_stringbuilding_escape(stringbuilding_ctx, new_chars.data, new_chars.len, &new_chars_escaped);
if (unambiguous && ctx->expectstring.nesting == 1)
- teco_string_append_c(&new_chars_escaped,
- ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char);
+ teco_string_append_wc(&new_chars_escaped,
+ ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char);
return teco_cmdline_insert(new_chars_escaped.data, new_chars_escaped.len, error);
}
@@ -707,7 +801,61 @@ teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t
}
gboolean
-teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_expectglob_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
+{
+ teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
+ teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
+
+ /*
+ * NOTE: We don't just define teco_state_stringbuilding_start_process_edit_cmd(),
+ * as it would be hard to subclass/overwrite for different main machine states.
+ */
+ if (!stringbuilding_current->is_start)
+ return stringbuilding_current->process_edit_cmd_cb(&stringbuilding_ctx->parent, &ctx->parent, key, error);
+
+ switch (key) {
+ case '\t': { /* autocomplete file name */
+ if (teco_cmdline.modifier_enabled)
+ break;
+
+ if (teco_interface_popup_is_shown()) {
+ /* cycle through popup pages */
+ teco_interface_popup_show();
+ return TRUE;
+ }
+
+ if (teco_string_contains(&ctx->expectstring.string, '\0'))
+ /* null-byte not allowed in file names */
+ return TRUE;
+
+ /*
+ * We do not support autocompleting glob patterns.
+ *
+ * FIXME: What if the last autocompletion inserted escaped glob
+ * characters?
+ * Perhaps teco_file_auto_complete() should natively support glob patterns.
+ */
+ if (teco_globber_is_pattern(ctx->expectstring.string.data))
+ return TRUE;
+
+ g_auto(teco_string_t) new_chars, new_chars_escaped;
+ gboolean unambiguous = teco_file_auto_complete(ctx->expectstring.string.data, G_FILE_TEST_EXISTS, &new_chars);
+ g_autofree gchar *pattern_escaped = teco_globber_escape_pattern(new_chars.data);
+ teco_machine_stringbuilding_escape(stringbuilding_ctx, pattern_escaped, strlen(pattern_escaped), &new_chars_escaped);
+ if (unambiguous && ctx->expectstring.nesting == 1)
+ teco_string_append_wc(&new_chars_escaped,
+ ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char);
+
+ return teco_cmdline_insert(new_chars_escaped.data, new_chars_escaped.len, error);
+ }
+ }
+
+ /* ^W should behave like in commands accepting files */
+ return teco_state_expectfile_process_edit_cmd(ctx, parent_ctx, key, error);
+}
+
+gboolean
+teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -745,11 +893,12 @@ teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *
}
}
- return stringbuilding_current->process_edit_cmd_cb(&stringbuilding_ctx->parent, &ctx->parent, key, error);
+ /* ^W should behave like in commands accepting files */
+ return teco_state_expectfile_process_edit_cmd(ctx, parent_ctx, key, error);
}
gboolean
-teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
g_assert(ctx->expectqreg != NULL);
/*
@@ -761,7 +910,7 @@ teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t
}
gboolean
-teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
switch (key) {
case '\t': { /* autocomplete Q-Register name */
@@ -796,7 +945,7 @@ teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_
}
gboolean
-teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
teco_machine_stringbuilding_t *stringbuilding_ctx = teco_machine_qregspec_get_stringbuilding(ctx);
teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -836,7 +985,7 @@ teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_m
}
gboolean
-teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -881,7 +1030,7 @@ teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *pa
}
gboolean
-teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -926,7 +1075,7 @@ teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_mac
}
gboolean
-teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -973,7 +1122,7 @@ teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *paren
}
gboolean
-teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
{
teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -1004,8 +1153,8 @@ teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *paren
gboolean unambiguous = teco_help_auto_complete(ctx->expectstring.string.data, &new_chars);
teco_machine_stringbuilding_escape(stringbuilding_ctx, new_chars.data, new_chars.len, &new_chars_escaped);
if (unambiguous && ctx->expectstring.nesting == 1)
- teco_string_append_c(&new_chars_escaped,
- ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char);
+ teco_string_append_wc(&new_chars_escaped,
+ ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char);
return new_chars_escaped.len ? teco_cmdline_insert(new_chars_escaped.data, new_chars_escaped.len, error) : TRUE;
}
@@ -1028,7 +1177,8 @@ teco_state_save_cmdline_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg
return &teco_state_start;
if (!qreg->vtable->undo_set_string(qreg, error) ||
- !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len, error))
+ !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len,
+ teco_default_codepage(), error))
return NULL;
return &teco_state_start;
diff --git a/src/cmdline.h b/src/cmdline.h
index 85e657a..f4b84e4 100644
--- a/src/cmdline.h
+++ b/src/cmdline.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -46,7 +46,7 @@ typedef struct {
gsize effective_len;
/** Program counter within the command-line macro */
- guint pc;
+ gsize pc;
/**
* Specifies whether the immediate editing modifier
@@ -60,35 +60,30 @@ typedef struct {
extern teco_cmdline_t teco_cmdline;
-gboolean teco_cmdline_insert(const gchar *data, gsize len, GError **error);
+gboolean teco_cmdline_keypress(const gchar *data, gsize len, GError **error);
-static inline gboolean
-teco_cmdline_rubin(GError **error)
-{
- return teco_cmdline.effective_len >= teco_cmdline.str.len ||
- teco_cmdline_insert(teco_cmdline.str.data + teco_cmdline.effective_len, 1, error);
-}
+typedef enum {
+ TECO_KEYMACRO_ERROR = 0, /**< GError occurred */
+ TECO_KEYMACRO_SUCCESS, /**< key macro found and inserted */
+ TECO_KEYMACRO_UNDEFINED /**< no key macro found */
+} teco_keymacro_status_t;
-gboolean teco_cmdline_keypress_c(gchar key, GError **error);
+teco_keymacro_status_t teco_cmdline_keymacro(const gchar *name, gssize name_len, GError **error);
static inline gboolean
-teco_cmdline_keypress(const gchar *str, gsize len, GError **error)
+teco_cmdline_keymacro_c(gchar key, GError **error)
{
- for (guint i = 0; i < len; i++)
- if (!teco_cmdline_keypress_c(str[i], error))
- return FALSE;
+ switch (teco_cmdline_keymacro(&key, sizeof(key), error)) {
+ case TECO_KEYMACRO_ERROR:
+ return FALSE;
+ case TECO_KEYMACRO_SUCCESS:
+ break;
+ case TECO_KEYMACRO_UNDEFINED:
+ return teco_cmdline_keypress(&key, sizeof(key), error);
+ }
return TRUE;
}
-gboolean teco_cmdline_fnmacro(const gchar *name, GError **error);
-
-static inline void
-teco_cmdline_rubout(void)
-{
- if (teco_cmdline.effective_len)
- teco_undo_pop(--teco_cmdline.effective_len);
-}
-
extern gboolean teco_quit_requested;
/*
diff --git a/src/core-commands.c b/src/core-commands.c
index 4d5b378..0cde7e0 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -45,7 +45,7 @@
#include "goto-commands.h"
#include "core-commands.h"
-static teco_state_t *teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error);
+static teco_state_t *teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error);
/*
* NOTE: This needs some extra code in teco_state_start_input().
@@ -129,7 +129,8 @@ teco_state_start_dot(teco_machine_main_t *ctx, GError **error)
{
if (!teco_expressions_eval(FALSE, error))
return;
- teco_expressions_push(teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0));
+ sptr_t pos = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
+ teco_expressions_push(teco_interface_bytes2glyphs(pos));
}
/*$ Z size
@@ -145,7 +146,8 @@ teco_state_start_zed(teco_machine_main_t *ctx, GError **error)
{
if (!teco_expressions_eval(FALSE, error))
return;
- teco_expressions_push(teco_interface_ssm(SCI_GETLENGTH, 0, 0));
+ sptr_t pos = teco_interface_ssm(SCI_GETLENGTH, 0, 0);
+ teco_expressions_push(teco_interface_bytes2glyphs(pos));
}
/*$ H
@@ -162,10 +164,11 @@ teco_state_start_range(teco_machine_main_t *ctx, GError **error)
if (!teco_expressions_eval(FALSE, error))
return;
teco_expressions_push(0);
- teco_expressions_push(teco_interface_ssm(SCI_GETLENGTH, 0, 0));
+ sptr_t pos = teco_interface_ssm(SCI_GETLENGTH, 0, 0);
+ teco_expressions_push(teco_interface_bytes2glyphs(pos));
}
-/*$ "\\"
+/*$ \[rs]
* n\\ -- Insert or read ASCII numbers
* \\ -> n
*
@@ -241,6 +244,7 @@ teco_state_start_loop_open(teco_machine_main_t *ctx, GError **error)
if (!teco_expressions_eval(FALSE, error) ||
!teco_expressions_pop_num_calc(&lctx.counter, -1, error))
return;
+ lctx.brace_level = teco_brace_level;
lctx.pass_through = teco_machine_main_eval_colon(ctx);
if (lctx.counter) {
@@ -280,6 +284,14 @@ teco_state_start_loop_close(teco_machine_main_t *ctx, GError **error)
teco_loop_context_t *lctx = &g_array_index(teco_loop_stack, teco_loop_context_t,
teco_loop_stack->len-1);
+
+ /* only non-pass-through loops increase the brace level */
+ if (teco_brace_level != lctx->brace_level + !lctx->pass_through) {
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
+ "Brace left open at loop end command");
+ return;
+ }
+
gboolean colon_modified = teco_machine_main_eval_colon(ctx);
/*
@@ -348,7 +360,7 @@ teco_state_start_break(teco_machine_main_t *ctx, GError **error)
{
if (teco_loop_stack->len <= ctx->loop_stack_fp) {
g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
- "<;> only allowed in iterations");
+ "<;> only allowed in loops");
return;
}
@@ -373,7 +385,7 @@ teco_state_start_break(teco_machine_main_t *ctx, GError **error)
if (!teco_expressions_discard_args(error))
return;
if (!lctx.pass_through &&
- !teco_expressions_brace_close(error))
+ !teco_expressions_brace_return(lctx.brace_level, 0, error))
return;
undo__insert_val__teco_loop_stack(teco_loop_stack->len, lctx);
@@ -511,11 +523,12 @@ teco_state_start_jump(teco_machine_main_t *ctx, GError **error)
if (!teco_expressions_pop_num_calc(&v, 0, error))
return;
- if (teco_validate_pos(v)) {
+ gssize pos = teco_interface_glyphs2bytes(v);
+ if (pos >= 0) {
if (teco_current_doc_must_undo())
undo__teco_interface_ssm(SCI_GOTOPOS,
teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0), 0);
- teco_interface_ssm(SCI_GOTOPOS, v, 0);
+ teco_interface_ssm(SCI_GOTOPOS, pos, 0);
if (teco_machine_main_eval_colon(ctx))
teco_expressions_push(TECO_SUCCESS);
@@ -531,11 +544,11 @@ static teco_bool_t
teco_move_chars(teco_int_t n)
{
sptr_t pos = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
-
- if (!teco_validate_pos(pos + n))
+ gssize next_pos = teco_interface_glyphs2bytes_relative(pos, n);
+ if (next_pos < 0)
return TECO_FAILURE;
- teco_interface_ssm(SCI_GOTOPOS, pos + n, 0);
+ teco_interface_ssm(SCI_GOTOPOS, next_pos, 0);
if (teco_current_doc_must_undo())
undo__teco_interface_ssm(SCI_GOTOPOS, pos, 0);
@@ -879,7 +892,7 @@ static gboolean
teco_state_start_kill(teco_machine_main_t *ctx, const gchar *cmd, gboolean by_lines, GError **error)
{
teco_bool_t rc;
- teco_int_t from, len;
+ gssize from, len; /* in bytes */
if (!teco_expressions_eval(FALSE, error))
return FALSE;
@@ -894,20 +907,24 @@ teco_state_start_kill(teco_machine_main_t *ctx, const gchar *cmd, gboolean by_li
len = teco_interface_ssm(SCI_POSITIONFROMLINE, line, 0) - from;
rc = teco_bool(teco_validate_line(line));
} else {
- if (!teco_expressions_pop_num_calc(&len, teco_num_sign, error))
+ teco_int_t len_glyphs;
+ if (!teco_expressions_pop_num_calc(&len_glyphs, teco_num_sign, error))
return FALSE;
- rc = teco_bool(teco_validate_pos(from + len));
+ gssize to = teco_interface_glyphs2bytes_relative(from, len_glyphs);
+ rc = teco_bool(to >= 0);
+ len = to-from;
}
if (len < 0) {
len *= -1;
from -= len;
}
} else {
- teco_int_t to = teco_expressions_pop_num(0);
- from = teco_expressions_pop_num(0);
+ teco_int_t to_glyphs = teco_expressions_pop_num(0);
+ gssize to = teco_interface_glyphs2bytes(to_glyphs);
+ teco_int_t from_glyphs = teco_expressions_pop_num(0);
+ from = teco_interface_glyphs2bytes(from_glyphs);
len = to - from;
- rc = teco_bool(len >= 0 && teco_validate_pos(from) &&
- teco_validate_pos(to));
+ rc = teco_bool(len >= 0 && from >= 0 && to >= 0);
}
if (teco_machine_main_eval_colon(ctx)) {
@@ -1002,6 +1019,9 @@ teco_state_start_delete_chars(teco_machine_main_t *ctx, GError **error)
* This can be an ASCII <code> or Unicode codepoint
* depending on Scintilla's encoding of the current
* buffer.
+ * Invalid Unicode byte sequences are reported as
+ * -1 or -2.
+ *
* - If <n> is 0, return the <code> of the character
* pointed to by dot.
* - If <n> is 1, return the <code> of the character
@@ -1012,28 +1032,33 @@ teco_state_start_delete_chars(teco_machine_main_t *ctx, GError **error)
*
* If the position of the queried character is off-page,
* the command will yield an error.
+ *
+ * If the document is encoded as UTF-8 and there is
+ * an incomplete sequence at the requested position,
+ * -1 is returned.
+ * All other invalid Unicode sequences are returned as -2.
*/
-/** @todo does Scintilla really return code points??? */
static void
teco_state_start_get(teco_machine_main_t *ctx, GError **error)
{
teco_int_t v;
if (!teco_expressions_pop_num_calc(&v, teco_num_sign, error))
return;
- v += teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
- /*
- * NOTE: We cannot use teco_validate_pos() here since
- * the end of the buffer is not a valid position for <A>.
- */
- if (v < 0 || v >= teco_interface_ssm(SCI_GETLENGTH, 0, 0)) {
+
+ sptr_t pos = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
+ gssize get_pos = teco_interface_glyphs2bytes_relative(pos, v);
+ sptr_t len = teco_interface_ssm(SCI_GETLENGTH, 0, 0);
+
+ if (get_pos < 0 || get_pos == len) {
teco_error_range_set(error, "A");
return;
}
- teco_expressions_push(teco_interface_ssm(SCI_GETCHARAT, v, 0));
+
+ teco_expressions_push(teco_interface_get_character(get_pos, len));
}
static teco_state_t *
-teco_state_start_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
static teco_machine_main_transition_t transitions[] = {
/*
@@ -1148,7 +1173,7 @@ teco_state_start_input(teco_machine_main_t *ctx, gchar chr, GError **error)
*
* FIXME: Maybe, there should be a special teco_state_t
* for beginnings of command-lines?
- * It could also be used for a corresponding FNMACRO mask.
+ * It could also be used for a corresponding KEYMACRO mask.
*/
if (teco_cmdline.effective_len == 1 && teco_cmdline.str.data[0] == '*')
return &teco_state_save_cmdline;
@@ -1244,7 +1269,7 @@ teco_state_start_input(teco_machine_main_t *ctx, gchar chr, GError **error)
TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_start,
.end_of_macro_cb = NULL, /* Allowed at the end of a macro! */
.is_start = TRUE,
- .fnmacro_mask = TECO_FNMACRO_MASK_START
+ .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE
);
/*$ F<
@@ -1372,7 +1397,7 @@ teco_state_fcommand_cond_else(teco_machine_main_t *ctx, GError **error)
}
static teco_state_t *
-teco_state_fcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_fcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
static teco_machine_main_transition_t transitions[] = {
/*
@@ -1435,7 +1460,7 @@ teco_state_changedir_done(teco_machine_main_t *ctx, const teco_string_t *str, GE
teco_qreg_t *qreg = teco_qreg_table_find(&teco_qreg_table_globals, "$HOME", 5);
g_assert(qreg != NULL);
teco_string_t home;
- if (!qreg->vtable->get_string(qreg, &home.data, &home.len, error))
+ if (!qreg->vtable->get_string(qreg, &home.data, &home.len, NULL, error))
return NULL;
/*
@@ -1496,7 +1521,7 @@ teco_state_changedir_done(teco_machine_main_t *ctx, const teco_string_t *str, GE
TECO_DEFINE_STATE_EXPECTDIR(teco_state_changedir);
static teco_state_t *
-teco_state_condcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_condcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
teco_int_t value = 0;
gboolean result = TRUE;
@@ -1536,20 +1561,20 @@ teco_state_condcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error
break;
case 'A':
if (ctx->mode == TECO_MODE_NORMAL)
- result = g_ascii_isalpha((gchar)value);
+ result = g_unichar_isalpha(value);
break;
case 'C':
if (ctx->mode == TECO_MODE_NORMAL)
- result = g_ascii_isalnum((gchar)value) ||
+ result = g_unichar_isalnum(value) ||
value == '.' || value == '$' || value == '_';
break;
case 'D':
if (ctx->mode == TECO_MODE_NORMAL)
- result = g_ascii_isdigit((gchar)value);
+ result = g_unichar_isdigit(value);
break;
case 'I':
if (ctx->mode == TECO_MODE_NORMAL)
- result = G_IS_DIR_SEPARATOR((gchar)value);
+ result = G_IS_DIR_SEPARATOR(value);
break;
case 'S':
case 'T':
@@ -1582,15 +1607,15 @@ teco_state_condcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error
break;
case 'R':
if (ctx->mode == TECO_MODE_NORMAL)
- result = g_ascii_isalnum((gchar)value);
+ result = g_unichar_isalnum(value);
break;
case 'V':
if (ctx->mode == TECO_MODE_NORMAL)
- result = g_ascii_islower((gchar)value);
+ result = g_unichar_islower(value);
break;
case 'W':
if (ctx->mode == TECO_MODE_NORMAL)
- result = g_ascii_isupper((gchar)value);
+ result = g_unichar_isupper(value);
break;
default:
g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED,
@@ -1720,8 +1745,71 @@ teco_state_control_radix(teco_machine_main_t *ctx, GError **error)
}
}
+/*$ ^E glyphs2bytes bytes2glyphs
+ * glyphs^E -> bytes -- Translate between glyph and byte indexes
+ * bytes:^E -> glyphs
+ * ^E -> bytes
+ * :^E -> length
+ *
+ * Translates from glyph/character to byte indexes when called
+ * without a colon.
+ * Otherwise when colon-modified, translates from byte indexes
+ * back to glyph indexes.
+ * These values can differ in documents with multi-byte
+ * encodings (of which only UTF-8 is supported).
+ * It is especially useful to translate between these indexes
+ * when manually invoking Scintilla messages (\fBES\fP command), as
+ * they almost always take byte positions.
+ *
+ * When called without arguments, \fB^E\fP returns the current
+ * position (dot) in bytes.
+ * This is equivalent to, but faster than \(lq.^E\(rq.
+ * \fB:^E\fP without arguments returns the length of the current
+ * document in bytes, which is equivalent to, but faster than \(lqZ^E\(rq.
+ *
+ * When passing in indexes outside of the document's valid area,
+ * -1 is returned, so the return value can also be interpreted
+ * as a TECO boolean, signalling truth/success for invalid indexes.
+ * This provides an elegant and effective way to validate
+ * buffer addresses.
+ */
+static void
+teco_state_control_glyphs2bytes(teco_machine_main_t *ctx, GError **error)
+{
+ teco_int_t res; /* the value pushed onto the expression stack at the end */
+
+ if (!teco_expressions_eval(FALSE, error))
+ return;
+
+ gboolean colon_modified = teco_machine_main_eval_colon(ctx);
+
+ if (!teco_expressions_args()) { /* called without an argument */
+ /*
+ * This is shorter than .^E or Z^E and avoids unnecessary glyph to
+ * byte index translations.
+ * On the other hand :^E is inconsistent, as it will return a byte
+ * index, instead of glyph index.
+ */
+ res = teco_interface_ssm(colon_modified ? SCI_GETLENGTH : SCI_GETCURRENTPOS, 0, 0);
+ } else {
+ teco_int_t pos;
+ if (!teco_expressions_pop_num_calc(&pos, 0, error))
+ return;
+ if (colon_modified) {
+ /* teco_interface_bytes2glyphs() does not check addresses, so validate 0 <= pos <= length here and yield -1 otherwise */
+ res = 0 <= pos && pos <= teco_interface_ssm(SCI_GETLENGTH, 0, 0)
+ ? teco_interface_bytes2glyphs(pos) : -1;
+ } else {
+ /* teco_interface_glyphs2bytes() reports invalid indexes as negative values, which are passed down unchanged. */
+ res = teco_interface_glyphs2bytes(pos);
+ }
+ }
+
+ teco_expressions_push(res);
+}
+
static teco_state_t *
-teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
static teco_machine_main_transition_t transitions[] = {
/*
@@ -1746,7 +1834,8 @@ teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error)
['C'] = {&teco_state_start, teco_state_control_exit},
['O'] = {&teco_state_start, teco_state_control_octal},
['D'] = {&teco_state_start, teco_state_control_decimal},
- ['R'] = {&teco_state_start, teco_state_control_radix}
+ ['R'] = {&teco_state_start, teco_state_control_radix},
+ ['E'] = {&teco_state_start, teco_state_control_glyphs2bytes}
};
/*
@@ -1761,7 +1850,7 @@ teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error)
TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_control);
static teco_state_t *
-teco_state_ascii_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_ascii_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
if (ctx->mode == TECO_MODE_NORMAL)
teco_expressions_push(chr);
@@ -1797,7 +1886,7 @@ TECO_DEFINE_STATE(teco_state_ascii);
* only be seen when executing the following command.
*/
static teco_state_t *
-teco_state_escape_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_escape_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
/*$ ^[^[ ^[$ $$ terminate return
* [a1,a2,...]$$ -- Terminate command line or return from macro
@@ -1891,7 +1980,7 @@ TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_escape,
* when it comes to function key macro masking.
*/
.is_start = TRUE,
- .fnmacro_mask = TECO_FNMACRO_MASK_START
+ .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE
);
/*$ EF close
@@ -1958,6 +2047,11 @@ teco_state_ecommand_close(teco_machine_main_t *ctx, GError **error)
* Without any argument ED returns the current flags.
*
* Currently, the following flags are used by \*(ST:
+ * - 4: If enabled, prefer raw single-byte ANSI encoding
+ * for all new buffers and registers.
+ * This does not change the encoding of any existing
+ * buffers and any initialized default register when set via
+ * \fBED\fP, so you might want to launch \*(ST with \fB--8bit\fP.
* - 8: Enable/disable automatic folding of case-insensitive
* command characters during interactive key translation.
* The case of letter keys is inverted, so one or two
@@ -1973,14 +2067,17 @@ teco_state_ecommand_close(teco_machine_main_t *ctx, GError **error)
* of files.
* - 32: Enable/Disable buffer editing hooks
* (via execution of macro in global Q-Register \(lqED\(rq)
- * - 64: Enable/Disable function key macros
* - 128: Enable/Disable enforcement of UNIX98
* \(lq/bin/sh\(rq emulation for operating system command
* executions
- * - 256: Enable/Disable \fBxterm\fP(1) clipboard support.
- * Should only be enabled if XTerm allows the
- * \fIGetSelection\fP and \fISetSelection\fP window
- * operations.
+ * - 256: Enable/Disable OSC-52 clipboard support.
+ * Must only be enabled if the terminal emulator is configured
+ * properly.
+ * - 512: Enable/Disable Unicode icons in the Curses UI.
+ * This requires a capable font, like the ones provided
+ * by the \(lqNerd Fonts\(rq project.
+ * Changes to this flag in interactive mode may not become
+ * effective immediately.
*
* The features controlled thus are discribed in other sections
* of this manual.
@@ -2098,6 +2195,12 @@ teco_state_ecommand_flags(teco_machine_main_t *ctx, GError **error)
* on exit the author is aware of is \fBxterm\fP(1) and
* the Linux console driver.
* You have been warned. Good luck.
+ * .IP 4
+ * The column after the last horizontal movement.
+ * This is only used by \fBfnkeys.tes\fP and is similar to the Scintilla-internal
+ * setting \fBSCI_CHOOSECARETX\fP.
+ * Unlike most other settings, this is on purpose not restored on rubout,
+ * so it "survives" command line replacements.
*/
static void
teco_state_ecommand_properties(teco_machine_main_t *ctx, GError **error)
@@ -2106,9 +2209,12 @@ teco_state_ecommand_properties(teco_machine_main_t *ctx, GError **error)
EJ_USER_INTERFACE = 0,
EJ_BUFFERS,
EJ_MEMORY_LIMIT,
- EJ_INIT_COLOR
+ EJ_INIT_COLOR,
+ EJ_CARETX
};
+ static teco_int_t caret_x = 0;
+
teco_int_t property;
if (!teco_expressions_eval(FALSE, error) ||
!teco_expressions_pop_num_calc(&property, teco_num_sign, error))
@@ -2144,6 +2250,10 @@ teco_state_ecommand_properties(teco_machine_main_t *ctx, GError **error)
teco_interface_init_color((guint)value, (guint32)color);
break;
+ case EJ_CARETX:
+ caret_x = value;
+ break;
+
default:
g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED,
"Cannot set property %" TECO_INT_FORMAT " "
@@ -2180,6 +2290,10 @@ teco_state_ecommand_properties(teco_machine_main_t *ctx, GError **error)
teco_expressions_push(teco_memory_limit);
break;
+ case EJ_CARETX:
+ teco_expressions_push(caret_x);
+ break;
+
default:
g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED,
"Invalid property %" TECO_INT_FORMAT " "
@@ -2292,6 +2406,252 @@ teco_state_ecommand_eol(teco_machine_main_t *ctx, GError **error)
}
}
+static const gchar *
+teco_codepage2str(guint codepage)
+{
+ /*
+ * Maps a Scintilla codepage/charset to the encoding name given to g_convert().
+ * Values without a case below (unknown or unsupported) yield NULL.
+ * The multi-byte charsets are excluded, since we don't
+ * support them in SciTECO, even though Scintilla has them.
+ * Contrary to the Scintilla documentation, Gtk supports most of them.
+ * Those that are supported are tested, so the codepage mapping should be
+ * definitive (although there could be similar related codepages).
+ */
+ switch (codepage) {
+ case SC_CP_UTF8: return "UTF-8";
+ case SC_CHARSET_ANSI:
+ case SC_CHARSET_DEFAULT: return "ISO-8859-1"; /* LATIN1 */
+ case SC_CHARSET_BALTIC: return "ISO-8859-13"; /* LATIN7 */
+ //case SC_CHARSET_CHINESEBIG5: return "BIG5";
+ case SC_CHARSET_EASTEUROPE: return "ISO-8859-2"; /* LATIN2 */
+ //case SC_CHARSET_GB2312: return "GB2312";
+ case SC_CHARSET_GREEK: return "ISO-8859-7"; // CP1253???
+ //case SC_CHARSET_HANGUL: return "UHC";
+ /* unsure whether this is supported on Gtk */
+ case SC_CHARSET_MAC: return "MAC";
+ /* not supported by Gtk */
+ case SC_CHARSET_OEM: return "CP437";
+ /*
+ * Apparently, this can be CP1251 on the native Windows
+ * port of Scintilla.
+ */
+ case SC_CHARSET_RUSSIAN: return "KOI8-R";
+ case SC_CHARSET_OEM866: return "CP866";
+ case SC_CHARSET_CYRILLIC: return "CP1251";
+ //case SC_CHARSET_SHIFTJIS: return "SHIFT-JIS";
+ //case SC_CHARSET_SYMBOL:
+ case SC_CHARSET_TURKISH: return "ISO-8859-9"; /* LATIN5 */
+ //case SC_CHARSET_JOHAB: return "JOHAB";
+ case SC_CHARSET_HEBREW: return "ISO-8859-8"; // CP1255?
+ /*
+ * FIXME: Some arabic codepage is supported by Gtk,
+ * but I am not sure which.
+ */
+ case SC_CHARSET_ARABIC: return "ISO-8859-6"; // CP720, CP1256???
+ /* apparently not supported by Gtk */
+ case SC_CHARSET_VIETNAMESE: return "CP1258";
+ case SC_CHARSET_THAI: return "ISO-8859-11";
+ case SC_CHARSET_8859_15: return "ISO-8859-15"; /* LATIN9 */
+ }
+
+ return NULL;
+}
+
+/*$ EE encoding codepage charset
+ * codepageEE -- Edit current document's encoding (codepage/charset)
+ * EE -> codepage
+ * codepage:EE
+ * :EE -> codepage
+ *
+ * When called with an argument, it sets the current codepage,
+ * otherwise returns it.
+ * The following codepages are supported:
+ * - 0: ANSI (raw bytes)
+ * - 1: ISO-8859-1 (latin1)
+ * - 77: Macintosh Latin encoding
+ * - 161: ISO-8859-7
+ * - 162: ISO-8859-9 (latin5)
+ * - 163: CP1258
+ * - 177: ISO-8859-8
+ * - 178: ISO-8859-6
+ * - 186: ISO-8859-13 (latin7)
+ * - 204: KOI8-R
+ * - 222: ISO-8859-11
+ * - 238: ISO-8859-2 (latin2)
+ * - 255: CP437
+ * - 866: CP866
+ * - 1000: ISO-8859-15 (latin9)
+ * - 1251: CP1251
+ * - 65001: UTF-8
+ *
+ * Displaying characters in the single-byte (non-UTF-8) codepages might
+ * be supported only with the Gtk UI.
+ * At least 77, 178, 163 and 255 are not displayed correctly on Gtk.
+ * 65001 (UTF-8) is the default for new buffers.
+ * 0 (ANSI) should be used when working with raw bytes,
+ * but is currently displayed like ISO-8859-1 (latin1).
+ *
+ * \fBEE\fP does not change the buffer contents itself by default, only
+ * how it is displayed and how \*(ST interacts with it.
+ * This allows fixing up the codepage if it is not in the default UTF-8
+ * or if codepage guessing failed.
+ *
+ * When colon-modified, the \fB:EE\fP command will additionally convert
+ * the current buffer contents into the new code page, preserving the
+ * current position (dot).
+ * This will fail if the conversion would be lossy.
+ * Conversions from and to UTF-8 \fIshould\fP always be successful.
+ */
+static void
+teco_state_ecommand_encoding(teco_machine_main_t *ctx, GError **error)
+{
+ if (!teco_expressions_eval(FALSE, error))
+ return;
+
+ gboolean colon_modified = teco_machine_main_eval_colon(ctx);
+
+ guint old_cp = teco_interface_get_codepage();
+
+ if (!teco_expressions_args()) {
+ /* no argument: only report the current code page */
+ teco_expressions_push(old_cp);
+ return;
+ }
+
+ /*
+ * Set code page (an argument was given)
+ */
+ teco_int_t new_cp;
+ if (!teco_expressions_pop_num_calc(&new_cp, 0, error))
+ return;
+
+ if (old_cp == SC_CP_UTF8 && new_cp == SC_CP_UTF8) /* UTF-8 stays UTF-8: nothing to do */
+ return;
+
+ if (teco_current_doc_must_undo() && teco_undo_enabled) {
+ if (old_cp == SC_CP_UTF8) { /* new_cp != SC_CP_UTF8 */
+ undo__teco_interface_ssm(SCI_ALLOCATELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
+ undo__teco_interface_ssm(SCI_SETCODEPAGE, SC_CP_UTF8, 0);
+ } else {
+ undo__teco_interface_ssm(SCI_SETCODEPAGE, 0, 0);
+ for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++)
+ undo__teco_interface_ssm(SCI_STYLESETCHARACTERSET, style, old_cp);
+ /*
+ * The index is internally reference-counted and could underflow,
+ * so don't do it more than necessary.
+ */
+ if (new_cp == SC_CP_UTF8)
+ undo__teco_interface_ssm(SCI_RELEASELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
+ }
+ }
+
+ teco_int_t dot_glyphs;
+ if (colon_modified) {
+ sptr_t dot_bytes = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
+ dot_glyphs = teco_interface_bytes2glyphs(dot_bytes);
+
+ /*
+ * Convert buffer to new codepage.
+ *
+ * FIXME: Could be optimized slightly by converting first
+ * before the gap, inserting the converted text and then
+ * converting after the gap.
+ */
+ const gchar *to_codepage = teco_codepage2str(new_cp);
+ const gchar *from_codepage = teco_codepage2str(old_cp);
+ if (!to_codepage || !from_codepage) {
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
+ "Unknown or unsupported codepage/charset");
+ return; /* NOTE(review): the undo__*() calls above have already run at this point - confirm this is harmless */
+ }
+
+ const gchar *buf = (const gchar *)teco_interface_ssm(SCI_GETCHARACTERPOINTER, 0, 0);
+ gsize len = teco_interface_ssm(SCI_GETLENGTH, 0, 0);
+ g_autofree gchar *converted; /* uninitialized, but assigned by g_convert() below before any return */
+ gsize converted_len;
+
+ /*
+ * This fails if there is no direct translation.
+ * If we'd use g_convert_with_fallback(), it would be tricky to choose
+ * fallback characters that will always work.
+ */
+ converted = g_convert(buf, len, to_codepage, from_codepage,
+ NULL, &converted_len, error);
+ if (!converted)
+ return;
+
+ teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); /* replace the whole buffer within one Scintilla undo action */
+ teco_interface_ssm(SCI_CLEARALL, 0, 0);
+ teco_interface_ssm(SCI_APPENDTEXT, converted_len, (sptr_t)converted);
+ teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0);
+ teco_ring_dirtify();
+
+ if (teco_current_doc_must_undo()) {
+ undo__teco_interface_ssm(SCI_GOTOPOS, dot_bytes, 0);
+ undo__teco_interface_ssm(SCI_UNDO, 0, 0);
+ }
+ }
+
+ if (new_cp == SC_CP_UTF8) {
+ teco_interface_ssm(SCI_SETCODEPAGE, SC_CP_UTF8, 0);
+ /*
+ * UTF-8 documents strictly require the line character index.
+ * See teco_view_glyphs2bytes() and teco_view_bytes2glyphs().
+ */
+ g_assert(!(teco_interface_ssm(SCI_GETLINECHARACTERINDEX, 0, 0)
+ & SC_LINECHARACTERINDEX_UTF32));
+ teco_interface_ssm(SCI_ALLOCATELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
+ } else {
+ /*
+ * The index is NOT released automatically when setting the codepage.
+ * But it is internally reference-counted and could underflow,
+ * so don't do it more than necessary.
+ */
+ if (old_cp == SC_CP_UTF8) {
+ teco_interface_ssm(SCI_RELEASELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
+ g_assert(!(teco_interface_ssm(SCI_GETLINECHARACTERINDEX, 0, 0)
+ & SC_LINECHARACTERINDEX_UTF32));
+ }
+
+ /*
+ * Configure a single-byte codepage/charset.
+ * This requires setting it on all of the possible styles.
+ * Unfortunately there can theoretically even be 255 (STYLE_MAX) styles.
+ * This is important only for display purposes - other than that
+ * all single-byte encodings are handled the same.
+ *
+ * FIXME: Should we avoid this if new_cp == 0?
+ * It will be used for raw byte handling mostly.
+ */
+ if (teco_current_doc_must_undo()) {
+ /*
+ * There is a chance the user will see this buffer even if we
+ * are currently in batch mode.
+ */
+ for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++)
+ teco_interface_ssm(SCI_STYLESETCHARACTERSET, style, new_cp);
+ } else {
+ /* we must still set it, so that <EE> retrieval works */
+ teco_interface_ssm(SCI_STYLESETCHARACTERSET, STYLE_DEFAULT, new_cp);
+ }
+ /* 0 is used for ALL single-byte encodings */
+ teco_interface_ssm(SCI_SETCODEPAGE, 0, 0);
+ }
+
+ if (colon_modified)
+ /*
+ * Only now, it will be safe to recalculate dot in the new encoding.
+ * If the new codepage is UTF-8, the line character index will be
+ * ready only now.
+ */
+ teco_interface_ssm(SCI_GOTOPOS, teco_interface_glyphs2bytes(dot_glyphs), 0);
+}
+
/*$ EX exit
* [bool]EX -- Exit program
* -EX
@@ -2352,7 +2712,7 @@ teco_state_ecommand_exit(teco_machine_main_t *ctx, GError **error)
}
static teco_state_t *
-teco_state_ecommand_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_ecommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
static teco_machine_main_transition_t transitions[] = {
/*
@@ -2377,6 +2737,7 @@ teco_state_ecommand_input(teco_machine_main_t *ctx, gchar chr, GError **error)
['D'] = {&teco_state_start, teco_state_ecommand_flags},
['J'] = {&teco_state_start, teco_state_ecommand_properties},
['L'] = {&teco_state_start, teco_state_ecommand_eol},
+ ['E'] = {&teco_state_start, teco_state_ecommand_encoding},
['X'] = {&teco_state_start, teco_state_ecommand_exit}
};
@@ -2395,26 +2756,61 @@ teco_state_insert_initial(teco_machine_main_t *ctx, GError **error)
if (ctx->mode > TECO_MODE_NORMAL)
return TRUE;
+ /*
+ * Current document's encoding determines the behaviour of
+ * string building constructs.
+ */
+ teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine,
+ teco_interface_get_codepage());
+
if (!teco_expressions_eval(FALSE, error))
return FALSE;
guint args = teco_expressions_args();
if (!args)
return TRUE;
- teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0);
- for (int i = args; i > 0; i--) {
- gchar chr = (gchar)teco_expressions_peek_num(i-1);
- teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)&chr);
+ if (teco_interface_ssm(SCI_GETCODEPAGE, 0, 0) == SC_CP_UTF8) {
+ /* detect possible errors before introducing side effects */
+ for (gint i = args; i > 0; i--) {
+ teco_int_t chr = teco_expressions_peek_num(i-1);
+ if (chr < 0 || !g_unichar_validate(chr)) {
+ teco_error_codepoint_set(error, "I");
+ return FALSE;
+ }
+ }
+ teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0);
+ for (gint i = args; i > 0; i--) {
+ /* 4 bytes should be enough, but we better follow the documentation */
+ gchar buf[6];
+ gsize len = g_unichar_to_utf8(teco_expressions_peek_num(i-1), buf);
+ teco_interface_ssm(SCI_ADDTEXT, len, (sptr_t)buf);
+ }
+ } else {
+ /* everything else is a single-byte encoding */
+ for (gint i = args; i > 0; i--) {
+ teco_int_t chr = teco_expressions_peek_num(i-1);
+ if (chr < 0 || chr > 0xFF) {
+ teco_error_codepoint_set(error, "I");
+ return FALSE;
+ }
+ }
+ teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0);
+ for (gint i = args; i > 0; i--) {
+ gchar chr = (gchar)teco_expressions_peek_num(i-1);
+ teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)&chr);
+ }
}
- for (int i = args; i > 0; i--)
- if (!teco_expressions_pop_num_calc(NULL, 0, error))
- return FALSE;
teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0);
teco_ring_dirtify();
if (teco_current_doc_must_undo())
undo__teco_interface_ssm(SCI_UNDO, 0, 0);
+ /* This is done only now because it can _theoretically_ fail. */
+ for (gint i = args; i > 0; i--)
+ if (!teco_expressions_pop_num_calc(NULL, 0, error))
+ return FALSE;
+
return TRUE;
}
@@ -2451,8 +2847,8 @@ teco_state_insert_process(teco_machine_main_t *ctx, const teco_string_t *str,
* Secondly, the command inserts <text>.
* In interactive mode, <text> is inserted interactively.
*
- * String building characters are \fBenabled\fP for the
- * I command.
+ * Unlike in classic TECO dialects, string building characters are
+ * \fBenabled\fP for the \fBI\fP command.
* When editing \*(ST macros, using the \fBEI\fP command
* may be better, since it has string building characters
* disabled.
@@ -2491,10 +2887,9 @@ teco_state_insert_indent_initial(teco_machine_main_t *ctx, GError **error)
len -= teco_interface_ssm(SCI_GETCOLUMN,
teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0), 0) % len;
- gchar spaces[len];
-
- memset(spaces, ' ', sizeof(spaces));
- teco_interface_ssm(SCI_ADDTEXT, sizeof(spaces), (sptr_t)spaces);
+ gchar space = ' ';
+ while (len-- > 0)
+ teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)&space);
}
teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0);
teco_ring_dirtify();
diff --git a/src/core-commands.h b/src/core-commands.h
index 6efc5a3..e30770d 100644
--- a/src/core-commands.h
+++ b/src/core-commands.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -43,7 +43,7 @@ gboolean teco_state_insert_process(teco_machine_main_t *ctx, const teco_string_t
gsize new_chars, GError **error);
/* in cmdline.c */
-gboolean teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error);
+gboolean teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error);
/**
* @class TECO_DEFINE_STATE_INSERT
diff --git a/src/doc.c b/src/doc.c
index a69896c..a1ebe2c 100644
--- a/src/doc.c
+++ b/src/doc.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -30,17 +30,49 @@
#include "doc.h"
static inline teco_doc_scintilla_t *
+teco_doc_scintilla_ref(teco_doc_scintilla_t *doc)
+{
+ if (doc) /* a NULL document is passed through without messaging Scintilla */
+ teco_view_ssm(teco_qreg_view, SCI_ADDREFDOCUMENT, 0, (sptr_t)doc);
+ return doc; /* returns its argument, so the call can be used as an expression */
+}
+
+static inline void
+teco_doc_scintilla_release(teco_doc_scintilla_t *doc)
+{
+ if (doc) /* releasing a NULL document is a no-op */
+ teco_view_ssm(teco_qreg_view, SCI_RELEASEDOCUMENT, 0, (sptr_t)doc);
+}
+
+TECO_DEFINE_UNDO_OBJECT(doc_scintilla, teco_doc_scintilla_t *,
+ teco_doc_scintilla_ref, teco_doc_scintilla_release); /* NOTE(review): presumably generates teco_undo_object_doc_scintilla_push() - confirm against the macro definition */
+
+static inline teco_doc_scintilla_t *
teco_doc_get_scintilla(teco_doc_t *ctx)
{
+ /*
+ * The Scintilla document is created on first use and cached in ctx->doc.
+ * FIXME: Perhaps we should always specify SC_DOCUMENTOPTION_TEXT_LARGE?
+ * SC_DOCUMENTOPTION_STYLES_NONE is unfortunately also not safe to set
+ * always as the Q-Reg might well be used for styling even in batch mode.
+ */
if (G_UNLIKELY(!ctx->doc))
ctx->doc = (teco_doc_scintilla_t *)teco_view_ssm(teco_qreg_view, SCI_CREATEDOCUMENT, 0, 0);
return ctx->doc;
}
-/** @memberof teco_doc_t */
+/**
+ * Edit the given document in the Q-Register view.
+ *
+ * @param ctx The document to edit.
+ * @param default_cp The codepage to configure if the document is new.
+ *
+ * @memberof teco_doc_t
+ */
void
-teco_doc_edit(teco_doc_t *ctx)
+teco_doc_edit(teco_doc_t *ctx, guint default_cp)
{
+ gboolean new_doc = ctx->doc == NULL;
+
teco_view_ssm(teco_qreg_view, SCI_SETDOCPOINTER, 0,
(sptr_t)teco_doc_get_scintilla(ctx));
teco_view_ssm(teco_qreg_view, SCI_SETFIRSTVISIBLELINE, ctx->first_line, 0);
@@ -48,11 +80,39 @@ teco_doc_edit(teco_doc_t *ctx)
teco_view_ssm(teco_qreg_view, SCI_SETSEL, ctx->anchor, (sptr_t)ctx->dot);
/*
- * NOTE: Thanks to a custom Scintilla patch, se representations
+ * NOTE: Thanks to a custom Scintilla patch, representations
* do not get reset after SCI_SETDOCPOINTER, so they have to be
* initialized only once.
*/
//teco_view_set_representations(teco_qreg_view);
+
+ if (new_doc && default_cp != SC_CP_UTF8) {
+ /*
+ * There is a chance the user will see this buffer even if we
+ * are currently in batch mode.
+ */
+ for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++)
+ teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET,
+ style, default_cp);
+ /* 0 is used for ALL single-byte encodings */
+ teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0);
+ } else if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0)
+ & SC_LINECHARACTERINDEX_UTF32)) {
+ /*
+ * All UTF-8 documents are expected to have a character index.
+ * This allocates nothing if the document is not UTF-8.
+ * But it is reference counted, so it must not be allocated
+ * more than once.
+ *
+ * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER
+ * (although I don't know why and where).
+ * Recalculating it could be inefficient.
+ * The index is reference-counted. Perhaps we could just allocate
+ * one more time, so it doesn't get freed when changing documents.
+ */
+ teco_view_ssm(teco_qreg_view, SCI_ALLOCATELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
+ }
}
/** @memberof teco_doc_t */
@@ -68,26 +128,26 @@ teco_doc_undo_edit(teco_doc_t *ctx)
undo__teco_view_ssm(teco_qreg_view, SCI_SETXOFFSET, ctx->xoffset, 0);
undo__teco_view_ssm(teco_qreg_view, SCI_SETFIRSTVISIBLELINE, ctx->first_line, 0);
undo__teco_view_ssm(teco_qreg_view, SCI_SETDOCPOINTER, 0,
- (sptr_t)teco_doc_get_scintilla(ctx));
+ (sptr_t)teco_doc_get_scintilla(ctx));
}
/** @memberof teco_doc_t */
void
-teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len)
+teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage)
{
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
+ teco_doc_scintilla_release(ctx->doc);
+ ctx->doc = NULL;
+
teco_doc_reset(ctx);
- teco_doc_edit(ctx);
+ teco_doc_edit(ctx, codepage);
- teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0);
- teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0);
teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)(str ? : ""));
- teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0);
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
+ teco_doc_edit(&teco_qreg_current->string, 0);
}
/** @memberof teco_doc_t */
@@ -100,13 +160,13 @@ teco_doc_undo_set_string(teco_doc_t *ctx)
*/
teco_doc_update(ctx, teco_qreg_view);
- if (teco_qreg_current && teco_qreg_current->must_undo) // FIXME
+ if (teco_qreg_current && teco_qreg_current->must_undo && // FIXME
+ ctx == &teco_qreg_current->string)
+ /* load old document into view */
teco_doc_undo_edit(&teco_qreg_current->string);
teco_doc_undo_reset(ctx);
- undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0);
-
- teco_doc_undo_edit(ctx);
+ teco_undo_object_doc_scintilla_push(&ctx->doc);
}
/**
@@ -117,33 +177,42 @@ teco_doc_undo_set_string(teco_doc_t *ctx)
* It can be NULL if you are interested only in the string's length.
* Strings must be freed via g_free().
- * @param len Where to store the string's length (mandatory).
+ * @param outlen Where to store the string's length or NULL if it is not needed.
+ * @param codepage Where to store the document's codepage or NULL
+ * if that information is not necessary.
*
* @see teco_qreg_vtable_t::get_string()
* @memberof teco_doc_t
*/
void
-teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len)
+teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage)
{
if (!ctx->doc) {
if (str)
*str = NULL;
- *len = 0;
+ if (outlen)
+ *outlen = 0;
+ if (codepage)
+ *codepage = teco_default_codepage();
return;
}
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(ctx);
+ teco_doc_edit(ctx, teco_default_codepage());
- *len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
+ gsize len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
if (str) {
- *str = g_malloc(*len + 1);
- teco_view_ssm(teco_qreg_view, SCI_GETTEXT, *len + 1, (sptr_t)*str);
+ *str = g_malloc(len + 1);
+ teco_view_ssm(teco_qreg_view, SCI_GETTEXT, len + 1, (sptr_t)*str);
}
+ if (outlen)
+ *outlen = len;
+ if (codepage)
+ *codepage = teco_view_get_codepage(teco_qreg_view);
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
+ teco_doc_edit(&teco_qreg_current->string, 0);
}
/** @memberof teco_doc_t */
@@ -185,6 +254,5 @@ teco_doc_exchange(teco_doc_t *ctx, teco_doc_t *other)
void
teco_doc_clear(teco_doc_t *ctx)
{
- if (ctx->doc)
- teco_view_ssm(teco_qreg_view, SCI_RELEASEDOCUMENT, 0, (sptr_t)ctx->doc);
+ teco_doc_scintilla_release(ctx->doc);
}
diff --git a/src/doc.h b/src/doc.h
index 91663d4..1218c35 100644
--- a/src/doc.h
+++ b/src/doc.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -42,7 +42,7 @@ typedef struct teco_doc_scintilla_t teco_doc_scintilla_t;
typedef struct {
/**
* Underlying Scintilla document.
- * It is created on demand in teco_doc_maybe_create_document(),
+ * It is created on demand in teco_doc_get_scintilla(),
* so that we don't waste memory on integer-only Q-Registers.
*/
teco_doc_scintilla_t *doc;
@@ -62,13 +62,13 @@ teco_doc_init(teco_doc_t *ctx)
memset(ctx, 0, sizeof(*ctx));
}
-void teco_doc_edit(teco_doc_t *ctx);
+void teco_doc_edit(teco_doc_t *ctx, guint default_cp);
void teco_doc_undo_edit(teco_doc_t *ctx);
-void teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len);
+void teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage);
void teco_doc_undo_set_string(teco_doc_t *ctx);
-void teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len);
+void teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len, guint *codepage);
void teco_doc_update_from_view(teco_doc_t *ctx, teco_view_t *from);
void teco_doc_update_from_doc(teco_doc_t *ctx, const teco_doc_t *from);
diff --git a/src/eol.c b/src/eol.c
index 8a6c0a3..0063bbd 100644
--- a/src/eol.c
+++ b/src/eol.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/eol.h b/src/eol.h
index 2b426a5..26418e5 100644
--- a/src/eol.h
+++ b/src/eol.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/error.c b/src/error.c
index 7c4e151..afa2ac1 100644
--- a/src/error.c
+++ b/src/error.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -37,13 +37,6 @@ guint teco_error_return_args = 0;
*/
guint teco_error_pos = 0, teco_error_line = 0, teco_error_column = 0;
-void
-teco_error_set_coord(const gchar *str, guint pos)
-{
- teco_error_pos = pos;
- teco_string_get_coord(str, pos, &teco_error_line, &teco_error_column);
-}
-
typedef enum {
TECO_FRAME_QREG,
TECO_FRAME_FILE,
@@ -161,10 +154,7 @@ teco_error_add_frame_toplevel(void)
teco_error_add_frame(TECO_FRAME_TOPLEVEL, 0);
}
-#ifndef NDEBUG
-__attribute__((destructor))
-#endif
-void
+void TECO_DEBUG_CLEANUP
teco_error_clear_frames(void)
{
teco_stailq_entry_t *entry;
diff --git a/src/error.h b/src/error.h
index 91d2b60..469d957 100644
--- a/src/error.h
+++ b/src/error.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -40,13 +40,16 @@ typedef enum {
*/
TECO_ERROR_SYNTAX,
TECO_ERROR_ARGEXPECTED,
+ TECO_ERROR_CODEPOINT,
TECO_ERROR_MOVE,
TECO_ERROR_WORDS,
TECO_ERROR_RANGE,
TECO_ERROR_INVALIDQREG,
TECO_ERROR_QREGOPUNSUPPORTED,
TECO_ERROR_QREGCONTAINSNULL,
+ TECO_ERROR_EDITINGLOCALQREG,
TECO_ERROR_MEMLIMIT,
+ TECO_ERROR_CLIPBOARD,
/** Interrupt current operation */
TECO_ERROR_INTERRUPTED,
@@ -60,10 +63,12 @@ typedef enum {
} teco_error_t;
static inline void
-teco_error_syntax_set(GError **error, gchar chr)
+teco_error_syntax_set(GError **error, gunichar chr)
{
+ gchar buf[6];
+ g_autofree gchar *chr_printable = teco_string_echo(buf, g_unichar_to_utf8(chr, buf));
g_set_error(error, TECO_ERROR, TECO_ERROR_SYNTAX,
- "Syntax error \"%c\" (%d)", chr, chr);
+ "Syntax error \"%s\" (U+%04" G_GINT32_MODIFIER "X)", chr_printable, chr);
}
static inline void
@@ -74,6 +79,13 @@ teco_error_argexpected_set(GError **error, const gchar *cmd)
}
static inline void
+teco_error_codepoint_set(GError **error, const gchar *cmd)
+{
+ g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
+ "Invalid Unicode codepoint for <%s>", cmd);
+}
+
+static inline void
teco_error_move_set(GError **error, const gchar *cmd)
{
g_set_error(error, TECO_ERROR, TECO_ERROR_MOVE,
@@ -119,6 +131,14 @@ teco_error_qregcontainsnull_set(GError **error, const gchar *name, gsize len, gb
}
static inline void
+teco_error_editinglocalqreg_set(GError **error, const gchar *name, gsize len)
+{
+ g_autofree gchar *name_printable = teco_string_echo(name, len);
+ g_set_error(error, TECO_ERROR, TECO_ERROR_EDITINGLOCALQREG,
+ "Editing local Q-Register \"%s\" at end of macro call", name_printable);
+}
+
+static inline void
teco_error_interrupted_set(GError **error)
{
g_set_error_literal(error, TECO_ERROR, TECO_ERROR_INTERRUPTED, "Interrupted");
@@ -135,7 +155,11 @@ teco_error_return_set(GError **error, guint args)
extern guint teco_error_pos, teco_error_line, teco_error_column;
-void teco_error_set_coord(const gchar *str, guint pos);
+static inline void
+teco_error_set_coord(const gchar *str, gsize pos)
+{
+ teco_string_get_coord(str, pos, &teco_error_pos, &teco_error_line, &teco_error_column);
+}
void teco_error_display_short(const GError *error);
void teco_error_display_full(const GError *error);
diff --git a/src/expressions.c b/src/expressions.c
index 57e2f71..ee6b4dc 100644
--- a/src/expressions.c
+++ b/src/expressions.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -86,7 +86,7 @@ teco_int_t
teco_expressions_pop_num(guint index)
{
teco_int_t n = 0;
- teco_operator_t op = teco_expressions_pop_op(0);
+ G_GNUC_UNUSED teco_operator_t op = teco_expressions_pop_op(0);
g_assert(op == TECO_OP_NUMBER);
@@ -114,11 +114,12 @@ teco_expressions_pop_num_calc(teco_int_t *ret, teco_int_t imply, GError **error)
}
void
-teco_expressions_add_digit(gchar digit)
+teco_expressions_add_digit(gunichar digit)
{
teco_int_t n = teco_expressions_args() > 0 ? teco_expressions_pop_num(0) : 0;
- teco_expressions_push(n*teco_radix + (n < 0 ? -1 : 1)*(digit - '0'));
+ /* use g_unichar_digit_value()? */
+ teco_expressions_push(n*teco_radix + (n < 0 ? -1 : 1)*((gint)digit - '0'));
}
void
@@ -184,7 +185,28 @@ teco_expressions_calc(GError **error)
switch (op) {
case TECO_OP_POW:
- for (result = 1; vright--; result *= vleft);
+ if (!vright) {
+ result = vleft < 0 ? -1 : 1;
+ break;
+ }
+ if (vright < 0) {
+ if (!vleft) {
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
+ "Negative power of 0 is not defined");
+ return FALSE;
+ }
+ result = ABS(vleft) == 1 ? vleft : 0;
+ break;
+ }
+ result = 1;
+ for (;;) {
+ if (vright & 1)
+ result *= vleft;
+ vright >>= 1;
+ if (!vright)
+ break;
+ vleft *= vleft;
+ }
break;
case TECO_OP_MUL:
result = vleft * vright;
@@ -297,6 +319,9 @@ guint teco_brace_level = 0;
void
teco_expressions_brace_open(void)
{
+ while (teco_operators->len > 0 && teco_expressions_peek_op(0) == TECO_OP_NEW)
+ teco_expressions_pop_op(0);
+
teco_expressions_push_op(TECO_OP_BRACE);
teco_undo_guint(teco_brace_level)++;
}
@@ -374,11 +399,9 @@ teco_expressions_format(gchar *buffer, teco_int_t number)
return p;
}
-#ifndef NDEBUG
-static void __attribute__((destructor))
+static void TECO_DEBUG_CLEANUP
teco_expressions_cleanup(void)
{
g_array_free(teco_numbers, TRUE);
g_array_free(teco_operators, TRUE);
}
-#endif
diff --git a/src/expressions.h b/src/expressions.h
index 45e6f64..68d8ddb 100644
--- a/src/expressions.h
+++ b/src/expressions.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -123,7 +123,7 @@ teco_int_t teco_expressions_peek_num(guint index);
teco_int_t teco_expressions_pop_num(guint index);
gboolean teco_expressions_pop_num_calc(teco_int_t *ret, teco_int_t imply, GError **error);
-void teco_expressions_add_digit(gchar digit);
+void teco_expressions_add_digit(gunichar digit);
void teco_expressions_push_op(teco_operator_t op);
gboolean teco_expressions_push_calc(teco_operator_t op, GError **error);
diff --git a/src/file-utils.c b/src/file-utils.c
index 239cc5f..3f8f721 100644
--- a/src/file-utils.c
+++ b/src/file-utils.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -28,6 +28,7 @@
#ifdef HAVE_WINDOWS_H
#define WIN32_LEAN_AND_MEAN
+#define UNICODE
#include <windows.h>
#endif
@@ -36,7 +37,6 @@
#include "sciteco.h"
#include "qreg.h"
-#include "glob.h"
#include "interface.h"
#include "string-utils.h"
#include "file-utils.h"
@@ -56,26 +56,35 @@ G_STATIC_ASSERT(INVALID_FILE_ATTRIBUTES == TECO_FILE_INVALID_ATTRIBUTES);
teco_file_attributes_t
teco_file_get_attributes(const gchar *filename)
{
- return GetFileAttributes((LPCTSTR)filename);
+ g_autofree gunichar2 *filename_utf16 = g_utf8_to_utf16(filename, -1, NULL, NULL, NULL);
+ return filename_utf16 ? GetFileAttributesW(filename_utf16)
+ : TECO_FILE_INVALID_ATTRIBUTES;
}
void
teco_file_set_attributes(const gchar *filename, teco_file_attributes_t attrs)
{
- SetFileAttributes((LPCTSTR)filename, attrs);
+ g_autofree gunichar2 *filename_utf16 = g_utf8_to_utf16(filename, -1, NULL, NULL, NULL);
+ if (filename_utf16)
+ SetFileAttributesW(filename_utf16, attrs);
}
gchar *
teco_file_get_absolute_path(const gchar *path)
{
+ if (!path)
+ return NULL;
+ g_autofree gunichar2 *path_utf16 = g_utf8_to_utf16(path, -1, NULL, NULL, NULL);
TCHAR buf[MAX_PATH];
- return path && GetFullPathName(path, sizeof(buf), buf, NULL) ? g_strdup(buf) : NULL;
+ return path_utf16 && GetFullPathNameW(path_utf16, G_N_ELEMENTS(buf), buf, NULL)
+ ? g_utf16_to_utf8(buf, -1, NULL, NULL, NULL) : NULL;
}
gboolean
teco_file_is_visible(const gchar *path)
{
- return !(GetFileAttributes((LPCTSTR)path) & FILE_ATTRIBUTE_HIDDEN);
+ g_autofree gunichar2 *path_utf16 = g_utf8_to_utf16(path, -1, NULL, NULL, NULL);
+ return path_utf16 && !(GetFileAttributesW(path_utf16) & FILE_ATTRIBUTE_HIDDEN);
}
#else /* !G_OS_WIN32 */
@@ -83,7 +92,7 @@ teco_file_is_visible(const gchar *path)
teco_file_attributes_t
teco_file_get_attributes(const gchar *filename)
{
- struct stat buf;
+ GStatBuf buf;
return g_stat(filename, &buf) ? TECO_FILE_INVALID_ATTRIBUTES : buf.st_mode;
}
@@ -204,7 +213,7 @@ teco_file_expand_path(const gchar *path)
* but it may have been changed later on.
*/
g_auto(teco_string_t) home = {NULL, 0};
- if (!qreg->vtable->get_string(qreg, &home.data, &home.len, NULL) ||
+ if (!qreg->vtable->get_string(qreg, &home.data, &home.len, NULL, NULL) ||
teco_string_contains(&home, '\0'))
return g_strdup(path);
g_assert(home.data != NULL);
@@ -227,9 +236,6 @@ teco_file_auto_complete(const gchar *filename, GFileTest file_test, teco_string_
{
memset(insert, 0, sizeof(*insert));
- if (teco_globber_is_pattern(filename))
- return FALSE;
-
g_autofree gchar *filename_expanded = teco_file_expand_path(filename);
gsize filename_len = strlen(filename_expanded);
diff --git a/src/file-utils.h b/src/file-utils.h
index 51b0d18..4ee59e6 100644
--- a/src/file-utils.h
+++ b/src/file-utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/glob.c b/src/glob.c
index 889858e..0374d7c 100644
--- a/src/glob.c
+++ b/src/glob.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -225,17 +225,19 @@ teco_globber_compile_pattern(const gchar *pattern)
*pout++ = '[';
break;
}
+ /* fall through: escape PCRE metacharacters */
+ case '\\':
+ case '^':
+ case '$':
+ case '.':
+ case '|':
+ case '(':
+ case ')':
+ case '+':
+ case '{':
+ *pout++ = '\\';
/* fall through */
default:
- /*
- * For simplicity, all non-alphanumeric
- * characters are escaped since they could
- * be PCRE magic characters.
- * g_regex_escape_string() is inefficient.
- * character anyway.
- */
- if (!g_ascii_isalnum(*pattern))
- *pout++ = '\\';
*pout++ = *pattern;
break;
}
@@ -271,12 +273,13 @@ teco_globber_compile_pattern(const gchar *pattern)
*pout++ = ']';
break;
}
- /* fall through */
- default:
- if (!g_ascii_isalnum(*pattern))
- *pout++ = '\\';
+ /* fall through: escape PCRE metacharacters */
+ case '\\':
+ case '[':
+ *pout++ = '\\';
/* fall through */
case '-':
+ default:
state = STATE_CLASS;
*pout++ = *pattern;
break;
@@ -315,7 +318,8 @@ teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str,
teco_qreg_t *glob_reg = teco_qreg_table_find(&teco_qreg_table_globals, "_", 1);
g_assert(glob_reg != NULL);
if (!glob_reg->vtable->undo_set_string(glob_reg, error) ||
- !glob_reg->vtable->set_string(glob_reg, filename, strlen(filename), error))
+ !glob_reg->vtable->set_string(glob_reg, filename, strlen(filename),
+ teco_default_codepage(), error))
return NULL;
}
@@ -448,7 +452,7 @@ teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str,
* when they should be in a register, the user will
* have to edit that register anyway.
*/
-TECO_DEFINE_STATE_EXPECTFILE(teco_state_glob_pattern,
+TECO_DEFINE_STATE_EXPECTGLOB(teco_state_glob_pattern,
.expectstring.last = FALSE
);
@@ -490,7 +494,8 @@ teco_state_glob_filename_done(teco_machine_main_t *ctx, const teco_string_t *str
teco_qreg_t *glob_reg = teco_qreg_table_find(&teco_qreg_table_globals, "_", 1);
g_assert(glob_reg != NULL);
g_auto(teco_string_t) pattern_str = {NULL, 0};
- if (!glob_reg->vtable->get_string(glob_reg, &pattern_str.data, &pattern_str.len, error))
+ if (!glob_reg->vtable->get_string(glob_reg, &pattern_str.data, &pattern_str.len,
+ NULL, error))
return NULL;
if (teco_string_contains(&pattern_str, '\0')) {
teco_error_qregcontainsnull_set(error, "_", 1, FALSE);
diff --git a/src/glob.h b/src/glob.h
index f000a15..8f03d38 100644
--- a/src/glob.h
+++ b/src/glob.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -46,6 +46,21 @@ teco_globber_is_pattern(const gchar *str)
gchar *teco_globber_escape_pattern(const gchar *pattern);
GRegex *teco_globber_compile_pattern(const gchar *pattern);
+/* in cmdline.c */
+gboolean teco_state_expectglob_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
+
+/**
+ * @interface TECO_DEFINE_STATE_EXPECTGLOB
+ * @implements TECO_DEFINE_STATE_EXPECTFILE
+ * @ingroup states
+ */
+#define TECO_DEFINE_STATE_EXPECTGLOB(NAME, ...) \
+ TECO_DEFINE_STATE_EXPECTFILE(NAME, \
+ .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \
+ teco_state_expectglob_process_edit_cmd, \
+ ##__VA_ARGS__ \
+ )
+
/*
* Command states
*/
diff --git a/src/goto-commands.c b/src/goto-commands.c
index e4cd868..a8a9689 100644
--- a/src/goto-commands.c
+++ b/src/goto-commands.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -53,7 +53,7 @@ teco_state_label_initial(teco_machine_main_t *ctx, GError **error)
* I'm unsure whether !-signs should be allowed within comments.
*/
static teco_state_t *
-teco_state_label_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
if (chr == '!') {
/*
@@ -61,8 +61,8 @@ teco_state_label_input(teco_machine_main_t *ctx, gchar chr, GError **error)
* on rubout.
* Otherwise, the label will be removed (PC == -1).
*/
- gint existing_pc = teco_goto_table_set(&ctx->goto_table, ctx->goto_label.data,
- ctx->goto_label.len, ctx->macro_pc);
+ gssize existing_pc = teco_goto_table_set(&ctx->goto_table, ctx->goto_label.data,
+ ctx->goto_label.len, ctx->macro_pc);
if (ctx->parent.must_undo)
teco_goto_table_undo_set(&ctx->goto_table, ctx->goto_label.data, ctx->goto_label.len, existing_pc);
@@ -85,7 +85,7 @@ teco_state_label_input(teco_machine_main_t *ctx, gchar chr, GError **error)
if (ctx->parent.must_undo)
undo__teco_string_truncate(&ctx->goto_label, ctx->goto_label.len);
- teco_string_append_c(&ctx->goto_label, chr);
+ teco_string_append_wc(&ctx->goto_label, chr);
return &teco_state_label;
}
@@ -119,7 +119,7 @@ teco_state_goto_done(teco_machine_main_t *ctx, const teco_string_t *str, GError
}
if (value == 0) {
- gint pc = teco_goto_table_find(&ctx->goto_table, label.data, label.len);
+ gssize pc = teco_goto_table_find(&ctx->goto_table, label.data, label.len);
if (pc >= 0) {
ctx->macro_pc = pc;
@@ -138,7 +138,7 @@ teco_state_goto_done(teco_machine_main_t *ctx, const teco_string_t *str, GError
}
/* in cmdline.c */
-gboolean teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error);
+gboolean teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error);
/*$ O
* Olabel$ -- Go to label
diff --git a/src/goto-commands.h b/src/goto-commands.h
index ffd9527..03773c0 100644
--- a/src/goto-commands.h
+++ b/src/goto-commands.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/goto.c b/src/goto.c
index c8b5808..65ee3ca 100644
--- a/src/goto.c
+++ b/src/goto.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -35,12 +35,12 @@
/** @extends teco_rb3str_head_t */
typedef struct {
teco_rb3str_head_t head;
- gint pc;
+ gsize pc;
} teco_goto_label_t;
/** @private @static @memberof teco_goto_label_t */
static teco_goto_label_t *
-teco_goto_label_new(const gchar *name, gsize len, gint pc)
+teco_goto_label_new(const gchar *name, gsize len, gsize pc)
{
teco_goto_label_t *label = g_new0(teco_goto_label_t, 1);
teco_string_init(&label->head.name, name, len);
@@ -79,10 +79,10 @@ teco_goto_table_dump(teco_goto_table_t *ctx)
#endif
/** @memberof teco_goto_table_t */
-gint
+gssize
teco_goto_table_remove(teco_goto_table_t *ctx, const gchar *name, gsize len)
{
- gint existing_pc = -1;
+ gssize existing_pc = -1;
teco_goto_label_t *label = (teco_goto_label_t *)teco_rb3str_find(&ctx->tree, TRUE, name, len);
if (label) {
@@ -95,7 +95,7 @@ teco_goto_table_remove(teco_goto_table_t *ctx, const gchar *name, gsize len)
}
/** @memberof teco_goto_table_t */
-gint
+gssize
teco_goto_table_find(teco_goto_table_t *ctx, const gchar *name, gsize len)
{
teco_goto_label_t *label = (teco_goto_label_t *)teco_rb3str_find(&ctx->tree, TRUE, name, len);
@@ -103,13 +103,13 @@ teco_goto_table_find(teco_goto_table_t *ctx, const gchar *name, gsize len)
}
/** @memberof teco_goto_table_t */
-gint
-teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint pc)
+gssize
+teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gssize pc)
{
if (pc < 0)
return teco_goto_table_remove(ctx, name, len);
- gint existing_pc = -1;
+ gssize existing_pc = -1;
teco_goto_label_t *label = (teco_goto_label_t *)teco_rb3str_find(&ctx->tree, TRUE, name, len);
if (label) {
@@ -135,7 +135,7 @@ teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint p
*/
typedef struct {
teco_goto_table_t *table;
- gint pc;
+ gssize pc;
gsize len;
gchar name[];
} teco_goto_table_undo_set_t;
@@ -153,7 +153,7 @@ teco_goto_table_undo_set_action(teco_goto_table_undo_set_t *ctx, gboolean run)
/** @memberof teco_goto_table_t */
void
-teco_goto_table_undo_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint pc)
+teco_goto_table_undo_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gssize pc)
{
if (!ctx->must_undo)
return;
diff --git a/src/goto.h b/src/goto.h
index eadd341..01f55ac 100644
--- a/src/goto.h
+++ b/src/goto.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -40,12 +40,12 @@ teco_goto_table_init(teco_goto_table_t *ctx, gboolean must_undo)
ctx->must_undo = must_undo;
}
-gint teco_goto_table_remove(teco_goto_table_t *ctx, const gchar *name, gsize len);
+gssize teco_goto_table_remove(teco_goto_table_t *ctx, const gchar *name, gsize len);
-gint teco_goto_table_find(teco_goto_table_t *ctx, const gchar *name, gsize len);
+gssize teco_goto_table_find(teco_goto_table_t *ctx, const gchar *name, gsize len);
-gint teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint pc);
-void teco_goto_table_undo_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint pc);
+gssize teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gssize pc);
+void teco_goto_table_undo_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gssize pc);
/** @memberof teco_goto_table_t */
static inline gboolean
diff --git a/src/help.c b/src/help.c
index e06bda4..0f88646 100644
--- a/src/help.c
+++ b/src/help.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -94,7 +94,7 @@ teco_help_init(GError **error)
teco_qreg_t *lib_reg = teco_qreg_table_find(&teco_qreg_table_globals, "$SCITECOPATH", 12);
g_assert(lib_reg != NULL);
g_auto(teco_string_t) lib_path = {NULL, 0};
- if (!lib_reg->vtable->get_string(lib_reg, &lib_path.data, &lib_path.len, error))
+ if (!lib_reg->vtable->get_string(lib_reg, &lib_path.data, &lib_path.len, NULL, error))
return FALSE;
/*
* FIXME: lib_path may contain null-bytes.
@@ -235,8 +235,7 @@ teco_help_auto_complete(const gchar *topic_name, teco_string_t *insert)
topic_name ? strlen(topic_name) : 0, 0, insert);
}
-#ifndef NDEBUG
-static void __attribute__((destructor))
+static void TECO_DEBUG_CLEANUP
teco_help_cleanup(void)
{
if (!teco_help_chunk)
@@ -251,7 +250,6 @@ teco_help_cleanup(void)
teco_help_topic_free((teco_help_topic_t *)cur);
}
}
-#endif
/*
* Command states
@@ -316,7 +314,7 @@ teco_state_help_done(teco_machine_main_t *ctx, const teco_string_t *str, GError
}
/* in cmdline.c */
-gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error);
+gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error);
/*$ "?" help
* ?[topic]$ -- Get help for topic
@@ -347,7 +345,7 @@ gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine
* .EE
* In other words it must be a \*(ST comment followed
* by an asterisk sign, followed by the first topic which
- * is a buffer position, followed by a colon and the topic
+ * is a buffer position in bytes, followed by a colon and the topic
* string.
* The topic string is terminated by the end of the line.
* The end of the header is marked by a single \(lq*!\(rq.
@@ -373,7 +371,7 @@ gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine
* \fIgrosciteco\fP formatter and the \fIsciteco.tmac\fP
* GNU troff macros.
* When using womanpages generated by \fIgrosciteco\fP,
- * help topics can be defined using the \fBTECO_TOPIC\fP
+ * help topics can be defined using the \fBSCITECO_TOPIC\fP
* Troff macro.
* This flexible system allows \*(ST to access internal
* and third-party help files written in plain-text or
diff --git a/src/help.h b/src/help.h
index b777343..6400399 100644
--- a/src/help.h
+++ b/src/help.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/interface-curses/Makefile.am b/src/interface-curses/Makefile.am
index 14fc920..44fb658 100644
--- a/src/interface-curses/Makefile.am
+++ b/src/interface-curses/Makefile.am
@@ -6,4 +6,5 @@ AM_CFLAGS = -std=gnu11 -Wall -Wno-initializer-overrides -Wno-unused-value
noinst_LTLIBRARIES = libsciteco-interface.la
libsciteco_interface_la_SOURCES = interface.c \
curses-utils.c curses-utils.h \
- curses-info-popup.c curses-info-popup.h
+ curses-info-popup.c curses-info-popup.h \
+ curses-icons.c curses-icons.h
diff --git a/src/interface-curses/curses-icons.c b/src/interface-curses/curses-icons.c
new file mode 100644
index 0000000..1a1ba3a
--- /dev/null
+++ b/src/interface-curses/curses-icons.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (C) 2012-2024 Robin Haberkorn
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <glib.h>
+
+#include <curses.h>
+
+#include "sciteco.h"
+#include "curses-icons.h"
+
+typedef struct { /* one entry of an icon lookup table (see teco_icons_file and teco_icons_ext) */
+ const gchar *name; /* lookup key compared with strcmp(): a complete file name or an extension without the leading dot */
+ gunichar c; /* Unicode codepoint of the icon returned for this key */
+} teco_curses_icon_t;
+
+/*
+ * The following icons have initially been adapted from exa,
+ * but icons have since been added and removed.
+ *
+ * They require fonts with additional symbols, e.g.
+ * Nerd Fonts (https://www.nerdfonts.com/).
+ *
+ * They MUST be kept presorted, so we can perform binary searches.
+ */
+
+/** Mapping of complete filenames to Unicode "icons" */
+static const teco_curses_icon_t teco_icons_file[] = {
+ {".Trash", 0xf1f8}, /*  */
+ {".atom", 0xe764}, /*  */
+ {".bash_history", 0xf489}, /*  */
+ {".bash_profile", 0xf489}, /*  */
+ {".bashrc", 0xf489}, /*  */
+ {".git", 0xf1d3}, /*  */
+ {".gitattributes", 0xf1d3}, /*  */
+ {".gitconfig", 0xf1d3}, /*  */
+ {".github", 0xf408}, /*  */
+ {".gitignore", 0xf1d3}, /*  */
+ {".gitmodules", 0xf1d3}, /*  */
+ {".rvm", 0xe21e}, /*  */
+ {".teco_ini", 0xedaa}, /*  */
+ {".teco_session", 0xedaa}, /*  */
+ {".vimrc", 0xe62b}, /*  */
+ {".vscode", 0xe70c}, /*  */
+ {".zshrc", 0xf489}, /*  */
+ {"COMMIT_EDITMSG", 0xf1d3}, /*  */
+ {"Cargo.lock", 0xe7a8}, /*  */
+ {"Dockerfile", 0xf308}, /*  */
+ {"GNUmakefile", 0xf489}, /*  */
+ {"MERGE_MSG", 0xf1d3}, /*  */
+ {"Makefile", 0xf489}, /*  */
+ {"PKGBUILD", 0xf303}, /*  */
+ {"TAG_EDITMSG", 0xf1d3}, /*  */
+ {"bin", 0xe5fc}, /*  */
+ {"config", 0xe5fc}, /*  */
+ {"docker-compose.yml", 0xf308}, /*  */
+ {"ds_store", 0xf179}, /*  */
+ {"git-rebase-todo", 0xf1d3}, /*  */
+ {"go.mod", 0xe626}, /*  */
+ {"go.sum", 0xe626}, /*  */
+ {"gradle", 0xe256}, /*  */
+ {"gruntfile.coffee", 0xe611}, /*  */
+ {"gruntfile.js", 0xe611}, /*  */
+ {"gruntfile.ls", 0xe611}, /*  */
+ {"gulpfile.coffee", 0xe610}, /*  */
+ {"gulpfile.js", 0xe610}, /*  */
+ {"gulpfile.ls", 0xe610}, /*  */
+ {"hidden", 0xf023}, /*  */
+ {"include", 0xe5fc}, /*  */
+ {"lib", 0xf121}, /*  */
+ {"localized", 0xf179}, /*  */
+ {"node_modules", 0xe718}, /*  */
+ {"npmignore", 0xe71e}, /*  */
+ {"rubydoc", 0xe73b}, /*  */
+ {"yarn.lock", 0xe718}, /*  */
+};
+
+/** Mapping of file extensions to Unicode "icons" */
+static const teco_curses_icon_t teco_icons_ext[] = {
+ {"DS_store", 0xf179}, /*  */
+ {"ai", 0xe7b4}, /*  */
+ {"android", 0xe70e}, /*  */
+ {"apk", 0xe70e}, /*  */
+ {"apple", 0xf179}, /*  */
+ {"avi", 0xf03d}, /*  */
+ {"avif", 0xf1c5}, /*  */
+ {"avro", 0xe60b}, /*  */
+ {"awk", 0xf489}, /*  */
+ {"bash", 0xf489}, /*  */
+ {"bat", 0xf17a}, /*  */
+ {"bats", 0xf489}, /*  */
+ {"bmp", 0xf1c5}, /*  */
+ {"bz", 0xf410}, /*  */
+ {"bz2", 0xf410}, /*  */
+ {"c", 0xe61e}, /*  */
+ {"c++", 0xe61d}, /*  */
+ {"cab", 0xe70f}, /*  */
+ {"cc", 0xe61d}, /*  */
+ {"cfg", 0xe615}, /*  */
+ {"class", 0xe256}, /*  */
+ {"clj", 0xe768}, /*  */
+ {"cljs", 0xe76a}, /*  */
+ {"cls", 0xf034}, /*  */
+ {"cmd", 0xe70f}, /*  */
+ {"coffee", 0xf0f4}, /*  */
+ {"conf", 0xe615}, /*  */
+ {"cp", 0xe61d}, /*  */
+ {"cpio", 0xf410}, /*  */
+ {"cpp", 0xe61d}, /*  */
+ {"cs", 0xf031b}, /* 󰌛 */
+ {"csh", 0xf489}, /*  */
+ {"cshtml", 0xf1fa}, /*  */
+ {"csproj", 0xf031b}, /* 󰌛 */
+ {"css", 0xe749}, /*  */
+ {"csv", 0xf1c3}, /*  */
+ {"csx", 0xf031b}, /* 󰌛 */
+ {"cxx", 0xe61d}, /*  */
+ {"d", 0xe7af}, /*  */
+ {"dart", 0xe798}, /*  */
+ {"db", 0xf1c0}, /*  */
+ {"deb", 0xe77d}, /*  */
+ {"diff", 0xf440}, /*  */
+ {"djvu", 0xf02d}, /*  */
+ {"dll", 0xe70f}, /*  */
+ {"doc", 0xf1c2}, /*  */
+ {"docx", 0xf1c2}, /*  */
+ {"ds_store", 0xf179}, /*  */
+ {"dump", 0xf1c0}, /*  */
+ {"ebook", 0xe28b}, /*  */
+ {"ebuild", 0xf30d}, /*  */
+ {"editorconfig", 0xe615}, /*  */
+ {"ejs", 0xe618}, /*  */
+ {"elm", 0xe62c}, /*  */
+ {"env", 0xf462}, /*  */
+ {"eot", 0xf031}, /*  */
+ {"epub", 0xe28a}, /*  */
+ {"erb", 0xe73b}, /*  */
+ {"erl", 0xe7b1}, /*  */
+ {"ex", 0xe62d}, /*  */
+ {"exe", 0xf17a}, /*  */
+ {"exs", 0xe62d}, /*  */
+ {"fish", 0xf489}, /*  */
+ {"flac", 0xf001}, /*  */
+ {"flv", 0xf03d}, /*  */
+ {"font", 0xf031}, /*  */
+ {"fs", 0xe7a7}, /*  */
+ {"fsi", 0xe7a7}, /*  */
+ {"fsx", 0xe7a7}, /*  */
+ {"gdoc", 0xf1c2}, /*  */
+ {"gem", 0xe21e}, /*  */
+ {"gemfile", 0xe21e}, /*  */
+ {"gemspec", 0xe21e}, /*  */
+ {"gform", 0xf298}, /*  */
+ {"gif", 0xf1c5}, /*  */
+ {"go", 0xe626}, /*  */
+ {"gradle", 0xe256}, /*  */
+ {"groovy", 0xe775}, /*  */
+ {"gsheet", 0xf1c3}, /*  */
+ {"gslides", 0xf1c4}, /*  */
+ {"guardfile", 0xe21e}, /*  */
+ {"gz", 0xf410}, /*  */
+ {"h", 0xf0fd}, /*  */
+ {"hbs", 0xe60f}, /*  */
+ {"hpp", 0xf0fd}, /*  */
+ {"hs", 0xe777}, /*  */
+ {"htm", 0xf13b}, /*  */
+ {"html", 0xf13b}, /*  */
+ {"hxx", 0xf0fd}, /*  */
+ {"ico", 0xf1c5}, /*  */
+ {"image", 0xf1c5}, /*  */
+ {"img", 0xe271}, /*  */
+ {"iml", 0xe7b5}, /*  */
+ {"ini", 0xf17a}, /*  */
+ {"ipynb", 0xe678}, /*  */
+ {"iso", 0xe271}, /*  */
+ {"j2c", 0xf1c5}, /*  */
+ {"j2k", 0xf1c5}, /*  */
+ {"jad", 0xe256}, /*  */
+ {"jar", 0xe256}, /*  */
+ {"java", 0xe256}, /*  */
+ {"jfi", 0xf1c5}, /*  */
+ {"jfif", 0xf1c5}, /*  */
+ {"jif", 0xf1c5}, /*  */
+ {"jl", 0xe624}, /*  */
+ {"jmd", 0xf48a}, /*  */
+ {"jp2", 0xf1c5}, /*  */
+ {"jpe", 0xf1c5}, /*  */
+ {"jpeg", 0xf1c5}, /*  */
+ {"jpg", 0xf1c5}, /*  */
+ {"jpx", 0xf1c5}, /*  */
+ {"js", 0xe74e}, /*  */
+ {"json", 0xe60b}, /*  */
+ {"jsx", 0xe7ba}, /*  */
+ {"jxl", 0xf1c5}, /*  */
+ {"ksh", 0xf489}, /*  */
+ {"latex", 0xf034}, /*  */
+ {"less", 0xe758}, /*  */
+ {"lhs", 0xe777}, /*  */
+ {"license", 0xf0219}, /* 󰈙 */
+ {"localized", 0xf179}, /*  */
+ {"lock", 0xf023}, /*  */
+ {"log", 0xf18d}, /*  */
+ {"lua", 0xe620}, /*  */
+ {"lz", 0xf410}, /*  */
+ {"lz4", 0xf410}, /*  */
+ {"lzh", 0xf410}, /*  */
+ {"lzma", 0xf410}, /*  */
+ {"lzo", 0xf410}, /*  */
+ {"m", 0xe61e}, /*  */
+ {"m4a", 0xf001}, /*  */
+ {"markdown", 0xf48a}, /*  */
+ {"md", 0xf48a}, /*  */
+ {"mjs", 0xe74e}, /*  */
+ {"mk", 0xf489}, /*  */
+ {"mkd", 0xf48a}, /*  */
+ {"mkv", 0xf03d}, /*  */
+ {"mm", 0xe61d}, /*  */
+ {"mobi", 0xe28b}, /*  */
+ {"mov", 0xf03d}, /*  */
+ {"mp3", 0xf001}, /*  */
+ {"mp4", 0xf03d}, /*  */
+ {"msi", 0xe70f}, /*  */
+ {"mustache", 0xe60f}, /*  */
+ {"nix", 0xf313}, /*  */
+ {"node", 0xf0399}, /* 󰎙 */
+ {"npmignore", 0xe71e}, /*  */
+ {"odp", 0xf1c4}, /*  */
+ {"ods", 0xf1c3}, /*  */
+ {"odt", 0xf1c2}, /*  */
+ {"ogg", 0xf001}, /*  */
+ {"ogv", 0xf03d}, /*  */
+ {"otf", 0xf031}, /*  */
+ {"part", 0xf43a}, /*  */
+ {"patch", 0xf440}, /*  */
+ {"pdf", 0xf1c1}, /*  */
+ {"php", 0xe73d}, /*  */
+ {"pl", 0xe769}, /*  */
+ {"plx", 0xe769}, /*  */
+ {"pm", 0xe769}, /*  */
+ {"png", 0xf1c5}, /*  */
+ {"pod", 0xe769}, /*  */
+ {"ppt", 0xf1c4}, /*  */
+ {"pptx", 0xf1c4}, /*  */
+ {"procfile", 0xe21e}, /*  */
+ {"properties", 0xe60b}, /*  */
+ {"ps1", 0xf489}, /*  */
+ {"psd", 0xe7b8}, /*  */
+ {"pxm", 0xf1c5}, /*  */
+ {"py", 0xe606}, /*  */
+ {"pyc", 0xe606}, /*  */
+ {"r", 0xf25d}, /*  */
+ {"rakefile", 0xe21e}, /*  */
+ {"rar", 0xf410}, /*  */
+ {"razor", 0xf1fa}, /*  */
+ {"rb", 0xe21e}, /*  */
+ {"rdata", 0xf25d}, /*  */
+ {"rdb", 0xe76d}, /*  */
+ {"rdoc", 0xf48a}, /*  */
+ {"rds", 0xf25d}, /*  */
+ {"readme", 0xf48a}, /*  */
+ {"rlib", 0xe7a8}, /*  */
+ {"rmd", 0xf48a}, /*  */
+ {"rpm", 0xe7bb}, /*  */
+ {"rs", 0xe7a8}, /*  */
+ {"rspec", 0xe21e}, /*  */
+ {"rspec_parallel", 0xe21e}, /*  */
+ {"rspec_status", 0xe21e}, /*  */
+ {"rss", 0xf09e}, /*  */
+ {"rtf", 0xf0219}, /* 󰈙 */
+ {"ru", 0xe21e}, /*  */
+ {"rubydoc", 0xe73b}, /*  */
+ {"sass", 0xe603}, /*  */
+ {"scala", 0xe737}, /*  */
+ {"scss", 0xe749}, /*  */
+ {"sh", 0xf489}, /*  */
+ {"shell", 0xf489}, /*  */
+ {"slim", 0xe73b}, /*  */
+ {"sln", 0xe70c}, /*  */
+ {"so", 0xf17c}, /*  */
+ {"sql", 0xf1c0}, /*  */
+ {"sqlite3", 0xe7c4}, /*  */
+ {"sty", 0xf034}, /*  */
+ {"styl", 0xe600}, /*  */
+ {"stylus", 0xe600}, /*  */
+ {"svg", 0xf1c5}, /*  */
+ {"swift", 0xe755}, /*  */
+ {"t", 0xe769}, /*  */
+ {"tar", 0xf410}, /*  */
+ {"taz", 0xf410}, /*  */
+ {"tbz", 0xf410}, /*  */
+ {"tbz2", 0xf410}, /*  */
+ {"tec", 0xedaa}, /*  */
+ {"tes", 0xedaa}, /*  */
+ {"tex", 0xf034}, /*  */
+ {"tgz", 0xf410}, /*  */
+ {"tiff", 0xf1c5}, /*  */
+ {"tlz", 0xf410}, /*  */
+ {"toml", 0xe615}, /*  */
+ {"torrent", 0xe275}, /*  */
+ {"ts", 0xe628}, /*  */
+ {"tsv", 0xf1c3}, /*  */
+ {"tsx", 0xe7ba}, /*  */
+ {"ttf", 0xf031}, /*  */
+ {"twig", 0xe61c}, /*  */
+ {"txt", 0xf15c}, /*  */
+ {"txz", 0xf410}, /*  */
+ {"tz", 0xf410}, /*  */
+ {"tzo", 0xf410}, /*  */
+ {"video", 0xf03d}, /*  */
+ {"vim", 0xe62b}, /*  */
+ {"vue", 0xf0844}, /* 󰡄 */
+ {"war", 0xe256}, /*  */
+ {"wav", 0xf001}, /*  */
+ {"webm", 0xf03d}, /*  */
+ {"webp", 0xf1c5}, /*  */
+ {"windows", 0xf17a}, /*  */
+ {"woff", 0xf031}, /*  */
+ {"woff2", 0xf031}, /*  */
+ {"woman", 0xeaa4}, /*  */
+ {"xhtml", 0xf13b}, /*  */
+ {"xls", 0xf1c3}, /*  */
+ {"xlsx", 0xf1c3}, /*  */
+ {"xml", 0xf05c0}, /* 󰗀 */
+ {"xul", 0xf05c0}, /* 󰗀 */
+ {"xz", 0xf410}, /*  */
+ {"yaml", 0xf481}, /*  */
+ {"yml", 0xf481}, /*  */
+ {"zip", 0xf410}, /*  */
+ {"zsh", 0xf489}, /*  */
+ {"zsh-theme", 0xf489}, /*  */
+ {"zst", 0xf410}, /*  */
+};
+
+/**
+ * Comparison callback for bsearch() on the icon tables.
+ *
+ * @param a The search key, a null-terminated string.
+ * @param b The table element, a teco_curses_icon_t.
+ * @return The strcmp() result of the key against the element's name.
+ */
+static int
+teco_curses_icon_cmp(const void *a, const void *b)
+{
+	/* bsearch() always passes the key first and the array element second */
+	return strcmp((const gchar *)a, ((const teco_curses_icon_t *)b)->name);
+}
+
+/**
+ * Look up the Unicode icon for a file.
+ *
+ * Only the basename of the path is considered.
+ * The complete basename is searched in teco_icons_file first;
+ * if that fails, everything after the last dot is searched
+ * in teco_icons_ext.
+ *
+ * @param filename Null-terminated file name or path.
+ * @return Codepoint of the matching icon, or a generic file icon
+ *         if neither the name nor the extension is known.
+ */
+gunichar
+teco_curses_icons_lookup_file(const gchar *filename)
+{
+	g_autofree gchar *name = g_path_get_basename(filename);
+
+	/* an exact file name match takes precedence over the extension */
+	const teco_curses_icon_t *by_name =
+		bsearch(name, teco_icons_file, G_N_ELEMENTS(teco_icons_file),
+		        sizeof(teco_icons_file[0]), teco_curses_icon_cmp);
+	if (by_name)
+		return by_name->c;
+
+	const gchar *dot = strrchr(name, '.');
+	if (!dot)
+		/* default file icon for files without extension */
+		return 0xf016; /*  */
+
+	/* the extension tables are keyed without the leading dot */
+	const teco_curses_icon_t *by_ext =
+		bsearch(dot + 1, teco_icons_ext, G_N_ELEMENTS(teco_icons_ext),
+		        sizeof(teco_icons_ext[0]), teco_curses_icon_cmp);
+	if (by_ext)
+		return by_ext->c;
+
+	/* default icon for files with an unknown extension */
+	return 0xf15b; /*  */
+}
+
+/**
+ * Look up the Unicode icon for a directory.
+ *
+ * The basename of the path is searched in teco_icons_file.
+ * Extensions are not considered for directories.
+ *
+ * @param dirname Null-terminated directory name or path.
+ * @return Codepoint of the matching icon, or the default
+ *         folder icon if the name is not known.
+ */
+gunichar
+teco_curses_icons_lookup_dir(const gchar *dirname)
+{
+	g_autofree gchar *name = g_path_get_basename(dirname);
+	const teco_curses_icon_t *match =
+		bsearch(name, teco_icons_file, G_N_ELEMENTS(teco_icons_file),
+		        sizeof(teco_icons_file[0]), teco_curses_icon_cmp);
+
+	if (!match)
+		/* default folder icon */
+		return 0xf115; /*  */
+
+	return match->c;
+}
diff --git a/src/interface-curses/curses-icons.h b/src/interface-curses/curses-icons.h
new file mode 100644
index 0000000..c1be06f
--- /dev/null
+++ b/src/interface-curses/curses-icons.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2012-2024 Robin Haberkorn
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#pragma once
+
+#include <glib.h>
+
+/**
+ * Q-Register icon.
+ * 0xf04cf would look more similar to the current Gtk icon.
+ */
+#define TECO_CURSES_ICONS_QREG 0xe236 /*  */
+
+gunichar teco_curses_icons_lookup_file(const gchar *filename);
+gunichar teco_curses_icons_lookup_dir(const gchar *dirname);
diff --git a/src/interface-curses/curses-info-popup.c b/src/interface-curses/curses-info-popup.c
index a738f5d..e6e1549 100644
--- a/src/interface-curses/curses-info-popup.c
+++ b/src/interface-curses/curses-info-popup.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -28,6 +28,7 @@
#include "interface.h"
#include "curses-utils.h"
#include "curses-info-popup.h"
+#include "curses-icons.h"
/*
* FIXME: This is redundant with gtk-info-popup.c.
@@ -75,8 +76,13 @@ teco_curses_info_popup_init_pad(teco_curses_info_popup_t *ctx, attr_t attr)
gint pad_cols; /**! entry columns */
gint pad_colwidth; /**! width per entry column */
- /* reserve 2 spaces between columns */
- pad_colwidth = MIN(ctx->longest + 2, cols - 2);
+ /*
+ * With Unicode icons enabled, we reserve 2 characters at the beginning and one
+ * after the filename/directory.
+ * Otherwise 2 characters after the entry.
+ */
+ gint reserve = teco_ed & TECO_ED_ICONS ? 2+1 : 2;
+ pad_colwidth = MIN(ctx->longest + reserve, cols - 2);
/* pad_cols = floor((cols - 2) / pad_colwidth) */
pad_cols = (cols - 2) / pad_colwidth;
@@ -111,8 +117,19 @@ teco_curses_info_popup_init_pad(teco_curses_info_popup_t *ctx, attr_t attr)
switch (entry->type) {
case TECO_POPUP_FILE:
+ g_assert(!teco_string_contains(&entry->name, '\0'));
+ if (teco_ed & TECO_ED_ICONS) {
+ teco_curses_add_wc(ctx->pad, teco_curses_icons_lookup_file(entry->name.data));
+ waddch(ctx->pad, ' ');
+ }
+ teco_curses_format_filename(ctx->pad, entry->name.data, -1);
+ break;
case TECO_POPUP_DIRECTORY:
g_assert(!teco_string_contains(&entry->name, '\0'));
+ if (teco_ed & TECO_ED_ICONS) {
+ teco_curses_add_wc(ctx->pad, teco_curses_icons_lookup_dir(entry->name.data));
+ waddch(ctx->pad, ' ');
+ }
teco_curses_format_filename(ctx->pad, entry->name.data, -1);
break;
default:
diff --git a/src/interface-curses/curses-info-popup.h b/src/interface-curses/curses-info-popup.h
index bcdb3b8..a6c28a5 100644
--- a/src/interface-curses/curses-info-popup.h
+++ b/src/interface-curses/curses-info-popup.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/interface-curses/curses-utils.c b/src/interface-curses/curses-utils.c
index 8dc62f1..c751afd 100644
--- a/src/interface-curses/curses-utils.c
+++ b/src/interface-curses/curses-utils.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -29,7 +29,21 @@
#include "string-utils.h"
#include "curses-utils.h"
-gsize
+/**
+ * Render UTF-8 string with TECO character representations.
+ *
+ * Strings are cut off with `...` at the end if necessary.
+ * The mapping is similar to teco_view_set_representations().
+ *
+ * @param win The Curses window to write to.
+ * @param str The string to format.
+ * @param len The length of the string in bytes.
+ * @param max_width The maximum width to consume in
+ * the window in characters. If smaller than 0, take the
+ * entire remaining space in the window.
+ * @return Number of characters actually written.
+ */
+guint
teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width)
{
int old_x, old_y;
@@ -42,6 +56,12 @@ teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width)
while (len > 0) {
/*
+ * NOTE: It shouldn't be possible to meet any string,
+ * that is not valid UTF-8.
+ */
+ gsize clen = g_utf8_next_char(str) - str;
+
+ /*
* NOTE: This mapping is similar to
* teco_view_set_representations().
*/
@@ -85,12 +105,18 @@ teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width)
chars_added++;
if (chars_added > max_width)
goto truncate;
- waddch(win, *str);
+ /*
+ * FIXME: This works with UTF-8 on ncurses,
+ * since it detects multi-byte characters.
+ * However on other platforms wadd_wch() may be
+ * necessary, which requires a widechar Curses variant.
+ */
+ waddnstr(win, str, clen);
}
}
- str++;
- len--;
+ str += clen;
+ len -= clen;
}
return getcurx(win) - old_x;
@@ -108,23 +134,43 @@ truncate:
return getcurx(win) - old_x;
}
-gsize
-teco_curses_format_filename(WINDOW *win, const gchar *filename,
- gint max_width)
+/**
+ * Render UTF-8 filename.
+ *
+ * This cuts off overlong filenames with `...` at the beginning,
+ * possibly skipping any drive letter.
+ * Control characters are escaped, but not highlighted.
+ *
+ * @param win The Curses window to write to.
+ * @param filename Null-terminated filename to render.
+ * @param max_width The maximum width to consume in
+ * the window in characters. If smaller than 0, take the
+ * entire remaining space in the window.
+ * @return Number of characters actually written.
+ */
+guint
+teco_curses_format_filename(WINDOW *win, const gchar *filename, gint max_width)
{
int old_x = getcurx(win);
g_autofree gchar *filename_printable = teco_string_echo(filename, strlen(filename));
- size_t filename_len = strlen(filename_printable);
+ glong filename_len = g_utf8_strlen(filename_printable, -1);
if (max_width < 0)
max_width = getmaxx(win) - old_x;
- if (filename_len <= (size_t)max_width) {
+ if (filename_len <= max_width) {
+ /*
+ * FIXME: This works with UTF-8 on ncurses,
+ * since it detects multi-byte characters.
+ * However on other platforms wadd_wch() may be
+ * necessary, which requires a widechar Curses variant.
+ */
waddstr(win, filename_printable);
- } else {
- const gchar *keep_post = filename_printable + filename_len -
- max_width + 3;
+ } else if (filename_len >= 3) {
+ const gchar *keep_post;
+ keep_post = g_utf8_offset_to_pointer(filename_printable + strlen(filename_printable),
+ -max_width + 3);
#ifdef G_OS_WIN32
const gchar *keep_pre = g_path_skip_root(filename_printable);
diff --git a/src/interface-curses/curses-utils.h b/src/interface-curses/curses-utils.h
index a91ab44..2c819ee 100644
--- a/src/interface-curses/curses-utils.h
+++ b/src/interface-curses/curses-utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -20,6 +20,17 @@
#include <curses.h>
-gsize teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width);
+guint teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width);
-gsize teco_curses_format_filename(WINDOW *win, const gchar *filename, gint max_width);
+guint teco_curses_format_filename(WINDOW *win, const gchar *filename, gint max_width);
+
+/**
+ * Add a single Unicode character to a Curses window.
+ *
+ * The codepoint is encoded as UTF-8 and written as a byte string,
+ * so this behaves like wadd_wch() without requiring the
+ * wide-char Curses APIs.
+ *
+ * @param win The Curses window to write to.
+ * @param chr The Unicode codepoint to add.
+ */
+static inline void
+teco_curses_add_wc(WINDOW *win, gunichar chr)
+{
+	/* g_unichar_to_utf8() needs room for up to 6 bytes and returns the encoded length */
+	gchar utf8[6];
+	gint utf8_len = g_unichar_to_utf8(chr, utf8);
+
+	waddnstr(win, utf8, utf8_len);
+}
diff --git a/src/interface-curses/interface.c b/src/interface-curses/interface.c
index ef3f0c7..95e86c9 100644
--- a/src/interface-curses/interface.c
+++ b/src/interface-curses/interface.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,7 +24,6 @@
#include <stdlib.h>
#include <stdarg.h>
#include <unistd.h>
-#include <locale.h>
#include <errno.h>
#ifdef HAVE_WINDOWS_H
@@ -65,11 +64,12 @@
#include "qreg.h"
#include "ring.h"
#include "error.h"
-#include "curses-utils.h"
-#include "curses-info-popup.h"
#include "view.h"
#include "memory.h"
#include "interface.h"
+#include "curses-utils.h"
+#include "curses-info-popup.h"
+#include "curses-icons.h"
#if defined(__PDCURSES__) && defined(G_OS_WIN32) && \
!defined(PDCURSES_GUI)
@@ -340,12 +340,18 @@ static struct {
TECO_INFO_TYPE_QREG
} info_type;
teco_string_t info_current;
+ gboolean info_dirty;
WINDOW *msg_window;
WINDOW *cmdline_window, *cmdline_pad;
- gsize cmdline_len, cmdline_rubout_len;
+ guint cmdline_len, cmdline_rubout_len;
+ /**
+ * Pad used exclusively for wgetch() as it will not
+ * result in unwanted wrefresh().
+ */
+ WINDOW *input_pad;
GQueue *input_queue;
teco_curses_info_popup_t popup;
@@ -554,7 +560,7 @@ teco_interface_init_screen(void)
g_assert(teco_interface.screen_tty != NULL);
teco_interface.screen = newterm(NULL, teco_interface.screen_tty, teco_interface.screen_tty);
- if (!teco_interface.screen) {
+ if (G_UNLIKELY(!teco_interface.screen)) {
g_fprintf(stderr, "Error initializing interactive mode. "
"$TERM may be incorrect.\n");
exit(EXIT_FAILURE);
@@ -629,28 +635,6 @@ teco_interface_init_interactive(GError **error)
return FALSE;
/*
- * On UNIX terminals, the escape key is usually
- * delivered as the escape character even though function
- * keys are delivered as escape sequences as well.
- * That's why there has to be a timeout for detecting
- * escape presses if function key handling is enabled.
- * This timeout can be controlled using $ESCDELAY on
- * ncurses but its default is much too long.
- * We set it to 25ms as Vim does. In the very rare cases
- * this won't suffice, $ESCDELAY can still be set explicitly.
- *
- * NOTE: The only terminal emulator I'm aware of that lets
- * us send an escape sequence for the escape key is Mintty
- * (see "\e[?7727h").
- *
- * FIXME: This appears to be ineffective for netbsd-curses.
- */
-#ifdef CURSES_TTY
- if (!g_getenv("ESCDELAY"))
- set_escdelay(25);
-#endif
-
- /*
* $TERM must be unset or "#win32con" for the win32
* driver to load.
* So we always ignore any $TERM changes by the user.
@@ -679,12 +663,31 @@ teco_interface_init_interactive(GError **error)
PDC_set_function_key(FUNCTION_KEY_SHUT_DOWN, KEY_CLOSE);
#endif
- /* for displaying UTF-8 characters properly */
- setlocale(LC_CTYPE, "");
-
teco_interface_init_screen();
/*
+ * On UNIX terminals, the escape key is usually
+ * delivered as the escape character even though function
+ * keys are delivered as escape sequences as well.
+ * That's why there has to be a timeout for detecting
+ * escape presses if function key handling is enabled.
+ * This timeout can be controlled using $ESCDELAY on
+ * ncurses but its default is much too long.
+ * We set it to 25ms as Vim does. In the very rare cases
+ * this won't suffice, $ESCDELAY can still be set explicitly.
+ *
+ * NOTE: The only terminal emulator I'm aware of that lets
+ * us send an escape sequence for the escape key is Mintty
+ * (see "\e[?7727h").
+ *
+ * NOTE: The delay is overwritten by initscr() on netbsd-curses.
+ */
+#ifdef CURSES_TTY
+ if (!g_getenv("ESCDELAY"))
+ set_escdelay(25);
+#endif
+
+ /*
* We always have a CTRL handler on Windows, but doing it
* here again, ensures that we have a higher precedence
* than the one installed by PDCurses.
@@ -699,12 +702,22 @@ teco_interface_init_interactive(GError **error)
curs_set(0);
teco_interface.info_window = newwin(1, 0, 0, 0);
-
teco_interface.msg_window = newwin(1, 0, LINES - 2, 0);
-
teco_interface.cmdline_window = newwin(0, 0, LINES - 1, 0);
- keypad(teco_interface.cmdline_window, TRUE);
- nodelay(teco_interface.cmdline_window, TRUE);
+
+ teco_interface.input_pad = newpad(1, 1);
+ /*
+ * Controlling function key processing is important
+ * on Unix Curses, as ESCAPE is handled as the beginning
+ * of an escape sequence when terminal emulators are
+ * involved.
+ * Still, it's now enabled always since the ESCDELAY
+ * workaround works nicely.
+ * On some Curses variants (XCurses) keypad
+ * must always be TRUE so we receive KEY_RESIZE.
+ */
+ keypad(teco_interface.input_pad, TRUE);
+ nodelay(teco_interface.input_pad, TRUE);
teco_interface.input_queue = g_queue_new();
@@ -748,8 +761,8 @@ teco_interface_restore_batch(void)
* Set window title to a reasonable default,
* in case it is not reset immediately by the
* shell.
- * FIXME: See set_window_title() why this
- * is necessary.
+ * FIXME: See teco_interface_set_window_title()
+ * why this is necessary.
*/
#if defined(CURSES_TTY) && defined(HAVE_TIGETSTR)
teco_interface_set_window_title(g_getenv("TERM") ? : "");
@@ -978,10 +991,14 @@ teco_interface_draw_info(void)
const gchar *info_type_str;
+ waddstr(teco_interface.info_window, PACKAGE_NAME " ");
+
switch (teco_interface.info_type) {
case TECO_INFO_TYPE_QREG:
info_type_str = PACKAGE_NAME " - <QRegister> ";
- waddstr(teco_interface.info_window, info_type_str);
+ teco_curses_add_wc(teco_interface.info_window,
+ teco_ed & TECO_ED_ICONS ? TECO_CURSES_ICONS_QREG : '-');
+ waddstr(teco_interface.info_window, " <QRegister> ");
/* same formatting as in command lines */
teco_curses_format_str(teco_interface.info_window,
teco_interface.info_current.data,
@@ -990,10 +1007,15 @@ teco_interface_draw_info(void)
case TECO_INFO_TYPE_BUFFER:
info_type_str = PACKAGE_NAME " - <Buffer> ";
- waddstr(teco_interface.info_window, info_type_str);
g_assert(!teco_string_contains(&teco_interface.info_current, '\0'));
+ teco_curses_add_wc(teco_interface.info_window,
+ teco_ed & TECO_ED_ICONS ? teco_curses_icons_lookup_file(teco_interface.info_current.data) : '-');
+ waddstr(teco_interface.info_window, " <Buffer> ");
teco_curses_format_filename(teco_interface.info_window,
- teco_interface.info_current.data, -1);
+ teco_interface.info_current.data,
+ getmaxx(teco_interface.info_window) -
+ getcurx(teco_interface.info_window) - 1);
+ waddch(teco_interface.info_window, teco_interface.info_dirty ? '*' : ' ');
break;
default:
@@ -1003,13 +1025,13 @@ teco_interface_draw_info(void)
wclrtoeol(teco_interface.info_window);
/*
- * Make sure the title will consist only of printable
- * characters
+ * Make sure the title will consist only of printable characters.
*/
g_autofree gchar *info_current_printable;
info_current_printable = teco_string_echo(teco_interface.info_current.data,
teco_interface.info_current.len);
- g_autofree gchar *title = g_strconcat(info_type_str, info_current_printable, NULL);
+ g_autofree gchar *title = g_strconcat(info_type_str, info_current_printable,
+ teco_interface.info_dirty ? "*" : "", NULL);
teco_interface_set_window_title(title);
}
@@ -1019,6 +1041,7 @@ teco_interface_info_update_qreg(const teco_qreg_t *reg)
teco_string_clear(&teco_interface.info_current);
teco_string_init(&teco_interface.info_current,
reg->head.name.data, reg->head.name.len);
+ teco_interface.info_dirty = FALSE;
teco_interface.info_type = TECO_INFO_TYPE_QREG;
/* NOTE: drawn in teco_interface_event_loop_iter() */
}
@@ -1030,8 +1053,7 @@ teco_interface_info_update_buffer(const teco_buffer_t *buffer)
teco_string_clear(&teco_interface.info_current);
teco_string_init(&teco_interface.info_current, filename, strlen(filename));
- teco_string_append_c(&teco_interface.info_current,
- buffer->dirty ? '*' : ' ');
+ teco_interface.info_dirty = buffer->dirty;
teco_interface.info_type = TECO_INFO_TYPE_BUFFER;
/* NOTE: drawn in teco_interface_event_loop_iter() */
}
@@ -1044,7 +1066,8 @@ teco_interface_cmdline_update(const teco_cmdline_t *cmdline)
* We don't know if it is similar to the last one,
* so resizing makes no sense.
* We approximate the size of the new formatted command-line,
- * wasting a few bytes for control characters.
+ * wasting a few bytes for control characters and
+ * multi-byte Unicode sequences.
*/
if (teco_interface.cmdline_pad)
delwin(teco_interface.cmdline_pad);
@@ -1172,7 +1195,7 @@ teco_interface_set_clipboard(const gchar *name, const gchar *str, gsize str_len,
{
int rc = str ? PDC_setclipboard(str, str_len) : PDC_clearclipboard();
if (rc != PDC_CLIP_SUCCESS) {
- g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED,
+ g_set_error(error, TECO_ERROR, TECO_ERROR_CLIPBOARD,
"Error %d copying to clipboard", rc);
return FALSE;
}
@@ -1194,7 +1217,7 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError
if (rc == PDC_CLIP_EMPTY)
return TRUE;
if (rc != PDC_CLIP_SUCCESS) {
- g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED,
+ g_set_error(error, TECO_ERROR, TECO_ERROR_CLIPBOARD,
"Error %d retrieving clipboard", rc);
return FALSE;
}
@@ -1232,9 +1255,17 @@ teco_interface_init_clipboard(void)
* must be enabled.
* There is no way to find out if they are but we must
* not register the clipboard registers if they aren't.
- * Therefore, a special XTerm clipboard ED flag an be set by the user.
+ * Still, XTerm clipboards are broken with Unicode characters.
+ * Also, there are other terminal emulators supporting OSC-52,
+ * so the XTerm version is only checked if the terminal identifies as XTerm.
+ * Also, a special clipboard ED flag must be set by the user.
+ *
+ * NOTE: Apparently there is also a terminfo entry Ms, but it's probably
+ * not worth using it since it won't always be set and even if set, does not
+ * tell you whether the terminal will actually answer to the escape sequence or not.
*/
- if (!(teco_ed & TECO_ED_XTERM_CLIPBOARD) || teco_xterm_version() < 203)
+ if (!(teco_ed & TECO_ED_OSC52) ||
+ (teco_xterm_version() >= 0 && teco_xterm_version() < 203))
return;
teco_qreg_table_insert(&teco_qreg_table_globals, teco_qreg_clipboard_new(""));
@@ -1300,6 +1331,8 @@ teco_interface_set_clipboard(const gchar *name, const gchar *str, gsize str_len,
gboolean
teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError **error)
{
+ gboolean ret = TRUE;
+
/*
* Query the clipboard -- XTerm will reply with the
* OSC-52 command that would set the current selection.
@@ -1320,18 +1353,19 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError
* to be on the safe side.
*/
halfdelay(1); /* 100ms timeout */
- keypad(stdscr, FALSE);
+ /* don't interpret escape sequences */
+ keypad(teco_interface.input_pad, FALSE);
/*
* Skip "\e]52;x;" (7 characters).
*/
for (gint i = 0; i < 7; i++) {
- if (getch() == ERR) {
+ ret = wgetch(teco_interface.input_pad) != ERR;
+ if (!ret) {
/* timeout */
- cbreak();
- g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CLIPBOARD,
"Timed out reading XTerm clipboard");
- return FALSE;
+ goto cleanup;
}
}
@@ -1347,17 +1381,22 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError
*/
gchar buffer[MAX(3, 7)];
- gchar c = (gchar)getch();
- if (c == ERR) {
+ gchar c = (gchar)wgetch(teco_interface.input_pad);
+ ret = c != ERR;
+ if (!ret) {
/* timeout */
- cbreak();
g_string_free(str_base64, TRUE);
- g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CLIPBOARD,
"Timed out reading XTerm clipboard");
- return FALSE;
+ goto cleanup;
}
if (c == '\a')
break;
+ if (c == '\e') {
+ /* OSC escape sequence can also be terminated by "\e\\" */
+ c = (gchar)wgetch(teco_interface.input_pad);
+ break;
+ }
/*
* This could be simplified using sscanf() and
@@ -1372,14 +1411,16 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError
g_string_append_len(str_base64, buffer, out_len);
}
- cbreak();
-
if (str)
*str = str_base64->str;
*len = str_base64->len;
g_string_free(str_base64, !str);
- return TRUE;
+
+cleanup:
+ keypad(teco_interface.input_pad, TRUE);
+ nodelay(teco_interface.input_pad, TRUE);
+ return ret;
}
#else /* !PDCURSES && !CURSES_TTY */
@@ -1489,13 +1530,17 @@ teco_interface_is_interrupted(void)
gboolean
teco_interface_is_interrupted(void)
{
- if (!teco_interface.cmdline_window)
+ if (!teco_interface.input_pad)
/* batch mode */
return teco_interrupted != FALSE;
- /* NOTE: getch() is configured to be nonblocking. */
+ /*
+ * NOTE: wgetch() is configured to be nonblocking.
+ * We wgetch() on a dummy pad, so this does not call any
+ * wrefresh().
+ */
gint key;
- while ((key = wgetch(teco_interface.cmdline_window)) != ERR) {
+ while ((key = wgetch(teco_interface.input_pad)) != ERR) {
if (G_UNLIKELY(key == TECO_CTL_KEY('C')))
return TRUE;
g_queue_push_tail(teco_interface.input_queue,
@@ -1535,35 +1580,19 @@ teco_interface_refresh(void)
static gint
teco_interface_blocking_getch(void)
{
- /*
- * Setting function key processing is important
- * on Unix Curses, as ESCAPE is handled as the beginning
- * of a escape sequence when terminal emulators are
- * involved.
- * On some Curses variants (XCurses) however, keypad
- * must always be TRUE so we receive KEY_RESIZE.
- *
- * FIXME: NetBSD's curses could be handled like ncurses,
- * but gets into an undefined state when SciTECO processes
- * escape sequences.
- */
-#ifdef NCURSES_UNIX
- keypad(teco_interface.cmdline_window, teco_ed & TECO_ED_FNKEYS);
-#endif
-
/* no special <CTRL/C> handling */
raw();
- nodelay(teco_interface.cmdline_window, FALSE);
+ nodelay(teco_interface.input_pad, FALSE);
/*
* Memory limiting is stopped temporarily, since it might otherwise
* constantly place 100% load on the CPU.
*/
teco_memory_stop_limiting();
- gint key = wgetch(teco_interface.cmdline_window);
+ gint key = wgetch(teco_interface.input_pad);
teco_memory_start_limiting();
/* allow asynchronous interruptions on <CTRL/C> */
teco_interrupted = FALSE;
- nodelay(teco_interface.cmdline_window, TRUE);
+ nodelay(teco_interface.input_pad, TRUE);
#if defined(CURSES_TTY) || defined(PDCURSES_WINCON) || defined(NCURSES_WIN32)
noraw(); /* FIXME: necessary because of NCURSES_WIN32 bug */
cbreak();
@@ -1585,6 +1614,11 @@ teco_interface_blocking_getch(void)
void
teco_interface_event_loop_iter(void)
{
+ static gchar keybuf[4];
+ static gint keybuf_i = 0;
+
+ GError **error = &teco_interface.event_loop_error;
+
gint key = g_queue_is_empty(teco_interface.input_queue)
? teco_interface_blocking_getch()
: GPOINTER_TO_INT(g_queue_pop_head(teco_interface.input_queue));
@@ -1613,23 +1647,24 @@ teco_interface_event_loop_iter(void)
* backspace.
* In SciTECO backspace is normalized to ^H.
*/
- if (!teco_cmdline_keypress_c(TECO_CTL_KEY('H'),
- &teco_interface.event_loop_error))
+ if (!teco_cmdline_keymacro_c(TECO_CTL_KEY('H'), error))
return;
break;
case KEY_ENTER:
case '\r':
case '\n':
- if (!teco_cmdline_keypress_c('\n', &teco_interface.event_loop_error))
+ if (!teco_cmdline_keymacro_c('\n', error))
return;
break;
/*
* Function key macros
+ *
+ * FIXME: Perhaps support everything returned by keyname()?
*/
#define FN(KEY) \
case KEY_##KEY: \
- if (!teco_cmdline_fnmacro(#KEY, &teco_interface.event_loop_error)) \
+ if (!teco_cmdline_keymacro(#KEY, -1, error)) \
return; \
break
#define FNS(KEY) FN(KEY); FN(S##KEY)
@@ -1639,9 +1674,8 @@ teco_interface_event_loop_iter(void)
gchar macro_name[3+1];
g_snprintf(macro_name, sizeof(macro_name),
- "F%d", key - KEY_F0);
- if (!teco_cmdline_fnmacro(macro_name,
- &teco_interface.event_loop_error))
+ "F%d", key - KEY_F0);
+ if (!teco_cmdline_keymacro(macro_name, -1, error))
return;
break;
}
@@ -1660,9 +1694,31 @@ teco_interface_event_loop_iter(void)
* Control keys and keys with printable representation
*/
default:
- if (key < 0x80 &&
- !teco_cmdline_keypress_c(key, &teco_interface.event_loop_error))
+ if (key > 0xFF)
+ /* unhandled function key */
return;
+
+ /*
+ * NOTE: There's also wget_wch(), but it requires
+ * a widechar version of Curses.
+ */
+ keybuf[keybuf_i++] = key;
+ gsize len = keybuf_i;
+ gunichar cp = g_utf8_get_char_validated(keybuf, len);
+ if (keybuf_i >= sizeof(keybuf) || cp != (gunichar)-2)
+ keybuf_i = 0;
+ if ((gint32)cp < 0)
+ /* incomplete or invalid */
+ return;
+ switch (teco_cmdline_keymacro(keybuf, len, error)) {
+ case TECO_KEYMACRO_ERROR:
+ return;
+ case TECO_KEYMACRO_SUCCESS:
+ break;
+ case TECO_KEYMACRO_UNDEFINED:
+ if (!teco_cmdline_keypress(keybuf, len, error))
+ return;
+ }
}
teco_interface_refresh();
@@ -1733,6 +1789,8 @@ teco_interface_cleanup(void)
delwin(teco_interface.cmdline_pad);
if (teco_interface.msg_window)
delwin(teco_interface.msg_window);
+ if (teco_interface.input_pad)
+ delwin(teco_interface.input_pad);
/*
* PDCurses/WinCon crashes if initscr() wasn't called.
diff --git a/src/interface-gtk/gtk-info-popup.c b/src/interface-gtk/gtk-info-popup.c
index 744900d..4e25224 100644
--- a/src/interface-gtk/gtk-info-popup.c
+++ b/src/interface-gtk/gtk-info-popup.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/interface-gtk/gtk-info-popup.h b/src/interface-gtk/gtk-info-popup.h
index de4b463..c3a62ec 100644
--- a/src/interface-gtk/gtk-info-popup.h
+++ b/src/interface-gtk/gtk-info-popup.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/interface-gtk/gtk-label.c b/src/interface-gtk/gtk-label.c
index c1f4867..50cd345 100644
--- a/src/interface-gtk/gtk-label.c
+++ b/src/interface-gtk/gtk-label.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/interface-gtk/gtk-label.h b/src/interface-gtk/gtk-label.h
index d2e2314..bed6642 100644
--- a/src/interface-gtk/gtk-label.h
+++ b/src/interface-gtk/gtk-label.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/interface-gtk/interface.c b/src/interface-gtk/interface.c
index 253600a..843ad15 100644
--- a/src/interface-gtk/interface.c
+++ b/src/interface-gtk/interface.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -279,7 +279,8 @@ teco_interface_init(void)
"type-label");
gtk_header_bar_pack_start(GTK_HEADER_BAR(teco_interface.info_bar_widget),
teco_interface.info_type_widget);
- if (teco_interface.xembed_id || teco_interface.no_csd) {
+ if (teco_interface.xembed_id || teco_interface.no_csd ||
+ !g_strcmp0(g_getenv("GTK_CSD"), "0")) {
/* fall back to adding the info bar as an ordinary widget */
gtk_box_pack_start(GTK_BOX(vbox), teco_interface.info_bar_widget,
FALSE, FALSE, 0);
@@ -390,12 +391,6 @@ teco_interface_init(void)
GOptionGroup *
teco_interface_get_options(void)
{
- /*
- * FIXME: On platforms where you want to disable CSD, you usually
- * want to disable it always, so it should be configurable in the SciTECO
- * profile.
- * On the other hand, you could just install gtk3-nocsd.
- */
static const GOptionEntry entries[] = {
{"no-csd", 0, G_OPTION_FLAG_IN_MAIN,
G_OPTION_ARG_NONE, &teco_interface.no_csd,
@@ -656,15 +651,46 @@ teco_interface_get_selection_by_name(const gchar *name)
return gdk_atom_intern(name, FALSE);
}
+/**
+ * Data callback passed to gtk_clipboard_set_with_data() by
+ * teco_interface_set_clipboard().
+ *
+ * Stores the registered string in the selection, passing its explicit
+ * length to gtk_selection_data_set_text().
+ *
+ * @param clipboard Unused.
+ * @param selection Selection data to fill.
+ * @param info Unused.
+ * @param userdata The GString registered together with this callback.
+ */
+static void
+teco_interface_clipboard_provide(GtkClipboard *clipboard, GtkSelectionData *selection, guint info, gpointer userdata)
+{
+ GString *str = userdata;
+ gtk_selection_data_set_text(selection, str->str, str->len);
+}
+
+/**
+ * Clear callback passed to gtk_clipboard_set_with_data() by
+ * teco_interface_set_clipboard().
+ *
+ * Frees the GString (including its character data) that was
+ * registered as the callback's user data.
+ *
+ * @param clipboard Unused.
+ * @param userdata The GString registered together with this callback.
+ */
+static void
+teco_interface_clipboard_clear(GtkClipboard *clipboard, gpointer userdata)
+{
+ GString *str = userdata;
+ g_string_free(str, TRUE);
+}
+
+/**
+ * Set or clear the clipboard selected by name.
+ *
+ * The string is copied into a GString which is handed to GTK as the
+ * user data of teco_interface_clipboard_provide() and
+ * teco_interface_clipboard_clear(); the latter frees it.
+ *
+ * @param name Clipboard name, resolved via teco_interface_get_selection_by_name().
+ * @param str New clipboard contents or NULL to clear the clipboard.
+ * @param str_len Length of str in bytes.
+ * @param error Location to store an error (TECO_ERROR_CLIPBOARD).
+ * @return FALSE if the clipboard could not be set.
+ */
gboolean
teco_interface_set_clipboard(const gchar *name, const gchar *str, gsize str_len, GError **error)
{
+ static const GtkTargetEntry target = {"UTF8_STRING", 0, 0};
GtkClipboard *clipboard = gtk_clipboard_get(teco_interface_get_selection_by_name(name));
+ if (!str) {
+ gtk_clipboard_clear(clipboard);
+ return TRUE;
+ }
+
/*
- * NOTE: function has compatible semantics for str_len < 0.
+ * NOTE: gtk_clipboard_set_text() would ignore embedded nulls,
+ * even though it takes a length.
+ * We could theoretically avoid one allocation, but don't yet have proper types
+ * to store string data with length in one heap object.
*/
- gtk_clipboard_set_text(clipboard, str, str_len);
+ GString *gstr = g_string_new_len(str, str_len);
+ if (!gtk_clipboard_set_with_data(clipboard, &target, 1,
+ teco_interface_clipboard_provide,
+ teco_interface_clipboard_clear, gstr)) {
+ /* the callbacks were not registered, so gstr is still ours to free */
+ g_string_free(gstr, TRUE);
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CLIPBOARD,
+ "Cannot set clipboard");
+ return FALSE;
+ }
return TRUE;
}
@@ -674,16 +700,28 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError
{
GtkClipboard *clipboard = gtk_clipboard_get(teco_interface_get_selection_by_name(name));
/*
- * Could return NULL for an empty clipboard.
+ * gtk_clipboard_wait_for_text() does not return the text length,
+ * so it doesn't work with embedded nulls.
+ * gtk_clipboard_wait_for_contents() could also return NULL for empty clipboards.
*
- * FIXME: This converts to UTF8 and we loose the ability
- * to get clipboard with embedded nulls.
+ * NOTE: This also drives the main event loop,
+ * which should be safe (see teco_interface_key_pressed_cb()).
*/
- g_autofree gchar *contents = gtk_clipboard_wait_for_text(clipboard);
+ GdkAtom utf8_string = gdk_atom_intern_static_string("UTF8_STRING");
+ g_autoptr(GtkSelectionData) contents = gtk_clipboard_wait_for_contents(clipboard, utf8_string);
+ if (!contents) {
+ *len = 0;
+ if (str)
+ *str = NULL;
+ return TRUE;
+ }
- *len = contents ? strlen(contents) : 0;
- if (str)
- *str = g_steal_pointer(&contents);
+ *len = gtk_selection_data_get_length(contents);
+ if (str) {
+ /* gtk_selection_data_get_text() does not work with embedded nulls */
+ *str = memcpy(g_malloc(*len+1), gtk_selection_data_get_data(contents), *len);
+ (*str)[*len] = '\0';
+ }
return TRUE;
}
@@ -881,19 +919,50 @@ teco_interface_cmdline_commit_cb(GtkIMContext *context, gchar *str, gpointer use
{
g_autoptr(GError) error = NULL;
- /*
- * FIXME: This is only for consistency as long as we
- * do not support Unicode.
- */
- for (char *p = str; *p != '\0'; p = g_utf8_next_char(p))
- if (g_utf8_get_char(p) >= 0x80)
- return;
-
if (!teco_cmdline_keypress(str, strlen(str), &error) &&
g_error_matches(error, TECO_ERROR, TECO_ERROR_QUIT))
gtk_main_quit();
}
+/**
+ * Try to find an ANSI (latin) key for a given keypress.
+ *
+ * If the given key press does not generate a key from the ANSI
+ * range, it tries to find one in another group.
+ *
+ * @param event Key event to look up. In case of success,
+ * this event structure might also be written to.
+ * @return The codepoint of the ANSI version or 0 if there is
+ * no fitting ANSI/latin key.
+ */
+static gchar
+teco_interface_get_ansi_key(GdkEventKey *event)
+{
+ /* the key press already produces an ANSI character: use it unchanged */
+ gunichar cp = gdk_keyval_to_unicode(event->keyval);
+ if (cp && cp < 0x80)
+ return cp;
+
+ GdkKeymap *map = gdk_keymap_get_for_display(gdk_window_get_display(event->window));
+ g_autofree GdkKeymapKey *keys = NULL;
+ g_autofree guint *keyvals = NULL;
+ gint n_entries = 0;
+
+ /*
+ * Look at the keyvals the keymap reports for the same hardware key
+ * and take the first one in the ANSI range with the same case as
+ * the pressed key.
+ */
+ gdk_keymap_get_entries_for_keycode(map, event->hardware_keycode,
+ &keys, &keyvals, &n_entries);
+ for (gint i = 0; i < n_entries; i++) {
+ g_assert(keys[i].keycode == event->hardware_keycode);
+ cp = gdk_keyval_to_unicode(keyvals[i]);
+ if (cp && cp < 0x80 &&
+ gdk_keyval_is_upper(keyvals[i]) == gdk_keyval_is_upper(event->keyval)) {
+ /* rewrite the event, so it now refers to the ANSI key */
+ event->keyval = keyvals[i];
+ event->group = keys[i].group;
+ return cp;
+ }
+ }
+
+ return 0;
+}
+
static gboolean
teco_interface_handle_key_press(GdkEventKey *event, GError **error)
{
@@ -901,19 +970,19 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error)
switch (event->keyval) {
case GDK_KEY_Escape:
- if (!teco_cmdline_keypress_c('\e', error))
+ if (!teco_cmdline_keymacro_c('\e', error))
return FALSE;
break;
case GDK_KEY_BackSpace:
- if (!teco_cmdline_keypress_c(TECO_CTL_KEY('H'), error))
+ if (!teco_cmdline_keymacro_c(TECO_CTL_KEY('H'), error))
return FALSE;
break;
case GDK_KEY_Tab:
- if (!teco_cmdline_keypress_c('\t', error))
+ if (!teco_cmdline_keymacro_c('\t', error))
return FALSE;
break;
case GDK_KEY_Return:
- if (!teco_cmdline_keypress_c('\n', error))
+ if (!teco_cmdline_keymacro_c('\n', error))
return FALSE;
break;
@@ -922,12 +991,12 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error)
*/
#define FN(KEY, MACRO) \
case GDK_KEY_##KEY: \
- if (!teco_cmdline_fnmacro(#MACRO, error)) \
+ if (!teco_cmdline_keymacro(#MACRO, -1, error)) \
return FALSE; \
break
#define FNS(KEY, MACRO) \
case GDK_KEY_##KEY: \
- if (!teco_cmdline_fnmacro(event->state & GDK_SHIFT_MASK ? "S" #MACRO : #MACRO, error)) \
+ if (!teco_cmdline_keymacro(event->state & GDK_SHIFT_MASK ? "S" #MACRO : #MACRO, -1, error)) \
return FALSE; \
break
FN(Down, DOWN); FN(Up, UP);
@@ -939,8 +1008,8 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error)
gchar macro_name[3+1];
g_snprintf(macro_name, sizeof(macro_name),
- "F%d", event->keyval - GDK_KEY_F1 + 1);
- if (!teco_cmdline_fnmacro(macro_name, error))
+ "F%d", event->keyval - GDK_KEY_F1 + 1);
+ if (!teco_cmdline_keymacro(macro_name, -1, error))
return FALSE;
break;
}
@@ -960,33 +1029,72 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error)
/*
* Control keys and keys with printable representation
*/
- default: {
- gunichar u = gdk_keyval_to_unicode(event->keyval);
+ default:
+ /*
+ * NOTE: Alt-Gr key-combinations are sometimes reported as
+ * Ctrl+Alt, so we filter those out.
+ */
+ if ((event->state & (GDK_CONTROL_MASK | GDK_MOD1_MASK)) == GDK_CONTROL_MASK) {
+ gchar c = teco_interface_get_ansi_key(event);
+ if (c) {
+ if (!teco_cmdline_keymacro_c(TECO_CTL_KEY(g_ascii_toupper(c)), error))
+ return FALSE;
+ break;
+ }
+ }
- if (u && u < 0x80 && (event->state & (GDK_CONTROL_MASK | GDK_MOD1_MASK)) == GDK_CONTROL_MASK) {
- /*
- * NOTE: Alt-Gr key-combinations are sometimes reported as
- * Ctrl+Alt, so we filter those out.
- */
- if (!teco_cmdline_keypress_c(TECO_CTL_KEY(g_ascii_toupper(u)), error))
+ /*
+ * First look up a key macro.
+ * Only if it's undefined, we try to automatically find an ANSI key.
+ * On the downside, this means we cannot define key macros for dead keys
+ * or keys that require some sort of input method editing.
+ *
+ * FIXME: This might be a good reason to be able to disable the
+ * automatic ANSIfication, as we could look up the key macro in
+ * teco_interface_cmdline_commit_cb().
+ */
+ gunichar cp = gdk_keyval_to_unicode(event->keyval);
+ if (cp) {
+ char buf[6];
+ gsize len = g_unichar_to_utf8(cp, buf);
+ teco_keymacro_status_t rc = teco_cmdline_keymacro(buf, len, error);
+ if (rc == TECO_KEYMACRO_ERROR)
return FALSE;
- } else {
- /*
- * This is necessary to handle dead keys and in the future
- * for inputting Asian languages.
- *
- * FIXME: We do not yet support preediting.
- * It would be easier to forward the event to the Scintilla
- * widget and use its existing IM support.
- * But this breaks the event freezing and results in flickering.
- */
- gtk_im_context_filter_keypress(teco_interface.input_method, event);
+ if (rc == TECO_KEYMACRO_SUCCESS)
+ break;
+ g_assert(rc == TECO_KEYMACRO_UNDEFINED);
}
- }
+
+ /*
+ * If the current state is case-insensitive, we are in a command name,
+ * which consists only of ANSI letters, so we try to translate
+ * non-ANSI key presses to their ANSI equivalents as well.
+ * This means you don't have to change keyboard layouts
+ * so often.
+ * FIXME: This could be made to work with string-building constructs
+ * within Q-Register specs as well.
+ * Unfortunately, Q-Reg specs and string building can be nested
+ * indefinitely.
+ * This would effectively require a new keymacro_mask_cb().
+ */
+ if ((teco_cmdline.machine.parent.current->keymacro_mask |
+ teco_cmdline.machine.expectstring.machine.parent.current->keymacro_mask) &
+ TECO_KEYMACRO_MASK_CASEINSENSITIVE)
+ teco_interface_get_ansi_key(event);
+
+ /*
+ * This is necessary to handle dead keys and in the future
+ * for inputting Asian languages.
+ *
+ * FIXME: We do not yet support preediting.
+ * It would be easier to forward the event to the Scintilla
+ * widget and use its existing IM support.
+ * But this breaks the event freezing and results in flickering.
+ */
+ gtk_im_context_filter_keypress(teco_interface.input_method, event);
}
teco_interface_refresh(teco_interface_current_view != last_view);
-
return TRUE;
}
@@ -997,7 +1105,7 @@ teco_interface_event_loop(GError **error)
g_assert(scitecoconfig_reg != NULL);
g_auto(teco_string_t) scitecoconfig = {NULL, 0};
if (!scitecoconfig_reg->vtable->get_string(scitecoconfig_reg,
- &scitecoconfig.data, &scitecoconfig.len, error))
+ &scitecoconfig.data, &scitecoconfig.len, NULL, error))
return FALSE;
if (teco_string_contains(&scitecoconfig, '\0')) {
g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
@@ -1006,45 +1114,6 @@ teco_interface_event_loop(GError **error)
}
g_assert(scitecoconfig.data != NULL);
-#ifdef G_OS_WIN32
- /*
- * FIXME: This is necessary so that the icon themes are found in the same
- * directory as sciteco.exe.
- * This fails of course when $SCITECOCONFIG is changed.
- * We should perhaps always use the absolute path of sciteco.exe.
- * If you want to install SciTECO differently, you can still set
- * $XDG_DATA_DIRS.
- *
- * FIXME FIXME FIXME: This is also currently broken.
- */
- //g_autofree char *theme_path = g_build_filename(scitecoconfig.data, "icons");
- //gtk_icon_theme_prepend_search_path(gtk_icon_theme_get_default(), theme_path);
-#else
- /*
- * Load icons for the GTK window.
- * This is not necessary on Windows since the icon included
- * as a resource will be used by default.
- */
- static const gchar *icon_files[] = {
- SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-48.png",
- SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-32.png",
- SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-16.png"
- };
- GList *icon_list = NULL;
-
- for (gint i = 0; i < G_N_ELEMENTS(icon_files); i++) {
- GdkPixbuf *icon_pixbuf = gdk_pixbuf_new_from_file(icon_files[i], NULL);
-
- /* fail silently if there's a problem with one of the icons */
- if (icon_pixbuf)
- icon_list = g_list_append(icon_list, icon_pixbuf);
- }
-
- gtk_window_set_default_icon_list(icon_list);
-
- g_list_free_full(icon_list, g_object_unref);
-#endif
-
/*
* Initialize the CSS variable provider and the CSS provider
* for the included fallback.css.
@@ -1087,6 +1156,50 @@ teco_interface_event_loop(GError **error)
/* don't show popup by default */
gtk_widget_hide(teco_interface.popup_widget);
+#ifdef G_OS_WIN32
+ /*
+ * FIXME: This is necessary so that the icon themes are found in the same
+ * directory as sciteco.exe.
+ * This fails of course when $SCITECOCONFIG is changed.
+ * We should perhaps always use the absolute path of sciteco.exe.
+ * If you want to install SciTECO differently, you can still set
+ * $XDG_DATA_DIRS.
+ *
+ * FIXME FIXME FIXME: This is also currently broken.
+ */
+ //g_autofree char *theme_path = g_build_filename(scitecoconfig.data, "icons");
+ //gtk_icon_theme_prepend_search_path(gtk_icon_theme_get_default(), theme_path);
+#else
+ /*
+ * Load icons for the GTK window.
+ * This is not necessary on Windows since the icon included
+ * as a resource will be used by default.
+ */
+ static const gchar *icon_files[] = {
+ SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-48.png",
+ SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-32.png",
+ SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-16.png"
+ };
+ GList *icon_list = NULL;
+
+ for (gint i = 0; i < G_N_ELEMENTS(icon_files); i++) {
+ GdkPixbuf *icon_pixbuf = gdk_pixbuf_new_from_file(icon_files[i], NULL);
+
+ /* fail silently if there's a problem with one of the icons */
+ if (icon_pixbuf)
+ icon_list = g_list_append(icon_list, icon_pixbuf);
+ }
+
+ /*
+ * The position of this call after gtk_widget_show() is important, so that
+ * tabbed and other Xembed hosts can pick up the icon.
+ * They also do not pick up the icon if set via gtk_window_set_default_icon_list().
+ */
+ gtk_window_set_icon_list(GTK_WINDOW(teco_interface.window), icon_list);
+
+ g_list_free_full(icon_list, g_object_unref);
+#endif
+
/*
* SIGTERM emulates the "Close" key just like when
* closing the window if supported by this version of glib.
diff --git a/src/interface.c b/src/interface.c
index a2042db..2e2d64e 100644
--- a/src/interface.c
+++ b/src/interface.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/interface.h b/src/interface.h
index 3170849..32db6b5 100644
--- a/src/interface.h
+++ b/src/interface.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -154,6 +154,36 @@ void teco_interface_process_notify(SCNotification *notify);
/** @pure */
void teco_interface_cleanup(void);
+/** Convenience wrapper: calls teco_view_get_codepage() on teco_interface_current_view. */
+static inline guint
+teco_interface_get_codepage(void)
+{
+ return teco_view_get_codepage(teco_interface_current_view);
+}
+
+/** Convenience wrapper: calls teco_view_glyphs2bytes() on teco_interface_current_view. */
+static inline gssize
+teco_interface_glyphs2bytes(teco_int_t pos)
+{
+ return teco_view_glyphs2bytes(teco_interface_current_view, pos);
+}
+
+/** Convenience wrapper: calls teco_view_bytes2glyphs() on teco_interface_current_view. */
+static inline teco_int_t
+teco_interface_bytes2glyphs(gsize pos)
+{
+ return teco_view_bytes2glyphs(teco_interface_current_view, pos);
+}
+
+/** Convenience wrapper: calls teco_view_glyphs2bytes_relative() on teco_interface_current_view. */
+static inline gssize
+teco_interface_glyphs2bytes_relative(gsize pos, teco_int_t n)
+{
+ return teco_view_glyphs2bytes_relative(teco_interface_current_view, pos, n);
+}
+
+/** Convenience wrapper: calls teco_view_get_character() on teco_interface_current_view. */
+static inline teco_int_t
+teco_interface_get_character(gsize pos, gsize len)
+{
+ return teco_view_get_character(teco_interface_current_view, pos, len);
+}
+
/*
* The following functions are here for lack of a better place.
* They could also be in sciteco.h, but only if declared as non-inline
@@ -161,12 +191,6 @@ void teco_interface_cleanup(void);
*/
static inline gboolean
-teco_validate_pos(teco_int_t n)
-{
- return 0 <= n && n <= teco_interface_ssm(SCI_GETLENGTH, 0, 0);
-}
-
-static inline gboolean
teco_validate_line(teco_int_t n)
{
return 0 <= n && n < teco_interface_ssm(SCI_GETLINECOUNT, 0, 0);
diff --git a/src/list.h b/src/list.h
index 7446fcc..156d4a7 100644
--- a/src/list.h
+++ b/src/list.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/main.c b/src/main.c
index 467eb72..eb3c0b4 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -23,6 +23,7 @@
#include <string.h>
#include <stdlib.h>
#include <signal.h>
+#include <locale.h>
#include <glib.h>
#include <glib/gprintf.h>
@@ -104,9 +105,10 @@ teco_get_default_config_path(const gchar *program)
static gchar *teco_eval_macro = NULL;
static gboolean teco_mung_file = FALSE;
static gboolean teco_mung_profile = TRUE;
+static gboolean teco_8bit_clean = FALSE;
static gchar *
-teco_process_options(gint *argc, gchar ***argv)
+teco_process_options(gchar ***argv)
{
static const GOptionEntry option_entries[] = {
{"eval", 'e', 0, G_OPTION_ARG_STRING, &teco_eval_macro,
@@ -119,6 +121,8 @@ teco_process_options(gint *argc, gchar ***argv)
"Do not mung "
"$SCITECOCONFIG" G_DIR_SEPARATOR_S INI_FILE " "
"even if it exists"},
+ {"8bit", '8', 0, G_OPTION_ARG_NONE, &teco_8bit_clean,
+ "Use ANSI encoding by default and disable automatic EOL conversion"},
{NULL}
};
@@ -133,7 +137,7 @@ teco_process_options(gint *argc, gchar ***argv)
g_option_context_set_description(
options,
"Bug reports should go to <" PACKAGE_BUGREPORT "> or "
- "<" PACKAGE_URL_DEV ">."
+ "<" PACKAGE_URL ">."
);
g_option_context_add_main_entries(options, option_entries, NULL);
@@ -155,7 +159,7 @@ teco_process_options(gint *argc, gchar ***argv)
*/
g_option_context_set_strict_posix(options, TRUE);
- if (!g_option_context_parse(options, argc, argv, &error)) {
+ if (!g_option_context_parse_strv(options, argv, &error)) {
g_fprintf(stderr, "Option parsing failed: %s\n",
error->message);
exit(EXIT_FAILURE);
@@ -170,16 +174,13 @@ teco_process_options(gint *argc, gchar ***argv)
* and "--" is not the first non-option argument as in
* sciteco foo -- -C bar.
*/
- if (*argc >= 2 && !strcmp((*argv)[1], "--")) {
- (*argv)[1] = (*argv)[0];
- (*argv)++;
- (*argc)--;
- }
+ if ((*argv)[0] && !g_strcmp0((*argv)[1], "--"))
+ g_free(teco_strv_remove(*argv, 1));
gchar *mung_filename = NULL;
if (teco_mung_file) {
- if (*argc < 2) {
+ if (!(*argv)[0] || !(*argv)[1]) {
g_fprintf(stderr, "Script to mung expected!\n");
exit(EXIT_FAILURE);
}
@@ -190,11 +191,7 @@ teco_process_options(gint *argc, gchar ***argv)
exit(EXIT_FAILURE);
}
- mung_filename = g_strdup((*argv)[1]);
-
- (*argv)[1] = (*argv)[0];
- (*argv)++;
- (*argc)--;
+ mung_filename = teco_strv_remove(*argv, 1);
}
return mung_filename;
@@ -306,12 +303,39 @@ main(int argc, char **argv)
signal(SIGINT, teco_sigint_handler);
signal(SIGTERM, teco_sigint_handler);
- g_autofree gchar *mung_filename = teco_process_options(&argc, &argv);
+ /*
+ * Important for Unicode handling in curses and glib.
+ * In particular, in order to accept Unicode characters
+ * in option strings.
+ *
+ * NOTE: Windows 10 accepts ".UTF8" here, so the "ANSI"
+ * versions of win32 API functions accept UTF-8.
+ * We want to support older versions, though, and
+ * glib happily converts to Windows' native UTF-16.
+ */
+ setlocale(LC_ALL, "");
+
+#ifdef G_OS_WIN32
+ /*
+ * main()'s argv is in the system locale, so we might lose
+ * information when passing it to g_option_context_parse().
+ * The remaining strings are also not guaranteed to be in
+ * UTF-8.
+ */
+ g_auto(GStrv) argv_utf8 = g_win32_get_command_line();
+#else
+ g_auto(GStrv) argv_utf8 = g_strdupv(argv);
+#endif
+ g_autofree gchar *mung_filename = teco_process_options(&argv_utf8);
/*
* All remaining arguments in argv are arguments
* to the macro or munged file.
*/
+ if (teco_8bit_clean)
+ /* equivalent to 16,4ED but executed earlier */
+ teco_ed = (teco_ed & ~TECO_ED_AUTOEOL) | TECO_ED_DEFAULT_ANSI;
+
/*
* Theoretically, QReg tables should only be initialized
* after the interface, since they contain Scintilla documents.
@@ -343,7 +367,7 @@ main(int argc, char **argv)
/* current working directory ("$") */
teco_qreg_table_insert(&teco_qreg_table_globals, teco_qreg_workingdir_new());
/* environment defaults and registers */
- teco_initialize_environment(argv[0]);
+ teco_initialize_environment(argv_utf8[0]);
teco_qreg_table_t local_qregs;
teco_qreg_table_init(&local_qregs, TRUE);
@@ -361,8 +385,8 @@ main(int argc, char **argv)
* Also, the Unnamed Buffer should be kept empty for piping.
* Therefore, it would be best to store the arguments in Q-Regs, e.g. $0,$1,$2...
*/
- for (gint i = 1; i < argc; i++) {
- teco_interface_ssm(SCI_APPENDTEXT, strlen(argv[i]), (sptr_t)argv[i]);
+ for (gint i = 1; argv_utf8[i]; i++) {
+ teco_interface_ssm(SCI_APPENDTEXT, strlen(argv_utf8[i]), (sptr_t)argv_utf8[i]);
teco_interface_ssm(SCI_APPENDTEXT, 1, (sptr_t)"\n");
}
diff --git a/src/memory.c b/src/memory.c
index 6d7645c..26cde55 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -288,7 +288,7 @@
* Current memory usage.
* Access must be synchronized using atomic operations.
*/
-static gint teco_memory_usage = 0;
+static guint teco_memory_usage = 0;
/*
* NOTE: This implementation based on malloc_usable_size() might
@@ -495,6 +495,16 @@ teco_memory_get_usage(void)
return procstk.ki_rssize * page_size;
}
+/**
+ * Options passed to jemalloc.
+ *
+ * It's crucial to disable opt.retain, so that freeing memory after
+ * recovering from memory limit hits actually decreases the RSS.
+ * The reasons for activating the option, mentioned in jemalloc(3),
+ * shouldn't be relevant on FreeBSD.
+ */
+const gchar *malloc_conf = "retain:false";
+
#define NEED_POLL_THREAD
#elif defined(G_OS_UNIX) && defined(HAVE_SYSCONF) && defined(HAVE_PROCFS)
@@ -628,8 +638,7 @@ teco_memory_stop_limiting(void)
g_mutex_unlock(&teco_memory_mutex);
}
-#ifndef NDEBUG
-static void __attribute__((destructor))
+static void TECO_DEBUG_CLEANUP
teco_memory_cleanup(void)
{
if (!teco_memory_thread)
@@ -642,7 +651,6 @@ teco_memory_cleanup(void)
g_thread_join(teco_memory_thread);
}
-#endif
#else /* !NEED_POLL_THREAD */
@@ -660,7 +668,7 @@ gsize teco_memory_limit = 500*1000*1000;
gboolean
teco_memory_set_limit(gsize new_limit, GError **error)
{
- gsize memory_usage = g_atomic_int_get(&teco_memory_usage);
+ gsize memory_usage = (guint)g_atomic_int_get(&teco_memory_usage);
if (G_UNLIKELY(new_limit && memory_usage > new_limit)) {
g_autofree gchar *usage_str = g_format_size(memory_usage);
@@ -693,18 +701,19 @@ teco_memory_set_limit(gsize new_limit, GError **error)
gboolean
teco_memory_check(gsize request, GError **error)
{
- gsize memory_usage = g_atomic_int_get(&teco_memory_usage) + request;
+ gsize memory_usage = (guint)g_atomic_int_get(&teco_memory_usage);
+ gsize requested_memory_usage = memory_usage+request;
/*
* Check for overflows.
* NOTE: Glib 2.48 has g_size_checked_add().
*/
- if (G_UNLIKELY(memory_usage < request))
+ if (G_UNLIKELY(requested_memory_usage < memory_usage))
/* guaranteed to fail if memory limiting is enabled */
- memory_usage = G_MAXSIZE;
+ requested_memory_usage = G_MAXSIZE;
- if (G_UNLIKELY(teco_memory_limit && memory_usage >= teco_memory_limit)) {
- g_autofree gchar *limit_str = g_format_size(memory_usage);
+ if (G_UNLIKELY(teco_memory_limit && requested_memory_usage >= teco_memory_limit)) {
+ g_autofree gchar *limit_str = g_format_size(requested_memory_usage);
g_set_error(error, TECO_ERROR, TECO_ERROR_MEMLIMIT,
"Memory limit (%s) exceeded. See <EJ> command.",
diff --git a/src/memory.h b/src/memory.h
index f31a451..39f8319 100644
--- a/src/memory.h
+++ b/src/memory.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/parser.c b/src/parser.c
index 910fc7f..b1aa06e 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -52,16 +52,14 @@ teco_loop_stack_init(void)
TECO_DEFINE_ARRAY_UNDO_INSERT_VAL(teco_loop_stack, teco_loop_context_t);
TECO_DEFINE_ARRAY_UNDO_REMOVE_INDEX(teco_loop_stack);
-#ifndef NDEBUG
-static void __attribute__((destructor))
+static void TECO_DEBUG_CLEANUP
teco_loop_stack_cleanup(void)
{
g_array_free(teco_loop_stack, TRUE);
}
-#endif
gboolean
-teco_machine_input(teco_machine_t *ctx, gchar chr, GError **error)
+teco_machine_input(teco_machine_t *ctx, gunichar chr, GError **error)
{
teco_state_t *next = ctx->current->input_cb(ctx, chr, error);
if (!next)
@@ -88,18 +86,22 @@ teco_state_end_of_macro(teco_machine_t *ctx, GError **error)
}
/**
+ * Execute macro from current PC to stop position.
+ *
* Handles all expected exceptions and preparing them for stack frame insertion.
+ *
+ * @param ctx State machine.
+ * @param macro The macro to execute.
+ * It does not have to be complete.
+ * It must consist only of validated UTF-8 sequences, though.
+ * @param stop_pos Where to stop execution in bytes.
+ * @param error Location to store error.
+ * @return FALSE if an error occurred.
*/
gboolean
-teco_machine_main_step(teco_machine_main_t *ctx, const gchar *macro, gint stop_pos, GError **error)
+teco_machine_main_step(teco_machine_main_t *ctx, const gchar *macro, gsize stop_pos, GError **error)
{
while (ctx->macro_pc < stop_pos) {
-#ifdef DEBUG
- g_printf("EXEC(%d): input='%c'/%x, state=%p, mode=%d\n",
- ctx->macro_pc, macro[ctx->macro_pc], macro[ctx->macro_pc],
- ctx->parent.current, ctx->mode);
-#endif
-
if (G_UNLIKELY(teco_interface_is_interrupted())) {
teco_error_interrupted_set(error);
goto error_attach;
@@ -112,9 +114,18 @@ teco_machine_main_step(teco_machine_main_t *ctx, const gchar *macro, gint stop_p
if (!teco_memory_check(0, error))
goto error_attach;
- if (!teco_machine_input(&ctx->parent, macro[ctx->macro_pc], error))
+ /* UTF-8 sequences are already validated */
+ gunichar chr = g_utf8_get_char(macro+ctx->macro_pc);
+
+#ifdef DEBUG
+ g_printf("EXEC(%d): input='%C' (U+%04" G_GINT32_MODIFIER "X), state=%p, mode=%d\n",
+ ctx->macro_pc, chr, chr, ctx->parent.current, ctx->mode);
+#endif
+
+ if (!teco_machine_input(&ctx->parent, chr, error))
goto error_attach;
- ctx->macro_pc++;
+
+ ctx->macro_pc = g_utf8_next_char(macro+ctx->macro_pc) - macro;
}
/*
@@ -146,6 +157,14 @@ gboolean
teco_execute_macro(const gchar *macro, gsize macro_len,
teco_qreg_table_t *qreg_table_locals, GError **error)
{
+ const teco_string_t str = {(gchar *)macro, macro_len};
+
+ if (!teco_string_validate_utf8(&str)) {
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
+ "Invalid UTF-8 byte sequence in macro");
+ return FALSE;
+ }
+
/*
* This is not auto-cleaned up, so it can be initialized
* on demand.
@@ -311,26 +330,26 @@ teco_machine_main_eval_colon(teco_machine_main_t *ctx)
teco_state_t *
teco_machine_main_transition_input(teco_machine_main_t *ctx,
teco_machine_main_transition_t *transitions,
- guint len, gchar chr, GError **error)
+ guint len, gunichar chr, GError **error)
{
- if (chr < 0 || chr >= len || !transitions[(guint)chr].next) {
+ if (chr >= len || !transitions[chr].next) {
teco_error_syntax_set(error, chr);
return NULL;
}
- if (ctx->mode == TECO_MODE_NORMAL && transitions[(guint)chr].transition_cb) {
+ if (ctx->mode == TECO_MODE_NORMAL && transitions[chr].transition_cb) {
/*
* NOTE: We could also just let transition_cb return a boolean...
*/
GError *tmp_error = NULL;
- transitions[(guint)chr].transition_cb(ctx, &tmp_error);
+ transitions[chr].transition_cb(ctx, &tmp_error);
if (tmp_error) {
g_propagate_error(error, tmp_error);
return NULL;
}
}
- return transitions[(guint)chr].next;
+ return transitions[chr].next;
}
void
@@ -340,15 +359,40 @@ teco_machine_main_clear(teco_machine_main_t *ctx)
teco_machine_stringbuilding_clear(&ctx->expectstring.machine);
}
+/**
+ * Append string to result with case folding.
+ *
+ * In normal mode the bytes are appended verbatim.
+ * In upper/lower mode the string is case folded first: with the
+ * g_utf8_*() functions if the target codepage is UTF-8 and with the
+ * g_ascii_*() functions otherwise.
+ *
+ * The folding functions return null-terminated C strings whose length
+ * is taken with strlen(). Folding the entire buffer at once would
+ * therefore lose everything after the first embedded null byte
+ * (which may occur e.g. in Q-Register contents).
+ * To avoid that, the buffer is folded in null-separated chunks and
+ * the null bytes themselves are appended verbatim.
+ *
+ * @param ctx State machine. Its `result` string must be set.
+ * @param str Bytes to append.
+ *            In the folding modes it is not dereferenced if len is 0.
+ * @param len Length of str in bytes.
+ */
+static void
+teco_machine_stringbuilding_append(teco_machine_stringbuilding_t *ctx, const gchar *str, gsize len)
+{
+	g_assert(ctx->result != NULL);
+
+	if (ctx->mode == TECO_STRINGBUILDING_MODE_NORMAL) {
+		teco_string_append(ctx->result, str, len);
+		return;
+	}
+
+	const gboolean to_upper = ctx->mode == TECO_STRINGBUILDING_MODE_UPPER;
+	const gboolean is_utf8 = ctx->codepage == SC_CP_UTF8;
+
+	while (len > 0) {
+		const gchar *nul = memchr(str, '\0', len);
+		gsize chunk_len = nul ? (gsize)(nul - str) : len;
+
+		if (chunk_len > 0) {
+			g_autofree gchar *folded = NULL;
+
+			if (is_utf8)
+				folded = to_upper ? g_utf8_strup(str, chunk_len)
+				                  : g_utf8_strdown(str, chunk_len);
+			else
+				folded = to_upper ? g_ascii_strup(str, chunk_len)
+				                  : g_ascii_strdown(str, chunk_len);
+			teco_string_append(ctx->result, folded, strlen(folded));
+		}
+
+		if (!nul)
+			break;
+
+		/* preserve the embedded null byte itself */
+		teco_string_append_c(ctx->result, '\0');
+		str = nul + 1;
+		len -= chunk_len + 1;
+	}
+}
+
/*
* FIXME: All teco_state_stringbuilding_* states could be static?
*/
static teco_state_t *teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx,
- gchar chr, GError **error);
+ gunichar chr, GError **error);
TECO_DECLARE_STATE(teco_state_stringbuilding_ctl);
static teco_state_t *teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx,
- gchar chr, GError **error);
+ gunichar chr, GError **error);
TECO_DECLARE_STATE(teco_state_stringbuilding_escaped);
TECO_DECLARE_STATE(teco_state_stringbuilding_lower);
@@ -362,19 +406,29 @@ TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_quote);
TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_n);
static teco_state_t *
-teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
{
- if (chr == '^')
+ switch (chr) {
+ case '^':
return &teco_state_stringbuilding_ctl;
- if (TECO_IS_CTL(chr))
- return teco_state_stringbuilding_ctl_input(ctx, TECO_CTL_ECHO(chr), error);
+ case TECO_CTL_KEY('^'):
+ /*
+ * Ctrl+^ is inserted verbatim as code 30.
+ * Otherwise it would expand to a single caret
+ * just like caret+caret (^^).
+ */
+ break;
+ default:
+ if (TECO_IS_CTL(chr))
+ return teco_state_stringbuilding_ctl_input(ctx, TECO_CTL_ECHO(chr), error);
+ }
return teco_state_stringbuilding_escaped_input(ctx, chr, error);
}
/* in cmdline.c */
gboolean teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
- gchar key, GError **error);
+ gunichar key, GError **error);
TECO_DEFINE_STATE(teco_state_stringbuilding_start,
.is_start = TRUE,
@@ -383,12 +437,19 @@ TECO_DEFINE_STATE(teco_state_stringbuilding_start,
);
static teco_state_t *
-teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
{
chr = teco_ascii_toupper(chr);
switch (chr) {
- case '^': break;
+ case '^':
+ /*
+ * Double-caret expands to a single caret.
+ * Ctrl+^ (30) is handled separately and inserts code 30.
+ * The special handling of the double-caret should perhaps
+ * be abolished altogether.
+ */
+ break;
case 'Q':
case 'R': return &teco_state_stringbuilding_escaped;
case 'V': return &teco_state_stringbuilding_lower;
@@ -398,85 +459,139 @@ teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gchar ch
chr = TECO_CTL_KEY(chr);
}
+ /*
+ * Source code is always in UTF-8, so it does not
+ * make sense to handle ctx->codepage != SC_CP_UTF8
+ * separately.
+ */
if (ctx->result)
- teco_string_append_c(ctx->result, chr);
+ teco_string_append_wc(ctx->result, chr);
return &teco_state_stringbuilding_start;
}
TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctl);
static teco_state_t *
-teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
{
if (!ctx->result)
/* parse-only mode */
return &teco_state_stringbuilding_start;
+ /*
+ * The subtle difference between UTF-8 and single-byte targets
+ * is that we don't try to casefold non-ANSI characters in single-byte mode.
+ */
switch (ctx->mode) {
+ case TECO_STRINGBUILDING_MODE_NORMAL:
+ break;
case TECO_STRINGBUILDING_MODE_UPPER:
- chr = g_ascii_toupper(chr);
+ chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80
+ ? g_unichar_toupper(chr) : chr;
break;
case TECO_STRINGBUILDING_MODE_LOWER:
- chr = g_ascii_tolower(chr);
- break;
- default:
+ chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80
+ ? g_unichar_tolower(chr) : chr;
break;
}
- teco_string_append_c(ctx->result, chr);
+ teco_string_append_wc(ctx->result, chr);
return &teco_state_stringbuilding_start;
}
-TECO_DEFINE_STATE(teco_state_stringbuilding_escaped);
+/* in cmdline.c */
+gboolean teco_state_stringbuilding_escaped_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
+ gunichar key, GError **error);
+
+TECO_DEFINE_STATE(teco_state_stringbuilding_escaped,
+ .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t)
+ teco_state_stringbuilding_escaped_process_edit_cmd
+);
static teco_state_t *
-teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_lower_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
{
if (!ctx->result)
/* parse-only mode */
return &teco_state_stringbuilding_start;
- /*
- * FIXME: This does not handle ^V^V typed with up-carets.
- */
- if (chr == TECO_CTL_KEY('V')) {
+ chr = teco_ascii_toupper(chr);
+
+ if (chr == 'V') {
if (ctx->parent.must_undo)
teco_undo_guint(ctx->mode);
ctx->mode = TECO_STRINGBUILDING_MODE_LOWER;
} else {
- teco_string_append_c(ctx->result, g_ascii_tolower(chr));
+ /* control keys cannot be case folded */
+ teco_string_append_wc(ctx->result, TECO_CTL_KEY(chr));
}
return &teco_state_stringbuilding_start;
}
+TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_lower_ctl);
+
+/**
+ * Input callback of the state entered after ^V.
+ *
+ * A caret or control character is delegated to the
+ * teco_state_stringbuilding_lower_ctl state/handler.
+ * Any other character is appended to the result in lower case.
+ * Characters outside of the 7-bit range are only case folded
+ * if the result is encoded in UTF-8.
+ *
+ * @param ctx State machine.
+ * @param chr Input codepoint.
+ * @param error Location to store error.
+ * @return The next state.
+ */
+static teco_state_t *
+teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
+{
+	if (chr == '^')
+		return &teco_state_stringbuilding_lower_ctl;
+	if (TECO_IS_CTL(chr))
+		return teco_state_stringbuilding_lower_ctl_input(ctx, TECO_CTL_ECHO(chr), error);
+
+	if (!ctx->result)
+		/* parse-only mode */
+		return &teco_state_stringbuilding_start;
+
+	const gboolean can_fold = ctx->codepage == SC_CP_UTF8 || chr < 0x80;
+
+	teco_string_append_wc(ctx->result, can_fold ? g_unichar_tolower(chr) : chr);
+	return &teco_state_stringbuilding_start;
+}
+
TECO_DEFINE_STATE(teco_state_stringbuilding_lower);
static teco_state_t *
-teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_upper_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
{
if (!ctx->result)
/* parse-only mode */
return &teco_state_stringbuilding_start;
- /*
- * FIXME: This does not handle ^W^W typed with up-carets.
- */
- if (chr == TECO_CTL_KEY('W')) {
+ chr = teco_ascii_toupper(chr);
+
+ if (chr == 'W') {
if (ctx->parent.must_undo)
teco_undo_guint(ctx->mode);
ctx->mode = TECO_STRINGBUILDING_MODE_UPPER;
} else {
- teco_string_append_c(ctx->result, g_ascii_toupper(chr));
+ /* control keys cannot be case folded */
+ teco_string_append_wc(ctx->result, TECO_CTL_KEY(chr));
}
return &teco_state_stringbuilding_start;
}
+TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_upper_ctl);
+
+/**
+ * Input callback of the upper-case counterpart of
+ * teco_state_stringbuilding_lower.
+ *
+ * A caret or control character is delegated to the
+ * teco_state_stringbuilding_upper_ctl state/handler.
+ * Any other character is appended to the result in upper case.
+ * Characters outside of the 7-bit range are only case folded
+ * if the result is encoded in UTF-8.
+ *
+ * @param ctx State machine.
+ * @param chr Input codepoint.
+ * @param error Location to store error.
+ * @return The next state.
+ */
+static teco_state_t *
+teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
+{
+	if (chr == '^')
+		return &teco_state_stringbuilding_upper_ctl;
+	if (TECO_IS_CTL(chr))
+		return teco_state_stringbuilding_upper_ctl_input(ctx, TECO_CTL_ECHO(chr), error);
+
+	if (!ctx->result)
+		/* parse-only mode */
+		return &teco_state_stringbuilding_start;
+
+	const gboolean can_fold = ctx->codepage == SC_CP_UTF8 || chr < 0x80;
+
+	teco_string_append_wc(ctx->result, can_fold ? g_unichar_toupper(chr) : chr);
+	return &teco_state_stringbuilding_start;
+}
+
TECO_DEFINE_STATE(teco_state_stringbuilding_upper);
static teco_state_t *
-teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
{
teco_state_t *next;
@@ -488,8 +603,10 @@ teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gchar c
case 'N': next = &teco_state_stringbuilding_ctle_n; break;
default:
if (ctx->result) {
- gchar buf[] = {TECO_CTL_KEY('E'), chr};
- teco_string_append(ctx->result, buf, sizeof(buf));
+ /* also makes sure that search patterns can start with ^E */
+ gchar buf[1+6] = {TECO_CTL_KEY('E')};
+ gsize len = g_unichar_to_utf8(chr, buf+1);
+ teco_machine_stringbuilding_append(ctx, buf, 1+len);
}
return &teco_state_stringbuilding_start;
}
@@ -507,7 +624,7 @@ TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctle);
/* in cmdline.c */
gboolean teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
- gchar chr, GError **error);
+ gunichar chr, GError **error);
/**
* @interface TECO_DEFINE_STATE_STRINGBUILDING_QREG
@@ -522,7 +639,7 @@ gboolean teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuil
)
static teco_state_t *
-teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
{
teco_qreg_t *qreg;
@@ -549,7 +666,7 @@ teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gch
*/
gchar buffer[TECO_EXPRESSIONS_FORMAT_LEN];
const gchar *num = teco_expressions_format(buffer, value);
- teco_string_append(ctx->result, num, strlen(num));
+ teco_machine_stringbuilding_append(ctx, num, strlen(num));
return &teco_state_stringbuilding_start;
}
@@ -557,7 +674,7 @@ teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gch
TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_num);
static teco_state_t *
-teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
{
teco_qreg_t *qreg;
@@ -578,21 +695,51 @@ teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gchar
teco_int_t value;
if (!qreg->vtable->get_integer(qreg, &value, error))
return NULL;
- if (value < 0 || value > 0xFF) {
- g_autofree gchar *name_printable = teco_string_echo(qreg->head.name.data, qreg->head.name.len);
- g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED,
- "Q-Register \"%s\" does not contain a valid character", name_printable);
- return NULL;
+
+ if (ctx->codepage == SC_CP_UTF8) {
+ if (value < 0 || !g_unichar_validate(value))
+ goto error_codepoint;
+ switch (ctx->mode) {
+ case TECO_STRINGBUILDING_MODE_NORMAL:
+ break;
+ case TECO_STRINGBUILDING_MODE_UPPER:
+ value = g_unichar_toupper(value);
+ break;
+ case TECO_STRINGBUILDING_MODE_LOWER:
+ value = g_unichar_tolower(value);
+ break;
+ }
+ teco_string_append_wc(ctx->result, value);
+ } else {
+ if (value < 0 || value > 0xFF)
+ goto error_codepoint;
+ switch (ctx->mode) {
+ case TECO_STRINGBUILDING_MODE_NORMAL:
+ break;
+ case TECO_STRINGBUILDING_MODE_UPPER:
+ value = g_ascii_toupper(value);
+ break;
+ case TECO_STRINGBUILDING_MODE_LOWER:
+ value = g_ascii_tolower(value);
+ break;
+ }
+ teco_string_append_c(ctx->result, value);
}
- teco_string_append_c(ctx->result, (gchar)value);
return &teco_state_stringbuilding_start;
+
+error_codepoint: {
+ g_autofree gchar *name_printable = teco_string_echo(qreg->head.name.data, qreg->head.name.len);
+ g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
+ "Q-Register \"%s\" does not contain a valid codepoint", name_printable);
+ return NULL;
+}
}
TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_u);
static teco_state_t *
-teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
{
teco_qreg_t *qreg;
@@ -610,20 +757,17 @@ teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gchar
/* parse-only mode */
return &teco_state_stringbuilding_start;
- /*
- * FIXME: Should we have a special teco_qreg_get_string_append() function?
- */
g_auto(teco_string_t) str = {NULL, 0};
- if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error))
+ if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error))
return NULL;
- teco_string_append(ctx->result, str.data, str.len);
+ teco_machine_stringbuilding_append(ctx, str.data, str.len);
return &teco_state_stringbuilding_start;
}
TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_q);
static teco_state_t *
-teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
{
teco_qreg_t *qreg;
teco_qreg_table_t *table;
@@ -643,7 +787,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g
return &teco_state_stringbuilding_start;
g_auto(teco_string_t) str = {NULL, 0};
- if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error))
+ if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error))
return NULL;
/*
* NOTE: g_shell_quote() expects a null-terminated string, so it is
@@ -658,7 +802,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g
return NULL;
}
g_autofree gchar *str_quoted = g_shell_quote(str.data ? : "");
- teco_string_append(ctx->result, str_quoted, strlen(str_quoted));
+ teco_machine_stringbuilding_append(ctx, str_quoted, strlen(str_quoted));
return &teco_state_stringbuilding_start;
}
@@ -666,7 +810,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g
TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_quote);
static teco_state_t *
-teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
{
teco_qreg_t *qreg;
teco_qreg_table_t *table;
@@ -686,7 +830,7 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar
return &teco_state_stringbuilding_start;
g_auto(teco_string_t) str = {NULL, 0};
- if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error))
+ if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error))
return NULL;
if (teco_string_contains(&str, '\0')) {
teco_error_qregcontainsnull_set(error, qreg->head.name.data, qreg->head.name.len,
@@ -695,7 +839,7 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar
}
g_autofree gchar *str_escaped = teco_globber_escape_pattern(str.data);
- teco_string_append(ctx->result, str_escaped, strlen(str_escaped));
+ teco_machine_stringbuilding_append(ctx, str_escaped, strlen(str_escaped));
return &teco_state_stringbuilding_start;
}
@@ -703,13 +847,14 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar
TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_n);
void
-teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escape_char,
+teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gunichar escape_char,
teco_qreg_table_t *locals, gboolean must_undo)
{
memset(ctx, 0, sizeof(*ctx));
teco_machine_init(&ctx->parent, &teco_state_stringbuilding_start, must_undo);
ctx->escape_char = escape_char;
ctx->qreg_table_locals = locals;
+ ctx->codepage = teco_default_codepage();
}
void
@@ -723,6 +868,10 @@ teco_machine_stringbuilding_reset(teco_machine_stringbuilding_t *ctx)
ctx->mode = TECO_STRINGBUILDING_MODE_NORMAL;
}
+/*
+ * If we case folded only ANSI characters as in teco_ascii_toupper(),
+ * this could be simplified.
+ */
void
teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gchar *str, gsize len,
teco_string_t *target)
@@ -730,12 +879,18 @@ teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gch
target->data = g_malloc(len*2+1);
target->len = 0;
- for (guint i = 0; i < len; i++) {
- if (teco_ascii_toupper(str[i]) == ctx->escape_char ||
- (ctx->escape_char == '[' && str[i] == ']') ||
- (ctx->escape_char == '{' && str[i] == '}'))
+ for (guint i = 0; i < len; ) {
+ gunichar chr = g_utf8_get_char(str+i);
+
+ if (g_unichar_toupper(chr) == ctx->escape_char ||
+ (ctx->escape_char == '[' && chr == ']') ||
+ (ctx->escape_char == '{' && chr == '}'))
target->data[target->len++] = TECO_CTL_KEY('Q');
- target->data[target->len++] = str[i];
+
+ gsize lenc = g_utf8_next_char(str+i) - (str+i);
+ memcpy(target->data+target->len, str+i, lenc);
+ target->len += lenc;
+ i += lenc;
}
target->data[target->len] = '\0';
@@ -748,8 +903,17 @@ teco_machine_stringbuilding_clear(teco_machine_stringbuilding_t *ctx)
teco_machine_qregspec_free(ctx->machine_qregspec);
}
+/**
+ * Initial callback of all states expecting a string argument.
+ *
+ * When executing in normal mode, this sets the codepage of the
+ * embedded string building machine to teco_default_codepage().
+ * In all other modes it does nothing.
+ *
+ * @param ctx Main state machine.
+ * @param error Location to store error (not used by this function).
+ * @return Always TRUE.
+ */
+gboolean
+teco_state_expectstring_initial(teco_machine_main_t *ctx, GError **error)
+{
+	if (ctx->mode != TECO_MODE_NORMAL)
+		return TRUE;
+
+	teco_machine_stringbuilding_t *machine = &ctx->expectstring.machine;
+
+	teco_machine_stringbuilding_set_codepage(machine, teco_default_codepage());
+	return TRUE;
+}
+
teco_state_t *
-teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
teco_state_t *current = ctx->parent.current;
@@ -766,13 +930,18 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro
/*
* FIXME: Exclude setting at least whitespace characters as the
* new string escape character to avoid accidental errors?
+ *
+ * FIXME: Should we perhaps restrict case folding escape characters
+ * to the ANSI range (teco_ascii_toupper())?
+ * This would be faster than case folding each and every character
+ * of a string argument to check against the escape char.
*/
switch (ctx->expectstring.machine.escape_char) {
case '\e':
case '{':
if (ctx->parent.must_undo)
- teco_undo_gchar(ctx->expectstring.machine.escape_char);
- ctx->expectstring.machine.escape_char = teco_ascii_toupper(chr);
+ teco_undo_gunichar(ctx->expectstring.machine.escape_char);
+ ctx->expectstring.machine.escape_char = g_unichar_toupper(chr);
return current;
}
}
@@ -796,7 +965,7 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro
ctx->expectstring.nesting--;
break;
}
- } else if (teco_ascii_toupper(chr) == ctx->expectstring.machine.escape_char) {
+ } else if (g_unichar_toupper(chr) == ctx->expectstring.machine.escape_char) {
if (ctx->parent.must_undo)
teco_undo_gint(ctx->expectstring.nesting);
ctx->expectstring.nesting--;
@@ -826,7 +995,7 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro
if (current->expectstring.last) {
if (ctx->parent.must_undo)
- teco_undo_gchar(ctx->expectstring.machine.escape_char);
+ teco_undo_gunichar(ctx->expectstring.machine.escape_char);
ctx->expectstring.machine.escape_char = '\e';
}
ctx->expectstring.nesting = 1;
@@ -857,7 +1026,7 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro
if (!teco_machine_stringbuilding_input(&ctx->expectstring.machine, chr, str, error))
return NULL;
} else if (ctx->mode == TECO_MODE_NORMAL) {
- teco_string_append_c(&ctx->expectstring.string, chr);
+ teco_string_append_wc(&ctx->expectstring.string, chr);
}
/*
@@ -901,7 +1070,7 @@ teco_state_expectfile_process(teco_machine_main_t *ctx, const teco_string_t *str
g_assert(str->data != NULL);
/*
- * Null-chars must not ocur in filename/path strings and at some point
+ * Null-chars must not occur in filename/path strings and at some point
* teco_string_t has to be converted to a null-terminated C string
* as all the glib filename functions rely on null-terminated strings.
* Doing it here ensures that teco_file_expand_path() can be safely called
diff --git a/src/parser.h b/src/parser.h
index 05a9715..066896f 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,6 +16,8 @@
*/
#pragma once
+#include <stdbool.h>
+
#include <glib.h>
#include <Scintilla.h>
@@ -23,6 +25,7 @@
#include "sciteco.h"
#include "string-utils.h"
#include "goto.h"
+#include "undo.h"
#include "qreg.h"
/*
@@ -36,7 +39,9 @@ typedef struct {
/** how many iterations are left */
teco_int_t counter;
/** Program counter of loop start command */
- guint pc : sizeof(guint)*8 - 1;
+ gsize pc;
+ /** Brace level at loop start */
+ guint brace_level : sizeof(guint)*8 - 1;
/**
* Whether the loop represents an argument
* barrier or not (it "passes through"
@@ -46,7 +51,7 @@ typedef struct {
* a signed integer, it's ok steal one
* bit for the pass_through flag.
*/
- gboolean pass_through : 1;
+ bool pass_through : 1;
} teco_loop_context_t;
extern GArray *teco_loop_stack;
@@ -71,8 +76,8 @@ void undo__remove_index__teco_loop_stack(guint);
* FIXME: Maybe use TECO_DECLARE_VTABLE_METHOD()?
*/
typedef const struct {
- gboolean string_building : 1;
- gboolean last : 1;
+ bool string_building : 1;
+ bool last : 1;
/**
* Called repeatedly to process chunks of input and give interactive feedback.
@@ -99,17 +104,18 @@ typedef const struct {
} teco_state_expectqreg_t;
typedef gboolean (*teco_state_initial_cb_t)(teco_machine_t *ctx, GError **error);
-typedef teco_state_t *(*teco_state_input_cb_t)(teco_machine_t *ctx, gchar chr, GError **error);
+typedef teco_state_t *(*teco_state_input_cb_t)(teco_machine_t *ctx, gunichar chr, GError **error);
typedef gboolean (*teco_state_refresh_cb_t)(teco_machine_t *ctx, GError **error);
typedef gboolean (*teco_state_end_of_macro_cb_t)(teco_machine_t *ctx, GError **error);
typedef gboolean (*teco_state_process_edit_cmd_cb_t)(teco_machine_t *ctx, teco_machine_t *parent_ctx,
- gchar key, GError **error);
+ gunichar key, GError **error);
typedef enum {
- TECO_FNMACRO_MASK_START = (1 << 0),
- TECO_FNMACRO_MASK_STRING = (1 << 1),
- TECO_FNMACRO_MASK_DEFAULT = ~((1 << 2)-1)
-} teco_fnmacro_mask_t;
+ TECO_KEYMACRO_MASK_START = (1 << 0),
+ TECO_KEYMACRO_MASK_STRING = (1 << 1),
+ TECO_KEYMACRO_MASK_CASEINSENSITIVE = (1 << 2),
+ TECO_KEYMACRO_MASK_DEFAULT = ~((1 << 3)-1)
+} teco_keymacro_mask_t;
/**
* A teco_machine_t state.
@@ -182,19 +188,19 @@ struct teco_state_t {
/**
* Whether this state is a start state (ie. not within any
* escape sequence etc.).
- * This is separate of TECO_FNMACRO_MASK_START which is set
+ * This is separate of TECO_KEYMACRO_MASK_START which is set
* only in the main machine's start states.
*/
- gboolean is_start : 1;
+ bool is_start : 1;
/**
- * Function key macro mask.
+ * Key macro mask.
* This is not a bitmask since it is compared with values set
* from TECO, so the bitorder needs to be defined.
*
* @fixme If we intend to "forward" masks from other state machines like
* teco_machine_stringbuilding_t, this should probably be a callback.
*/
- teco_fnmacro_mask_t fnmacro_mask : 8;
+ teco_keymacro_mask_t keymacro_mask : 8;
/**
* Additional state-dependent callbacks and settings.
@@ -214,7 +220,7 @@ struct teco_state_t {
gboolean teco_state_end_of_macro(teco_machine_t *ctx, GError **error);
/* in cmdline.c */
-gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error);
+gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error);
/**
* @interface TECO_DEFINE_STATE
@@ -234,7 +240,7 @@ gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent
.end_of_macro_cb = teco_state_end_of_macro, \
.process_edit_cmd_cb = teco_state_process_edit_cmd, \
.is_start = FALSE, \
- .fnmacro_mask = TECO_FNMACRO_MASK_DEFAULT, \
+ .keymacro_mask = TECO_KEYMACRO_MASK_DEFAULT, \
##__VA_ARGS__ \
}
@@ -243,20 +249,21 @@ gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent
extern teco_state_t NAME
/* in cmdline.c */
-gboolean teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error);
+gboolean teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error);
/**
* @interface TECO_DEFINE_STATE_CASEINSENSITIVE
* @implements TECO_DEFINE_STATE
* @ingroup states
*
- * Base class of states with case-insenstive input.
+ * Base class of states with case-insensitive input.
*
* This is meant for states accepting command characters
* that can possibly be case-folded.
*/
#define TECO_DEFINE_STATE_CASEINSENSITIVE(NAME, ...) \
TECO_DEFINE_STATE(NAME, \
+ .keymacro_mask = TECO_KEYMACRO_MASK_CASEINSENSITIVE, \
.process_edit_cmd_cb = teco_state_caseinsensitive_process_edit_cmd, \
##__VA_ARGS__ \
)
@@ -278,6 +285,8 @@ struct teco_machine_t {
* Whether side effects must be reverted on rubout.
* State machines created within macro calls don't have to
* even in interactive mode.
+ * In fact you MUST not revert side effects if this is FALSE
+ * as the data no longer exists on the call stack at undo-time.
*/
gboolean must_undo;
};
@@ -296,7 +305,7 @@ teco_machine_reset(teco_machine_t *ctx, teco_state_t *initial)
teco_undo_ptr(ctx->current) = initial;
}
-gboolean teco_machine_input(teco_machine_t *ctx, gchar chr, GError **error);
+gboolean teco_machine_input(teco_machine_t *ctx, gunichar chr, GError **error);
typedef enum {
TECO_STRINGBUILDING_MODE_NORMAL = 0,
@@ -307,9 +316,6 @@ typedef enum {
/**
* A stringbuilding state machine.
*
- * @fixme Should contain the escape char (currently in teco_machine_expectstring_t),
- * so that we can escape it via ^Q.
- *
* @extends teco_machine_t
*/
typedef struct teco_machine_stringbuilding_t {
@@ -327,7 +333,7 @@ typedef struct teco_machine_stringbuilding_t {
* If this is `[` or `{`, it is assumed that `]` and `}` must
* be escaped as well by teco_machine_stringbuilding_escape().
*/
- gchar escape_char;
+ gunichar escape_char;
/**
* Q-Register table for local registers.
@@ -348,11 +354,28 @@ typedef struct teco_machine_stringbuilding_t {
* (see teco_state_stringbuilding_start_process_edit_cmd()).
*/
teco_string_t *result;
+
+ /**
+ * Encoding of string in `result`.
+ * This is inherited from the embedding command and may depend on
+ * the buffer's or Q-Register's encoding.
+ */
+ guint codepage;
} teco_machine_stringbuilding_t;
-void teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escape_char,
+void teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gunichar escape_char,
teco_qreg_table_t *locals, gboolean must_undo);
+/**
+ * Set the encoding of the string built by a string building machine.
+ *
+ * If the machine must undo its side effects, the previous codepage
+ * is registered for undo before it is overwritten.
+ *
+ * @param ctx String building machine.
+ * @param codepage New codepage of the `result` string.
+ */
+static inline void
+teco_machine_stringbuilding_set_codepage(teco_machine_stringbuilding_t *ctx,
+                                         guint codepage)
+{
+	const gboolean track_undo = ctx->parent.must_undo;
+
+	/* NOTE: This is not safe to undo in macro calls. */
+	if (track_undo)
+		teco_undo_guint(ctx->codepage);
+
+	ctx->codepage = codepage;
+}
+
void teco_machine_stringbuilding_reset(teco_machine_stringbuilding_t *ctx);
/**
@@ -365,7 +388,7 @@ void teco_machine_stringbuilding_reset(teco_machine_stringbuilding_t *ctx);
* @return FALSE in case of error.
*/
static inline gboolean
-teco_machine_stringbuilding_input(teco_machine_stringbuilding_t *ctx, gchar chr,
+teco_machine_stringbuilding_input(teco_machine_stringbuilding_t *ctx, gunichar chr,
teco_string_t *result, GError **error)
{
ctx->result = result;
@@ -424,7 +447,8 @@ typedef enum {
struct teco_machine_main_t {
teco_machine_t parent;
- gint macro_pc;
+ /* signed because it is sometimes set to -1 for flow control */
+ gssize macro_pc;
/**
* Aliases bitfield with an integer.
@@ -435,8 +459,8 @@ struct teco_machine_main_t {
struct {
teco_mode_t mode : 8;
- gboolean modifier_colon : 1;
- gboolean modifier_at : 1;
+ bool modifier_colon : 1;
+ bool modifier_at : 1;
};
guint __flags;
};
@@ -481,7 +505,7 @@ void teco_machine_main_init(teco_machine_main_t *ctx,
gboolean teco_machine_main_eval_colon(teco_machine_main_t *ctx);
gboolean teco_machine_main_step(teco_machine_main_t *ctx,
- const gchar *macro, gint stop_pos, GError **error);
+ const gchar *macro, gsize stop_pos, GError **error);
gboolean teco_execute_macro(const gchar *macro, gsize macro_len,
teco_qreg_table_t *qreg_table_locals, GError **error);
@@ -500,17 +524,18 @@ typedef const struct {
*/
teco_state_t *teco_machine_main_transition_input(teco_machine_main_t *ctx,
teco_machine_main_transition_t *transitions,
- guint len, gchar chr, GError **error);
+ guint len, gunichar chr, GError **error);
void teco_machine_main_clear(teco_machine_main_t *ctx);
G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(teco_machine_main_t, teco_machine_main_clear);
-teco_state_t *teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **error);
+gboolean teco_state_expectstring_initial(teco_machine_main_t *ctx, GError **error);
+teco_state_t *teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **error);
gboolean teco_state_expectstring_refresh(teco_machine_main_t *ctx, GError **error);
/* in cmdline.c */
-gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
/**
* @interface TECO_DEFINE_STATE_EXPECTSTRING
@@ -526,15 +551,16 @@ gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco
*/
#define TECO_DEFINE_STATE_EXPECTSTRING(NAME, ...) \
static teco_state_t * \
- NAME##_input(teco_machine_main_t *ctx, gchar chr, GError **error) \
+ NAME##_input(teco_machine_main_t *ctx, gunichar chr, GError **error) \
{ \
return teco_state_expectstring_input(ctx, chr, error); \
} \
TECO_DEFINE_STATE(NAME, \
+ .initial_cb = (teco_state_initial_cb_t)teco_state_expectstring_initial, \
.refresh_cb = (teco_state_refresh_cb_t)teco_state_expectstring_refresh, \
.process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \
teco_state_expectstring_process_edit_cmd, \
- .fnmacro_mask = TECO_FNMACRO_MASK_STRING, \
+ .keymacro_mask = TECO_KEYMACRO_MASK_STRING, \
.expectstring.string_building = TRUE, \
.expectstring.last = TRUE, \
.expectstring.process_cb = NULL, /* do nothing */ \
@@ -546,7 +572,7 @@ gboolean teco_state_expectfile_process(teco_machine_main_t *ctx, const teco_stri
gsize new_chars, GError **error);
/* in cmdline.c */
-gboolean teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
/**
* @interface TECO_DEFINE_STATE_EXPECTFILE
@@ -562,7 +588,7 @@ gboolean teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_m
)
/* in cmdline.c */
-gboolean teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
/**
* @interface TECO_DEFINE_STATE_EXPECTDIR
diff --git a/src/qreg-commands.c b/src/qreg-commands.c
index be0aada..cff4c84 100644
--- a/src/qreg-commands.c
+++ b/src/qreg-commands.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -50,7 +50,7 @@ teco_state_expectqreg_initial(teco_machine_main_t *ctx, GError **error)
}
teco_state_t *
-teco_state_expectqreg_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_expectqreg_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
teco_state_t *current = ctx->parent.current;
@@ -149,7 +149,7 @@ teco_state_loadqreg_done(teco_machine_main_t *ctx, const teco_string_t *str, GEr
if (str->len > 0) {
/* Load file into Q-Register */
g_autofree gchar *filename = teco_file_expand_path(str->data);
- if (!teco_qreg_load(qreg, filename, error))
+ if (!qreg->vtable->load(qreg, filename, error))
return NULL;
} else {
/* Edit Q-Register */
@@ -202,7 +202,7 @@ teco_state_saveqreg_done(teco_machine_main_t *ctx, const teco_string_t *str, GEr
return &teco_state_start;
g_autofree gchar *filename = teco_file_expand_path(str->data);
- return teco_qreg_save(qreg, filename, error) ? &teco_state_start : NULL;
+ return qreg->vtable->save(qreg, filename, error) ? &teco_state_start : NULL;
}
/*$ E% E%q
@@ -259,9 +259,9 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
if (teco_machine_main_eval_colon(ctx)) {
/* Query Q-Register's existence or string size */
if (qreg) {
- gsize len;
-
- if (!qreg->vtable->get_string(qreg, NULL, &len, error))
+ /* get_string() would return the size in bytes */
+ teco_int_t len = qreg->vtable->get_length(qreg, error);
+ if (len < 0)
return NULL;
teco_expressions_push(len);
} else {
@@ -281,10 +281,9 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
return NULL;
}
- gint c = qreg->vtable->get_character(qreg, pos, error);
- if (c < 0)
+ teco_int_t c;
+ if (!qreg->vtable->get_character(qreg, pos, &c, error))
return NULL;
-
teco_expressions_push(c);
} else {
/* Query integer */
@@ -311,6 +310,10 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
* Positions are handled like buffer positions \(em they
* begin at 0 up to the length of the string minus 1.
* An error is thrown for invalid positions.
+ * If <q> is encoded as UTF-8 and there is
+ * an incomplete sequence at the requested position,
+ * -1 is returned.
+ * All other invalid Unicode sequences are returned as -2.
* Both non-colon-modified forms of Q require register <q>
* to be defined and fail otherwise.
*
@@ -369,24 +372,50 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx,
gint args = teco_expressions_args();
if (args > 0) {
- g_autofree gchar *buffer = g_malloc(args);
+ guint codepage = teco_default_codepage();
+ if (colon_modified && !qreg->vtable->get_string(qreg, NULL, NULL, &codepage, error))
+ return NULL;
- for (gint i = args; i > 0; i--) {
- teco_int_t v;
- if (!teco_expressions_pop_num_calc(&v, 0, error))
- return NULL;
- buffer[i-1] = (gchar)v;
+ g_autofree gchar *buffer = NULL;
+ gsize len = 0;
+
+ if (codepage == SC_CP_UTF8) {
+ /* the glib docs wrongly claim that one character can take 6 bytes */
+ buffer = g_malloc(4*args);
+ for (gint i = args; i > 0; i--) {
+ teco_int_t v;
+ if (!teco_expressions_pop_num_calc(&v, 0, error))
+ return NULL;
+ if (v < 0 || !g_unichar_validate(v)) {
+ teco_error_codepoint_set(error, "^U");
+ return NULL;
+ }
+ len += g_unichar_to_utf8(v, buffer+len);
+ }
+ } else {
+ buffer = g_malloc(args);
+ for (gint i = args; i > 0; i--) {
+ teco_int_t v;
+ if (!teco_expressions_pop_num_calc(&v, 0, error))
+ return NULL;
+ if (v < 0 || v > 0xFF) {
+ teco_error_codepoint_set(error, "^U");
+ return NULL;
+ }
+ buffer[len++] = v;
+ }
}
if (colon_modified) {
/* append to register */
if (!qreg->vtable->undo_append_string(qreg, error) ||
- !qreg->vtable->append_string(qreg, buffer, args, error))
+ !qreg->vtable->append_string(qreg, buffer, len, error))
return NULL;
} else {
/* set register */
if (!qreg->vtable->undo_set_string(qreg, error) ||
- !qreg->vtable->set_string(qreg, buffer, args, error))
+ !qreg->vtable->set_string(qreg, buffer, len,
+ codepage, error))
return NULL;
}
}
@@ -399,7 +428,8 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx,
} else {
/* set register */
if (!qreg->vtable->undo_set_string(qreg, error) ||
- !qreg->vtable->set_string(qreg, str->data, str->len, error))
+ !qreg->vtable->set_string(qreg, str->data, str->len,
+ teco_default_codepage(), error))
return NULL;
}
@@ -450,6 +480,26 @@ TECO_DEFINE_STATE_EXPECTQREG(teco_state_eucommand,
.expectqreg.type = TECO_QREG_OPTIONAL_INIT
);
+/**
+ * Initial callback of the teco_state_setqregstring_building state.
+ *
+ * Does nothing (and succeeds) when `ctx->mode` is above TECO_MODE_NORMAL.
+ * Otherwise it looks up the Q-Register selected by `ctx->expectqreg`,
+ * queries only its codepage via get_string() and passes it on to the
+ * string building machine embedded in `ctx->expectstring`.
+ *
+ * @param ctx Main state machine.
+ * @param error Set by the register's get_string() implementation on failure.
+ * @return FALSE if the register's codepage could not be queried.
+ */
+static gboolean
+teco_state_setqregstring_building_initial(teco_machine_main_t *ctx, GError **error)
+{
+ if (ctx->mode > TECO_MODE_NORMAL)
+ return TRUE;
+
+ teco_qreg_t *qreg;
+ teco_machine_qregspec_get_results(ctx->expectqreg, &qreg, NULL);
+
+ /*
+ * The expected codepage of string building constructs is determined
+ * by the Q-Register.
+ */
+ guint codepage;
+ if (!qreg->vtable->get_string(qreg, NULL, NULL, &codepage, error))
+ return FALSE;
+ teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, codepage);
+ return TRUE;
+}
+
static teco_state_t *
teco_state_setqregstring_building_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error)
{
@@ -467,6 +517,7 @@ teco_state_setqregstring_building_done(teco_machine_main_t *ctx, const teco_stri
* characters \fBenabled\fP.
*/
TECO_DEFINE_STATE_EXPECTSTRING(teco_state_setqregstring_building,
+ .initial_cb = (teco_state_initial_cb_t)teco_state_setqregstring_building_initial,
.expectstring.string_building = TRUE
);
@@ -481,7 +532,7 @@ teco_state_getqregstring_got_register(teco_machine_main_t *ctx, teco_qreg_t *qre
g_auto(teco_string_t) str = {NULL, 0};
- if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error))
+ if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error))
return NULL;
if (str.len > 0) {
@@ -604,8 +655,15 @@ teco_state_macro_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
} else {
g_auto(teco_qreg_table_t) table;
teco_qreg_table_init(&table, FALSE);
+
if (!teco_qreg_execute(qreg, &table, error))
return NULL;
+ if (teco_qreg_current && !teco_qreg_current->must_undo) {
+ /* currently editing local Q-Register */
+ teco_error_editinglocalqreg_set(error, teco_qreg_current->head.name.data,
+ teco_qreg_current->head.name.len);
+ return NULL;
+ }
}
return &teco_state_start;
@@ -632,6 +690,10 @@ teco_state_macro_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
* Note that the string of <q> will be copied upon macro execution,
* so subsequent changes to Q-Register <q> from inside the macro do
* not modify the executed code.
+ *
+ * While \fBM\fP does not check the register's configured encoding
+ * (as reported by \fBEE\fP), its contents must be and are checked to be in
+ * valid UTF-8.
*/
TECO_DEFINE_STATE_EXPECTQREG(teco_state_macro);
@@ -666,6 +728,9 @@ teco_state_macrofile_done(teco_machine_main_t *ctx, const teco_string_t *str, GE
* It is otherwise similar to the \(lqM\(rq command.
*
* If <file> could not be read, the command yields an error.
+ *
+ * Like all \*(ST code, the contents of <file> must be in valid UTF-8
+ * even if operating in the \(lqdefault ANSI\(rq mode as configured by \fBED\fP.
*/
TECO_DEFINE_STATE_EXPECTFILE(teco_state_macrofile);
@@ -678,7 +743,7 @@ teco_state_copytoqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
if (ctx->mode > TECO_MODE_NORMAL)
return &teco_state_start;
- teco_int_t from, len;
+ gssize from, len; /* in bytes */
if (!teco_expressions_eval(FALSE, error))
return NULL;
@@ -702,32 +767,37 @@ teco_state_copytoqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
len *= -1;
}
} else {
- teco_int_t to = teco_expressions_pop_num(0);
- from = teco_expressions_pop_num(0);
-
+ gssize to = teco_interface_glyphs2bytes(teco_expressions_pop_num(0));
+ from = teco_interface_glyphs2bytes(teco_expressions_pop_num(0));
len = to - from;
- if (len < 0 || !teco_validate_pos(from) || !teco_validate_pos(to)) {
+ if (len < 0 || from < 0 || to < 0) {
teco_error_range_set(error, "X");
return NULL;
}
}
+ /*
+ * NOTE: This does not use SCI_GETRANGEPOINTER+SCI_GETGAPPOSITION
+ * since it may not be safe when copying from register to register.
+ */
g_autofree gchar *str = g_malloc(len + 1);
- struct Sci_TextRange text_range = {
- .chrg = {.cpMin = from, .cpMax = from + len},
+ struct Sci_TextRangeFull range = {
+ .chrg = {from, from + len},
.lpstrText = str
};
- teco_interface_ssm(SCI_GETTEXTRANGE, 0, (sptr_t)&text_range);
+ teco_interface_ssm(SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range);
if (teco_machine_main_eval_colon(ctx)) {
if (!qreg->vtable->undo_append_string(qreg, error) ||
!qreg->vtable->append_string(qreg, str, len, error))
return NULL;
} else {
+ guint cp = teco_interface_get_codepage();
+
if (!qreg->vtable->undo_set_string(qreg, error) ||
- !qreg->vtable->set_string(qreg, str, len, error))
+ !qreg->vtable->set_string(qreg, str, len, cp, error))
return NULL;
}
diff --git a/src/qreg-commands.h b/src/qreg-commands.h
index 6a41fc5..27a6a5c 100644
--- a/src/qreg-commands.h
+++ b/src/qreg-commands.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -33,10 +33,10 @@ teco_state_expectqreg_reset(teco_machine_main_t *ctx)
gboolean teco_state_expectqreg_initial(teco_machine_main_t *ctx, GError **error);
-teco_state_t *teco_state_expectqreg_input(teco_machine_main_t *ctx, gchar chr, GError **error);
+teco_state_t *teco_state_expectqreg_input(teco_machine_main_t *ctx, gunichar chr, GError **error);
/* in cmdline.c */
-gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
/**
* @interface TECO_DEFINE_STATE_EXPECTQREG
@@ -47,7 +47,7 @@ gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_m
*/
#define TECO_DEFINE_STATE_EXPECTQREG(NAME, ...) \
static teco_state_t * \
- NAME##_input(teco_machine_main_t *ctx, gchar chr, GError **error) \
+ NAME##_input(teco_machine_main_t *ctx, gunichar chr, GError **error) \
{ \
return teco_state_expectqreg_input(ctx, chr, error); \
} \
diff --git a/src/qreg.c b/src/qreg.c
index 14cd331..c337dbe 100644
--- a/src/qreg.c
+++ b/src/qreg.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -18,6 +18,7 @@
#include "config.h"
#endif
+#include <stdbool.h>
#include <string.h>
#include <glib.h>
@@ -82,7 +83,12 @@ teco_qreg_execute(teco_qreg_t *qreg, teco_qreg_table_t *qreg_table_locals, GErro
{
g_auto(teco_string_t) macro = {NULL, 0};
- if (!qreg->vtable->get_string(qreg, &macro.data, &macro.len, error) ||
+ /*
+ * SciTECO macros must be in UTF-8, but we don't check the encoding,
+ * so as not to complicate TECO_ED_DEFAULT_ANSI mode.
+ * The UTF-8 byte sequences are checked anyway.
+ */
+ if (!qreg->vtable->get_string(qreg, &macro.data, &macro.len, NULL, error) ||
!teco_execute_macro(macro.data, macro.len, qreg_table_locals, error)) {
teco_error_add_frame_qreg(qreg->head.name.data, qreg->head.name.len);
return FALSE;
@@ -120,65 +126,11 @@ teco_qreg_set_eol_mode(teco_qreg_t *qreg, gint mode)
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(&qreg->string);
+ teco_doc_edit(&qreg->string, teco_default_codepage());
teco_view_ssm(teco_qreg_view, SCI_SETEOLMODE, mode, 0);
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
-}
-
-/** @memberof teco_qreg_t */
-gboolean
-teco_qreg_load(teco_qreg_t *qreg, const gchar *filename, GError **error)
-{
- if (!qreg->vtable->undo_set_string(qreg, error))
- return FALSE;
-
- if (teco_qreg_current)
- teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
-
- teco_doc_edit(&qreg->string);
- teco_doc_reset(&qreg->string);
-
- /*
- * teco_view_load() might change the EOL style.
- */
- teco_qreg_undo_set_eol_mode(qreg);
-
- /*
- * undo_set_string() pushes undo tokens that restore
- * the previous document in the view.
- * So if loading fails, teco_qreg_current will be
- * made the current document again.
- */
- if (!teco_view_load(teco_qreg_view, filename, error))
- return FALSE;
-
- if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
-
- return TRUE;
-}
-
-/** @memberof teco_qreg_t */
-gboolean
-teco_qreg_save(teco_qreg_t *qreg, const gchar *filename, GError **error)
-{
- if (teco_qreg_current)
- teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
-
- teco_doc_edit(&qreg->string);
-
- if (!teco_view_save(teco_qreg_view, filename, error)) {
- if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
- return FALSE;
- }
-
- if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
-
- return TRUE;
+ teco_doc_edit(&teco_qreg_current->string, 0);
}
static gboolean
@@ -204,9 +156,10 @@ teco_qreg_plain_get_integer(teco_qreg_t *qreg, teco_int_t *ret, GError **error)
}
static gboolean
-teco_qreg_plain_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error)
+teco_qreg_plain_set_string(teco_qreg_t *qreg, const gchar *str, gsize len,
+ guint codepage, GError **error)
{
- teco_doc_set_string(&qreg->string, str, len);
+ teco_doc_set_string(&qreg->string, str, len, codepage);
return TRUE;
}
@@ -231,43 +184,64 @@ teco_qreg_plain_append_string(teco_qreg_t *qreg, const gchar *str, gsize len, GE
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(&qreg->string);
+ teco_doc_edit(&qreg->string, teco_default_codepage());
teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0);
teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)str);
teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0);
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
+ teco_doc_edit(&teco_qreg_current->string, 0);
return TRUE;
}
static gboolean
-teco_qreg_plain_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error)
+teco_qreg_plain_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
+ guint *codepage, GError **error)
{
- teco_doc_get_string(&qreg->string, str, len);
+ teco_doc_get_string(&qreg->string, str, len, codepage);
return TRUE;
}
-static gint
-teco_qreg_plain_get_character(teco_qreg_t *qreg, guint position, GError **error)
+static gboolean
+teco_qreg_plain_get_character(teco_qreg_t *qreg, teco_int_t position,
+ teco_int_t *chr, GError **error)
{
- gint ret = -1;
-
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(&qreg->string);
+ teco_doc_edit(&qreg->string, teco_default_codepage());
- if (position < teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0))
- ret = teco_view_ssm(teco_qreg_view, SCI_GETCHARAT, position, 0);
- else
+ sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
+ gssize off = teco_view_glyphs2bytes(teco_qreg_view, position);
+
+ gboolean ret = off >= 0 && off != len;
+ if (!ret)
g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE,
- "Position %u out of range", position);
+ "Position %" TECO_INT_FORMAT " out of range", position);
/* make sure we still restore the current Q-Register */
+ else
+ *chr = teco_view_get_character(teco_qreg_view, off, len);
+
+ if (teco_qreg_current)
+ teco_doc_edit(&teco_qreg_current->string, 0);
+
+ return ret;
+}
+
+/**
+ * Get the length of a "plain" Q-Register's string part.
+ *
+ * The register's document is temporarily made current in teco_qreg_view.
+ * The byte length reported by SCI_GETLENGTH is converted with
+ * teco_view_bytes2glyphs(), so the result is not a byte count.
+ * Afterwards the document of teco_qreg_current (if any) is edited again.
+ *
+ * @param qreg Q-Register to query.
+ * @param error Not written by this implementation.
+ * @return The converted length.
+ */
+static teco_int_t
+teco_qreg_plain_get_length(teco_qreg_t *qreg, GError **error)
+{
+ if (teco_qreg_current)
+ teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
+
+ teco_doc_edit(&qreg->string, teco_default_codepage());
+
+ sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
+ teco_int_t ret = teco_view_bytes2glyphs(teco_qreg_view, len);
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
+ teco_doc_edit(&teco_qreg_current->string, 0);
return ret;
}
@@ -294,7 +268,7 @@ teco_qreg_plain_edit(teco_qreg_t *qreg, GError **error)
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(&qreg->string);
+ teco_doc_edit(&qreg->string, teco_default_codepage());
teco_interface_show_view(teco_qreg_view);
teco_interface_info_update(qreg);
@@ -319,6 +293,58 @@ teco_qreg_plain_undo_edit(teco_qreg_t *qreg, GError **error)
return TRUE;
}
+/**
+ * Load a file into the string part of a "plain" Q-Register.
+ *
+ * The sequence is order-dependent: undo tokens are pushed via
+ * undo_set_string() first, then the register's document is made current
+ * in teco_qreg_view, reset and filled by teco_view_load().
+ * On success the document of teco_qreg_current (if any) is edited again.
+ *
+ * @param qreg Q-Register to load into.
+ * @param filename File to read, passed on to teco_view_load().
+ * @param error Set by undo_set_string() or teco_view_load() on failure.
+ * @return FALSE if undo_set_string() or teco_view_load() failed.
+ */
+static gboolean
+teco_qreg_plain_load(teco_qreg_t *qreg, const gchar *filename, GError **error)
+{
+ if (!qreg->vtable->undo_set_string(qreg, error))
+ return FALSE;
+
+ if (teco_qreg_current)
+ teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
+
+ teco_doc_edit(&qreg->string, teco_default_codepage());
+ teco_doc_reset(&qreg->string);
+
+ /*
+ * teco_view_load() might change the EOL style.
+ */
+ teco_qreg_undo_set_eol_mode(qreg);
+
+ /*
+ * undo_set_string() pushes undo tokens that restore
+ * the previous document in the view.
+ * So if loading fails, teco_qreg_current will be
+ * made the current document again.
+ */
+ if (!teco_view_load(teco_qreg_view, filename, error))
+ return FALSE;
+
+ if (teco_qreg_current)
+ teco_doc_edit(&teco_qreg_current->string, 0);
+
+ return TRUE;
+}
+
+/**
+ * Save the string part of a "plain" Q-Register to a file.
+ *
+ * The register's document is temporarily made current in teco_qreg_view
+ * and written with teco_view_save(). The document of teco_qreg_current
+ * (if any) is edited again afterwards, regardless of whether saving
+ * succeeded.
+ *
+ * @param qreg Q-Register to save.
+ * @param filename Destination, passed on to teco_view_save().
+ * @param error Set by teco_view_save() on failure.
+ * @return The result of teco_view_save().
+ */
+static gboolean
+teco_qreg_plain_save(teco_qreg_t *qreg, const gchar *filename, GError **error)
+{
+ if (teco_qreg_current)
+ teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
+
+ teco_doc_edit(&qreg->string, teco_default_codepage());
+
+ gboolean ret = teco_view_save(teco_qreg_view, filename, error);
+
+ if (teco_qreg_current)
+ teco_doc_edit(&teco_qreg_current->string, 0);
+
+ return ret;
+}
+
+/**
+ * Initializer for vtables of Q-Registers with "plain" storage of strings.
+ * These store their string part as teco_docs.
+ */
#define TECO_INIT_QREG(...) { \
.set_integer = teco_qreg_plain_set_integer, \
.undo_set_integer = teco_qreg_plain_undo_set_integer, \
@@ -329,10 +355,13 @@ teco_qreg_plain_undo_edit(teco_qreg_t *qreg, GError **error)
.undo_append_string = teco_qreg_plain_undo_set_string, \
.get_string = teco_qreg_plain_get_string, \
.get_character = teco_qreg_plain_get_character, \
+ .get_length = teco_qreg_plain_get_length, \
.exchange_string = teco_qreg_plain_exchange_string, \
.undo_exchange_string = teco_qreg_plain_undo_exchange_string, \
.edit = teco_qreg_plain_edit, \
.undo_edit = teco_qreg_plain_undo_edit, \
+ .load = teco_qreg_plain_load, \
+ .save = teco_qreg_plain_save, \
##__VA_ARGS__ \
}
@@ -345,6 +374,150 @@ teco_qreg_plain_new(const gchar *name, gsize len)
return teco_qreg_new(&vtable, name, len);
}
+/**
+ * Edit a Q-Register whose string is stored "externally".
+ *
+ * After the generic teco_qreg_plain_edit(), the register's current string
+ * is fetched through its own get_string() and written into teco_qreg_view,
+ * replacing the view's previous contents within a single Scintilla
+ * undo action.
+ *
+ * @param qreg Q-Register to edit.
+ * @param error Set by teco_qreg_plain_edit() or get_string() on failure.
+ * @return FALSE if teco_qreg_plain_edit() or get_string() failed.
+ */
+static gboolean
+teco_qreg_external_edit(teco_qreg_t *qreg, GError **error)
+{
+ g_auto(teco_string_t) str = {NULL, 0};
+
+ if (!teco_qreg_plain_edit(qreg, error) ||
+ !qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error))
+ return FALSE;
+
+ teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0);
+ teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0);
+ teco_view_ssm(teco_qreg_view, SCI_ADDTEXT, str.len, (sptr_t)str.data);
+ teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0);
+
+ /* presumably schedules an SCI_UNDO of the action above on rubout -- confirm */
+ undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0);
+ return TRUE;
+}
+
+/**
+ * Exchange the string of an "external" Q-Register with a document.
+ *
+ * The document's string and codepage are read first, then the register's
+ * own string is fetched with get_string() and replaced via set_string().
+ * Only if both succeed, the register's previous string and codepage are
+ * written into the document.
+ *
+ * @param qreg Q-Register whose string is exchanged.
+ * @param src Document to exchange with.
+ * @param error Set by get_string() or set_string() on failure.
+ * @return FALSE if get_string() or set_string() failed;
+ *         `src` is left unmodified in that case.
+ */
+static gboolean
+teco_qreg_external_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error)
+{
+	/*
+	 * Both strings are cleaned up automatically, so each of them
+	 * must start out in a well-defined state.
+	 */
+	g_auto(teco_string_t) other_str = {NULL, 0};
+	g_auto(teco_string_t) own_str = {NULL, 0};
+	guint other_cp, own_cp;
+
+	teco_doc_get_string(src, &other_str.data, &other_str.len, &other_cp);
+
+	if (!qreg->vtable->get_string(qreg, &own_str.data, &own_str.len, &own_cp, error) ||
+	    !qreg->vtable->set_string(qreg, other_str.data, other_str.len, other_cp, error))
+		return FALSE;
+
+	teco_doc_set_string(src, own_str.data, own_str.len, own_cp);
+	return TRUE;
+}
+
+/**
+ * Push the undo tokens for teco_qreg_external_exchange_string().
+ *
+ * The register side is handled by its own undo_set_string().
+ * The document side is only handled by teco_doc_undo_set_string()
+ * when `qreg->must_undo` is set.
+ *
+ * @param qreg Q-Register taking part in the exchange.
+ * @param src Document taking part in the exchange.
+ * @param error Set by undo_set_string() on failure.
+ * @return FALSE if undo_set_string() failed.
+ */
+static gboolean
+teco_qreg_external_undo_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error)
+{
+ if (!qreg->vtable->undo_set_string(qreg, error))
+ return FALSE;
+ if (qreg->must_undo) // FIXME
+ teco_doc_undo_set_string(src);
+ return TRUE;
+}
+
+/**
+ * Get the character at a given position of an "external" Q-Register.
+ *
+ * The string is fetched via get_string() without asking for its codepage
+ * and is always processed with GLib's UTF-8 functions, so `position`
+ * counts UTF-8 characters as determined by g_utf8_strlen().
+ *
+ * @param qreg Q-Register to query.
+ * @param position Zero-based character position.
+ * @param chr Where to store the result of g_utf8_get_char_validated().
+ * @param error Set by get_string(), or to TECO_ERROR_RANGE if `position`
+ *              is negative or not below the character count.
+ * @return FALSE on error, in which case `*chr` is not written.
+ */
+static gboolean
+teco_qreg_external_get_character(teco_qreg_t *qreg, teco_int_t position,
+ teco_int_t *chr, GError **error)
+{
+ g_auto(teco_string_t) str = {NULL, 0};
+
+ if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error))
+ return FALSE;
+
+ if (position < 0 || position >= g_utf8_strlen(str.data, str.len)) {
+ g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE,
+ "Position %" TECO_INT_FORMAT " out of range", position);
+ return FALSE;
+ }
+ const gchar *p = g_utf8_offset_to_pointer(str.data, position);
+
+ /*
+ * Make sure that the -1/-2 error values are preserved.
+ * The sign bit in UCS-4/UTF-32 is unused, so this will even
+ * suffice if TECO_INTEGER == 32.
+ */
+ /* NOTE(review): a length of -1 relies on str.data being null-terminated -- confirm */
+ *chr = (gint32)g_utf8_get_char_validated(p, -1);
+ return TRUE;
+}
+
+/**
+ * Get the length of an "external" Q-Register's string.
+ *
+ * The string is fetched via get_string() and counted with g_utf8_strlen(),
+ * i.e. the result is in UTF-8 characters rather than bytes.
+ *
+ * @param qreg Q-Register to query.
+ * @param error Set by get_string() on failure.
+ * @return The character count or -1 if get_string() failed.
+ */
+static teco_int_t
+teco_qreg_external_get_length(teco_qreg_t *qreg, GError **error)
+{
+ g_auto(teco_string_t) str = {NULL, 0};
+
+ if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error))
+ return -1;
+
+ return g_utf8_strlen(str.data, str.len);
+}
+
+/*
+ * Load a file into an "external" Q-Register.
+ *
+ * The file is read completely with g_file_get_contents() and then handed
+ * to the register's undo_set_string() and set_string() using the default
+ * codepage. The steps are chained with &&, so nothing is changed
+ * if the file cannot be read.
+ * Returns FALSE if any of the three steps fails.
+ *
+ * NOTE: This does not perform EOL normalization unlike teco_view_load().
+ * It shouldn't be critical since "external" registers are mainly used for filenames.
+ * Otherwise we could of course load into the view() and call set_string() afterwards.
+ */
+static gboolean
+teco_qreg_external_load(teco_qreg_t *qreg, const gchar *filename, GError **error)
+{
+ g_auto(teco_string_t) str = {NULL, 0};
+
+ return g_file_get_contents(filename, &str.data, &str.len, error) &&
+ qreg->vtable->undo_set_string(qreg, error) &&
+ qreg->vtable->set_string(qreg, str.data, str.len, teco_default_codepage(), error);
+}
+
+/*
+ * Save the string of an "external" Q-Register to a file.
+ *
+ * The register's string is fetched via get_string(), copied into
+ * teco_qreg_view within a single Scintilla undo action and written
+ * with teco_view_save().
+ * The document of teco_qreg_current (if any) is made current in the view
+ * again on every path, including when get_string() fails.
+ * Returns FALSE if get_string() or teco_view_save() failed.
+ *
+ * NOTE: This does not simply use g_file_set_contents(), as we have to create
+ * save point files as well.
+ * FIXME: On the other hand, this does not set the correct EOL style on the document,
+ * so teco_view_save() will save only with the default EOL style.
+ * It might therefore still be a good idea to avoid any conversion.
+ */
+static gboolean
+teco_qreg_external_save(teco_qreg_t *qreg, const gchar *filename, GError **error)
+{
+	if (teco_qreg_current)
+		teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
+
+	teco_doc_edit(&qreg->string, teco_default_codepage());
+
+	g_auto(teco_string_t) str = {NULL, 0};
+	gboolean ret = qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error);
+
+	if (ret) {
+		teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0);
+		teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0);
+		teco_view_ssm(teco_qreg_view, SCI_ADDTEXT, str.len, (sptr_t)str.data);
+		teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0);
+
+		undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0);
+
+		ret = teco_view_save(teco_qreg_view, filename, error);
+	}
+
+	/*
+	 * No undo token restores the view here, so the current
+	 * Q-Register's document must be restored on failure as well.
+	 */
+	if (teco_qreg_current)
+		teco_doc_edit(&teco_qreg_current->string, 0);
+
+	return ret;
+}
+
+/**
+ * Initializer for vtables of Q-Registers with "external" storage of strings.
+ * These rely on custom implementations of get_string() and set_string().
+ *
+ * It expands to TECO_INIT_QREG() with the teco_qreg_external_*()
+ * implementations of exchange_string, undo_exchange_string, edit,
+ * get_character, get_length, load and save passed as its arguments.
+ * Any fields given to this macro are placed after those, so users can
+ * still replace individual callbacks.
+ * NOTE(review): this relies on a later designated initializer for the
+ * same field taking precedence over an earlier one.
+ */
+#define TECO_INIT_QREG_EXTERNAL(...) TECO_INIT_QREG( \
+ .exchange_string = teco_qreg_external_exchange_string, \
+ .undo_exchange_string = teco_qreg_external_undo_exchange_string, \
+ .edit = teco_qreg_external_edit, \
+ .get_character = teco_qreg_external_get_character, \
+ .get_length = teco_qreg_external_get_length, \
+ .load = teco_qreg_external_load, \
+ .save = teco_qreg_external_save, \
+ ##__VA_ARGS__ \
+)
+
/*
* NOTE: The integer-component is currently unused on the "*" special register.
*/
@@ -368,11 +541,12 @@ teco_qreg_bufferinfo_get_integer(teco_qreg_t *qreg, teco_int_t *ret, GError **er
}
/*
- * FIXME: These operations can and should be implemented.
- * Setting the "*" register could for instance rename the file.
+ * FIXME: Something could be implemented here. There are 2 possibilities:
+ * Either it renames the current buffer, or opens a file (alternative to EB).
*/
static gboolean
-teco_qreg_bufferinfo_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error)
+teco_qreg_bufferinfo_set_string(teco_qreg_t *qreg, const gchar *str, gsize len,
+ guint codepage, GError **error)
{
teco_error_qregopunsupported_set(error, qreg->head.name.data, qreg->head.name.len, FALSE);
return FALSE;
@@ -401,7 +575,8 @@ teco_qreg_bufferinfo_undo_append_string(teco_qreg_t *qreg, GError **error)
* NOTE: The `string` component is currently unused on the "*" register.
*/
static gboolean
-teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error)
+teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
+ guint *codepage, GError **error)
{
/*
* On platforms with a default non-forward-slash directory
@@ -416,43 +591,8 @@ teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErr
* NOTE: teco_file_normalize_path() does not change the size of the string.
*/
*len = teco_ring_current->filename ? strlen(teco_ring_current->filename) : 0;
- return TRUE;
-}
-
-static gint
-teco_qreg_bufferinfo_get_character(teco_qreg_t *qreg, guint position, GError **error)
-{
- gsize max_len;
-
- if (!teco_qreg_bufferinfo_get_string(qreg, NULL, &max_len, error))
- return -1;
-
- if (position >= max_len) {
- g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE,
- "Position %u out of range", position);
- return -1;
- }
-
- return teco_ring_current->filename[position];
-}
-
-static gboolean
-teco_qreg_bufferinfo_edit(teco_qreg_t *qreg, GError **error)
-{
- if (!teco_qreg_plain_edit(qreg, error))
- return FALSE;
-
- g_auto(teco_string_t) str = {NULL, 0};
-
- if (!teco_qreg_bufferinfo_get_string(qreg, &str.data, &str.len, error))
- return FALSE;
-
- teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0);
- teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0);
- teco_view_ssm(teco_qreg_view, SCI_ADDTEXT, str.len, (sptr_t)str.data);
- teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0);
-
- undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0);
+ if (codepage)
+ *codepage = teco_default_codepage();
return TRUE;
}
@@ -460,7 +600,7 @@ teco_qreg_bufferinfo_edit(teco_qreg_t *qreg, GError **error)
teco_qreg_t *
teco_qreg_bufferinfo_new(void)
{
- static teco_qreg_vtable_t vtable = TECO_INIT_QREG(
+ static teco_qreg_vtable_t vtable = TECO_INIT_QREG_EXTERNAL(
.set_integer = teco_qreg_bufferinfo_set_integer,
.undo_set_integer = teco_qreg_bufferinfo_undo_set_integer,
.get_integer = teco_qreg_bufferinfo_get_integer,
@@ -469,15 +609,22 @@ teco_qreg_bufferinfo_new(void)
.append_string = teco_qreg_bufferinfo_append_string,
.undo_append_string = teco_qreg_bufferinfo_undo_append_string,
.get_string = teco_qreg_bufferinfo_get_string,
- .get_character = teco_qreg_bufferinfo_get_character,
- .edit = teco_qreg_bufferinfo_edit
+ /*
+ * As teco_qreg_bufferinfo_set_string() is not implemented,
+ * it's important to not inherit teco_qreg_external_exchange_string().
+ * `[*` and `]*` will still work though.
+ * The inherited teco_qreg_external_load() will simply fail.
+ */
+ .exchange_string = teco_qreg_plain_exchange_string,
+ .undo_exchange_string = teco_qreg_plain_undo_exchange_string
);
return teco_qreg_new(&vtable, "*", 1);
}
static gboolean
-teco_qreg_workingdir_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error)
+teco_qreg_workingdir_set_string(teco_qreg_t *qreg, const gchar *str, gsize len,
+ guint codepage, GError **error)
{
/*
* NOTE: Makes sure that `dir` will be null-terminated as str[len] may not be '\0'.
@@ -528,7 +675,8 @@ teco_qreg_workingdir_undo_append_string(teco_qreg_t *qreg, GError **error)
}
static gboolean
-teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error)
+teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
+ guint *codepage, GError **error)
{
/*
* On platforms with a default non-forward-slash directory
@@ -545,84 +693,22 @@ teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErr
*str = teco_file_normalize_path(dir);
else
g_free(dir);
+ if (codepage)
+ *codepage = teco_default_codepage();
return TRUE;
}
-static gint
-teco_qreg_workingdir_get_character(teco_qreg_t *qreg, guint position, GError **error)
-{
- g_auto(teco_string_t) str = {NULL, 0};
-
- if (!teco_qreg_workingdir_get_string(qreg, &str.data, &str.len, error))
- return -1;
-
- if (position >= str.len) {
- g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE,
- "Position %u out of range", position);
- return -1;
- }
-
- return str.data[position];
-}
-
-static gboolean
-teco_qreg_workingdir_edit(teco_qreg_t *qreg, GError **error)
-{
- g_auto(teco_string_t) str = {NULL, 0};
-
- if (!teco_qreg_plain_edit(qreg, error) ||
- !teco_qreg_workingdir_get_string(qreg, &str.data, &str.len, error))
- return FALSE;
-
- teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0);
- teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0);
- teco_view_ssm(teco_qreg_view, SCI_ADDTEXT, str.len, (sptr_t)str.data);
- teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0);
-
- undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0);
- return TRUE;
-}
-
-static gboolean
-teco_qreg_workingdir_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error)
-{
- g_auto(teco_string_t) other_str, own_str = {NULL, 0};
-
- teco_doc_get_string(src, &other_str.data, &other_str.len);
-
- if (!teco_qreg_workingdir_get_string(qreg, &own_str.data, &own_str.len, error) ||
- /* FIXME: Why is teco_qreg_plain_set_string() sufficient? */
- !teco_qreg_plain_set_string(qreg, other_str.data, other_str.len, error))
- return FALSE;
-
- teco_doc_set_string(src, own_str.data, own_str.len);
- return TRUE;
-}
-
-static gboolean
-teco_qreg_workingdir_undo_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error)
-{
- teco_undo_change_dir_to_current();
- if (qreg->must_undo) // FIXME
- teco_doc_undo_set_string(src);
- return TRUE;
-}
-
/** @static @memberof teco_qreg_t */
teco_qreg_t *
teco_qreg_workingdir_new(void)
{
- static teco_qreg_vtable_t vtable = TECO_INIT_QREG(
+ static teco_qreg_vtable_t vtable = TECO_INIT_QREG_EXTERNAL(
.set_string = teco_qreg_workingdir_set_string,
.undo_set_string = teco_qreg_workingdir_undo_set_string,
.append_string = teco_qreg_workingdir_append_string,
.undo_append_string = teco_qreg_workingdir_undo_append_string,
- .get_string = teco_qreg_workingdir_get_string,
- .get_character = teco_qreg_workingdir_get_character,
- .edit = teco_qreg_workingdir_edit,
- .exchange_string = teco_qreg_workingdir_exchange_string,
- .undo_exchange_string = teco_qreg_workingdir_undo_exchange_string
+ .get_string = teco_qreg_workingdir_get_string
);
/*
@@ -639,7 +725,8 @@ teco_qreg_workingdir_new(void)
}
static gboolean
-teco_qreg_clipboard_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error)
+teco_qreg_clipboard_set_string(teco_qreg_t *qreg, const gchar *str, gsize len,
+ guint codepage, GError **error)
{
g_assert(!teco_string_contains(&qreg->head.name, '\0'));
const gchar *clipboard_name = qreg->head.name.data + 1;
@@ -724,7 +811,8 @@ teco_qreg_clipboard_undo_set_string(teco_qreg_t *qreg, GError **error)
}
static gboolean
-teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error)
+teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
+ guint *codepage, GError **error)
{
g_assert(!teco_string_contains(&qreg->head.name, '\0'));
const gchar *clipboard_name = qreg->head.name.data + 1;
@@ -756,93 +844,41 @@ teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErro
else
teco_string_clear(&str_converted);
*len = str_converted.len;
+ if (codepage)
+ *codepage = teco_default_codepage();
return TRUE;
}
-static gint
-teco_qreg_clipboard_get_character(teco_qreg_t *qreg, guint position, GError **error)
-{
- g_auto(teco_string_t) str = {NULL, 0};
-
- if (!teco_qreg_clipboard_get_string(qreg, &str.data, &str.len, error))
- return -1;
-
- if (position >= str.len) {
- g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE,
- "Position %u out of range", position);
- return -1;
- }
-
- return str.data[position];
-}
-
-static gboolean
-teco_qreg_clipboard_edit(teco_qreg_t *qreg, GError **error)
-{
- if (!teco_qreg_plain_edit(qreg, error))
- return FALSE;
-
- g_auto(teco_string_t) str = {NULL, 0};
-
- if (!teco_qreg_clipboard_get_string(qreg, &str.data, &str.len, error))
- return FALSE;
-
- teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0);
- teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0);
- teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, str.len, (sptr_t)str.data);
- teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0);
-
- undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0);
- return TRUE;
-}
-
/*
- * FIXME: Very similar to teco_qreg_workingdir_exchange_string().
+ * Regardless of whether EOL normalization is enabled,
+ * this will never perform it.
+ * Other than that, it's very similar to teco_qreg_external_load().
*/
static gboolean
-teco_qreg_clipboard_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error)
+teco_qreg_clipboard_load(teco_qreg_t *qreg, const gchar *filename, GError **error)
{
- g_auto(teco_string_t) other_str, own_str = {NULL, 0};
-
- teco_doc_get_string(src, &other_str.data, &other_str.len);
-
- if (!teco_qreg_clipboard_get_string(qreg, &own_str.data, &own_str.len, error) ||
- /* FIXME: Why is teco_qreg_plain_set_string() sufficient? */
- !teco_qreg_plain_set_string(qreg, other_str.data, other_str.len, error))
- return FALSE;
+ g_assert(!teco_string_contains(&qreg->head.name, '\0'));
+ const gchar *clipboard_name = qreg->head.name.data + 1;
- teco_doc_set_string(src, own_str.data, own_str.len);
- return TRUE;
-}
+ g_auto(teco_string_t) str = {NULL, 0};
-/*
- * FIXME: Very similar to teco_qreg_workingdir_undo_exchange_string().
- */
-static gboolean
-teco_qreg_clipboard_undo_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error)
-{
- if (!teco_qreg_clipboard_undo_set_string(qreg, error))
- return FALSE;
- if (qreg->must_undo) // FIXME
- teco_doc_undo_set_string(src);
- return TRUE;
+ return g_file_get_contents(filename, &str.data, &str.len, error) &&
+ teco_qreg_clipboard_undo_set_string(qreg, error) &&
+ teco_interface_set_clipboard(clipboard_name, str.data, str.len, error);
}
/** @static @memberof teco_qreg_t */
teco_qreg_t *
teco_qreg_clipboard_new(const gchar *name)
{
- static teco_qreg_vtable_t vtable = TECO_INIT_QREG(
+ static teco_qreg_vtable_t vtable = TECO_INIT_QREG_EXTERNAL(
.set_string = teco_qreg_clipboard_set_string,
.undo_set_string = teco_qreg_clipboard_undo_set_string,
.append_string = teco_qreg_clipboard_append_string,
.undo_append_string = teco_qreg_clipboard_undo_append_string,
.get_string = teco_qreg_clipboard_get_string,
- .get_character = teco_qreg_clipboard_get_character,
- .edit = teco_qreg_clipboard_edit,
- .exchange_string = teco_qreg_clipboard_exchange_string,
- .undo_exchange_string = teco_qreg_clipboard_undo_exchange_string
+ .load = teco_qreg_clipboard_load
);
teco_qreg_t *qreg = teco_qreg_new(&vtable, "~", 1);
@@ -939,7 +975,8 @@ teco_qreg_table_set_environ(teco_qreg_table_t *table, GError **error)
qreg = found;
}
- if (!qreg->vtable->set_string(qreg, value, strlen(value), error))
+ if (!qreg->vtable->set_string(qreg, value, strlen(value),
+ teco_default_codepage(), error))
return FALSE;
}
@@ -994,7 +1031,7 @@ teco_qreg_table_get_environ(teco_qreg_table_t *table, GError **error)
continue;
g_auto(teco_string_t) value = {NULL, 0};
- if (!cur->vtable->get_string(cur, &value.data, &value.len, error)) {
+ if (!cur->vtable->get_string(cur, &value.data, &value.len, NULL, error)) {
g_strfreev(envp);
return NULL;
}
@@ -1088,12 +1125,13 @@ teco_qreg_stack_push(teco_qreg_t *qreg, GError **error)
{
teco_qreg_stack_entry_t entry;
g_auto(teco_string_t) string = {NULL, 0};
+ guint codepage;
if (!qreg->vtable->get_integer(qreg, &entry.integer, error) ||
- !qreg->vtable->get_string(qreg, &string.data, &string.len, error))
+ !qreg->vtable->get_string(qreg, &string.data, &string.len, &codepage, error))
return FALSE;
teco_doc_init(&entry.string);
- teco_doc_set_string(&entry.string, string.data, string.len);
+ teco_doc_set_string(&entry.string, string.data, string.len, codepage);
teco_doc_update(&entry.string, &qreg->string);
/* pass ownership of entry to teco_qreg_stack */
@@ -1196,6 +1234,12 @@ teco_ed_hook(teco_ed_hook_t type, GError **error)
if (!teco_qreg_execute(qreg, &locals, error))
goto error_add_frame;
+ if (teco_qreg_current && !teco_qreg_current->must_undo) {
+ /* currently editing local Q-Register */
+ teco_error_editinglocalqreg_set(error, teco_qreg_current->head.name.data,
+ teco_qreg_current->head.name.len);
+ goto error_add_frame;
+ }
return teco_expressions_discard_args(error) &&
teco_expressions_brace_close(error);
@@ -1225,7 +1269,7 @@ struct teco_machine_qregspec_t {
union {
struct {
teco_qreg_type_t type : 8;
- gboolean parse_only : 1;
+ bool parse_only : 1;
};
guint __flags;
};
@@ -1255,7 +1299,7 @@ TECO_DECLARE_STATE(teco_state_qregspec_secondchar);
TECO_DECLARE_STATE(teco_state_qregspec_string);
static teco_state_t *teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx,
- gchar chr, GError **error);
+ gunichar chr, GError **error);
static teco_state_t *
teco_state_qregspec_done(teco_machine_qregspec_t *ctx, GError **error)
@@ -1290,7 +1334,7 @@ teco_state_qregspec_done(teco_machine_qregspec_t *ctx, GError **error)
}
static teco_state_t *
-teco_state_qregspec_start_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error)
+teco_state_qregspec_start_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error)
{
/*
* FIXME: We're using teco_state_qregspec_start as a success condition,
@@ -1307,7 +1351,7 @@ teco_state_qregspec_start_input(teco_machine_qregspec_t *ctx, gchar chr, GError
}
/* in cmdline.c */
-gboolean teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
TECO_DEFINE_STATE(teco_state_qregspec_start,
.is_start = TRUE,
@@ -1315,7 +1359,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_start,
);
static teco_state_t *
-teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error)
+teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error)
{
/*
* FIXME: Disallow space characters?
@@ -1334,7 +1378,7 @@ teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gchar chr,
if (!ctx->parse_only) {
if (ctx->parent.must_undo)
undo__teco_string_truncate(&ctx->name, ctx->name.len);
- teco_string_append_c(&ctx->name, g_ascii_toupper(chr));
+ teco_string_append_wc(&ctx->name, g_unichar_toupper(chr));
}
return teco_state_qregspec_done(ctx, error);
}
@@ -1350,7 +1394,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_start_global,
);
static teco_state_t *
-teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error)
+teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error)
{
/*
* FIXME: Disallow space characters?
@@ -1358,7 +1402,7 @@ teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gchar chr, GEr
if (!ctx->parse_only) {
if (ctx->parent.must_undo)
undo__teco_string_truncate(&ctx->name, ctx->name.len);
- teco_string_append_c(&ctx->name, g_ascii_toupper(chr));
+ teco_string_append_wc(&ctx->name, g_unichar_toupper(chr));
}
return &teco_state_qregspec_secondchar;
}
@@ -1368,7 +1412,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_firstchar,
);
static teco_state_t *
-teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error)
+teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error)
{
/*
* FIXME: Disallow space characters?
@@ -1376,7 +1420,7 @@ teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gchar chr, GE
if (!ctx->parse_only) {
if (ctx->parent.must_undo)
undo__teco_string_truncate(&ctx->name, ctx->name.len);
- teco_string_append_c(&ctx->name, g_ascii_toupper(chr));
+ teco_string_append_wc(&ctx->name, g_unichar_toupper(chr));
}
return teco_state_qregspec_done(ctx, error);
}
@@ -1386,7 +1430,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_secondchar,
);
static teco_state_t *
-teco_state_qregspec_string_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error)
+teco_state_qregspec_string_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error)
{
/*
* Makes sure that braces within string building constructs do not have to be
@@ -1427,7 +1471,7 @@ teco_state_qregspec_string_input(teco_machine_qregspec_t *ctx, gchar chr, GError
/* in cmdline.c */
gboolean teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx,
- gchar key, GError **error);
+ gunichar key, GError **error);
TECO_DEFINE_STATE(teco_state_qregspec_string,
.process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t)teco_state_qregspec_string_process_edit_cmd
@@ -1488,7 +1532,7 @@ teco_machine_qregspec_get_stringbuilding(teco_machine_qregspec_t *ctx)
* @memberof teco_machine_qregspec_t
*/
teco_machine_qregspec_status_t
-teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gchar chr,
+teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gunichar chr,
teco_qreg_t **result, teco_qreg_table_t **result_table, GError **error)
{
ctx->parse_only = result == NULL;
@@ -1516,7 +1560,7 @@ teco_machine_qregspec_get_results(teco_machine_qregspec_t *ctx,
gboolean
teco_machine_qregspec_auto_complete(teco_machine_qregspec_t *ctx, teco_string_t *insert)
{
- gsize restrict_len = 0;
+ guint restrict_len = 0;
/*
* NOTE: We could have separate process_edit_cmd_cb() for
@@ -1531,6 +1575,10 @@ teco_machine_qregspec_auto_complete(teco_machine_qregspec_t *ctx, teco_string_t
/* two-letter Q-Reg */
restrict_len = 2;
+ /*
+ * FIXME: This is not quite right as it will propose even
+ * lower case single or two-letter Q-Register names.
+ */
return teco_rb3str_auto_complete(&ctx->result_table->tree, !restrict_len,
ctx->name.data, ctx->name.len, restrict_len, insert) &&
ctx->nesting == 1;
diff --git a/src/qreg.h b/src/qreg.h
index f1c81ac..85da898 100644
--- a/src/qreg.h
+++ b/src/qreg.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -41,19 +41,26 @@ extern teco_view_t *teco_qreg_view;
* FIXME: Use TECO_DECLARE_VTABLE_METHOD(gboolean, teco_qreg, set_integer, teco_qreg_t *, teco_int_t, GError **);
* ...
* teco_qreg_set_integer_t set_integer;
+ * ...
+ * teco_qreg_set_integer(qreg, 23, error);
*/
typedef const struct {
gboolean (*set_integer)(teco_qreg_t *qreg, teco_int_t value, GError **error);
gboolean (*undo_set_integer)(teco_qreg_t *qreg, GError **error);
gboolean (*get_integer)(teco_qreg_t *qreg, teco_int_t *ret, GError **error);
- gboolean (*set_string)(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error);
+ gboolean (*set_string)(teco_qreg_t *qreg, const gchar *str, gsize len,
+ guint codepage, GError **error);
gboolean (*undo_set_string)(teco_qreg_t *qreg, GError **error);
gboolean (*append_string)(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error);
gboolean (*undo_append_string)(teco_qreg_t *qreg, GError **error);
- gboolean (*get_string)(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error);
- gint (*get_character)(teco_qreg_t *qreg, guint position, GError **error);
+ gboolean (*get_string)(teco_qreg_t *qreg, gchar **str, gsize *len,
+ guint *codepage, GError **error);
+ gboolean (*get_character)(teco_qreg_t *qreg, teco_int_t position,
+ teco_int_t *chr, GError **error);
+ /* always returns length in glyphs in contrast to get_string() */
+ teco_int_t (*get_length)(teco_qreg_t *qreg, GError **error);
/*
* These callbacks exist only to optimize teco_qreg_stack_push|pop()
@@ -65,6 +72,13 @@ typedef const struct {
gboolean (*edit)(teco_qreg_t *qreg, GError **error);
gboolean (*undo_edit)(teco_qreg_t *qreg, GError **error);
+
+ /*
+ * Load and save already take care of undo token
+ * creation.
+ */
+ gboolean (*load)(teco_qreg_t *qreg, const gchar *filename, GError **error);
+ gboolean (*save)(teco_qreg_t *qreg, const gchar *filename, GError **error);
} teco_qreg_vtable_t;
/** @extends teco_rb3str_head_t */
@@ -106,13 +120,6 @@ gboolean teco_qreg_execute(teco_qreg_t *qreg, teco_qreg_table_t *qreg_table_loca
void teco_qreg_undo_set_eol_mode(teco_qreg_t *qreg);
void teco_qreg_set_eol_mode(teco_qreg_t *qreg, gint mode);
-/*
- * Load and save already care about undo token
- * creation.
- */
-gboolean teco_qreg_load(teco_qreg_t *qreg, const gchar *filename, GError **error);
-gboolean teco_qreg_save(teco_qreg_t *qreg, const gchar *filename, GError **error);
-
/** @memberof teco_qreg_t */
static inline void
teco_qreg_free(teco_qreg_t *qreg)
@@ -220,7 +227,7 @@ void teco_machine_qregspec_reset(teco_machine_qregspec_t *ctx);
*/
struct teco_machine_stringbuilding_t *teco_machine_qregspec_get_stringbuilding(teco_machine_qregspec_t *ctx);
-teco_machine_qregspec_status_t teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gchar chr,
+teco_machine_qregspec_status_t teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gunichar chr,
teco_qreg_t **result,
teco_qreg_table_t **result_table, GError **error);
diff --git a/src/rb3str.c b/src/rb3str.c
index 889c52e..d51ac5d 100644
--- a/src/rb3str.c
+++ b/src/rb3str.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -95,7 +95,7 @@ teco_rb3str_nfind(teco_rb3str_tree_t *tree, gboolean case_sensitive, const gchar
* @param case_sensitive Whether to match case-sensitive.
* @param str String to complete (not necessarily null-terminated).
* @param str_len Length of `str` in bytes.
- * @param restrict_len Limit completions to this size.
+ * @param restrict_len Limit completions to this size (in characters).
* @param insert String to set with characters that can be autocompleted.
* @return TRUE if the completion was unambiguous, else FALSE.
*
@@ -103,7 +103,7 @@ teco_rb3str_nfind(teco_rb3str_tree_t *tree, gboolean case_sensitive, const gchar
*/
gboolean
teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive,
- const gchar *str, gsize str_len, gsize restrict_len, teco_string_t *insert)
+ const gchar *str, gsize str_len, guint restrict_len, teco_string_t *insert)
{
memset(insert, 0, sizeof(*insert));
@@ -115,7 +115,7 @@ teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive,
for (teco_rb3str_head_t *cur = teco_rb3str_nfind(tree, case_sensitive, str, str_len);
cur && cur->key.len >= str_len && diff(&cur->key, str, str_len) == str_len;
cur = teco_rb3str_get_next(cur)) {
- if (restrict_len && cur->key.len != restrict_len)
+ if (restrict_len && g_utf8_strlen(cur->key.data, cur->key.len) != restrict_len)
continue;
if (G_UNLIKELY(!first)) {
@@ -136,7 +136,7 @@ teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive,
for (teco_rb3str_head_t *cur = first;
cur && cur->key.len >= str_len && diff(&cur->key, str, str_len) == str_len;
cur = teco_rb3str_get_next(cur)) {
- if (restrict_len && cur->key.len != restrict_len)
+ if (restrict_len && g_utf8_strlen(cur->key.data, cur->key.len) != restrict_len)
continue;
teco_interface_popup_add(TECO_POPUP_PLAIN,
diff --git a/src/rb3str.h b/src/rb3str.h
index ddbf6bb..adf5f89 100644
--- a/src/rb3str.h
+++ b/src/rb3str.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -65,5 +65,5 @@ teco_rb3str_head_t *teco_rb3str_nfind(teco_rb3str_tree_t *tree, gboolean case_se
const gchar *str, gsize len);
gboolean teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive,
- const gchar *str, gsize str_len, gsize restrict_len,
+ const gchar *str, gsize str_len, guint restrict_len,
teco_string_t *insert);
diff --git a/src/ring.c b/src/ring.c
index fbcc845..6a4eae5 100644
--- a/src/ring.c
+++ b/src/ring.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -512,7 +512,7 @@ teco_state_edit_file_done(teco_machine_main_t *ctx, const teco_string_t *str, GE
* A value of 1 denotes the first buffer, 2 the second,
* et cetera.
*/
-TECO_DEFINE_STATE_EXPECTFILE(teco_state_edit_file,
+TECO_DEFINE_STATE_EXPECTGLOB(teco_state_edit_file,
.initial_cb = (teco_state_initial_cb_t)teco_state_edit_file_initial
);
@@ -524,7 +524,7 @@ teco_state_save_file_done(teco_machine_main_t *ctx, const teco_string_t *str, GE
g_autofree gchar *filename = teco_file_expand_path(str->data);
if (teco_qreg_current) {
- if (!teco_qreg_save(teco_qreg_current, filename, error))
+ if (!teco_qreg_current->vtable->save(teco_qreg_current, filename, error))
return NULL;
} else {
if (!teco_buffer_save(teco_ring_current, *filename ? filename : NULL, error))
diff --git a/src/ring.h b/src/ring.h
index 3ad9a78..833d052 100644
--- a/src/ring.h
+++ b/src/ring.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/sciteco.h b/src/sciteco.h
index 87bd973..7fe09d4 100644
--- a/src/sciteco.h
+++ b/src/sciteco.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,6 +21,8 @@
#include <glib.h>
+#include <Scintilla.h>
+
#if TECO_INTEGER == 32
typedef gint32 teco_int_t;
#define TECO_INT_FORMAT G_GINT32_FORMAT
@@ -58,8 +60,18 @@ teco_is_failure(teco_bool_t x)
return x >= 0;
}
+/**
+ * Call function as destructor on debug builds.
+ * This should be used only if the cleanup is optional.
+ */
+#ifdef NDEBUG
+#define TECO_DEBUG_CLEANUP __attribute__((unused))
+#else
+#define TECO_DEBUG_CLEANUP __attribute__((destructor))
+#endif
+
/** TRUE if C is a control character */
-#define TECO_IS_CTL(C) ((C) < ' ')
+#define TECO_IS_CTL(C) ((gunichar)(C) < ' ')
/** ASCII character to echo control character C */
#define TECO_CTL_ECHO(C) ((C) | 0x40)
/**
@@ -73,17 +85,25 @@ teco_is_failure(teco_bool_t x)
* This is not a bitfield, since it is set from SciTECO.
*/
enum {
+ TECO_ED_DEFAULT_ANSI = (1 << 2),
TECO_ED_AUTOCASEFOLD = (1 << 3),
TECO_ED_AUTOEOL = (1 << 4),
TECO_ED_HOOKS = (1 << 5),
- TECO_ED_FNKEYS = (1 << 6),
+ //TECO_ED_MOUSEKEY = (1 << 6),
TECO_ED_SHELLEMU = (1 << 7),
- TECO_ED_XTERM_CLIPBOARD = (1 << 8)
+ TECO_ED_OSC52 = (1 << 8),
+ TECO_ED_ICONS = (1 << 9)
};
/* in main.c */
extern teco_int_t teco_ed;
+static inline guint
+teco_default_codepage(void)
+{
+ return teco_ed & TECO_ED_DEFAULT_ANSI ? SC_CHARSET_ANSI : SC_CP_UTF8;
+}
+
/* in main.c */
extern volatile sig_atomic_t teco_interrupted;
diff --git a/src/search.c b/src/search.c
index 733eab9..0d04895 100644
--- a/src/search.c
+++ b/src/search.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -38,11 +38,8 @@
#include "search.h"
typedef struct {
- /*
- * FIXME: Should perhaps all be teco_int_t?
- */
- gint dot;
- gint from, to;
+ gssize dot;
+ gssize from, to;
gint count;
teco_buffer_t *from_buffer, *to_buffer;
@@ -63,6 +60,9 @@ teco_state_search_initial(teco_machine_main_t *ctx, GError **error)
if (ctx->mode > TECO_MODE_NORMAL)
return TRUE;
+ teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine,
+ teco_interface_get_codepage());
+
if (G_UNLIKELY(!teco_search_qreg_machine))
teco_search_qreg_machine = teco_machine_qregspec_new(TECO_QREG_REQUIRED, ctx->qreg_table_locals,
ctx->parent.must_undo);
@@ -79,16 +79,16 @@ teco_state_search_initial(teco_machine_main_t *ctx, GError **error)
return FALSE;
if (v1 <= v2) {
teco_search_parameters.count = 1;
- teco_search_parameters.from = (gint)v1;
- teco_search_parameters.to = (gint)v2;
+ teco_search_parameters.from = teco_interface_glyphs2bytes(v1);
+ teco_search_parameters.to = teco_interface_glyphs2bytes(v2);
} else {
teco_search_parameters.count = -1;
- teco_search_parameters.from = (gint)v2;
- teco_search_parameters.to = (gint)v1;
+ teco_search_parameters.from = teco_interface_glyphs2bytes(v2);
+ teco_search_parameters.to = teco_interface_glyphs2bytes(v1);
}
- if (!teco_validate_pos(teco_search_parameters.from) ||
- !teco_validate_pos(teco_search_parameters.to)) {
+ if (teco_search_parameters.from < 0 ||
+ teco_search_parameters.to < 0) {
/*
* FIXME: In derived classes, the command name will
* no longer be correct.
@@ -114,24 +114,10 @@ teco_state_search_initial(teco_machine_main_t *ctx, GError **error)
return TRUE;
}
-static const gchar *
-teco_regexp_escape_chr(gchar chr)
-{
- static gchar escaped[] = {'\\', '\0', '\0', '\0'};
-
- if (!chr) {
- escaped[1] = 'c';
- escaped[2] = '@';
- return escaped;
- }
-
- escaped[1] = chr;
- escaped[2] = '\0';
- return g_ascii_isalnum(chr) ? escaped + 1 : escaped;
-}
-
typedef enum {
TECO_SEARCH_STATE_START,
+ TECO_SEARCH_STATE_CTL,
+ TECO_SEARCH_STATE_ESCAPE,
TECO_SEARCH_STATE_NOT,
TECO_SEARCH_STATE_CTL_E,
TECO_SEARCH_STATE_ANYQ,
@@ -153,6 +139,7 @@ typedef enum {
* The pointer is modified and always left after
* the last character used, so it may point to the
* terminating null byte after the call.
+ * @param codepage The codepage of pattern.
* @param escape_default Whether to treat single characters
* as classes or not.
* @param error A GError.
@@ -161,10 +148,13 @@ typedef enum {
* When a non-empty string is returned, the state has always
* been reset to TECO_SEARCH_STATE_START.
* Must be freed with g_free().
+ *
+ * @fixme The allocations could be avoided by letting it append
+ * to the target regexp teco_string_t directly.
*/
static gchar *
teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern,
- gboolean escape_default, GError **error)
+ guint codepage, gboolean escape_default, GError **error)
{
while (pattern->len > 0) {
switch (*state) {
@@ -184,8 +174,12 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern,
*/
if (!escape_default)
return g_strdup("");
- pattern->len--;
- return g_strdup(teco_regexp_escape_chr(*pattern->data++));
+ gsize len = codepage == SC_CP_UTF8
+ ? g_utf8_next_char(pattern->data) - pattern->data : 1;
+ gchar *escaped = g_regex_escape_string(pattern->data, len);
+ pattern->data += len;
+ pattern->len -= len;
+ return escaped;
}
break;
@@ -246,25 +240,36 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern,
case TECO_SEARCH_STATE_ANYQ: {
teco_qreg_t *reg;
-
+ gsize len;
+ gunichar chr;
+
+ if (codepage == SC_CP_UTF8) {
+ len = g_utf8_next_char(pattern->data) - pattern->data;
+ chr = g_utf8_get_char(pattern->data);
+ } else {
+ len = 1;
+ chr = *pattern->data;
+ }
switch (teco_machine_qregspec_input(teco_search_qreg_machine,
- *pattern->data, &reg, NULL, error)) {
+ chr, &reg, NULL, error)) {
case TECO_MACHINE_QREGSPEC_ERROR:
return NULL;
case TECO_MACHINE_QREGSPEC_MORE:
/* incomplete, but consume the character */
- break;
+ pattern->data += len;
+ pattern->len -= len;
+ continue;
case TECO_MACHINE_QREGSPEC_DONE:
teco_machine_qregspec_reset(teco_search_qreg_machine);
g_auto(teco_string_t) str = {NULL, 0};
- if (!reg->vtable->get_string(reg, &str.data, &str.len, error))
+ if (!reg->vtable->get_string(reg, &str.data, &str.len, NULL, error))
return NULL;
- pattern->data++;
- pattern->len--;
+ pattern->data += len;
+ pattern->len -= len;
*state = TECO_SEARCH_STATE_START;
return g_regex_escape_string(str.data, str.len);
}
@@ -303,6 +308,7 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern,
* successfully scanned character, so it can be
* called recursively. It may also point to the
* terminating null byte after the call.
+ * @param codepage The codepage of pattern.
* @param single_expr Whether to scan a single pattern
* expression or an arbitrary sequence.
* @param error A GError.
@@ -310,19 +316,31 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern,
* Must be freed with g_free().
*/
static gchar *
-teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error)
+teco_pattern2regexp(teco_string_t *pattern, guint codepage, gboolean single_expr, GError **error)
{
teco_search_state_t state = TECO_SEARCH_STATE_START;
g_auto(teco_string_t) re = {NULL, 0};
do {
/*
+ * Previous character was caret.
+ * Make sure it is handled like a control character.
+ * This is necessary even though we have string building activated,
+ * to support constructs like ^Q^Q (typed with carets) in order to
+ * quote pattern matching characters.
+ */
+ if (state == TECO_SEARCH_STATE_CTL) {
+ *pattern->data = TECO_CTL_KEY(g_ascii_toupper(*pattern->data));
+ state = TECO_SEARCH_STATE_START;
+ }
+
+ /*
* First check whether it is a class.
* This will not treat individual characters
* as classes, so we do not convert them to regexp
* classes unnecessarily.
*/
- g_autofree gchar *temp = teco_class2regexp(&state, pattern, FALSE, error);
+ g_autofree gchar *temp = teco_class2regexp(&state, pattern, codepage, FALSE, error);
if (!temp)
return NULL;
@@ -344,18 +362,40 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error
switch (state) {
case TECO_SEARCH_STATE_START:
switch (*pattern->data) {
- case TECO_CTL_KEY('X'): teco_string_append_c(&re, '.'); break;
- case TECO_CTL_KEY('N'): state = TECO_SEARCH_STATE_NOT; break;
- default: {
- const gchar *escaped = teco_regexp_escape_chr(*pattern->data);
- teco_string_append(&re, escaped, strlen(escaped));
- }
+ case '^':
+ state = TECO_SEARCH_STATE_CTL;
+ break;
+ case TECO_CTL_KEY('Q'):
+ case TECO_CTL_KEY('R'):
+ state = TECO_SEARCH_STATE_ESCAPE;
+ break;
+ case TECO_CTL_KEY('X'):
+ teco_string_append_c(&re, '.');
+ break;
+ case TECO_CTL_KEY('N'):
+ state = TECO_SEARCH_STATE_NOT;
+ break;
+ default:
+ state = TECO_SEARCH_STATE_ESCAPE;
+ continue;
}
break;
+ case TECO_SEARCH_STATE_ESCAPE: {
+ state = TECO_SEARCH_STATE_START;
+ gsize len = codepage == SC_CP_UTF8
+ ? g_utf8_next_char(pattern->data) - pattern->data : 1;
+ /* the allocation could theoretically be avoided by escaping char-wise */
+ g_autofree gchar *escaped = g_regex_escape_string(pattern->data, len);
+ teco_string_append(&re, escaped, strlen(escaped));
+ pattern->data += len;
+ pattern->len -= len;
+ continue;
+ }
+
case TECO_SEARCH_STATE_NOT: {
state = TECO_SEARCH_STATE_START;
- g_autofree gchar *temp = teco_class2regexp(&state, pattern, TRUE, error);
+ g_autofree gchar *temp = teco_class2regexp(&state, pattern, codepage, TRUE, error);
if (!temp)
return NULL;
if (!*temp)
@@ -391,7 +431,7 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error
case TECO_SEARCH_STATE_MANY: {
/* consume exactly one pattern element */
- g_autofree gchar *temp = teco_pattern2regexp(pattern, TRUE, error);
+ g_autofree gchar *temp = teco_pattern2regexp(pattern, codepage, TRUE, error);
if (!temp)
return NULL;
if (!*temp)
@@ -417,7 +457,7 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error
state = TECO_SEARCH_STATE_START;
break;
default: {
- g_autofree gchar *temp = teco_pattern2regexp(pattern, TRUE, error);
+ g_autofree gchar *temp = teco_pattern2regexp(pattern, codepage, TRUE, error);
if (!temp)
return NULL;
if (!*temp)
@@ -454,16 +494,17 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error
}
static gboolean
-teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error)
+teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
{
g_autoptr(GMatchInfo) info = NULL;
- const gchar *buffer = (const gchar *)teco_interface_ssm(SCI_GETCHARACTERPOINTER, 0, 0);
+ /* NOTE: can return NULL pointer for completely new and empty documents */
+ const gchar *buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, from, to-from) ? : "";
GError *tmp_error = NULL;
/*
* NOTE: The return boolean does NOT signal whether an error was generated.
*/
- g_regex_match_full(re, buffer, (gssize)to, from, 0, &info, &tmp_error);
+ g_regex_match_full(re, buffer, to-from, 0, 0, &info, &tmp_error);
if (tmp_error) {
g_propagate_error(error, tmp_error);
return FALSE;
@@ -543,7 +584,7 @@ teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error)
if (matched_from >= 0 && matched_to >= 0)
/* match success */
- teco_interface_ssm(SCI_SETSEL, matched_from, matched_to);
+ teco_interface_ssm(SCI_SETSEL, from+matched_from, from+matched_to);
return TRUE;
}
@@ -551,8 +592,22 @@ teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error)
static gboolean
teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gsize new_chars, GError **error)
{
- static const GRegexCompileFlags flags = G_REGEX_CASELESS | G_REGEX_MULTILINE |
- G_REGEX_DOTALL | G_REGEX_RAW;
+ /* FIXME: Should G_REGEX_OPTIMIZE be added under certain circumstances? */
+ GRegexCompileFlags flags = G_REGEX_CASELESS | G_REGEX_MULTILINE | G_REGEX_DOTALL;
+
+ /* this is set in teco_state_search_initial() */
+ if (ctx->expectstring.machine.codepage != SC_CP_UTF8) {
+ /* single byte encoding */
+ flags |= G_REGEX_RAW;
+ } else if (!teco_string_validate_utf8(str)) {
+ /*
+ * While SciTECO code is always guaranteed to be in valid UTF-8,
+ * the result of string building may not be (e.g. if ^EQq inserts garbage).
+ */
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
+ "Invalid UTF-8 byte sequence in search pattern");
+ return FALSE;
+ }
if (teco_current_doc_must_undo())
undo__teco_interface_ssm(SCI_SETSEL,
@@ -567,8 +622,9 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs
g_autoptr(GRegex) re = NULL;
teco_string_t pattern = *str;
+ g_autofree gchar *re_pattern;
/* NOTE: teco_pattern2regexp() modifies str pointer */
- g_autofree gchar *re_pattern = teco_pattern2regexp(&pattern, FALSE, error);
+ re_pattern = teco_pattern2regexp(&pattern, ctx->expectstring.machine.codepage, FALSE, error);
if (!re_pattern)
return FALSE;
teco_machine_qregspec_reset(teco_search_qreg_machine);
@@ -668,13 +724,15 @@ teco_state_search_done(teco_machine_main_t *ctx, const teco_string_t *str, GErro
undo__teco_interface_ssm(SCI_SETANCHOR, anchor, 0);
if (!search_reg->vtable->undo_set_string(search_reg, error) ||
- !search_reg->vtable->set_string(search_reg, str->data, str->len, error))
+ !search_reg->vtable->set_string(search_reg, str->data, str->len,
+ teco_default_codepage(), error))
return NULL;
teco_interface_ssm(SCI_SETANCHOR, anchor, 0);
} else {
g_auto(teco_string_t) search_str = {NULL, 0};
- if (!search_reg->vtable->get_string(search_reg, &search_str.data, &search_str.len, error) ||
+ if (!search_reg->vtable->get_string(search_reg, &search_str.data, &search_str.len,
+ NULL, error) ||
!teco_state_search_process(ctx, &search_str, search_str.len, error))
return NULL;
}
@@ -890,12 +948,12 @@ teco_state_search_kill_done(teco_machine_main_t *ctx, const teco_string_t *str,
if (teco_is_failure(search_state))
return &teco_state_start;
- gint dot = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
+ sptr_t dot = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0);
if (teco_search_parameters.dot < dot) {
/* kill forwards */
- gint anchor = teco_interface_ssm(SCI_GETANCHOR, 0, 0);
+ sptr_t anchor = teco_interface_ssm(SCI_GETANCHOR, 0, 0);
if (teco_current_doc_must_undo())
undo__teco_interface_ssm(SCI_GOTOPOS, dot, 0);
@@ -903,18 +961,23 @@ teco_state_search_kill_done(teco_machine_main_t *ctx, const teco_string_t *str,
teco_interface_ssm(SCI_DELETERANGE, teco_search_parameters.dot,
anchor - teco_search_parameters.dot);
+
+ /* NOTE: An undo action is not always created. */
+ if (teco_current_doc_must_undo() &&
+ teco_search_parameters.dot != anchor)
+ undo__teco_interface_ssm(SCI_UNDO, 0, 0);
} else {
/* kill backwards */
teco_interface_ssm(SCI_DELETERANGE, dot, teco_search_parameters.dot - dot);
+
+ /* NOTE: An undo action is not always created. */
+ if (teco_current_doc_must_undo() &&
+ teco_search_parameters.dot != dot)
+ undo__teco_interface_ssm(SCI_UNDO, 0, 0);
}
teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0);
teco_ring_dirtify();
- /* NOTE: An undo action is not always created. */
- if (teco_current_doc_must_undo() &&
- teco_search_parameters.dot != dot)
- undo__teco_interface_ssm(SCI_UNDO, 0, 0);
-
return &teco_state_start;
}
@@ -981,11 +1044,20 @@ teco_state_search_delete_done(teco_machine_main_t *ctx, const teco_string_t *str
*/
TECO_DEFINE_STATE_SEARCH(teco_state_search_delete);
+static gboolean
+teco_state_replace_insert_initial(teco_machine_main_t *ctx, GError **error)
+{
+ if (ctx->mode == TECO_MODE_NORMAL)
+ teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine,
+ teco_interface_get_codepage());
+ return TRUE;
+}
+
/*
* FIXME: Could be static
*/
TECO_DEFINE_STATE_INSERT(teco_state_replace_insert,
- .initial_cb = NULL
+ .initial_cb = (teco_state_initial_cb_t)teco_state_replace_insert_initial
);
static teco_state_t *
@@ -1058,11 +1130,13 @@ teco_state_replace_default_insert_done_overwrite(teco_machine_main_t *ctx, const
if (str->len > 0) {
if (!replace_reg->vtable->undo_set_string(replace_reg, error) ||
- !replace_reg->vtable->set_string(replace_reg, str->data, str->len, error))
+ !replace_reg->vtable->set_string(replace_reg, str->data, str->len,
+ teco_default_codepage(), error))
return NULL;
} else {
g_auto(teco_string_t) replace_str = {NULL, 0};
- if (!replace_reg->vtable->get_string(replace_reg, &replace_str.data, &replace_str.len, error) ||
+ if (!replace_reg->vtable->get_string(replace_reg, &replace_str.data, &replace_str.len,
+ NULL, error) ||
(replace_str.len > 0 && !teco_state_insert_process(ctx, &replace_str, replace_str.len, error)))
return NULL;
}
@@ -1089,7 +1163,8 @@ teco_state_replace_default_ignore_done(teco_machine_main_t *ctx, const teco_stri
g_assert(replace_reg != NULL);
if (!replace_reg->vtable->undo_set_string(replace_reg, error) ||
- !replace_reg->vtable->set_string(replace_reg, str->data, str->len, error))
+ !replace_reg->vtable->set_string(replace_reg, str->data, str->len,
+ teco_default_codepage(), error))
return NULL;
return &teco_state_start;
diff --git a/src/search.h b/src/search.h
index 3e4a2ef..3eacb6d 100644
--- a/src/search.h
+++ b/src/search.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/spawn.c b/src/spawn.c
index a30e6b2..e6d620c 100644
--- a/src/spawn.c
+++ b/src/spawn.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -76,8 +76,8 @@ static struct {
GSource *stdin_src, *stdout_src;
gboolean interrupted;
- teco_int_t from, to;
- teco_int_t start;
+ gssize from, to;
+ gsize start;
gboolean text_added;
teco_eol_writer_t stdin_writer;
@@ -121,7 +121,7 @@ teco_parse_shell_command_line(const gchar *cmdline, GError **error)
teco_qreg_t *reg = teco_qreg_table_find(&teco_qreg_table_globals, "$COMSPEC", 8);
g_assert(reg != NULL);
teco_string_t comspec;
- if (!reg->vtable->get_string(reg, &comspec.data, &comspec.len, error))
+ if (!reg->vtable->get_string(reg, &comspec.data, &comspec.len, NULL, error))
return NULL;
argv = g_new(gchar *, 5);
@@ -140,7 +140,7 @@ teco_parse_shell_command_line(const gchar *cmdline, GError **error)
teco_qreg_t *reg = teco_qreg_table_find(&teco_qreg_table_globals, "$SHELL", 6);
g_assert(reg != NULL);
teco_string_t shell;
- if (!reg->vtable->get_string(reg, &shell.data, &shell.len, error))
+ if (!reg->vtable->get_string(reg, &shell.data, &shell.len, NULL, error))
return NULL;
argv = g_new(gchar *, 4);
@@ -164,6 +164,13 @@ teco_state_execute_initial(teco_machine_main_t *ctx, GError **error)
if (ctx->mode > TECO_MODE_NORMAL)
return TRUE;
+ /*
+ * Command-lines and file names are always assumed to be UTF-8,
+ * unless we set TECO_ED_DEFAULT_ANSI.
+ */
+ teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine,
+ teco_default_codepage());
+
if (!teco_expressions_eval(FALSE, error))
return FALSE;
@@ -202,15 +209,17 @@ teco_state_execute_initial(teco_machine_main_t *ctx, GError **error)
break;
}
- default:
+ default: {
/* pipe and replace character range */
- if (!teco_expressions_pop_num_calc(&teco_spawn_ctx.to, 0, error) ||
- !teco_expressions_pop_num_calc(&teco_spawn_ctx.from, 0, error))
+ teco_int_t from, to;
+ if (!teco_expressions_pop_num_calc(&to, 0, error) ||
+ !teco_expressions_pop_num_calc(&from, 0, error))
return FALSE;
+ teco_spawn_ctx.from = teco_interface_glyphs2bytes(from);
+ teco_spawn_ctx.to = teco_interface_glyphs2bytes(to);
rc = teco_bool(teco_spawn_ctx.from <= teco_spawn_ctx.to &&
- teco_validate_pos(teco_spawn_ctx.from) &&
- teco_validate_pos(teco_spawn_ctx.to));
- break;
+ teco_spawn_ctx.from >= 0 && teco_spawn_ctx.to >= 0);
+ }
}
if (teco_is_failure(rc)) {
@@ -257,12 +266,11 @@ teco_state_execute_done(teco_machine_main_t *ctx, const teco_string_t *str, GErr
g_autoptr(GIOChannel) stdin_chan = NULL, stdout_chan = NULL;
g_auto(GStrv) argv = NULL, envp = NULL;
- if (teco_string_contains(str, '\0')) {
+ if (!str->len || teco_string_contains(str, '\0')) {
g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED,
- "Command line must not contain null-bytes");
+ "Command line must not be empty or contain null-bytes");
goto gerror;
}
- g_assert(str->data != NULL);
argv = teco_parse_shell_command_line(str->data, error);
if (!argv)
@@ -410,17 +418,17 @@ cleanup:
}
/* in cmdline.c */
-gboolean teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
/*$ EC pipe filter
- * EC[command]$ -- Execute operating system command and filter buffer contents
- * linesEC[command]$
- * -EC[command]$
- * from,toEC[command]$
- * :EC[command]$ -> Success|Failure
- * lines:EC[command]$ -> Success|Failure
- * -:EC[command]$ -> Success|Failure
- * from,to:EC[command]$ -> Success|Failure
+ * ECcommand$ -- Execute operating system command and filter buffer contents
+ * linesECcommand$
+ * -ECcommand$
+ * from,toECcommand$
+ * :ECcommand$ -> Success|Failure
+ * lines:ECcommand$ -> Success|Failure
+ * -:ECcommand$ -> Success|Failure
+ * from,to:ECcommand$ -> Success|Failure
*
* The EC command allows you to interface with the operating
* system shell and external programs.
@@ -546,14 +554,14 @@ teco_state_egcommand_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
}
/*$ EG EGq
- * EGq[command]$ -- Set Q-Register to output of operating system command
- * linesEGq[command]$
- * -EGq[command]$
- * from,toEGq[command]$
- * :EGq[command]$ -> Success|Failure
- * lines:EGq[command]$ -> Success|Failure
- * -:EGq[command]$ -> Success|Failure
- * from,to:EGq[command]$ -> Success|Failure
+ * EGq command$ -- Set Q-Register to output of operating system command
+ * linesEGq command$
+ * -EGq command$
+ * from,toEGq command$
+ * :EGq command$ -> Success|Failure
+ * lines:EGq command$ -> Success|Failure
+ * -:EGq command$ -> Success|Failure
+ * from,to:EGq command$ -> Success|Failure
*
* Runs an operating system <command> and set Q-Register
* <q> to the data read from its standard output stream.
@@ -635,7 +643,7 @@ teco_spawn_stdin_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer dat
gssize bytes_written = teco_eol_writer_convert(&teco_spawn_ctx.stdin_writer, buffer,
convert_len, &teco_spawn_ctx.error);
if (bytes_written < 0) {
- /* GError ocurred */
+ /* GError occurred */
g_main_loop_quit(teco_spawn_ctx.mainloop);
return G_SOURCE_REMOVE;
}
@@ -667,6 +675,8 @@ teco_spawn_stdout_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer da
/* source has already been dispatched */
return G_SOURCE_REMOVE;
+ teco_qreg_t *qreg = teco_spawn_ctx.register_argument;
+
for (;;) {
teco_string_t buffer;
@@ -685,20 +695,16 @@ teco_spawn_stdout_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer da
if (!buffer.len)
return G_SOURCE_CONTINUE;
- if (teco_spawn_ctx.register_argument) {
+ if (qreg) {
if (teco_spawn_ctx.text_added) {
- if (!teco_spawn_ctx.register_argument->vtable->undo_append_string(teco_spawn_ctx.register_argument,
- &teco_spawn_ctx.error) ||
- !teco_spawn_ctx.register_argument->vtable->append_string(teco_spawn_ctx.register_argument,
- buffer.data, buffer.len,
- &teco_spawn_ctx.error))
+ if (!qreg->vtable->undo_append_string(qreg, &teco_spawn_ctx.error) ||
+ !qreg->vtable->append_string(qreg, buffer.data, buffer.len,
+ &teco_spawn_ctx.error))
goto error;
} else {
- if (!teco_spawn_ctx.register_argument->vtable->undo_set_string(teco_spawn_ctx.register_argument,
- &teco_spawn_ctx.error) ||
- !teco_spawn_ctx.register_argument->vtable->set_string(teco_spawn_ctx.register_argument,
- buffer.data, buffer.len,
- &teco_spawn_ctx.error))
+ if (!qreg->vtable->undo_set_string(qreg, &teco_spawn_ctx.error) ||
+ !qreg->vtable->set_string(qreg, buffer.data, buffer.len,
+ teco_default_codepage(), &teco_spawn_ctx.error))
goto error;
}
} else {
@@ -789,8 +795,7 @@ teco_spawn_idle_cb(gpointer user_data)
return G_SOURCE_CONTINUE;
}
-#ifndef NDEBUG
-static void __attribute__((destructor))
+static void TECO_DEBUG_CLEANUP
teco_spawn_cleanup(void)
{
g_source_unref(teco_spawn_ctx.idle_src);
@@ -801,4 +806,3 @@ teco_spawn_cleanup(void)
if (teco_spawn_ctx.error)
g_error_free(teco_spawn_ctx.error);
}
-#endif
diff --git a/src/spawn.h b/src/spawn.h
index 0e5ca96..312de6e 100644
--- a/src/spawn.h
+++ b/src/spawn.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/string-utils.c b/src/string-utils.c
index f2cd45e..b284760 100644
--- a/src/string-utils.c
+++ b/src/string-utils.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -55,13 +55,20 @@ teco_string_echo(const gchar *str, gsize len)
return ret;
}
-/** @memberof teco_string_t */
+/**
+ * Get character coordinates for a given byte index.
+ *
+ * The given string must be valid UTF-8.
+ *
+ * @memberof teco_string_t
+ */
void
-teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column)
+teco_string_get_coord(const gchar *str, gsize off, guint *pos, guint *line, guint *column)
{
+ *pos = 0;
*line = *column = 1;
- for (guint i = 0; i < pos; i++) {
+ for (guint i = 0; i < off; i = g_utf8_next_char(str+i) - str) {
switch (str[i]) {
case '\r':
if (str[i+1] == '\n')
@@ -75,10 +82,21 @@ teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column)
(*column)++;
break;
}
+ (*pos)++;
}
}
-/** @memberof teco_string_t */
+/**
+ * Get the length of the prefix common to two strings.
+ * Works with UTF-8 and single-byte encodings.
+ *
+ * @param a Left string.
+ * @param b Right string.
+ * @param b_len Length of right string.
+ * @return Length of the common prefix in bytes.
+ *
+ * @memberof teco_string_t
+ */
gsize
teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len)
{
@@ -91,15 +109,32 @@ teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len)
return len;
}
-/** @memberof teco_string_t */
+/**
+ * Get the length of the prefix common to two UTF-8 strings
+ * without considering case.
+ *
+ * The UTF-8 strings must be validated, which should be the case
+ * for help labels and short Q-Register names.
+ *
+ * @param a Left UTF-8 string.
+ * @param b Right UTF-8 string.
+ * @param b_len Length of right UTF-8 string.
+ * @return Length of the common prefix in bytes.
+ *
+ * @memberof teco_string_t
+ */
gsize
teco_string_casediff(const teco_string_t *a, const gchar *b, gsize b_len)
{
gsize len = 0;
- while (len < a->len && len < b_len &&
- g_ascii_tolower(a->data[len]) == g_ascii_tolower(b[len]))
- len++;
+ while (len < a->len && len < b_len) {
+ gunichar a_chr = g_utf8_get_char(a->data+len);
+ gunichar b_chr = g_utf8_get_char(b+len);
+ if (g_unichar_tolower(a_chr) != g_unichar_tolower(b_chr))
+ break;
+ len = g_utf8_next_char(b+len) - b;
+ }
return len;
}
diff --git a/src/string-utils.h b/src/string-utils.h
index 26b660b..ebe25d5 100644
--- a/src/string-utils.h
+++ b/src/string-utils.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -26,15 +26,25 @@
/**
* Upper-case SciTECO command character.
*
- * There are implementations in glib (g_ascii_toupper) and libc,
+ * There are implementations in glib (g_ascii_toupper() and g_unichar_toupper()) and libc,
* but this implementation is sufficient for all letters used by SciTECO commands.
*/
-static inline gchar
-teco_ascii_toupper(gchar chr)
+static inline gunichar
+teco_ascii_toupper(gunichar chr)
{
return chr >= 'a' && chr <= 'z' ? chr & ~0x20 : chr;
}
+static inline gchar *
+teco_strv_remove(gchar **strv, guint i)
+{
+ gchar *ret = strv[i];
+ do
+ strv[i] = strv[i+1];
+ while (strv[++i]);
+ return ret;
+}
+
/**
* An 8-bit clean null-terminated string.
*
@@ -42,6 +52,7 @@ teco_ascii_toupper(gchar chr)
* and the allocation length is not stored.
* Just like GString, teco_string_t are always null-terminated but at the
* same time 8-bit clean (can contain null-characters).
+ * It may or may not contain UTF-8 byte sequences.
*
* The API is designed such that teco_string_t operations operate on plain
* (null-terminated) C strings, a single character or character array as well as
@@ -51,6 +62,12 @@ teco_ascii_toupper(gchar chr)
* A target teco_string_t::data is always null-terminated and thus safe to pass
* to functions expecting traditional null-terminated C strings if you can
* guarantee that it contains no null-character other than the trailing one.
+ *
+ * @warning For consistency with C idioms the underlying character type is
+ * `char`, which might be signed!
+ * Accessing individual characters may yield signed integers and that sign
+ * might be preserved when upcasting to a larger signed integer.
+ * In this case you should always cast to `guchar` first.
*/
typedef struct {
/**
@@ -58,7 +75,7 @@ typedef struct {
* The pointer is guaranteed to be non-NULL after initialization.
*/
gchar *data;
- /** Length of `data` without the trailing null-byte. */
+ /** Length of `data` without the trailing null-byte in bytes. */
gsize len;
} teco_string_t;
@@ -112,6 +129,16 @@ teco_string_append_c(teco_string_t *str, gchar chr)
teco_string_append(str, &chr, sizeof(chr));
}
+/** @memberof teco_string_t */
+static inline void
+teco_string_append_wc(teco_string_t *target, gunichar chr)
+{
+ /* 4 bytes should be enough for UTF-8, but g_unichar_to_utf8() documents a 6-byte output buffer */
+ target->data = g_realloc(target->data, target->len + 6 + 1);
+ target->len += g_unichar_to_utf8(chr, target->data+target->len);
+ target->data[target->len] = '\0';
+}
+
/**
* @fixme Should this also realloc str->data?
*
@@ -135,7 +162,7 @@ void undo__teco_string_truncate(teco_string_t *, gsize);
gchar *teco_string_echo(const gchar *str, gsize len);
-void teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column);
+void teco_string_get_coord(const gchar *str, gsize off, guint *pos, guint *line, guint *column);
typedef gsize (*teco_string_diff_t)(const teco_string_t *a, const gchar *b, gsize b_len);
gsize teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len);
@@ -170,6 +197,19 @@ teco_string_rindex(const teco_string_t *str, gchar chr)
const gchar *teco_string_last_occurrence(const teco_string_t *str, const gchar *chars);
+/**
+ * Validate whether string consists exclusively of valid UTF-8, but accept null bytes.
+ * @note There is g_utf8_validate_len() since GLib 2.60.
+ */
+static inline gboolean
+teco_string_validate_utf8(const teco_string_t *str)
+{
+ const gchar *p = str->data;
+ while (!g_utf8_validate(p, str->len - (p - str->data), &p) && !*p)
+ p++;
+ return p - str->data == str->len;
+}
+
/** @memberof teco_string_t */
static inline void
teco_string_clear(teco_string_t *str)
diff --git a/src/symbols-extract.tes b/src/symbols-extract.tes
index 9f43fa6..1ab6667 100755
--- a/src/symbols-extract.tes
+++ b/src/symbols-extract.tes
@@ -1,4 +1,4 @@
-#!/usr/local/bin/sciteco -m
+#!/usr/local/bin/sciteco -8m
!*
* ./symbols-extract.tes [-p <prefix pattern list>] -n <SymbolList object> [--] \
* <output file> <input header>
@@ -48,13 +48,12 @@ teco_symbols_init(void)
teco_symbol_list_init(&Q[getopt.n], entries, G_N_ELEMENTS(entries), FALSE);
}
-#ifndef NDEBUG
-static void __attribute__((destructor))
+static void TECO_DEBUG_CLEANUP
teco_cmdline_cleanup(void)
{
teco_symbol_list_clear(&Q[getopt.n]);
}
-#endif^J
+
!* write output file *!
2EL EWQ#ou
diff --git a/src/symbols.c b/src/symbols.c
index ce7a7f6..feead76 100644
--- a/src/symbols.c
+++ b/src/symbols.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -251,7 +251,7 @@ teco_state_scintilla_symbols_done(teco_machine_main_t *ctx, const teco_string_t
}
/* in cmdline.c */
-gboolean teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
/*$ ES scintilla message
* -- Send Scintilla message
diff --git a/src/symbols.h b/src/symbols.h
index 9cdfd74..0325d9d 100644
--- a/src/symbols.h
+++ b/src/symbols.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/src/undo.c b/src/undo.c
index 1b53fa1..bc12107 100644
--- a/src/undo.c
+++ b/src/undo.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -30,7 +30,7 @@
//#define DEBUG
-TECO_DEFINE_UNDO_SCALAR(gchar);
+TECO_DEFINE_UNDO_SCALAR(gunichar);
TECO_DEFINE_UNDO_SCALAR(gint);
TECO_DEFINE_UNDO_SCALAR(guint);
TECO_DEFINE_UNDO_SCALAR(gsize);
@@ -112,7 +112,7 @@ teco_undo_push_size(teco_undo_action_t action_cb, gsize size)
}
void
-teco_undo_pop(gint pc)
+teco_undo_pop(gsize pc)
{
while ((gint)teco_undo_heads->len > pc) {
teco_undo_token_t *top =
diff --git a/src/undo.h b/src/undo.h
index 2c9fc77..1d1d6fb 100644
--- a/src/undo.h
+++ b/src/undo.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -164,8 +164,8 @@ gpointer teco_undo_push_size(teco_undo_action_t action_cb, gsize size)
* significantly improves batch-mode performance.
*/
-TECO_DECLARE_UNDO_SCALAR(gchar);
-#define teco_undo_gchar(VAR) (*teco_undo_object_gchar_push(&(VAR)))
+TECO_DECLARE_UNDO_SCALAR(gunichar);
+#define teco_undo_gunichar(VAR) (*teco_undo_object_gunichar_push(&(VAR)))
TECO_DECLARE_UNDO_SCALAR(gint);
#define teco_undo_gint(VAR) (*teco_undo_object_gint_push(&(VAR)))
@@ -243,5 +243,5 @@ TECO_DECLARE_UNDO_SCALAR(gconstpointer);
/** @} */
-void teco_undo_pop(gint pc);
+void teco_undo_pop(gsize pc);
void teco_undo_clear(void);
diff --git a/src/view.c b/src/view.c
index 2e6df3f..7cdc987 100644
--- a/src/view.c
+++ b/src/view.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -45,6 +45,7 @@
#include "error.h"
#include "qreg.h"
#include "eol.h"
+#include "memory.h"
#include "view.h"
/** @memberof teco_view_t */
@@ -72,6 +73,27 @@ teco_view_setup(teco_view_t *ctx)
*/
teco_view_ssm(ctx, SCI_SETMARGINWIDTHN, 1, 0);
+ if (teco_ed & TECO_ED_DEFAULT_ANSI) {
+ /*
+ * Configure a single-byte codepage/charset.
+ * This requires setting it on all of the possible styles.
+ * Fortunately, we can do it before SCI_STYLECLEARALL.
+ * This is important only for display purposes - other than that
+ * all single-byte encodings are handled the same.
+ */
+ teco_view_ssm(ctx, SCI_STYLESETCHARACTERSET, STYLE_DEFAULT, SC_CHARSET_ANSI);
+ /* 0 is used for ALL single-byte encodings */
+ teco_view_ssm(ctx, SCI_SETCODEPAGE, 0, 0);
+ } else {
+ /*
+ * Documents are UTF-8 by default and all UTF-8 documents
+ * are expected to have a character index.
+ * This is a property of the document, instead of the view.
+ */
+ teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
+ }
+
/*
* Set some basic styles in order to provide
* a consistent look across UIs if no profile
@@ -137,6 +159,28 @@ teco_view_set_representations(teco_view_t *ctx)
gchar buf[] = {(gchar)cc, '\0'};
teco_view_ssm(ctx, SCI_SETREPRESENTATION, (uptr_t)buf, (sptr_t)reps[cc]);
}
+
+ if (teco_ed & TECO_ED_DEFAULT_ANSI) {
+ /*
+ * Non-ANSI chars should be visible somehow.
+ * This would best be done always when changing the
+ * encoding to 0, but it would be kind of expensive.
+ *
+ * FIXME: On the other hand, this could cause problems
+ * when setting SC_CP_UTF8 later on.
+ */
+ for (guint cc = 0x80; cc <= 0xFF; cc++) {
+ gchar buf[] = {(gchar)cc, '\0'};
+ gchar rep[2+1];
+ /*
+ * Hexadecimal is poorly supported in SciTECO, but
+ * multiple decimal numbers one after another look
+ * confusing, esp. in Curses.
+ */
+ g_snprintf(rep, sizeof(rep), "%02X", cc);
+ teco_view_ssm(ctx, SCI_SETREPRESENTATION, (uptr_t)buf, (sptr_t)rep);
+ }
+ }
}
/**
@@ -161,6 +205,9 @@ teco_view_set_representations(teco_view_t *ctx)
gboolean
teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **error)
{
+ g_auto(teco_eol_reader_t) reader;
+ teco_eol_reader_init_gio(&reader, channel);
+
teco_view_ssm(ctx, SCI_BEGINUNDOACTION, 0, 0);
teco_view_ssm(ctx, SCI_CLEARALL, 0, 0);
@@ -173,11 +220,11 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro
*/
struct stat stat_buf = {.st_size = 0};
if (!fstat(g_io_channel_unix_get_fd(channel), &stat_buf) &&
- stat_buf.st_size > 0)
+ stat_buf.st_size > 0) {
+ if (!teco_memory_check(stat_buf.st_size, error))
+ goto error;
teco_view_ssm(ctx, SCI_ALLOCATE, stat_buf.st_size, 0);
-
- g_auto(teco_eol_reader_t) reader;
- teco_eol_reader_init_gio(&reader, channel);
+ }
for (;;) {
/*
@@ -187,14 +234,24 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro
teco_string_t str;
GIOStatus rc = teco_eol_reader_convert(&reader, &str.data, &str.len, error);
- if (rc == G_IO_STATUS_ERROR) {
- teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0);
- return FALSE;
- }
+ if (rc == G_IO_STATUS_ERROR)
+ goto error;
if (rc == G_IO_STATUS_EOF)
break;
teco_view_ssm(ctx, SCI_APPENDTEXT, str.len, (sptr_t)str.data);
+
+ /*
+ * Even if we checked initially, knowing the file size,
+ * Scintilla could allocate many more bytes.
+ */
+ if (!teco_memory_check(0, error))
+ goto error;
+
+ if (G_UNLIKELY(teco_interface_is_interrupted())) {
+ teco_error_interrupted_set(error);
+ goto error;
+ }
}
/*
@@ -216,6 +273,10 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro
teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0);
return TRUE;
+
+error:
+ teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0);
+ return FALSE;
}
/**
@@ -449,3 +510,129 @@ teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError **error)
return TRUE;
}
+
+/**
+ * Convert a glyph index to a byte offset as used by Scintilla.
+ *
+ * This is optimized with the "line character index",
+ * which must always be enabled in UTF-8 documents.
+ *
+ * It is also used to validate glyph indexes.
+ *
+ * @param ctx The view to operate on.
+ * @param pos Position in glyphs/characters.
+ * @return Position in bytes or -1 if pos is out of bounds.
+ */
+gssize
+teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos)
+{
+ if (pos < 0)
+ return -1; /* invalid position */
+ if (!pos)
+ return 0;
+
+ if (!(teco_view_ssm(ctx, SCI_GETLINECHARACTERINDEX, 0, 0) &
+ SC_LINECHARACTERINDEX_UTF32))
+ /* assume single-byte encoding */
+ return pos <= teco_view_ssm(ctx, SCI_GETLENGTH, 0, 0) ? pos : -1;
+
+ sptr_t line = teco_view_ssm(ctx, SCI_LINEFROMINDEXPOSITION, pos,
+ SC_LINECHARACTERINDEX_UTF32);
+ sptr_t line_bytes = teco_view_ssm(ctx, SCI_POSITIONFROMLINE, line, 0);
+ pos -= teco_view_ssm(ctx, SCI_INDEXPOSITIONFROMLINE, line,
+ SC_LINECHARACTERINDEX_UTF32);
+ return teco_view_ssm(ctx, SCI_POSITIONRELATIVE, line_bytes, pos) ? : -1;
+}
+
+/**
+ * Convert byte offset to glyph/character index without bounds checking.
+ */
+teco_int_t
+teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos)
+{
+ if (!pos)
+ return 0;
+
+ if (!(teco_view_ssm(ctx, SCI_GETLINECHARACTERINDEX, 0, 0) &
+ SC_LINECHARACTERINDEX_UTF32))
+ /* assume single-byte encoding */
+ return pos;
+
+ sptr_t line = teco_view_ssm(ctx, SCI_LINEFROMPOSITION, pos, 0);
+ sptr_t line_bytes = teco_view_ssm(ctx, SCI_POSITIONFROMLINE, line, 0);
+ return teco_view_ssm(ctx, SCI_INDEXPOSITIONFROMLINE, line,
+ SC_LINECHARACTERINDEX_UTF32) +
+ teco_view_ssm(ctx, SCI_COUNTCHARACTERS, line_bytes, pos);
+}
+
+#define TECO_RELATIVE_LIMIT 1024
+
+/**
+ * Convert a glyph index relative to a byte position to
+ * a byte position.
+ *
+ * Can be used to implement commands with relative character
+ * ranges.
+ * As an optimization, this always counts characters for deltas
+ * smaller than TECO_RELATIVE_LIMIT, so it will be fast
+ * even where the character-index based lookup is too slow
+ * (as on exceedingly long lines).
+ *
+ * @param ctx The view to operate on.
+ * @param pos Byte position to start.
+ * @param n Number of glyphs/characters to the left (negative) or
+ * right (positive) of pos.
+ * @return Position in bytes or -1 if the resulting position is out of bounds.
+ */
+gssize
+teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n)
+{
+ if (!n)
+ return pos;
+ if (ABS(n) > TECO_RELATIVE_LIMIT)
+ return teco_view_glyphs2bytes(ctx, teco_view_bytes2glyphs(ctx, pos) + n);
+
+ sptr_t res = teco_view_ssm(ctx, SCI_POSITIONRELATIVE, pos, n);
+ /* SCI_POSITIONRELATIVE may return 0 even if the offset is valid */
+ return res ? : n > 0 ? -1 : teco_view_bytes2glyphs(ctx, pos)+n >= 0 ? 0 : -1;
+}
+
+/**
+ * Get codepoint at given byte offset.
+ *
+ * @param ctx The view to operate on.
+ * @param pos The glyph's byte position
+ * @param len The length of the document in bytes
+ * @return The requested codepoint.
+ * In UTF-8 encoded documents, this might be -1 (incomplete sequence)
+ * or -2 (invalid byte sequence).
+ */
+teco_int_t
+teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len)
+{
+ if (teco_view_ssm(ctx, SCI_GETCODEPAGE, 0, 0) != SC_CP_UTF8)
+ /*
+ * We don't support the Asian multi-byte encodings,
+ * so everything else is a single-byte codepage.
+ * NOTE: Internally, the character is cast to signed char
+ * and may therefore become negative.
+ */
+ return (guchar)teco_view_ssm(ctx, SCI_GETCHARAT, pos, 0);
+
+ gchar buf[4+1];
+ struct Sci_TextRangeFull range = {
+ .chrg = {pos, MIN(len, pos+sizeof(buf)-1)},
+ .lpstrText = buf
+ };
+ /*
+ * Probably faster than SCI_GETRANGEPOINTER+SCI_GETGAPPOSITION
+ * or repeatedly calling SCI_GETCHARAT.
+ */
+ teco_view_ssm(ctx, SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range);
+ /*
+ * Make sure that the -1/-2 error values are preserved.
+ * The sign bit in UCS-4/UTF-32 is unused, so this will even
+ * suffice if TECO_INTEGER == 32.
+ */
+ return (gint32)g_utf8_get_char_validated(buf, -1);
+}
diff --git a/src/view.h b/src/view.h
index 50cd98c..8f54fdd 100644
--- a/src/view.h
+++ b/src/view.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2023 Robin Haberkorn
+ * Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -70,3 +70,16 @@ gboolean teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError
/** @pure @memberof teco_view_t */
void teco_view_free(teco_view_t *ctx);
+
+static inline guint
+teco_view_get_codepage(teco_view_t *ctx)
+{
+ return teco_view_ssm(ctx, SCI_GETCODEPAGE, 0, 0)
+ ? : teco_view_ssm(ctx, SCI_STYLEGETCHARACTERSET, STYLE_DEFAULT, 0);
+}
+
+gssize teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos);
+teco_int_t teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos);
+gssize teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n);
+
+teco_int_t teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len);