diff options
Diffstat (limited to 'src')
66 files changed, 3286 insertions, 1196 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index b9aca8a..5b2572e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -67,10 +67,9 @@ noinst_PROGRAMS = sciteco-minimal sciteco_minimal_SOURCES = symbols-scintilla.c symbols-scilexer.c : sciteco-minimal$(EXEEXT) endif -sciteco_minimal_LDADD = libsciteco-base.la \ - @SCINTILLA_PATH@/bin/scintilla.a +sciteco_minimal_LDADD = libsciteco-base.la $(LIBSCINTILLA) if LEXILLA -sciteco_minimal_LDADD += @LEXILLA_PATH@/bin/liblexilla.a +sciteco_minimal_LDADD += $(LIBLEXILLA) endif # Scintilla is unfortunately still written in C++, so we must force # Automake to use the C++ linker when linking the binaries. @@ -99,10 +98,10 @@ CLEANFILES = $(BUILT_SOURCES) \ symbols-scintilla.c : @SCINTILLA_PATH@/include/Scintilla.h \ symbols-extract.tes - $(SCITECO_MINIMAL) -m -- @srcdir@/symbols-extract.tes \ + $(SCITECO_MINIMAL) -8m -- @srcdir@/symbols-extract.tes \ -p "SCI_" -n teco_symbol_list_scintilla $@ $< symbols-scilexer.c : @LEXILLA_PATH@/include/SciLexer.h \ symbols-extract.tes - $(SCITECO_MINIMAL) -m -- @srcdir@/symbols-extract.tes \ + $(SCITECO_MINIMAL) -8m -- @srcdir@/symbols-extract.tes \ -p "SCE_" -n teco_symbol_list_scilexer $@ $< diff --git a/src/cmdline.c b/src/cmdline.c index 58d48b4..816816c 100644 --- a/src/cmdline.c +++ b/src/cmdline.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -52,6 +52,7 @@ #include "eol.h" #include "error.h" #include "qreg.h" +#include "glob.h" #include "cmdline.h" #if defined(HAVE_MALLOC_TRIM) && !defined(HAVE_DECL_MALLOC_TRIM) @@ -81,12 +82,12 @@ static teco_string_t teco_last_cmdline = {NULL, 0}; * @param error A GError. 
* @return FALSE to throw a GError */ -gboolean +static gboolean teco_cmdline_insert(const gchar *data, gsize len, GError **error) { const teco_string_t src = {(gchar *)data, len}; - teco_string_t old_cmdline = {NULL, 0}; - guint repl_pc = 0; + g_auto(teco_string_t) old_cmdline = {NULL, 0}; + gsize repl_pc = 0; teco_cmdline.machine.macro_pc = teco_cmdline.pc = teco_cmdline.effective_len; @@ -109,8 +110,6 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error) /* * Parse/execute characters, one at a time so * undo tokens get emitted for the corresponding characters. - * - * FIXME: The inner loop should be factored out. */ while (teco_cmdline.pc < teco_cmdline.effective_len) { g_autoptr(GError) tmp_error = NULL; @@ -125,7 +124,8 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error) teco_qreg_t *cmdline_reg = teco_qreg_table_find(&teco_qreg_table_globals, "\e", 1); teco_string_t new_cmdline; - if (!cmdline_reg->vtable->get_string(cmdline_reg, &new_cmdline.data, &new_cmdline.len, error)) + if (!cmdline_reg->vtable->get_string(cmdline_reg, &new_cmdline.data, &new_cmdline.len, + NULL, error)) return FALSE; /* @@ -160,6 +160,7 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error) teco_string_clear(&teco_cmdline.str); teco_cmdline.str = old_cmdline; + memset(&old_cmdline, 0, sizeof(old_cmdline)); teco_cmdline.machine.macro_pc = teco_cmdline.pc = repl_pc; /* rubout cmdline replacement command */ @@ -179,55 +180,65 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error) return TRUE; } +static gboolean +teco_cmdline_rubin(GError **error) +{ + if (!teco_cmdline.str.len) + return TRUE; + + const gchar *start, *end, *next; + start = teco_cmdline.str.data+teco_cmdline.effective_len; + end = teco_cmdline.str.data+teco_cmdline.str.len; + next = g_utf8_find_next_char(start, end) ? : end; + return teco_cmdline_insert(start, next-start, error); +} + +/** + * Process key press or expansion of key macro. 
+ * + * Should be called only with the results of a single keypress. + * They are considered an unity and in case of errors, we + * rubout the entire sequence (unless there was a $$ return in the + * middle). + * + * @param data Key presses in UTF-8. + * @param len Length of data. + * @param error A GError. + * @return FALSE if error was set. + * If TRUE was returned, there could still have been an error, + * but it has already been handled. + */ gboolean -teco_cmdline_keypress_c(gchar key, GError **error) +teco_cmdline_keypress(const gchar *data, gsize len, GError **error) { + const teco_string_t str = {(gchar *)data, len}; teco_machine_t *machine = &teco_cmdline.machine.parent; - g_autoptr(GError) tmp_error = NULL; + + if (!teco_string_validate_utf8(&str)) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Invalid UTF-8 sequence"); + return FALSE; + } /* - * Cleanup messages,etc... + * Cleanup messages, etc... */ teco_interface_msg_clear(); - /* - * Process immediate editing commands, inserting - * characters as necessary into the command line. - */ - if (!machine->current->process_edit_cmd_cb(machine, NULL, key, &tmp_error)) { - if (g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN)) { - /* - * Return from top-level macro, results - * in command line termination. - * The return "arguments" are currently - * ignored. - */ - g_assert(machine->current == &teco_state_start); + gsize start_pc = teco_cmdline.effective_len; - teco_interface_popup_clear(); + for (guint i = 0; i < len; i = g_utf8_next_char(data+i) - data) { + gunichar chr = g_utf8_get_char(data+i); + g_autoptr(GError) tmp_error = NULL; - if (teco_quit_requested) { - /* cought by user interface */ - g_set_error_literal(error, TECO_ERROR, TECO_ERROR_QUIT, ""); - return FALSE; - } + /* + * Process immediate editing commands, inserting + * characters as necessary into the command line. 
+ */ + if (machine->current->process_edit_cmd_cb(machine, NULL, chr, &tmp_error)) + continue; - teco_undo_clear(); - /* also empties all Scintilla undo buffers */ - teco_ring_set_scintilla_undo(TRUE); - teco_view_set_scintilla_undo(teco_qreg_view, TRUE); - /* - * FIXME: Reset main machine? - */ - teco_goto_table_clear(&teco_cmdline.machine.goto_table); - teco_expressions_clear(); - g_array_remove_range(teco_loop_stack, 0, teco_loop_stack->len); - - teco_string_clear(&teco_last_cmdline); - teco_last_cmdline = teco_cmdline.str; - memset(&teco_cmdline.str, 0, sizeof(teco_cmdline.str)); - teco_cmdline.effective_len = 0; - } else { + if (!g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN)) { /* * NOTE: Error message already displayed in * teco_cmdline_insert(). @@ -237,29 +248,76 @@ teco_cmdline_keypress_c(gchar key, GError **error) * is thrown. They must be executed so * as if the character had never been * inserted. + * Actually we rub out the entire command line + * up until the insertion point. */ - teco_undo_pop(teco_cmdline.pc); - teco_cmdline.effective_len = teco_cmdline.pc; + teco_undo_pop(start_pc); + teco_cmdline.effective_len = start_pc; /* program counter could be messed up */ teco_cmdline.machine.macro_pc = teco_cmdline.effective_len; - } #ifdef HAVE_MALLOC_TRIM + /* + * Undo stacks can grow very large - sometimes large enough to + * make the system swap and become unresponsive. + * This shrinks the program break after lots of memory has + * been freed, reducing the virtual memory size and aiding + * in recovering from swapping issues. + * + * This is particularily important with some memory limiting backends + * after hitting the memory limit* as otherwise the program's resident + * size won't shrink and it would be impossible to recover. 
+ */ + if (g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_MEMLIMIT)) + malloc_trim(0); +#endif + + break; + } + /* - * Undo stacks can grow very large - sometimes large enough to - * make the system swap and become unresponsive. - * This shrinks the program break after lots of memory has - * been freed, reducing the virtual memory size and aiding - * in recovering from swapping issues. - * - * This is particularily important with some memory limiting backends - * after hitting the memory limit* as otherwise the program's resident - * size won't shrink and it would be impossible to recover. + * Return from top-level macro, results + * in command line termination. + * The return "arguments" are currently + * ignored. + */ + g_assert(machine->current == &teco_state_start); + + teco_interface_popup_clear(); + + if (teco_quit_requested) { + /* caught by user interface */ + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_QUIT, ""); + return FALSE; + } + + teco_undo_clear(); + /* also empties all Scintilla undo buffers */ + teco_ring_set_scintilla_undo(TRUE); + teco_view_set_scintilla_undo(teco_qreg_view, TRUE); + /* + * FIXME: Reset main machine? */ - if (g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN) || - g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_MEMLIMIT)) - malloc_trim(0); + teco_goto_table_clear(&teco_cmdline.machine.goto_table); + teco_expressions_clear(); + g_array_remove_range(teco_loop_stack, 0, teco_loop_stack->len); + + teco_string_clear(&teco_last_cmdline); + teco_last_cmdline = teco_cmdline.str; + memset(&teco_cmdline.str, 0, sizeof(teco_cmdline.str)); + teco_cmdline.effective_len = 0; + +#ifdef HAVE_MALLOC_TRIM + /* see above */ + malloc_trim(0); #endif + + /* + * Continue with the other keys, + * but we obviously can't rub out beyond the return if any + * error occurs later on. 
+ */ + start_pc = 0; } /* @@ -269,33 +327,40 @@ teco_cmdline_keypress_c(gchar key, GError **error) return TRUE; } -gboolean -teco_cmdline_fnmacro(const gchar *name, GError **error) +teco_keymacro_status_t +teco_cmdline_keymacro(const gchar *name, gssize name_len, GError **error) { g_assert(name != NULL); + if (name_len < 0) + name_len = strlen(name); + /* * NOTE: It should be safe to allocate on the stack since * there are only a limited number of possible function key macros. */ - gchar macro_name[1 + strlen(name)]; - macro_name[0] = TECO_CTL_KEY('F'); - memcpy(macro_name+1, name, sizeof(macro_name)-1); + gchar macro_name[1 + name_len]; + macro_name[0] = TECO_CTL_KEY('K'); + memcpy(macro_name+1, name, name_len); - teco_qreg_t *macro_reg; - - if (teco_ed & TECO_ED_FNKEYS && - (macro_reg = teco_qreg_table_find(&teco_qreg_table_globals, macro_name, sizeof(macro_name)))) { + teco_qreg_t *macro_reg = teco_qreg_table_find(&teco_qreg_table_globals, macro_name, sizeof(macro_name)); + if (macro_reg) { teco_int_t macro_mask; if (!macro_reg->vtable->get_integer(macro_reg, ¯o_mask, error)) - return FALSE; + return TECO_KEYMACRO_ERROR; - if (macro_mask & teco_cmdline.machine.parent.current->fnmacro_mask) - return TRUE; + /* + * FIXME: This does not work with Q-Register specs embedded into string arguments. + * There should be a keymacro_mask_cb() instead. + */ + if (!((teco_cmdline.machine.parent.current->keymacro_mask | + teco_cmdline.machine.expectstring.machine.parent.current->keymacro_mask) & ~macro_mask)) + return TECO_KEYMACRO_UNDEFINED; g_auto(teco_string_t) macro_str = {NULL, 0}; - return macro_reg->vtable->get_string(macro_reg, ¯o_str.data, ¯o_str.len, error) && - teco_cmdline_keypress(macro_str.data, macro_str.len, error); + return macro_reg->vtable->get_string(macro_reg, ¯o_str.data, ¯o_str.len, NULL, error) && + teco_cmdline_keypress(macro_str.data, macro_str.len, error) + ? 
TECO_KEYMACRO_SUCCESS : TECO_KEYMACRO_ERROR; } /* @@ -303,28 +368,34 @@ teco_cmdline_fnmacro(const gchar *name, GError **error) * except "CLOSE" which quits the application * (this may loose unsaved data but is better than * not doing anything if the user closes the window). - * NOTE: Doing the check here is less efficient than - * doing it in the UI implementations, but defines - * the default actions centrally. - * Also, fnmacros are only handled after key presses. */ - if (!strcmp(name, "CLOSE")) { + if (name_len == 5 && !strncmp(name, "CLOSE", name_len)) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_QUIT, ""); - return FALSE; + return TECO_KEYMACRO_ERROR; } - return TRUE; + return TECO_KEYMACRO_UNDEFINED; +} + +static void +teco_cmdline_rubout(void) +{ + const gchar *p; + p = g_utf8_find_prev_char(teco_cmdline.str.data, + teco_cmdline.str.data+teco_cmdline.effective_len); + if (p) { + teco_cmdline.effective_len = p - teco_cmdline.str.data; + teco_undo_pop(teco_cmdline.effective_len); + } } -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_cmdline_cleanup(void) { teco_machine_main_clear(&teco_cmdline.machine); teco_string_clear(&teco_cmdline.str); teco_string_clear(&teco_last_cmdline); } -#endif /* * Commandline key processing. 
@@ -337,7 +408,7 @@ teco_cmdline_cleanup(void) */ gboolean -teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { switch (key) { case '\n': /* insert EOL sequence */ @@ -407,23 +478,30 @@ teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gch } teco_interface_popup_clear(); - return teco_cmdline_insert(&key, sizeof(key), error); + + gchar buf[6]; + gsize len = g_unichar_to_utf8(key, buf); + return teco_cmdline_insert(buf, len, error); } gboolean -teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { + /* + * Auto case folding is for syntactic characters, + * so this could be done by working only with a-z and A-Z. + * However, it's also not speed critical. + */ if (teco_ed & TECO_ED_AUTOCASEFOLD) - /* will not modify non-letter keys */ - key = g_ascii_islower(key) ? g_ascii_toupper(key) - : g_ascii_tolower(key); + key = g_unichar_islower(key) ? g_unichar_toupper(key) + : g_unichar_tolower(key); return teco_state_process_edit_cmd(ctx, parent_ctx, key, error); } gboolean teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, - gchar key, GError **error) + gunichar key, GError **error) { teco_state_t *current = ctx->parent.current; @@ -460,20 +538,15 @@ teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t * * get the default behaviour of teco_state_process_edit_cmd(). * This may not be a real-life issue serious enough to maintain * a result string even in parse-only mode. 
- * - * FIXME: Does not properly rubout string-building commands at the - * start of the string argument -- ctx->result->len is not - * a valid indicator of argument emptyness. - * Since it chains to teco_state_process_edit_cmd() we will instead - * rubout the entire command. */ if (ctx->result && ctx->result->len > 0) { gboolean is_wordchar = teco_string_contains(&wchars, teco_cmdline.str.data[teco_cmdline.effective_len-1]); teco_cmdline_rubout(); if (ctx->parent.current != current) { /* rub out string building command */ - while (ctx->result->len > 0 && ctx->parent.current != current) + do teco_cmdline_rubout(); + while (ctx->parent.current != current); return TRUE; } @@ -572,8 +645,29 @@ teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t * } gboolean +teco_state_stringbuilding_escaped_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, + gunichar key, GError **error) +{ + /* + * Allow insertion of characters that would otherwise be interpreted as + * immediate editing commands after ^Q/^R. 
+ */ + switch (key) { + //case TECO_CTL_KEY('G'): + case TECO_CTL_KEY('W'): + case TECO_CTL_KEY('U'): + teco_interface_popup_clear(); + + gchar c = key; + return teco_cmdline_insert(&c, sizeof(c), error); + } + + return teco_state_process_edit_cmd(parent_ctx, NULL, key, error); +} + +gboolean teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, - gchar chr, GError **error) + gunichar chr, GError **error) { g_assert(ctx->machine_qregspec != NULL); /* We downcast since teco_machine_qregspec_t is private in qreg.c */ @@ -582,7 +676,7 @@ teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *c } gboolean -teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -590,7 +684,7 @@ teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_ } gboolean -teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -626,7 +720,7 @@ teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *par } gboolean -teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t 
*stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -696,8 +790,8 @@ teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t gboolean unambiguous = teco_file_auto_complete(ctx->expectstring.string.data, G_FILE_TEST_EXISTS, &new_chars); teco_machine_stringbuilding_escape(stringbuilding_ctx, new_chars.data, new_chars.len, &new_chars_escaped); if (unambiguous && ctx->expectstring.nesting == 1) - teco_string_append_c(&new_chars_escaped, - ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char); + teco_string_append_wc(&new_chars_escaped, + ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char); return teco_cmdline_insert(new_chars_escaped.data, new_chars_escaped.len, error); } @@ -707,7 +801,61 @@ teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t } gboolean -teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_expectglob_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) +{ + teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; + teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; + + /* + * NOTE: We don't just define teco_state_stringbuilding_start_process_edit_cmd(), + * as it would be hard to subclass/overwrite for different main machine states. 
+ */ + if (!stringbuilding_current->is_start) + return stringbuilding_current->process_edit_cmd_cb(&stringbuilding_ctx->parent, &ctx->parent, key, error); + + switch (key) { + case '\t': { /* autocomplete file name */ + if (teco_cmdline.modifier_enabled) + break; + + if (teco_interface_popup_is_shown()) { + /* cycle through popup pages */ + teco_interface_popup_show(); + return TRUE; + } + + if (teco_string_contains(&ctx->expectstring.string, '\0')) + /* null-byte not allowed in file names */ + return TRUE; + + /* + * We do not support autocompleting glob patterns. + * + * FIXME: What if the last autocompletion inserted escaped glob + * characters? + * Perhaps teco_file_auto_complete() should natively support glob patterns. + */ + if (teco_globber_is_pattern(ctx->expectstring.string.data)) + return TRUE; + + g_auto(teco_string_t) new_chars, new_chars_escaped; + gboolean unambiguous = teco_file_auto_complete(ctx->expectstring.string.data, G_FILE_TEST_EXISTS, &new_chars); + g_autofree gchar *pattern_escaped = teco_globber_escape_pattern(new_chars.data); + teco_machine_stringbuilding_escape(stringbuilding_ctx, pattern_escaped, strlen(pattern_escaped), &new_chars_escaped); + if (unambiguous && ctx->expectstring.nesting == 1) + teco_string_append_wc(&new_chars_escaped, + ctx->expectstring.machine.escape_char == '{' ? 
'}' : ctx->expectstring.machine.escape_char); + + return teco_cmdline_insert(new_chars_escaped.data, new_chars_escaped.len, error); + } + } + + /* ^W should behave like in commands accepting files */ + return teco_state_expectfile_process_edit_cmd(ctx, parent_ctx, key, error); +} + +gboolean +teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -745,11 +893,12 @@ teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t * } } - return stringbuilding_current->process_edit_cmd_cb(&stringbuilding_ctx->parent, &ctx->parent, key, error); + /* ^W should behave like in commands accepting files */ + return teco_state_expectfile_process_edit_cmd(ctx, parent_ctx, key, error); } gboolean -teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { g_assert(ctx->expectqreg != NULL); /* @@ -761,7 +910,7 @@ teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t } gboolean -teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { switch (key) { case '\t': { /* autocomplete Q-Register name */ @@ -796,7 +945,7 @@ teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_ } gboolean -teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gunichar 
key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = teco_machine_qregspec_get_stringbuilding(ctx); teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -836,7 +985,7 @@ teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_m } gboolean -teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -881,7 +1030,7 @@ teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *pa } gboolean -teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -926,7 +1075,7 @@ teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_mac } gboolean -teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) +teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -973,7 +1122,7 @@ teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *paren } gboolean -teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error) 
+teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error) { teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine; teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current; @@ -1004,8 +1153,8 @@ teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *paren gboolean unambiguous = teco_help_auto_complete(ctx->expectstring.string.data, &new_chars); teco_machine_stringbuilding_escape(stringbuilding_ctx, new_chars.data, new_chars.len, &new_chars_escaped); if (unambiguous && ctx->expectstring.nesting == 1) - teco_string_append_c(&new_chars_escaped, - ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char); + teco_string_append_wc(&new_chars_escaped, + ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char); return new_chars_escaped.len ? teco_cmdline_insert(new_chars_escaped.data, new_chars_escaped.len, error) : TRUE; } @@ -1028,7 +1177,8 @@ teco_state_save_cmdline_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg return &teco_state_start; if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len, error)) + !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len, + teco_default_codepage(), error)) return NULL; return &teco_state_start; diff --git a/src/cmdline.h b/src/cmdline.h index 85e657a..f4b84e4 100644 --- a/src/cmdline.h +++ b/src/cmdline.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -46,7 +46,7 @@ typedef struct { gsize effective_len; /** Program counter within the command-line macro */ - guint pc; + gsize pc; /** * Specifies whether the immediate editing modifier @@ 
-60,35 +60,30 @@ typedef struct { extern teco_cmdline_t teco_cmdline; -gboolean teco_cmdline_insert(const gchar *data, gsize len, GError **error); +gboolean teco_cmdline_keypress(const gchar *data, gsize len, GError **error); -static inline gboolean -teco_cmdline_rubin(GError **error) -{ - return teco_cmdline.effective_len >= teco_cmdline.str.len || - teco_cmdline_insert(teco_cmdline.str.data + teco_cmdline.effective_len, 1, error); -} +typedef enum { + TECO_KEYMACRO_ERROR = 0, /**< GError occurred */ + TECO_KEYMACRO_SUCCESS, /**< key macro found and inserted */ + TECO_KEYMACRO_UNDEFINED /**< no key macro found */ +} teco_keymacro_status_t; -gboolean teco_cmdline_keypress_c(gchar key, GError **error); +teco_keymacro_status_t teco_cmdline_keymacro(const gchar *name, gssize name_len, GError **error); static inline gboolean -teco_cmdline_keypress(const gchar *str, gsize len, GError **error) +teco_cmdline_keymacro_c(gchar key, GError **error) { - for (guint i = 0; i < len; i++) - if (!teco_cmdline_keypress_c(str[i], error)) - return FALSE; + switch (teco_cmdline_keymacro(&key, sizeof(key), error)) { + case TECO_KEYMACRO_ERROR: + return FALSE; + case TECO_KEYMACRO_SUCCESS: + break; + case TECO_KEYMACRO_UNDEFINED: + return teco_cmdline_keypress(&key, sizeof(key), error); + } return TRUE; } -gboolean teco_cmdline_fnmacro(const gchar *name, GError **error); - -static inline void -teco_cmdline_rubout(void) -{ - if (teco_cmdline.effective_len) - teco_undo_pop(--teco_cmdline.effective_len); -} - extern gboolean teco_quit_requested; /* diff --git a/src/core-commands.c b/src/core-commands.c index 4d5b378..0cde7e0 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,7 +45,7 @@ #include "goto-commands.h" #include 
"core-commands.h" -static teco_state_t *teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error); +static teco_state_t *teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error); /* * NOTE: This needs some extra code in teco_state_start_input(). @@ -129,7 +129,8 @@ teco_state_start_dot(teco_machine_main_t *ctx, GError **error) { if (!teco_expressions_eval(FALSE, error)) return; - teco_expressions_push(teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0)); + sptr_t pos = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + teco_expressions_push(teco_interface_bytes2glyphs(pos)); } /*$ Z size @@ -145,7 +146,8 @@ teco_state_start_zed(teco_machine_main_t *ctx, GError **error) { if (!teco_expressions_eval(FALSE, error)) return; - teco_expressions_push(teco_interface_ssm(SCI_GETLENGTH, 0, 0)); + sptr_t pos = teco_interface_ssm(SCI_GETLENGTH, 0, 0); + teco_expressions_push(teco_interface_bytes2glyphs(pos)); } /*$ H @@ -162,10 +164,11 @@ teco_state_start_range(teco_machine_main_t *ctx, GError **error) if (!teco_expressions_eval(FALSE, error)) return; teco_expressions_push(0); - teco_expressions_push(teco_interface_ssm(SCI_GETLENGTH, 0, 0)); + sptr_t pos = teco_interface_ssm(SCI_GETLENGTH, 0, 0); + teco_expressions_push(teco_interface_bytes2glyphs(pos)); } -/*$ "\\" +/*$ \[rs] * n\\ -- Insert or read ASCII numbers * \\ -> n * @@ -241,6 +244,7 @@ teco_state_start_loop_open(teco_machine_main_t *ctx, GError **error) if (!teco_expressions_eval(FALSE, error) || !teco_expressions_pop_num_calc(&lctx.counter, -1, error)) return; + lctx.brace_level = teco_brace_level; lctx.pass_through = teco_machine_main_eval_colon(ctx); if (lctx.counter) { @@ -280,6 +284,14 @@ teco_state_start_loop_close(teco_machine_main_t *ctx, GError **error) teco_loop_context_t *lctx = &g_array_index(teco_loop_stack, teco_loop_context_t, teco_loop_stack->len-1); + + /* only non-pass-through loops increase the brace level */ + if (teco_brace_level != lctx->brace_level + 
!lctx->pass_through) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, + "Brace left open at loop end command"); + return; + } + gboolean colon_modified = teco_machine_main_eval_colon(ctx); /* @@ -348,7 +360,7 @@ teco_state_start_break(teco_machine_main_t *ctx, GError **error) { if (teco_loop_stack->len <= ctx->loop_stack_fp) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, - "<;> only allowed in iterations"); + "<;> only allowed in loops"); return; } @@ -373,7 +385,7 @@ teco_state_start_break(teco_machine_main_t *ctx, GError **error) if (!teco_expressions_discard_args(error)) return; if (!lctx.pass_through && - !teco_expressions_brace_close(error)) + !teco_expressions_brace_return(lctx.brace_level, 0, error)) return; undo__insert_val__teco_loop_stack(teco_loop_stack->len, lctx); @@ -511,11 +523,12 @@ teco_state_start_jump(teco_machine_main_t *ctx, GError **error) if (!teco_expressions_pop_num_calc(&v, 0, error)) return; - if (teco_validate_pos(v)) { + gssize pos = teco_interface_glyphs2bytes(v); + if (pos >= 0) { if (teco_current_doc_must_undo()) undo__teco_interface_ssm(SCI_GOTOPOS, teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0), 0); - teco_interface_ssm(SCI_GOTOPOS, v, 0); + teco_interface_ssm(SCI_GOTOPOS, pos, 0); if (teco_machine_main_eval_colon(ctx)) teco_expressions_push(TECO_SUCCESS); @@ -531,11 +544,11 @@ static teco_bool_t teco_move_chars(teco_int_t n) { sptr_t pos = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); - - if (!teco_validate_pos(pos + n)) + gssize next_pos = teco_interface_glyphs2bytes_relative(pos, n); + if (next_pos < 0) return TECO_FAILURE; - teco_interface_ssm(SCI_GOTOPOS, pos + n, 0); + teco_interface_ssm(SCI_GOTOPOS, next_pos, 0); if (teco_current_doc_must_undo()) undo__teco_interface_ssm(SCI_GOTOPOS, pos, 0); @@ -879,7 +892,7 @@ static gboolean teco_state_start_kill(teco_machine_main_t *ctx, const gchar *cmd, gboolean by_lines, GError **error) { teco_bool_t rc; - teco_int_t from, len; + gssize from, len; /* in bytes */ 
if (!teco_expressions_eval(FALSE, error)) return FALSE; @@ -894,20 +907,24 @@ teco_state_start_kill(teco_machine_main_t *ctx, const gchar *cmd, gboolean by_li len = teco_interface_ssm(SCI_POSITIONFROMLINE, line, 0) - from; rc = teco_bool(teco_validate_line(line)); } else { - if (!teco_expressions_pop_num_calc(&len, teco_num_sign, error)) + teco_int_t len_glyphs; + if (!teco_expressions_pop_num_calc(&len_glyphs, teco_num_sign, error)) return FALSE; - rc = teco_bool(teco_validate_pos(from + len)); + gssize to = teco_interface_glyphs2bytes_relative(from, len_glyphs); + rc = teco_bool(to >= 0); + len = to-from; } if (len < 0) { len *= -1; from -= len; } } else { - teco_int_t to = teco_expressions_pop_num(0); - from = teco_expressions_pop_num(0); + teco_int_t to_glyphs = teco_expressions_pop_num(0); + gssize to = teco_interface_glyphs2bytes(to_glyphs); + teco_int_t from_glyphs = teco_expressions_pop_num(0); + from = teco_interface_glyphs2bytes(from_glyphs); len = to - from; - rc = teco_bool(len >= 0 && teco_validate_pos(from) && - teco_validate_pos(to)); + rc = teco_bool(len >= 0 && from >= 0 && to >= 0); } if (teco_machine_main_eval_colon(ctx)) { @@ -1002,6 +1019,9 @@ teco_state_start_delete_chars(teco_machine_main_t *ctx, GError **error) * This can be an ASCII <code> or Unicode codepoint * depending on Scintilla's encoding of the current * buffer. + * Invalid Unicode byte sequences are reported as + * -1 or -2. + * * - If <n> is 0, return the <code> of the character * pointed to by dot. * - If <n> is 1, return the <code> of the character @@ -1012,28 +1032,33 @@ teco_state_start_delete_chars(teco_machine_main_t *ctx, GError **error) * * If the position of the queried character is off-page, * the command will yield an error. + * + * If the document is encoded as UTF-8 and there is + * an incomplete sequence at the requested position, + * -1 is returned. + * All other invalid Unicode sequences are returned as -2. */ -/** @todo does Scintilla really return code points??? 
*/ static void teco_state_start_get(teco_machine_main_t *ctx, GError **error) { teco_int_t v; if (!teco_expressions_pop_num_calc(&v, teco_num_sign, error)) return; - v += teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); - /* - * NOTE: We cannot use teco_validate_pos() here since - * the end of the buffer is not a valid position for <A>. - */ - if (v < 0 || v >= teco_interface_ssm(SCI_GETLENGTH, 0, 0)) { + + sptr_t pos = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + gssize get_pos = teco_interface_glyphs2bytes_relative(pos, v); + sptr_t len = teco_interface_ssm(SCI_GETLENGTH, 0, 0); + + if (get_pos < 0 || get_pos == len) { teco_error_range_set(error, "A"); return; } - teco_expressions_push(teco_interface_ssm(SCI_GETCHARAT, v, 0)); + + teco_expressions_push(teco_interface_get_character(get_pos, len)); } static teco_state_t * -teco_state_start_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { static teco_machine_main_transition_t transitions[] = { /* @@ -1148,7 +1173,7 @@ teco_state_start_input(teco_machine_main_t *ctx, gchar chr, GError **error) * * FIXME: Maybe, there should be a special teco_state_t * for beginnings of command-lines? - * It could also be used for a corresponding FNMACRO mask. + * It could also be used for a corresponding KEYMACRO mask. */ if (teco_cmdline.effective_len == 1 && teco_cmdline.str.data[0] == '*') return &teco_state_save_cmdline; @@ -1244,7 +1269,7 @@ teco_state_start_input(teco_machine_main_t *ctx, gchar chr, GError **error) TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_start, .end_of_macro_cb = NULL, /* Allowed at the end of a macro! 
*/ .is_start = TRUE, - .fnmacro_mask = TECO_FNMACRO_MASK_START + .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE ); /*$ F< @@ -1372,7 +1397,7 @@ teco_state_fcommand_cond_else(teco_machine_main_t *ctx, GError **error) } static teco_state_t * -teco_state_fcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_fcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { static teco_machine_main_transition_t transitions[] = { /* @@ -1435,7 +1460,7 @@ teco_state_changedir_done(teco_machine_main_t *ctx, const teco_string_t *str, GE teco_qreg_t *qreg = teco_qreg_table_find(&teco_qreg_table_globals, "$HOME", 5); g_assert(qreg != NULL); teco_string_t home; - if (!qreg->vtable->get_string(qreg, &home.data, &home.len, error)) + if (!qreg->vtable->get_string(qreg, &home.data, &home.len, NULL, error)) return NULL; /* @@ -1496,7 +1521,7 @@ teco_state_changedir_done(teco_machine_main_t *ctx, const teco_string_t *str, GE TECO_DEFINE_STATE_EXPECTDIR(teco_state_changedir); static teco_state_t * -teco_state_condcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_condcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { teco_int_t value = 0; gboolean result = TRUE; @@ -1536,20 +1561,20 @@ teco_state_condcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error break; case 'A': if (ctx->mode == TECO_MODE_NORMAL) - result = g_ascii_isalpha((gchar)value); + result = g_unichar_isalpha(value); break; case 'C': if (ctx->mode == TECO_MODE_NORMAL) - result = g_ascii_isalnum((gchar)value) || + result = g_unichar_isalnum(value) || value == '.' 
|| value == '$' || value == '_'; break; case 'D': if (ctx->mode == TECO_MODE_NORMAL) - result = g_ascii_isdigit((gchar)value); + result = g_unichar_isdigit(value); break; case 'I': if (ctx->mode == TECO_MODE_NORMAL) - result = G_IS_DIR_SEPARATOR((gchar)value); + result = G_IS_DIR_SEPARATOR(value); break; case 'S': case 'T': @@ -1582,15 +1607,15 @@ teco_state_condcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error break; case 'R': if (ctx->mode == TECO_MODE_NORMAL) - result = g_ascii_isalnum((gchar)value); + result = g_unichar_isalnum(value); break; case 'V': if (ctx->mode == TECO_MODE_NORMAL) - result = g_ascii_islower((gchar)value); + result = g_unichar_islower(value); break; case 'W': if (ctx->mode == TECO_MODE_NORMAL) - result = g_ascii_isupper((gchar)value); + result = g_unichar_isupper(value); break; default: g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, @@ -1720,8 +1745,71 @@ teco_state_control_radix(teco_machine_main_t *ctx, GError **error) } } +/*$ ^E glyphs2bytes bytes2glyphs + * glyphs^E -> bytes -- Translate between glyph and byte indexes + * bytes:^E -> glyphs + * ^E -> bytes + * :^E -> length + * + * Translates from glyph/character to byte indexes when called + * without a colon. + * Otherwise when colon-modified, translates from byte indexes + * back to glyph indexes. + * These values can differ in documents with multi-byte + * encodings (of which only UTF-8 is supported). + * It is especially useful to translate between these indexes + * when manually invoking Scintilla messages (\fBES\fP command), as + * they almost always take byte positions. + * + * When called without arguments, \fB^E\fP returns the current + * position (dot) in bytes. + * This is equivalent, but faster than \(lq.^E\(rq. + * \fB:^E\fP without arguments returns the length of the current + * document in bytes, which is equivalent but faster than \(lqZ^E\(rq. 
+ * + * When passing in indexes outside of the document's valid area, + * -1 is returned, so the return value can also be interpreted + * as a TECO boolean, signalling truth/success for invalid indexes. + * This provides an elegant and effective way to validate + * buffer addresses. + */ +static void +teco_state_control_glyphs2bytes(teco_machine_main_t *ctx, GError **error) +{ + teco_int_t res; + + if (!teco_expressions_eval(FALSE, error)) + return; + + gboolean colon_modified = teco_machine_main_eval_colon(ctx); + + if (!teco_expressions_args()) { + /* + * This is shorter than .^E or Z^E and avoids unnecessary glyph to + * byte index translations. + * On the other hand :^E is inconsistent, as it will return a byte + * index, instead of glyph index. + */ + res = teco_interface_ssm(colon_modified ? SCI_GETLENGTH : SCI_GETCURRENTPOS, 0, 0); + } else { + teco_int_t pos; + if (!teco_expressions_pop_num_calc(&pos, 0, error)) + return; + if (colon_modified) { + /* teco_interface_bytes2glyphs() does not check addresses */ + res = 0 <= pos && pos <= teco_interface_ssm(SCI_GETLENGTH, 0, 0) + ? teco_interface_bytes2glyphs(pos) : -1; + } else { + /* negative values for invalid indexes are passed down. 
*/ + res = teco_interface_glyphs2bytes(pos); + } + } + + teco_expressions_push(res); +} + static teco_state_t * -teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { static teco_machine_main_transition_t transitions[] = { /* @@ -1746,7 +1834,8 @@ teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error) ['C'] = {&teco_state_start, teco_state_control_exit}, ['O'] = {&teco_state_start, teco_state_control_octal}, ['D'] = {&teco_state_start, teco_state_control_decimal}, - ['R'] = {&teco_state_start, teco_state_control_radix} + ['R'] = {&teco_state_start, teco_state_control_radix}, + ['E'] = {&teco_state_start, teco_state_control_glyphs2bytes} }; /* @@ -1761,7 +1850,7 @@ teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error) TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_control); static teco_state_t * -teco_state_ascii_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_ascii_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { if (ctx->mode == TECO_MODE_NORMAL) teco_expressions_push(chr); @@ -1797,7 +1886,7 @@ TECO_DEFINE_STATE(teco_state_ascii); * only be seen when executing the following command. */ static teco_state_t * -teco_state_escape_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_escape_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { /*$ ^[^[ ^[$ $$ terminate return * [a1,a2,...]$$ -- Terminate command line or return from macro @@ -1891,7 +1980,7 @@ TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_escape, * when it comes to function key macro masking. 
*/ .is_start = TRUE, - .fnmacro_mask = TECO_FNMACRO_MASK_START + .keymacro_mask = TECO_KEYMACRO_MASK_START | TECO_KEYMACRO_MASK_CASEINSENSITIVE ); /*$ EF close @@ -1958,6 +2047,11 @@ teco_state_ecommand_close(teco_machine_main_t *ctx, GError **error) * Without any argument ED returns the current flags. * * Currently, the following flags are used by \*(ST: + * - 4: If enabled, prefer raw single-byte ANSI encoding + * for all new buffers and registers. + * This does not change the encoding of any existing + * buffers and any initialized default register when set via + * \fBED\fP, so you might want to launch \*(ST with \fB--8bit\fP. * - 8: Enable/disable automatic folding of case-insensitive * command characters during interactive key translation. * The case of letter keys is inverted, so one or two @@ -1973,14 +2067,17 @@ teco_state_ecommand_close(teco_machine_main_t *ctx, GError **error) * of files. * - 32: Enable/Disable buffer editing hooks * (via execution of macro in global Q-Register \(lqED\(rq) - * - 64: Enable/Disable function key macros * - 128: Enable/Disable enforcement of UNIX98 * \(lq/bin/sh\(rq emulation for operating system command * executions - * - 256: Enable/Disable \fBxterm\fP(1) clipboard support. - * Should only be enabled if XTerm allows the - * \fIGetSelection\fP and \fISetSelection\fP window - * operations. + * - 256: Enable/Disable OSC-52 clipboard support. + * Must only be enabled if the terminal emulator is configured + * properly. + * - 512: Enable/Disable Unicode icons in the Curses UI. + * This requires a capable font, like the ones provided + * by the \(lqNerd Fonts\(rq project. + * Changes to this flag in interactive mode may not become + * effective immediately. * * The features controlled thus are discribed in other sections * of this manual. @@ -2098,6 +2195,12 @@ teco_state_ecommand_flags(teco_machine_main_t *ctx, GError **error) * on exit the author is aware of is \fBxterm\fP(1) and * the Linux console driver. 
* You have been warned. Good luck. + * .IP 4 + * The column after the last horizontal movement. + * This is only used by \fBfnkeys.tes\fP and is similar to the Scintilla-internal + * setting \fBSCI_CHOOSECARETX\fP. + * Unless most other settings, this is on purpose not restored on rubout, + * so it "survives" command line replacements. */ static void teco_state_ecommand_properties(teco_machine_main_t *ctx, GError **error) @@ -2106,9 +2209,12 @@ teco_state_ecommand_properties(teco_machine_main_t *ctx, GError **error) EJ_USER_INTERFACE = 0, EJ_BUFFERS, EJ_MEMORY_LIMIT, - EJ_INIT_COLOR + EJ_INIT_COLOR, + EJ_CARETX }; + static teco_int_t caret_x = 0; + teco_int_t property; if (!teco_expressions_eval(FALSE, error) || !teco_expressions_pop_num_calc(&property, teco_num_sign, error)) @@ -2144,6 +2250,10 @@ teco_state_ecommand_properties(teco_machine_main_t *ctx, GError **error) teco_interface_init_color((guint)value, (guint32)color); break; + case EJ_CARETX: + caret_x = value; + break; + default: g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, "Cannot set property %" TECO_INT_FORMAT " " @@ -2180,6 +2290,10 @@ teco_state_ecommand_properties(teco_machine_main_t *ctx, GError **error) teco_expressions_push(teco_memory_limit); break; + case EJ_CARETX: + teco_expressions_push(caret_x); + break; + default: g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, "Invalid property %" TECO_INT_FORMAT " " @@ -2292,6 +2406,252 @@ teco_state_ecommand_eol(teco_machine_main_t *ctx, GError **error) } } +static const gchar * +teco_codepage2str(guint codepage) +{ + /* + * The multi-byte charsets are excluded, since we don't + * support them in SciTECO, even though Scintilla has them. + * Contrary to the Scintilla documentation, Gtk supports + * most of them. + * Those that are supported are tested, so the codepage + * mapping should be definitive (although there could be + * similar related codepages). 
+ */ + switch (codepage) { + case SC_CP_UTF8: return "UTF-8"; + case SC_CHARSET_ANSI: + case SC_CHARSET_DEFAULT: return "ISO-8859-1"; /* LATIN1 */ + case SC_CHARSET_BALTIC: return "ISO-8859-13"; /* LATIN7 */ + //case SC_CHARSET_CHINESEBIG5: return "BIG5"; + case SC_CHARSET_EASTEUROPE: return "ISO-8859-2"; /* LATIN2 */ + //case SC_CHARSET_GB2312: return "GB2312"; + case SC_CHARSET_GREEK: return "ISO-8859-7"; // CP1253??? + //case SC_CHARSET_HANGUL: return "UHC"; + /* unsure whether this is supported on Gtk */ + case SC_CHARSET_MAC: return "MAC"; + /* not supported by Gtk */ + case SC_CHARSET_OEM: return "CP437"; + /* + * Apparently, this can be CP1251 on the native Windows + * port of Scintilla. + */ + case SC_CHARSET_RUSSIAN: return "KOI8-R"; + case SC_CHARSET_OEM866: return "CP866"; + case SC_CHARSET_CYRILLIC: return "CP1251"; + //case SC_CHARSET_SHIFTJIS: return "SHIFT-JIS"; + //case SC_CHARSET_SYMBOL: + case SC_CHARSET_TURKISH: return "ISO-8859-9"; /* LATIN5 */ + //case SC_CHARSET_JOHAB: return "JOHAB"; + case SC_CHARSET_HEBREW: return "ISO-8859-8"; // CP1255? + /* + * FIXME: Some arabic codepage is supported by Gtk, + * but I am not sure which. + */ + case SC_CHARSET_ARABIC: return "ISO-8859-6"; // CP720, CP1256??? + /* apparently not supported by Gtk */ + case SC_CHARSET_VIETNAMESE: return "CP1258"; + case SC_CHARSET_THAI: return "ISO-8859-11"; + case SC_CHARSET_8859_15: return "ISO-8859-15"; /* LATIN9 */ + } + + return NULL; +} + +/*$ EE encoding codepage charset + * codepageEE -- Edit current document's encoding (codepage/charset) + * EE -> codepage + * codepage:EE + * :EE -> codepage + * + * When called with an argument, it sets the current codepage, + * otherwise returns it. 
+ * The following codepages are supported: + * - 0: ANSI (raw bytes) + * - 1: ISO-8859-1 (latin1) + * - 77: Macintosh Latin encoding + * - 161: ISO-8859-7 + * - 162: ISO-8859-9 (latin5) + * - 163: CP1258 + * - 177: ISO-8859-8 + * - 178: ISO-8859-6 + * - 186: ISO-8859-13 (latin7) + * - 204: KOI8-R + * - 222: ISO-8859-11 + * - 238: ISO-8859-2 (latin2) + * - 255: CP437 + * - 866: CP866 + * - 1000: ISO-8859-15 (latin9) + * - 1251: CP1251 + * - 65001: UTF-8 + * + * Displaying characters in the single-byte (non-UTF-8) codepages might + * be supported only with the Gtk UI. + * At least 77, 178, 163 and 255 are not displayed correctly on Gtk. + * 65001 (UTF-8) is the default for new buffers. + * 0 (ANSI) should be used when working with raw bytes, + * but is currently displayed like ISO-8859-1 (latin1). + * + * \fBEE\fP does not change the buffer contents itself by default, only + * how it is displayed and how \*(ST interacts with it. + * This allows fixing up the codepage if it is not in the default UTF-8 + * or if codepage guessing failed. + * + * When colon-modified the \fB:EE\fP command will also additionally convert + * the current buffer contents into the new code page, preserving the + * current position (dot). + * This will fail if the conversion would be lossy. + * Conversions from and to UTF-8 \fIshould\fP always be successful. 
+ */ +static void +teco_state_ecommand_encoding(teco_machine_main_t *ctx, GError **error) +{ + if (!teco_expressions_eval(FALSE, error)) + return; + + gboolean colon_modified = teco_machine_main_eval_colon(ctx); + + guint old_cp = teco_interface_get_codepage(); + + if (!teco_expressions_args()) { + /* get current code page */ + teco_expressions_push(old_cp); + return; + } + + /* + * Set code page + */ + teco_int_t new_cp; + if (!teco_expressions_pop_num_calc(&new_cp, 0, error)) + return; + + if (old_cp == SC_CP_UTF8 && new_cp == SC_CP_UTF8) + return; + + if (teco_current_doc_must_undo() && teco_undo_enabled) { + if (old_cp == SC_CP_UTF8) { /* new_cp != SC_CP_UTF8 */ + undo__teco_interface_ssm(SCI_ALLOCATELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + undo__teco_interface_ssm(SCI_SETCODEPAGE, SC_CP_UTF8, 0); + } else { + undo__teco_interface_ssm(SCI_SETCODEPAGE, 0, 0); + for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++) + undo__teco_interface_ssm(SCI_STYLESETCHARACTERSET, style, old_cp); + /* + * The index is internally reference-counted and could underflow, + * so don't do it more than necessary. + */ + if (new_cp == SC_CP_UTF8) + undo__teco_interface_ssm(SCI_RELEASELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + } + } + + teco_int_t dot_glyphs; + if (colon_modified) { + sptr_t dot_bytes = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + dot_glyphs = teco_interface_bytes2glyphs(dot_bytes); + + /* + * Convert buffer to new codepage. + * + * FIXME: Could be optimized slightly by converting first + * before the gap, inserting the converted text and then + * converting after the gap. 
+ */ + const gchar *to_codepage = teco_codepage2str(new_cp); + const gchar *from_codepage = teco_codepage2str(old_cp); + if (!to_codepage || !from_codepage) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, + "Unknown or unsupported codepage/charset"); + return; + } + + const gchar *buf = (const gchar *)teco_interface_ssm(SCI_GETCHARACTERPOINTER, 0, 0); + gsize len = teco_interface_ssm(SCI_GETLENGTH, 0, 0); + g_autofree gchar *converted; + gsize converted_len; + + /* + * This fails if there is no direct translation. + * If we'd use g_convert_with_fallback(), it would be tricky to choose + * fallback characters that will always work. + */ + converted = g_convert(buf, len, to_codepage, from_codepage, + NULL, &converted_len, error); + if (!converted) + return; + + teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); + teco_interface_ssm(SCI_CLEARALL, 0, 0); + teco_interface_ssm(SCI_APPENDTEXT, converted_len, (sptr_t)converted); + teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0); + teco_ring_dirtify(); + + if (teco_current_doc_must_undo()) { + undo__teco_interface_ssm(SCI_GOTOPOS, dot_bytes, 0); + undo__teco_interface_ssm(SCI_UNDO, 0, 0); + } + } + + if (new_cp == SC_CP_UTF8) { + teco_interface_ssm(SCI_SETCODEPAGE, SC_CP_UTF8, 0); + /* + * UTF-8 documents strictly require the line character index. + * See teco_view_glyphs2bytes() and teco_view_bytes2glyphs(). + */ + g_assert(!(teco_interface_ssm(SCI_GETLINECHARACTERINDEX, 0, 0) + & SC_LINECHARACTERINDEX_UTF32)); + teco_interface_ssm(SCI_ALLOCATELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + } else { + /* + * The index is NOT released automatically when setting the codepage. + * But it is internally reference-counted and could underflow, + * so don't do it more than necessary. 
+ */ + if (old_cp == SC_CP_UTF8) { + teco_interface_ssm(SCI_RELEASELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + g_assert(!(teco_interface_ssm(SCI_GETLINECHARACTERINDEX, 0, 0) + & SC_LINECHARACTERINDEX_UTF32)); + } + + /* + * Configure a single-byte codepage/charset. + * This requires setting it on all of the possible styles. + * Unfortunately there can theoretically even be 255 (STYLE_MAX) styles. + * This is important only for display purposes - other than that + * all single-byte encodings are handled the same. + * + * FIXME: Should we avoid this if new_cp == 0? + * It will be used for raw byte handling mostly. + */ + if (teco_current_doc_must_undo()) { + /* + * There is a chance the user will see this buffer even if we + * are currently in batch mode. + */ + for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++) + teco_interface_ssm(SCI_STYLESETCHARACTERSET, style, new_cp); + } else { + /* we must still set it, so that <EE> retrieval works */ + teco_interface_ssm(SCI_STYLESETCHARACTERSET, STYLE_DEFAULT, new_cp); + } + /* 0 is used for ALL single-byte encodings */ + teco_interface_ssm(SCI_SETCODEPAGE, 0, 0); + } + + if (colon_modified) + /* + * Only now, it will be safe to recalculate dot in the new encoding. + * If the new codepage is UTF-8, the line character index will be + * ready only now. 
+ */ + teco_interface_ssm(SCI_GOTOPOS, teco_interface_glyphs2bytes(dot_glyphs), 0); +} + /*$ EX exit * [bool]EX -- Exit program * -EX @@ -2352,7 +2712,7 @@ teco_state_ecommand_exit(teco_machine_main_t *ctx, GError **error) } static teco_state_t * -teco_state_ecommand_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_ecommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { static teco_machine_main_transition_t transitions[] = { /* @@ -2377,6 +2737,7 @@ teco_state_ecommand_input(teco_machine_main_t *ctx, gchar chr, GError **error) ['D'] = {&teco_state_start, teco_state_ecommand_flags}, ['J'] = {&teco_state_start, teco_state_ecommand_properties}, ['L'] = {&teco_state_start, teco_state_ecommand_eol}, + ['E'] = {&teco_state_start, teco_state_ecommand_encoding}, ['X'] = {&teco_state_start, teco_state_ecommand_exit} }; @@ -2395,26 +2756,61 @@ teco_state_insert_initial(teco_machine_main_t *ctx, GError **error) if (ctx->mode > TECO_MODE_NORMAL) return TRUE; + /* + * Current document's encoding determines the behaviour of + * string building constructs. 
+ */ + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, + teco_interface_get_codepage()); + if (!teco_expressions_eval(FALSE, error)) return FALSE; guint args = teco_expressions_args(); if (!args) return TRUE; - teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); - for (int i = args; i > 0; i--) { - gchar chr = (gchar)teco_expressions_peek_num(i-1); - teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)&chr); + if (teco_interface_ssm(SCI_GETCODEPAGE, 0, 0) == SC_CP_UTF8) { + /* detect possible errors before introducing side effects */ + for (gint i = args; i > 0; i--) { + teco_int_t chr = teco_expressions_peek_num(i-1); + if (chr < 0 || !g_unichar_validate(chr)) { + teco_error_codepoint_set(error, "I"); + return FALSE; + } + } + teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); + for (gint i = args; i > 0; i--) { + /* 4 bytes should be enough, but we better follow the documentation */ + gchar buf[6]; + gsize len = g_unichar_to_utf8(teco_expressions_peek_num(i-1), buf); + teco_interface_ssm(SCI_ADDTEXT, len, (sptr_t)buf); + } + } else { + /* everything else is a single-byte encoding */ + for (gint i = args; i > 0; i--) { + teco_int_t chr = teco_expressions_peek_num(i-1); + if (chr < 0 || chr > 0xFF) { + teco_error_codepoint_set(error, "I"); + return FALSE; + } + } + teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); + for (gint i = args; i > 0; i--) { + gchar chr = (gchar)teco_expressions_peek_num(i-1); + teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)&chr); + } } - for (int i = args; i > 0; i--) - if (!teco_expressions_pop_num_calc(NULL, 0, error)) - return FALSE; teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0); teco_ring_dirtify(); if (teco_current_doc_must_undo()) undo__teco_interface_ssm(SCI_UNDO, 0, 0); + /* This is done only now because it can _theoretically_ fail. 
*/ + for (gint i = args; i > 0; i--) + if (!teco_expressions_pop_num_calc(NULL, 0, error)) + return FALSE; + return TRUE; } @@ -2451,8 +2847,8 @@ teco_state_insert_process(teco_machine_main_t *ctx, const teco_string_t *str, * Secondly, the command inserts <text>. * In interactive mode, <text> is inserted interactively. * - * String building characters are \fBenabled\fP for the - * I command. + * Unlike in classic TECO dialects, string building characters are + * \fBenabled\fP for the \fBI\fP command. * When editing \*(ST macros, using the \fBEI\fP command * may be better, since it has string building characters * disabled. @@ -2491,10 +2887,9 @@ teco_state_insert_indent_initial(teco_machine_main_t *ctx, GError **error) len -= teco_interface_ssm(SCI_GETCOLUMN, teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0), 0) % len; - gchar spaces[len]; - - memset(spaces, ' ', sizeof(spaces)); - teco_interface_ssm(SCI_ADDTEXT, sizeof(spaces), (sptr_t)spaces); + gchar space = ' '; + while (len-- > 0) + teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)&space); } teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0); teco_ring_dirtify(); diff --git a/src/core-commands.h b/src/core-commands.h index 6efc5a3..e30770d 100644 --- a/src/core-commands.h +++ b/src/core-commands.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -43,7 +43,7 @@ gboolean teco_state_insert_process(teco_machine_main_t *ctx, const teco_string_t gsize new_chars, GError **error); /* in cmdline.c */ -gboolean teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error); +gboolean teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error); /** * @class TECO_DEFINE_STATE_INSERT @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 
Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,17 +30,49 @@ #include "doc.h" static inline teco_doc_scintilla_t * +teco_doc_scintilla_ref(teco_doc_scintilla_t *doc) +{ + if (doc) + teco_view_ssm(teco_qreg_view, SCI_ADDREFDOCUMENT, 0, (sptr_t)doc); + return doc; +} + +static inline void +teco_doc_scintilla_release(teco_doc_scintilla_t *doc) +{ + if (doc) + teco_view_ssm(teco_qreg_view, SCI_RELEASEDOCUMENT, 0, (sptr_t)doc); +} + +TECO_DEFINE_UNDO_OBJECT(doc_scintilla, teco_doc_scintilla_t *, + teco_doc_scintilla_ref, teco_doc_scintilla_release); + +static inline teco_doc_scintilla_t * teco_doc_get_scintilla(teco_doc_t *ctx) { + /* + * FIXME: Perhaps we should always specify SC_DOCUMENTOPTION_TEXT_LARGE? + * SC_DOCUMENTOPTION_STYLES_NONE is unfortunately also not safe to set + * always as the Q-Reg might well be used for styling even in batch mode. + */ if (G_UNLIKELY(!ctx->doc)) ctx->doc = (teco_doc_scintilla_t *)teco_view_ssm(teco_qreg_view, SCI_CREATEDOCUMENT, 0, 0); return ctx->doc; } -/** @memberof teco_doc_t */ +/** + * Edit the given document in the Q-Register view. + * + * @param ctx The document to edit. + * @param default_cp The codepage to configure if the document is new. 
+ * + * @memberof teco_doc_t + */ void -teco_doc_edit(teco_doc_t *ctx) +teco_doc_edit(teco_doc_t *ctx, guint default_cp) { + gboolean new_doc = ctx->doc == NULL; + teco_view_ssm(teco_qreg_view, SCI_SETDOCPOINTER, 0, (sptr_t)teco_doc_get_scintilla(ctx)); teco_view_ssm(teco_qreg_view, SCI_SETFIRSTVISIBLELINE, ctx->first_line, 0); @@ -48,11 +80,39 @@ teco_doc_edit(teco_doc_t *ctx) teco_view_ssm(teco_qreg_view, SCI_SETSEL, ctx->anchor, (sptr_t)ctx->dot); /* - * NOTE: Thanks to a custom Scintilla patch, se representations + * NOTE: Thanks to a custom Scintilla patch, representations * do not get reset after SCI_SETDOCPOINTER, so they have to be * initialized only once. */ //teco_view_set_representations(teco_qreg_view); + + if (new_doc && default_cp != SC_CP_UTF8) { + /* + * There is a chance the user will see this buffer even if we + * are currently in batch mode. + */ + for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++) + teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET, + style, default_cp); + /* 0 is used for ALL single-byte encodings */ + teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0); + } else if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0) + & SC_LINECHARACTERINDEX_UTF32)) { + /* + * All UTF-8 documents are expected to have a character index. + * This allocates nothing if the document is not UTF-8. + * But it is reference counted, so it must not be allocated + * more than once. + * + * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER + * (although I don't know why and where). + * Recalculating it could be inefficient. + * The index is reference-counted. Perhaps we could just allocate + * one more time, so it doesn't get freed when changing documents. 
+ */ + teco_view_ssm(teco_qreg_view, SCI_ALLOCATELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + } } /** @memberof teco_doc_t */ @@ -68,26 +128,26 @@ teco_doc_undo_edit(teco_doc_t *ctx) undo__teco_view_ssm(teco_qreg_view, SCI_SETXOFFSET, ctx->xoffset, 0); undo__teco_view_ssm(teco_qreg_view, SCI_SETFIRSTVISIBLELINE, ctx->first_line, 0); undo__teco_view_ssm(teco_qreg_view, SCI_SETDOCPOINTER, 0, - (sptr_t)teco_doc_get_scintilla(ctx)); + (sptr_t)teco_doc_get_scintilla(ctx)); } /** @memberof teco_doc_t */ void -teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len) +teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage) { if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); + teco_doc_scintilla_release(ctx->doc); + ctx->doc = NULL; + teco_doc_reset(ctx); - teco_doc_edit(ctx); + teco_doc_edit(ctx, codepage); - teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)(str ? : "")); - teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); } /** @memberof teco_doc_t */ @@ -100,13 +160,13 @@ teco_doc_undo_set_string(teco_doc_t *ctx) */ teco_doc_update(ctx, teco_qreg_view); - if (teco_qreg_current && teco_qreg_current->must_undo) // FIXME + if (teco_qreg_current && teco_qreg_current->must_undo && // FIXME + ctx == &teco_qreg_current->string) + /* load old document into view */ teco_doc_undo_edit(&teco_qreg_current->string); teco_doc_undo_reset(ctx); - undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); - - teco_doc_undo_edit(ctx); + teco_undo_object_doc_scintilla_push(&ctx->doc); } /** @@ -117,33 +177,42 @@ teco_doc_undo_set_string(teco_doc_t *ctx) * It can be NULL if you are interested only in the string's length. * Strings must be freed via g_free(). 
* @param len Where to store the string's length (mandatory). + * @param codepage Where to store the document's codepage or NULL + * if that information is not necessary. * * @see teco_qreg_vtable_t::get_string() * @memberof teco_doc_t */ void -teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len) +teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage) { if (!ctx->doc) { if (str) *str = NULL; - *len = 0; + if (outlen) + *outlen = 0; + if (codepage) + *codepage = teco_default_codepage(); return; } if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(ctx); + teco_doc_edit(ctx, teco_default_codepage()); - *len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); + gsize len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); if (str) { - *str = g_malloc(*len + 1); - teco_view_ssm(teco_qreg_view, SCI_GETTEXT, *len + 1, (sptr_t)*str); + *str = g_malloc(len + 1); + teco_view_ssm(teco_qreg_view, SCI_GETTEXT, len + 1, (sptr_t)*str); } + if (outlen) + *outlen = len; + if (codepage) + *codepage = teco_view_get_codepage(teco_qreg_view); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); } /** @memberof teco_doc_t */ @@ -185,6 +254,5 @@ teco_doc_exchange(teco_doc_t *ctx, teco_doc_t *other) void teco_doc_clear(teco_doc_t *ctx) { - if (ctx->doc) - teco_view_ssm(teco_qreg_view, SCI_RELEASEDOCUMENT, 0, (sptr_t)ctx->doc); + teco_doc_scintilla_release(ctx->doc); } @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -42,7 +42,7 @@ typedef struct teco_doc_scintilla_t teco_doc_scintilla_t; typedef struct { /** * Underlying Scintilla document. 
- * It is created on demand in teco_doc_maybe_create_document(), + * It is created on demand in teco_doc_get_scintilla(), * so that we don't waste memory on integer-only Q-Registers. */ teco_doc_scintilla_t *doc; @@ -62,13 +62,13 @@ teco_doc_init(teco_doc_t *ctx) memset(ctx, 0, sizeof(*ctx)); } -void teco_doc_edit(teco_doc_t *ctx); +void teco_doc_edit(teco_doc_t *ctx, guint default_cp); void teco_doc_undo_edit(teco_doc_t *ctx); -void teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len); +void teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage); void teco_doc_undo_set_string(teco_doc_t *ctx); -void teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len); +void teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len, guint *codepage); void teco_doc_update_from_view(teco_doc_t *ctx, teco_view_t *from); void teco_doc_update_from_doc(teco_doc_t *ctx, const teco_doc_t *from); @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/error.c b/src/error.c index 7c4e151..afa2ac1 100644 --- a/src/error.c +++ b/src/error.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -37,13 +37,6 @@ guint teco_error_return_args = 0; */ guint teco_error_pos = 0, teco_error_line = 0, teco_error_column = 0; -void -teco_error_set_coord(const gchar *str, guint pos) -{ - teco_error_pos = 
pos; - teco_string_get_coord(str, pos, &teco_error_line, &teco_error_column); -} - typedef enum { TECO_FRAME_QREG, TECO_FRAME_FILE, @@ -161,10 +154,7 @@ teco_error_add_frame_toplevel(void) teco_error_add_frame(TECO_FRAME_TOPLEVEL, 0); } -#ifndef NDEBUG -__attribute__((destructor)) -#endif -void +void TECO_DEBUG_CLEANUP teco_error_clear_frames(void) { teco_stailq_entry_t *entry; diff --git a/src/error.h b/src/error.h index 91d2b60..469d957 100644 --- a/src/error.h +++ b/src/error.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,13 +40,16 @@ typedef enum { */ TECO_ERROR_SYNTAX, TECO_ERROR_ARGEXPECTED, + TECO_ERROR_CODEPOINT, TECO_ERROR_MOVE, TECO_ERROR_WORDS, TECO_ERROR_RANGE, TECO_ERROR_INVALIDQREG, TECO_ERROR_QREGOPUNSUPPORTED, TECO_ERROR_QREGCONTAINSNULL, + TECO_ERROR_EDITINGLOCALQREG, TECO_ERROR_MEMLIMIT, + TECO_ERROR_CLIPBOARD, /** Interrupt current operation */ TECO_ERROR_INTERRUPTED, @@ -60,10 +63,12 @@ typedef enum { } teco_error_t; static inline void -teco_error_syntax_set(GError **error, gchar chr) +teco_error_syntax_set(GError **error, gunichar chr) { + gchar buf[6]; + g_autofree gchar *chr_printable = teco_string_echo(buf, g_unichar_to_utf8(chr, buf)); g_set_error(error, TECO_ERROR, TECO_ERROR_SYNTAX, - "Syntax error \"%c\" (%d)", chr, chr); + "Syntax error \"%s\" (U+%04" G_GINT32_MODIFIER "X)", chr_printable, chr); } static inline void @@ -74,6 +79,13 @@ teco_error_argexpected_set(GError **error, const gchar *cmd) } static inline void +teco_error_codepoint_set(GError **error, const gchar *cmd) +{ + g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Invalid Unicode codepoint for <%s>", cmd); +} + +static inline void teco_error_move_set(GError **error, const gchar *cmd) { g_set_error(error, TECO_ERROR, TECO_ERROR_MOVE, @@ -119,6 +131,14 @@ 
teco_error_qregcontainsnull_set(GError **error, const gchar *name, gsize len, gb } static inline void +teco_error_editinglocalqreg_set(GError **error, const gchar *name, gsize len) +{ + g_autofree gchar *name_printable = teco_string_echo(name, len); + g_set_error(error, TECO_ERROR, TECO_ERROR_EDITINGLOCALQREG, + "Editing local Q-Register \"%s\" at end of macro call", name_printable); +} + +static inline void teco_error_interrupted_set(GError **error) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_INTERRUPTED, "Interrupted"); @@ -135,7 +155,11 @@ teco_error_return_set(GError **error, guint args) extern guint teco_error_pos, teco_error_line, teco_error_column; -void teco_error_set_coord(const gchar *str, guint pos); +static inline void +teco_error_set_coord(const gchar *str, gsize pos) +{ + teco_string_get_coord(str, pos, &teco_error_pos, &teco_error_line, &teco_error_column); +} void teco_error_display_short(const GError *error); void teco_error_display_full(const GError *error); diff --git a/src/expressions.c b/src/expressions.c index 57e2f71..ee6b4dc 100644 --- a/src/expressions.c +++ b/src/expressions.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -86,7 +86,7 @@ teco_int_t teco_expressions_pop_num(guint index) { teco_int_t n = 0; - teco_operator_t op = teco_expressions_pop_op(0); + G_GNUC_UNUSED teco_operator_t op = teco_expressions_pop_op(0); g_assert(op == TECO_OP_NUMBER); @@ -114,11 +114,12 @@ teco_expressions_pop_num_calc(teco_int_t *ret, teco_int_t imply, GError **error) } void -teco_expressions_add_digit(gchar digit) +teco_expressions_add_digit(gunichar digit) { teco_int_t n = teco_expressions_args() > 0 ? teco_expressions_pop_num(0) : 0; - teco_expressions_push(n*teco_radix + (n < 0 ? -1 : 1)*(digit - '0')); + /* use g_unichar_digit_value()? 
*/ + teco_expressions_push(n*teco_radix + (n < 0 ? -1 : 1)*((gint)digit - '0')); } void @@ -184,7 +185,28 @@ teco_expressions_calc(GError **error) switch (op) { case TECO_OP_POW: - for (result = 1; vright--; result *= vleft); + if (!vright) { + result = vleft < 0 ? -1 : 1; + break; + } + if (vright < 0) { + if (!vleft) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, + "Negative power of 0 is not defined"); + return FALSE; + } + result = ABS(vleft) == 1 ? vleft : 0; + break; + } + result = 1; + for (;;) { + if (vright & 1) + result *= vleft; + vright >>= 1; + if (!vright) + break; + vleft *= vleft; + } break; case TECO_OP_MUL: result = vleft * vright; @@ -297,6 +319,9 @@ guint teco_brace_level = 0; void teco_expressions_brace_open(void) { + while (teco_operators->len > 0 && teco_expressions_peek_op(0) == TECO_OP_NEW) + teco_expressions_pop_op(0); + teco_expressions_push_op(TECO_OP_BRACE); teco_undo_guint(teco_brace_level)++; } @@ -374,11 +399,9 @@ teco_expressions_format(gchar *buffer, teco_int_t number) return p; } -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_expressions_cleanup(void) { g_array_free(teco_numbers, TRUE); g_array_free(teco_operators, TRUE); } -#endif diff --git a/src/expressions.h b/src/expressions.h index 45e6f64..68d8ddb 100644 --- a/src/expressions.h +++ b/src/expressions.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -123,7 +123,7 @@ teco_int_t teco_expressions_peek_num(guint index); teco_int_t teco_expressions_pop_num(guint index); gboolean teco_expressions_pop_num_calc(teco_int_t *ret, teco_int_t imply, GError **error); -void teco_expressions_add_digit(gchar digit); +void teco_expressions_add_digit(gunichar digit); void teco_expressions_push_op(teco_operator_t op); gboolean 
teco_expressions_push_calc(teco_operator_t op, GError **error); diff --git a/src/file-utils.c b/src/file-utils.c index 239cc5f..3f8f721 100644 --- a/src/file-utils.c +++ b/src/file-utils.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ #ifdef HAVE_WINDOWS_H #define WIN32_LEAN_AND_MEAN +#define UNICODE #include <windows.h> #endif @@ -36,7 +37,6 @@ #include "sciteco.h" #include "qreg.h" -#include "glob.h" #include "interface.h" #include "string-utils.h" #include "file-utils.h" @@ -56,26 +56,35 @@ G_STATIC_ASSERT(INVALID_FILE_ATTRIBUTES == TECO_FILE_INVALID_ATTRIBUTES); teco_file_attributes_t teco_file_get_attributes(const gchar *filename) { - return GetFileAttributes((LPCTSTR)filename); + g_autofree gunichar2 *filename_utf16 = g_utf8_to_utf16(filename, -1, NULL, NULL, NULL); + return filename_utf16 ? GetFileAttributesW(filename_utf16) + : TECO_FILE_INVALID_ATTRIBUTES; } void teco_file_set_attributes(const gchar *filename, teco_file_attributes_t attrs) { - SetFileAttributes((LPCTSTR)filename, attrs); + g_autofree gunichar2 *filename_utf16 = g_utf8_to_utf16(filename, -1, NULL, NULL, NULL); + if (filename_utf16) + SetFileAttributesW(filename_utf16, attrs); } gchar * teco_file_get_absolute_path(const gchar *path) { + if (!path) + return NULL; + g_autofree gunichar2 *path_utf16 = g_utf8_to_utf16(path, -1, NULL, NULL, NULL); TCHAR buf[MAX_PATH]; - return path && GetFullPathName(path, sizeof(buf), buf, NULL) ? g_strdup(buf) : NULL; + return path_utf16 && GetFullPathNameW(path_utf16, G_N_ELEMENTS(buf), buf, NULL) + ? 
g_utf16_to_utf8(buf, -1, NULL, NULL, NULL) : NULL; } gboolean teco_file_is_visible(const gchar *path) { - return !(GetFileAttributes((LPCTSTR)path) & FILE_ATTRIBUTE_HIDDEN); + g_autofree gunichar2 *path_utf16 = g_utf8_to_utf16(path, -1, NULL, NULL, NULL); + return path_utf16 && !(GetFileAttributesW(path_utf16) & FILE_ATTRIBUTE_HIDDEN); } #else /* !G_OS_WIN32 */ @@ -83,7 +92,7 @@ teco_file_is_visible(const gchar *path) teco_file_attributes_t teco_file_get_attributes(const gchar *filename) { - struct stat buf; + GStatBuf buf; return g_stat(filename, &buf) ? TECO_FILE_INVALID_ATTRIBUTES : buf.st_mode; } @@ -204,7 +213,7 @@ teco_file_expand_path(const gchar *path) * but it may have been changed later on. */ g_auto(teco_string_t) home = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &home.data, &home.len, NULL) || + if (!qreg->vtable->get_string(qreg, &home.data, &home.len, NULL, NULL) || teco_string_contains(&home, '\0')) return g_strdup(path); g_assert(home.data != NULL); @@ -227,9 +236,6 @@ teco_file_auto_complete(const gchar *filename, GFileTest file_test, teco_string_ { memset(insert, 0, sizeof(*insert)); - if (teco_globber_is_pattern(filename)) - return FALSE; - g_autofree gchar *filename_expanded = teco_file_expand_path(filename); gsize filename_len = strlen(filename_expanded); diff --git a/src/file-utils.h b/src/file-utils.h index 51b0d18..4ee59e6 100644 --- a/src/file-utils.h +++ b/src/file-utils.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -225,17 +225,19 @@ teco_globber_compile_pattern(const gchar 
*pattern) *pout++ = '['; break; } + /* fall through: escape PCRE metacharacters */ + case '\\': + case '^': + case '$': + case '.': + case '|': + case '(': + case ')': + case '+': + case '{': + *pout++ = '\\'; /* fall through */ default: - /* - * For simplicity, all non-alphanumeric - * characters are escaped since they could - * be PCRE magic characters. - * g_regex_escape_string() is inefficient. - * character anyway. - */ - if (!g_ascii_isalnum(*pattern)) - *pout++ = '\\'; *pout++ = *pattern; break; } @@ -271,12 +273,13 @@ teco_globber_compile_pattern(const gchar *pattern) *pout++ = ']'; break; } - /* fall through */ - default: - if (!g_ascii_isalnum(*pattern)) - *pout++ = '\\'; + /* fall through: escape PCRE metacharacters */ + case '\\': + case '[': + *pout++ = '\\'; /* fall through */ case '-': + default: state = STATE_CLASS; *pout++ = *pattern; break; @@ -315,7 +318,8 @@ teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str, teco_qreg_t *glob_reg = teco_qreg_table_find(&teco_qreg_table_globals, "_", 1); g_assert(glob_reg != NULL); if (!glob_reg->vtable->undo_set_string(glob_reg, error) || - !glob_reg->vtable->set_string(glob_reg, filename, strlen(filename), error)) + !glob_reg->vtable->set_string(glob_reg, filename, strlen(filename), + teco_default_codepage(), error)) return NULL; } @@ -448,7 +452,7 @@ teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str, * when they should be in a register, the user will * have to edit that register anyway. 
*/ -TECO_DEFINE_STATE_EXPECTFILE(teco_state_glob_pattern, +TECO_DEFINE_STATE_EXPECTGLOB(teco_state_glob_pattern, .expectstring.last = FALSE ); @@ -490,7 +494,8 @@ teco_state_glob_filename_done(teco_machine_main_t *ctx, const teco_string_t *str teco_qreg_t *glob_reg = teco_qreg_table_find(&teco_qreg_table_globals, "_", 1); g_assert(glob_reg != NULL); g_auto(teco_string_t) pattern_str = {NULL, 0}; - if (!glob_reg->vtable->get_string(glob_reg, &pattern_str.data, &pattern_str.len, error)) + if (!glob_reg->vtable->get_string(glob_reg, &pattern_str.data, &pattern_str.len, + NULL, error)) return NULL; if (teco_string_contains(&pattern_str, '\0')) { teco_error_qregcontainsnull_set(error, "_", 1, FALSE); @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -46,6 +46,21 @@ teco_globber_is_pattern(const gchar *str) gchar *teco_globber_escape_pattern(const gchar *pattern); GRegex *teco_globber_compile_pattern(const gchar *pattern); +/* in cmdline.c */ +gboolean teco_state_expectglob_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); + +/** + * @interface TECO_DEFINE_STATE_EXPECTGLOB + * @implements TECO_DEFINE_STATE_EXPECTFILE + * @ingroup states + */ +#define TECO_DEFINE_STATE_EXPECTGLOB(NAME, ...) 
\ + TECO_DEFINE_STATE_EXPECTFILE(NAME, \ + .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \ + teco_state_expectglob_process_edit_cmd, \ + ##__VA_ARGS__ \ + ) + /* * Command states */ diff --git a/src/goto-commands.c b/src/goto-commands.c index e4cd868..a8a9689 100644 --- a/src/goto-commands.c +++ b/src/goto-commands.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -53,7 +53,7 @@ teco_state_label_initial(teco_machine_main_t *ctx, GError **error) * I'm unsure whether !-signs should be allowed within comments. */ static teco_state_t * -teco_state_label_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { if (chr == '!') { /* @@ -61,8 +61,8 @@ teco_state_label_input(teco_machine_main_t *ctx, gchar chr, GError **error) * on rubout. * Otherwise, the label will be removed (PC == -1). 
*/ - gint existing_pc = teco_goto_table_set(&ctx->goto_table, ctx->goto_label.data, - ctx->goto_label.len, ctx->macro_pc); + gssize existing_pc = teco_goto_table_set(&ctx->goto_table, ctx->goto_label.data, + ctx->goto_label.len, ctx->macro_pc); if (ctx->parent.must_undo) teco_goto_table_undo_set(&ctx->goto_table, ctx->goto_label.data, ctx->goto_label.len, existing_pc); @@ -85,7 +85,7 @@ teco_state_label_input(teco_machine_main_t *ctx, gchar chr, GError **error) if (ctx->parent.must_undo) undo__teco_string_truncate(&ctx->goto_label, ctx->goto_label.len); - teco_string_append_c(&ctx->goto_label, chr); + teco_string_append_wc(&ctx->goto_label, chr); return &teco_state_label; } @@ -119,7 +119,7 @@ teco_state_goto_done(teco_machine_main_t *ctx, const teco_string_t *str, GError } if (value == 0) { - gint pc = teco_goto_table_find(&ctx->goto_table, label.data, label.len); + gssize pc = teco_goto_table_find(&ctx->goto_table, label.data, label.len); if (pc >= 0) { ctx->macro_pc = pc; @@ -138,7 +138,7 @@ teco_state_goto_done(teco_machine_main_t *ctx, const teco_string_t *str, GError } /* in cmdline.c */ -gboolean teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error); +gboolean teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error); /*$ O * Olabel$ -- Go to label diff --git a/src/goto-commands.h b/src/goto-commands.h index ffd9527..03773c0 100644 --- a/src/goto-commands.h +++ b/src/goto-commands.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU 
General Public License as published by @@ -35,12 +35,12 @@ /** @extends teco_rb3str_head_t */ typedef struct { teco_rb3str_head_t head; - gint pc; + gsize pc; } teco_goto_label_t; /** @private @static @memberof teco_goto_label_t */ static teco_goto_label_t * -teco_goto_label_new(const gchar *name, gsize len, gint pc) +teco_goto_label_new(const gchar *name, gsize len, gsize pc) { teco_goto_label_t *label = g_new0(teco_goto_label_t, 1); teco_string_init(&label->head.name, name, len); @@ -79,10 +79,10 @@ teco_goto_table_dump(teco_goto_table_t *ctx) #endif /** @memberof teco_goto_table_t */ -gint +gssize teco_goto_table_remove(teco_goto_table_t *ctx, const gchar *name, gsize len) { - gint existing_pc = -1; + gssize existing_pc = -1; teco_goto_label_t *label = (teco_goto_label_t *)teco_rb3str_find(&ctx->tree, TRUE, name, len); if (label) { @@ -95,7 +95,7 @@ teco_goto_table_remove(teco_goto_table_t *ctx, const gchar *name, gsize len) } /** @memberof teco_goto_table_t */ -gint +gssize teco_goto_table_find(teco_goto_table_t *ctx, const gchar *name, gsize len) { teco_goto_label_t *label = (teco_goto_label_t *)teco_rb3str_find(&ctx->tree, TRUE, name, len); @@ -103,13 +103,13 @@ teco_goto_table_find(teco_goto_table_t *ctx, const gchar *name, gsize len) } /** @memberof teco_goto_table_t */ -gint -teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint pc) +gssize +teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gssize pc) { if (pc < 0) return teco_goto_table_remove(ctx, name, len); - gint existing_pc = -1; + gssize existing_pc = -1; teco_goto_label_t *label = (teco_goto_label_t *)teco_rb3str_find(&ctx->tree, TRUE, name, len); if (label) { @@ -135,7 +135,7 @@ teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint p */ typedef struct { teco_goto_table_t *table; - gint pc; + gssize pc; gsize len; gchar name[]; } teco_goto_table_undo_set_t; @@ -153,7 +153,7 @@ 
teco_goto_table_undo_set_action(teco_goto_table_undo_set_t *ctx, gboolean run) /** @memberof teco_goto_table_t */ void -teco_goto_table_undo_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint pc) +teco_goto_table_undo_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gssize pc) { if (!ctx->must_undo) return; @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,12 +40,12 @@ teco_goto_table_init(teco_goto_table_t *ctx, gboolean must_undo) ctx->must_undo = must_undo; } -gint teco_goto_table_remove(teco_goto_table_t *ctx, const gchar *name, gsize len); +gssize teco_goto_table_remove(teco_goto_table_t *ctx, const gchar *name, gsize len); -gint teco_goto_table_find(teco_goto_table_t *ctx, const gchar *name, gsize len); +gssize teco_goto_table_find(teco_goto_table_t *ctx, const gchar *name, gsize len); -gint teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint pc); -void teco_goto_table_undo_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gint pc); +gssize teco_goto_table_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gssize pc); +void teco_goto_table_undo_set(teco_goto_table_t *ctx, const gchar *name, gsize len, gssize pc); /** @memberof teco_goto_table_t */ static inline gboolean @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -94,7 +94,7 @@ teco_help_init(GError **error) teco_qreg_t *lib_reg = teco_qreg_table_find(&teco_qreg_table_globals, "$SCITECOPATH", 12); g_assert(lib_reg != NULL); g_auto(teco_string_t) lib_path = {NULL, 0}; - if (!lib_reg->vtable->get_string(lib_reg, &lib_path.data, 
&lib_path.len, error)) + if (!lib_reg->vtable->get_string(lib_reg, &lib_path.data, &lib_path.len, NULL, error)) return FALSE; /* * FIXME: lib_path may contain null-bytes. @@ -235,8 +235,7 @@ teco_help_auto_complete(const gchar *topic_name, teco_string_t *insert) topic_name ? strlen(topic_name) : 0, 0, insert); } -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_help_cleanup(void) { if (!teco_help_chunk) @@ -251,7 +250,6 @@ teco_help_cleanup(void) teco_help_topic_free((teco_help_topic_t *)cur); } } -#endif /* * Command states @@ -316,7 +314,7 @@ teco_state_help_done(teco_machine_main_t *ctx, const teco_string_t *str, GError } /* in cmdline.c */ -gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error); +gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error); /*$ "?" help * ?[topic]$ -- Get help for topic @@ -347,7 +345,7 @@ gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine * .EE * In other words it must be a \*(ST comment followed * by an asterisk sign, followed by the first topic which - * is a buffer position, followed by a colon and the topic + * is a buffer position in bytes, followed by a colon and the topic * string. * The topic string is terminated by the end of the line. * The end of the header is marked by a single \(lq*!\(rq. @@ -373,7 +371,7 @@ gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine * \fIgrosciteco\fP formatter and the \fIsciteco.tmac\fP * GNU troff macros. * When using womanpages generated by \fIgrosciteco\fP, - * help topics can be defined using the \fBTECO_TOPIC\fP + * help topics can be defined using the \fBSCITECO_TOPIC\fP * Troff macro. 
* This flexible system allows \*(ST to access internal * and third-party help files written in plain-text or @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface-curses/Makefile.am b/src/interface-curses/Makefile.am index 14fc920..44fb658 100644 --- a/src/interface-curses/Makefile.am +++ b/src/interface-curses/Makefile.am @@ -6,4 +6,5 @@ AM_CFLAGS = -std=gnu11 -Wall -Wno-initializer-overrides -Wno-unused-value noinst_LTLIBRARIES = libsciteco-interface.la libsciteco_interface_la_SOURCES = interface.c \ curses-utils.c curses-utils.h \ - curses-info-popup.c curses-info-popup.h + curses-info-popup.c curses-info-popup.h \ + curses-icons.c curses-icons.h diff --git a/src/interface-curses/curses-icons.c b/src/interface-curses/curses-icons.c new file mode 100644 index 0000000..1a1ba3a --- /dev/null +++ b/src/interface-curses/curses-icons.c @@ -0,0 +1,398 @@ +/* + * Copyright (C) 2012-2024 Robin Haberkorn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <string.h> + +#include <glib.h> + +#include <curses.h> + +#include "sciteco.h" +#include "curses-icons.h" + +typedef struct { + const gchar *name; + gunichar c; +} teco_curses_icon_t; + +/* + * The following icons have initially been adapted from exa, + * but icons have since been added and removed. + * + * They require fonts with additional symbols, eg. + * Nerd Fonts (https://www.nerdfonts.com/). + * + * They MUST be kept presorted, so we can perform binary searches. + */ + +/** Mapping of complete filenames to Unicode "icons" */ +static const teco_curses_icon_t teco_icons_file[] = { + {".Trash", 0xf1f8}, /* */ + {".atom", 0xe764}, /* */ + {".bash_history", 0xf489}, /* */ + {".bash_profile", 0xf489}, /* */ + {".bashrc", 0xf489}, /* */ + {".git", 0xf1d3}, /* */ + {".gitattributes", 0xf1d3}, /* */ + {".gitconfig", 0xf1d3}, /* */ + {".github", 0xf408}, /* */ + {".gitignore", 0xf1d3}, /* */ + {".gitmodules", 0xf1d3}, /* */ + {".rvm", 0xe21e}, /* */ + {".teco_ini", 0xedaa}, /* */ + {".teco_session", 0xedaa}, /* */ + {".vimrc", 0xe62b}, /* */ + {".vscode", 0xe70c}, /* */ + {".zshrc", 0xf489}, /* */ + {"COMMIT_EDITMSG", 0xf1d3}, /* */ + {"Cargo.lock", 0xe7a8}, /* */ + {"Dockerfile", 0xf308}, /* */ + {"GNUmakefile", 0xf489}, /* */ + {"MERGE_MSG", 0xf1d3}, /* */ + {"Makefile", 0xf489}, /* */ + {"PKGBUILD", 0xf303}, /* */ + {"TAG_EDITMSG", 0xf1d3}, /* */ + {"bin", 0xe5fc}, /* */ + {"config", 0xe5fc}, /* */ + {"docker-compose.yml", 0xf308}, /* */ + {"ds_store", 0xf179}, /* */ + {"git-rebase-todo", 0xf1d3}, /* */ + {"go.mod", 0xe626}, /* */ + {"go.sum", 0xe626}, /* */ + {"gradle", 0xe256}, /* */ + {"gruntfile.coffee", 0xe611}, /* */ + {"gruntfile.js", 0xe611}, /* */ + {"gruntfile.ls", 0xe611}, /* */ + {"gulpfile.coffee", 0xe610}, /* */ + {"gulpfile.js", 0xe610}, /* */ + {"gulpfile.ls", 0xe610}, /* */ + {"hidden", 0xf023}, /* */ + {"include", 0xe5fc}, /* */ + {"lib", 0xf121}, /* */ + 
{"localized", 0xf179}, /* */ + {"node_modules", 0xe718}, /* */ + {"npmignore", 0xe71e}, /* */ + {"rubydoc", 0xe73b}, /* */ + {"yarn.lock", 0xe718}, /* */ +}; + +/** Mapping of file extensions to Unicode "icons" */ +static const teco_curses_icon_t teco_icons_ext[] = { + {"DS_store", 0xf179}, /* */ + {"ai", 0xe7b4}, /* */ + {"android", 0xe70e}, /* */ + {"apk", 0xe70e}, /* */ + {"apple", 0xf179}, /* */ + {"avi", 0xf03d}, /* */ + {"avif", 0xf1c5}, /* */ + {"avro", 0xe60b}, /* */ + {"awk", 0xf489}, /* */ + {"bash", 0xf489}, /* */ + {"bat", 0xf17a}, /* */ + {"bats", 0xf489}, /* */ + {"bmp", 0xf1c5}, /* */ + {"bz", 0xf410}, /* */ + {"bz2", 0xf410}, /* */ + {"c", 0xe61e}, /* */ + {"c++", 0xe61d}, /* */ + {"cab", 0xe70f}, /* */ + {"cc", 0xe61d}, /* */ + {"cfg", 0xe615}, /* */ + {"class", 0xe256}, /* */ + {"clj", 0xe768}, /* */ + {"cljs", 0xe76a}, /* */ + {"cls", 0xf034}, /* */ + {"cmd", 0xe70f}, /* */ + {"coffee", 0xf0f4}, /* */ + {"conf", 0xe615}, /* */ + {"cp", 0xe61d}, /* */ + {"cpio", 0xf410}, /* */ + {"cpp", 0xe61d}, /* */ + {"cs", 0xf031b}, /* */ + {"csh", 0xf489}, /* */ + {"cshtml", 0xf1fa}, /* */ + {"csproj", 0xf031b}, /* */ + {"css", 0xe749}, /* */ + {"csv", 0xf1c3}, /* */ + {"csx", 0xf031b}, /* */ + {"cxx", 0xe61d}, /* */ + {"d", 0xe7af}, /* */ + {"dart", 0xe798}, /* */ + {"db", 0xf1c0}, /* */ + {"deb", 0xe77d}, /* */ + {"diff", 0xf440}, /* */ + {"djvu", 0xf02d}, /* */ + {"dll", 0xe70f}, /* */ + {"doc", 0xf1c2}, /* */ + {"docx", 0xf1c2}, /* */ + {"ds_store", 0xf179}, /* */ + {"dump", 0xf1c0}, /* */ + {"ebook", 0xe28b}, /* */ + {"ebuild", 0xf30d}, /* */ + {"editorconfig", 0xe615}, /* */ + {"ejs", 0xe618}, /* */ + {"elm", 0xe62c}, /* */ + {"env", 0xf462}, /* */ + {"eot", 0xf031}, /* */ + {"epub", 0xe28a}, /* */ + {"erb", 0xe73b}, /* */ + {"erl", 0xe7b1}, /* */ + {"ex", 0xe62d}, /* */ + {"exe", 0xf17a}, /* */ + {"exs", 0xe62d}, /* */ + {"fish", 0xf489}, /* */ + {"flac", 0xf001}, /* */ + {"flv", 0xf03d}, /* */ + {"font", 0xf031}, /* */ + {"fs", 0xe7a7}, /* */ + 
{"fsi", 0xe7a7}, /* */ + {"fsx", 0xe7a7}, /* */ + {"gdoc", 0xf1c2}, /* */ + {"gem", 0xe21e}, /* */ + {"gemfile", 0xe21e}, /* */ + {"gemspec", 0xe21e}, /* */ + {"gform", 0xf298}, /* */ + {"gif", 0xf1c5}, /* */ + {"go", 0xe626}, /* */ + {"gradle", 0xe256}, /* */ + {"groovy", 0xe775}, /* */ + {"gsheet", 0xf1c3}, /* */ + {"gslides", 0xf1c4}, /* */ + {"guardfile", 0xe21e}, /* */ + {"gz", 0xf410}, /* */ + {"h", 0xf0fd}, /* */ + {"hbs", 0xe60f}, /* */ + {"hpp", 0xf0fd}, /* */ + {"hs", 0xe777}, /* */ + {"htm", 0xf13b}, /* */ + {"html", 0xf13b}, /* */ + {"hxx", 0xf0fd}, /* */ + {"ico", 0xf1c5}, /* */ + {"image", 0xf1c5}, /* */ + {"img", 0xe271}, /* */ + {"iml", 0xe7b5}, /* */ + {"ini", 0xf17a}, /* */ + {"ipynb", 0xe678}, /* */ + {"iso", 0xe271}, /* */ + {"j2c", 0xf1c5}, /* */ + {"j2k", 0xf1c5}, /* */ + {"jad", 0xe256}, /* */ + {"jar", 0xe256}, /* */ + {"java", 0xe256}, /* */ + {"jfi", 0xf1c5}, /* */ + {"jfif", 0xf1c5}, /* */ + {"jif", 0xf1c5}, /* */ + {"jl", 0xe624}, /* */ + {"jmd", 0xf48a}, /* */ + {"jp2", 0xf1c5}, /* */ + {"jpe", 0xf1c5}, /* */ + {"jpeg", 0xf1c5}, /* */ + {"jpg", 0xf1c5}, /* */ + {"jpx", 0xf1c5}, /* */ + {"js", 0xe74e}, /* */ + {"json", 0xe60b}, /* */ + {"jsx", 0xe7ba}, /* */ + {"jxl", 0xf1c5}, /* */ + {"ksh", 0xf489}, /* */ + {"latex", 0xf034}, /* */ + {"less", 0xe758}, /* */ + {"lhs", 0xe777}, /* */ + {"license", 0xf0219}, /* */ + {"localized", 0xf179}, /* */ + {"lock", 0xf023}, /* */ + {"log", 0xf18d}, /* */ + {"lua", 0xe620}, /* */ + {"lz", 0xf410}, /* */ + {"lz4", 0xf410}, /* */ + {"lzh", 0xf410}, /* */ + {"lzma", 0xf410}, /* */ + {"lzo", 0xf410}, /* */ + {"m", 0xe61e}, /* */ + {"m4a", 0xf001}, /* */ + {"markdown", 0xf48a}, /* */ + {"md", 0xf48a}, /* */ + {"mjs", 0xe74e}, /* */ + {"mk", 0xf489}, /* */ + {"mkd", 0xf48a}, /* */ + {"mkv", 0xf03d}, /* */ + {"mm", 0xe61d}, /* */ + {"mobi", 0xe28b}, /* */ + {"mov", 0xf03d}, /* */ + {"mp3", 0xf001}, /* */ + {"mp4", 0xf03d}, /* */ + {"msi", 0xe70f}, /* */ + {"mustache", 0xe60f}, /* */ + {"nix", 0xf313}, /* 
*/ + {"node", 0xf0399}, /* */ + {"npmignore", 0xe71e}, /* */ + {"odp", 0xf1c4}, /* */ + {"ods", 0xf1c3}, /* */ + {"odt", 0xf1c2}, /* */ + {"ogg", 0xf001}, /* */ + {"ogv", 0xf03d}, /* */ + {"otf", 0xf031}, /* */ + {"part", 0xf43a}, /* */ + {"patch", 0xf440}, /* */ + {"pdf", 0xf1c1}, /* */ + {"php", 0xe73d}, /* */ + {"pl", 0xe769}, /* */ + {"plx", 0xe769}, /* */ + {"pm", 0xe769}, /* */ + {"png", 0xf1c5}, /* */ + {"pod", 0xe769}, /* */ + {"ppt", 0xf1c4}, /* */ + {"pptx", 0xf1c4}, /* */ + {"procfile", 0xe21e}, /* */ + {"properties", 0xe60b}, /* */ + {"ps1", 0xf489}, /* */ + {"psd", 0xe7b8}, /* */ + {"pxm", 0xf1c5}, /* */ + {"py", 0xe606}, /* */ + {"pyc", 0xe606}, /* */ + {"r", 0xf25d}, /* */ + {"rakefile", 0xe21e}, /* */ + {"rar", 0xf410}, /* */ + {"razor", 0xf1fa}, /* */ + {"rb", 0xe21e}, /* */ + {"rdata", 0xf25d}, /* */ + {"rdb", 0xe76d}, /* */ + {"rdoc", 0xf48a}, /* */ + {"rds", 0xf25d}, /* */ + {"readme", 0xf48a}, /* */ + {"rlib", 0xe7a8}, /* */ + {"rmd", 0xf48a}, /* */ + {"rpm", 0xe7bb}, /* */ + {"rs", 0xe7a8}, /* */ + {"rspec", 0xe21e}, /* */ + {"rspec_parallel", 0xe21e}, /* */ + {"rspec_status", 0xe21e}, /* */ + {"rss", 0xf09e}, /* */ + {"rtf", 0xf0219}, /* */ + {"ru", 0xe21e}, /* */ + {"rubydoc", 0xe73b}, /* */ + {"sass", 0xe603}, /* */ + {"scala", 0xe737}, /* */ + {"scss", 0xe749}, /* */ + {"sh", 0xf489}, /* */ + {"shell", 0xf489}, /* */ + {"slim", 0xe73b}, /* */ + {"sln", 0xe70c}, /* */ + {"so", 0xf17c}, /* */ + {"sql", 0xf1c0}, /* */ + {"sqlite3", 0xe7c4}, /* */ + {"sty", 0xf034}, /* */ + {"styl", 0xe600}, /* */ + {"stylus", 0xe600}, /* */ + {"svg", 0xf1c5}, /* */ + {"swift", 0xe755}, /* */ + {"t", 0xe769}, /* */ + {"tar", 0xf410}, /* */ + {"taz", 0xf410}, /* */ + {"tbz", 0xf410}, /* */ + {"tbz2", 0xf410}, /* */ + {"tec", 0xedaa}, /* */ + {"tes", 0xedaa}, /* */ + {"tex", 0xf034}, /* */ + {"tgz", 0xf410}, /* */ + {"tiff", 0xf1c5}, /* */ + {"tlz", 0xf410}, /* */ + {"toml", 0xe615}, /* */ + {"torrent", 0xe275}, /* */ + {"ts", 0xe628}, /* */ + {"tsv", 0xf1c3}, 
/* */ + {"tsx", 0xe7ba}, /* */ + {"ttf", 0xf031}, /* */ + {"twig", 0xe61c}, /* */ + {"txt", 0xf15c}, /* */ + {"txz", 0xf410}, /* */ + {"tz", 0xf410}, /* */ + {"tzo", 0xf410}, /* */ + {"video", 0xf03d}, /* */ + {"vim", 0xe62b}, /* */ + {"vue", 0xf0844}, /* */ + {"war", 0xe256}, /* */ + {"wav", 0xf001}, /* */ + {"webm", 0xf03d}, /* */ + {"webp", 0xf1c5}, /* */ + {"windows", 0xf17a}, /* */ + {"woff", 0xf031}, /* */ + {"woff2", 0xf031}, /* */ + {"woman", 0xeaa4}, /* */ + {"xhtml", 0xf13b}, /* */ + {"xls", 0xf1c3}, /* */ + {"xlsx", 0xf1c3}, /* */ + {"xml", 0xf05c0}, /* */ + {"xul", 0xf05c0}, /* */ + {"xz", 0xf410}, /* */ + {"yaml", 0xf481}, /* */ + {"yml", 0xf481}, /* */ + {"zip", 0xf410}, /* */ + {"zsh", 0xf489}, /* */ + {"zsh-theme", 0xf489}, /* */ + {"zst", 0xf410}, /* */ +}; + +static int +teco_curses_icon_cmp(const void *a, const void *b) +{ + const gchar *str = a; + const teco_curses_icon_t *icon = b; + + return strcmp(str, icon->name); +} + +gunichar +teco_curses_icons_lookup_file(const gchar *filename) +{ + g_autofree gchar *basename = g_path_get_basename(filename); + const teco_curses_icon_t *icon; + + /* try to find icon by complete file name */ + icon = bsearch(basename, teco_icons_file, G_N_ELEMENTS(teco_icons_file), + sizeof(teco_icons_file[0]), teco_curses_icon_cmp); + if (icon) + return icon->c; + + /* try to find icon by extension */ + const gchar *ext = strrchr(basename, '.'); + if (ext) { + icon = bsearch(ext+1, teco_icons_ext, G_N_ELEMENTS(teco_icons_ext), + sizeof(teco_icons_ext[0]), teco_curses_icon_cmp); + return icon ? 
icon->c : 0xf15b; /* */ + } + + /* default file icon for files without extension */ + return 0xf016; /* */ +} + +gunichar +teco_curses_icons_lookup_dir(const gchar *dirname) +{ + g_autofree gchar *basename = g_path_get_basename(dirname); + const teco_curses_icon_t *icon; + + icon = bsearch(basename, teco_icons_file, G_N_ELEMENTS(teco_icons_file), + sizeof(teco_icons_file[0]), teco_curses_icon_cmp); + + /* default folder icon */ + return icon ? icon->c : 0xf115; /* */ +} diff --git a/src/interface-curses/curses-icons.h b/src/interface-curses/curses-icons.h new file mode 100644 index 0000000..c1be06f --- /dev/null +++ b/src/interface-curses/curses-icons.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2012-2024 Robin Haberkorn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#pragma once + +#include <glib.h> + +/** + * Q-Register icon. + * 0xf04cf would look more similar to the current Gtk icon. 
+ */ +#define TECO_CURSES_ICONS_QREG 0xe236 /* */ + +gunichar teco_curses_icons_lookup_file(const gchar *filename); +gunichar teco_curses_icons_lookup_dir(const gchar *dirname); diff --git a/src/interface-curses/curses-info-popup.c b/src/interface-curses/curses-info-popup.c index a738f5d..e6e1549 100644 --- a/src/interface-curses/curses-info-popup.c +++ b/src/interface-curses/curses-info-popup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ #include "interface.h" #include "curses-utils.h" #include "curses-info-popup.h" +#include "curses-icons.h" /* * FIXME: This is redundant with gtk-info-popup.c. @@ -75,8 +76,13 @@ teco_curses_info_popup_init_pad(teco_curses_info_popup_t *ctx, attr_t attr) gint pad_cols; /**! entry columns */ gint pad_colwidth; /**! width per entry column */ - /* reserve 2 spaces between columns */ - pad_colwidth = MIN(ctx->longest + 2, cols - 2); + /* + * With Unicode icons enabled, we reserve 2 characters at the beginning and one + * after the filename/directory. + * Otherwise 2 characters after the entry. + */ + gint reserve = teco_ed & TECO_ED_ICONS ? 
2+1 : 2; + pad_colwidth = MIN(ctx->longest + reserve, cols - 2); /* pad_cols = floor((cols - 2) / pad_colwidth) */ pad_cols = (cols - 2) / pad_colwidth; @@ -111,8 +117,19 @@ teco_curses_info_popup_init_pad(teco_curses_info_popup_t *ctx, attr_t attr) switch (entry->type) { case TECO_POPUP_FILE: + g_assert(!teco_string_contains(&entry->name, '\0')); + if (teco_ed & TECO_ED_ICONS) { + teco_curses_add_wc(ctx->pad, teco_curses_icons_lookup_file(entry->name.data)); + waddch(ctx->pad, ' '); + } + teco_curses_format_filename(ctx->pad, entry->name.data, -1); + break; case TECO_POPUP_DIRECTORY: g_assert(!teco_string_contains(&entry->name, '\0')); + if (teco_ed & TECO_ED_ICONS) { + teco_curses_add_wc(ctx->pad, teco_curses_icons_lookup_dir(entry->name.data)); + waddch(ctx->pad, ' '); + } teco_curses_format_filename(ctx->pad, entry->name.data, -1); break; default: diff --git a/src/interface-curses/curses-info-popup.h b/src/interface-curses/curses-info-popup.h index bcdb3b8..a6c28a5 100644 --- a/src/interface-curses/curses-info-popup.h +++ b/src/interface-curses/curses-info-popup.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface-curses/curses-utils.c b/src/interface-curses/curses-utils.c index 8dc62f1..c751afd 100644 --- a/src/interface-curses/curses-utils.c +++ b/src/interface-curses/curses-utils.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -29,7 +29,21 @@ #include "string-utils.h" #include "curses-utils.h" -gsize +/** + * Render UTF-8 string with TECO character representations. + * + * Strings are cut off with `...` at the end if necessary. 
+ * The mapping is similar to teco_view_set_representations(). + * + * @param win The Curses window to write to. + * @param str The string to format. + * @param len The length of the string in bytes. + * @param max_width The maximum width to consume in + * the window in characters. If smaller 0, take the + * entire remaining space in the window. + * @return Number of characters actually written. + */ +guint teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width) { int old_x, old_y; @@ -42,6 +56,12 @@ teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width) while (len > 0) { /* + * NOTE: It shouldn't be possible to meet any string, + * that is not valid UTF-8. + */ + gsize clen = g_utf8_next_char(str) - str; + + /* * NOTE: This mapping is similar to * teco_view_set_representations(). */ @@ -85,12 +105,18 @@ teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width) chars_added++; if (chars_added > max_width) goto truncate; - waddch(win, *str); + /* + * FIXME: This works with UTF-8 on ncurses, + * since it detects multi-byte characters. + * However on other platforms wadd_wch() may be + * necessary, which requires a widechar Curses variant. + */ + waddnstr(win, str, clen); } } - str++; - len--; + str += clen; + len -= clen; } return getcurx(win) - old_x; @@ -108,23 +134,43 @@ truncate: return getcurx(win) - old_x; } -gsize -teco_curses_format_filename(WINDOW *win, const gchar *filename, - gint max_width) +/** + * Render UTF-8 filename. + * + * This cuts of overlong filenames with `...` at the beginning, + * possibly skipping any drive letter. + * Control characters are escaped, but not highlighted. + * + * @param win The Curses window to write to. + * @param filename Null-terminated filename to render. + * @param max_width The maximum width to consume in + * the window in characters. If smaller 0, take the + * entire remaining space in the window. + * @return Number of characters actually written. 
+ */ +guint +teco_curses_format_filename(WINDOW *win, const gchar *filename, gint max_width) { int old_x = getcurx(win); g_autofree gchar *filename_printable = teco_string_echo(filename, strlen(filename)); - size_t filename_len = strlen(filename_printable); + glong filename_len = g_utf8_strlen(filename_printable, -1); if (max_width < 0) max_width = getmaxx(win) - old_x; - if (filename_len <= (size_t)max_width) { + if (filename_len <= max_width) { + /* + * FIXME: This works with UTF-8 on ncurses, + * since it detects multi-byte characters. + * However on other platforms wadd_wch() may be + * necessary, which requires a widechar Curses variant. + */ waddstr(win, filename_printable); - } else { - const gchar *keep_post = filename_printable + filename_len - - max_width + 3; + } else if (filename_len >= 3) { + const gchar *keep_post; + keep_post = g_utf8_offset_to_pointer(filename_printable + strlen(filename_printable), + -max_width + 3); #ifdef G_OS_WIN32 const gchar *keep_pre = g_path_skip_root(filename_printable); diff --git a/src/interface-curses/curses-utils.h b/src/interface-curses/curses-utils.h index a91ab44..2c819ee 100644 --- a/src/interface-curses/curses-utils.h +++ b/src/interface-curses/curses-utils.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,6 +20,17 @@ #include <curses.h> -gsize teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width); +guint teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width); -gsize teco_curses_format_filename(WINDOW *win, const gchar *filename, gint max_width); +guint teco_curses_format_filename(WINDOW *win, const gchar *filename, gint max_width); + +/** + * Add Unicode character to window. + * This is just like wadd_wch(), but does not require wide-char APIs. 
+ */ +static inline void +teco_curses_add_wc(WINDOW *win, gunichar chr) +{ + gchar buf[6]; + waddnstr(win, buf, g_unichar_to_utf8(chr, buf)); +} diff --git a/src/interface-curses/interface.c b/src/interface-curses/interface.c index ef3f0c7..95e86c9 100644 --- a/src/interface-curses/interface.c +++ b/src/interface-curses/interface.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,7 +24,6 @@ #include <stdlib.h> #include <stdarg.h> #include <unistd.h> -#include <locale.h> #include <errno.h> #ifdef HAVE_WINDOWS_H @@ -65,11 +64,12 @@ #include "qreg.h" #include "ring.h" #include "error.h" -#include "curses-utils.h" -#include "curses-info-popup.h" #include "view.h" #include "memory.h" #include "interface.h" +#include "curses-utils.h" +#include "curses-info-popup.h" +#include "curses-icons.h" #if defined(__PDCURSES__) && defined(G_OS_WIN32) && \ !defined(PDCURSES_GUI) @@ -340,12 +340,18 @@ static struct { TECO_INFO_TYPE_QREG } info_type; teco_string_t info_current; + gboolean info_dirty; WINDOW *msg_window; WINDOW *cmdline_window, *cmdline_pad; - gsize cmdline_len, cmdline_rubout_len; + guint cmdline_len, cmdline_rubout_len; + /** + * Pad used exclusively for wgetch() as it will not + * result in unwanted wrefresh(). + */ + WINDOW *input_pad; GQueue *input_queue; teco_curses_info_popup_t popup; @@ -554,7 +560,7 @@ teco_interface_init_screen(void) g_assert(teco_interface.screen_tty != NULL); teco_interface.screen = newterm(NULL, teco_interface.screen_tty, teco_interface.screen_tty); - if (!teco_interface.screen) { + if (G_UNLIKELY(!teco_interface.screen)) { g_fprintf(stderr, "Error initializing interactive mode. 
" "$TERM may be incorrect.\n"); exit(EXIT_FAILURE); @@ -629,28 +635,6 @@ teco_interface_init_interactive(GError **error) return FALSE; /* - * On UNIX terminals, the escape key is usually - * delivered as the escape character even though function - * keys are delivered as escape sequences as well. - * That's why there has to be a timeout for detecting - * escape presses if function key handling is enabled. - * This timeout can be controlled using $ESCDELAY on - * ncurses but its default is much too long. - * We set it to 25ms as Vim does. In the very rare cases - * this won't suffice, $ESCDELAY can still be set explicitly. - * - * NOTE: The only terminal emulator I'm aware of that lets - * us send an escape sequence for the escape key is Mintty - * (see "\e[?7727h"). - * - * FIXME: This appears to be ineffective for netbsd-curses. - */ -#ifdef CURSES_TTY - if (!g_getenv("ESCDELAY")) - set_escdelay(25); -#endif - - /* * $TERM must be unset or "#win32con" for the win32 * driver to load. * So we always ignore any $TERM changes by the user. @@ -679,12 +663,31 @@ teco_interface_init_interactive(GError **error) PDC_set_function_key(FUNCTION_KEY_SHUT_DOWN, KEY_CLOSE); #endif - /* for displaying UTF-8 characters properly */ - setlocale(LC_CTYPE, ""); - teco_interface_init_screen(); /* + * On UNIX terminals, the escape key is usually + * delivered as the escape character even though function + * keys are delivered as escape sequences as well. + * That's why there has to be a timeout for detecting + * escape presses if function key handling is enabled. + * This timeout can be controlled using $ESCDELAY on + * ncurses but its default is much too long. + * We set it to 25ms as Vim does. In the very rare cases + * this won't suffice, $ESCDELAY can still be set explicitly. + * + * NOTE: The only terminal emulator I'm aware of that lets + * us send an escape sequence for the escape key is Mintty + * (see "\e[?7727h"). 
+ * + * NOTE: The delay is overwritten by initscr() on netbsd-curses. + */ +#ifdef CURSES_TTY + if (!g_getenv("ESCDELAY")) + set_escdelay(25); +#endif + + /* * We always have a CTRL handler on Windows, but doing it * here again, ensures that we have a higher precedence * than the one installed by PDCurses. @@ -699,12 +702,22 @@ teco_interface_init_interactive(GError **error) curs_set(0); teco_interface.info_window = newwin(1, 0, 0, 0); - teco_interface.msg_window = newwin(1, 0, LINES - 2, 0); - teco_interface.cmdline_window = newwin(0, 0, LINES - 1, 0); - keypad(teco_interface.cmdline_window, TRUE); - nodelay(teco_interface.cmdline_window, TRUE); + + teco_interface.input_pad = newpad(1, 1); + /* + * Controlling function key processing is important + * on Unix Curses, as ESCAPE is handled as the beginning + * of a escape sequence when terminal emulators are + * involved. + * Still, it's now enabled always since the ESCDELAY + * workaround works nicely. + * On some Curses variants (XCurses) keypad + * must always be TRUE so we receive KEY_RESIZE. + */ + keypad(teco_interface.input_pad, TRUE); + nodelay(teco_interface.input_pad, TRUE); teco_interface.input_queue = g_queue_new(); @@ -748,8 +761,8 @@ teco_interface_restore_batch(void) * Set window title to a reasonable default, * in case it is not reset immediately by the * shell. - * FIXME: See set_window_title() why this - * is necessary. + * FIXME: See teco_interface_set_window_title() + * why this is necessary. */ #if defined(CURSES_TTY) && defined(HAVE_TIGETSTR) teco_interface_set_window_title(g_getenv("TERM") ? : ""); @@ -978,10 +991,14 @@ teco_interface_draw_info(void) const gchar *info_type_str; + waddstr(teco_interface.info_window, PACKAGE_NAME " "); + switch (teco_interface.info_type) { case TECO_INFO_TYPE_QREG: info_type_str = PACKAGE_NAME " - <QRegister> "; - waddstr(teco_interface.info_window, info_type_str); + teco_curses_add_wc(teco_interface.info_window, + teco_ed & TECO_ED_ICONS ? 
TECO_CURSES_ICONS_QREG : '-'); + waddstr(teco_interface.info_window, " <QRegister> "); /* same formatting as in command lines */ teco_curses_format_str(teco_interface.info_window, teco_interface.info_current.data, @@ -990,10 +1007,15 @@ teco_interface_draw_info(void) case TECO_INFO_TYPE_BUFFER: info_type_str = PACKAGE_NAME " - <Buffer> "; - waddstr(teco_interface.info_window, info_type_str); g_assert(!teco_string_contains(&teco_interface.info_current, '\0')); + teco_curses_add_wc(teco_interface.info_window, + teco_ed & TECO_ED_ICONS ? teco_curses_icons_lookup_file(teco_interface.info_current.data) : '-'); + waddstr(teco_interface.info_window, " <Buffer> "); teco_curses_format_filename(teco_interface.info_window, - teco_interface.info_current.data, -1); + teco_interface.info_current.data, + getmaxx(teco_interface.info_window) - + getcurx(teco_interface.info_window) - 1); + waddch(teco_interface.info_window, teco_interface.info_dirty ? '*' : ' '); break; default: @@ -1003,13 +1025,13 @@ teco_interface_draw_info(void) wclrtoeol(teco_interface.info_window); /* - * Make sure the title will consist only of printable - * characters + * Make sure the title will consist only of printable characters. */ g_autofree gchar *info_current_printable; info_current_printable = teco_string_echo(teco_interface.info_current.data, teco_interface.info_current.len); - g_autofree gchar *title = g_strconcat(info_type_str, info_current_printable, NULL); + g_autofree gchar *title = g_strconcat(info_type_str, info_current_printable, + teco_interface.info_dirty ? 
"*" : "", NULL); teco_interface_set_window_title(title); } @@ -1019,6 +1041,7 @@ teco_interface_info_update_qreg(const teco_qreg_t *reg) teco_string_clear(&teco_interface.info_current); teco_string_init(&teco_interface.info_current, reg->head.name.data, reg->head.name.len); + teco_interface.info_dirty = FALSE; teco_interface.info_type = TECO_INFO_TYPE_QREG; /* NOTE: drawn in teco_interface_event_loop_iter() */ } @@ -1030,8 +1053,7 @@ teco_interface_info_update_buffer(const teco_buffer_t *buffer) teco_string_clear(&teco_interface.info_current); teco_string_init(&teco_interface.info_current, filename, strlen(filename)); - teco_string_append_c(&teco_interface.info_current, - buffer->dirty ? '*' : ' '); + teco_interface.info_dirty = buffer->dirty; teco_interface.info_type = TECO_INFO_TYPE_BUFFER; /* NOTE: drawn in teco_interface_event_loop_iter() */ } @@ -1044,7 +1066,8 @@ teco_interface_cmdline_update(const teco_cmdline_t *cmdline) * We don't know if it is similar to the last one, * so resizing makes no sense. * We approximate the size of the new formatted command-line, - * wasting a few bytes for control characters. + * wasting a few bytes for control characters and + * multi-byte Unicode sequences. */ if (teco_interface.cmdline_pad) delwin(teco_interface.cmdline_pad); @@ -1172,7 +1195,7 @@ teco_interface_set_clipboard(const gchar *name, const gchar *str, gsize str_len, { int rc = str ? 
PDC_setclipboard(str, str_len) : PDC_clearclipboard(); if (rc != PDC_CLIP_SUCCESS) { - g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, + g_set_error(error, TECO_ERROR, TECO_ERROR_CLIPBOARD, "Error %d copying to clipboard", rc); return FALSE; } @@ -1194,7 +1217,7 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError if (rc == PDC_CLIP_EMPTY) return TRUE; if (rc != PDC_CLIP_SUCCESS) { - g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, + g_set_error(error, TECO_ERROR, TECO_ERROR_CLIPBOARD, "Error %d retrieving clipboard", rc); return FALSE; } @@ -1232,9 +1255,17 @@ teco_interface_init_clipboard(void) * must be enabled. * There is no way to find out if they are but we must * not register the clipboard registers if they aren't. - * Therefore, a special XTerm clipboard ED flag an be set by the user. + * Still, XTerm clipboards are broken with Unicode characters. + * Also, there are other terminal emulators supporting OSC-52, + * so the XTerm version is only checked if the terminal identifies as XTerm. + * Also, a special clipboard ED flag must be set by the user. + * + * NOTE: Apparently there is also a terminfo entry Ms, but it's probably + * not worth using it since it won't always be set and even if set, does not + * tell you whether the terminal will actually answer to the escape sequence or not. */ - if (!(teco_ed & TECO_ED_XTERM_CLIPBOARD) || teco_xterm_version() < 203) + if (!(teco_ed & TECO_ED_OSC52) || + (teco_xterm_version() >= 0 && teco_xterm_version() < 203)) return; teco_qreg_table_insert(&teco_qreg_table_globals, teco_qreg_clipboard_new("")); @@ -1300,6 +1331,8 @@ teco_interface_set_clipboard(const gchar *name, const gchar *str, gsize str_len, gboolean teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError **error) { + gboolean ret = TRUE; + /* * Query the clipboard -- XTerm will reply with the * OSC-52 command that would set the current selection. 
@@ -1320,18 +1353,19 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError * to be on the safe side. */ halfdelay(1); /* 100ms timeout */ - keypad(stdscr, FALSE); + /* don't interpret escape sequences */ + keypad(teco_interface.input_pad, FALSE); /* * Skip "\e]52;x;" (7 characters). */ for (gint i = 0; i < 7; i++) { - if (getch() == ERR) { + ret = wgetch(teco_interface.input_pad) != ERR; + if (!ret) { /* timeout */ - cbreak(); - g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CLIPBOARD, "Timed out reading XTerm clipboard"); - return FALSE; + goto cleanup; } } @@ -1347,17 +1381,22 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError */ gchar buffer[MAX(3, 7)]; - gchar c = (gchar)getch(); - if (c == ERR) { + gchar c = (gchar)wgetch(teco_interface.input_pad); + ret = c != ERR; + if (!ret) { /* timeout */ - cbreak(); g_string_free(str_base64, TRUE); - g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CLIPBOARD, "Timed out reading XTerm clipboard"); - return FALSE; + goto cleanup; } if (c == '\a') break; + if (c == '\e') { + /* OSC escape sequence can also be terminated by "\e\\" */ + c = (gchar)wgetch(teco_interface.input_pad); + break; + } /* * This could be simplified using sscanf() and @@ -1372,14 +1411,16 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError g_string_append_len(str_base64, buffer, out_len); } - cbreak(); - if (str) *str = str_base64->str; *len = str_base64->len; g_string_free(str_base64, !str); - return TRUE; + +cleanup: + keypad(teco_interface.input_pad, TRUE); + nodelay(teco_interface.input_pad, TRUE); + return ret; } #else /* !PDCURSES && !CURSES_TTY */ @@ -1489,13 +1530,17 @@ teco_interface_is_interrupted(void) gboolean teco_interface_is_interrupted(void) { - if (!teco_interface.cmdline_window) + if (!teco_interface.input_pad) /* batch 
mode */ return teco_interrupted != FALSE; - /* NOTE: getch() is configured to be nonblocking. */ + /* + * NOTE: wgetch() is configured to be nonblocking. + * We wgetch() on a dummy pad, so this does not call any + * wrefresh(). + */ gint key; - while ((key = wgetch(teco_interface.cmdline_window)) != ERR) { + while ((key = wgetch(teco_interface.input_pad)) != ERR) { if (G_UNLIKELY(key == TECO_CTL_KEY('C'))) return TRUE; g_queue_push_tail(teco_interface.input_queue, @@ -1535,35 +1580,19 @@ teco_interface_refresh(void) static gint teco_interface_blocking_getch(void) { - /* - * Setting function key processing is important - * on Unix Curses, as ESCAPE is handled as the beginning - * of a escape sequence when terminal emulators are - * involved. - * On some Curses variants (XCurses) however, keypad - * must always be TRUE so we receive KEY_RESIZE. - * - * FIXME: NetBSD's curses could be handled like ncurses, - * but gets into an undefined state when SciTECO processes - * escape sequences. - */ -#ifdef NCURSES_UNIX - keypad(teco_interface.cmdline_window, teco_ed & TECO_ED_FNKEYS); -#endif - /* no special <CTRL/C> handling */ raw(); - nodelay(teco_interface.cmdline_window, FALSE); + nodelay(teco_interface.input_pad, FALSE); /* * Memory limiting is stopped temporarily, since it might otherwise * constantly place 100% load on the CPU. 
*/ teco_memory_stop_limiting(); - gint key = wgetch(teco_interface.cmdline_window); + gint key = wgetch(teco_interface.input_pad); teco_memory_start_limiting(); /* allow asynchronous interruptions on <CTRL/C> */ teco_interrupted = FALSE; - nodelay(teco_interface.cmdline_window, TRUE); + nodelay(teco_interface.input_pad, TRUE); #if defined(CURSES_TTY) || defined(PDCURSES_WINCON) || defined(NCURSES_WIN32) noraw(); /* FIXME: necessary because of NCURSES_WIN32 bug */ cbreak(); @@ -1585,6 +1614,11 @@ teco_interface_blocking_getch(void) void teco_interface_event_loop_iter(void) { + static gchar keybuf[4]; + static gint keybuf_i = 0; + + GError **error = &teco_interface.event_loop_error; + gint key = g_queue_is_empty(teco_interface.input_queue) ? teco_interface_blocking_getch() : GPOINTER_TO_INT(g_queue_pop_head(teco_interface.input_queue)); @@ -1613,23 +1647,24 @@ teco_interface_event_loop_iter(void) * backspace. * In SciTECO backspace is normalized to ^H. */ - if (!teco_cmdline_keypress_c(TECO_CTL_KEY('H'), - &teco_interface.event_loop_error)) + if (!teco_cmdline_keymacro_c(TECO_CTL_KEY('H'), error)) return; break; case KEY_ENTER: case '\r': case '\n': - if (!teco_cmdline_keypress_c('\n', &teco_interface.event_loop_error)) + if (!teco_cmdline_keymacro_c('\n', error)) return; break; /* * Function key macros + * + * FIXME: Perhaps support everything returned by keyname()? 
*/ #define FN(KEY) \ case KEY_##KEY: \ - if (!teco_cmdline_fnmacro(#KEY, &teco_interface.event_loop_error)) \ + if (!teco_cmdline_keymacro(#KEY, -1, error)) \ return; \ break #define FNS(KEY) FN(KEY); FN(S##KEY) @@ -1639,9 +1674,8 @@ teco_interface_event_loop_iter(void) gchar macro_name[3+1]; g_snprintf(macro_name, sizeof(macro_name), - "F%d", key - KEY_F0); - if (!teco_cmdline_fnmacro(macro_name, - &teco_interface.event_loop_error)) + "F%d", key - KEY_F0); + if (!teco_cmdline_keymacro(macro_name, -1, error)) return; break; } @@ -1660,9 +1694,31 @@ teco_interface_event_loop_iter(void) * Control keys and keys with printable representation */ default: - if (key < 0x80 && - !teco_cmdline_keypress_c(key, &teco_interface.event_loop_error)) + if (key > 0xFF) + /* unhandled function key */ return; + + /* + * NOTE: There's also wget_wch(), but it requires + * a widechar version of Curses. + */ + keybuf[keybuf_i++] = key; + gsize len = keybuf_i; + gunichar cp = g_utf8_get_char_validated(keybuf, len); + if (keybuf_i >= sizeof(keybuf) || cp != (gunichar)-2) + keybuf_i = 0; + if ((gint32)cp < 0) + /* incomplete or invalid */ + return; + switch (teco_cmdline_keymacro(keybuf, len, error)) { + case TECO_KEYMACRO_ERROR: + return; + case TECO_KEYMACRO_SUCCESS: + break; + case TECO_KEYMACRO_UNDEFINED: + if (!teco_cmdline_keypress(keybuf, len, error)) + return; + } } teco_interface_refresh(); @@ -1733,6 +1789,8 @@ teco_interface_cleanup(void) delwin(teco_interface.cmdline_pad); if (teco_interface.msg_window) delwin(teco_interface.msg_window); + if (teco_interface.input_pad) + delwin(teco_interface.input_pad); /* * PDCurses/WinCon crashes if initscr() wasn't called. 
diff --git a/src/interface-gtk/gtk-info-popup.c b/src/interface-gtk/gtk-info-popup.c index 744900d..4e25224 100644 --- a/src/interface-gtk/gtk-info-popup.c +++ b/src/interface-gtk/gtk-info-popup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface-gtk/gtk-info-popup.h b/src/interface-gtk/gtk-info-popup.h index de4b463..c3a62ec 100644 --- a/src/interface-gtk/gtk-info-popup.h +++ b/src/interface-gtk/gtk-info-popup.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface-gtk/gtk-label.c b/src/interface-gtk/gtk-label.c index c1f4867..50cd345 100644 --- a/src/interface-gtk/gtk-label.c +++ b/src/interface-gtk/gtk-label.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface-gtk/gtk-label.h b/src/interface-gtk/gtk-label.h index d2e2314..bed6642 100644 --- a/src/interface-gtk/gtk-label.h +++ b/src/interface-gtk/gtk-label.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface-gtk/interface.c b/src/interface-gtk/interface.c index 253600a..843ad15 100644 --- a/src/interface-gtk/interface.c +++ b/src/interface-gtk/interface.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * 
Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -279,7 +279,8 @@ teco_interface_init(void) "type-label"); gtk_header_bar_pack_start(GTK_HEADER_BAR(teco_interface.info_bar_widget), teco_interface.info_type_widget); - if (teco_interface.xembed_id || teco_interface.no_csd) { + if (teco_interface.xembed_id || teco_interface.no_csd || + !g_strcmp0(g_getenv("GTK_CSD"), "0")) { /* fall back to adding the info bar as an ordinary widget */ gtk_box_pack_start(GTK_BOX(vbox), teco_interface.info_bar_widget, FALSE, FALSE, 0); @@ -390,12 +391,6 @@ teco_interface_init(void) GOptionGroup * teco_interface_get_options(void) { - /* - * FIXME: On platforms where you want to disable CSD, you usually - * want to disable it always, so it should be configurable in the SciTECO - * profile. - * On the other hand, you could just install gtk3-nocsd. - */ static const GOptionEntry entries[] = { {"no-csd", 0, G_OPTION_FLAG_IN_MAIN, G_OPTION_ARG_NONE, &teco_interface.no_csd, @@ -656,15 +651,46 @@ teco_interface_get_selection_by_name(const gchar *name) return gdk_atom_intern(name, FALSE); } +static void +teco_interface_clipboard_provide(GtkClipboard *clipboard, GtkSelectionData *selection, guint info, gpointer userdata) +{ + GString *str = userdata; + gtk_selection_data_set_text(selection, str->str, str->len); +} + +static void +teco_interface_clipboard_clear(GtkClipboard *clipboard, gpointer userdata) +{ + GString *str = userdata; + g_string_free(str, TRUE); +} + gboolean teco_interface_set_clipboard(const gchar *name, const gchar *str, gsize str_len, GError **error) { + static const GtkTargetEntry target = {"UTF8_STRING", 0, 0}; GtkClipboard *clipboard = gtk_clipboard_get(teco_interface_get_selection_by_name(name)); + if (!str) { + gtk_clipboard_clear(clipboard); + return TRUE; + } + /* - * NOTE: function has compatible semantics for str_len < 0. 
+ * NOTE: gtk_clipboard_set_text() would ignore embedded nulls, + * even though it takes a length. + * We could theoretically avoid one allocation, but don't yet have proper types + * to store string data with length in one heap object. */ - gtk_clipboard_set_text(clipboard, str, str_len); + GString *gstr = g_string_new_len(str, str_len); + if (!gtk_clipboard_set_with_data(clipboard, &target, 1, + teco_interface_clipboard_provide, + teco_interface_clipboard_clear, gstr)) { + g_string_free(gstr, TRUE); + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CLIPBOARD, + "Cannot set clipboard"); + return FALSE; + } return TRUE; } @@ -674,16 +700,28 @@ teco_interface_get_clipboard(const gchar *name, gchar **str, gsize *len, GError { GtkClipboard *clipboard = gtk_clipboard_get(teco_interface_get_selection_by_name(name)); /* - * Could return NULL for an empty clipboard. + * gtk_clipboard_wait_for_text() does not return the text length, + * so it doesn't work with embedded nulls. + * gtk_clipboard_wait_for_contents() could also return NULL for empty clipboards. * - * FIXME: This converts to UTF8 and we loose the ability - * to get clipboard with embedded nulls. + * NOTE: This also drives the main event loop, + * which should be safe (see teco_interface_key_pressed_cb()). */ - g_autofree gchar *contents = gtk_clipboard_wait_for_text(clipboard); + GdkAtom utf8_string = gdk_atom_intern_static_string("UTF8_STRING"); + g_autoptr(GtkSelectionData) contents = gtk_clipboard_wait_for_contents(clipboard, utf8_string); + if (!contents) { + *len = 0; + if (str) + *str = NULL; + return TRUE; + } - *len = contents ? 
strlen(contents) : 0; - if (str) - *str = g_steal_pointer(&contents); + *len = gtk_selection_data_get_length(contents); + if (str) { + /* gtk_selection_data_get_text() does not work with embedded nulls */ + *str = memcpy(g_malloc(*len+1), gtk_selection_data_get_data(contents), *len); + (*str)[*len] = '\0'; + } return TRUE; } @@ -881,19 +919,50 @@ teco_interface_cmdline_commit_cb(GtkIMContext *context, gchar *str, gpointer use { g_autoptr(GError) error = NULL; - /* - * FIXME: This is only for consistency as long as we - * do not support Unicode. - */ - for (char *p = str; *p != '\0'; p = g_utf8_next_char(p)) - if (g_utf8_get_char(p) >= 0x80) - return; - if (!teco_cmdline_keypress(str, strlen(str), &error) && g_error_matches(error, TECO_ERROR, TECO_ERROR_QUIT)) gtk_main_quit(); } +/** + * Try to find an ANSI (latin) key for a given keypress. + * + * If the given key press does not generate a key from the ANSI + * range, it tries to find one in another group. + * + * @param event Key event to look up. In case of success, + * this event structure might also be written to. + * @return The codepoint of the ANSI version or 0 if there is + * no fitting ANSI/latin key. 
+ */ +static gchar +teco_interface_get_ansi_key(GdkEventKey *event) +{ + gunichar cp = gdk_keyval_to_unicode(event->keyval); + if (cp && cp < 0x80) + return cp; + + GdkKeymap *map = gdk_keymap_get_for_display(gdk_window_get_display(event->window)); + g_autofree GdkKeymapKey *keys = NULL; + g_autofree guint *keyvals = NULL; + gint n_entries = 0; + + gdk_keymap_get_entries_for_keycode(map, event->hardware_keycode, + &keys, &keyvals, &n_entries); + for (gint i = 0; i < n_entries; i++) { + g_assert(keys[i].keycode == event->hardware_keycode); + cp = gdk_keyval_to_unicode(keyvals[i]); + if (cp && cp < 0x80 && + gdk_keyval_is_upper(keyvals[i]) == gdk_keyval_is_upper(event->keyval)) { + event->keyval = keyvals[i]; + event->group = keys[i].group; + return cp; + } + } + + return 0; +} + static gboolean teco_interface_handle_key_press(GdkEventKey *event, GError **error) { @@ -901,19 +970,19 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error) switch (event->keyval) { case GDK_KEY_Escape: - if (!teco_cmdline_keypress_c('\e', error)) + if (!teco_cmdline_keymacro_c('\e', error)) return FALSE; break; case GDK_KEY_BackSpace: - if (!teco_cmdline_keypress_c(TECO_CTL_KEY('H'), error)) + if (!teco_cmdline_keymacro_c(TECO_CTL_KEY('H'), error)) return FALSE; break; case GDK_KEY_Tab: - if (!teco_cmdline_keypress_c('\t', error)) + if (!teco_cmdline_keymacro_c('\t', error)) return FALSE; break; case GDK_KEY_Return: - if (!teco_cmdline_keypress_c('\n', error)) + if (!teco_cmdline_keymacro_c('\n', error)) return FALSE; break; @@ -922,12 +991,12 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error) */ #define FN(KEY, MACRO) \ case GDK_KEY_##KEY: \ - if (!teco_cmdline_fnmacro(#MACRO, error)) \ + if (!teco_cmdline_keymacro(#MACRO, -1, error)) \ return FALSE; \ break #define FNS(KEY, MACRO) \ case GDK_KEY_##KEY: \ - if (!teco_cmdline_fnmacro(event->state & GDK_SHIFT_MASK ? 
"S" #MACRO : #MACRO, error)) \ + if (!teco_cmdline_keymacro(event->state & GDK_SHIFT_MASK ? "S" #MACRO : #MACRO, -1, error)) \ return FALSE; \ break FN(Down, DOWN); FN(Up, UP); @@ -939,8 +1008,8 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error) gchar macro_name[3+1]; g_snprintf(macro_name, sizeof(macro_name), - "F%d", event->keyval - GDK_KEY_F1 + 1); - if (!teco_cmdline_fnmacro(macro_name, error)) + "F%d", event->keyval - GDK_KEY_F1 + 1); + if (!teco_cmdline_keymacro(macro_name, -1, error)) return FALSE; break; } @@ -960,33 +1029,72 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error) /* * Control keys and keys with printable representation */ - default: { - gunichar u = gdk_keyval_to_unicode(event->keyval); + default: + /* + * NOTE: Alt-Gr key-combinations are sometimes reported as + * Ctrl+Alt, so we filter those out. + */ + if ((event->state & (GDK_CONTROL_MASK | GDK_MOD1_MASK)) == GDK_CONTROL_MASK) { + gchar c = teco_interface_get_ansi_key(event); + if (c) { + if (!teco_cmdline_keymacro_c(TECO_CTL_KEY(g_ascii_toupper(c)), error)) + return FALSE; + break; + } + } - if (u && u < 0x80 && (event->state & (GDK_CONTROL_MASK | GDK_MOD1_MASK)) == GDK_CONTROL_MASK) { - /* - * NOTE: Alt-Gr key-combinations are sometimes reported as - * Ctrl+Alt, so we filter those out. - */ - if (!teco_cmdline_keypress_c(TECO_CTL_KEY(g_ascii_toupper(u)), error)) + /* + * First look up a key macro. + * Only if it's undefined, we try to automatically find an ANSI key. + * On the downside, this means we cannot define key macros for dead keys + * or keys that require some sort of input method editing. + * + * FIXME: This might be a good reason to be able to disable the + * automatic ANSIfication, as we could look up the key macro in + * teco_interface_cmdline_commit_cb(). 
+ */ + gunichar cp = gdk_keyval_to_unicode(event->keyval); + if (cp) { + char buf[6]; + gsize len = g_unichar_to_utf8(cp, buf); + teco_keymacro_status_t rc = teco_cmdline_keymacro(buf, len, error); + if (rc == TECO_KEYMACRO_ERROR) return FALSE; - } else { - /* - * This is necessary to handle dead keys and in the future - * for inputting Asian languages. - * - * FIXME: We do not yet support preediting. - * It would be easier to forward the event to the Scintilla - * widget and use its existing IM support. - * But this breaks the event freezing and results in flickering. - */ - gtk_im_context_filter_keypress(teco_interface.input_method, event); + if (rc == TECO_KEYMACRO_SUCCESS) + break; + g_assert(rc == TECO_KEYMACRO_UNDEFINED); } - } + + /* + * If the current state is case-insensitive, it is a command name - + * which consists only of ANSI letters - we try to + * accept non-ANSI letters as well. + * This means, you don't have to change keyboard layouts + * so often. + * FIXME: This could be made to work with string-building constructs + * within Q-Register specs as well. + * Unfortunately, Q-Reg specs and string building can be nested + * indefinitely. + * This would effectively require a new keymacro_mask_cb(). + */ + if ((teco_cmdline.machine.parent.current->keymacro_mask | + teco_cmdline.machine.expectstring.machine.parent.current->keymacro_mask) & + TECO_KEYMACRO_MASK_CASEINSENSITIVE) + teco_interface_get_ansi_key(event); + + /* + * This is necessary to handle dead keys and in the future + * for inputting Asian languages. + * + * FIXME: We do not yet support preediting. + * It would be easier to forward the event to the Scintilla + * widget and use its existing IM support. + * But this breaks the event freezing and results in flickering. 
+ */ + gtk_im_context_filter_keypress(teco_interface.input_method, event); } teco_interface_refresh(teco_interface_current_view != last_view); - return TRUE; } @@ -997,7 +1105,7 @@ teco_interface_event_loop(GError **error) g_assert(scitecoconfig_reg != NULL); g_auto(teco_string_t) scitecoconfig = {NULL, 0}; if (!scitecoconfig_reg->vtable->get_string(scitecoconfig_reg, - &scitecoconfig.data, &scitecoconfig.len, error)) + &scitecoconfig.data, &scitecoconfig.len, NULL, error)) return FALSE; if (teco_string_contains(&scitecoconfig, '\0')) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, @@ -1006,45 +1114,6 @@ teco_interface_event_loop(GError **error) } g_assert(scitecoconfig.data != NULL); -#ifdef G_OS_WIN32 - /* - * FIXME: This is necessary so that the icon themes are found in the same - * directory as sciteco.exe. - * This fails of course when $SCITECOCONFIG is changed. - * We should perhaps always use the absolute path of sciteco.exe. - * If you want to install SciTECO differently, you can still set - * $XDG_DATA_DIRS. - * - * FIXME FIXME FIXME: This is also currently broken. - */ - //g_autofree char *theme_path = g_build_filename(scitecoconfig.data, "icons"); - //gtk_icon_theme_prepend_search_path(gtk_icon_theme_get_default(), theme_path); -#else - /* - * Load icons for the GTK window. - * This is not necessary on Windows since the icon included - * as a resource will be used by default. 
- */ - static const gchar *icon_files[] = { - SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-48.png", - SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-32.png", - SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-16.png" - }; - GList *icon_list = NULL; - - for (gint i = 0; i < G_N_ELEMENTS(icon_files); i++) { - GdkPixbuf *icon_pixbuf = gdk_pixbuf_new_from_file(icon_files[i], NULL); - - /* fail silently if there's a problem with one of the icons */ - if (icon_pixbuf) - icon_list = g_list_append(icon_list, icon_pixbuf); - } - - gtk_window_set_default_icon_list(icon_list); - - g_list_free_full(icon_list, g_object_unref); -#endif - /* * Initialize the CSS variable provider and the CSS provider * for the included fallback.css. @@ -1087,6 +1156,50 @@ teco_interface_event_loop(GError **error) /* don't show popup by default */ gtk_widget_hide(teco_interface.popup_widget); +#ifdef G_OS_WIN32 + /* + * FIXME: This is necessary so that the icon themes are found in the same + * directory as sciteco.exe. + * This fails of course when $SCITECOCONFIG is changed. + * We should perhaps always use the absolute path of sciteco.exe. + * If you want to install SciTECO differently, you can still set + * $XDG_DATA_DIRS. + * + * FIXME FIXME FIXME: This is also currently broken. + */ + //g_autofree char *theme_path = g_build_filename(scitecoconfig.data, "icons"); + //gtk_icon_theme_prepend_search_path(gtk_icon_theme_get_default(), theme_path); +#else + /* + * Load icons for the GTK window. + * This is not necessary on Windows since the icon included + * as a resource will be used by default. 
+ */ + static const gchar *icon_files[] = { + SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-48.png", + SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-32.png", + SCITECODATADIR G_DIR_SEPARATOR_S "sciteco-16.png" + }; + GList *icon_list = NULL; + + for (gint i = 0; i < G_N_ELEMENTS(icon_files); i++) { + GdkPixbuf *icon_pixbuf = gdk_pixbuf_new_from_file(icon_files[i], NULL); + + /* fail silently if there's a problem with one of the icons */ + if (icon_pixbuf) + icon_list = g_list_append(icon_list, icon_pixbuf); + } + + /* + * The position of this call after gtk_widget_show() is important, so that + * tabbed and other Xembed hosts can pick up the icon. + * They also do not pick up the icon if set via gtk_window_set_default_icon_list(). + */ + gtk_window_set_icon_list(GTK_WINDOW(teco_interface.window), icon_list); + + g_list_free_full(icon_list, g_object_unref); +#endif + /* * SIGTERM emulates the "Close" key just like when * closing the window if supported by this version of glib. diff --git a/src/interface.c b/src/interface.c index a2042db..2e2d64e 100644 --- a/src/interface.c +++ b/src/interface.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/interface.h b/src/interface.h index 3170849..32db6b5 100644 --- a/src/interface.h +++ b/src/interface.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -154,6 +154,36 @@ void teco_interface_process_notify(SCNotification *notify); /** @pure */ void teco_interface_cleanup(void); +static inline guint +teco_interface_get_codepage(void) +{ + return teco_view_get_codepage(teco_interface_current_view); +} + +static inline 
gssize +teco_interface_glyphs2bytes(teco_int_t pos) +{ + return teco_view_glyphs2bytes(teco_interface_current_view, pos); +} + +static inline teco_int_t +teco_interface_bytes2glyphs(gsize pos) +{ + return teco_view_bytes2glyphs(teco_interface_current_view, pos); +} + +static inline gssize +teco_interface_glyphs2bytes_relative(gsize pos, teco_int_t n) +{ + return teco_view_glyphs2bytes_relative(teco_interface_current_view, pos, n); +} + +static inline teco_int_t +teco_interface_get_character(gsize pos, gsize len) +{ + return teco_view_get_character(teco_interface_current_view, pos, len); +} + /* * The following functions are here for lack of a better place. * They could also be in sciteco.h, but only if declared as non-inline @@ -161,12 +191,6 @@ void teco_interface_cleanup(void); */ static inline gboolean -teco_validate_pos(teco_int_t n) -{ - return 0 <= n && n <= teco_interface_ssm(SCI_GETLENGTH, 0, 0); -} - -static inline gboolean teco_validate_line(teco_int_t n) { return 0 <= n && n < teco_interface_ssm(SCI_GETLINECOUNT, 0, 0); @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,6 +23,7 @@ #include <string.h> #include <stdlib.h> #include <signal.h> +#include <locale.h> #include <glib.h> #include <glib/gprintf.h> @@ -104,9 +105,10 @@ teco_get_default_config_path(const gchar *program) static gchar *teco_eval_macro = NULL; static gboolean teco_mung_file = FALSE; static gboolean teco_mung_profile = TRUE; +static gboolean teco_8bit_clean = FALSE; static gchar * -teco_process_options(gint *argc, gchar ***argv) 
+teco_process_options(gchar ***argv) { static const GOptionEntry option_entries[] = { {"eval", 'e', 0, G_OPTION_ARG_STRING, &teco_eval_macro, @@ -119,6 +121,8 @@ teco_process_options(gint *argc, gchar ***argv) "Do not mung " "$SCITECOCONFIG" G_DIR_SEPARATOR_S INI_FILE " " "even if it exists"}, + {"8bit", '8', 0, G_OPTION_ARG_NONE, &teco_8bit_clean, + "Use ANSI encoding by default and disable automatic EOL conversion"}, {NULL} }; @@ -133,7 +137,7 @@ teco_process_options(gint *argc, gchar ***argv) g_option_context_set_description( options, "Bug reports should go to <" PACKAGE_BUGREPORT "> or " - "<" PACKAGE_URL_DEV ">." + "<" PACKAGE_URL ">." ); g_option_context_add_main_entries(options, option_entries, NULL); @@ -155,7 +159,7 @@ teco_process_options(gint *argc, gchar ***argv) */ g_option_context_set_strict_posix(options, TRUE); - if (!g_option_context_parse(options, argc, argv, &error)) { + if (!g_option_context_parse_strv(options, argv, &error)) { g_fprintf(stderr, "Option parsing failed: %s\n", error->message); exit(EXIT_FAILURE); @@ -170,16 +174,13 @@ teco_process_options(gint *argc, gchar ***argv) * and "--" is not the first non-option argument as in * sciteco foo -- -C bar. 
+ */ - if (*argc >= 2 && !strcmp((*argv)[1], "--")) { - (*argv)[1] = (*argv)[0]; - (*argv)++; - (*argc)--; - } + if ((*argv)[0] && !g_strcmp0((*argv)[1], "--")) + g_free(teco_strv_remove(*argv, 1)); gchar *mung_filename = NULL; if (teco_mung_file) { - if (*argc < 2) { + if (!(*argv)[0] || !(*argv)[1]) { g_fprintf(stderr, "Script to mung expected!\n"); exit(EXIT_FAILURE); } @@ -190,11 +191,7 @@ teco_process_options(gint *argc, gchar ***argv) exit(EXIT_FAILURE); } - mung_filename = g_strdup((*argv)[1]); - - (*argv)[1] = (*argv)[0]; - (*argv)++; - (*argc)--; + mung_filename = teco_strv_remove(*argv, 1); } return mung_filename; @@ -306,12 +303,39 @@ main(int argc, char **argv) signal(SIGINT, teco_sigint_handler); signal(SIGTERM, teco_sigint_handler); - g_autofree gchar *mung_filename = teco_process_options(&argc, &argv); + /* + * Important for Unicode handling in curses and glib. + * In particular, in order to accept Unicode characters + * in option strings. + * + * NOTE: Windows 10 accepts ".UTF8" here, so the "ANSI" + * versions of win32 API functions accept UTF-8. + * We want to support older versions, though and + * glib happily converts to Windows' native UTF-16. + */ + setlocale(LC_ALL, ""); + +#ifdef G_OS_WIN32 + /* + * main()'s argv is in the system locale, so we might lose + * information when passing it to g_option_context_parse(). + * The remaining strings are also not guaranteed to be in + * UTF-8. + */ + g_auto(GStrv) argv_utf8 = g_win32_get_command_line(); +#else + g_auto(GStrv) argv_utf8 = g_strdupv(argv); +#endif + g_autofree gchar *mung_filename = teco_process_options(&argv_utf8); /* * All remaining arguments in argv are arguments * to the macro or munged file. */ + if (teco_8bit_clean) + /* equivalent to 16,4ED but executed earlier */ + teco_ed = (teco_ed & ~TECO_ED_AUTOEOL) | TECO_ED_DEFAULT_ANSI; + /* * Theoretically, QReg tables should only be initialized * after the interface, since they contain Scintilla documents.
@@ -343,7 +367,7 @@ main(int argc, char **argv) /* current working directory ("$") */ teco_qreg_table_insert(&teco_qreg_table_globals, teco_qreg_workingdir_new()); /* environment defaults and registers */ - teco_initialize_environment(argv[0]); + teco_initialize_environment(argv_utf8[0]); teco_qreg_table_t local_qregs; teco_qreg_table_init(&local_qregs, TRUE); @@ -361,8 +385,8 @@ main(int argc, char **argv) * Also, the Unnamed Buffer should be kept empty for piping. * Therefore, it would be best to store the arguments in Q-Regs, e.g. $0,$1,$2... */ - for (gint i = 1; i < argc; i++) { - teco_interface_ssm(SCI_APPENDTEXT, strlen(argv[i]), (sptr_t)argv[i]); + for (gint i = 1; argv_utf8[i]; i++) { + teco_interface_ssm(SCI_APPENDTEXT, strlen(argv_utf8[i]), (sptr_t)argv_utf8[i]); teco_interface_ssm(SCI_APPENDTEXT, 1, (sptr_t)"\n"); } diff --git a/src/memory.c b/src/memory.c index 6d7645c..26cde55 100644 --- a/src/memory.c +++ b/src/memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -288,7 +288,7 @@ * Current memory usage. * Access must be synchronized using atomic operations. */ -static gint teco_memory_usage = 0; +static guint teco_memory_usage = 0; /* * NOTE: This implementation based on malloc_usable_size() might @@ -495,6 +495,16 @@ teco_memory_get_usage(void) return procstk.ki_rssize * page_size; } +/** + * Options passed to jemalloc. + * + * It's crucial to disable opt.retain, so that freeing memory after + * recovering from memory limit hits actually decreases the RSS. + * The reasons for activating the option, mentioned in jemalloc(3), + * shouldn't be relevant on FreeBSD. 
+ */ +const gchar *malloc_conf = "retain:false"; + #define NEED_POLL_THREAD #elif defined(G_OS_UNIX) && defined(HAVE_SYSCONF) && defined(HAVE_PROCFS) @@ -628,8 +638,7 @@ teco_memory_stop_limiting(void) g_mutex_unlock(&teco_memory_mutex); } -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_memory_cleanup(void) { if (!teco_memory_thread) @@ -642,7 +651,6 @@ teco_memory_cleanup(void) g_thread_join(teco_memory_thread); } -#endif #else /* !NEED_POLL_THREAD */ @@ -660,7 +668,7 @@ gsize teco_memory_limit = 500*1000*1000; gboolean teco_memory_set_limit(gsize new_limit, GError **error) { - gsize memory_usage = g_atomic_int_get(&teco_memory_usage); + gsize memory_usage = (guint)g_atomic_int_get(&teco_memory_usage); if (G_UNLIKELY(new_limit && memory_usage > new_limit)) { g_autofree gchar *usage_str = g_format_size(memory_usage); @@ -693,18 +701,19 @@ teco_memory_set_limit(gsize new_limit, GError **error) gboolean teco_memory_check(gsize request, GError **error) { - gsize memory_usage = g_atomic_int_get(&teco_memory_usage) + request; + gsize memory_usage = (guint)g_atomic_int_get(&teco_memory_usage); + gsize requested_memory_usage = memory_usage+request; /* * Check for overflows. * NOTE: Glib 2.48 has g_size_checked_add(). */ - if (G_UNLIKELY(memory_usage < request)) + if (G_UNLIKELY(requested_memory_usage < memory_usage)) /* guaranteed to fail if memory limiting is enabled */ - memory_usage = G_MAXSIZE; + requested_memory_usage = G_MAXSIZE; - if (G_UNLIKELY(teco_memory_limit && memory_usage >= teco_memory_limit)) { - g_autofree gchar *limit_str = g_format_size(memory_usage); + if (G_UNLIKELY(teco_memory_limit && requested_memory_usage >= teco_memory_limit)) { + g_autofree gchar *limit_str = g_format_size(requested_memory_usage); g_set_error(error, TECO_ERROR, TECO_ERROR_MEMLIMIT, "Memory limit (%s) exceeded. 
See <EJ> command.", diff --git a/src/memory.h b/src/memory.h index f31a451..39f8319 100644 --- a/src/memory.h +++ b/src/memory.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/parser.c b/src/parser.c index 910fc7f..b1aa06e 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -52,16 +52,14 @@ teco_loop_stack_init(void) TECO_DEFINE_ARRAY_UNDO_INSERT_VAL(teco_loop_stack, teco_loop_context_t); TECO_DEFINE_ARRAY_UNDO_REMOVE_INDEX(teco_loop_stack); -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_loop_stack_cleanup(void) { g_array_free(teco_loop_stack, TRUE); } -#endif gboolean -teco_machine_input(teco_machine_t *ctx, gchar chr, GError **error) +teco_machine_input(teco_machine_t *ctx, gunichar chr, GError **error) { teco_state_t *next = ctx->current->input_cb(ctx, chr, error); if (!next) @@ -88,18 +86,22 @@ teco_state_end_of_macro(teco_machine_t *ctx, GError **error) } /** + * Execute macro from current PC to stop position. + * * Handles all expected exceptions and preparing them for stack frame insertion. + * + * @param ctx State machine. + * @param macro The macro to execute. + * It does not have to be complete. + * It must consist only of validated UTF-8 sequences, though. + * @param stop_pos Where to stop execution in bytes. + * @param error Location to store error. + * @return FALSE if an error occurred. 
*/ gboolean -teco_machine_main_step(teco_machine_main_t *ctx, const gchar *macro, gint stop_pos, GError **error) +teco_machine_main_step(teco_machine_main_t *ctx, const gchar *macro, gsize stop_pos, GError **error) { while (ctx->macro_pc < stop_pos) { -#ifdef DEBUG - g_printf("EXEC(%d): input='%c'/%x, state=%p, mode=%d\n", - ctx->macro_pc, macro[ctx->macro_pc], macro[ctx->macro_pc], - ctx->parent.current, ctx->mode); -#endif - if (G_UNLIKELY(teco_interface_is_interrupted())) { teco_error_interrupted_set(error); goto error_attach; @@ -112,9 +114,18 @@ teco_machine_main_step(teco_machine_main_t *ctx, const gchar *macro, gint stop_p if (!teco_memory_check(0, error)) goto error_attach; - if (!teco_machine_input(&ctx->parent, macro[ctx->macro_pc], error)) + /* UTF-8 sequences are already validated */ + gunichar chr = g_utf8_get_char(macro+ctx->macro_pc); + +#ifdef DEBUG + g_printf("EXEC(%d): input='%C' (U+%04" G_GINT32_MODIFIER "X), state=%p, mode=%d\n", + ctx->macro_pc, chr, chr, ctx->parent.current, ctx->mode); +#endif + + if (!teco_machine_input(&ctx->parent, chr, error)) goto error_attach; - ctx->macro_pc++; + + ctx->macro_pc = g_utf8_next_char(macro+ctx->macro_pc) - macro; } /* @@ -146,6 +157,14 @@ gboolean teco_execute_macro(const gchar *macro, gsize macro_len, teco_qreg_table_t *qreg_table_locals, GError **error) { + const teco_string_t str = {(gchar *)macro, macro_len}; + + if (!teco_string_validate_utf8(&str)) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Invalid UTF-8 byte sequence in macro"); + return FALSE; + } + /* * This is not auto-cleaned up, so it can be initialized * on demand. 
@@ -311,26 +330,26 @@ teco_machine_main_eval_colon(teco_machine_main_t *ctx) teco_state_t * teco_machine_main_transition_input(teco_machine_main_t *ctx, teco_machine_main_transition_t *transitions, - guint len, gchar chr, GError **error) + guint len, gunichar chr, GError **error) { - if (chr < 0 || chr >= len || !transitions[(guint)chr].next) { + if (chr >= len || !transitions[chr].next) { teco_error_syntax_set(error, chr); return NULL; } - if (ctx->mode == TECO_MODE_NORMAL && transitions[(guint)chr].transition_cb) { + if (ctx->mode == TECO_MODE_NORMAL && transitions[chr].transition_cb) { /* * NOTE: We could also just let transition_cb return a boolean... */ GError *tmp_error = NULL; - transitions[(guint)chr].transition_cb(ctx, &tmp_error); + transitions[chr].transition_cb(ctx, &tmp_error); if (tmp_error) { g_propagate_error(error, tmp_error); return NULL; } } - return transitions[(guint)chr].next; + return transitions[chr].next; } void @@ -340,15 +359,40 @@ teco_machine_main_clear(teco_machine_main_t *ctx) teco_machine_stringbuilding_clear(&ctx->expectstring.machine); } +/** Append string to result with case folding. */ +static void +teco_machine_stringbuilding_append(teco_machine_stringbuilding_t *ctx, const gchar *str, gsize len) +{ + g_assert(ctx->result != NULL); + + switch (ctx->mode) { + case TECO_STRINGBUILDING_MODE_NORMAL: + teco_string_append(ctx->result, str, len); + break; + case TECO_STRINGBUILDING_MODE_UPPER: { + g_autofree gchar *folded = ctx->codepage == SC_CP_UTF8 + ? g_utf8_strup(str, len) : g_ascii_strup(str, len); + teco_string_append(ctx->result, folded, strlen(folded)); + break; + } + case TECO_STRINGBUILDING_MODE_LOWER: { + g_autofree gchar *folded = ctx->codepage == SC_CP_UTF8 + ? g_utf8_strdown(str, len) : g_ascii_strdown(str, len); + teco_string_append(ctx->result, folded, strlen(folded)); + break; + } + } +} + /* * FIXME: All teco_state_stringbuilding_* states could be static? 
*/ static teco_state_t *teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, - gchar chr, GError **error); + gunichar chr, GError **error); TECO_DECLARE_STATE(teco_state_stringbuilding_ctl); static teco_state_t *teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, - gchar chr, GError **error); + gunichar chr, GError **error); TECO_DECLARE_STATE(teco_state_stringbuilding_escaped); TECO_DECLARE_STATE(teco_state_stringbuilding_lower); @@ -362,19 +406,29 @@ TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_quote); TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_n); static teco_state_t * -teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { - if (chr == '^') + switch (chr) { + case '^': return &teco_state_stringbuilding_ctl; - if (TECO_IS_CTL(chr)) - return teco_state_stringbuilding_ctl_input(ctx, TECO_CTL_ECHO(chr), error); + case TECO_CTL_KEY('^'): + /* + * Ctrl+^ is inserted verbatim as code 30. + * Otherwise it would expand to a single caret + * just like caret+caret (^^). 
+ */ + break; + default: + if (TECO_IS_CTL(chr)) + return teco_state_stringbuilding_ctl_input(ctx, TECO_CTL_ECHO(chr), error); + } return teco_state_stringbuilding_escaped_input(ctx, chr, error); } /* in cmdline.c */ gboolean teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, - gchar key, GError **error); + gunichar key, GError **error); TECO_DEFINE_STATE(teco_state_stringbuilding_start, .is_start = TRUE, @@ -383,12 +437,19 @@ TECO_DEFINE_STATE(teco_state_stringbuilding_start, ); static teco_state_t * -teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { chr = teco_ascii_toupper(chr); switch (chr) { - case '^': break; + case '^': + /* + * Double-caret expands to a single caret. + * Ctrl+^ (30) is handled separately and inserts code 30. + * The special handling of the double-caret should perhaps + * be abolished altogether. + */ + break; case 'Q': case 'R': return &teco_state_stringbuilding_escaped; case 'V': return &teco_state_stringbuilding_lower; @@ -398,85 +459,139 @@ teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gchar ch chr = TECO_CTL_KEY(chr); } + /* + * Source code is always in UTF-8, so it does not + * make sense to handle ctx->codepage != SC_CP_UTF8 + * separately. 
+ */ if (ctx->result) - teco_string_append_c(ctx->result, chr); + teco_string_append_wc(ctx->result, chr); return &teco_state_stringbuilding_start; } TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctl); static teco_state_t * -teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { if (!ctx->result) /* parse-only mode */ return &teco_state_stringbuilding_start; + /* + * The subtle difference between UTF-8 and single-byte targets + * is that we don't try to casefold non-ANSI characters in single-byte mode. + */ switch (ctx->mode) { + case TECO_STRINGBUILDING_MODE_NORMAL: + break; case TECO_STRINGBUILDING_MODE_UPPER: - chr = g_ascii_toupper(chr); + chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80 + ? g_unichar_toupper(chr) : chr; break; case TECO_STRINGBUILDING_MODE_LOWER: - chr = g_ascii_tolower(chr); - break; - default: + chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80 + ? 
g_unichar_tolower(chr) : chr; break; } - teco_string_append_c(ctx->result, chr); + teco_string_append_wc(ctx->result, chr); return &teco_state_stringbuilding_start; } -TECO_DEFINE_STATE(teco_state_stringbuilding_escaped); +/* in cmdline.c */ +gboolean teco_state_stringbuilding_escaped_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, + gunichar key, GError **error); + +TECO_DEFINE_STATE(teco_state_stringbuilding_escaped, + .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) + teco_state_stringbuilding_escaped_process_edit_cmd +); static teco_state_t * -teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_lower_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { if (!ctx->result) /* parse-only mode */ return &teco_state_stringbuilding_start; - /* - * FIXME: This does not handle ^V^V typed with up-carets. - */ - if (chr == TECO_CTL_KEY('V')) { + chr = teco_ascii_toupper(chr); + + if (chr == 'V') { if (ctx->parent.must_undo) teco_undo_guint(ctx->mode); ctx->mode = TECO_STRINGBUILDING_MODE_LOWER; } else { - teco_string_append_c(ctx->result, g_ascii_tolower(chr)); + /* control keys cannot be case folded */ + teco_string_append_wc(ctx->result, TECO_CTL_KEY(chr)); } return &teco_state_stringbuilding_start; } +TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_lower_ctl); + +static teco_state_t * +teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) +{ + if (chr == '^') + return &teco_state_stringbuilding_lower_ctl; + if (TECO_IS_CTL(chr)) + return teco_state_stringbuilding_lower_ctl_input(ctx, TECO_CTL_ECHO(chr), error); + + if (ctx->result) { + chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80 + ? 
g_unichar_tolower(chr) : chr; + teco_string_append_wc(ctx->result, chr); + } + return &teco_state_stringbuilding_start; +} + TECO_DEFINE_STATE(teco_state_stringbuilding_lower); static teco_state_t * -teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_upper_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { if (!ctx->result) /* parse-only mode */ return &teco_state_stringbuilding_start; - /* - * FIXME: This does not handle ^W^W typed with up-carets. - */ - if (chr == TECO_CTL_KEY('W')) { + chr = teco_ascii_toupper(chr); + + if (chr == 'W') { if (ctx->parent.must_undo) teco_undo_guint(ctx->mode); ctx->mode = TECO_STRINGBUILDING_MODE_UPPER; } else { - teco_string_append_c(ctx->result, g_ascii_toupper(chr)); + /* control keys cannot be case folded */ + teco_string_append_wc(ctx->result, TECO_CTL_KEY(chr)); } return &teco_state_stringbuilding_start; } +TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_upper_ctl); + +static teco_state_t * +teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) +{ + if (chr == '^') + return &teco_state_stringbuilding_upper_ctl; + if (TECO_IS_CTL(chr)) + return teco_state_stringbuilding_upper_ctl_input(ctx, TECO_CTL_ECHO(chr), error); + + if (ctx->result) { + chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80 + ? 
g_unichar_toupper(chr) : chr; + teco_string_append_wc(ctx->result, chr); + } + return &teco_state_stringbuilding_start; +} + TECO_DEFINE_STATE(teco_state_stringbuilding_upper); static teco_state_t * -teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { teco_state_t *next; @@ -488,8 +603,10 @@ teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gchar c case 'N': next = &teco_state_stringbuilding_ctle_n; break; default: if (ctx->result) { - gchar buf[] = {TECO_CTL_KEY('E'), chr}; - teco_string_append(ctx->result, buf, sizeof(buf)); + /* also makes sure that search patterns can start with ^E */ + gchar buf[1+6] = {TECO_CTL_KEY('E')}; + gsize len = g_unichar_to_utf8(chr, buf+1); + teco_machine_stringbuilding_append(ctx, buf, 1+len); } return &teco_state_stringbuilding_start; } @@ -507,7 +624,7 @@ TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctle); /* in cmdline.c */ gboolean teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, - gchar chr, GError **error); + gunichar chr, GError **error); /** * @interface TECO_DEFINE_STATE_STRINGBUILDING_QREG @@ -522,7 +639,7 @@ gboolean teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuil ) static teco_state_t * -teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { teco_qreg_t *qreg; @@ -549,7 +666,7 @@ teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gch */ gchar buffer[TECO_EXPRESSIONS_FORMAT_LEN]; const gchar *num = teco_expressions_format(buffer, value); - teco_string_append(ctx->result, num, strlen(num)); + teco_machine_stringbuilding_append(ctx, num, strlen(num)); return 
&teco_state_stringbuilding_start; } @@ -557,7 +674,7 @@ teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gch TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_num); static teco_state_t * -teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { teco_qreg_t *qreg; @@ -578,21 +695,51 @@ teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gchar teco_int_t value; if (!qreg->vtable->get_integer(qreg, &value, error)) return NULL; - if (value < 0 || value > 0xFF) { - g_autofree gchar *name_printable = teco_string_echo(qreg->head.name.data, qreg->head.name.len); - g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, - "Q-Register \"%s\" does not contain a valid character", name_printable); - return NULL; + + if (ctx->codepage == SC_CP_UTF8) { + if (value < 0 || !g_unichar_validate(value)) + goto error_codepoint; + switch (ctx->mode) { + case TECO_STRINGBUILDING_MODE_NORMAL: + break; + case TECO_STRINGBUILDING_MODE_UPPER: + value = g_unichar_toupper(value); + break; + case TECO_STRINGBUILDING_MODE_LOWER: + value = g_unichar_tolower(value); + break; + } + teco_string_append_wc(ctx->result, value); + } else { + if (value < 0 || value > 0xFF) + goto error_codepoint; + switch (ctx->mode) { + case TECO_STRINGBUILDING_MODE_NORMAL: + break; + case TECO_STRINGBUILDING_MODE_UPPER: + value = g_ascii_toupper(value); + break; + case TECO_STRINGBUILDING_MODE_LOWER: + value = g_ascii_tolower(value); + break; + } + teco_string_append_c(ctx->result, value); } - teco_string_append_c(ctx->result, (gchar)value); return &teco_state_stringbuilding_start; + +error_codepoint: { + g_autofree gchar *name_printable = teco_string_echo(qreg->head.name.data, qreg->head.name.len); + g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Q-Register \"%s\" does not contain a valid 
codepoint", name_printable); + return NULL; +} } TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_u); static teco_state_t * -teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { teco_qreg_t *qreg; @@ -610,20 +757,17 @@ teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gchar /* parse-only mode */ return &teco_state_stringbuilding_start; - /* - * FIXME: Should we have a special teco_qreg_get_string_append() function? - */ g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; - teco_string_append(ctx->result, str.data, str.len); + teco_machine_stringbuilding_append(ctx, str.data, str.len); return &teco_state_stringbuilding_start; } TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_q); static teco_state_t * -teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { teco_qreg_t *qreg; teco_qreg_table_t *table; @@ -643,7 +787,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g return &teco_state_stringbuilding_start; g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; /* * NOTE: g_shell_quote() expects a null-terminated string, so it is @@ -658,7 +802,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g return NULL; } g_autofree gchar *str_quoted = g_shell_quote(str.data ? 
: ""); - teco_string_append(ctx->result, str_quoted, strlen(str_quoted)); + teco_machine_stringbuilding_append(ctx, str_quoted, strlen(str_quoted)); return &teco_state_stringbuilding_start; } @@ -666,7 +810,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_quote); static teco_state_t * -teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error) +teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { teco_qreg_t *qreg; teco_qreg_table_t *table; @@ -686,7 +830,7 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar return &teco_state_stringbuilding_start; g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; if (teco_string_contains(&str, '\0')) { teco_error_qregcontainsnull_set(error, qreg->head.name.data, qreg->head.name.len, @@ -695,7 +839,7 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar } g_autofree gchar *str_escaped = teco_globber_escape_pattern(str.data); - teco_string_append(ctx->result, str_escaped, strlen(str_escaped)); + teco_machine_stringbuilding_append(ctx, str_escaped, strlen(str_escaped)); return &teco_state_stringbuilding_start; } @@ -703,13 +847,14 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_n); void -teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escape_char, +teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gunichar escape_char, teco_qreg_table_t *locals, gboolean must_undo) { memset(ctx, 0, sizeof(*ctx)); teco_machine_init(&ctx->parent, &teco_state_stringbuilding_start, must_undo); 
ctx->escape_char = escape_char; ctx->qreg_table_locals = locals; + ctx->codepage = teco_default_codepage(); } void @@ -723,6 +868,10 @@ teco_machine_stringbuilding_reset(teco_machine_stringbuilding_t *ctx) ctx->mode = TECO_STRINGBUILDING_MODE_NORMAL; } +/* + * If we case folded only ANSI characters as in teco_ascii_toupper(), + * this could be simplified. + */ void teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gchar *str, gsize len, teco_string_t *target) @@ -730,12 +879,18 @@ teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gch target->data = g_malloc(len*2+1); target->len = 0; - for (guint i = 0; i < len; i++) { - if (teco_ascii_toupper(str[i]) == ctx->escape_char || - (ctx->escape_char == '[' && str[i] == ']') || - (ctx->escape_char == '{' && str[i] == '}')) + for (guint i = 0; i < len; ) { + gunichar chr = g_utf8_get_char(str+i); + + if (g_unichar_toupper(chr) == ctx->escape_char || + (ctx->escape_char == '[' && chr == ']') || + (ctx->escape_char == '{' && chr == '}')) target->data[target->len++] = TECO_CTL_KEY('Q'); - target->data[target->len++] = str[i]; + + gsize lenc = g_utf8_next_char(str+i) - (str+i); + memcpy(target->data+target->len, str+i, lenc); + target->len += lenc; + i += lenc; } target->data[target->len] = '\0'; @@ -748,8 +903,17 @@ teco_machine_stringbuilding_clear(teco_machine_stringbuilding_t *ctx) teco_machine_qregspec_free(ctx->machine_qregspec); } +gboolean +teco_state_expectstring_initial(teco_machine_main_t *ctx, GError **error) +{ + if (ctx->mode == TECO_MODE_NORMAL) + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, + teco_default_codepage()); + return TRUE; +} + teco_state_t * -teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { teco_state_t *current = ctx->parent.current; @@ -766,13 +930,18 @@ 
teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro /* * FIXME: Exclude setting at least whitespace characters as the * new string escape character to avoid accidental errors? + * + * FIXME: Should we perhaps restrict case folding escape characters + * to the ANSI range (teco_ascii_toupper())? + * This would be faster than case folding each and every character + * of a string argument to check against the escape char. */ switch (ctx->expectstring.machine.escape_char) { case '\e': case '{': if (ctx->parent.must_undo) - teco_undo_gchar(ctx->expectstring.machine.escape_char); - ctx->expectstring.machine.escape_char = teco_ascii_toupper(chr); + teco_undo_gunichar(ctx->expectstring.machine.escape_char); + ctx->expectstring.machine.escape_char = g_unichar_toupper(chr); return current; } } @@ -796,7 +965,7 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro ctx->expectstring.nesting--; break; } - } else if (teco_ascii_toupper(chr) == ctx->expectstring.machine.escape_char) { + } else if (g_unichar_toupper(chr) == ctx->expectstring.machine.escape_char) { if (ctx->parent.must_undo) teco_undo_gint(ctx->expectstring.nesting); ctx->expectstring.nesting--; @@ -826,7 +995,7 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro if (current->expectstring.last) { if (ctx->parent.must_undo) - teco_undo_gchar(ctx->expectstring.machine.escape_char); + teco_undo_gunichar(ctx->expectstring.machine.escape_char); ctx->expectstring.machine.escape_char = '\e'; } ctx->expectstring.nesting = 1; @@ -857,7 +1026,7 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro if (!teco_machine_stringbuilding_input(&ctx->expectstring.machine, chr, str, error)) return NULL; } else if (ctx->mode == TECO_MODE_NORMAL) { - teco_string_append_c(&ctx->expectstring.string, chr); + teco_string_append_wc(&ctx->expectstring.string, chr); } /* @@ -901,7 +1070,7 @@ 
teco_state_expectfile_process(teco_machine_main_t *ctx, const teco_string_t *str g_assert(str->data != NULL); /* - * Null-chars must not ocur in filename/path strings and at some point + * Null-chars must not occur in filename/path strings and at some point * teco_string_t has to be converted to a null-terminated C string * as all the glib filename functions rely on null-terminated strings. * Doing it here ensures that teco_file_expand_path() can be safely called diff --git a/src/parser.h b/src/parser.h index 05a9715..066896f 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,6 +16,8 @@ */ #pragma once +#include <stdbool.h> + #include <glib.h> #include <Scintilla.h> @@ -23,6 +25,7 @@ #include "sciteco.h" #include "string-utils.h" #include "goto.h" +#include "undo.h" #include "qreg.h" /* @@ -36,7 +39,9 @@ typedef struct { /** how many iterations are left */ teco_int_t counter; /** Program counter of loop start command */ - guint pc : sizeof(guint)*8 - 1; + gsize pc; + /** Brace level at loop start */ + guint brace_level : sizeof(guint)*8 - 1; /** * Whether the loop represents an argument * barrier or not (it "passes through" @@ -46,7 +51,7 @@ typedef struct { * a signed integer, it's ok steal one * bit for the pass_through flag. */ - gboolean pass_through : 1; + bool pass_through : 1; } teco_loop_context_t; extern GArray *teco_loop_stack; @@ -71,8 +76,8 @@ void undo__remove_index__teco_loop_stack(guint); * FIXME: Maybe use TECO_DECLARE_VTABLE_METHOD()? */ typedef const struct { - gboolean string_building : 1; - gboolean last : 1; + bool string_building : 1; + bool last : 1; /** * Called repeatedly to process chunks of input and give interactive feedback. 
@@ -99,17 +104,18 @@ typedef const struct { } teco_state_expectqreg_t; typedef gboolean (*teco_state_initial_cb_t)(teco_machine_t *ctx, GError **error); -typedef teco_state_t *(*teco_state_input_cb_t)(teco_machine_t *ctx, gchar chr, GError **error); +typedef teco_state_t *(*teco_state_input_cb_t)(teco_machine_t *ctx, gunichar chr, GError **error); typedef gboolean (*teco_state_refresh_cb_t)(teco_machine_t *ctx, GError **error); typedef gboolean (*teco_state_end_of_macro_cb_t)(teco_machine_t *ctx, GError **error); typedef gboolean (*teco_state_process_edit_cmd_cb_t)(teco_machine_t *ctx, teco_machine_t *parent_ctx, - gchar key, GError **error); + gunichar key, GError **error); typedef enum { - TECO_FNMACRO_MASK_START = (1 << 0), - TECO_FNMACRO_MASK_STRING = (1 << 1), - TECO_FNMACRO_MASK_DEFAULT = ~((1 << 2)-1) -} teco_fnmacro_mask_t; + TECO_KEYMACRO_MASK_START = (1 << 0), + TECO_KEYMACRO_MASK_STRING = (1 << 1), + TECO_KEYMACRO_MASK_CASEINSENSITIVE = (1 << 2), + TECO_KEYMACRO_MASK_DEFAULT = ~((1 << 3)-1) +} teco_keymacro_mask_t; /** * A teco_machine_t state. @@ -182,19 +188,19 @@ struct teco_state_t { /** * Whether this state is a start state (ie. not within any * escape sequence etc.). - * This is separate of TECO_FNMACRO_MASK_START which is set + * This is separate of TECO_KEYMACRO_MASK_START which is set * only in the main machine's start states. */ - gboolean is_start : 1; + bool is_start : 1; /** - * Function key macro mask. + * Key macro mask. * This is not a bitmask since it is compared with values set * from TECO, so the bitorder needs to be defined. * * @fixme If we intend to "forward" masks from other state machines like * teco_machine_stringbuilding_t, this should probably be a callback. */ - teco_fnmacro_mask_t fnmacro_mask : 8; + teco_keymacro_mask_t keymacro_mask : 8; /** * Additional state-dependent callbacks and settings. 
@@ -214,7 +220,7 @@ struct teco_state_t { gboolean teco_state_end_of_macro(teco_machine_t *ctx, GError **error); /* in cmdline.c */ -gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error); +gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error); /** * @interface TECO_DEFINE_STATE @@ -234,7 +240,7 @@ gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent .end_of_macro_cb = teco_state_end_of_macro, \ .process_edit_cmd_cb = teco_state_process_edit_cmd, \ .is_start = FALSE, \ - .fnmacro_mask = TECO_FNMACRO_MASK_DEFAULT, \ + .keymacro_mask = TECO_KEYMACRO_MASK_DEFAULT, \ ##__VA_ARGS__ \ } @@ -243,20 +249,21 @@ gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent extern teco_state_t NAME /* in cmdline.c */ -gboolean teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error); +gboolean teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error); /** * @interface TECO_DEFINE_STATE_CASEINSENSITIVE * @implements TECO_DEFINE_STATE * @ingroup states * - * Base class of states with case-insenstive input. + * Base class of states with case-insensitive input. * * This is meant for states accepting command characters * that can possibly be case-folded. */ #define TECO_DEFINE_STATE_CASEINSENSITIVE(NAME, ...) \ TECO_DEFINE_STATE(NAME, \ + .keymacro_mask = TECO_KEYMACRO_MASK_CASEINSENSITIVE, \ .process_edit_cmd_cb = teco_state_caseinsensitive_process_edit_cmd, \ ##__VA_ARGS__ \ ) @@ -278,6 +285,8 @@ struct teco_machine_t { * Whether side effects must be reverted on rubout. * State machines created within macro calls don't have to * even in interactive mode. + * In fact you MUST not revert side effects if this is FALSE + * as the data no longer exists on the call stack at undo-time. 
*/ gboolean must_undo; }; @@ -296,7 +305,7 @@ teco_machine_reset(teco_machine_t *ctx, teco_state_t *initial) teco_undo_ptr(ctx->current) = initial; } -gboolean teco_machine_input(teco_machine_t *ctx, gchar chr, GError **error); +gboolean teco_machine_input(teco_machine_t *ctx, gunichar chr, GError **error); typedef enum { TECO_STRINGBUILDING_MODE_NORMAL = 0, @@ -307,9 +316,6 @@ typedef enum { /** * A stringbuilding state machine. * - * @fixme Should contain the escape char (currently in teco_machine_expectstring_t), - * so that we can escape it via ^Q. - * * @extends teco_machine_t */ typedef struct teco_machine_stringbuilding_t { @@ -327,7 +333,7 @@ typedef struct teco_machine_stringbuilding_t { * If this is `[` or `{`, it is assumed that `]` and `}` must * be escaped as well by teco_machine_stringbuilding_escape(). */ - gchar escape_char; + gunichar escape_char; /** * Q-Register table for local registers. @@ -348,11 +354,28 @@ typedef struct teco_machine_stringbuilding_t { * (see teco_state_stringbuilding_start_process_edit_cmd()). */ teco_string_t *result; + + /** + * Encoding of string in `result`. + * This is inherited from the embedding command and may depend on + * the buffer's or Q-Register's encoding. + */ + guint codepage; } teco_machine_stringbuilding_t; -void teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escape_char, +void teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gunichar escape_char, teco_qreg_table_t *locals, gboolean must_undo); +static inline void +teco_machine_stringbuilding_set_codepage(teco_machine_stringbuilding_t *ctx, + guint codepage) +{ + /* NOTE: This is not safe to undo in macro calls. 
*/ + if (ctx->parent.must_undo) + teco_undo_guint(ctx->codepage); + ctx->codepage = codepage; +} + void teco_machine_stringbuilding_reset(teco_machine_stringbuilding_t *ctx); /** @@ -365,7 +388,7 @@ void teco_machine_stringbuilding_reset(teco_machine_stringbuilding_t *ctx); * @return FALSE in case of error. */ static inline gboolean -teco_machine_stringbuilding_input(teco_machine_stringbuilding_t *ctx, gchar chr, +teco_machine_stringbuilding_input(teco_machine_stringbuilding_t *ctx, gunichar chr, teco_string_t *result, GError **error) { ctx->result = result; @@ -424,7 +447,8 @@ typedef enum { struct teco_machine_main_t { teco_machine_t parent; - gint macro_pc; + /* signed because it is sometimes set to -1 for flow control */ + gssize macro_pc; /** * Aliases bitfield with an integer. @@ -435,8 +459,8 @@ struct teco_machine_main_t { struct { teco_mode_t mode : 8; - gboolean modifier_colon : 1; - gboolean modifier_at : 1; + bool modifier_colon : 1; + bool modifier_at : 1; }; guint __flags; }; @@ -481,7 +505,7 @@ void teco_machine_main_init(teco_machine_main_t *ctx, gboolean teco_machine_main_eval_colon(teco_machine_main_t *ctx); gboolean teco_machine_main_step(teco_machine_main_t *ctx, - const gchar *macro, gint stop_pos, GError **error); + const gchar *macro, gsize stop_pos, GError **error); gboolean teco_execute_macro(const gchar *macro, gsize macro_len, teco_qreg_table_t *qreg_table_locals, GError **error); @@ -500,17 +524,18 @@ typedef const struct { */ teco_state_t *teco_machine_main_transition_input(teco_machine_main_t *ctx, teco_machine_main_transition_t *transitions, - guint len, gchar chr, GError **error); + guint len, gunichar chr, GError **error); void teco_machine_main_clear(teco_machine_main_t *ctx); G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(teco_machine_main_t, teco_machine_main_clear); -teco_state_t *teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **error); +gboolean teco_state_expectstring_initial(teco_machine_main_t *ctx, GError 
**error); +teco_state_t *teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **error); gboolean teco_state_expectstring_refresh(teco_machine_main_t *ctx, GError **error); /* in cmdline.c */ -gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); /** * @interface TECO_DEFINE_STATE_EXPECTSTRING @@ -526,15 +551,16 @@ gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco */ #define TECO_DEFINE_STATE_EXPECTSTRING(NAME, ...) \ static teco_state_t * \ - NAME##_input(teco_machine_main_t *ctx, gchar chr, GError **error) \ + NAME##_input(teco_machine_main_t *ctx, gunichar chr, GError **error) \ { \ return teco_state_expectstring_input(ctx, chr, error); \ } \ TECO_DEFINE_STATE(NAME, \ + .initial_cb = (teco_state_initial_cb_t)teco_state_expectstring_initial, \ .refresh_cb = (teco_state_refresh_cb_t)teco_state_expectstring_refresh, \ .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \ teco_state_expectstring_process_edit_cmd, \ - .fnmacro_mask = TECO_FNMACRO_MASK_STRING, \ + .keymacro_mask = TECO_KEYMACRO_MASK_STRING, \ .expectstring.string_building = TRUE, \ .expectstring.last = TRUE, \ .expectstring.process_cb = NULL, /* do nothing */ \ @@ -546,7 +572,7 @@ gboolean teco_state_expectfile_process(teco_machine_main_t *ctx, const teco_stri gsize new_chars, GError **error); /* in cmdline.c */ -gboolean teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); /** * @interface TECO_DEFINE_STATE_EXPECTFILE @@ -562,7 +588,7 @@ gboolean teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_m ) /* in 
cmdline.c */ -gboolean teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); /** * @interface TECO_DEFINE_STATE_EXPECTDIR diff --git a/src/qreg-commands.c b/src/qreg-commands.c index be0aada..cff4c84 100644 --- a/src/qreg-commands.c +++ b/src/qreg-commands.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -50,7 +50,7 @@ teco_state_expectqreg_initial(teco_machine_main_t *ctx, GError **error) } teco_state_t * -teco_state_expectqreg_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_expectqreg_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { teco_state_t *current = ctx->parent.current; @@ -149,7 +149,7 @@ teco_state_loadqreg_done(teco_machine_main_t *ctx, const teco_string_t *str, GEr if (str->len > 0) { /* Load file into Q-Register */ g_autofree gchar *filename = teco_file_expand_path(str->data); - if (!teco_qreg_load(qreg, filename, error)) + if (!qreg->vtable->load(qreg, filename, error)) return NULL; } else { /* Edit Q-Register */ @@ -202,7 +202,7 @@ teco_state_saveqreg_done(teco_machine_main_t *ctx, const teco_string_t *str, GEr return &teco_state_start; g_autofree gchar *filename = teco_file_expand_path(str->data); - return teco_qreg_save(qreg, filename, error) ? &teco_state_start : NULL; + return qreg->vtable->save(qreg, filename, error) ? 
&teco_state_start : NULL; } /*$ E% E%q @@ -259,9 +259,9 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, if (teco_machine_main_eval_colon(ctx)) { /* Query Q-Register's existence or string size */ if (qreg) { - gsize len; - - if (!qreg->vtable->get_string(qreg, NULL, &len, error)) + /* get_string() would return the size in bytes */ + teco_int_t len = qreg->vtable->get_length(qreg, error); + if (len < 0) return NULL; teco_expressions_push(len); } else { @@ -281,10 +281,9 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, return NULL; } - gint c = qreg->vtable->get_character(qreg, pos, error); - if (c < 0) + teco_int_t c; + if (!qreg->vtable->get_character(qreg, pos, &c, error)) return NULL; - teco_expressions_push(c); } else { /* Query integer */ @@ -311,6 +310,10 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, * Positions are handled like buffer positions \(em they * begin at 0 up to the length of the string minus 1. * An error is thrown for invalid positions. + * If <q> is encoded as UTF-8 and there is + * an incomplete sequence at the requested position, + * -1 is returned. + * All other invalid Unicode sequences are returned as -2. * Both non-colon-modified forms of Q require register <q> * to be defined and fail otherwise. 
* @@ -369,24 +372,50 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, gint args = teco_expressions_args(); if (args > 0) { - g_autofree gchar *buffer = g_malloc(args); + guint codepage = teco_default_codepage(); + if (colon_modified && !qreg->vtable->get_string(qreg, NULL, NULL, &codepage, error)) + return NULL; - for (gint i = args; i > 0; i--) { - teco_int_t v; - if (!teco_expressions_pop_num_calc(&v, 0, error)) - return NULL; - buffer[i-1] = (gchar)v; + g_autofree gchar *buffer = NULL; + gsize len = 0; + + if (codepage == SC_CP_UTF8) { + /* the glib docs wrongly claim that one character can take 6 bytes */ + buffer = g_malloc(4*args); + for (gint i = args; i > 0; i--) { + teco_int_t v; + if (!teco_expressions_pop_num_calc(&v, 0, error)) + return NULL; + if (v < 0 || !g_unichar_validate(v)) { + teco_error_codepoint_set(error, "^U"); + return NULL; + } + len += g_unichar_to_utf8(v, buffer+len); + } + } else { + buffer = g_malloc(args); + for (gint i = args; i > 0; i--) { + teco_int_t v; + if (!teco_expressions_pop_num_calc(&v, 0, error)) + return NULL; + if (v < 0 || v > 0xFF) { + teco_error_codepoint_set(error, "^U"); + return NULL; + } + buffer[len++] = v; + } } if (colon_modified) { /* append to register */ if (!qreg->vtable->undo_append_string(qreg, error) || - !qreg->vtable->append_string(qreg, buffer, args, error)) + !qreg->vtable->append_string(qreg, buffer, len, error)) return NULL; } else { /* set register */ if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, buffer, args, error)) + !qreg->vtable->set_string(qreg, buffer, len, + codepage, error)) return NULL; } } @@ -399,7 +428,8 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, } else { /* set register */ if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, str->data, str->len, error)) + !qreg->vtable->set_string(qreg, str->data, str->len, + teco_default_codepage(), error)) return NULL; } @@ -450,6 
+480,26 @@ TECO_DEFINE_STATE_EXPECTQREG(teco_state_eucommand, .expectqreg.type = TECO_QREG_OPTIONAL_INIT ); +static gboolean +teco_state_setqregstring_building_initial(teco_machine_main_t *ctx, GError **error) +{ + if (ctx->mode > TECO_MODE_NORMAL) + return TRUE; + + teco_qreg_t *qreg; + teco_machine_qregspec_get_results(ctx->expectqreg, &qreg, NULL); + + /* + * The expected codepage of string building constructs is determined + * by the Q-Register. + */ + guint codepage; + if (!qreg->vtable->get_string(qreg, NULL, NULL, &codepage, error)) + return FALSE; + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, codepage); + return TRUE; +} + static teco_state_t * teco_state_setqregstring_building_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error) { @@ -467,6 +517,7 @@ teco_state_setqregstring_building_done(teco_machine_main_t *ctx, const teco_stri * characters \fBenabled\fP. */ TECO_DEFINE_STATE_EXPECTSTRING(teco_state_setqregstring_building, + .initial_cb = (teco_state_initial_cb_t)teco_state_setqregstring_building_initial, .expectstring.string_building = TRUE ); @@ -481,7 +532,7 @@ teco_state_getqregstring_got_register(teco_machine_main_t *ctx, teco_qreg_t *qre g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; if (str.len > 0) { @@ -604,8 +655,15 @@ teco_state_macro_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, } else { g_auto(teco_qreg_table_t) table; teco_qreg_table_init(&table, FALSE); + if (!teco_qreg_execute(qreg, &table, error)) return NULL; + if (teco_qreg_current && !teco_qreg_current->must_undo) { + /* currently editing local Q-Register */ + teco_error_editinglocalqreg_set(error, teco_qreg_current->head.name.data, + teco_qreg_current->head.name.len); + return NULL; + } } return &teco_state_start; @@ -632,6 +690,10 @@ teco_state_macro_got_register(teco_machine_main_t 
*ctx, teco_qreg_t *qreg, * Note that the string of <q> will be copied upon macro execution, * so subsequent changes to Q-Register <q> from inside the macro do * not modify the executed code. + * + * While \fBM\fP does not check the register's configured encoding + * (as reported by \fBEE\fP), its contents must be and are checked to be in + * valid UTF-8. */ TECO_DEFINE_STATE_EXPECTQREG(teco_state_macro); @@ -666,6 +728,9 @@ teco_state_macrofile_done(teco_machine_main_t *ctx, const teco_string_t *str, GE * It is otherwise similar to the \(lqM\(rq command. * * If <file> could not be read, the command yields an error. + * + * As all \*(ST code, the contents of <file> must be in valid UTF-8 + * even if operating in the \(lqdefault ANSI\(rq mode as configured by \fBED\fP. */ TECO_DEFINE_STATE_EXPECTFILE(teco_state_macrofile); @@ -678,7 +743,7 @@ teco_state_copytoqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, if (ctx->mode > TECO_MODE_NORMAL) return &teco_state_start; - teco_int_t from, len; + gssize from, len; /* in bytes */ if (!teco_expressions_eval(FALSE, error)) return NULL; @@ -702,32 +767,37 @@ teco_state_copytoqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, len *= -1; } } else { - teco_int_t to = teco_expressions_pop_num(0); - from = teco_expressions_pop_num(0); - + gssize to = teco_interface_glyphs2bytes(teco_expressions_pop_num(0)); + from = teco_interface_glyphs2bytes(teco_expressions_pop_num(0)); len = to - from; - if (len < 0 || !teco_validate_pos(from) || !teco_validate_pos(to)) { + if (len < 0 || from < 0 || to < 0) { teco_error_range_set(error, "X"); return NULL; } } + /* + * NOTE: This does not use SCI_GETRANGEPOINTER+SCI_GETGAPPOSITION + * since it may not be safe when copying from register to register. 
+ */ g_autofree gchar *str = g_malloc(len + 1); - struct Sci_TextRange text_range = { - .chrg = {.cpMin = from, .cpMax = from + len}, + struct Sci_TextRangeFull range = { + .chrg = {from, from + len}, .lpstrText = str }; - teco_interface_ssm(SCI_GETTEXTRANGE, 0, (sptr_t)&text_range); + teco_interface_ssm(SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range); if (teco_machine_main_eval_colon(ctx)) { if (!qreg->vtable->undo_append_string(qreg, error) || !qreg->vtable->append_string(qreg, str, len, error)) return NULL; } else { + guint cp = teco_interface_get_codepage(); + if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, str, len, error)) + !qreg->vtable->set_string(qreg, str, len, cp, error)) return NULL; } diff --git a/src/qreg-commands.h b/src/qreg-commands.h index 6a41fc5..27a6a5c 100644 --- a/src/qreg-commands.h +++ b/src/qreg-commands.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -33,10 +33,10 @@ teco_state_expectqreg_reset(teco_machine_main_t *ctx) gboolean teco_state_expectqreg_initial(teco_machine_main_t *ctx, GError **error); -teco_state_t *teco_state_expectqreg_input(teco_machine_main_t *ctx, gchar chr, GError **error); +teco_state_t *teco_state_expectqreg_input(teco_machine_main_t *ctx, gunichar chr, GError **error); /* in cmdline.c */ -gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); /** * @interface TECO_DEFINE_STATE_EXPECTQREG @@ -47,7 +47,7 @@ gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_m */ #define TECO_DEFINE_STATE_EXPECTQREG(NAME, ...) 
\ static teco_state_t * \ - NAME##_input(teco_machine_main_t *ctx, gchar chr, GError **error) \ + NAME##_input(teco_machine_main_t *ctx, gunichar chr, GError **error) \ { \ return teco_state_expectqreg_input(ctx, chr, error); \ } \ @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,6 +18,7 @@ #include "config.h" #endif +#include <stdbool.h> #include <string.h> #include <glib.h> @@ -82,7 +83,12 @@ teco_qreg_execute(teco_qreg_t *qreg, teco_qreg_table_t *qreg_table_locals, GErro { g_auto(teco_string_t) macro = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &macro.data, &macro.len, error) || + /* + * SciTECO macros must be in UTF-8, but we don't check the encoding, + * so as not to complicate TECO_ED_DEFAULT_ANSI mode. + * The UTF-8 byte sequences are checked anyway. + */ + if (!qreg->vtable->get_string(qreg, &macro.data, &macro.len, NULL, error) || !teco_execute_macro(macro.data, macro.len, qreg_table_locals, error)) { teco_error_add_frame_qreg(qreg->head.name.data, qreg->head.name.len); return FALSE; @@ -120,65 +126,11 @@ teco_qreg_set_eol_mode(teco_qreg_t *qreg, gint mode) if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); teco_view_ssm(teco_qreg_view, SCI_SETEOLMODE, mode, 0); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); -} - -/** @memberof teco_qreg_t */ -gboolean -teco_qreg_load(teco_qreg_t *qreg, const gchar *filename, GError **error) -{ - if (!qreg->vtable->undo_set_string(qreg, error)) - return FALSE; - - if (teco_qreg_current) - teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - - teco_doc_edit(&qreg->string); - teco_doc_reset(&qreg->string); - - /* - * teco_view_load() might change the EOL style.
- */ - teco_qreg_undo_set_eol_mode(qreg); - - /* - * undo_set_string() pushes undo tokens that restore - * the previous document in the view. - * So if loading fails, teco_qreg_current will be - * made the current document again. - */ - if (!teco_view_load(teco_qreg_view, filename, error)) - return FALSE; - - if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); - - return TRUE; -} - -/** @memberof teco_qreg_t */ -gboolean -teco_qreg_save(teco_qreg_t *qreg, const gchar *filename, GError **error) -{ - if (teco_qreg_current) - teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - - teco_doc_edit(&qreg->string); - - if (!teco_view_save(teco_qreg_view, filename, error)) { - if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); - return FALSE; - } - - if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); - - return TRUE; + teco_doc_edit(&teco_qreg_current->string, 0); } static gboolean @@ -204,9 +156,10 @@ teco_qreg_plain_get_integer(teco_qreg_t *qreg, teco_int_t *ret, GError **error) } static gboolean -teco_qreg_plain_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error) +teco_qreg_plain_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error) { - teco_doc_set_string(&qreg->string, str, len); + teco_doc_set_string(&qreg->string, str, len, codepage); return TRUE; } @@ -231,43 +184,64 @@ teco_qreg_plain_append_string(teco_qreg_t *qreg, const gchar *str, gsize len, GE if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)str); teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); return TRUE; } static gboolean 
-teco_qreg_plain_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error) +teco_qreg_plain_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error) { - teco_doc_get_string(&qreg->string, str, len); + teco_doc_get_string(&qreg->string, str, len, codepage); return TRUE; } -static gint -teco_qreg_plain_get_character(teco_qreg_t *qreg, guint position, GError **error) +static gboolean +teco_qreg_plain_get_character(teco_qreg_t *qreg, teco_int_t position, + teco_int_t *chr, GError **error) { - gint ret = -1; - if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); - if (position < teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0)) - ret = teco_view_ssm(teco_qreg_view, SCI_GETCHARAT, position, 0); - else + sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); + gssize off = teco_view_glyphs2bytes(teco_qreg_view, position); + + gboolean ret = off >= 0 && off != len; + if (!ret) g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, - "Position %u out of range", position); + "Position %" TECO_INT_FORMAT " out of range", position); /* make sure we still restore the current Q-Register */ + else + *chr = teco_view_get_character(teco_qreg_view, off, len); + + if (teco_qreg_current) + teco_doc_edit(&teco_qreg_current->string, 0); + + return ret; +} + +static teco_int_t +teco_qreg_plain_get_length(teco_qreg_t *qreg, GError **error) +{ + if (teco_qreg_current) + teco_doc_update(&teco_qreg_current->string, teco_qreg_view); + + teco_doc_edit(&qreg->string, teco_default_codepage()); + + sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); + teco_int_t ret = teco_view_bytes2glyphs(teco_qreg_view, len); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); return ret; } @@ -294,7 +268,7 @@ teco_qreg_plain_edit(teco_qreg_t *qreg, GError **error) 
if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); teco_interface_show_view(teco_qreg_view); teco_interface_info_update(qreg); @@ -319,6 +293,58 @@ teco_qreg_plain_undo_edit(teco_qreg_t *qreg, GError **error) return TRUE; } +static gboolean +teco_qreg_plain_load(teco_qreg_t *qreg, const gchar *filename, GError **error) +{ + if (!qreg->vtable->undo_set_string(qreg, error)) + return FALSE; + + if (teco_qreg_current) + teco_doc_update(&teco_qreg_current->string, teco_qreg_view); + + teco_doc_edit(&qreg->string, teco_default_codepage()); + teco_doc_reset(&qreg->string); + + /* + * teco_view_load() might change the EOL style. + */ + teco_qreg_undo_set_eol_mode(qreg); + + /* + * undo_set_string() pushes undo tokens that restore + * the previous document in the view. + * So if loading fails, teco_qreg_current will be + * made the current document again. + */ + if (!teco_view_load(teco_qreg_view, filename, error)) + return FALSE; + + if (teco_qreg_current) + teco_doc_edit(&teco_qreg_current->string, 0); + + return TRUE; +} + +static gboolean +teco_qreg_plain_save(teco_qreg_t *qreg, const gchar *filename, GError **error) +{ + if (teco_qreg_current) + teco_doc_update(&teco_qreg_current->string, teco_qreg_view); + + teco_doc_edit(&qreg->string, teco_default_codepage()); + + gboolean ret = teco_view_save(teco_qreg_view, filename, error); + + if (teco_qreg_current) + teco_doc_edit(&teco_qreg_current->string, 0); + + return ret; +} + +/** + * Initializer for vtables of Q-Registers with "plain" storage of strings. + * These store their string part as teco_docs. + */ #define TECO_INIT_QREG(...) 
{ \ .set_integer = teco_qreg_plain_set_integer, \ .undo_set_integer = teco_qreg_plain_undo_set_integer, \ @@ -329,10 +355,13 @@ teco_qreg_plain_undo_edit(teco_qreg_t *qreg, GError **error) .undo_append_string = teco_qreg_plain_undo_set_string, \ .get_string = teco_qreg_plain_get_string, \ .get_character = teco_qreg_plain_get_character, \ + .get_length = teco_qreg_plain_get_length, \ .exchange_string = teco_qreg_plain_exchange_string, \ .undo_exchange_string = teco_qreg_plain_undo_exchange_string, \ .edit = teco_qreg_plain_edit, \ .undo_edit = teco_qreg_plain_undo_edit, \ + .load = teco_qreg_plain_load, \ + .save = teco_qreg_plain_save, \ ##__VA_ARGS__ \ } @@ -345,6 +374,150 @@ teco_qreg_plain_new(const gchar *name, gsize len) return teco_qreg_new(&vtable, name, len); } +static gboolean +teco_qreg_external_edit(teco_qreg_t *qreg, GError **error) +{ + g_auto(teco_string_t) str = {NULL, 0}; + + if (!teco_qreg_plain_edit(qreg, error) || + !qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) + return FALSE; + + teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); + teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); + teco_view_ssm(teco_qreg_view, SCI_ADDTEXT, str.len, (sptr_t)str.data); + teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); + + undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); + return TRUE; +} + +static gboolean +teco_qreg_external_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error) +{ + g_auto(teco_string_t) other_str, own_str = {NULL, 0}; + guint other_cp, own_cp; + + teco_doc_get_string(src, &other_str.data, &other_str.len, &other_cp); + + if (!qreg->vtable->get_string(qreg, &own_str.data, &own_str.len, &own_cp, error) || + !qreg->vtable->set_string(qreg, other_str.data, other_str.len, other_cp, error)) + return FALSE; + + teco_doc_set_string(src, own_str.data, own_str.len, own_cp); + return TRUE; +} + +static gboolean +teco_qreg_external_undo_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError 
**error) +{ + if (!qreg->vtable->undo_set_string(qreg, error)) + return FALSE; + if (qreg->must_undo) // FIXME + teco_doc_undo_set_string(src); + return TRUE; +} + +static gboolean +teco_qreg_external_get_character(teco_qreg_t *qreg, teco_int_t position, + teco_int_t *chr, GError **error) +{ + g_auto(teco_string_t) str = {NULL, 0}; + + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) + return FALSE; + + if (position < 0 || position >= g_utf8_strlen(str.data, str.len)) { + g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, + "Position %" TECO_INT_FORMAT " out of range", position); + return FALSE; + } + const gchar *p = g_utf8_offset_to_pointer(str.data, position); + + /* + * Make sure that the -1/-2 error values are preserved. + * The sign bit in UCS-4/UTF-32 is unused, so this will even + * suffice if TECO_INTEGER == 32. + */ + *chr = (gint32)g_utf8_get_char_validated(p, -1); + return TRUE; +} + +static teco_int_t +teco_qreg_external_get_length(teco_qreg_t *qreg, GError **error) +{ + g_auto(teco_string_t) str = {NULL, 0}; + + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) + return -1; + + return g_utf8_strlen(str.data, str.len); +} + +/* + * NOTE: This does not perform EOL normalization unlike teco_view_load(). + * It shouldn't be critical since "external" registers are mainly used for filenames. + * Otherwise we could of course load into the view() and call set_string() afterwards. + */ +static gboolean +teco_qreg_external_load(teco_qreg_t *qreg, const gchar *filename, GError **error) +{ + g_auto(teco_string_t) str = {NULL, 0}; + + return g_file_get_contents(filename, &str.data, &str.len, error) && + qreg->vtable->undo_set_string(qreg, error) && + qreg->vtable->set_string(qreg, str.data, str.len, teco_default_codepage(), error); +} + +/* + * NOTE: This does not simply use g_file_set_contents(), as we have to create + * save point files as well. 
+ * FIXME: On the other hand, this does not set the correct EOL style on the document, + * so teco_view_save() will save only with the default EOL style. + * It might therefore still be a good idea to avoid any conversion. + */ +static gboolean +teco_qreg_external_save(teco_qreg_t *qreg, const gchar *filename, GError **error) +{ + if (teco_qreg_current) + teco_doc_update(&teco_qreg_current->string, teco_qreg_view); + + teco_doc_edit(&qreg->string, teco_default_codepage()); + + g_auto(teco_string_t) str = {NULL, 0}; + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) + return FALSE; + + teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); + teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); + teco_view_ssm(teco_qreg_view, SCI_ADDTEXT, str.len, (sptr_t)str.data); + teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); + + undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); + + gboolean ret = teco_view_save(teco_qreg_view, filename, error); + + if (teco_qreg_current) + teco_doc_edit(&teco_qreg_current->string, 0); + + return ret; +} + +/** + * Initializer for vtables of Q-Registers with "external" storage of strings. + * These rely on custom implementations of get_string() and set_string(). + */ +#define TECO_INIT_QREG_EXTERNAL(...) TECO_INIT_QREG( \ + .exchange_string = teco_qreg_external_exchange_string, \ + .undo_exchange_string = teco_qreg_external_undo_exchange_string, \ + .edit = teco_qreg_external_edit, \ + .get_character = teco_qreg_external_get_character, \ + .get_length = teco_qreg_external_get_length, \ + .load = teco_qreg_external_load, \ + .save = teco_qreg_external_save, \ + ##__VA_ARGS__ \ +) + /* * NOTE: The integer-component is currently unused on the "*" special register. */ @@ -368,11 +541,12 @@ teco_qreg_bufferinfo_get_integer(teco_qreg_t *qreg, teco_int_t *ret, GError **er } /* - * FIXME: These operations can and should be implemented. - * Setting the "*" register could for instance rename the file. 
+ * FIXME: Something could be implemented here. There are 2 possibilities: + * Either it renames the current buffer, or opens a file (alternative to EB). */ static gboolean -teco_qreg_bufferinfo_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error) +teco_qreg_bufferinfo_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error) { teco_error_qregopunsupported_set(error, qreg->head.name.data, qreg->head.name.len, FALSE); return FALSE; @@ -401,7 +575,8 @@ teco_qreg_bufferinfo_undo_append_string(teco_qreg_t *qreg, GError **error) * NOTE: The `string` component is currently unused on the "*" register. */ static gboolean -teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error) +teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error) { /* * On platforms with a default non-forward-slash directory @@ -416,43 +591,8 @@ teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErr * NOTE: teco_file_normalize_path() does not change the size of the string. */ *len = teco_ring_current->filename ? 
strlen(teco_ring_current->filename) : 0; - return TRUE; -} - -static gint -teco_qreg_bufferinfo_get_character(teco_qreg_t *qreg, guint position, GError **error) -{ - gsize max_len; - - if (!teco_qreg_bufferinfo_get_string(qreg, NULL, &max_len, error)) - return -1; - - if (position >= max_len) { - g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, - "Position %u out of range", position); - return -1; - } - - return teco_ring_current->filename[position]; -} - -static gboolean -teco_qreg_bufferinfo_edit(teco_qreg_t *qreg, GError **error) -{ - if (!teco_qreg_plain_edit(qreg, error)) - return FALSE; - - g_auto(teco_string_t) str = {NULL, 0}; - - if (!teco_qreg_bufferinfo_get_string(qreg, &str.data, &str.len, error)) - return FALSE; - - teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_ADDTEXT, str.len, (sptr_t)str.data); - teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); - - undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); + if (codepage) + *codepage = teco_default_codepage(); return TRUE; } @@ -460,7 +600,7 @@ teco_qreg_bufferinfo_edit(teco_qreg_t *qreg, GError **error) teco_qreg_t * teco_qreg_bufferinfo_new(void) { - static teco_qreg_vtable_t vtable = TECO_INIT_QREG( + static teco_qreg_vtable_t vtable = TECO_INIT_QREG_EXTERNAL( .set_integer = teco_qreg_bufferinfo_set_integer, .undo_set_integer = teco_qreg_bufferinfo_undo_set_integer, .get_integer = teco_qreg_bufferinfo_get_integer, @@ -469,15 +609,22 @@ teco_qreg_bufferinfo_new(void) .append_string = teco_qreg_bufferinfo_append_string, .undo_append_string = teco_qreg_bufferinfo_undo_append_string, .get_string = teco_qreg_bufferinfo_get_string, - .get_character = teco_qreg_bufferinfo_get_character, - .edit = teco_qreg_bufferinfo_edit + /* + * As teco_qreg_bufferinfo_set_string() is not implemented, + * it's important to not inherit teco_qreg_external_exchange_string(). + * `[*` and `]*` will still work though. 
+ * The inherited teco_qreg_external_load() will simply fail. + */ + .exchange_string = teco_qreg_plain_exchange_string, + .undo_exchange_string = teco_qreg_plain_undo_exchange_string ); return teco_qreg_new(&vtable, "*", 1); } static gboolean -teco_qreg_workingdir_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error) +teco_qreg_workingdir_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error) { /* * NOTE: Makes sure that `dir` will be null-terminated as str[len] may not be '\0'. @@ -528,7 +675,8 @@ teco_qreg_workingdir_undo_append_string(teco_qreg_t *qreg, GError **error) } static gboolean -teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error) +teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error) { /* * On platforms with a default non-forward-slash directory @@ -545,84 +693,22 @@ teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErr *str = teco_file_normalize_path(dir); else g_free(dir); + if (codepage) + *codepage = teco_default_codepage(); return TRUE; } -static gint -teco_qreg_workingdir_get_character(teco_qreg_t *qreg, guint position, GError **error) -{ - g_auto(teco_string_t) str = {NULL, 0}; - - if (!teco_qreg_workingdir_get_string(qreg, &str.data, &str.len, error)) - return -1; - - if (position >= str.len) { - g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, - "Position %u out of range", position); - return -1; - } - - return str.data[position]; -} - -static gboolean -teco_qreg_workingdir_edit(teco_qreg_t *qreg, GError **error) -{ - g_auto(teco_string_t) str = {NULL, 0}; - - if (!teco_qreg_plain_edit(qreg, error) || - !teco_qreg_workingdir_get_string(qreg, &str.data, &str.len, error)) - return FALSE; - - teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_ADDTEXT, str.len, 
(sptr_t)str.data); - teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); - - undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); - return TRUE; -} - -static gboolean -teco_qreg_workingdir_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error) -{ - g_auto(teco_string_t) other_str, own_str = {NULL, 0}; - - teco_doc_get_string(src, &other_str.data, &other_str.len); - - if (!teco_qreg_workingdir_get_string(qreg, &own_str.data, &own_str.len, error) || - /* FIXME: Why is teco_qreg_plain_set_string() sufficient? */ - !teco_qreg_plain_set_string(qreg, other_str.data, other_str.len, error)) - return FALSE; - - teco_doc_set_string(src, own_str.data, own_str.len); - return TRUE; -} - -static gboolean -teco_qreg_workingdir_undo_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error) -{ - teco_undo_change_dir_to_current(); - if (qreg->must_undo) // FIXME - teco_doc_undo_set_string(src); - return TRUE; -} - /** @static @memberof teco_qreg_t */ teco_qreg_t * teco_qreg_workingdir_new(void) { - static teco_qreg_vtable_t vtable = TECO_INIT_QREG( + static teco_qreg_vtable_t vtable = TECO_INIT_QREG_EXTERNAL( .set_string = teco_qreg_workingdir_set_string, .undo_set_string = teco_qreg_workingdir_undo_set_string, .append_string = teco_qreg_workingdir_append_string, .undo_append_string = teco_qreg_workingdir_undo_append_string, - .get_string = teco_qreg_workingdir_get_string, - .get_character = teco_qreg_workingdir_get_character, - .edit = teco_qreg_workingdir_edit, - .exchange_string = teco_qreg_workingdir_exchange_string, - .undo_exchange_string = teco_qreg_workingdir_undo_exchange_string + .get_string = teco_qreg_workingdir_get_string ); /* @@ -639,7 +725,8 @@ teco_qreg_workingdir_new(void) } static gboolean -teco_qreg_clipboard_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error) +teco_qreg_clipboard_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error) { 
g_assert(!teco_string_contains(&qreg->head.name, '\0')); const gchar *clipboard_name = qreg->head.name.data + 1; @@ -724,7 +811,8 @@ teco_qreg_clipboard_undo_set_string(teco_qreg_t *qreg, GError **error) } static gboolean -teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error) +teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error) { g_assert(!teco_string_contains(&qreg->head.name, '\0')); const gchar *clipboard_name = qreg->head.name.data + 1; @@ -756,93 +844,41 @@ teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErro else teco_string_clear(&str_converted); *len = str_converted.len; + if (codepage) + *codepage = teco_default_codepage(); return TRUE; } -static gint -teco_qreg_clipboard_get_character(teco_qreg_t *qreg, guint position, GError **error) -{ - g_auto(teco_string_t) str = {NULL, 0}; - - if (!teco_qreg_clipboard_get_string(qreg, &str.data, &str.len, error)) - return -1; - - if (position >= str.len) { - g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, - "Position %u out of range", position); - return -1; - } - - return str.data[position]; -} - -static gboolean -teco_qreg_clipboard_edit(teco_qreg_t *qreg, GError **error) -{ - if (!teco_qreg_plain_edit(qreg, error)) - return FALSE; - - g_auto(teco_string_t) str = {NULL, 0}; - - if (!teco_qreg_clipboard_get_string(qreg, &str.data, &str.len, error)) - return FALSE; - - teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, str.len, (sptr_t)str.data); - teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); - - undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); - return TRUE; -} - /* - * FIXME: Very similar to teco_qreg_workingdir_exchange_string(). + * Regardless of whether EOL normalization is enabled, + * this will never perform it. 
+ * Other than that, it's very similar to teco_qreg_external_load(). */ static gboolean -teco_qreg_clipboard_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error) +teco_qreg_clipboard_load(teco_qreg_t *qreg, const gchar *filename, GError **error) { - g_auto(teco_string_t) other_str, own_str = {NULL, 0}; - - teco_doc_get_string(src, &other_str.data, &other_str.len); - - if (!teco_qreg_clipboard_get_string(qreg, &own_str.data, &own_str.len, error) || - /* FIXME: Why is teco_qreg_plain_set_string() sufficient? */ - !teco_qreg_plain_set_string(qreg, other_str.data, other_str.len, error)) - return FALSE; + g_assert(!teco_string_contains(&qreg->head.name, '\0')); + const gchar *clipboard_name = qreg->head.name.data + 1; - teco_doc_set_string(src, own_str.data, own_str.len); - return TRUE; -} + g_auto(teco_string_t) str = {NULL, 0}; -/* - * FIXME: Very similar to teco_qreg_workingdir_undo_exchange_string(). - */ -static gboolean -teco_qreg_clipboard_undo_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error) -{ - if (!teco_qreg_clipboard_undo_set_string(qreg, error)) - return FALSE; - if (qreg->must_undo) // FIXME - teco_doc_undo_set_string(src); - return TRUE; + return g_file_get_contents(filename, &str.data, &str.len, error) && + teco_qreg_clipboard_undo_set_string(qreg, error) && + teco_interface_set_clipboard(clipboard_name, str.data, str.len, error); } /** @static @memberof teco_qreg_t */ teco_qreg_t * teco_qreg_clipboard_new(const gchar *name) { - static teco_qreg_vtable_t vtable = TECO_INIT_QREG( + static teco_qreg_vtable_t vtable = TECO_INIT_QREG_EXTERNAL( .set_string = teco_qreg_clipboard_set_string, .undo_set_string = teco_qreg_clipboard_undo_set_string, .append_string = teco_qreg_clipboard_append_string, .undo_append_string = teco_qreg_clipboard_undo_append_string, .get_string = teco_qreg_clipboard_get_string, - .get_character = teco_qreg_clipboard_get_character, - .edit = teco_qreg_clipboard_edit, - .exchange_string = 
teco_qreg_clipboard_exchange_string, - .undo_exchange_string = teco_qreg_clipboard_undo_exchange_string + .load = teco_qreg_clipboard_load ); teco_qreg_t *qreg = teco_qreg_new(&vtable, "~", 1); @@ -939,7 +975,8 @@ teco_qreg_table_set_environ(teco_qreg_table_t *table, GError **error) qreg = found; } - if (!qreg->vtable->set_string(qreg, value, strlen(value), error)) + if (!qreg->vtable->set_string(qreg, value, strlen(value), + teco_default_codepage(), error)) return FALSE; } @@ -994,7 +1031,7 @@ teco_qreg_table_get_environ(teco_qreg_table_t *table, GError **error) continue; g_auto(teco_string_t) value = {NULL, 0}; - if (!cur->vtable->get_string(cur, &value.data, &value.len, error)) { + if (!cur->vtable->get_string(cur, &value.data, &value.len, NULL, error)) { g_strfreev(envp); return NULL; } @@ -1088,12 +1125,13 @@ teco_qreg_stack_push(teco_qreg_t *qreg, GError **error) { teco_qreg_stack_entry_t entry; g_auto(teco_string_t) string = {NULL, 0}; + guint codepage; if (!qreg->vtable->get_integer(qreg, &entry.integer, error) || - !qreg->vtable->get_string(qreg, &string.data, &string.len, error)) + !qreg->vtable->get_string(qreg, &string.data, &string.len, &codepage, error)) return FALSE; teco_doc_init(&entry.string); - teco_doc_set_string(&entry.string, string.data, string.len); + teco_doc_set_string(&entry.string, string.data, string.len, codepage); teco_doc_update(&entry.string, &qreg->string); /* pass ownership of entry to teco_qreg_stack */ @@ -1196,6 +1234,12 @@ teco_ed_hook(teco_ed_hook_t type, GError **error) if (!teco_qreg_execute(qreg, &locals, error)) goto error_add_frame; + if (teco_qreg_current && !teco_qreg_current->must_undo) { + /* currently editing local Q-Register */ + teco_error_editinglocalqreg_set(error, teco_qreg_current->head.name.data, + teco_qreg_current->head.name.len); + goto error_add_frame; + } return teco_expressions_discard_args(error) && teco_expressions_brace_close(error); @@ -1225,7 +1269,7 @@ struct teco_machine_qregspec_t { union { 
struct { teco_qreg_type_t type : 8; - gboolean parse_only : 1; + bool parse_only : 1; }; guint __flags; }; @@ -1255,7 +1299,7 @@ TECO_DECLARE_STATE(teco_state_qregspec_secondchar); TECO_DECLARE_STATE(teco_state_qregspec_string); static teco_state_t *teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, - gchar chr, GError **error); + gunichar chr, GError **error); static teco_state_t * teco_state_qregspec_done(teco_machine_qregspec_t *ctx, GError **error) @@ -1290,7 +1334,7 @@ teco_state_qregspec_done(teco_machine_qregspec_t *ctx, GError **error) } static teco_state_t * -teco_state_qregspec_start_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error) +teco_state_qregspec_start_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error) { /* * FIXME: We're using teco_state_qregspec_start as a success condition, @@ -1307,7 +1351,7 @@ teco_state_qregspec_start_input(teco_machine_qregspec_t *ctx, gchar chr, GError } /* in cmdline.c */ -gboolean teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); TECO_DEFINE_STATE(teco_state_qregspec_start, .is_start = TRUE, @@ -1315,7 +1359,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_start, ); static teco_state_t * -teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error) +teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error) { /* * FIXME: Disallow space characters? 
@@ -1334,7 +1378,7 @@ teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gchar chr, if (!ctx->parse_only) { if (ctx->parent.must_undo) undo__teco_string_truncate(&ctx->name, ctx->name.len); - teco_string_append_c(&ctx->name, g_ascii_toupper(chr)); + teco_string_append_wc(&ctx->name, g_unichar_toupper(chr)); } return teco_state_qregspec_done(ctx, error); } @@ -1350,7 +1394,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_start_global, ); static teco_state_t * -teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error) +teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error) { /* * FIXME: Disallow space characters? @@ -1358,7 +1402,7 @@ teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gchar chr, GEr if (!ctx->parse_only) { if (ctx->parent.must_undo) undo__teco_string_truncate(&ctx->name, ctx->name.len); - teco_string_append_c(&ctx->name, g_ascii_toupper(chr)); + teco_string_append_wc(&ctx->name, g_unichar_toupper(chr)); } return &teco_state_qregspec_secondchar; } @@ -1368,7 +1412,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_firstchar, ); static teco_state_t * -teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error) +teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error) { /* * FIXME: Disallow space characters? 
@@ -1376,7 +1420,7 @@ teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gchar chr, GE if (!ctx->parse_only) { if (ctx->parent.must_undo) undo__teco_string_truncate(&ctx->name, ctx->name.len); - teco_string_append_c(&ctx->name, g_ascii_toupper(chr)); + teco_string_append_wc(&ctx->name, g_unichar_toupper(chr)); } return teco_state_qregspec_done(ctx, error); } @@ -1386,7 +1430,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_secondchar, ); static teco_state_t * -teco_state_qregspec_string_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error) +teco_state_qregspec_string_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error) { /* * Makes sure that braces within string building constructs do not have to be @@ -1427,7 +1471,7 @@ teco_state_qregspec_string_input(teco_machine_qregspec_t *ctx, gchar chr, GError /* in cmdline.c */ gboolean teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, - gchar key, GError **error); + gunichar key, GError **error); TECO_DEFINE_STATE(teco_state_qregspec_string, .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t)teco_state_qregspec_string_process_edit_cmd @@ -1488,7 +1532,7 @@ teco_machine_qregspec_get_stringbuilding(teco_machine_qregspec_t *ctx) * @memberof teco_machine_qregspec_t */ teco_machine_qregspec_status_t -teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gchar chr, +teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gunichar chr, teco_qreg_t **result, teco_qreg_table_t **result_table, GError **error) { ctx->parse_only = result == NULL; @@ -1516,7 +1560,7 @@ teco_machine_qregspec_get_results(teco_machine_qregspec_t *ctx, gboolean teco_machine_qregspec_auto_complete(teco_machine_qregspec_t *ctx, teco_string_t *insert) { - gsize restrict_len = 0; + guint restrict_len = 0; /* * NOTE: We could have separate process_edit_cmd_cb() for @@ -1531,6 +1575,10 @@ teco_machine_qregspec_auto_complete(teco_machine_qregspec_t *ctx, teco_string_t 
/* two-letter Q-Reg */ restrict_len = 2; + /* + * FIXME: This is not quite right as it will propose even + * lower case single or two-letter Q-Register names. + */ return teco_rb3str_auto_complete(&ctx->result_table->tree, !restrict_len, ctx->name.data, ctx->name.len, restrict_len, insert) && ctx->nesting == 1; @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -41,19 +41,26 @@ extern teco_view_t *teco_qreg_view; * FIXME: Use TECO_DECLARE_VTABLE_METHOD(gboolean, teco_qreg, set_integer, teco_qreg_t *, teco_int_t, GError **); * ... * teco_qreg_set_integer_t set_integer; + * ... + * teco_qreg_set_integer(qreg, 23, error); */ typedef const struct { gboolean (*set_integer)(teco_qreg_t *qreg, teco_int_t value, GError **error); gboolean (*undo_set_integer)(teco_qreg_t *qreg, GError **error); gboolean (*get_integer)(teco_qreg_t *qreg, teco_int_t *ret, GError **error); - gboolean (*set_string)(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error); + gboolean (*set_string)(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error); gboolean (*undo_set_string)(teco_qreg_t *qreg, GError **error); gboolean (*append_string)(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error); gboolean (*undo_append_string)(teco_qreg_t *qreg, GError **error); - gboolean (*get_string)(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error); - gint (*get_character)(teco_qreg_t *qreg, guint position, GError **error); + gboolean (*get_string)(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error); + gboolean (*get_character)(teco_qreg_t *qreg, teco_int_t position, + teco_int_t *chr, GError **error); + /* always returns length in glyphs in contrast to get_string() */ + teco_int_t (*get_length)(teco_qreg_t *qreg, GError **error); 
/* * These callbacks exist only to optimize teco_qreg_stack_push|pop() @@ -65,6 +72,13 @@ typedef const struct { gboolean (*edit)(teco_qreg_t *qreg, GError **error); gboolean (*undo_edit)(teco_qreg_t *qreg, GError **error); + + /* + * Load and save already care about undo token + * creation. + */ + gboolean (*load)(teco_qreg_t *qreg, const gchar *filename, GError **error); + gboolean (*save)(teco_qreg_t *qreg, const gchar *filename, GError **error); } teco_qreg_vtable_t; /** @extends teco_rb3str_head_t */ @@ -106,13 +120,6 @@ gboolean teco_qreg_execute(teco_qreg_t *qreg, teco_qreg_table_t *qreg_table_loca void teco_qreg_undo_set_eol_mode(teco_qreg_t *qreg); void teco_qreg_set_eol_mode(teco_qreg_t *qreg, gint mode); -/* - * Load and save already care about undo token - * creation. - */ -gboolean teco_qreg_load(teco_qreg_t *qreg, const gchar *filename, GError **error); -gboolean teco_qreg_save(teco_qreg_t *qreg, const gchar *filename, GError **error); - /** @memberof teco_qreg_t */ static inline void teco_qreg_free(teco_qreg_t *qreg) @@ -220,7 +227,7 @@ void teco_machine_qregspec_reset(teco_machine_qregspec_t *ctx); */ struct teco_machine_stringbuilding_t *teco_machine_qregspec_get_stringbuilding(teco_machine_qregspec_t *ctx); -teco_machine_qregspec_status_t teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gchar chr, +teco_machine_qregspec_status_t teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gunichar chr, teco_qreg_t **result, teco_qreg_table_t **result_table, GError **error); diff --git a/src/rb3str.c b/src/rb3str.c index 889c52e..d51ac5d 100644 --- a/src/rb3str.c +++ b/src/rb3str.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -95,7 +95,7 @@ teco_rb3str_nfind(teco_rb3str_tree_t *tree, gboolean case_sensitive, const gchar * @param 
case_sensitive Whether to match case-sensitive. * @param str String to complete (not necessarily null-terminated). * @param str_len Length of characters in `str`. - * @param restrict_len Limit completions to this size. + * @param restrict_len Limit completions to this size (in characters). * @param insert String to set with characters that can be autocompleted. * @return TRUE if the completion was unambiguous, else FALSE. * @@ -103,7 +103,7 @@ teco_rb3str_nfind(teco_rb3str_tree_t *tree, gboolean case_sensitive, const gchar */ gboolean teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive, - const gchar *str, gsize str_len, gsize restrict_len, teco_string_t *insert) + const gchar *str, gsize str_len, guint restrict_len, teco_string_t *insert) { memset(insert, 0, sizeof(*insert)); @@ -115,7 +115,7 @@ teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive, for (teco_rb3str_head_t *cur = teco_rb3str_nfind(tree, case_sensitive, str, str_len); cur && cur->key.len >= str_len && diff(&cur->key, str, str_len) == str_len; cur = teco_rb3str_get_next(cur)) { - if (restrict_len && cur->key.len != restrict_len) + if (restrict_len && g_utf8_strlen(cur->key.data, cur->key.len) != restrict_len) continue; if (G_UNLIKELY(!first)) { @@ -136,7 +136,7 @@ teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive, for (teco_rb3str_head_t *cur = first; cur && cur->key.len >= str_len && diff(&cur->key, str, str_len) == str_len; cur = teco_rb3str_get_next(cur)) { - if (restrict_len && cur->key.len != restrict_len) + if (restrict_len && g_utf8_strlen(cur->key.data, cur->key.len) != restrict_len) continue; teco_interface_popup_add(TECO_POPUP_PLAIN, diff --git a/src/rb3str.h b/src/rb3str.h index ddbf6bb..adf5f89 100644 --- a/src/rb3str.h +++ b/src/rb3str.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * 
it under the terms of the GNU General Public License as published by @@ -65,5 +65,5 @@ teco_rb3str_head_t *teco_rb3str_nfind(teco_rb3str_tree_t *tree, gboolean case_se const gchar *str, gsize len); gboolean teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive, - const gchar *str, gsize str_len, gsize restrict_len, + const gchar *str, gsize str_len, guint restrict_len, teco_string_t *insert); @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -512,7 +512,7 @@ teco_state_edit_file_done(teco_machine_main_t *ctx, const teco_string_t *str, GE * A value of 1 denotes the first buffer, 2 the second, * ecetera. */ -TECO_DEFINE_STATE_EXPECTFILE(teco_state_edit_file, +TECO_DEFINE_STATE_EXPECTGLOB(teco_state_edit_file, .initial_cb = (teco_state_initial_cb_t)teco_state_edit_file_initial ); @@ -524,7 +524,7 @@ teco_state_save_file_done(teco_machine_main_t *ctx, const teco_string_t *str, GE g_autofree gchar *filename = teco_file_expand_path(str->data); if (teco_qreg_current) { - if (!teco_qreg_save(teco_qreg_current, filename, error)) + if (!teco_qreg_current->vtable->save(teco_qreg_current, filename, error)) return NULL; } else { if (!teco_buffer_save(teco_ring_current, *filename ? 
filename : NULL, error)) @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/sciteco.h b/src/sciteco.h index 87bd973..7fe09d4 100644 --- a/src/sciteco.h +++ b/src/sciteco.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,6 +21,8 @@ #include <glib.h> +#include <Scintilla.h> + #if TECO_INTEGER == 32 typedef gint32 teco_int_t; #define TECO_INT_FORMAT G_GINT32_FORMAT @@ -58,8 +60,18 @@ teco_is_failure(teco_bool_t x) return x >= 0; } +/** + * Call function as destructor on debug builds. + * This should be used only if the cleanup is optional. + */ +#ifdef NDEBUG +#define TECO_DEBUG_CLEANUP __attribute__((unused)) +#else +#define TECO_DEBUG_CLEANUP __attribute__((destructor)) +#endif + /** TRUE if C is a control character */ -#define TECO_IS_CTL(C) ((C) < ' ') +#define TECO_IS_CTL(C) ((gunichar)(C) < ' ') /** ASCII character to echo control character C */ #define TECO_CTL_ECHO(C) ((C) | 0x40) /** @@ -73,17 +85,25 @@ teco_is_failure(teco_bool_t x) * This is not a bitfield, since it is set from SciTECO. */ enum { + TECO_ED_DEFAULT_ANSI = (1 << 2), TECO_ED_AUTOCASEFOLD = (1 << 3), TECO_ED_AUTOEOL = (1 << 4), TECO_ED_HOOKS = (1 << 5), - TECO_ED_FNKEYS = (1 << 6), + //TECO_ED_MOUSEKEY = (1 << 6), TECO_ED_SHELLEMU = (1 << 7), - TECO_ED_XTERM_CLIPBOARD = (1 << 8) + TECO_ED_OSC52 = (1 << 8), + TECO_ED_ICONS = (1 << 9) }; /* in main.c */ extern teco_int_t teco_ed; +static inline guint +teco_default_codepage(void) +{ + return teco_ed & TECO_ED_DEFAULT_ANSI ? 
SC_CHARSET_ANSI : SC_CP_UTF8; +} + /* in main.c */ extern volatile sig_atomic_t teco_interrupted; diff --git a/src/search.c b/src/search.c index 733eab9..0d04895 100644 --- a/src/search.c +++ b/src/search.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -38,11 +38,8 @@ #include "search.h" typedef struct { - /* - * FIXME: Should perhaps all be teco_int_t? - */ - gint dot; - gint from, to; + gssize dot; + gssize from, to; gint count; teco_buffer_t *from_buffer, *to_buffer; @@ -63,6 +60,9 @@ teco_state_search_initial(teco_machine_main_t *ctx, GError **error) if (ctx->mode > TECO_MODE_NORMAL) return TRUE; + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, + teco_interface_get_codepage()); + if (G_UNLIKELY(!teco_search_qreg_machine)) teco_search_qreg_machine = teco_machine_qregspec_new(TECO_QREG_REQUIRED, ctx->qreg_table_locals, ctx->parent.must_undo); @@ -79,16 +79,16 @@ teco_state_search_initial(teco_machine_main_t *ctx, GError **error) return FALSE; if (v1 <= v2) { teco_search_parameters.count = 1; - teco_search_parameters.from = (gint)v1; - teco_search_parameters.to = (gint)v2; + teco_search_parameters.from = teco_interface_glyphs2bytes(v1); + teco_search_parameters.to = teco_interface_glyphs2bytes(v2); } else { teco_search_parameters.count = -1; - teco_search_parameters.from = (gint)v2; - teco_search_parameters.to = (gint)v1; + teco_search_parameters.from = teco_interface_glyphs2bytes(v2); + teco_search_parameters.to = teco_interface_glyphs2bytes(v1); } - if (!teco_validate_pos(teco_search_parameters.from) || - !teco_validate_pos(teco_search_parameters.to)) { + if (teco_search_parameters.from < 0 || + teco_search_parameters.to < 0) { /* * FIXME: In derived classes, the command name will * no longer be correct. 
@@ -114,24 +114,10 @@ teco_state_search_initial(teco_machine_main_t *ctx, GError **error) return TRUE; } -static const gchar * -teco_regexp_escape_chr(gchar chr) -{ - static gchar escaped[] = {'\\', '\0', '\0', '\0'}; - - if (!chr) { - escaped[1] = 'c'; - escaped[2] = '@'; - return escaped; - } - - escaped[1] = chr; - escaped[2] = '\0'; - return g_ascii_isalnum(chr) ? escaped + 1 : escaped; -} - typedef enum { TECO_SEARCH_STATE_START, + TECO_SEARCH_STATE_CTL, + TECO_SEARCH_STATE_ESCAPE, TECO_SEARCH_STATE_NOT, TECO_SEARCH_STATE_CTL_E, TECO_SEARCH_STATE_ANYQ, @@ -153,6 +139,7 @@ typedef enum { * The pointer is modified and always left after * the last character used, so it may point to the * terminating null byte after the call. + * @param codepage The codepage of pattern. * @param escape_default Whether to treat single characters * as classes or not. * @param error A GError. @@ -161,10 +148,13 @@ typedef enum { * When a non-empty string is returned, the state has always * been reset to TECO_STATE_STATE_START. * Must be freed with g_free(). + * + * @fixme The allocations could be avoided by letting it append + * to the target regexp teco_string_t directly. */ static gchar * teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern, - gboolean escape_default, GError **error) + guint codepage, gboolean escape_default, GError **error) { while (pattern->len > 0) { switch (*state) { @@ -184,8 +174,12 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern, */ if (!escape_default) return g_strdup(""); - pattern->len--; - return g_strdup(teco_regexp_escape_chr(*pattern->data++)); + gsize len = codepage == SC_CP_UTF8 + ? 
g_utf8_next_char(pattern->data) - pattern->data : 1; + gchar *escaped = g_regex_escape_string(pattern->data, len); + pattern->data += len; + pattern->len -= len; + return escaped; } break; @@ -246,25 +240,36 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern, case TECO_SEARCH_STATE_ANYQ: { teco_qreg_t *reg; - + gsize len; + gunichar chr; + + if (codepage == SC_CP_UTF8) { + len = g_utf8_next_char(pattern->data) - pattern->data; + chr = g_utf8_get_char(pattern->data); + } else { + len = 1; + chr = *pattern->data; + } switch (teco_machine_qregspec_input(teco_search_qreg_machine, - *pattern->data, ®, NULL, error)) { + chr, ®, NULL, error)) { case TECO_MACHINE_QREGSPEC_ERROR: return NULL; case TECO_MACHINE_QREGSPEC_MORE: /* incomplete, but consume byte */ - break; + pattern->data += len; + pattern->len -= len; + continue; case TECO_MACHINE_QREGSPEC_DONE: teco_machine_qregspec_reset(teco_search_qreg_machine); g_auto(teco_string_t) str = {NULL, 0}; - if (!reg->vtable->get_string(reg, &str.data, &str.len, error)) + if (!reg->vtable->get_string(reg, &str.data, &str.len, NULL, error)) return NULL; - pattern->data++; - pattern->len--; + pattern->data += len; + pattern->len -= len; *state = TECO_SEARCH_STATE_START; return g_regex_escape_string(str.data, str.len); } @@ -303,6 +308,7 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern, * successfully scanned character, so it can be * called recursively. It may also point to the * terminating null byte after the call. + * @param codepage The codepage of pattern. * @param single_expr Whether to scan a single pattern * expression or an arbitrary sequence. * @param error A GError. @@ -310,19 +316,31 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern, * Must be freed with g_free(). 
*/ static gchar * -teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error) +teco_pattern2regexp(teco_string_t *pattern, guint codepage, gboolean single_expr, GError **error) { teco_search_state_t state = TECO_SEARCH_STATE_START; g_auto(teco_string_t) re = {NULL, 0}; do { /* + * Previous character was caret. + * Make sure it is handled like a control character. + * This is necessary even though we have string building activated, + * to support constructs like ^Q^Q (typed with carets) in order to + * quote pattern matching characters. + */ + if (state == TECO_SEARCH_STATE_CTL) { + *pattern->data = TECO_CTL_KEY(g_ascii_toupper(*pattern->data)); + state = TECO_SEARCH_STATE_START; + } + + /* * First check whether it is a class. * This will not treat individual characters * as classes, so we do not convert them to regexp * classes unnecessarily. */ - g_autofree gchar *temp = teco_class2regexp(&state, pattern, FALSE, error); + g_autofree gchar *temp = teco_class2regexp(&state, pattern, codepage, FALSE, error); if (!temp) return NULL; @@ -344,18 +362,40 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error switch (state) { case TECO_SEARCH_STATE_START: switch (*pattern->data) { - case TECO_CTL_KEY('X'): teco_string_append_c(&re, '.'); break; - case TECO_CTL_KEY('N'): state = TECO_SEARCH_STATE_NOT; break; - default: { - const gchar *escaped = teco_regexp_escape_chr(*pattern->data); - teco_string_append(&re, escaped, strlen(escaped)); - } + case '^': + state = TECO_SEARCH_STATE_CTL; + break; + case TECO_CTL_KEY('Q'): + case TECO_CTL_KEY('R'): + state = TECO_SEARCH_STATE_ESCAPE; + break; + case TECO_CTL_KEY('X'): + teco_string_append_c(&re, '.'); + break; + case TECO_CTL_KEY('N'): + state = TECO_SEARCH_STATE_NOT; + break; + default: + state = TECO_SEARCH_STATE_ESCAPE; + continue; } break; + case TECO_SEARCH_STATE_ESCAPE: { + state = TECO_SEARCH_STATE_START; + gsize len = codepage == SC_CP_UTF8 + ? 
g_utf8_next_char(pattern->data) - pattern->data : 1; + /* the allocation could theoretically be avoided by escaping char-wise */ + g_autofree gchar *escaped = g_regex_escape_string(pattern->data, len); + teco_string_append(&re, escaped, strlen(escaped)); + pattern->data += len; + pattern->len -= len; + continue; + } + case TECO_SEARCH_STATE_NOT: { state = TECO_SEARCH_STATE_START; - g_autofree gchar *temp = teco_class2regexp(&state, pattern, TRUE, error); + g_autofree gchar *temp = teco_class2regexp(&state, pattern, codepage, TRUE, error); if (!temp) return NULL; if (!*temp) @@ -391,7 +431,7 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error case TECO_SEARCH_STATE_MANY: { /* consume exactly one pattern element */ - g_autofree gchar *temp = teco_pattern2regexp(pattern, TRUE, error); + g_autofree gchar *temp = teco_pattern2regexp(pattern, codepage, TRUE, error); if (!temp) return NULL; if (!*temp) @@ -417,7 +457,7 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error state = TECO_SEARCH_STATE_START; break; default: { - g_autofree gchar *temp = teco_pattern2regexp(pattern, TRUE, error); + g_autofree gchar *temp = teco_pattern2regexp(pattern, codepage, TRUE, error); if (!temp) return NULL; if (!*temp) @@ -454,16 +494,17 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error } static gboolean -teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error) +teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error) { g_autoptr(GMatchInfo) info = NULL; - const gchar *buffer = (const gchar *)teco_interface_ssm(SCI_GETCHARACTERPOINTER, 0, 0); + /* NOTE: can return NULL pointer for completely new and empty documents */ + const gchar *buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, from, to-from) ? : ""; GError *tmp_error = NULL; /* * NOTE: The return boolean does NOT signal whether an error was generated. 
*/ - g_regex_match_full(re, buffer, (gssize)to, from, 0, &info, &tmp_error); + g_regex_match_full(re, buffer, to-from, 0, 0, &info, &tmp_error); if (tmp_error) { g_propagate_error(error, tmp_error); return FALSE; @@ -543,7 +584,7 @@ teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error) if (matched_from >= 0 && matched_to >= 0) /* match success */ - teco_interface_ssm(SCI_SETSEL, matched_from, matched_to); + teco_interface_ssm(SCI_SETSEL, from+matched_from, from+matched_to); return TRUE; } @@ -551,8 +592,22 @@ teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error) static gboolean teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gsize new_chars, GError **error) { - static const GRegexCompileFlags flags = G_REGEX_CASELESS | G_REGEX_MULTILINE | - G_REGEX_DOTALL | G_REGEX_RAW; + /* FIXME: Should G_REGEX_OPTIMIZE be added under certain circumstances? */ + GRegexCompileFlags flags = G_REGEX_CASELESS | G_REGEX_MULTILINE | G_REGEX_DOTALL; + + /* this is set in teco_state_search_initial() */ + if (ctx->expectstring.machine.codepage != SC_CP_UTF8) { + /* single byte encoding */ + flags |= G_REGEX_RAW; + } else if (!teco_string_validate_utf8(str)) { + /* + * While SciTECO code is always guaranteed to be in valid UTF-8, + * the result of string building may not (eg. if ^EQq inserts garbage). 
+ */ + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Invalid UTF-8 byte sequence in search pattern"); + return FALSE; + } if (teco_current_doc_must_undo()) undo__teco_interface_ssm(SCI_SETSEL, @@ -567,8 +622,9 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs g_autoptr(GRegex) re = NULL; teco_string_t pattern = *str; + g_autofree gchar *re_pattern; /* NOTE: teco_pattern2regexp() modifies str pointer */ - g_autofree gchar *re_pattern = teco_pattern2regexp(&pattern, FALSE, error); + re_pattern = teco_pattern2regexp(&pattern, ctx->expectstring.machine.codepage, FALSE, error); if (!re_pattern) return FALSE; teco_machine_qregspec_reset(teco_search_qreg_machine); @@ -668,13 +724,15 @@ teco_state_search_done(teco_machine_main_t *ctx, const teco_string_t *str, GErro undo__teco_interface_ssm(SCI_SETANCHOR, anchor, 0); if (!search_reg->vtable->undo_set_string(search_reg, error) || - !search_reg->vtable->set_string(search_reg, str->data, str->len, error)) + !search_reg->vtable->set_string(search_reg, str->data, str->len, + teco_default_codepage(), error)) return NULL; teco_interface_ssm(SCI_SETANCHOR, anchor, 0); } else { g_auto(teco_string_t) search_str = {NULL, 0}; - if (!search_reg->vtable->get_string(search_reg, &search_str.data, &search_str.len, error) || + if (!search_reg->vtable->get_string(search_reg, &search_str.data, &search_str.len, + NULL, error) || !teco_state_search_process(ctx, &search_str, search_str.len, error)) return NULL; } @@ -890,12 +948,12 @@ teco_state_search_kill_done(teco_machine_main_t *ctx, const teco_string_t *str, if (teco_is_failure(search_state)) return &teco_state_start; - gint dot = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + sptr_t dot = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); if (teco_search_parameters.dot < dot) { /* kill forwards */ - gint anchor = teco_interface_ssm(SCI_GETANCHOR, 0, 0); + sptr_t anchor = 
teco_interface_ssm(SCI_GETANCHOR, 0, 0); if (teco_current_doc_must_undo()) undo__teco_interface_ssm(SCI_GOTOPOS, dot, 0); @@ -903,18 +961,23 @@ teco_state_search_kill_done(teco_machine_main_t *ctx, const teco_string_t *str, teco_interface_ssm(SCI_DELETERANGE, teco_search_parameters.dot, anchor - teco_search_parameters.dot); + + /* NOTE: An undo action is not always created. */ + if (teco_current_doc_must_undo() && + teco_search_parameters.dot != anchor) + undo__teco_interface_ssm(SCI_UNDO, 0, 0); } else { /* kill backwards */ teco_interface_ssm(SCI_DELETERANGE, dot, teco_search_parameters.dot - dot); + + /* NOTE: An undo action is not always created. */ + if (teco_current_doc_must_undo() && + teco_search_parameters.dot != dot) + undo__teco_interface_ssm(SCI_UNDO, 0, 0); } teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0); teco_ring_dirtify(); - /* NOTE: An undo action is not always created. */ - if (teco_current_doc_must_undo() && - teco_search_parameters.dot != dot) - undo__teco_interface_ssm(SCI_UNDO, 0, 0); - return &teco_state_start; } @@ -981,11 +1044,20 @@ teco_state_search_delete_done(teco_machine_main_t *ctx, const teco_string_t *str */ TECO_DEFINE_STATE_SEARCH(teco_state_search_delete); +static gboolean +teco_state_replace_insert_initial(teco_machine_main_t *ctx, GError **error) +{ + if (ctx->mode == TECO_MODE_NORMAL) + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, + teco_interface_get_codepage()); + return TRUE; +} + /* * FIXME: Could be static */ TECO_DEFINE_STATE_INSERT(teco_state_replace_insert, - .initial_cb = NULL + .initial_cb = (teco_state_initial_cb_t)teco_state_replace_insert_initial ); static teco_state_t * @@ -1058,11 +1130,13 @@ teco_state_replace_default_insert_done_overwrite(teco_machine_main_t *ctx, const if (str->len > 0) { if (!replace_reg->vtable->undo_set_string(replace_reg, error) || - !replace_reg->vtable->set_string(replace_reg, str->data, str->len, error)) + !replace_reg->vtable->set_string(replace_reg, str->data, 
str->len, + teco_default_codepage(), error)) return NULL; } else { g_auto(teco_string_t) replace_str = {NULL, 0}; - if (!replace_reg->vtable->get_string(replace_reg, &replace_str.data, &replace_str.len, error) || + if (!replace_reg->vtable->get_string(replace_reg, &replace_str.data, &replace_str.len, + NULL, error) || (replace_str.len > 0 && !teco_state_insert_process(ctx, &replace_str, replace_str.len, error))) return NULL; } @@ -1089,7 +1163,8 @@ teco_state_replace_default_ignore_done(teco_machine_main_t *ctx, const teco_stri g_assert(replace_reg != NULL); if (!replace_reg->vtable->undo_set_string(replace_reg, error) || - !replace_reg->vtable->set_string(replace_reg, str->data, str->len, error)) + !replace_reg->vtable->set_string(replace_reg, str->data, str->len, + teco_default_codepage(), error)) return NULL; return &teco_state_start; diff --git a/src/search.h b/src/search.h index 3e4a2ef..3eacb6d 100644 --- a/src/search.h +++ b/src/search.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/spawn.c b/src/spawn.c index a30e6b2..e6d620c 100644 --- a/src/spawn.c +++ b/src/spawn.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -76,8 +76,8 @@ static struct { GSource *stdin_src, *stdout_src; gboolean interrupted; - teco_int_t from, to; - teco_int_t start; + gssize from, to; + gsize start; gboolean text_added; teco_eol_writer_t stdin_writer; @@ -121,7 +121,7 @@ teco_parse_shell_command_line(const gchar *cmdline, GError **error) teco_qreg_t *reg = teco_qreg_table_find(&teco_qreg_table_globals, "$COMSPEC", 8); g_assert(reg != NULL); teco_string_t 
comspec; - if (!reg->vtable->get_string(reg, &comspec.data, &comspec.len, error)) + if (!reg->vtable->get_string(reg, &comspec.data, &comspec.len, NULL, error)) return NULL; argv = g_new(gchar *, 5); @@ -140,7 +140,7 @@ teco_parse_shell_command_line(const gchar *cmdline, GError **error) teco_qreg_t *reg = teco_qreg_table_find(&teco_qreg_table_globals, "$SHELL", 6); g_assert(reg != NULL); teco_string_t shell; - if (!reg->vtable->get_string(reg, &shell.data, &shell.len, error)) + if (!reg->vtable->get_string(reg, &shell.data, &shell.len, NULL, error)) return NULL; argv = g_new(gchar *, 4); @@ -164,6 +164,13 @@ teco_state_execute_initial(teco_machine_main_t *ctx, GError **error) if (ctx->mode > TECO_MODE_NORMAL) return TRUE; + /* + * Command-lines and file names are always assumed to be UTF-8, + * unless we set TECO_ED_DEFAULT_ANSI. + */ + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, + teco_default_codepage()); + if (!teco_expressions_eval(FALSE, error)) return FALSE; @@ -202,15 +209,17 @@ teco_state_execute_initial(teco_machine_main_t *ctx, GError **error) break; } - default: + default: { /* pipe and replace character range */ - if (!teco_expressions_pop_num_calc(&teco_spawn_ctx.to, 0, error) || - !teco_expressions_pop_num_calc(&teco_spawn_ctx.from, 0, error)) + teco_int_t from, to; + if (!teco_expressions_pop_num_calc(&to, 0, error) || + !teco_expressions_pop_num_calc(&from, 0, error)) return FALSE; + teco_spawn_ctx.from = teco_interface_glyphs2bytes(from); + teco_spawn_ctx.to = teco_interface_glyphs2bytes(to); rc = teco_bool(teco_spawn_ctx.from <= teco_spawn_ctx.to && - teco_validate_pos(teco_spawn_ctx.from) && - teco_validate_pos(teco_spawn_ctx.to)); - break; + teco_spawn_ctx.from >= 0 && teco_spawn_ctx.to >= 0); + } } if (teco_is_failure(rc)) { @@ -257,12 +266,11 @@ teco_state_execute_done(teco_machine_main_t *ctx, const teco_string_t *str, GErr g_autoptr(GIOChannel) stdin_chan = NULL, stdout_chan = NULL; g_auto(GStrv) argv = NULL, envp = 
NULL; - if (teco_string_contains(str, '\0')) { + if (!str->len || teco_string_contains(str, '\0')) { g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, - "Command line must not contain null-bytes"); + "Command line must not be empty or contain null-bytes"); goto gerror; } - g_assert(str->data != NULL); argv = teco_parse_shell_command_line(str->data, error); if (!argv) @@ -410,17 +418,17 @@ cleanup: } /* in cmdline.c */ -gboolean teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); /*$ EC pipe filter - * EC[command]$ -- Execute operating system command and filter buffer contents - * linesEC[command]$ - * -EC[command]$ - * from,toEC[command]$ - * :EC[command]$ -> Success|Failure - * lines:EC[command]$ -> Success|Failure - * -:EC[command]$ -> Success|Failure - * from,to:EC[command]$ -> Success|Failure + * ECcommand$ -- Execute operating system command and filter buffer contents + * linesECcommand$ + * -ECcommand$ + * from,toECcommand$ + * :ECcommand$ -> Success|Failure + * lines:ECcommand$ -> Success|Failure + * -:ECcommand$ -> Success|Failure + * from,to:ECcommand$ -> Success|Failure * * The EC command allows you to interface with the operating * system shell and external programs. 
@@ -546,14 +554,14 @@ teco_state_egcommand_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, } /*$ EG EGq - * EGq[command]$ -- Set Q-Register to output of operating system command - * linesEGq[command]$ - * -EGq[command]$ - * from,toEGq[command]$ - * :EGq[command]$ -> Success|Failure - * lines:EGq[command]$ -> Success|Failure - * -:EGq[command]$ -> Success|Failure - * from,to:EGq[command]$ -> Success|Failure + * EGq command$ -- Set Q-Register to output of operating system command + * linesEGq command$ + * -EGq command$ + * from,toEGq command$ + * :EGq command$ -> Success|Failure + * lines:EGq command$ -> Success|Failure + * -:EGq command$ -> Success|Failure + * from,to:EGq command$ -> Success|Failure * * Runs an operating system <command> and set Q-Register * <q> to the data read from its standard output stream. @@ -635,7 +643,7 @@ teco_spawn_stdin_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer dat gssize bytes_written = teco_eol_writer_convert(&teco_spawn_ctx.stdin_writer, buffer, convert_len, &teco_spawn_ctx.error); if (bytes_written < 0) { - /* GError ocurred */ + /* GError occurred */ g_main_loop_quit(teco_spawn_ctx.mainloop); return G_SOURCE_REMOVE; } @@ -667,6 +675,8 @@ teco_spawn_stdout_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer da /* source has already been dispatched */ return G_SOURCE_REMOVE; + teco_qreg_t *qreg = teco_spawn_ctx.register_argument; + for (;;) { teco_string_t buffer; @@ -685,20 +695,16 @@ teco_spawn_stdout_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer da if (!buffer.len) return G_SOURCE_CONTINUE; - if (teco_spawn_ctx.register_argument) { + if (qreg) { if (teco_spawn_ctx.text_added) { - if (!teco_spawn_ctx.register_argument->vtable->undo_append_string(teco_spawn_ctx.register_argument, - &teco_spawn_ctx.error) || - !teco_spawn_ctx.register_argument->vtable->append_string(teco_spawn_ctx.register_argument, - buffer.data, buffer.len, - &teco_spawn_ctx.error)) + if 
(!qreg->vtable->undo_append_string(qreg, &teco_spawn_ctx.error) || + !qreg->vtable->append_string(qreg, buffer.data, buffer.len, + &teco_spawn_ctx.error)) goto error; } else { - if (!teco_spawn_ctx.register_argument->vtable->undo_set_string(teco_spawn_ctx.register_argument, - &teco_spawn_ctx.error) || - !teco_spawn_ctx.register_argument->vtable->set_string(teco_spawn_ctx.register_argument, - buffer.data, buffer.len, - &teco_spawn_ctx.error)) + if (!qreg->vtable->undo_set_string(qreg, &teco_spawn_ctx.error) || + !qreg->vtable->set_string(qreg, buffer.data, buffer.len, + teco_default_codepage(), &teco_spawn_ctx.error)) goto error; } } else { @@ -789,8 +795,7 @@ teco_spawn_idle_cb(gpointer user_data) return G_SOURCE_CONTINUE; } -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_spawn_cleanup(void) { g_source_unref(teco_spawn_ctx.idle_src); @@ -801,4 +806,3 @@ teco_spawn_cleanup(void) if (teco_spawn_ctx.error) g_error_free(teco_spawn_ctx.error); } -#endif diff --git a/src/spawn.h b/src/spawn.h index 0e5ca96..312de6e 100644 --- a/src/spawn.h +++ b/src/spawn.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/string-utils.c b/src/string-utils.c index f2cd45e..b284760 100644 --- a/src/string-utils.c +++ b/src/string-utils.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -55,13 +55,20 @@ teco_string_echo(const gchar *str, gsize len) return ret; } -/** @memberof teco_string_t */ +/** + * Get character coordinates for a given byte index. + * + * The given string must be valid UTF-8. 
+ * + * @memberof teco_string_t + */ void -teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column) +teco_string_get_coord(const gchar *str, gsize off, guint *pos, guint *line, guint *column) { + *pos = 0; *line = *column = 1; - for (guint i = 0; i < pos; i++) { + for (guint i = 0; i < off; i = g_utf8_next_char(str+i) - str) { switch (str[i]) { case '\r': if (str[i+1] == '\n') @@ -75,10 +82,21 @@ teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column) (*column)++; break; } + (*pos)++; } } -/** @memberof teco_string_t */ +/** + * Get the length of the prefix common to two strings. + * Works with UTF-8 and single-byte encodings. + * + * @param a Left string. + * @param b Right string. + * @param b_len Length of right string. + * @return Length of the common prefix in bytes. + * + * @memberof teco_string_t + */ gsize teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len) { @@ -91,15 +109,32 @@ teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len) return len; } -/** @memberof teco_string_t */ +/** + * Get the length of the prefix common to two UTF-8 strings + * without considering case. + * + * The UTF-8 strings must be validated, which should be the case + * for help labels and short Q-Register names. + * + * @param a Left UTF-8 string. + * @param b Right UTF-8 string. + * @param b_len Length of right UTF-8 string. + * @return Length of the common prefix in bytes. 
+ * + * @memberof teco_string_t + */ gsize teco_string_casediff(const teco_string_t *a, const gchar *b, gsize b_len) { gsize len = 0; - while (len < a->len && len < b_len && - g_ascii_tolower(a->data[len]) == g_ascii_tolower(b[len])) - len++; + while (len < a->len && len < b_len) { + gunichar a_chr = g_utf8_get_char(a->data+len); + gunichar b_chr = g_utf8_get_char(b+len); + if (g_unichar_tolower(a_chr) != g_unichar_tolower(b_chr)) + break; + len = g_utf8_next_char(b+len) - b; + } return len; } diff --git a/src/string-utils.h b/src/string-utils.h index 26b660b..ebe25d5 100644 --- a/src/string-utils.h +++ b/src/string-utils.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,15 +26,25 @@ /** * Upper-case SciTECO command character. * - * There are implementations in glib (g_ascii_toupper) and libc, + * There are implementations in glib (g_ascii_toupper() and g_unichar_toupper()) and libc, * but this implementation is sufficient for all letters used by SciTECO commands. */ -static inline gchar -teco_ascii_toupper(gchar chr) +static inline gunichar +teco_ascii_toupper(gunichar chr) { return chr >= 'a' && chr <= 'z' ? chr & ~0x20 : chr; } +static inline gchar * +teco_strv_remove(gchar **strv, guint i) +{ + gchar *ret = strv[i]; + do + strv[i] = strv[i+1]; + while (strv[++i]); + return ret; +} + /** * An 8-bit clean null-terminated string. * @@ -42,6 +52,7 @@ teco_ascii_toupper(gchar chr) * and the allocation length is not stored. * Just like GString, teco_string_t are always null-terminated but at the * same time 8-bit clean (can contain null-characters). + * It may or may not contain UTF-8 byte sequences. 
* * The API is designed such that teco_string_t operations operate on plain * (null-terminated) C strings, a single character or character array as well as @@ -51,6 +62,12 @@ teco_ascii_toupper(gchar chr) * A target teco_string_t::data is always null-terminated and thus safe to pass * to functions expecting traditional null-terminated C strings if you can * guarantee that it contains no null-character other than the trailing one. + * + * @warning For consistency with C idioms the underlying character type is + * `char`, which might be signed! + * Accessing individual characters may yield signed integers and that sign + * might be preserved when upcasting to a larger signed integer. + * In this case you should always cast to `guchar` first. */ typedef struct { /** @@ -58,7 +75,7 @@ typedef struct { * The pointer is guaranteed to be non-NULL after initialization. */ gchar *data; - /** Length of `data` without the trailing null-byte. */ + /** Length of `data` without the trailing null-byte in bytes. */ gsize len; } teco_string_t; @@ -112,6 +129,16 @@ teco_string_append_c(teco_string_t *str, gchar chr) teco_string_append(str, &chr, sizeof(chr)); } +/** @memberof teco_string_t */ +static inline void +teco_string_append_wc(teco_string_t *target, gunichar chr) +{ + /* 4 bytes should be enough, but we better follow the documentation */ + target->data = g_realloc(target->data, target->len + 6 + 1); + target->len += g_unichar_to_utf8(chr, target->data+target->len); + target->data[target->len] = '\0'; +} + /** * @fixme Should this also realloc str->data? 
* @@ -135,7 +162,7 @@ void undo__teco_string_truncate(teco_string_t *, gsize); gchar *teco_string_echo(const gchar *str, gsize len); -void teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column); +void teco_string_get_coord(const gchar *str, gsize off, guint *pos, guint *line, guint *column); typedef gsize (*teco_string_diff_t)(const teco_string_t *a, const gchar *b, gsize b_len); gsize teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len); @@ -170,6 +197,19 @@ teco_string_rindex(const teco_string_t *str, gchar chr) const gchar *teco_string_last_occurrence(const teco_string_t *str, const gchar *chars); +/** + * Validate whether string consists exclusively of valid UTF-8, but accept null bytes. + * @note there is g_utf8_validate_len() in Glib 2.60 + */ +static inline gboolean +teco_string_validate_utf8(const teco_string_t *str) +{ + const gchar *p = str->data; + while (!g_utf8_validate(p, str->len - (p - str->data), &p) && !*p) + p++; + return p - str->data == str->len; +} + /** @memberof teco_string_t */ static inline void teco_string_clear(teco_string_t *str) diff --git a/src/symbols-extract.tes b/src/symbols-extract.tes index 9f43fa6..1ab6667 100755 --- a/src/symbols-extract.tes +++ b/src/symbols-extract.tes @@ -1,4 +1,4 @@ -#!/usr/local/bin/sciteco -m +#!/usr/local/bin/sciteco -8m !* * ./symbols-extract.tes [-p <prefix pattern list>] -n <SymbolList object> [--] \ * <output file> <input header> @@ -48,13 +48,12 @@ teco_symbols_init(void) teco_symbol_list_init(&Q[getopt.n], entries, G_N_ELEMENTS(entries), FALSE); } -#ifndef NDEBUG -static void __attribute__((destructor)) +static void TECO_DEBUG_CLEANUP teco_cmdline_cleanup(void) { teco_symbol_list_clear(&Q[getopt.n]); } -#endif^J + !* write output file *! 
2EL EWQ#ou diff --git a/src/symbols.c b/src/symbols.c index ce7a7f6..feead76 100644 --- a/src/symbols.c +++ b/src/symbols.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -251,7 +251,7 @@ teco_state_scintilla_symbols_done(teco_machine_main_t *ctx, const teco_string_t } /* in cmdline.c */ -gboolean teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error); +gboolean teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); /*$ ES scintilla message * -- Send Scintilla message diff --git a/src/symbols.h b/src/symbols.h index 9cdfd74..0325d9d 100644 --- a/src/symbols.h +++ b/src/symbols.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,7 +30,7 @@ //#define DEBUG -TECO_DEFINE_UNDO_SCALAR(gchar); +TECO_DEFINE_UNDO_SCALAR(gunichar); TECO_DEFINE_UNDO_SCALAR(gint); TECO_DEFINE_UNDO_SCALAR(guint); TECO_DEFINE_UNDO_SCALAR(gsize); @@ -112,7 +112,7 @@ teco_undo_push_size(teco_undo_action_t action_cb, gsize size) } void -teco_undo_pop(gint pc) +teco_undo_pop(gsize pc) { while ((gint)teco_undo_heads->len > pc) { teco_undo_token_t *top = @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can 
redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -164,8 +164,8 @@ gpointer teco_undo_push_size(teco_undo_action_t action_cb, gsize size) * significantly improves batch-mode performance. */ -TECO_DECLARE_UNDO_SCALAR(gchar); -#define teco_undo_gchar(VAR) (*teco_undo_object_gchar_push(&(VAR))) +TECO_DECLARE_UNDO_SCALAR(gunichar); +#define teco_undo_gunichar(VAR) (*teco_undo_object_gunichar_push(&(VAR))) TECO_DECLARE_UNDO_SCALAR(gint); #define teco_undo_gint(VAR) (*teco_undo_object_gint_push(&(VAR))) @@ -243,5 +243,5 @@ TECO_DECLARE_UNDO_SCALAR(gconstpointer); /** @} */ -void teco_undo_pop(gint pc); +void teco_undo_pop(gsize pc); void teco_undo_clear(void); @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,6 +45,7 @@ #include "error.h" #include "qreg.h" #include "eol.h" +#include "memory.h" #include "view.h" /** @memberof teco_view_t */ @@ -72,6 +73,27 @@ teco_view_setup(teco_view_t *ctx) */ teco_view_ssm(ctx, SCI_SETMARGINWIDTHN, 1, 0); + if (teco_ed & TECO_ED_DEFAULT_ANSI) { + /* + * Configure a single-byte codepage/charset. + * This requires setting it on all of the possible styles. + * Fortunately, we can do it before SCI_STYLECLEARALL. + * This is important only for display purposes - other than that + * all single-byte encodings are handled the same. + */ + teco_view_ssm(ctx, SCI_STYLESETCHARACTERSET, STYLE_DEFAULT, SC_CHARSET_ANSI); + /* 0 is used for ALL single-byte encodings */ + teco_view_ssm(ctx, SCI_SETCODEPAGE, 0, 0); + } else { + /* + * Documents are UTF-8 by default and all UTF-8 documents + * are expected to have a character index. + * This is a property of the document, instead of the view. 
+ */ + teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + } + /* * Set some basic styles in order to provide * a consistent look across UIs if no profile @@ -137,6 +159,28 @@ teco_view_set_representations(teco_view_t *ctx) gchar buf[] = {(gchar)cc, '\0'}; teco_view_ssm(ctx, SCI_SETREPRESENTATION, (uptr_t)buf, (sptr_t)reps[cc]); } + + if (teco_ed & TECO_ED_DEFAULT_ANSI) { + /* + * Non-ANSI chars should be visible somehow. + * This would best be done always when changing the + * encoding to 0, but it would be kind of expensive. + * + * FIXME: On the other hand, this could cause problems + * when setting SC_CP_UTF8 later on. + */ + for (guint cc = 0x80; cc <= 0xFF; cc++) { + gchar buf[] = {(gchar)cc, '\0'}; + gchar rep[2+1]; + /* + * Hexadecimal is poorly supported in SciTECO, but + * multiple decimal numbers one after another look + * confusing, esp. in Curses. + */ + g_snprintf(rep, sizeof(rep), "%02X", cc); + teco_view_ssm(ctx, SCI_SETREPRESENTATION, (uptr_t)buf, (sptr_t)rep); + } + } } /** @@ -161,6 +205,9 @@ teco_view_set_representations(teco_view_t *ctx) gboolean teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **error) { + g_auto(teco_eol_reader_t) reader; + teco_eol_reader_init_gio(&reader, channel); + teco_view_ssm(ctx, SCI_BEGINUNDOACTION, 0, 0); teco_view_ssm(ctx, SCI_CLEARALL, 0, 0); @@ -173,11 +220,11 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro */ struct stat stat_buf = {.st_size = 0}; if (!fstat(g_io_channel_unix_get_fd(channel), &stat_buf) && - stat_buf.st_size > 0) + stat_buf.st_size > 0) { + if (!teco_memory_check(stat_buf.st_size, error)) + goto error; teco_view_ssm(ctx, SCI_ALLOCATE, stat_buf.st_size, 0); - - g_auto(teco_eol_reader_t) reader; - teco_eol_reader_init_gio(&reader, channel); + } for (;;) { /* @@ -187,14 +234,24 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro teco_string_t str; GIOStatus rc = 
teco_eol_reader_convert(&reader, &str.data, &str.len, error); - if (rc == G_IO_STATUS_ERROR) { - teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0); - return FALSE; - } + if (rc == G_IO_STATUS_ERROR) + goto error; if (rc == G_IO_STATUS_EOF) break; teco_view_ssm(ctx, SCI_APPENDTEXT, str.len, (sptr_t)str.data); + + /* + * Even if we checked initially, knowing the file size, + * Scintilla could allocate much more bytes. + */ + if (!teco_memory_check(0, error)) + goto error; + + if (G_UNLIKELY(teco_interface_is_interrupted())) { + teco_error_interrupted_set(error); + goto error; + } } /* @@ -216,6 +273,10 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0); return TRUE; + +error: + teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0); + return FALSE; } /** @@ -449,3 +510,129 @@ teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError **error) return TRUE; } + +/** + * Convert a glyph index to a byte offset as used by Scintilla. + * + * This is optimized with the "line character index", + * which must always be enabled in UTF-8 documents. + * + * It is also used to validate glyph indexes. + * + * @param ctx The view to operate on. + * @param pos Position in glyphs/characters. + * @return Position in bytes or -1 if pos is out of bounds. + */ +gssize +teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos) +{ + if (pos < 0) + return -1; /* invalid position */ + if (!pos) + return 0; + + if (!(teco_view_ssm(ctx, SCI_GETLINECHARACTERINDEX, 0, 0) & + SC_LINECHARACTERINDEX_UTF32)) + /* assume single-byte encoding */ + return pos <= teco_view_ssm(ctx, SCI_GETLENGTH, 0, 0) ? 
pos : -1; + + sptr_t line = teco_view_ssm(ctx, SCI_LINEFROMINDEXPOSITION, pos, + SC_LINECHARACTERINDEX_UTF32); + sptr_t line_bytes = teco_view_ssm(ctx, SCI_POSITIONFROMLINE, line, 0); + pos -= teco_view_ssm(ctx, SCI_INDEXPOSITIONFROMLINE, line, + SC_LINECHARACTERINDEX_UTF32); + return teco_view_ssm(ctx, SCI_POSITIONRELATIVE, line_bytes, pos) ? : -1; +} + +/** + * Convert byte offset to glyph/character index without bounds checking. + */ +teco_int_t +teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos) +{ + if (!pos) + return 0; + + if (!(teco_view_ssm(ctx, SCI_GETLINECHARACTERINDEX, 0, 0) & + SC_LINECHARACTERINDEX_UTF32)) + /* assume single-byte encoding */ + return pos; + + sptr_t line = teco_view_ssm(ctx, SCI_LINEFROMPOSITION, pos, 0); + sptr_t line_bytes = teco_view_ssm(ctx, SCI_POSITIONFROMLINE, line, 0); + return teco_view_ssm(ctx, SCI_INDEXPOSITIONFROMLINE, line, + SC_LINECHARACTERINDEX_UTF32) + + teco_view_ssm(ctx, SCI_COUNTCHARACTERS, line_bytes, pos); +} + +#define TECO_RELATIVE_LIMIT 1024 + +/** + * Convert a glyph index relative to a byte position to + * a byte position. + * + * Can be used to implement commands with relative character + * ranges. + * As an optimization, this always counts characters for deltas + * smaller than TECO_RELATIVE_LIMIT, so it will be fast + * even where the character-index based lookup is too slow + * (as on exceedingly long lines). + * + * @param ctx The view to operate on. + * @param pos Byte position to start. + * @param n Number of glyphs/characters to the left (negative) or + * right (positive) of pos. + * @return Position in bytes or -1 if the resulting position is out of bounds. 
+ */ +gssize +teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n) +{ + if (!n) + return pos; + if (ABS(n) > TECO_RELATIVE_LIMIT) + return teco_view_glyphs2bytes(ctx, teco_view_bytes2glyphs(ctx, pos) + n); + + sptr_t res = teco_view_ssm(ctx, SCI_POSITIONRELATIVE, pos, n); + /* SCI_POSITIONRELATIVE may return 0 even if the offset is valid */ + return res ? : n > 0 ? -1 : teco_view_bytes2glyphs(ctx, pos)+n >= 0 ? 0 : -1; +} + +/** + * Get codepoint at given byte offset. + * + * @param ctx The view to operate on. + * @param pos The glyph's byte position + * @param len The length of the document in bytes + * @return The requested codepoint. + * In UTF-8 encoded documents, this might be -1 (incomplete sequence) + * or -2 (invalid byte sequence). + */ +teco_int_t +teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len) +{ + if (teco_view_ssm(ctx, SCI_GETCODEPAGE, 0, 0) != SC_CP_UTF8) + /* + * We don't support the asiatic multi-byte encodings, + * so everything else is single-byte codepages. + * NOTE: Internally, the character is casted to signed char + * and may therefore become negative. + */ + return (guchar)teco_view_ssm(ctx, SCI_GETCHARAT, pos, 0); + + gchar buf[4+1]; + struct Sci_TextRangeFull range = { + .chrg = {pos, MIN(len, pos+sizeof(buf)-1)}, + .lpstrText = buf + }; + /* + * Probably faster than SCI_GETRANGEPOINTER+SCI_GETGAPPOSITION + * or repeatedly calling SCI_GETCHARAT. + */ + teco_view_ssm(ctx, SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range); + /* + * Make sure that the -1/-2 error values are preserved. + * The sign bit in UCS-4/UTF-32 is unused, so this will even + * suffice if TECO_INTEGER == 32. 
+ */ + return (gint32)g_utf8_get_char_validated(buf, -1); +} @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -70,3 +70,16 @@ gboolean teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError /** @pure @memberof teco_view_t */ void teco_view_free(teco_view_t *ctx); + +static inline guint +teco_view_get_codepage(teco_view_t *ctx) +{ + return teco_view_ssm(ctx, SCI_GETCODEPAGE, 0, 0) + ? : teco_view_ssm(ctx, SCI_STYLEGETCHARACTERSET, STYLE_DEFAULT, 0); +} + +gssize teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos); +teco_int_t teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos); +gssize teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n); + +teco_int_t teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len); |