From 867d22e419afe769f05ad26b61c6ea5ea1432c3c Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Sat, 22 Mar 2025 13:45:28 +0300 Subject: harmonized all word-movement and deletion commands: they move/delete until the beginning of words now * All commands and their documentation were inconsistent. * ^W rubbed out to the beginning of words. * Shift+Right (fnkeys.tes) moved to the beginning of the next word if invoked at the beginning of a word and to the end of the next word otherwise. * <W> (and <V> and <Y> by extension) moved to the end of the next word. * The cheat sheet would claim that <W> moves to the beginning of the next word. * Video TECO's <W> command would differ again from everything else. With positive arguments, it moved to the beginning of words, while with negative it moved to end of words. I decided not to copy this behavior. * It has been decided to adopt a consistent beginning-of-words policy. -W therefore differs from Video TECO in moving to the beginning of the current or previous word. * teco_find_words() is now based on parsing the document pointer, instead of relying on SCI_WORDENDPOSITION, since the latter cannot actually be used to skip strictly non-word characters. This requires a constant amount of Scintilla messages but will require fewer messages only when moving for more than 3 words. * The semantics of <W> are therefore now consistent with Vim and Emacs as well. * Shift+Right/Left is still based on SCI_WORDENDPOSITION, so its behavior differs slightly from <W>, for instance at the end of lines, as it will stop at linebreaks. * Unfortunately, these changes will break lots of macros, among others the M#rf, M#sp and git.blame macros ("Useful macros" from the wiki). 
--- src/cmdline.c | 4 ++ src/core-commands.c | 109 +++++++++++++++++++++++++++++++++++++++--------- src/symbols-extract.tes | 4 +- 3 files changed, 95 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/cmdline.c b/src/cmdline.c index dde096d..b3da887 100644 --- a/src/cmdline.c +++ b/src/cmdline.c @@ -509,6 +509,10 @@ teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t * case TECO_CTL_KEY('W'): { /* rubout/reinsert word */ teco_interface_popup_clear(); + /* + * NOTE: This must be consistent with teco_find_words(): + * Always delete to the beginning of the previous word. + */ g_auto(teco_string_t) wchars; wchars.len = teco_interface_ssm(SCI_GETWORDCHARS, 0, 0); wchars.data = g_malloc(wchars.len + 1); diff --git a/src/core-commands.c b/src/core-commands.c index 979095b..8cbb4be 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -700,31 +700,97 @@ teco_state_start_back(teco_machine_main_t *ctx, GError **error) } /* - * FIXME: would be nice to do this with constant amount of - * editor messages. E.g. by using custom algorithm accessing - * the internal document buffer. + * NOTE: This implementation has a constant/maximum number of Scintilla + * messages, compared to using SCI_WORDENDPOSITION. + * This pays out only beginning at n > 3, though. + * But most importantly SCI_WORDENDPOSITION(p, FALSE) does not actually skip + * over all non-word characters. */ static gboolean teco_find_words(gsize *pos, teco_int_t n) { + if (!n) + return TRUE; + + g_auto(teco_string_t) wchars; + wchars.len = teco_interface_ssm(SCI_GETWORDCHARS, 0, 0); + wchars.data = g_malloc(wchars.len + 1); + teco_interface_ssm(SCI_GETWORDCHARS, 0, (sptr_t)wchars.data); + wchars.data[wchars.len] = '\0'; + + sptr_t gap = teco_interface_ssm(SCI_GETGAPPOSITION, 0, 0); + if (n > 0) { + /* scan forward */ + gsize len = teco_interface_ssm(SCI_GETLENGTH, 0, 0); + gsize range_len = gap > *pos ? 
gap - *pos : len - *pos; + if (!range_len) + return FALSE; + const gchar *buffer, *p; + p = buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, *pos, range_len); + while (n--) { - sptr_t old_pos = *pos; - *pos = teco_interface_ssm(SCI_WORDENDPOSITION, *pos, FALSE); - *pos = teco_interface_ssm(SCI_WORDENDPOSITION, *pos, TRUE); - if (*pos == old_pos) - return FALSE; + gboolean skip_word = TRUE; + + for (;;) { + if (*pos == len) + /* end of document */ + return n == 0; + if (p-buffer >= range_len) { + g_assert(*pos == gap); + range_len = len - gap; + p = buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, gap, range_len); + } + /* + * FIXME: Is this safe or do we have to look up Unicode code points? + */ + if ((!teco_string_contains(&wchars, *p)) == skip_word) { + if (!skip_word) + break; + skip_word = !skip_word; + continue; + } + (*pos)++; + p++; + } } return TRUE; } + /* scan backwards */ + gsize range_len = gap < *pos ? *pos - gap : *pos; + if (!range_len) + return FALSE; + const gchar *buffer, *p; + buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, *pos - range_len, range_len); + p = buffer+range_len; + while (n++) { - sptr_t old_pos = *pos; - *pos = teco_interface_ssm(SCI_WORDSTARTPOSITION, *pos, TRUE); - *pos = teco_interface_ssm(SCI_WORDSTARTPOSITION, *pos, FALSE); - if (*pos == old_pos) - return FALSE; + gboolean skip_word = FALSE; + + for (;;) { + if (*pos == 0) + /* beginning of document */ + return n == 0; + if (p == buffer) { + g_assert(*pos == gap); + range_len = *pos; + buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, 0, range_len); + p = buffer+range_len; + } + /* + * FIXME: Is this safe or do we have to look up Unicode code points? + */ + if ((!teco_string_contains(&wchars, p[-1])) == skip_word) { + if (skip_word) + break; + skip_word = !skip_word; + continue; + } + (*pos)--; + p--; + } } return TRUE; @@ -738,8 +804,8 @@ teco_find_words(gsize *pos, teco_int_t n) * Move dot words forward. 
* - If <n> is positive, dot is positioned at the beginning * of the word <n> words after the current one. - If <n> is negative, dot is positioned at the end - of the word <n> words before the current one. + * - If <n> is negative, dot is positioned at the beginning + * of the word, <-n> words before the current one. * - If <n> is zero, dot is not moved. * * \(lqW\(rq uses Scintilla's definition of a word as @@ -747,8 +813,9 @@ teco_find_words(gsize *pos, teco_int_t n) * .B SCI_SETWORDCHARS * message. * - * Otherwise, the command's behaviour is analogous to - * the \(lqC\(rq command. + * If the requested word would lie beyond the range of the + * buffer, the command yields an error. + * If colon-modified it instead returns a condition code. */ static void teco_state_start_word(teco_machine_main_t *ctx, GError **error) @@ -805,10 +872,12 @@ teco_delete_words(teco_int_t n) * -V * [n]:V -> Success|Failure * - * Deletes the next <n> words until the end of the + * Deletes the next <n> words until the beginning of the * n'th word after the current one. - * If <n> is negative, deletes up to end of the - * n'th word before the current one. + * If <n> is negative, deletes up to the beginning of the + * word, <-n> words before the current one. + * \(lq-V\(rq in the middle of a word deletes until the beginning + * of the word. * If <n> is omitted, 1 or -1 is implied depending on the * sign prefix. * diff --git a/src/symbols-extract.tes b/src/symbols-extract.tes index 1ab6667..9a8a270 100755 --- a/src/symbols-extract.tes +++ b/src/symbols-extract.tes @@ -15,7 +15,7 @@ EMQ[$SCITECOPATH]/string.tes LR 0X#ou 2LR 0X#in HK !* copy all defines in input file beginning with prefix *! -EBN#in EF +EBN#in EF !* sort all defines *! Ga ZJB 0,.M[qsort] J @@ -37,7 +37,7 @@ I/* static const teco_symbol_entry_t entries[] = {^J < - .,W.Xa 0KK + .,LR.Xa 0KK I#ifdef Qa^J^I{"Qa", Qa},^J#endif^J .-Z;> I}; -- cgit v1.2.3