From 867d22e419afe769f05ad26b61c6ea5ea1432c3c Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Sat, 22 Mar 2025 13:45:28 +0300 Subject: harmonized all word-movement and deletion commands: they move/delete until the beginning of words now * All commands and their documentation were inconsistent. * ^W rubbed out to the beginning of words. * Shift+Right (fnkeys.tes) moved to the beginning of the next word if invoked at the beginning of a word and to the end of the next word otherwise. * W (and V and Y by extension) moved to the end of the next word. * The cheat sheet would claim that W moves to the beginning of the next word. * Video TECO's W command would differ again from everything else. With positive arguments, it moved to the beginning of words, while with negative arguments it moved to the end of words. I decided not to copy this behavior. * It has been decided to adopt a consistent beginning-of-words policy. -W therefore differs from Video TECO in moving to the beginning of the current or previous word. * teco_find_words() is now based on parsing the document pointer, instead of relying on SCI_WORDENDPOSITION, since the latter cannot actually be used to skip strictly non-word characters. This requires a constant number of Scintilla messages but will require fewer messages only when moving by more than 3 words. * The semantics of W are therefore now consistent with Vim and Emacs as well. * Shift+Right/Left is still based on SCI_WORDENDPOSITION, so its behavior differs slightly from W, for instance at the end of lines, as it will stop at linebreaks. * Unfortunately, these changes will break lots of macros, among others the M#rf, M#sp and git.blame macros ("Useful macros" from the wiki). 
--- doc/grosciteco.tes | 4 +- doc/sciteco.7.template | 5 ++- lib/fnkeys.tes | 4 +- lib/opener.tes | 2 +- src/cmdline.c | 4 ++ src/core-commands.c | 109 +++++++++++++++++++++++++++++++++++++++--------- src/symbols-extract.tes | 4 +- tests/testsuite.at | 11 ++--- 8 files changed, 109 insertions(+), 34 deletions(-) diff --git a/doc/grosciteco.tes b/doc/grosciteco.tes index e5be8a9..f1d7830 100755 --- a/doc/grosciteco.tes +++ b/doc/grosciteco.tes @@ -135,7 +135,7 @@ EBN[input] !cmd.xF! L F< !cmd.xX! - :M#sw .(W).X.w + :M#sw .,1,.ESSCI_WORDENDPOSITIONX.w Ocmd.xXQ.w !cmd.xXsciteco_topic! !* @@ -272,7 +272,7 @@ EBN[input] !cmd.C! :M#sw 0A-^^u"= !* FIXME: This can be CuXXXX_XXXX (decomposed, e.g. for cyrillic й) *! - C 16 \U.w  W + C 16 \U.w  LR | .(:M#sa).X.w 0Q[glyphs.Q.w]U.w ' diff --git a/doc/sciteco.7.template b/doc/sciteco.7.template index b274715..30f53ba 100644 --- a/doc/sciteco.7.template +++ b/doc/sciteco.7.template @@ -503,10 +503,11 @@ Non-empty string arguments .br (modifier \fIdisabled\fP) T};T{ -Rub out last word according to Scintilla's definition of a word +Rub out to beginning of last word according to Scintilla's definition of a word as set by .SCITECO_TOPIC SCI_SETWORDCHARS -.BR SCI_SETWORDCHARS . +.BR SCI_SETWORDCHARS , +analogous to \fBY\fP command. T} \^;\^;\^;T{ Miscelleaneous diff --git a/lib/fnkeys.tes b/lib/fnkeys.tes index 857c249..922548b 100644 --- a/lib/fnkeys.tes +++ b/lib/fnkeys.tes @@ -78,7 +78,7 @@ 1U[ LEFT] @[SLEFT]{ - 0,0,ESWORDSTARTPOSITIONESWORDSTARTPOSITIONU.p + 1,0,ESWORDSTARTPOSITIONESWORDSTARTPOSITIONU.p Q.pESGETCOLUMN,4EJ Q.p:-.M#c } @@ -94,7 +94,7 @@ 1U[ RIGHT] @[SRIGHT]{ - 0,0,ESWORDENDPOSITIONESWORDENDPOSITIONU.p + 0,1,ESWORDENDPOSITIONESWORDENDPOSITIONU.p Q.pESGETCOLUMN,4EJ Q.p:-.M#c } diff --git a/lib/opener.tes b/lib/opener.tes index 6a57317..21c118d 100644 --- a/lib/opener.tes +++ b/lib/opener.tes @@ -16,7 +16,7 @@ 1U.l 1U.c !* +line[,column] *! 
0A-+"= - C 0A"D \U.l W 0A-,"= C \U.c ' 0A-10"=L' ' + C 0A"D \U.l <0A"DC|1;'> 0A-,"= C \U.c ' 0A-10"=L' ' ' !* filename:line[:column][:] *! diff --git a/src/cmdline.c b/src/cmdline.c index dde096d..b3da887 100644 --- a/src/cmdline.c +++ b/src/cmdline.c @@ -509,6 +509,10 @@ teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t * case TECO_CTL_KEY('W'): { /* rubout/reinsert word */ teco_interface_popup_clear(); + /* + * NOTE: This must be consistent with teco_find_words(): + * Always delete to the beginning of the previous word. + */ g_auto(teco_string_t) wchars; wchars.len = teco_interface_ssm(SCI_GETWORDCHARS, 0, 0); wchars.data = g_malloc(wchars.len + 1); diff --git a/src/core-commands.c b/src/core-commands.c index 979095b..8cbb4be 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -700,31 +700,97 @@ teco_state_start_back(teco_machine_main_t *ctx, GError **error) } /* - * FIXME: would be nice to do this with constant amount of - * editor messages. E.g. by using custom algorithm accessing - * the internal document buffer. + * NOTE: This implementation has a constant/maximum number of Scintilla + * messages, compared to using SCI_WORDENDPOSITION. + * This pays out only beginning at n > 3, though. + * But most importantly SCI_WORDENDPOSITION(p, FALSE) does not actually skip + * over all non-word characters. */ static gboolean teco_find_words(gsize *pos, teco_int_t n) { + if (!n) + return TRUE; + + g_auto(teco_string_t) wchars; + wchars.len = teco_interface_ssm(SCI_GETWORDCHARS, 0, 0); + wchars.data = g_malloc(wchars.len + 1); + teco_interface_ssm(SCI_GETWORDCHARS, 0, (sptr_t)wchars.data); + wchars.data[wchars.len] = '\0'; + + sptr_t gap = teco_interface_ssm(SCI_GETGAPPOSITION, 0, 0); + if (n > 0) { + /* scan forward */ + gsize len = teco_interface_ssm(SCI_GETLENGTH, 0, 0); + gsize range_len = gap > *pos ? 
gap - *pos : len - *pos; + if (!range_len) + return FALSE; + const gchar *buffer, *p; + p = buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, *pos, range_len); + while (n--) { - sptr_t old_pos = *pos; - *pos = teco_interface_ssm(SCI_WORDENDPOSITION, *pos, FALSE); - *pos = teco_interface_ssm(SCI_WORDENDPOSITION, *pos, TRUE); - if (*pos == old_pos) - return FALSE; + gboolean skip_word = TRUE; + + for (;;) { + if (*pos == len) + /* end of document */ + return n == 0; + if (p-buffer >= range_len) { + g_assert(*pos == gap); + range_len = len - gap; + p = buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, gap, range_len); + } + /* + * FIXME: Is this safe or do we have to look up Unicode code points? + */ + if ((!teco_string_contains(&wchars, *p)) == skip_word) { + if (!skip_word) + break; + skip_word = !skip_word; + continue; + } + (*pos)++; + p++; + } } return TRUE; } + /* scan backwards */ + gsize range_len = gap < *pos ? *pos - gap : *pos; + if (!range_len) + return FALSE; + const gchar *buffer, *p; + buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, *pos - range_len, range_len); + p = buffer+range_len; + while (n++) { - sptr_t old_pos = *pos; - *pos = teco_interface_ssm(SCI_WORDSTARTPOSITION, *pos, TRUE); - *pos = teco_interface_ssm(SCI_WORDSTARTPOSITION, *pos, FALSE); - if (*pos == old_pos) - return FALSE; + gboolean skip_word = FALSE; + + for (;;) { + if (*pos == 0) + /* beginning of document */ + return n == 0; + if (p == buffer) { + g_assert(*pos == gap); + range_len = *pos; + buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, 0, range_len); + p = buffer+range_len; + } + /* + * FIXME: Is this safe or do we have to look up Unicode code points? + */ + if ((!teco_string_contains(&wchars, p[-1])) == skip_word) { + if (skip_word) + break; + skip_word = !skip_word; + continue; + } + (*pos)--; + p--; + } } return TRUE; @@ -738,8 +804,8 @@ teco_find_words(gsize *pos, teco_int_t n) * Move dot words forward. 
* - If is positive, dot is positioned at the beginning * of the word words after the current one. - * - If is negative, dot is positioned at the end - * of the word words before the current one. + * - If is negative, dot is positioned at the beginning + * of the word, <-n> words before the current one. * - If is zero, dot is not moved. * * \(lqW\(rq uses Scintilla's definition of a word as @@ -747,8 +813,9 @@ teco_find_words(gsize *pos, teco_int_t n) * .B SCI_SETWORDCHARS * message. * - * Otherwise, the command's behaviour is analogous to - * the \(lqC\(rq command. + * If the requested word would lie beyond the range of the + * buffer, the command yields an error. + * If colon-modified it instead returns a condition code. */ static void teco_state_start_word(teco_machine_main_t *ctx, GError **error) @@ -805,10 +872,12 @@ teco_delete_words(teco_int_t n) * -V * [n]:V -> Success|Failure * - * Deletes the next words until the end of the + * Deletes the next words until the beginning of the * n'th word after the current one. - * If is negative, deletes up to end of the - * n'th word before the current one. + * If is negative, deletes up to the beginning of the + * word, <-n> words before the current one. + * \(lq-V\(rq in the middle of a word deletes until the beginning + * of the word. * If is omitted, 1 or -1 is implied depending on the * sign prefix. * diff --git a/src/symbols-extract.tes b/src/symbols-extract.tes index 1ab6667..9a8a270 100755 --- a/src/symbols-extract.tes +++ b/src/symbols-extract.tes @@ -15,7 +15,7 @@ EMQ[$SCITECOPATH]/string.tes LR 0X#ou 2LR 0X#in HK !* copy all defines in input file beginning with prefix *! -EBN#in EF +EBN#in EF !* sort all defines *! 
Ga ZJB 0,.M[qsort] J @@ -37,7 +37,7 @@ I/* static const teco_symbol_entry_t entries[] = {^J < - .,W.Xa 0KK + .,LR.Xa 0KK I#ifdef Qa^J^I{"Qa", Qa},^J#endif^J .-Z;> I}; diff --git a/tests/testsuite.at b/tests/testsuite.at index 1c42fe9..c76c4c5 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -114,14 +114,15 @@ AT_CHECK([$SCITECO -e "@I/1^J2^J3/J 2^QC :^Q-3\"N(0/0)'"], 0, ignore, ignore) AT_CLEANUP AT_SETUP([Moving by words]) -AT_CHECK([$SCITECO -e "Z= 3J 2W .-11\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "Z= 3J 2W .-18\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "@I/foo ^J bar/ JW .-6\"N(0/0)'"], 0, ignore, ignore) # FIXME: Sooner or later, there will be a shortcut for -W. -AT_CHECK([$SCITECO -e "Z-4J -2W .-17\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "Z-4J -3W .-12\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore) AT_CLEANUP AT_SETUP([Deleting words]) -AT_CHECK([$SCITECO -e "3J 2V .-3\"N(0/0)' Z-20\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore) -AT_CHECK([$SCITECO -e "Z-4J 2Y .-17\"N(0/0)' Z-21\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "3J 2V .-3\"N(0/0)' Z-13\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "Z-4J 2Y .-18\"N(0/0)' Z-22\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore) AT_CLEANUP AT_SETUP([Searches]) @@ -171,7 +172,7 @@ AT_CLEANUP AT_SETUP([Unicode]) AT_CHECK([$SCITECO -e "8594@I/Здравствуй, мир!/ Z-17\"N(0/0)' J0A-8594\"N(0/0)'"], 0, ignore, ignore) AT_CHECK([$SCITECO -e "8594@^Ua/Здравствуй, мир!/ :Qa-17\"N(0/0)' 0Qa-8594\"N(0/0)'"], 0, ignore, ignore) -AT_CHECK([$SCITECO -e "@I/Здравствуй, мир!/ JW .-10\"N(0/0)' ^E-20\"N(0/0)' 204:EE .-10\"N(0/0)'"], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "@I/Здравствуй, мир!/ JW .-12\"N(0/0)' ^E-22\"N(0/0)' 204:EE .-12\"N(0/0)'"], 0, ignore, ignore) AT_CHECK([$SCITECO -e "@I/TEST/ @EW/юникод.txt/"], 0, ignore, ignore) AT_CHECK([test -f юникод.txt], 0, ignore, ignore) 
AT_CHECK([$SCITECO -e "^^ß-223\"N(0/0) 23Uъ Q[Ъ]-23\"N(0/0)'"], 0, ignore, ignore) -- cgit v1.2.3