From 867d22e419afe769f05ad26b61c6ea5ea1432c3c Mon Sep 17 00:00:00 2001
From: Robin Haberkorn <robin.haberkorn@googlemail.com>
Date: Sat, 22 Mar 2025 13:45:28 +0300
Subject: harmonized all word-movement and deletion commands: they move/delete
 until the beginning of words now

* All commands and their documentation were inconsistent.
  * ^W rubbed out to the beginning of words.
  * Shift+Right (fnkeys.tes) moved to the beginning of the next word if
    invoked at the beginning of a word and to the end of the next word otherwise.
  * <W> (and <V> and <Y> by extension) moved to the end of the next word.
  * The cheat sheet would claim that <W> moves to the beginning of the next word.
* Video TECO's <W> command would differ again from everything else.
  With positive arguments, it moved to the beginning of words, while
  with negative it moved to end of words.
  I decided not to copy this behavior.
* It has been decided to adopt a consistent beginning-of-words policy.
  -W therefore differs from Video TECO in moving to the beginning of the
  current or previous word.
* teco_find_words() is now based on parsing the document pointer, instead
  of relying on SCI_WORDENDPOSITION, since the latter cannot actually be
  used to skip strictly non-word characters.
  This requires a constant number of Scintilla messages but will require fewer
  messages only when moving by more than 3 words.
* The semantics of <W> are therefore now consistent with Vim and Emacs as well.
* Shift+Right/Left is still based on SCI_WORDENDPOSITION, so its behavior
  differs slightly from <W> for instance at the end of lines, as it will
  stop at linebreaks.
* Unfortunately, these changes will break lots of macros, among others
  the M#rf, M#sp and git.blame macros ("Useful macros" from the wiki).
---
 doc/grosciteco.tes      |   4 +-
 doc/sciteco.7.template  |   5 ++-
 lib/fnkeys.tes          |   4 +-
 lib/opener.tes          |   2 +-
 src/cmdline.c           |   4 ++
 src/core-commands.c     | 109 +++++++++++++++++++++++++++++++++++++++---------
 src/symbols-extract.tes |   4 +-
 tests/testsuite.at      |  11 ++---
 8 files changed, 109 insertions(+), 34 deletions(-)
diff --git a/doc/grosciteco.tes b/doc/grosciteco.tes
index e5be8a9..f1d7830 100755
--- a/doc/grosciteco.tes
+++ b/doc/grosciteco.tes
@@ -135,7 +135,7 @@ EBN[input]
     !cmd.xF!
       L F<
     !cmd.xX!
-      :M#sw .(W).X.w
+      :M#sw .,1,.ESSCI_WORDENDPOSITIONX.w
       Ocmd.xXQ.w
       !cmd.xXsciteco_topic!
         !*
@@ -272,7 +272,7 @@ EBN[input]
   !cmd.C!
     :M#sw 0A-^^u"=
       !* FIXME: This can be CuXXXX_XXXX (decomposed, e.g. for cyrillic й) *!
-      C 16 \U.w  W
+      C 16 \U.w  LR
     |
       .(:M#sa).X.w 0Q[glyphs.Q.w]U.w
     '
diff --git a/doc/sciteco.7.template b/doc/sciteco.7.template
index b274715..30f53ba 100644
--- a/doc/sciteco.7.template
+++ b/doc/sciteco.7.template
@@ -503,10 +503,11 @@ Non-empty string arguments
 .br
 (modifier \fIdisabled\fP)
 T};T{
-Rub out last word according to Scintilla's definition of a word
+Rub out to beginning of last word according to Scintilla's definition of a word
 as set by
 .SCITECO_TOPIC SCI_SETWORDCHARS
-.BR SCI_SETWORDCHARS .
+.BR SCI_SETWORDCHARS ,
+analogous to \fBY\fP command.
 T}
 \^;\^;\^;T{
 Miscelleaneous
diff --git a/lib/fnkeys.tes b/lib/fnkeys.tes
index 857c249..922548b 100644
--- a/lib/fnkeys.tes
+++ b/lib/fnkeys.tes
@@ -78,7 +78,7 @@
 1U[LEFT]
 
 @[SLEFT]{
-  0,0,ESWORDSTARTPOSITIONESWORDSTARTPOSITIONU.p
+  1,0,ESWORDSTARTPOSITIONESWORDSTARTPOSITIONU.p
   Q.pESGETCOLUMN,4EJ
   Q.p:-.M#c
 }
@@ -94,7 +94,7 @@
 1U[RIGHT]
 
 @[SRIGHT]{
-  0,0,ESWORDENDPOSITIONESWORDENDPOSITIONU.p
+  0,1,ESWORDENDPOSITIONESWORDENDPOSITIONU.p
   Q.pESGETCOLUMN,4EJ
   Q.p:-.M#c
 }
diff --git a/lib/opener.tes b/lib/opener.tes
index 6a57317..21c118d 100644
--- a/lib/opener.tes
+++ b/lib/opener.tes
@@ -16,7 +16,7 @@
     1U.l 1U.c
     !* +line[,column] *!
     0A-+"=
-      C 0A"D \U.l W 0A-,"= C \U.c ' 0A-10"=L' '
+      C 0A"D \U.l <0A"DC|1;'> 0A-,"= C \U.c ' 0A-10"=L' '
     '
 
     !* filename:line[:column][:] *!
diff --git a/src/cmdline.c b/src/cmdline.c
index dde096d..b3da887 100644
--- a/src/cmdline.c
+++ b/src/cmdline.c
@@ -509,6 +509,10 @@ teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *
 	case TECO_CTL_KEY('W'): { /* rubout/reinsert word */
 		teco_interface_popup_clear();
 
+		/*
+		 * NOTE: This must be consistent with teco_find_words():
+		 * Always delete to the beginning of the previous word.
+		 */
 		g_auto(teco_string_t) wchars;
 		wchars.len = teco_interface_ssm(SCI_GETWORDCHARS, 0, 0);
 		wchars.data = g_malloc(wchars.len + 1);
diff --git a/src/core-commands.c b/src/core-commands.c
index 979095b..8cbb4be 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -700,31 +700,97 @@ teco_state_start_back(teco_machine_main_t *ctx, GError **error)
 }
 
 /*
- * FIXME: would be nice to do this with constant amount of
- * editor messages. E.g. by using custom algorithm accessing
- * the internal document buffer.
+ * NOTE: This implementation has a constant/maximum number of Scintilla
+ * messages, compared to using SCI_WORDENDPOSITION.
+ * This pays out only beginning at n > 3, though.
+ * But most importantly SCI_WORDENDPOSITION(p, FALSE) does not actually skip
+ * over all non-word characters.
  */
 static gboolean
 teco_find_words(gsize *pos, teco_int_t n)
 {
+	if (!n)
+		return TRUE;
+
+	g_auto(teco_string_t) wchars;
+	wchars.len = teco_interface_ssm(SCI_GETWORDCHARS, 0, 0);
+	wchars.data = g_malloc(wchars.len + 1);
+	teco_interface_ssm(SCI_GETWORDCHARS, 0, (sptr_t)wchars.data);
+	wchars.data[wchars.len] = '\0';
+
+	sptr_t gap = teco_interface_ssm(SCI_GETGAPPOSITION, 0, 0);
+
 	if (n > 0) {
+		/* scan forward */
+		gsize len = teco_interface_ssm(SCI_GETLENGTH, 0, 0);
+		gsize range_len = gap > *pos ? gap - *pos : len - *pos;
+		if (!range_len)
+			return FALSE;
+		const gchar *buffer, *p;
+		p = buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, *pos, range_len);
+
 		while (n--) {
-			sptr_t old_pos = *pos;
-			*pos = teco_interface_ssm(SCI_WORDENDPOSITION, *pos, FALSE);
-			*pos = teco_interface_ssm(SCI_WORDENDPOSITION, *pos, TRUE);
-			if (*pos == old_pos)
-				return FALSE;
+			gboolean skip_word = TRUE;
+
+			for (;;) {
+				if (*pos == len)
+					/* end of document */
+					return n == 0;
+				if (p-buffer >= range_len) {
+					g_assert(*pos == gap);
+					range_len = len - gap;
+					p = buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, gap, range_len);
+				}
+				/*
+				 * FIXME: Is this safe or do we have to look up Unicode code points?
+				 */
+				if ((!teco_string_contains(&wchars, *p)) == skip_word) {
+					if (!skip_word)
+						break;
+					skip_word = !skip_word;
+					continue;
+				}
+				(*pos)++;
+				p++;
+			}
 		}
 
 		return TRUE;
 	}
 
+	/* scan backwards */
+	gsize range_len = gap < *pos ? *pos - gap : *pos;
+	if (!range_len)
+		return FALSE;
+	const gchar *buffer, *p;
+	buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, *pos - range_len, range_len);
+	p = buffer+range_len;
+
 	while (n++) {
-		sptr_t old_pos = *pos;
-		*pos = teco_interface_ssm(SCI_WORDSTARTPOSITION, *pos, TRUE);
-		*pos = teco_interface_ssm(SCI_WORDSTARTPOSITION, *pos, FALSE);
-		if (*pos == old_pos)
-			return FALSE;
+		gboolean skip_word = FALSE;
+
+		for (;;) {
+			if (*pos == 0)
+				/* beginning of document */
+				return n == 0;
+			if (p == buffer) {
+				g_assert(*pos == gap);
+				range_len = *pos;
+				buffer = (const gchar *)teco_interface_ssm(SCI_GETRANGEPOINTER, 0, range_len);
+				p = buffer+range_len;
+			}
+			/*
+			 * FIXME: Is this safe or do we have to look up Unicode code points?
+			 */
+			if ((!teco_string_contains(&wchars, p[-1])) == skip_word) {
+				if (skip_word)
+					break;
+				skip_word = !skip_word;
+				continue;
+			}
+			(*pos)--;
+			p--;
+		}
 	}
 
 	return TRUE;
@@ -738,8 +804,8 @@ teco_find_words(gsize *pos, teco_int_t n)
  * Move dot <n> words forward.
  *   - If <n> is positive, dot is positioned at the beginning
  *     of the word <n> words after the current one.
- *   - If <n> is negative, dot is positioned at the end
- *     of the word <n> words before the current one.
+ *   - If <n> is negative, dot is positioned at the beginning
+ *     of the word, <-n> words before the current one.
  *   - If <n> is zero, dot is not moved.
  *
  * \(lqW\(rq uses Scintilla's definition of a word as
@@ -747,8 +813,9 @@ teco_find_words(gsize *pos, teco_int_t n)
  * .B SCI_SETWORDCHARS
  * message.
  *
- * Otherwise, the command's behaviour is analogous to
- * the \(lqC\(rq command.
+ * If the requested word would lie beyond the range of the
+ * buffer, the command yields an error.
+ * If colon-modified it instead returns a condition code.
  */
 static void
 teco_state_start_word(teco_machine_main_t *ctx, GError **error)
@@ -805,10 +872,12 @@ teco_delete_words(teco_int_t n)
  * -V
  * [n]:V -> Success|Failure
  *
- * Deletes the next <n> words until the end of the
+ * Deletes the next <n> words until the beginning of the
  * n'th word after the current one.
- * If <n> is negative, deletes up to end of the
- * n'th word before the current one.
+ * If <n> is negative, deletes up to the beginning of the
+ * word, <-n> words before the current one.
+ * \(lq-V\(rq in the middle of a word deletes until the beginning
+ * of the word.
  * If <n> is omitted, 1 or -1 is implied depending on the
  * sign prefix.
  *
diff --git a/src/symbols-extract.tes b/src/symbols-extract.tes
index 1ab6667..9a8a270 100755
--- a/src/symbols-extract.tes
+++ b/src/symbols-extract.tes
@@ -15,7 +15,7 @@ EMQ[$SCITECOPATH]/string.tes
 LR 0X#ou 2LR 0X#in HK
 
 !* copy all defines in input file beginning with prefix *!
-EBN#in <S#defineS[Q[getopt.p]]; -SS :Xa> EF
+EBN#in <S#defineS[[Q[getopt.p]]M ]; 1:Xa 10:a> EF
 
 !* sort all defines *!
 Ga ZJB 0,.M[qsort] J
@@ -37,7 +37,7 @@ I/*
 
 static const teco_symbol_entry_t entries[] = {^J
 <
-  .,W.Xa 0KK
+  .,LR.Xa 0KK
   I#ifdef Qa^J^I{"Qa", Qa},^J#endif^J
 .-Z;>
 I};
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 1c42fe9..c76c4c5 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -114,14 +114,15 @@ AT_CHECK([$SCITECO -e "@I/1^J2^J3/J 2^QC :^Q-3\"N(0/0)'"], 0, ignore, ignore)
 AT_CLEANUP
 
 AT_SETUP([Moving by words])
-AT_CHECK([$SCITECO -e "Z= 3J 2W .-11\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "Z= 3J 2W .-18\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "@I/foo ^J bar/ JW .-6\"N(0/0)'"], 0, ignore, ignore)
 # FIXME: Sooner or later, there will be a shortcut for -W.
-AT_CHECK([$SCITECO -e "Z-4J -2W .-17\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "Z-4J -3W .-12\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore)
 AT_CLEANUP
 
 AT_SETUP([Deleting words])
-AT_CHECK([$SCITECO -e "3J 2V .-3\"N(0/0)' Z-20\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore)
-AT_CHECK([$SCITECO -e "Z-4J 2Y .-17\"N(0/0)' Z-21\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "3J 2V .-3\"N(0/0)' Z-13\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "Z-4J 2Y .-18\"N(0/0)' Z-22\"N(0/0)'" "$WORDS_EXAMPLE"], 0, ignore, ignore)
 AT_CLEANUP
 
 AT_SETUP([Searches])
@@ -171,7 +172,7 @@ AT_CLEANUP
 AT_SETUP([Unicode])
 AT_CHECK([$SCITECO -e "8594@I/Здравствуй, мир!/ Z-17\"N(0/0)' J0A-8594\"N(0/0)'"], 0, ignore, ignore)
 AT_CHECK([$SCITECO -e "8594@^Ua/Здравствуй, мир!/ :Qa-17\"N(0/0)' 0Qa-8594\"N(0/0)'"], 0, ignore, ignore)
-AT_CHECK([$SCITECO -e "@I/Здравствуй, мир!/ JW .-10\"N(0/0)' ^E-20\"N(0/0)' 204:EE .-10\"N(0/0)'"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "@I/Здравствуй, мир!/ JW .-12\"N(0/0)' ^E-22\"N(0/0)' 204:EE .-12\"N(0/0)'"], 0, ignore, ignore)
 AT_CHECK([$SCITECO -e "@I/TEST/ @EW/юникод.txt/"], 0, ignore, ignore)
 AT_CHECK([test -f юникод.txt], 0, ignore, ignore)
 AT_CHECK([$SCITECO -e "^^ß-223\"N(0/0) 23Uъ Q[Ъ]-23\"N(0/0)'"], 0, ignore, ignore)
-- 
cgit v1.2.3