<nA> and <nQq> now return -1 in case the index n is out of range

* The old behavior of throwing an error was inherited from Video TECO.
* The command is now more similar to TECO-11.
* Since -1 is taken, invalid and incomplete UTF-8 byte sequences are now reported as -2/-3.
  I wasn't really able to provoke -3, though.
author: Robin Haberkorn <robin.haberkorn@googlemail.com> 2025-06-01 02:38:25 +0300
committer: Robin Haberkorn <robin.haberkorn@googlemail.com> 2025-06-01 03:06:13 +0300
commit: 442268285a5f8b1d53052b6c7b0566d9200e71c7 (patch)
tree: 90fb62e9157445aebc2253ece0ec96b0934d1932
parent: 6e3da17a2fae11af9ae00d9b59bd0d752022e16b (diff)
5 files changed, 27 insertions, 33 deletions
diff --git a/src/core-commands.c b/src/core-commands.c
index bb731a1..d2abe79 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -574,8 +574,6 @@ teco_state_start_print(teco_machine_main_t *ctx, GError **error)
  * This can be an ASCII <code> or Unicode codepoint
  * depending on Scintilla's encoding of the current
  * buffer.
- * Invalid Unicode byte sequences are reported as
- * -1 or -2.
  *
  *   - If <n> is 0, return the <code> of the character
  *     pointed to by dot.
@@ -586,12 +584,11 @@ teco_state_start_print(teco_machine_main_t *ctx, GError **error)
  *   - If <n> is omitted, the sign prefix is implied.
  *
  * If the position of the queried character is off-page,
- * the command will yield an error.
- *
+ * the command will return -1.
  * If the document is encoded as UTF-8 and there is
- * an incomplete sequence at the requested position,
- * -1 is returned.
- * All other invalid Unicode sequences are returned as -2.
+ * an invalid byte sequence at the requested position,
+ * -2 is returned.
+ * Incomplete byte sequences are returned as -3.
  */
 static void
 teco_state_start_get(teco_machine_main_t *ctx, GError **error)
@@ -604,12 +601,8 @@ teco_state_start_get(teco_machine_main_t *ctx, GError **error)
 	gssize get_pos = teco_interface_glyphs2bytes_relative(pos, v);
 	sptr_t len = teco_interface_ssm(SCI_GETLENGTH, 0, 0);
 
-	if (get_pos < 0 || get_pos == len) {
-		teco_error_range_set(error, "A");
-		return;
-	}
-
-	teco_expressions_push(teco_interface_get_character(get_pos, len));
+	teco_expressions_push(get_pos < 0 || get_pos == len
+				? -1 : teco_interface_get_character(get_pos, len));
 }
 
 static teco_state_t *
diff --git a/src/qreg-commands.c b/src/qreg-commands.c
index 51f9149..a3caad0 100644
--- a/src/qreg-commands.c
+++ b/src/qreg-commands.c
@@ -300,21 +300,21 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
 /*$ Q Qq query
  * Qq -> n -- Query Q-Register existence, its integer or string characters
  * -Qq -> -n
- * <position>Qq -> character
+ * <position>Qq -> code
  * :Qq -> -1 | size
  *
  * Without any arguments, get and return the integer-part of
  * Q-Register <q>.
  *
- * With one argument, return the <character> code at <position>
+ * With one argument, return the character <code> at <position>
  * from the string-part of Q-Register <q>.
  * Positions are handled like buffer positions \(em they
  * begin at 0 up to the length of the string minus 1.
- * An error is thrown for invalid positions.
+ * -1 is returned for invalid positions.
  * If <q> is encoded as UTF-8 and there is
- * an incomplete sequence at the requested position,
- * -1 is returned.
- * All other invalid Unicode sequences are returned as -2.
+ * an invalid byte sequence at the requested position,
+ * -2 is returned.
+ * Incomplete UTF-8 byte sequences are returned as -3.
  * Both non-colon-modified forms of Q require register <q>
  * to be defined and fail otherwise.
  *
diff --git a/src/qreg.c b/src/qreg.c
index 8990210..9695f64 100644
--- a/src/qreg.c
+++ b/src/qreg.c
@@ -239,18 +239,12 @@ teco_qreg_plain_get_character(teco_qreg_t *qreg, teco_int_t position,
 	sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
 	gssize off = teco_view_glyphs2bytes(teco_qreg_view, position);
 
-	gboolean ret = off >= 0 && off != len;
-	if (!ret)
-		g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE,
-		            "Position %" TECO_INT_FORMAT " out of range", position);
-		/* make sure we still restore the current Q-Register */
-	else
-		*chr = teco_view_get_character(teco_qreg_view, off, len);
+	*chr = off >= 0 && off != len ? teco_view_get_character(teco_qreg_view, off, len) : -1;
 
 	if (teco_qreg_current)
 		teco_doc_edit(&teco_qreg_current->string, 0);
 
-	return ret;
+	return TRUE;
 }
 
 static teco_int_t
@@ -527,9 +521,8 @@ teco_qreg_external_get_character(teco_qreg_t *qreg, teco_int_t position,
 		return FALSE;
 
 	if (position < 0 || position >= g_utf8_strlen(str.data, str.len)) {
-		g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE,
-		            "Position %" TECO_INT_FORMAT " out of range", position);
-		return FALSE;
+		*chr = -1;
+		return TRUE;
 	}
 	const gchar *p = g_utf8_offset_to_pointer(str.data, position);
 
diff --git a/src/view.c b/src/view.c
index 71d74e2..e21d53a 100644
--- a/src/view.c
+++ b/src/view.c
@@ -628,8 +628,8 @@ teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n)
  * @param pos The glyph's byte position
  * @param len The length of the document in bytes
  * @return The requested codepoint.
- *   In UTF-8 encoded documents, this might be -1 (incomplete sequence)
- *   or -2 (invalid byte sequence).
+ *   In UTF-8 encoded documents, this might be -2 (invalid byte sequence)
+ *   or -3 (incomplete sequence).
  */
 teco_int_t
 teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len)
@@ -653,12 +653,15 @@ teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len)
 	 * or repeatedly calling SCI_GETCHARAT.
 	 */
 	teco_view_ssm(ctx, SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range);
+	if (!*buf)
+		return 0;
 	/*
 	 * Make sure that the -1/-2 error values are preserved.
 	 * The sign bit in UCS-4/UTF-32 is unused, so this will even
 	 * suffice if TECO_INTEGER == 32.
 	 */
-	return *buf ? (gint32)g_utf8_get_char_validated(buf, -1) : 0;
+	gint32 rc = g_utf8_get_char_validated(buf, -1);
+	return rc < 0 ? rc-1 : rc;
 }
 
 void
diff --git a/tests/testsuite.at b/tests/testsuite.at
index dddb1f0..fd93aa4 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -242,6 +242,11 @@ TE_CHECK([[@I/TEST/ @EW/юникод.txt/]], 0, ignore, ignore)
 AT_CHECK([[test -f юникод.txt]], 0, ignore, ignore)
 TE_CHECK([[^^ß-223"N(0/0)' 23Uъ Q[Ъ]-23"N(0/0)']], 0, ignore, ignore)
 TE_CHECK([[@O/метка/ !метка!]], 0, ignore, ignore)
+
+# Test the "error" return codes of <A>:
+TE_CHECK([[0EE 255@I/A/J 65001EE 0A-(-2)"N(0/0)' 1A-^^A"N(0/0)' 2A-(-1)"N(0/0)']], 0, ignore, ignore)
+# FIXME: Byte 128 should probably return -3 (incomplete sequence).
+TE_CHECK([[@EQa// 0EE 128@I/A/J 65001EE 0Qa-(-2)"N(0/0)' 1Qa-^^A"N(0/0)' 2Qa-(-1)"N(0/0)']], 0, ignore, ignore)
 AT_CLEANUP
 
 AT_SETUP([Automatic EOL normalization])
author	Robin Haberkorn <robin.haberkorn@googlemail.com>	2025-06-01 02:38:25 +0300
committer	Robin Haberkorn <robin.haberkorn@googlemail.com>	2025-06-01 03:06:13 +0300
commit	442268285a5f8b1d53052b6c7b0566d9200e71c7 (patch)
tree	90fb62e9157445aebc2253ece0ec96b0934d1932
parent	6e3da17a2fae11af9ae00d9b59bd0d752022e16b (diff)