diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2025-06-01 02:38:25 +0300 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2025-06-01 03:06:13 +0300 |
commit | 442268285a5f8b1d53052b6c7b0566d9200e71c7 (patch) | |
tree | 90fb62e9157445aebc2253ece0ec96b0934d1932 | |
parent | 6e3da17a2fae11af9ae00d9b59bd0d752022e16b (diff) | |
download | sciteco-442268285a5f8b1d53052b6c7b0566d9200e71c7.tar.gz |
<nA> and <nQq> now return -1 in case the index n is out of range
* The old behavior of throwing an error was inherited from Video TECO.
* The command is now more similar to TECO-11.
* Since -1 is taken, invalid and incomplete UTF-8 byte sequences
are now reported as -2 and -3, respectively.
I wasn't really able to provoke -3, though.
-rw-r--r-- | src/core-commands.c | 19 | ||||
-rw-r--r-- | src/qreg-commands.c | 12 | ||||
-rw-r--r-- | src/qreg.c | 15 | ||||
-rw-r--r-- | src/view.c | 9 | ||||
-rw-r--r-- | tests/testsuite.at | 5 |
5 files changed, 27 insertions, 33 deletions
diff --git a/src/core-commands.c b/src/core-commands.c index bb731a1..d2abe79 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -574,8 +574,6 @@ teco_state_start_print(teco_machine_main_t *ctx, GError **error) * This can be an ASCII <code> or Unicode codepoint * depending on Scintilla's encoding of the current * buffer. - * Invalid Unicode byte sequences are reported as - * -1 or -2. * * - If <n> is 0, return the <code> of the character * pointed to by dot. @@ -586,12 +584,11 @@ teco_state_start_print(teco_machine_main_t *ctx, GError **error) * - If <n> is omitted, the sign prefix is implied. * * If the position of the queried character is off-page, - * the command will yield an error. - * + * the command will return -1. * If the document is encoded as UTF-8 and there is - * an incomplete sequence at the requested position, - * -1 is returned. - * All other invalid Unicode sequences are returned as -2. + * an invalid byte sequence at the requested position, + * -2 is returned. + * Incomplete byte sequences are returned as -3. */ static void teco_state_start_get(teco_machine_main_t *ctx, GError **error) @@ -604,12 +601,8 @@ teco_state_start_get(teco_machine_main_t *ctx, GError **error) gssize get_pos = teco_interface_glyphs2bytes_relative(pos, v); sptr_t len = teco_interface_ssm(SCI_GETLENGTH, 0, 0); - if (get_pos < 0 || get_pos == len) { - teco_error_range_set(error, "A"); - return; - } - - teco_expressions_push(teco_interface_get_character(get_pos, len)); + teco_expressions_push(get_pos < 0 || get_pos == len + ? 
-1 : teco_interface_get_character(get_pos, len)); } static teco_state_t * diff --git a/src/qreg-commands.c b/src/qreg-commands.c index 51f9149..a3caad0 100644 --- a/src/qreg-commands.c +++ b/src/qreg-commands.c @@ -300,21 +300,21 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, /*$ Q Qq query * Qq -> n -- Query Q-Register existence, its integer or string characters * -Qq -> -n - * <position>Qq -> character + * <position>Qq -> code * :Qq -> -1 | size * * Without any arguments, get and return the integer-part of * Q-Register <q>. * - * With one argument, return the <character> code at <position> + * With one argument, return the character <code> at <position> * from the string-part of Q-Register <q>. * Positions are handled like buffer positions \(em they * begin at 0 up to the length of the string minus 1. - * An error is thrown for invalid positions. + * -1 is returned for invalid positions. * If <q> is encoded as UTF-8 and there is - * an incomplete sequence at the requested position, - * -1 is returned. - * All other invalid Unicode sequences are returned as -2. + * an invalid byte sequence at the requested position, + * -2 is returned. + * Incomplete UTF-8 byte sequences are returned as -3. * Both non-colon-modified forms of Q require register <q> * to be defined and fail otherwise. * @@ -239,18 +239,12 @@ teco_qreg_plain_get_character(teco_qreg_t *qreg, teco_int_t position, sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); gssize off = teco_view_glyphs2bytes(teco_qreg_view, position); - gboolean ret = off >= 0 && off != len; - if (!ret) - g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, - "Position %" TECO_INT_FORMAT " out of range", position); - /* make sure we still restore the current Q-Register */ - else - *chr = teco_view_get_character(teco_qreg_view, off, len); + *chr = off >= 0 && off != len ? 
teco_view_get_character(teco_qreg_view, off, len) : -1; if (teco_qreg_current) teco_doc_edit(&teco_qreg_current->string, 0); - return ret; + return TRUE; } static teco_int_t @@ -527,9 +521,8 @@ teco_qreg_external_get_character(teco_qreg_t *qreg, teco_int_t position, return FALSE; if (position < 0 || position >= g_utf8_strlen(str.data, str.len)) { - g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, - "Position %" TECO_INT_FORMAT " out of range", position); - return FALSE; + *chr = -1; + return TRUE; } const gchar *p = g_utf8_offset_to_pointer(str.data, position); @@ -628,8 +628,8 @@ teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n) * @param pos The glyph's byte position * @param len The length of the document in bytes * @return The requested codepoint. - * In UTF-8 encoded documents, this might be -1 (incomplete sequence) - * or -2 (invalid byte sequence). + * In UTF-8 encoded documents, this might be -2 (invalid byte sequence) + * or -3 (incomplete sequence). */ teco_int_t teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len) @@ -653,12 +653,15 @@ teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len) * or repeatedly calling SCI_GETCHARAT. */ teco_view_ssm(ctx, SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range); + if (!*buf) + return 0; /* * Make sure that the -1/-2 error values are preserved. * The sign bit in UCS-4/UTF-32 is unused, so this will even * suffice if TECO_INTEGER == 32. */ - return *buf ? (gint32)g_utf8_get_char_validated(buf, -1) : 0; + gint32 rc = g_utf8_get_char_validated(buf, -1); + return rc < 0 ? 
rc-1 : rc; } void diff --git a/tests/testsuite.at b/tests/testsuite.at index dddb1f0..fd93aa4 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -242,6 +242,11 @@ TE_CHECK([[@I/TEST/ @EW/юникод.txt/]], 0, ignore, ignore) AT_CHECK([[test -f юникод.txt]], 0, ignore, ignore) TE_CHECK([[^^ß-223"N(0/0)' 23Uъ Q[Ъ]-23"N(0/0)']], 0, ignore, ignore) TE_CHECK([[@O/метка/ !метка!]], 0, ignore, ignore) + +# Test the "error" return codes of <A>: +TE_CHECK([[0EE 255@I/A/J 65001EE 0A-(-2)"N(0/0)' 1A-^^A"N(0/0)' 2A-(-1)"N(0/0)']], 0, ignore, ignore) +# FIXME: Byte 128 should probably return -3 (incomplete sequence). +TE_CHECK([[@EQa// 0EE 128@I/A/J 65001EE 0Qa-(-2)"N(0/0)' 1Qa-^^A"N(0/0)' 2Qa-(-1)"N(0/0)']], 0, ignore, ignore) AT_CLEANUP AT_SETUP([Automatic EOL normalization]) |