aboutsummaryrefslogtreecommitdiffhomepage
path: root/tests/testsuite.at
diff options
context:
space:
mode:
author: Robin Haberkorn <robin.haberkorn@googlemail.com> 2024-09-11 14:30:24 +0200
committer: Robin Haberkorn <robin.haberkorn@googlemail.com> 2024-09-11 16:14:27 +0200
commit: 2a050759ab621b87d0782cc8235907a1757b46cc (patch)
tree: cde0c666146f833fc948a13c22056061f62d2619 /tests/testsuite.at
parent: 68578072bfaf6054a96bb6bcedfccb6e56a508fe (diff)
download: sciteco-2a050759ab621b87d0782cc8235907a1757b46cc.tar.gz
fixed searches in single-byte encoded documents
* While code is guaranteed to be in valid UTF-8, this cannot be said about the result of string building.
* The search pattern can end up with invalid Unicode bytes even when searching on UTF-8 buffers, e.g. if ^EQq inserts garbage. There are currently no checks.
* When searching on a raw buffer, it must be possible to search for arbitrary bytes (^EUq). Since teco_pattern2regexp() was always expecting clean UTF-8 input, it would sometimes skip over too many bytes and could even crash.
* Instead, teco_pattern2regexp() now takes the <S> target codepage into account.
Diffstat (limited to 'tests/testsuite.at')
-rw-r--r-- tests/testsuite.at 9
1 file changed, 1 insertion, 8 deletions
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 0733d2a..0c7612a 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -85,6 +85,7 @@ AT_CHECK([$SCITECO -e "@EQa//0EE 1U*0EE 0:@EUa/f^@^@/ :Qa-4\"N(0/0)' Ga Z-4\"N(0
AT_CHECK([$SCITECO -e "0EE 129@I// -A-129\"N(0/0)' HXa @EQa// EE\"N(0/0)'"], 0, ignore, ignore)
AT_CHECK([$SCITECO -8e "129@:^Ua// 0Qa-129\"N(0/0)'"], 0, ignore, ignore)
AT_CHECK([$SCITECO -e "1EE 167Ua @I/^EUa/ .-1\"N(0/0)'"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -8e "194Ua Qa@I//J :@S/^EUa/\"F(0/0)'"], 0, ignore, ignore)
AT_CLEANUP
AT_SETUP([Unicode])
@@ -203,11 +204,3 @@ AT_SKIP_IF([case $host in *-*-*bsd* | *-*-darwin*) true;; *) false;; esac])
AT_CHECK([$SCITECO -e "@^Um{U.a Q.a-100000\"<%.aMm'} 0Mm"], 0, ignore, ignore)
AT_XFAIL_IF(true)
AT_CLEANUP
-
-AT_SETUP([Unicode glitches])
-# While TECO code must always be UTF-8, strings after string building
-# can be in single-byte encodings as well.
-# It must be possible to search for single bytes in single-byte encodings.
-AT_CHECK([$SCITECO -8e "164Ua Ga@I//J :@S/^EUa/\"F(0/0)'"], 0, ignore, ignore)
-AT_XFAIL_IF(true)
-AT_CLEANUP