2 files changed, 27 insertions, 5 deletions
diff --git a/tests/atlocal.in b/tests/atlocal.in
index 090a604..47137bb 100644
--- a/tests/atlocal.in
+++ b/tests/atlocal.in
@@ -13,3 +13,7 @@ SCITECOPATH=@abs_top_srcdir@/lib
 # Glib debug options
 G_SLICE=debug-blocks
 G_ENABLE_DIAGNOSTIC=1
+
+# For the Unicode tests - makes sure that UTF-8 characters
+# are accepted on command lines.
+LC_ALL=C.UTF-8
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 6ea95ab..3e121fb 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -79,11 +79,20 @@ AT_CHECK([$SCITECO -e "[[a 23Ub ]]b Qb\"N(0/0)'"], 0, ignore, ignore)
 AT_CHECK([$SCITECO -e "[[\$ @FG'..' ]]\$ :Q\$-1Q\$-^^r\"=(0/0)'"], 0, ignore, ignore)
 AT_CLEANUP
 
-AT_SETUP([8-bit cleanlyness])
-AT_CHECK([$SCITECO -e "0:@EUa/f^@^@/ :Qa-4\"N(0/0)' Ga Z= Z-4\"N(0/0)'"], 0, ignore, ignore)
-AT_CHECK([$SCITECO -e "129@I// -A-129\"N(0/0)'"], 0, ignore, ignore)
-AT_CHECK([$SCITECO -e "129@^Ua// 0Qa-129\"N(0/0)'"], 0, ignore, ignore)
-AT_CHECK([$SCITECO -e "@^Ua/^^/ 129:@^Ua// Ma-129\"N(0/0)'"], 0, ignore, ignore)
+AT_SETUP([8-bit cleanliness])
+AT_CHECK([$SCITECO -e "0@I//J 0A\"N(0/0)' :@S/^@/\"F(0/0)'"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "@EQa//0EE 1U*0EE 0:@EUa/f^@^@/ :Qa-4\"N(0/0)' Ga Z-4\"N(0/0)'"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "0EE 129@I// -A-129\"N(0/0)' HXa @EQa// EE\"N(0/0)'"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -8e "129@:^Ua// 0Qa-129\"N(0/0)'"], 0, ignore, ignore)
+# FIXME: This will fail once we have a UTF-8-only parser.
+AT_CHECK([$SCITECO -8e "@:^Ua/^^/ 129:@^Ua// Ma-129\"N(0/0)'"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "1EE 167Ua @I/^EUa/ .-1\"N(0/0)'"], 0, ignore, ignore)
+AT_CLEANUP
+
+AT_SETUP([Unicode])
+AT_CHECK([$SCITECO -e "8594@I/Здравствуй, мир!/ Z-17\"N(0/0)' J0A-8594\"N(0/0)'"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "8594@^Ua/Здравствуй, мир!/ :Qa-17\"N(0/0)' 0Qa-8594\"N(0/0)'"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "@I/Здравствуй, мир!/ JW .-10\"N(0/0)' ^E-20\"N(0/0)' 204:EE .-10\"N(0/0)'"], 0, ignore, ignore)
 AT_CLEANUP
 
 AT_SETUP([Automatic EOL normalization])
@@ -192,3 +201,12 @@ AT_SKIP_IF([case $host in *-*-*bsd* | *-*-darwin*) true;; *) false;; esac])
 AT_CHECK([$SCITECO -e "@^Um{U.a Q.a-100000\"<%.aMm'} 0Mm"], 0, ignore, ignore)
 AT_XFAIL_IF(true)
 AT_CLEANUP
+
+AT_SETUP([Unicode glitches])
+# While TECO code must always be UTF-8, strings resulting from string
+# building may be in single-byte encodings as well.
+# This might already work once the Unicode-aware parser has been introduced.
+# If not, it should be fixed.
+AT_CHECK([$SCITECO -8e "164Ua Ga@I//J :@S/^EUa/\"F(0/0)'"], 0, ignore, ignore)
+AT_XFAIL_IF(true)
+AT_CLEANUP