aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorNeil <nyamatongwe@gmail.com>2018-03-20 14:42:44 +1100
committerNeil <nyamatongwe@gmail.com>2018-03-20 14:42:44 +1100
commit76d417ea8a7a87d9c5cd10d21808859b9e3f5a89 (patch)
tree3eab985fcd6037ab0427cdb42e31c994cedc32a8
parentbc3e60c9fb30629649aa1552a739e48a4babca86 (diff)
downloadscintilla-mirror-76d417ea8a7a87d9c5cd10d21808859b9e3f5a89.tar.gz
Add unit tests for UniConversion.
-rw-r--r--test/unit/UnitTester.vcxproj1
-rw-r--r--test/unit/makefile3
-rw-r--r--test/unit/test.mak3
-rw-r--r--test/unit/testUniConversion.cxx264
-rw-r--r--test/unit/unitTest.cxx2
5 files changed, 270 insertions, 3 deletions
diff --git a/test/unit/UnitTester.vcxproj b/test/unit/UnitTester.vcxproj
index 0554b68ba..d74b691fe 100644
--- a/test/unit/UnitTester.vcxproj
+++ b/test/unit/UnitTester.vcxproj
@@ -152,6 +152,7 @@
<ClCompile Include="..\..\src\ContractionState.cxx" />
<ClCompile Include="..\..\src\Decoration.cxx" />
<ClCompile Include="..\..\src\RunStyles.cxx" />
+ <ClCompile Include="..\..\src\UniConversion.cxx" />
<ClCompile Include="test*.cxx" />
<ClCompile Include="UnitTester.cxx" />
</ItemGroup>
diff --git a/test/unit/makefile b/test/unit/makefile
index b38da46ca..f73dc3f5c 100644
--- a/test/unit/makefile
+++ b/test/unit/makefile
@@ -52,7 +52,8 @@ TESTEDSRC=\
../../src/CharClassify.cxx \
../../src/ContractionState.cxx \
../../src/Decoration.cxx \
- ../../src/RunStyles.cxx
+ ../../src/RunStyles.cxx \
+ ../../src/UniConversion.cxx
TESTS=$(EXE)
diff --git a/test/unit/test.mak b/test/unit/test.mak
index f7adc7396..1e056fcba 100644
--- a/test/unit/test.mak
+++ b/test/unit/test.mak
@@ -17,7 +17,8 @@ TESTEDSRC=\
../../src/CharClassify.cxx \
../../src/ContractionState.cxx \
../../src/Decoration.cxx \
- ../../src/RunStyles.cxx
+ ../../src/RunStyles.cxx \
+ ../../src/UniConversion.cxx
TESTS=$(EXE)
diff --git a/test/unit/testUniConversion.cxx b/test/unit/testUniConversion.cxx
new file mode 100644
index 000000000..16ea1d974
--- /dev/null
+++ b/test/unit/testUniConversion.cxx
@@ -0,0 +1,264 @@
+// Unit tests for Scintilla's UniConversion functions (UTF16Length, UTF16FromUTF8, UTF32FromUTF8, UTF8Classify)
+
+#include <cstring>
+
+#include <string>
+#include <algorithm>
+
+#include "Platform.h"
+
+#include "UniConversion.h"
+
+#include "catch.hpp"
+
+using namespace Scintilla;
+
+// Test UniConversion.
+// Use examples from Wikipedia:
+// https://en.wikipedia.org/wiki/UTF-8
+
+TEST_CASE("UTF16Length") {  // Each section passes one UTF-8 encoded character to UTF16Length and checks the count returned.
+
+ SECTION("UTF16Length ASCII") {
+ // U+0061 Latin Small Letter A: a single byte in UTF-8
+ const char *s = "a";
+ size_t len = UTF16Length(s, strlen(s));
+ REQUIRE(len == 1U);
+ }
+
+ SECTION("UTF16Length Example1") {
+ // U+0024 Dollar Sign: a single byte in UTF-8
+ const char *s = "\x24";
+ size_t len = UTF16Length(s, strlen(s));
+ REQUIRE(len == 1U);
+ }
+
+ SECTION("UTF16Length Example2") {
+ // U+00A2 Cent Sign: two bytes in UTF-8
+ const char *s = "\xC2\xA2";
+ size_t len = UTF16Length(s, strlen(s));
+ REQUIRE(len == 1U);
+ }
+
+ SECTION("UTF16Length Example3") {
+ // U+20AC Euro Sign: three bytes in UTF-8
+ const char *s = "\xE2\x82\xAC";
+ size_t len = UTF16Length(s, strlen(s));
+ REQUIRE(len == 1U);
+ }
+
+ SECTION("UTF16Length Example4") {
+ // U+10348 Gothic Letter Hwair: four bytes in UTF-8
+ const char *s = "\xF0\x90\x8D\x88";
+ size_t len = UTF16Length(s, strlen(s));
+ REQUIRE(len == 2U);  // the only case here expected to count as two UTF-16 units
+ }
+}
+
+TEST_CASE("UniConversion") {  // Converts single UTF-8 encoded characters and checks both the returned count and the output buffer.
+
+ // UTF16FromUTF8: each call passes the byte count of s (excluding the NUL) and the element count of tbuf
+
+ SECTION("UTF16FromUTF8 ASCII") {
+ const char s[] = {'a', 0};
+ wchar_t tbuf[1] = {0};
+ size_t tlen = UTF16FromUTF8(s, 1, tbuf, 1);
+ REQUIRE(tlen == 1U);
+ REQUIRE(tbuf[0] == 'a');
+ }
+
+ SECTION("UTF16FromUTF8 Example1") {
+ const char s[] = {'\x24', 0};  // Dollar Sign
+ wchar_t tbuf[1] = {0};
+ size_t tlen = UTF16FromUTF8(s, 1, tbuf, 1);
+ REQUIRE(tlen == 1U);
+ REQUIRE(tbuf[0] == 0x24);
+ }
+
+ SECTION("UTF16FromUTF8 Example2") {
+ const char s[] = {'\xC2', '\xA2', 0};  // Cent Sign, two bytes
+ wchar_t tbuf[1] = {0};
+ size_t tlen = UTF16FromUTF8(s, 2, tbuf, 1);
+ REQUIRE(tlen == 1U);
+ REQUIRE(tbuf[0] == 0xA2);
+ }
+
+ SECTION("UTF16FromUTF8 Example3") {
+ const char s[] = {'\xE2', '\x82', '\xAC', 0};  // Euro Sign, three bytes
+ wchar_t tbuf[1] = {0};
+ size_t tlen = UTF16FromUTF8(s, 3, tbuf, 1);
+ REQUIRE(tlen == 1U);
+ REQUIRE(tbuf[0] == 0x20AC);
+ }
+
+ SECTION("UTF16FromUTF8 Example4") {
+ const char s[] = {'\xF0', '\x90', '\x8D', '\x88', 0};  // Gothic Letter Hwair, four bytes
+ wchar_t tbuf[2] = {0, 0};  // two elements: the expected result is a surrogate pair
+ size_t tlen = UTF16FromUTF8(s, 4, tbuf, 2);
+ REQUIRE(tlen == 2U);
+ REQUIRE(tbuf[0] == 0xD800);  // lead surrogate
+ REQUIRE(tbuf[1] == 0xDF48);  // trail surrogate
+ }
+
+ // UTF32FromUTF8: same inputs as above; every character is expected to fit in one output element
+
+ SECTION("UTF32FromUTF8 ASCII") {
+ const char s[] = {'a', 0};
+ unsigned int tbuf[1] = {0};
+ size_t tlen = UTF32FromUTF8(s, 1, tbuf, 1);
+ REQUIRE(tlen == 1U);
+ REQUIRE(tbuf[0] == static_cast<unsigned int>('a'));
+ }
+
+ SECTION("UTF32FromUTF8 Example1") {
+ const char s[] = {'\x24', 0};  // Dollar Sign
+ unsigned int tbuf[1] = {0};
+ size_t tlen = UTF32FromUTF8(s, 1, tbuf, 1);
+ REQUIRE(tlen == 1U);
+ REQUIRE(tbuf[0] == 0x24);
+ }
+
+ SECTION("UTF32FromUTF8 Example2") {
+ const char s[] = {'\xC2', '\xA2', 0};  // Cent Sign, two bytes
+ unsigned int tbuf[1] = {0};
+ size_t tlen = UTF32FromUTF8(s, 2, tbuf, 1);
+ REQUIRE(tlen == 1U);
+ REQUIRE(tbuf[0] == 0xA2);
+ }
+
+ SECTION("UTF32FromUTF8 Example3") {
+ const char s[] = {'\xE2', '\x82', '\xAC', 0};  // Euro Sign, three bytes
+ unsigned int tbuf[1] = {0};
+ size_t tlen = UTF32FromUTF8(s, 3, tbuf, 1);
+ REQUIRE(tlen == 1U);
+ REQUIRE(tbuf[0] == 0x20AC);
+ }
+
+ SECTION("UTF32FromUTF8 Example4") {
+ const char s[] = {'\xF0', '\x90', '\x8D', '\x88', 0};  // Gothic Letter Hwair, four bytes
+ unsigned int tbuf[1] = {0};
+ size_t tlen = UTF32FromUTF8(s, 4, tbuf, 1);
+ REQUIRE(tlen == 1U);
+ REQUIRE(tbuf[0] == 0x10348);  // one element here, unlike the two-unit UTF-16 result
+ }
+}
+
+TEST_CASE("UTF8Classify") {
+
+ // These tests are intended to hit every return statement in UTF8Classify once, in order,
+ // except the last, which is hit twice. Expected values are a length (1-4), ORed with UTF8MaskInvalid for rejected input.
+
+ // Single byte
+
+ SECTION("UTF8Classify Simple ASCII") {
+ const char *s = "a";
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == 1);
+ }
+
+ SECTION("UTF8Classify Invalid Too large lead") {
+ const char *s = "\xF5";
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1|UTF8MaskInvalid));
+ }
+
+ // 4 byte lead
+
+ SECTION("UTF8Classify 4 byte lead, string less than 4 long") {
+ const char *s = "\xF0";
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify 1FFFF non-character") {
+ const char *s = "\xF0\x9F\xBF\xBF";  // decodes to 0x1FFFF
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (4 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify 1 Greater than max Unicode 110000") {
+ // Maximum Unicode value is 10FFFF so 110000 is out of range
+ const char *s = "\xF4\x90\x80\x80";
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify 4 byte overlong") {
+ const char *s = "\xF0\x80\x80\x80";  // four-byte form whose payload bits are all zero
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify 4 byte valid character") {
+ const char *s = "\xF0\x9F\x8C\x90";  // decodes to 0x1F310
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == 4);
+ }
+
+ SECTION("UTF8Classify 4 byte bad trails") {
+ const char *s = "\xF0xyz";  // lead byte followed by ASCII letters instead of trail bytes
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1 | UTF8MaskInvalid));
+ }
+
+ // 3 byte lead
+
+ SECTION("UTF8Classify 3 byte lead, string less than 3 long") {
+ const char *s = "\xEF";
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify 3 byte lead, overlong") {
+ const char *s = "\xE0\x80\xAF";  // three-byte form of 0x2F, which fits in one byte
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify 3 byte lead, surrogate") {
+ const char *s = "\xED\xA0\x80";  // decodes to 0xD800
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify FFFE non-character") {
+ const char *s = "\xEF\xBF\xBE";
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (3 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify FFFF non-character") {
+ const char *s = "\xEF\xBF\xBF";
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (3 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify FDD0 non-character") {
+ const char *s = "\xEF\xB7\x90";
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (3 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify 3 byte valid character") {
+ const char *s = "\xE2\x82\xAC";  // decodes to 0x20AC
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == 3);
+ }
+
+ SECTION("UTF8Classify 3 byte bad trails") {
+ const char *s = "\xE2qq";  // lead byte followed by ASCII letters instead of trail bytes
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1 | UTF8MaskInvalid));
+ }
+
+ // 2 byte lead
+
+ SECTION("UTF8Classify 2 byte lead, string less than 2 long") {
+ const char *s = "\xD0";
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify 2 byte valid character") {
+ const char *s = "\xD0\x80";  // decodes to 0x400
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == 2);
+ }
+
+ SECTION("UTF8Classify 2 byte lead trail is invalid") {
+ const char *s = "\xD0q";
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify Overlong") {
+ const char *s = "\xC0";
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1 | UTF8MaskInvalid));
+ }
+
+ SECTION("UTF8Classify single trail byte") {
+ const char *s = "\x80";  // a trail byte with no preceding lead byte
+ REQUIRE(UTF8Classify(reinterpret_cast<const unsigned char *>(s), strlen(s)) == (1 | UTF8MaskInvalid));
+ }
+} \ No newline at end of file
diff --git a/test/unit/unitTest.cxx b/test/unit/unitTest.cxx
index 762f202bc..cdea417db 100644
--- a/test/unit/unitTest.cxx
+++ b/test/unit/unitTest.cxx
@@ -11,6 +11,7 @@
DecorationList
CellBuffer
UnicodeFromUTF8
+ UniConversion
To do:
PerLine *
@@ -20,7 +21,6 @@
Document
RESearch
Selection
- UniConversion
Style
lexlib: