aboutsummaryrefslogtreecommitdiffhomepage
path: root/test/unit/testCellBuffer.cxx
diff options
context:
space:
mode:
authorNeil <nyamatongwe@gmail.com>2018-07-10 15:06:50 +1000
committerNeil <nyamatongwe@gmail.com>2018-07-10 15:06:50 +1000
commit56e20ea0283d8018dee48d736ba9dfef3c84dc3f (patch)
tree21bdb500dfc092fadecb123b87e9799a2c46f6a9 /test/unit/testCellBuffer.cxx
parentd27cbe587930d13d3f1802b271d0d13e7e3c6e38 (diff)
downloadscintilla-mirror-56e20ea0283d8018dee48d736ba9dfef3c84dc3f.tar.gz
Optional indexing of line starts in UTF-8 documents by UTF-32 code points and
UTF-16 code units added.
Diffstat (limited to 'test/unit/testCellBuffer.cxx')
-rw-r--r--test/unit/testCellBuffer.cxx288
1 files changed, 288 insertions, 0 deletions
diff --git a/test/unit/testCellBuffer.cxx b/test/unit/testCellBuffer.cxx
index 067fa4bc1..e6e486e58 100644
--- a/test/unit/testCellBuffer.cxx
+++ b/test/unit/testCellBuffer.cxx
@@ -10,6 +10,7 @@
#include "Platform.h"
+#include "Scintilla.h"
#include "Position.h"
#include "SplitVector.h"
#include "Partitioning.h"
@@ -145,3 +146,290 @@ TEST_CASE("CellBuffer") {
}
}
+
+TEST_CASE("CharacterIndex") {
+
+ CellBuffer cb(true, false);
+
+ SECTION("Setup") {
+ REQUIRE(cb.LineCharacterIndex() == SC_LINECHARACTERINDEX_NONE);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 0);
+ cb.SetUTF8Substance(true);
+
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16);
+ REQUIRE(cb.LineCharacterIndex() == SC_LINECHARACTERINDEX_UTF16);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 0);
+
+ cb.ReleaseLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16);
+ REQUIRE(cb.LineCharacterIndex() == SC_LINECHARACTERINDEX_NONE);
+ }
+
+ SECTION("Insertion") {
+ cb.SetUTF8Substance(true);
+
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32);
+
+ bool startSequence = false;
+ cb.InsertString(0, "a", 1, startSequence);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 1);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 1);
+
+ const char *hwair = "\xF0\x90\x8D\x88";
+ cb.InsertString(0, hwair, strlen(hwair), startSequence);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2);
+ }
+
+ SECTION("Deletion") {
+ cb.SetUTF8Substance(true);
+
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32);
+
+ bool startSequence = false;
+ const char *hwair = "a\xF0\x90\x8D\x88z";
+ cb.InsertString(0, hwair, strlen(hwair), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 4);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3);
+
+ cb.DeleteChars(5, 1, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2);
+
+ cb.DeleteChars(1, 4, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 1);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 1);
+ }
+
+ SECTION("Insert Complex") {
+ cb.SetUTF8Substance(true);
+ cb.SetLineEndTypes(1);
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32);
+
+ bool startSequence = false;
+ // 3 lines of text containing 8 bytes
+ const char *data = "a\n\xF0\x90\x8D\x88\nz";
+ cb.InsertString(0, data, strlen(data), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 6);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 5);
+
+ // Insert a new line at end -> "a\n\xF0\x90\x8D\x88\nz\n" 4 lines
+ // Last line empty
+ cb.InsertString(strlen(data), "\n", 1, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 7);
+ REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF16) == 7);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 6);
+ REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF32) == 6);
+
+ // Insert a new line before end -> "a\n\xF0\x90\x8D\x88\nz\n\n" 5 lines
+ cb.InsertString(strlen(data), "\n", 1, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 7);
+ REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF16) == 8);
+ REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF16) == 8);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 6);
+ REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF32) == 7);
+ REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF32) == 7);
+
+ // Insert a valid 3-byte UTF-8 character at start ->
+ // "\xE2\x82\xACa\n\xF0\x90\x8D\x88\nz\n\n" 5 lines
+
+ const char *euro = "\xE2\x82\xAC";
+ cb.InsertString(0, euro, strlen(euro), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 6);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 8);
+ REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF16) == 9);
+ REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF16) == 9);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 5);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 7);
+ REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF32) == 8);
+ REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF32) == 8);
+
+ // Insert a lone lead byte implying a 3 byte character at start of line 2 ->
+ // "\xE2\x82\xACa\n\EF\xF0\x90\x8D\x88\nz\n\n" 5 lines
+ // Should be treated as a single byte character
+
+ const char *lead = "\xEF";
+ cb.InsertString(5, lead, strlen(lead), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 7);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 9);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 6);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 8);
+
+ // Insert an ASCII lead byte inside the 3-byte initial character ->
+ // "\xE2!\x82\xACa\n\EF\xF0\x90\x8D\x88\nz\n\n" 5 lines
+ // It should b treated as a single character and should cause the
+ // byte before and the 2 bytes after also be each treated as singles
+ // so 3 more characters on line 0.
+
+ const char *ascii = "!";
+ cb.InsertString(1, ascii, strlen(ascii), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 6);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 10);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 6);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 9);
+
+ // Insert a NEL after the '!' to trigger the utf8 line end case ->
+ // "\xE2!\xC2\x85 \x82\xACa\n \EF\xF0\x90\x8D\x88\n z\n\n" 5 lines
+
+ const char *nel = "\xC2\x85";
+ cb.InsertString(2, nel, strlen(nel), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 7);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 11);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 7);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 10);
+ }
+
+ SECTION("Delete Multiple lines") {
+ cb.SetUTF8Substance(true);
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32);
+
+ bool startSequence = false;
+ // 3 lines of text containing 8 bytes
+ const char *data = "a\n\xF0\x90\x8D\x88\nz\nc";
+ cb.InsertString(0, data, strlen(data), startSequence);
+
+ // Delete first 2 new lines -> "az\nc"
+ cb.DeleteChars(1, strlen(data) - 4, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 4);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4);
+ }
+
+ SECTION("Delete Complex") {
+ cb.SetUTF8Substance(true);
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32);
+
+ bool startSequence = false;
+ // 3 lines of text containing 8 bytes
+ const char *data = "a\n\xF0\x90\x8D\x88\nz";
+ cb.InsertString(0, data, strlen(data), startSequence);
+
+ // Delete lead byte from character on line 1 ->
+ // "a\n\x90\x8D\x88\nz"
+ // line 1 becomes 4 single byte characters
+ cb.DeleteChars(2, 1, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 6);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 7);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 6);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 7);
+
+ // Delete first new line ->
+ // "a\x90\x8D\x88\nz"
+ // Only 2 lines with line 0 containing 5 single byte characters
+ cb.DeleteChars(1, 1, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 5);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 6);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 5);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 6);
+
+ // Restore lead byte from character on line 0 making a 4-byte character ->
+ // "a\xF0\x90\x8D\x88\nz"
+
+ const char *lead4 = "\xF0";
+ cb.InsertString(1, lead4, strlen(lead4), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 4);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4);
+ }
+
+ SECTION("Insert separates new line bytes") {
+ cb.SetUTF8Substance(true);
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32);
+
+ bool startSequence = false;
+ // 2 lines of text containing 4 bytes
+ const char *data = "a\r\nb";
+ cb.InsertString(0, data, strlen(data), startSequence);
+
+ // 3 lines of text containing 5 bytes ->
+ // "a\r!\nb"
+ const char *ascii = "!";
+ cb.InsertString(2, ascii, strlen(ascii), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 4);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 5);
+ }
+}