diff options
author | Neil <nyamatongwe@gmail.com> | 2018-07-10 15:06:50 +1000 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2018-07-10 15:06:50 +1000 |
commit | 56e20ea0283d8018dee48d736ba9dfef3c84dc3f (patch) | |
tree | 21bdb500dfc092fadecb123b87e9799a2c46f6a9 /test | |
parent | d27cbe587930d13d3f1802b271d0d13e7e3c6e38 (diff) | |
download | scintilla-mirror-56e20ea0283d8018dee48d736ba9dfef3c84dc3f.tar.gz |
Optional indexing of line starts in UTF-8 documents by UTF-32 code points and
UTF-16 code units added.
Diffstat (limited to 'test')
-rw-r--r-- | test/simpleTests.py | 95 | ||||
-rw-r--r-- | test/unit/testCellBuffer.cxx | 288 |
2 files changed, 383 insertions, 0 deletions
diff --git a/test/simpleTests.py b/test/simpleTests.py index 3ff283dad..b1e8efdb7 100644 --- a/test/simpleTests.py +++ b/test/simpleTests.py @@ -1631,6 +1631,76 @@ class TestStyleAttributes(unittest.TestCase): self.ed.StyleSetHotSpot(self.ed.STYLE_DEFAULT, 1) self.assertEquals(self.ed.StyleGetHotSpot(self.ed.STYLE_DEFAULT), 1) +class TestIndices(unittest.TestCase): + def setUp(self): + self.xite = Xite.xiteFrame + self.ed = self.xite.ed + self.ed.ClearAll() + self.ed.EmptyUndoBuffer() + self.ed.SetCodePage(65001) + # Text includes one non-BMP character + t = "aå\U00010348flﬔ-\n" + self.tv = t.encode("UTF-8") + + def tearDown(self): + self.ed.SetCodePage(0) + + def testAllocation(self): + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32) + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_UTF32) + self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32) + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + + def testUTF32(self): + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + self.ed.SetContents(self.tv) + self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32) + self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF32), 0) + self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF32), 7) + self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32) + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + + def testUTF16(self): + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + t = "aå\U00010348flﬔ-" + tv = t.encode("UTF-8") + self.ed.SetContents(self.tv) + self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF16) + self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF16), 0) + self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF16), 8) + self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF16) + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + + def testBoth(self): + # Set text before turning indices on + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + self.ed.SetContents(self.tv) + self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32+self.ed.SC_LINECHARACTERINDEX_UTF16) + self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF32), 0) + self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF32), 7) + self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF16), 0) + self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF16), 8) + # Test the inverse: position->line + self.assertEquals(self.ed.LineFromIndexPosition(0, self.ed.SC_LINECHARACTERINDEX_UTF32), 0) + self.assertEquals(self.ed.LineFromIndexPosition(7, self.ed.SC_LINECHARACTERINDEX_UTF32), 1) + self.assertEquals(self.ed.LineFromIndexPosition(0, self.ed.SC_LINECHARACTERINDEX_UTF16), 0) + self.assertEquals(self.ed.LineFromIndexPosition(8, self.ed.SC_LINECHARACTERINDEX_UTF16), 1) + self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32+self.ed.SC_LINECHARACTERINDEX_UTF16) + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + + def testMaintenance(self): + # Set text after turning indices on + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32+self.ed.SC_LINECHARACTERINDEX_UTF16) + self.ed.SetContents(self.tv) + self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF32), 0) + self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF32), 7) + self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF16), 0) + self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF16), 8) + self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32+self.ed.SC_LINECHARACTERINDEX_UTF16) + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + class TestCharacterNavigation(unittest.TestCase): def setUp(self): self.xite = Xite.xiteFrame @@ -1677,6 +1747,31 @@ class TestCharacterNavigation(unittest.TestCase): self.assert_(after < previous) previous = after + def testRelativeNonBOM(self): + # \x61 \xF0\x90\x8D\x88 \xef\xac\x82 \xef\xac\x94 \x2d + t = "a\U00010348flﬔ-" + tv = t.encode("UTF-8") + self.ed.SetContents(tv) + self.assertEquals(self.ed.PositionRelative(1, 2), 8) + self.assertEquals(self.ed.CountCharacters(1, 8), 2) + self.assertEquals(self.ed.CountCodeUnits(1, 8), 3) + self.assertEquals(self.ed.PositionRelative(8, -2), 1) + self.assertEquals(self.ed.PositionRelativeCodeUnits(8, -3), 1) + pos = 0 + previous = 0 + for i in range(1, len(t)): + after = self.ed.PositionRelative(pos, i) + self.assert_(after > pos) + self.assert_(after > previous) + previous = after + pos = len(t) + previous = pos + for i in range(1, len(t)-1): + after = self.ed.PositionRelative(pos, -i) + self.assert_(after < pos) + self.assert_(after <= previous) + previous = after + def testLineEnd(self): t = "a\r\nb\nc" tv = t.encode("UTF-8") diff --git a/test/unit/testCellBuffer.cxx b/test/unit/testCellBuffer.cxx index 067fa4bc1..e6e486e58 100644 --- a/test/unit/testCellBuffer.cxx +++ b/test/unit/testCellBuffer.cxx @@ -10,6 +10,7 @@ #include "Platform.h" +#include "Scintilla.h" #include "Position.h" #include "SplitVector.h" #include "Partitioning.h" @@ -145,3 +146,290 @@ TEST_CASE("CellBuffer") { } } + +TEST_CASE("CharacterIndex") { + + CellBuffer cb(true, false); + + SECTION("Setup") { + REQUIRE(cb.LineCharacterIndex() == SC_LINECHARACTERINDEX_NONE); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 0); + cb.SetUTF8Substance(true); + + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16); + REQUIRE(cb.LineCharacterIndex() == SC_LINECHARACTERINDEX_UTF16); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 0); + + cb.ReleaseLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16); + REQUIRE(cb.LineCharacterIndex() == SC_LINECHARACTERINDEX_NONE); + } + + SECTION("Insertion") { + cb.SetUTF8Substance(true); + + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32); + + bool startSequence = false; + cb.InsertString(0, "a", 1, startSequence); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 1); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 1); + + const char *hwair = "\xF0\x90\x8D\x88"; + cb.InsertString(0, hwair, strlen(hwair), startSequence); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2); + } + + SECTION("Deletion") { + cb.SetUTF8Substance(true); + + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32); + + bool startSequence = false; + const char *hwair = "a\xF0\x90\x8D\x88z"; + cb.InsertString(0, hwair, strlen(hwair), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 4); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3); + + cb.DeleteChars(5, 1, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2); + + cb.DeleteChars(1, 4, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 1); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 1); + } + + SECTION("Insert Complex") { + cb.SetUTF8Substance(true); + cb.SetLineEndTypes(1); + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32); + + bool startSequence = false; + // 3 lines of text containing 8 bytes + const char *data = "a\n\xF0\x90\x8D\x88\nz"; + cb.InsertString(0, data, strlen(data), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 6); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 5); + + // Insert a new line at end -> "a\n\xF0\x90\x8D\x88\nz\n" 4 lines + // Last line empty + cb.InsertString(strlen(data), "\n", 1, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 7); + REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF16) == 7); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 6); + REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF32) == 6); + + // Insert a new line before end -> "a\n\xF0\x90\x8D\x88\nz\n\n" 5 lines + cb.InsertString(strlen(data), "\n", 1, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 7); + REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF16) == 8); + REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF16) == 8); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 6); + REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF32) == 7); + REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF32) == 7); + + // Insert a valid 3-byte UTF-8 character at start -> + // "\xE2\x82\xACa\n\xF0\x90\x8D\x88\nz\n\n" 5 lines + + const char *euro = "\xE2\x82\xAC"; + cb.InsertString(0, euro, strlen(euro), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 6); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 8); + REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF16) == 9); + REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF16) == 9); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 5); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 7); + REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF32) == 8); + REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF32) == 8); + + // Insert a lone lead byte implying a 3 byte character at start of line 2 -> + // "\xE2\x82\xACa\n\EF\xF0\x90\x8D\x88\nz\n\n" 5 lines + // Should be treated as a single byte character + + const char *lead = "\xEF"; + cb.InsertString(5, lead, strlen(lead), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 7); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 9); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 6); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 8); + + // Insert an ASCII lead byte inside the 3-byte initial character -> + // "\xE2!\x82\xACa\n\EF\xF0\x90\x8D\x88\nz\n\n" 5 lines + // It should b treated as a single character and should cause the + // byte before and the 2 bytes after also be each treated as singles + // so 3 more characters on line 0. + + const char *ascii = "!"; + cb.InsertString(1, ascii, strlen(ascii), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 6); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 10); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 6); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 9); + + // Insert a NEL after the '!' to trigger the utf8 line end case -> + // "\xE2!\xC2\x85 \x82\xACa\n \EF\xF0\x90\x8D\x88\n z\n\n" 5 lines + + const char *nel = "\xC2\x85"; + cb.InsertString(2, nel, strlen(nel), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 7); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 11); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 7); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 10); + } + + SECTION("Delete Multiple lines") { + cb.SetUTF8Substance(true); + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32); + + bool startSequence = false; + // 3 lines of text containing 8 bytes + const char *data = "a\n\xF0\x90\x8D\x88\nz\nc"; + cb.InsertString(0, data, strlen(data), startSequence); + + // Delete first 2 new lines -> "az\nc" + cb.DeleteChars(1, strlen(data) - 4, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 4); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4); + } + + SECTION("Delete Complex") { + cb.SetUTF8Substance(true); + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32); + + bool startSequence = false; + // 3 lines of text containing 8 bytes + const char *data = "a\n\xF0\x90\x8D\x88\nz"; + cb.InsertString(0, data, strlen(data), startSequence); + + // Delete lead byte from character on line 1 -> + // "a\n\x90\x8D\x88\nz" + // line 1 becomes 4 single byte characters + cb.DeleteChars(2, 1, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 6); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 7); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 6); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 7); + + // Delete first new line -> + // "a\x90\x8D\x88\nz" + // Only 2 lines with line 0 containing 5 single byte characters + cb.DeleteChars(1, 1, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 5); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 6); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 5); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 6); + + // Restore lead byte from character on line 0 making a 4-byte character -> + // "a\xF0\x90\x8D\x88\nz" + + const char *lead4 = "\xF0"; + cb.InsertString(1, lead4, strlen(lead4), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 4); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4); + } + + SECTION("Insert separates new line bytes") { + cb.SetUTF8Substance(true); + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32); + + bool startSequence = false; + // 2 lines of text containing 4 bytes + const char *data = "a\r\nb"; + cb.InsertString(0, data, strlen(data), startSequence); + + // 3 lines of text containing 5 bytes -> + // "a\r!\nb" + const char *ascii = "!"; + cb.InsertString(2, ascii, strlen(ascii), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 4); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 5); + } +} |