diff options
author | Zufu Liu <unknown> | 2021-07-05 16:18:13 +1000 |
---|---|---|
committer | Zufu Liu <unknown> | 2021-07-05 16:18:13 +1000 |
commit | 289314060dd7a44f9844cfc891d1c1d823742f94 (patch) | |
tree | a414ccc8b1136ea24f0d0e6d60bb5cc2f89d6fac | |
parent | c9d4cf4a91ae137870804d244945cb90c1dd37f4 (diff) | |
download | scintilla-mirror-289314060dd7a44f9844cfc891d1c1d823742f94.tar.gz |
Feature [feature-requests:#1408] Check both bytes of potential DBCS character
before treating as a character.
-rw-r--r-- | src/Document.cxx | 13 | ||||
-rw-r--r-- | src/Document.h | 2 | ||||
-rw-r--r-- | test/unit/testDocument.cxx | 67 |
3 files changed, 70 insertions, 12 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index e5022ad64..6dc14238f 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -674,7 +674,7 @@ int Document::LenChar(Sci::Position pos) const noexcept { return utf8status & UTF8MaskWidth; } } else { - if (IsDBCSLeadByteNoExcept(leadByte) && ((pos + 1) < LengthNoExcept())) { + if (IsDBCSLeadByteNoExcept(leadByte) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1))) { return 2; } else { return 1; @@ -709,7 +709,7 @@ bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position } } -// Normalise a position so that it is not halfway through a two byte character. +// Normalise a position so that it is not part way through a multi-byte character. // This can occur in two situations - // When lines are terminated with \r\n pairs which should be treated as one character. // When displaying DBCS text such as Japanese. @@ -760,7 +760,7 @@ Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position // Check from known start of character. while (posCheck < pos) { - const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(posCheck)) ? 2 : 1; + const int mbsize = IsDBCSDualByteAt(posCheck) ? 2 : 1; if (posCheck + mbsize == pos) { return pos; } else if (posCheck + mbsize > pos) { @@ -825,7 +825,7 @@ Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexc } } else { if (moveDir > 0) { - const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1; + const int mbsize = IsDBCSDualByteAt(pos) ? 2 : 1; pos += mbsize; if (pos > cb.Length()) pos = cb.Length(); @@ -1098,6 +1098,11 @@ int Document::DBCSDrawBytes(std::string_view text) const noexcept { } } +bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept { + return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) + && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1)); +} + static constexpr bool IsSpaceOrTab(int ch) noexcept { return ch == ' ' || ch == '\t'; } diff --git a/src/Document.h b/src/Document.h index c40ce2a44..fe27f4936 100644 --- a/src/Document.h +++ b/src/Document.h @@ -330,8 +330,8 @@ public: bool SCI_METHOD IsDBCSLeadByte(char ch) const override; bool IsDBCSLeadByteNoExcept(char ch) const noexcept; bool IsDBCSTrailByteNoExcept(char ch) const noexcept; - bool IsDBCSLeadByteInvalid(char ch) const noexcept; int DBCSDrawBytes(std::string_view text) const noexcept; + bool IsDBCSDualByteAt(Sci::Position pos) const noexcept; int SafeSegment(const char *text, int length, int lengthSegment) const noexcept; EncodingFamily CodePageFamily() const noexcept; diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx index e07f99997..c91868165 100644 --- a/test/unit/testDocument.cxx +++ b/test/unit/testDocument.cxx @@ -316,25 +316,78 @@ TEST_CASE("Document") { // Can not test case mapping of double byte text as folder available here does not implement this } - SECTION("GetCharacterAndWidth") { + SECTION("GetCharacterAndWidth DBCS") { Document doc(DocumentOption::Default); doc.SetDBCSCodePage(932); REQUIRE(doc.CodePage() == 932); - const Sci::Position length = doc.InsertString(0, "\x84\xff=", 3); - REQUIRE(3 == length); - REQUIRE(3 == doc.Length()); + const Sci::Position length = doc.InsertString(0, "H\x84\xff\x84H", 5); + // This text is invalid in code page 932. + // A reasonable interpretation is as 4 items: 2 characters and 2 character fragments + // The last item is a 2-byte CYRILLIC CAPITAL LETTER ZE character + // H [84] [FF] ZE + REQUIRE(5 == length); + REQUIRE(5 == doc.Length()); Sci::Position width = 0; + // test GetCharacterAndWidth() int ch = doc.GetCharacterAndWidth(0, &width); REQUIRE(width == 1); + REQUIRE(ch == 'H'); + ch = doc.GetCharacterAndWidth(1, &width); + REQUIRE(width == 1); REQUIRE(ch == 0x84); width = 0; - ch = doc.GetCharacterAndWidth(1, &width); + ch = doc.GetCharacterAndWidth(2, &width); REQUIRE(width == 1); REQUIRE(ch == 0xff); width = 0; - ch = doc.GetCharacterAndWidth(2, &width); + ch = doc.GetCharacterAndWidth(3, &width); + REQUIRE(width == 2); + REQUIRE(ch == 0x8448); + // test LenChar() + width = doc.LenChar(0); + REQUIRE(width == 1); + width = doc.LenChar(1); + REQUIRE(width == 1); + width = doc.LenChar(2); REQUIRE(width == 1); - REQUIRE(ch == '='); + width = doc.LenChar(3); + REQUIRE(width == 2); + // test MovePositionOutsideChar() + Sci::Position pos = doc.MovePositionOutsideChar(1, 1); + REQUIRE(pos == 1); + pos = doc.MovePositionOutsideChar(2, 1); + REQUIRE(pos == 2); + pos = doc.MovePositionOutsideChar(3, 1); + REQUIRE(pos == 3); + pos = doc.MovePositionOutsideChar(4, 1); + REQUIRE(pos == 5); + pos = doc.MovePositionOutsideChar(1, -1); + REQUIRE(pos == 1); + pos = doc.MovePositionOutsideChar(2, -1); + REQUIRE(pos == 2); + pos = doc.MovePositionOutsideChar(3, -1); + REQUIRE(pos == 3); + pos = doc.MovePositionOutsideChar(4, -1); + REQUIRE(pos == 3); + // test NextPosition() + pos = doc.NextPosition(0, 1); + REQUIRE(pos == 1); + pos = doc.NextPosition(1, 1); + REQUIRE(pos == 2); + pos = doc.NextPosition(2, 1); + REQUIRE(pos == 3); + pos = doc.NextPosition(3, 1); + REQUIRE(pos == 5); + pos = doc.NextPosition(1, -1); + REQUIRE(pos == 0); + // The next two tests are commented out because the implementation of NextPosition + // cannot yet handle character fragments correctly when moving backwards. + //pos = doc.NextPosition(2, -1); + //REQUIRE(pos == 1); + //pos = doc.NextPosition(3, -1); + //REQUIRE(pos == 2); + pos = doc.NextPosition(5, -1); + REQUIRE(pos == 3); } } |