about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- src/Document.cxx 13
-rw-r--r-- src/Document.h 2
-rw-r--r-- test/unit/testDocument.cxx 67
3 files changed, 70 insertions, 12 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index e5022ad64..6dc14238f 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -674,7 +674,7 @@ int Document::LenChar(Sci::Position pos) const noexcept {
return utf8status & UTF8MaskWidth;
}
} else {
- if (IsDBCSLeadByteNoExcept(leadByte) && ((pos + 1) < LengthNoExcept())) {
+ if (IsDBCSLeadByteNoExcept(leadByte) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1))) {
return 2;
} else {
return 1;
@@ -709,7 +709,7 @@ bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position
}
}
-// Normalise a position so that it is not halfway through a two byte character.
+// Normalise a position so that it is not part way through a multi-byte character.
// This can occur in two situations -
// When lines are terminated with \r\n pairs which should be treated as one character.
// When displaying DBCS text such as Japanese.
@@ -760,7 +760,7 @@ Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position
// Check from known start of character.
while (posCheck < pos) {
- const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(posCheck)) ? 2 : 1;
+ const int mbsize = IsDBCSDualByteAt(posCheck) ? 2 : 1;
if (posCheck + mbsize == pos) {
return pos;
} else if (posCheck + mbsize > pos) {
@@ -825,7 +825,7 @@ Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexc
}
} else {
if (moveDir > 0) {
- const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1;
+ const int mbsize = IsDBCSDualByteAt(pos) ? 2 : 1;
pos += mbsize;
if (pos > cb.Length())
pos = cb.Length();
@@ -1098,6 +1098,11 @@ int Document::DBCSDrawBytes(std::string_view text) const noexcept {
}
}
+bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept { // True only when the bytes at pos and pos + 1 form a DBCS lead/trail pair.
+ return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) // The byte at pos must be a lead byte; otherwise the trail check is skipped.
+ && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1)); // A lead byte without a valid trail byte is not a pair. NOTE(review): assumes cb.CharAt(pos + 1) is safe when pos is the last byte - confirm.
+}
+
static constexpr bool IsSpaceOrTab(int ch) noexcept {
return ch == ' ' || ch == '\t';
}
diff --git a/src/Document.h b/src/Document.h
index c40ce2a44..fe27f4936 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -330,8 +330,8 @@ public:
bool SCI_METHOD IsDBCSLeadByte(char ch) const override;
bool IsDBCSLeadByteNoExcept(char ch) const noexcept;
bool IsDBCSTrailByteNoExcept(char ch) const noexcept;
- bool IsDBCSLeadByteInvalid(char ch) const noexcept;
int DBCSDrawBytes(std::string_view text) const noexcept;
+ bool IsDBCSDualByteAt(Sci::Position pos) const noexcept;
int SafeSegment(const char *text, int length, int lengthSegment) const noexcept;
EncodingFamily CodePageFamily() const noexcept;
diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx
index e07f99997..c91868165 100644
--- a/test/unit/testDocument.cxx
+++ b/test/unit/testDocument.cxx
@@ -316,25 +316,78 @@ TEST_CASE("Document") {
// Can not test case mapping of double byte text as folder available here does not implement this
}
- SECTION("GetCharacterAndWidth") {
+ SECTION("GetCharacterAndWidth DBCS") {
Document doc(DocumentOption::Default);
doc.SetDBCSCodePage(932);
REQUIRE(doc.CodePage() == 932);
- const Sci::Position length = doc.InsertString(0, "\x84\xff=", 3);
- REQUIRE(3 == length);
- REQUIRE(3 == doc.Length());
+ const Sci::Position length = doc.InsertString(0, "H\x84\xff\x84H", 5);
+ // This text is invalid in code page 932.
+ // A reasonable interpretation is as 4 items: 2 characters and 2 character fragments
+ // The last item is a 2-byte CYRILLIC CAPITAL LETTER ZE character
+ // H [84] [FF] ZE
+ REQUIRE(5 == length);
+ REQUIRE(5 == doc.Length());
Sci::Position width = 0;
+ // test GetCharacterAndWidth()
int ch = doc.GetCharacterAndWidth(0, &width);
REQUIRE(width == 1);
+ REQUIRE(ch == 'H');
+ ch = doc.GetCharacterAndWidth(1, &width);
+ REQUIRE(width == 1);
REQUIRE(ch == 0x84);
width = 0;
- ch = doc.GetCharacterAndWidth(1, &width);
+ ch = doc.GetCharacterAndWidth(2, &width);
REQUIRE(width == 1);
REQUIRE(ch == 0xff);
width = 0;
- ch = doc.GetCharacterAndWidth(2, &width);
+ ch = doc.GetCharacterAndWidth(3, &width);
+ REQUIRE(width == 2);
+ REQUIRE(ch == 0x8448);
+ // test LenChar()
+ width = doc.LenChar(0);
+ REQUIRE(width == 1);
+ width = doc.LenChar(1);
+ REQUIRE(width == 1);
+ width = doc.LenChar(2);
REQUIRE(width == 1);
- REQUIRE(ch == '=');
+ width = doc.LenChar(3);
+ REQUIRE(width == 2);
+ // test MovePositionOutsideChar()
+ Sci::Position pos = doc.MovePositionOutsideChar(1, 1);
+ REQUIRE(pos == 1);
+ pos = doc.MovePositionOutsideChar(2, 1);
+ REQUIRE(pos == 2);
+ pos = doc.MovePositionOutsideChar(3, 1);
+ REQUIRE(pos == 3);
+ pos = doc.MovePositionOutsideChar(4, 1);
+ REQUIRE(pos == 5);
+ pos = doc.MovePositionOutsideChar(1, -1);
+ REQUIRE(pos == 1);
+ pos = doc.MovePositionOutsideChar(2, -1);
+ REQUIRE(pos == 2);
+ pos = doc.MovePositionOutsideChar(3, -1);
+ REQUIRE(pos == 3);
+ pos = doc.MovePositionOutsideChar(4, -1);
+ REQUIRE(pos == 3);
+ // test NextPosition()
+ pos = doc.NextPosition(0, 1);
+ REQUIRE(pos == 1);
+ pos = doc.NextPosition(1, 1);
+ REQUIRE(pos == 2);
+ pos = doc.NextPosition(2, 1);
+ REQUIRE(pos == 3);
+ pos = doc.NextPosition(3, 1);
+ REQUIRE(pos == 5);
+ pos = doc.NextPosition(1, -1);
+ REQUIRE(pos == 0);
+ // The next two tests are commented out because the implementation of NextPosition
+ // cannot yet handle character fragments correctly when moving backwards.
+ //pos = doc.NextPosition(2, -1);
+ //REQUIRE(pos == 1);
+ //pos = doc.NextPosition(3, -1);
+ //REQUIRE(pos == 2);
+ pos = doc.NextPosition(5, -1);
+ REQUIRE(pos == 3);
}
}