From 7b646db9fbfb71c41c477b01d99e1e1c6c85cef8 Mon Sep 17 00:00:00 2001 From: Zufu Liu Date: Fri, 2 Jul 2021 10:13:21 +1000 Subject: Feature [feature-requests:#1408] Treat valid DBCS lead byte followed by invalid trail byte as single byte. --- doc/ScintillaHistory.html | 4 ++++ src/Document.cxx | 19 +++++++++++++------ test/unit/testDocument.cxx | 20 ++++++++++++++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index 542fd4eb9..fcfc47827 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -572,6 +572,10 @@ Released 23 June 2021.
  • + In DBCS encodings, treat a valid DBCS lead byte followed by an invalid trail byte as a single byte. + Feature #1408. +
  • +
  • On Win32, when technology is changed, buffering is set to a reasonable value for the technology: on for GDI and off for Direct2D as Direct2D performs its own buffering. Feature #1400. diff --git a/src/Document.cxx b/src/Document.cxx index c3fcf10c3..876e95b4f 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -892,11 +892,13 @@ Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) co return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); } } else { - if (IsDBCSLeadByteNoExcept(leadByte) && ((position + 1) < LengthNoExcept())) { - return CharacterExtracted::DBCS(leadByte, cb.UCharAt(position + 1)); - } else { - return CharacterExtracted(leadByte, 1); + if (IsDBCSLeadByteNoExcept(leadByte)) { + const unsigned char trailByte = cb.UCharAt(position + 1); + if (!IsDBCSTrailByteInvalid(trailByte)) { + return CharacterExtracted::DBCS(leadByte, trailByte); + } } + return CharacterExtracted(leadByte, 1); } } @@ -1007,8 +1009,13 @@ int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Positio } } else { if (IsDBCSLeadByteNoExcept(leadByte)) { - bytesInCharacter = 2; - character = (leadByte << 8) | cb.UCharAt(position+1); + const unsigned char trailByte = cb.UCharAt(position + 1); + if (!IsDBCSTrailByteInvalid(trailByte)) { + bytesInCharacter = 2; + character = (leadByte << 8) | trailByte; + } else { + character = leadByte; + } } else { character = leadByte; } diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx index 14d5fc22f..cc6255caa 100644 --- a/test/unit/testDocument.cxx +++ b/test/unit/testDocument.cxx @@ -100,4 +100,24 @@ TEST_CASE("Document") { REQUIRE(location == 1); } + SECTION("GetCharacterAndWidth") { + Document doc(DocumentOption::Default); + doc.SetDBCSCodePage(932); + REQUIRE(doc.CodePage() == 932); + const Sci::Position length = doc.InsertString(0, "\x84\xff=", 3); + REQUIRE(3 == length); + REQUIRE(3 == doc.Length()); + Sci::Position width = 0; + int ch = 
doc.GetCharacterAndWidth(0, &width); + REQUIRE(width == 1); + REQUIRE(ch == 0x84); + width = 0; + ch = doc.GetCharacterAndWidth(1, &width); + REQUIRE(width == 1); + REQUIRE(ch == 0xff); + width = 0; + ch = doc.GetCharacterAndWidth(2, &width); + REQUIRE(width == 1); + REQUIRE(ch == '='); + } } -- cgit v1.2.3