diff options
author | Zufu Liu <unknown> | 2021-07-03 13:21:38 +1000 |
---|---|---|
committer | Zufu Liu <unknown> | 2021-07-03 13:21:38 +1000 |
commit | a2d23bd463e65f532301b682b64cd02b8a57716b (patch) | |
tree | 1d8129cc58281e1134ebf3884273f458bfeebef4 | |
parent | 9cda372c64c8920d2e910825161a8ed882b417b3 (diff) | |
download | scintilla-mirror-a2d23bd463e65f532301b682b64cd02b8a57716b.tar.gz |
Feature [feature-requests:#1408] Simplify code, remove IsDBCSTrailByteInvalid.
Drop temporary test for IsDBCSTrailByteNoExcept.
-rw-r--r-- | src/Document.cxx | 76 |
-rw-r--r-- | src/Document.h | 1 |
-rw-r--r-- | test/unit/testDocument.cxx | 11 |
3 files changed, 13 insertions, 75 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index 739b6266a..0ec598bc2 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -985,27 +985,22 @@ Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sc } int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const { - int character; int bytesInCharacter = 1; const unsigned char leadByte = cb.UCharAt(position); - if (dbcsCodePage) { + int character = leadByte; + if (dbcsCodePage && !UTF8IsAscii(leadByte)) { if (CpUtf8 == dbcsCodePage) { - if (UTF8IsAscii(leadByte)) { - // Single byte character or invalid - character = leadByte; + const int widthCharBytes = UTF8BytesOfLead[leadByte]; + unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; + for (int b=1; b<widthCharBytes; b++) + charBytes[b] = cb.UCharAt(position+b); + const int utf8status = UTF8Classify(charBytes, widthCharBytes); + if (utf8status & UTF8MaskInvalid) { + // Report as singleton surrogate values which are invalid Unicode + character = 0xDC80 + leadByte; } else { - const int widthCharBytes = UTF8BytesOfLead[leadByte]; - unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; - for (int b=1; b<widthCharBytes; b++) - charBytes[b] = cb.UCharAt(position+b); - const int utf8status = UTF8Classify(charBytes, widthCharBytes); - if (utf8status & UTF8MaskInvalid) { - // Report as singleton surrogate values which are invalid Unicode - character = 0xDC80 + leadByte; - } else { - bytesInCharacter = utf8status & UTF8MaskWidth; - character = UnicodeFromUTF8(charBytes); - } + bytesInCharacter = utf8status & UTF8MaskWidth; + character = UnicodeFromUTF8(charBytes); } } else { if (IsDBCSLeadByteNoExcept(leadByte)) { @@ -1013,15 +1008,9 @@ int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Positio if (IsDBCSTrailByteNoExcept(trailByte)) { bytesInCharacter = 2; character = (leadByte << 8) | trailByte; - } else { - character = leadByte; } - } else { - character = leadByte; } } - } 
else { - character = leadByte; } if (pWidth) { *pWidth = bytesInCharacter; @@ -1135,51 +1124,12 @@ bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept { return false; } -bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept { - const unsigned char trail = ch; - switch (dbcsCodePage) { - case 932: - // Shift_jis - return - (trail <= 0x3F) || - (trail == 0x7F) || - (trail >= 0xFD); - case 936: - // GBK - return - (trail <= 0x3F) || - (trail == 0x7F) || - (trail == 0xFF); - case 949: - // Korean Wansung KS C-5601-1987 - return - (trail <= 0x40) || - ((trail >= 0x5B) && (trail <= 0x60)) || - ((trail >= 0x7B) && (trail <= 0x80)) || - (trail == 0xFF); - case 950: - // Big5 - return - (trail <= 0x3F) || - ((trail >= 0x7F) && (trail <= 0xA0)) || - (trail == 0xFF); - case 1361: - // Korean Johab KS C-5601-1992 - return - (trail <= 0x30) || - (trail == 0x7F) || - (trail == 0x80) || - (trail == 0xFF); - } - return false; -} - int Document::DBCSDrawBytes(std::string_view text) const noexcept { if (text.length() <= 1) { return static_cast<int>(text.length()); } if (IsDBCSLeadByteNoExcept(text[0])) { - return IsDBCSTrailByteInvalid(text[1]) ? 1 : 2; + return IsDBCSTrailByteNoExcept(text[1]) ? 
2 : 1; } else { return 1; } diff --git a/src/Document.h b/src/Document.h index 88392c1a2..c40ce2a44 100644 --- a/src/Document.h +++ b/src/Document.h @@ -331,7 +331,6 @@ public: bool IsDBCSLeadByteNoExcept(char ch) const noexcept; bool IsDBCSTrailByteNoExcept(char ch) const noexcept; bool IsDBCSLeadByteInvalid(char ch) const noexcept; - bool IsDBCSTrailByteInvalid(char ch) const noexcept; int DBCSDrawBytes(std::string_view text) const noexcept; int SafeSegment(const char *text, int length, int lengthSegment) const noexcept; EncodingFamily CodePageFamily() const noexcept; diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx index cecd14920..c5275d25f 100644 --- a/test/unit/testDocument.cxx +++ b/test/unit/testDocument.cxx @@ -121,15 +121,4 @@ TEST_CASE("Document") { REQUIRE(ch == '='); } - SECTION("CheckTrailBytes") { - Document doc(DocumentOption::Default); - const int pages[] = { 932, 936, 949, 950, 1361 }; - for (const int page : pages) { - doc.SetDBCSCodePage(page); - for (int byteVal = 0; byteVal < 0x100; byteVal++) { - char ch = static_cast<char>(byteVal); - REQUIRE(doc.IsDBCSTrailByteNoExcept(ch) != doc.IsDBCSTrailByteInvalid(ch)); - } - } - } } |