diff options
-rw-r--r-- | doc/ScintillaHistory.html | 4 | ||||
-rw-r--r-- | src/Document.cxx | 87 | ||||
-rw-r--r-- | src/Document.h | 3 | ||||
-rw-r--r-- | src/Editor.cxx | 11 | ||||
-rw-r--r-- | src/PositionCache.cxx | 3 |
5 files changed, 107 insertions, 1 deletion
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index bb28a95fb..bba75339b 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -548,6 +548,10 @@ Indicators are drawn for line end characters when displayed. </li> <li> + Most invalid bytes in DBCS encodings are displayed as blobs to make problems clear + and ensure something is shown. + </li> + <li> Crashes fixed on macOS for invalid DBCS characters when dragging text, changing case of text, case-insensitive searching, and retrieving text as UTF-8. </li> diff --git a/src/Document.cxx b/src/Document.cxx index 2852e1097..942903b78 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -969,6 +969,93 @@ bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept { return false; } +bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept { /* Returns true when ch, read as an unsigned byte, is listed below as never valid as a lead byte in the active dbcsCodePage; code pages without a case here always yield false. */ + const unsigned char lead = ch; + switch (dbcsCodePage) { + case 932: + // Shift_JIS + return + (lead == 0x85) || + (lead == 0x86) || + (lead == 0xEB) || + (lead == 0xEC) || + (lead == 0xEF) || + (lead == 0xFA) || + (lead == 0xFB) || + (lead == 0xFC); + case 936: + // GBK + return (lead == 0x80) || (lead == 0xFF); + case 949: + // Korean Wansung KS C-5601-1987 + return (lead == 0x80) || (lead == 0xC9) || (lead >= 0xFE); + case 950: + // Big5 + return + ((lead >= 0x80) && (lead <= 0xA0)) || + (lead == 0xC8) || + (lead >= 0xFA); + case 1361: + // Korean Johab KS C-5601-1992 + return + ((lead >= 0x80) && (lead <= 0x83)) || + ((lead >= 0xD4) && (lead <= 0xD8)) || + (lead == 0xDF) || + (lead >= 0xFA); + } + return false; +} + +bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept { /* Returns true when ch, read as an unsigned byte, is listed below as never valid as a trail byte in the active dbcsCodePage; code pages without a case here always yield false. */ + const unsigned char trail = ch; + switch (dbcsCodePage) { + case 932: + // Shift_JIS + return + (trail <= 0x3F) || + (trail == 0x7F) || + (trail >= 0xFD); + case 936: + // GBK + return + (trail <= 0x3F) || + (trail == 0x7F) || + (trail == 0xFF); + case 949: + // Korean Wansung KS C-5601-1987 + return + (trail <= 0x40) || + ((trail >= 0x5B) && (trail <= 
0x60)) || + ((trail >= 0x7B) && (trail <= 0x80)) || + (trail == 0xFF); + case 950: + // Big5 + return + (trail <= 0x3F) || + ((trail >= 0x7F) && (trail <= 0xA0)) || + (trail == 0xFF); + case 1361: + // Korean Johab KS C-5601-1992 + return + (trail <= 0x30) || + (trail == 0x7F) || + (trail == 0x80) || + (trail == 0xFF); + } + return false; +} + +int Document::DBCSDrawBytes(std::string_view text) const noexcept { /* Returns the number of bytes at the start of text to treat as one character: text.length() when text holds 0 or 1 bytes, 2 when text[0] is a lead byte and text[1] is not flagged as an invalid trail byte, otherwise 1. */ + if (text.length() <= 1) { + return static_cast<int>(text.length()); + } + if (IsDBCSLeadByteNoExcept(text[0])) { + return IsDBCSTrailByteInvalid(text[1]) ? 1 : 2; + } else { + return 1; + } +} + static inline bool IsSpaceOrTab(int ch) noexcept { return ch == ' ' || ch == '\t'; } diff --git a/src/Document.h b/src/Document.h index 0edf0b76e..db6d79066 100644 --- a/src/Document.h +++ b/src/Document.h @@ -309,6 +309,9 @@ public: int SCI_METHOD CodePage() const override; bool SCI_METHOD IsDBCSLeadByte(char ch) const override; bool IsDBCSLeadByteNoExcept(char ch) const noexcept; + bool IsDBCSLeadByteInvalid(char ch) const noexcept; + bool IsDBCSTrailByteInvalid(char ch) const noexcept; + int DBCSDrawBytes(std::string_view text) const noexcept; int SafeSegment(const char *text, int length, int lengthSegment) const; EncodingFamily CodePageFamily() const; diff --git a/src/Editor.cxx b/src/Editor.cxx index 9aba5ccd2..04d61bea6 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -242,6 +242,17 @@ void Editor::SetRepresentations() { sprintf(hexits, "x%2X", k); reprs.SetRepresentation(hiByte, hexits); } + } else if (pdoc->dbcsCodePage) { + // DBCS: register a hex representation for each single byte 0x80-0xFF that is a lead byte or an invalid lead byte + for (int k = 0x80; k < 0x100; k++) { + char ch = static_cast<char>(k); + if (pdoc->IsDBCSLeadByteNoExcept(ch) || pdoc->IsDBCSLeadByteInvalid(ch)) { + const char hiByte[2] = { ch, 0 }; + char hexits[5]; // Really only needs 4 but that causes warning from gcc 7.1 + sprintf(hexits, "x%2X", k); + reprs.SetRepresentation(hiByte, hexits); + } + } } } diff --git a/src/PositionCache.cxx 
b/src/PositionCache.cxx index bf5560678..31a8601f5 100644 --- a/src/PositionCache.cxx +++ b/src/PositionCache.cxx @@ -508,7 +508,8 @@ TextSegment BreakFinder::Next() { charWidth = UTF8DrawBytes(reinterpret_cast<unsigned char *>(&ll->chars[nextBreak]), static_cast<int>(lineRange.end - nextBreak)); else if (encodingFamily == efDBCS) - charWidth = pdoc->IsDBCSLeadByteNoExcept(ll->chars[nextBreak]) ? 2 : 1; + charWidth = pdoc->DBCSDrawBytes( + std::string_view(&ll->chars[nextBreak], lineRange.end - nextBreak)); const Representation *repr = preprs->RepresentationFromCharacter(&ll->chars[nextBreak], charWidth); if (((nextBreak > 0) && (ll->styles[nextBreak] != ll->styles[nextBreak - 1])) || repr || |