aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Neil Hodgson <nyamatongwe@gmail.com> 2018-05-21 15:15:34 +1000
committer Neil Hodgson <nyamatongwe@gmail.com> 2018-05-21 15:15:34 +1000
commit 521ef7054806424c97dac5ee71b3a05ed5b9d7f4 (patch)
tree d46c95588195f593baa1c46b0882e1c169b6b590
parent 150417216d362bae78621939102f8183af951f78 (diff)
download scintilla-mirror-521ef7054806424c97dac5ee71b3a05ed5b9d7f4.tar.gz
Draw invalid bytes in DBCS, when detected, as blobs in a similar way to UTF-8.
-rw-r--r-- doc/ScintillaHistory.html 4
-rw-r--r-- src/Document.cxx 87
-rw-r--r-- src/Document.h 3
-rw-r--r-- src/Editor.cxx 11
-rw-r--r-- src/PositionCache.cxx 3
5 files changed, 107 insertions, 1 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index bb28a95fb..bba75339b 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -548,6 +548,10 @@
Indicators are drawn for line end characters when displayed.
</li>
<li>
+ Most invalid bytes in DBCS encodings are displayed as blobs to make problems clear
+ and ensure something is shown.
+ </li>
+ <li>
Crashes fixed on macOS for invalid DBCS characters when dragging text,
changing case of text, case-insensitive searching, and retrieving text as UTF-8.
</li>
diff --git a/src/Document.cxx b/src/Document.cxx
index 2852e1097..942903b78 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -969,6 +969,93 @@ bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept {
return false;
}
+bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept { // True if ch is one of the lead-byte values listed below for the current dbcsCodePage
+ const unsigned char lead = ch; // Work with the unsigned value so the 0x80..0xFF comparisons below behave as written
+ switch (dbcsCodePage) {
+ case 932:
+ // Shift_jis (code page 932): individual lead values reported as invalid
+ return
+ (lead == 0x85) ||
+ (lead == 0x86) ||
+ (lead == 0xEB) ||
+ (lead == 0xEC) ||
+ (lead == 0xEF) ||
+ (lead == 0xFA) ||
+ (lead == 0xFB) ||
+ (lead == 0xFC);
+ case 936:
+ // GBK (code page 936): only 0x80 and 0xFF are reported as invalid
+ return (lead == 0x80) || (lead == 0xFF);
+ case 949:
+ // Korean Wansung KS C-5601-1987 (code page 949): 0x80, 0xC9, 0xFE and 0xFF
+ return (lead == 0x80) || (lead == 0xC9) || (lead >= 0xFE);
+ case 950:
+ // Big5 (code page 950): 0x80..0xA0, 0xC8, and 0xFA..0xFF
+ return
+ ((lead >= 0x80) && (lead <= 0xA0)) ||
+ (lead == 0xC8) ||
+ (lead >= 0xFA);
+ case 1361:
+ // Korean Johab KS C-5601-1992 (code page 1361): 0x80..0x83, 0xD4..0xD8, 0xDF, and 0xFA..0xFF
+ return
+ ((lead >= 0x80) && (lead <= 0x83)) ||
+ ((lead >= 0xD4) && (lead <= 0xD8)) ||
+ (lead == 0xDF) ||
+ (lead >= 0xFA);
+ }
+ return false; // No table for any other dbcsCodePage value, so nothing is reported as invalid
+}
+
+bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept { // True if ch is one of the trail-byte values listed below for the current dbcsCodePage
+ const unsigned char trail = ch; // Work with the unsigned value so the range comparisons below behave as written
+ switch (dbcsCodePage) {
+ case 932:
+ // Shift_jis (code page 932): 0x00..0x3F, 0x7F, and 0xFD..0xFF
+ return
+ (trail <= 0x3F) ||
+ (trail == 0x7F) ||
+ (trail >= 0xFD);
+ case 936:
+ // GBK (code page 936): 0x00..0x3F, 0x7F, and 0xFF
+ return
+ (trail <= 0x3F) ||
+ (trail == 0x7F) ||
+ (trail == 0xFF);
+ case 949:
+ // Korean Wansung KS C-5601-1987 (code page 949): 0x00..0x40, 0x5B..0x60, 0x7B..0x80, and 0xFF
+ return
+ (trail <= 0x40) ||
+ ((trail >= 0x5B) && (trail <= 0x60)) ||
+ ((trail >= 0x7B) && (trail <= 0x80)) ||
+ (trail == 0xFF);
+ case 950:
+ // Big5 (code page 950): 0x00..0x3F, 0x7F..0xA0, and 0xFF
+ return
+ (trail <= 0x3F) ||
+ ((trail >= 0x7F) && (trail <= 0xA0)) ||
+ (trail == 0xFF);
+ case 1361:
+ // Korean Johab KS C-5601-1992 (code page 1361): 0x00..0x30, 0x7F, 0x80, and 0xFF
+ return
+ (trail <= 0x30) ||
+ (trail == 0x7F) ||
+ (trail == 0x80) ||
+ (trail == 0xFF);
+ }
+ return false; // No table for any other dbcsCodePage value, so nothing is reported as invalid
+}
+
+int Document::DBCSDrawBytes(std::string_view text) const noexcept { // Number of bytes at the start of text (0, 1 or 2) to treat as one unit
+ if (text.length() <= 1) { // Empty or single-byte input: there is no second byte to examine
+ return static_cast<int>(text.length());
+ }
+ if (IsDBCSLeadByteNoExcept(text[0])) {
+ return IsDBCSTrailByteInvalid(text[1]) ? 1 : 2; // A lead byte followed by an invalid trail byte stands alone (1); otherwise the pair counts as 2
+ } else {
+ return 1; // First byte is not a lead byte, so it is a single-byte unit
+ }
+}
+
static inline bool IsSpaceOrTab(int ch) noexcept {
return ch == ' ' || ch == '\t';
}
diff --git a/src/Document.h b/src/Document.h
index 0edf0b76e..db6d79066 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -309,6 +309,9 @@ public:
int SCI_METHOD CodePage() const override;
bool SCI_METHOD IsDBCSLeadByte(char ch) const override;
bool IsDBCSLeadByteNoExcept(char ch) const noexcept;
+ bool IsDBCSLeadByteInvalid(char ch) const noexcept; // True if ch is listed as an invalid lead byte for the current DBCS code page
+ bool IsDBCSTrailByteInvalid(char ch) const noexcept; // True if ch is listed as an invalid trail byte for the current DBCS code page
+ int DBCSDrawBytes(std::string_view text) const noexcept; // 0, 1 or 2: count of bytes at the start of text that form one unit
int SafeSegment(const char *text, int length, int lengthSegment) const;
EncodingFamily CodePageFamily() const;
diff --git a/src/Editor.cxx b/src/Editor.cxx
index 9aba5ccd2..04d61bea6 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -242,6 +242,17 @@ void Editor::SetRepresentations() {
sprintf(hexits, "x%2X", k);
reprs.SetRepresentation(hiByte, hexits);
}
+ } else if (pdoc->dbcsCodePage) {
+ // DBCS invalid single lead bytes
+ for (int k = 0x80; k < 0x100; k++) {
+ char ch = static_cast<char>(k);
+ if (pdoc->IsDBCSLeadByteNoExcept(ch) || pdoc->IsDBCSLeadByteInvalid(ch)) {
+ const char hiByte[2] = { ch, 0 };
+ char hexits[5]; // Really only needs 4 but that causes warning from gcc 7.1
+ sprintf(hexits, "x%2X", k);
+ reprs.SetRepresentation(hiByte, hexits);
+ }
+ }
}
}
diff --git a/src/PositionCache.cxx b/src/PositionCache.cxx
index bf5560678..31a8601f5 100644
--- a/src/PositionCache.cxx
+++ b/src/PositionCache.cxx
@@ -508,7 +508,8 @@ TextSegment BreakFinder::Next() {
charWidth = UTF8DrawBytes(reinterpret_cast<unsigned char *>(&ll->chars[nextBreak]),
static_cast<int>(lineRange.end - nextBreak));
else if (encodingFamily == efDBCS)
- charWidth = pdoc->IsDBCSLeadByteNoExcept(ll->chars[nextBreak]) ? 2 : 1;
+ charWidth = pdoc->DBCSDrawBytes(
+ std::string_view(&ll->chars[nextBreak], lineRange.end - nextBreak));
const Representation *repr = preprs->RepresentationFromCharacter(&ll->chars[nextBreak], charWidth);
if (((nextBreak > 0) && (ll->styles[nextBreak] != ll->styles[nextBreak - 1])) ||
repr ||