about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- doc/ScintillaHistory.html 4
-rw-r--r-- src/Document.cxx 87
-rw-r--r-- src/Document.h 3
-rw-r--r-- src/Editor.cxx 11
-rw-r--r-- src/PositionCache.cxx 3
5 files changed, 107 insertions, 1 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index 2d6fdd34a..621cb2c8a 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -555,6 +555,10 @@
Indicators are drawn for line end characters when displayed.
</li>
<li>
+ Most invalid bytes in DBCS encodings are displayed as blobs to make problems clear
+ and ensure something is shown.
+ </li>
+ <li>
EDIFACT lexer adds property lexer.edifact.highlight.un.all to highlight all UN* segments.
<a href="https://sourceforge.net/p/scintilla/feature-requests/1166/">Feature #1166.</a>
</li>
diff --git a/src/Document.cxx b/src/Document.cxx
index 312aa5c02..b7dc4c8d1 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -972,6 +972,93 @@ bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept {
return false;
}
+// Report whether ch is classed as an invalid lead byte for the active DBCS
+// code page (dbcsCodePage). Code pages with no table below always yield false.
+bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept {
+	// Go through unsigned char so bytes 0x80..0xFF compare as positive values.
+	const int value = static_cast<unsigned char>(ch);
+	const auto within = [value](int low, int high) noexcept {
+		return (value >= low) && (value <= high);
+	};
+	if (dbcsCodePage == 932) {
+		// Shift_jis
+		switch (value) {
+		case 0x85:
+		case 0x86:
+		case 0xEB:
+		case 0xEC:
+		case 0xEF:
+		case 0xFA:
+		case 0xFB:
+		case 0xFC:
+			return true;
+		default:
+			return false;
+		}
+	}
+	if (dbcsCodePage == 936) {
+		// GBK
+		return (value == 0x80) || (value == 0xFF);
+	}
+	if (dbcsCodePage == 949) {
+		// Korean Wansung KS C-5601-1987
+		return (value == 0x80) || (value == 0xC9) || (value > 0xFD);
+	}
+	if (dbcsCodePage == 950) {
+		// Big5
+		return within(0x80, 0xA0) || (value == 0xC8) || (value > 0xF9);
+	}
+	if (dbcsCodePage == 1361) {
+		// Korean Johab KS C-5601-1992
+		return within(0x80, 0x83) || within(0xD4, 0xD8) || (value == 0xDF) || (value > 0xF9);
+	}
+	return false;
+}
+
+// Report whether ch is classed as an invalid trail byte for the active DBCS
+// code page (dbcsCodePage). Code pages with no table below always yield false.
+bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept {
+	// Go through unsigned char so bytes 0x80..0xFF compare as positive values.
+	const int value = static_cast<unsigned char>(ch);
+	const auto within = [value](int low, int high) noexcept {
+		return (value >= low) && (value <= high);
+	};
+	if (dbcsCodePage == 932) {
+		// Shift_jis
+		return (value < 0x40) || (value == 0x7F) || (value > 0xFC);
+	}
+	if (dbcsCodePage == 936) {
+		// GBK
+		return (value < 0x40) || (value == 0x7F) || (value == 0xFF);
+	}
+	if (dbcsCodePage == 949) {
+		// Korean Wansung KS C-5601-1987
+		return (value < 0x41) || within(0x5B, 0x60) || within(0x7B, 0x80) || (value == 0xFF);
+	}
+	if (dbcsCodePage == 950) {
+		// Big5
+		return (value < 0x40) || within(0x7F, 0xA0) || (value == 0xFF);
+	}
+	if (dbcsCodePage == 1361) {
+		// Korean Johab KS C-5601-1992
+		return (value < 0x31) || within(0x7F, 0x80) || (value == 0xFF);
+	}
+	return false;
+}
+
+// Return how many bytes at the start of text should be treated as one unit:
+// 2 when text[0] is a DBCS lead byte and text[1] is not an invalid trail byte,
+// otherwise 1. When len is 1 or less, len itself is returned unchanged.
+int Document::DBCSDrawBytes(const char *text, int len) const noexcept {
+	if (len <= 1) {
+		// Too short to hold a lead/trail pair; text is not read.
+		return len;
+	}
+	// The trail byte is only examined when the first byte is a lead byte.
+	const bool completePair =
+		IsDBCSLeadByteNoExcept(text[0]) && !IsDBCSTrailByteInvalid(text[1]);
+	return completePair ? 2 : 1;
+}
+
static inline bool IsSpaceOrTab(int ch) noexcept {
return ch == ' ' || ch == '\t';
}
diff --git a/src/Document.h b/src/Document.h
index b4639a9c2..42f9d36ce 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -309,6 +309,9 @@ public:
int SCI_METHOD CodePage() const override;
bool SCI_METHOD IsDBCSLeadByte(char ch) const override;
bool IsDBCSLeadByteNoExcept(char ch) const noexcept;
+ bool IsDBCSLeadByteInvalid(char ch) const noexcept;
+ bool IsDBCSTrailByteInvalid(char ch) const noexcept;
+ int DBCSDrawBytes(const char *text, int len) const noexcept;
int SafeSegment(const char *text, int length, int lengthSegment) const;
EncodingFamily CodePageFamily() const;
diff --git a/src/Editor.cxx b/src/Editor.cxx
index 092c6c28f..7ceef1c50 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -242,6 +242,17 @@ void Editor::SetRepresentations() {
sprintf(hexits, "x%2X", k);
reprs.SetRepresentation(hiByte, hexits);
}
+ } else if (pdoc->dbcsCodePage) {
+ // DBCS invalid single lead bytes
+ for (int k = 0x80; k < 0x100; k++) {
+ char ch = static_cast<char>(k);
+ if (pdoc->IsDBCSLeadByteNoExcept(ch) || pdoc->IsDBCSLeadByteInvalid(ch)) {
+ const char hiByte[2] = { ch, 0 };
+ char hexits[5]; // Really only needs 4 but that causes warning from gcc 7.1
+ sprintf(hexits, "x%2X", k);
+ reprs.SetRepresentation(hiByte, hexits);
+ }
+ }
}
}
diff --git a/src/PositionCache.cxx b/src/PositionCache.cxx
index 543573b38..df58aa9a7 100644
--- a/src/PositionCache.cxx
+++ b/src/PositionCache.cxx
@@ -507,7 +507,8 @@ TextSegment BreakFinder::Next() {
charWidth = UTF8DrawBytes(reinterpret_cast<unsigned char *>(&ll->chars[nextBreak]),
static_cast<int>(lineRange.end - nextBreak));
else if (encodingFamily == efDBCS)
- charWidth = pdoc->IsDBCSLeadByteNoExcept(ll->chars[nextBreak]) ? 2 : 1;
+ charWidth = pdoc->DBCSDrawBytes(
+ &ll->chars[nextBreak], static_cast<int>(lineRange.end - nextBreak));
const Representation *repr = preprs->RepresentationFromCharacter(&ll->chars[nextBreak], charWidth);
if (((nextBreak > 0) && (ll->styles[nextBreak] != ll->styles[nextBreak - 1])) ||
repr ||