aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Zufu Liu <unknown> 2019-06-28 08:51:55 +1000
committer: Zufu Liu <unknown> 2019-06-28 08:51:55 +1000
commit: 0d4dd734b7b6e33ce87937447c20bdaf41c86e3e (patch)
tree: 1b0ba27e2f22973fe38e437bd3c11058346ae53e
parent: 4c8a1f741c73911eff813a78c08fa88427eec38b (diff)
download: scintilla-mirror-0d4dd734b7b6e33ce87937447c20bdaf41c86e3e.tar.gz
Backport: Bug [#2116]. Fix deletion of isolated invalid bytes.
Backport of changeset 7610:1031c155fb62.
-rw-r--r-- doc/ScintillaHistory.html | 4
-rw-r--r-- src/Document.cxx | 37
2 files changed, 30 insertions, 11 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index 74ba9a851..957c4a69b 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -584,6 +584,10 @@
<a href="https://sourceforge.net/p/scintilla/bugs/2110/">Bug #2110</a>.
</li>
<li>
+ Fix deletion of isolated invalid bytes.
+ <a href="https://sourceforge.net/p/scintilla/bugs/2116/">Bug #2116</a>.
+ </li>
+ <li>
Fix position of line caret when overstrike caret set to block.
<a href="https://sourceforge.net/p/scintilla/bugs/2106/">Bug #2106</a>.
</li>
diff --git a/src/Document.cxx b/src/Document.cxx
index 9b30b44aa..3ba78c086 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -608,22 +608,37 @@ bool Document::IsCrLf(Sci::Position pos) const {
}
int Document::LenChar(Sci::Position pos) {
- if (pos < 0) {
+ if (pos < 0 || pos >= Length()) {
+ // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds.
return 1;
} else if (IsCrLf(pos)) {
return 2;
- } else if (SC_CP_UTF8 == dbcsCodePage) {
- const unsigned char leadByte = cb.UCharAt(pos);
+ }
+
+ const unsigned char leadByte = cb.UCharAt(pos);
+ if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
+ // Common case: ASCII character
+ return 1;
+ }
+ if (SC_CP_UTF8 == dbcsCodePage) {
const int widthCharBytes = UTF8BytesOfLead[leadByte];
- const Sci::Position lengthDoc = Length();
- if ((pos + widthCharBytes) > lengthDoc)
- return static_cast<int>(lengthDoc - pos);
- else
- return widthCharBytes;
- } else if (dbcsCodePage) {
- return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1;
+ unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
+ for (int b = 1; b < widthCharBytes; b++) {
+ charBytes[b] = cb.UCharAt(pos + b);
+ }
+ const int utf8status = UTF8Classify(charBytes, widthCharBytes);
+ if (utf8status & UTF8MaskInvalid) {
+ // Treat as invalid and use up just one byte
+ return 1;
+ } else {
+ return utf8status & UTF8MaskWidth;
+ }
} else {
- return 1;
+ if (IsDBCSLeadByteNoExcept(leadByte) && ((pos + 1) < Length())) {
+ return 2;
+ } else {
+ return 1;
+ }
}
}