about summary refs log tree commit diff homepage
path: root/src
diff options
context:
space:
mode:
author Zufu Liu <unknown> 2019-06-28 08:51:55 +1000
committer Zufu Liu <unknown> 2019-06-28 08:51:55 +1000
commit 0d4dd734b7b6e33ce87937447c20bdaf41c86e3e (patch)
tree 1b0ba27e2f22973fe38e437bd3c11058346ae53e /src
parent 4c8a1f741c73911eff813a78c08fa88427eec38b (diff)
download scintilla-mirror-0d4dd734b7b6e33ce87937447c20bdaf41c86e3e.tar.gz
Backport: Bug [#2116]. Fix deletion of isolated invalid bytes.
Backport of changeset 7610:1031c155fb62.
Diffstat (limited to 'src')
-rw-r--r-- src/Document.cxx 37
1 files changed, 26 insertions, 11 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index 9b30b44aa..3ba78c086 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -608,22 +608,37 @@ bool Document::IsCrLf(Sci::Position pos) const {
}
int Document::LenChar(Sci::Position pos) {
- if (pos < 0) {
+ if (pos < 0 || pos >= Length()) {
+ // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds.
return 1;
} else if (IsCrLf(pos)) {
return 2;
- } else if (SC_CP_UTF8 == dbcsCodePage) {
- const unsigned char leadByte = cb.UCharAt(pos);
+ }
+
+ const unsigned char leadByte = cb.UCharAt(pos);
+ if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
+ // Common case: ASCII character
+ return 1;
+ }
+ if (SC_CP_UTF8 == dbcsCodePage) {
const int widthCharBytes = UTF8BytesOfLead[leadByte];
- const Sci::Position lengthDoc = Length();
- if ((pos + widthCharBytes) > lengthDoc)
- return static_cast<int>(lengthDoc - pos);
- else
- return widthCharBytes;
- } else if (dbcsCodePage) {
- return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1;
+ unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
+ for (int b = 1; b < widthCharBytes; b++) {
+ charBytes[b] = cb.UCharAt(pos + b);
+ }
+ const int utf8status = UTF8Classify(charBytes, widthCharBytes);
+ if (utf8status & UTF8MaskInvalid) {
+ // Treat as invalid and use up just one byte
+ return 1;
+ } else {
+ return utf8status & UTF8MaskWidth;
+ }
} else {
- return 1;
+ if (IsDBCSLeadByteNoExcept(leadByte) && ((pos + 1) < Length())) {
+ return 2;
+ } else {
+ return 1;
+ }
}
}