about | summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author: Zufu Liu <unknown> 2019-06-28 08:51:55 +1000
committer: Zufu Liu <unknown> 2019-06-28 08:51:55 +1000
commit: 704c13fad8df333aef9f813d9dee9b41b43316d5 (patch)
tree: ba3e83ebf7fa501e3644b61bc17e4a8dc192ebdf /src
parent: 1573a0d60a3cf054979125c7986566d13a6686d3 (diff)
download: scintilla-mirror-704c13fad8df333aef9f813d9dee9b41b43316d5.tar.gz
Bug [#2116]. Fix deletion of isolated invalid bytes.
Diffstat (limited to 'src')
-rw-r--r-- src/Document.cxx 37
1 file changed, 26 insertions, 11 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index b825823bd..226e9d158 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -605,22 +605,37 @@ bool Document::IsCrLf(Sci::Position pos) const {
}
int Document::LenChar(Sci::Position pos) {
- if (pos < 0) {
+ if (pos < 0 || pos >= Length()) {
+ // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds.
return 1;
} else if (IsCrLf(pos)) {
return 2;
- } else if (SC_CP_UTF8 == dbcsCodePage) {
- const unsigned char leadByte = cb.UCharAt(pos);
+ }
+
+ const unsigned char leadByte = cb.UCharAt(pos);
+ if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
+ // Common case: ASCII character
+ return 1;
+ }
+ if (SC_CP_UTF8 == dbcsCodePage) {
const int widthCharBytes = UTF8BytesOfLead[leadByte];
- const Sci::Position lengthDoc = Length();
- if ((pos + widthCharBytes) > lengthDoc)
- return static_cast<int>(lengthDoc - pos);
- else
- return widthCharBytes;
- } else if (dbcsCodePage) {
- return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1;
+ unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
+ for (int b = 1; b < widthCharBytes; b++) {
+ charBytes[b] = cb.UCharAt(pos + b);
+ }
+ const int utf8status = UTF8Classify(charBytes, widthCharBytes);
+ if (utf8status & UTF8MaskInvalid) {
+ // Treat as invalid and use up just one byte
+ return 1;
+ } else {
+ return utf8status & UTF8MaskWidth;
+ }
} else {
- return 1;
+ if (IsDBCSLeadByteNoExcept(leadByte) && ((pos + 1) < Length())) {
+ return 2;
+ } else {
+ return 1;
+ }
}
}