diff options
author | Zufu Liu <unknown> | 2019-06-28 08:51:55 +1000 |
---|---|---|
committer | Zufu Liu <unknown> | 2019-06-28 08:51:55 +1000 |
commit | 704c13fad8df333aef9f813d9dee9b41b43316d5 (patch) | |
tree | ba3e83ebf7fa501e3644b61bc17e4a8dc192ebdf /src | |
parent | 1573a0d60a3cf054979125c7986566d13a6686d3 (diff) | |
download | scintilla-mirror-704c13fad8df333aef9f813d9dee9b41b43316d5.tar.gz |
Bug [#2116]. Fix deletion of isolated invalid bytes.
Diffstat (limited to 'src')
-rw-r--r-- | src/Document.cxx | 37 |
1 files changed, 26 insertions, 11 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index b825823bd..226e9d158 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -605,22 +605,37 @@ bool Document::IsCrLf(Sci::Position pos) const {
 }
 
 int Document::LenChar(Sci::Position pos) {
-	if (pos < 0) {
+	if (pos < 0 || pos >= Length()) {
+		// Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds.
 		return 1;
 	} else if (IsCrLf(pos)) {
 		return 2;
-	} else if (SC_CP_UTF8 == dbcsCodePage) {
-		const unsigned char leadByte = cb.UCharAt(pos);
+	}
+
+	const unsigned char leadByte = cb.UCharAt(pos);
+	if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
+		// Common case: ASCII character
+		return 1;
+	}
+	if (SC_CP_UTF8 == dbcsCodePage) {
 		const int widthCharBytes = UTF8BytesOfLead[leadByte];
-		const Sci::Position lengthDoc = Length();
-		if ((pos + widthCharBytes) > lengthDoc)
-			return static_cast<int>(lengthDoc - pos);
-		else
-			return widthCharBytes;
-	} else if (dbcsCodePage) {
-		return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1;
+		unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
+		for (int b = 1; b < widthCharBytes; b++) {
+			charBytes[b] = cb.UCharAt(pos + b);
+		}
+		const int utf8status = UTF8Classify(charBytes, widthCharBytes);
+		if (utf8status & UTF8MaskInvalid) {
+			// Treat as invalid and use up just one byte
+			return 1;
+		} else {
+			return utf8status & UTF8MaskWidth;
+		}
 	} else {
-		return 1;
+		if (IsDBCSLeadByteNoExcept(leadByte) && ((pos + 1) < Length())) {
+			return 2;
+		} else {
+			return 1;
+		}
 	}
 }
 