aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/UniConversion.cxx
diff options
context:
space:
mode:
authorNeil <nyamatongwe@gmail.com>2018-07-10 15:06:50 +1000
committerNeil <nyamatongwe@gmail.com>2018-07-10 15:06:50 +1000
commit72b5df15f33da27c59efd54eb0c84e173ca8c692 (patch)
treea65cbcf60c89542255a27672302e5de5e715624e /src/UniConversion.cxx
parent34540c84e31840787054652b72be7709d79eb1a2 (diff)
downloadscintilla-mirror-72b5df15f33da27c59efd54eb0c84e173ca8c692.tar.gz
Backport: Optional indexing of line starts in UTF-8 documents by UTF-32 code points and
UTF-16 code units added. Converted instances of C++17 std::string_view to C++11. Also used const_casts where appropriate to fix compile errors. Backport of changeset 7063:0d5edc93e280.
Diffstat (limited to 'src/UniConversion.cxx')
-rw-r--r--src/UniConversion.cxx16
1 files changed, 16 insertions, 0 deletions
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 58e899faa..6cd6a8ba9 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -327,6 +327,22 @@ int UTF8DrawBytes(const unsigned char *us, int len) noexcept {
return (utf8StatusNext & UTF8MaskInvalid) ? 1 : (utf8StatusNext & UTF8MaskWidth);
}
+// Return true when all len bytes at s classify as valid UTF-8, false at the first sequence UTF8Classify flags invalid.
+bool UTF8IsValid(const char *s, size_t len) noexcept {
+	const unsigned char *cursor = reinterpret_cast<const unsigned char *>(s);
+	for (size_t bytesLeft = len; bytesLeft > 0;) {
+		const int status = UTF8Classify(cursor, bytesLeft);
+		if (status & UTF8MaskInvalid) {
+			return false;
+		}
+		const int width = status & UTF8MaskWidth;	// bytes to step over for this character
+		cursor += width;
+		bytesLeft -= width;
+	}
+	// The loop only ends once the unsigned byte count has reached zero, so everything was consumed.
+	return true;
+}
+
// Replace invalid bytes in UTF-8 with the replacement character
std::string FixInvalidUTF8(const std::string &text) {
std::string result;