about | summary | refs | log | tree | commit | diff | homepage
path: root/src/UniConversion.cxx
diff options
context:
space:
mode:
author Neil <nyamatongwe@gmail.com> 2018-07-10 15:06:50 +1000
committer Neil <nyamatongwe@gmail.com> 2018-07-10 15:06:50 +1000
commit 56e20ea0283d8018dee48d736ba9dfef3c84dc3f (patch)
tree 21bdb500dfc092fadecb123b87e9799a2c46f6a9 /src/UniConversion.cxx
parent d27cbe587930d13d3f1802b271d0d13e7e3c6e38 (diff)
download scintilla-mirror-56e20ea0283d8018dee48d736ba9dfef3c84dc3f.tar.gz
Optional indexing of line starts in UTF-8 documents by UTF-32 code points and
UTF-16 code units added.
Diffstat (limited to 'src/UniConversion.cxx')
-rw-r--r-- src/UniConversion.cxx 16
1 files changed, 16 insertions, 0 deletions
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 3b7472638..58475687b 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -340,6 +340,22 @@ int UTF8DrawBytes(const unsigned char *us, int len) noexcept {
return (utf8StatusNext & UTF8MaskInvalid) ? 1 : (utf8StatusNext & UTF8MaskWidth);
}
+bool UTF8IsValid(std::string_view sv) noexcept { // Returns true when UTF8Classify flags no sequence in sv as invalid; an empty view yields true.
+ const unsigned char *us = reinterpret_cast<const unsigned char *>(sv.data()); // Cursor over the view's bytes, reinterpreted as unsigned char for UTF8Classify.
+ size_t remaining = sv.length(); // Number of bytes not yet classified.
+ while (remaining > 0) {
+ const int utf8Status = UTF8Classify(us, remaining); // Classify the sequence starting at the cursor, bounded by the bytes left.
+ if (utf8Status & UTF8MaskInvalid) {
+ return false; // The first sequence flagged invalid ends the scan.
+ } else {
+ const int lenChar = utf8Status & UTF8MaskWidth; // Width bits of the status, used as the number of bytes to skip.
+ us += lenChar;
+ remaining -= lenChar; // NOTE(review): assumes a non-invalid status never carries a width of 0 or a width larger than remaining - confirm against UTF8Classify.
+ }
+ }
+ return remaining == 0; // The loop exits here only once remaining has reached 0.
+}
+
// Replace invalid bytes in UTF-8 with the replacement character
std::string FixInvalidUTF8(const std::string &text) {
std::string result;