diff options
author | Neil <nyamatongwe@gmail.com> | 2018-07-10 15:06:50 +1000 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2018-07-10 15:06:50 +1000 |
commit | 56e20ea0283d8018dee48d736ba9dfef3c84dc3f (patch) | |
tree | 21bdb500dfc092fadecb123b87e9799a2c46f6a9 /src/UniConversion.cxx | |
parent | d27cbe587930d13d3f1802b271d0d13e7e3c6e38 (diff) | |
download | scintilla-mirror-56e20ea0283d8018dee48d736ba9dfef3c84dc3f.tar.gz |
Optional indexing of line starts in UTF-8 documents by UTF-32 code points and
UTF-16 code units added.
Diffstat (limited to 'src/UniConversion.cxx')
-rw-r--r-- | src/UniConversion.cxx | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 3b7472638..58475687b 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -340,6 +340,22 @@ int UTF8DrawBytes(const unsigned char *us, int len) noexcept {
 	return (utf8StatusNext & UTF8MaskInvalid) ? 1 : (utf8StatusNext & UTF8MaskWidth);
 }
 
+bool UTF8IsValid(std::string_view sv) noexcept {
+	const unsigned char *us = reinterpret_cast<const unsigned char *>(sv.data());
+	size_t remaining = sv.length();
+	while (remaining > 0) {
+		const int utf8Status = UTF8Classify(us, remaining);
+		if (utf8Status & UTF8MaskInvalid) {
+			return false;
+		} else {
+			const int lenChar = utf8Status & UTF8MaskWidth;
+			us += lenChar;
+			remaining -= lenChar;
+		}
+	}
+	return remaining == 0;
+}
+
 // Replace invalid bytes in UTF-8 with the replacement character
 std::string FixInvalidUTF8(const std::string &text) {
 	std::string result;