aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/UniConversion.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'src/UniConversion.cxx')
-rw-r--r--src/UniConversion.cxx16
1 files changed, 16 insertions, 0 deletions
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 3b7472638..58475687b 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -340,6 +340,22 @@ int UTF8DrawBytes(const unsigned char *us, int len) noexcept {
return (utf8StatusNext & UTF8MaskInvalid) ? 1 : (utf8StatusNext & UTF8MaskWidth);
}
+// Report whether the whole of sv is well-formed UTF-8.
+// The text is walked one character at a time with UTF8Classify; the first
+// sequence flagged with UTF8MaskInvalid makes the result false.
+// An empty view never enters the loop and is reported as valid.
+bool UTF8IsValid(std::string_view sv) noexcept {
+	const unsigned char *cursor = reinterpret_cast<const unsigned char *>(sv.data());
+	for (size_t bytesLeft = sv.length(); bytesLeft != 0;) {
+		const int status = UTF8Classify(cursor, bytesLeft);
+		if (status & UTF8MaskInvalid) {
+			return false;
+		}
+		// Low bits of the status hold the byte width of the character just classified
+		const int width = status & UTF8MaskWidth;
+		cursor += width;
+		bytesLeft -= width;
+	}
+	// The loop only finishes once every byte has been consumed
+	return true;
+}
+
// Replace invalid bytes in UTF-8 with the replacement character
std::string FixInvalidUTF8(const std::string &text) {
std::string result;