diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/CaseConvert.cxx   |  2
-rw-r--r-- | src/UniConversion.cxx | 12
-rw-r--r-- | src/UniConversion.h   |  1
3 files changed, 14 insertions, 1 deletion
diff --git a/src/CaseConvert.cxx b/src/CaseConvert.cxx
index 53824a987..752fd54e0 100644
--- a/src/CaseConvert.cxx
+++ b/src/CaseConvert.cxx
@@ -749,7 +749,7 @@ void CaseConverter::SetupConversions(CaseConversion conversion) {
 				break;
 			}
 			if (!converted.empty()) {
-				const int character = UnicodeFromUTF8(reinterpret_cast<const unsigned char *>(originUTF8.data()));
+				const int character = UnicodeFromUTF8(originUTF8);
 				Add(character, converted);
 			}
 		}
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index eadac8915..868fbacf5 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -261,6 +261,18 @@ unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noexcept {
 	return 2;
 }
 
+int UnicodeFromUTF8(std::string_view sv) noexcept {
+	if (!sv.empty()) {
+		const unsigned char uch = sv.front();
+		const unsigned int byteCount = UTF8BytesOfLead[uch];
+		if (sv.length() >= byteCount) {
+			return UnicodeFromUTF8(reinterpret_cast<const unsigned char *>(sv.data()));
+		}
+	}
+	// Failure so let the caller know
+	return unicodeReplacementChar;
+}
+
 const unsigned char UTF8BytesOfLead[256] = {
 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00 - 0F
 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10 - 1F
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 657e3eca7..7a51b2d08 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -43,6 +43,7 @@ inline int UnicodeFromUTF8(const unsigned char *us) noexcept {
 		return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
 	}
 }
+int UnicodeFromUTF8(std::string_view sv) noexcept;
 
 constexpr bool UTF8IsTrailByte(unsigned char ch) noexcept {
 	return (ch >= 0x80) && (ch < 0xc0);