From ff707f0fe276677a4d89633ae4964e8b94712ca3 Mon Sep 17 00:00:00 2001
From: Zufu Liu
Date: Thu, 22 Mar 2018 15:02:38 +1100
Subject: Feature [feature-requests:#1211]. Use pre-computed table for UTF8BytesOfLead. Friendlier treatment of invalid UTF-8. Add tests for UniConversion handling invalid UTF-8. Simplify UTF8Classify tests.

---
 src/UniConversion.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src/UniConversion.h')

diff --git a/src/UniConversion.h b/src/UniConversion.h
index 2f358c9c5..0f22c06e6 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -16,17 +16,15 @@ const int unicodeReplacementChar = 0xFFFD;
 
 size_t UTF8Length(const wchar_t *uptr, size_t tlen);
 void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len);
-unsigned int UTF8CharLength(unsigned char ch);
 size_t UTF16Length(const char *s, size_t len);
 size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
 size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen);
 unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf);
 std::string FixInvalidUTF8(const std::string &text);
 
-extern int UTF8BytesOfLead[256];
-void UTF8BytesOfLeadInitialise();
+extern const unsigned char UTF8BytesOfLead[256];
 
-inline bool UTF8IsTrailByte(int ch) {
+inline bool UTF8IsTrailByte(unsigned char ch) {
 	return (ch >= 0x80) && (ch < 0xc0);
 }
 
@@ -64,6 +62,10 @@ inline unsigned int UTF16CharLength(wchar_t uch) {
 	return ((uch >= SURROGATE_LEAD_FIRST) && (uch <= SURROGATE_LEAD_LAST)) ? 2 : 1;
 }
 
+inline unsigned int UTF16LengthFromUTF8ByteCount(unsigned int byteCount) {
+	return (byteCount < 4) ? 1 : 2;
+}
+
 }
 
 #endif
-- 
cgit v1.2.3