diff options
author | Zufu Liu <unknown> | 2018-03-22 15:02:38 +1100 |
---|---|---|
committer | Zufu Liu <unknown> | 2018-03-22 15:02:38 +1100 |
commit | ff707f0fe276677a4d89633ae4964e8b94712ca3 (patch) | |
tree | 103d8741341108a8dc04ef59923e19da6f4a64e4 /src/UniConversion.h | |
parent | 9e4cdff7752304fff978ab7f606b64ea85310baf (diff) | |
download | scintilla-mirror-ff707f0fe276677a4d89633ae4964e8b94712ca3.tar.gz |
Feature [feature-requests:#1211]. Use pre-computed table for UTF8BytesOfLead.
Friendlier treatment of invalid UTF-8.
Add tests for UniConversion handling invalid UTF-8. Simplify UTF8Classify tests.
Diffstat (limited to 'src/UniConversion.h')
-rw-r--r-- | src/UniConversion.h | 10 |
1 file changed, 6 insertions, 4 deletions
```diff
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 2f358c9c5..0f22c06e6 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -16,17 +16,15 @@ const int unicodeReplacementChar = 0xFFFD;
 
 size_t UTF8Length(const wchar_t *uptr, size_t tlen);
 void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len);
-unsigned int UTF8CharLength(unsigned char ch);
 size_t UTF16Length(const char *s, size_t len);
 size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
 size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen);
 unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf);
 std::string FixInvalidUTF8(const std::string &text);
 
-extern int UTF8BytesOfLead[256];
-void UTF8BytesOfLeadInitialise();
+extern const unsigned char UTF8BytesOfLead[256];
 
-inline bool UTF8IsTrailByte(int ch) {
+inline bool UTF8IsTrailByte(unsigned char ch) {
 	return (ch >= 0x80) && (ch < 0xc0);
 }
 
@@ -64,6 +62,10 @@ inline unsigned int UTF16CharLength(wchar_t uch) {
 	return ((uch >= SURROGATE_LEAD_FIRST) && (uch <= SURROGATE_LEAD_LAST)) ? 2 : 1;
 }
 
+inline unsigned int UTF16LengthFromUTF8ByteCount(unsigned int byteCount) {
+	return (byteCount < 4) ? 1 : 2;
+}
+
 }
 
 #endif
```