about | summary | refs | log | tree | commit | diff | homepage
path: root/src/UniConversion.h
diff options
context:
space:
mode:
author: Zufu Liu <unknown> 2018-03-22 15:02:38 +1100
committer: Zufu Liu <unknown> 2018-03-22 15:02:38 +1100
commit: ff707f0fe276677a4d89633ae4964e8b94712ca3 (patch)
tree: 103d8741341108a8dc04ef59923e19da6f4a64e4 /src/UniConversion.h
parent: 9e4cdff7752304fff978ab7f606b64ea85310baf (diff)
download: scintilla-mirror-ff707f0fe276677a4d89633ae4964e8b94712ca3.tar.gz
Feature [feature-requests:#1211]. Use pre-computed table for UTF8BytesOfLead.
Friendlier treatment of invalid UTF-8. Add tests for UniConversion handling invalid UTF-8. Simplify UTF8Classify tests.
Diffstat (limited to 'src/UniConversion.h')
-rw-r--r-- src/UniConversion.h 10
1 files changed, 6 insertions, 4 deletions
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 2f358c9c5..0f22c06e6 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -16,17 +16,15 @@ const int unicodeReplacementChar = 0xFFFD;
size_t UTF8Length(const wchar_t *uptr, size_t tlen);
void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len);
-unsigned int UTF8CharLength(unsigned char ch);
size_t UTF16Length(const char *s, size_t len);
size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen);
unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf);
std::string FixInvalidUTF8(const std::string &text);
-extern int UTF8BytesOfLead[256];
-void UTF8BytesOfLeadInitialise();
+extern const unsigned char UTF8BytesOfLead[256];
-inline bool UTF8IsTrailByte(int ch) {
+inline bool UTF8IsTrailByte(unsigned char ch) {
return (ch >= 0x80) && (ch < 0xc0);
}
@@ -64,6 +62,10 @@ inline unsigned int UTF16CharLength(wchar_t uch) {
return ((uch >= SURROGATE_LEAD_FIRST) && (uch <= SURROGATE_LEAD_LAST)) ? 2 : 1;
}
+inline unsigned int UTF16LengthFromUTF8ByteCount(unsigned int byteCount) {
+ return (byteCount < 4) ? 1 : 2;
+}
+
}
#endif