From c725c015867e59efd1ebe66e0247b62e38e04ac9 Mon Sep 17 00:00:00 2001
From: nyamatongwe
Date: Sat, 26 May 2012 12:08:06 +1000
Subject: Move classification of UTF-8 byte sequences into UniConversion module.

---
 src/UniConversion.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/UniConversion.h')

diff --git a/src/UniConversion.h b/src/UniConversion.h
index 2de2ef3fe..6793221cf 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -11,3 +11,9 @@ unsigned int UTF8CharLength(unsigned char ch);
 unsigned int UTF16Length(const char *s, unsigned int len);
 unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen);
 
+inline bool UTF8IsTrailByte(int ch) {
+	return (ch >= 0x80) && (ch < 0xc0);
+}
+
+enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 };
+int UTF8Classify(const unsigned char *us, int len);
-- 
cgit v1.2.3