/**
 * Return the number of bytes in a UTF-8 character, judged only from the
 * value of the single byte @a ch.
 *
 * The result is decided by three thresholds:
 *   - below 0x80         -> 1
 *   - 0x80 up to 0xDF    -> 2
 *   - 0xE0 up to 0xEF    -> 3
 *   - 0xF0 and above     -> 4
 *
 * No validation is performed: every byte value maps to a length between
 * 1 and 4, including values in 0x80..0xBF (reported as 2) and values in
 * 0xF8..0xFF (reported as 4).
 */
unsigned int UTF8CharLength(unsigned char ch) {
	// Start at one byte and add one for each threshold the byte reaches.
	unsigned int byteCount = 1;
	if (ch >= 0x80)
		byteCount++;
	if (ch >= 0xE0)
		byteCount++;
	if (ch >= 0xF0)
		byteCount++;
	return byteCount;
}