about summary refs log tree commit diff homepage
path: root/src
diff options
context:
space:
mode:
author nyamatongwe <devnull@localhost> 2010-03-23 05:22:21 +0000
committer nyamatongwe <devnull@localhost> 2010-03-23 05:22:21 +0000
commit 99335bdcf3814c7268bfb3f1a9c0906e59ae11c3 (patch)
tree 31b1e6454ad4f4e191bcffa08aef846825b4b2c2 /src
parent a0df10a7cf8742e9eead68023c26dbb46318e804 (diff)
download scintilla-mirror-99335bdcf3814c7268bfb3f1a9c0906e59ae11c3.tar.gz
Added function for finding how many bytes are in a UTF-8 character.
Diffstat (limited to 'src')
-rw-r--r-- src/UniConversion.cxx 12
-rw-r--r-- src/UniConversion.h 1
2 files changed, 13 insertions, 0 deletions
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 482a0a581..0064e3cba 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -61,6 +61,18 @@ void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned
putf[len] = '\0';
}
+unsigned int UTF8CharLength(unsigned char ch) { // Byte count (1..4) of a UTF-8 character, decided solely from the single byte ch -- presumably the character's lead byte; confirm against callers.
+ if (ch < 0x80) { // 0x00..0x7F: one-byte character
+ return 1;
+ } else if (ch < 0x80 + 0x40 + 0x20) { // 0x80..0xDF (sum is 0xE0). NOTE(review): 0x80..0xBF are UTF-8 trail bytes, yet they also land here and yield 2.
+ return 2;
+ } else if (ch < 0x80 + 0x40 + 0x20 + 0x10) { // 0xE0..0xEF (sum is 0xF0)
+ return 3;
+ } else { // 0xF0..0xFF. NOTE(review): 0xF8..0xFF are not valid UTF-8 lead bytes but are not distinguished; they yield 4.
+ return 4;
+ }
+}
+
unsigned int UTF16Length(const char *s, unsigned int len) {
unsigned int ulen = 0;
unsigned int charLen;
diff --git a/src/UniConversion.h b/src/UniConversion.h
index fd420a688..8cc3d0a18 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -7,6 +7,7 @@
unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen);
void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len);
+unsigned int UTF8CharLength(unsigned char ch); // Number of bytes (1..4) in the UTF-8 character, judged from the single byte ch.
unsigned int UTF16Length(const char *s, unsigned int len);
unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen);