diff options
author | nyamatongwe <unknown> | 2007-04-19 04:38:53 +0000 |
---|---|---|
committer | nyamatongwe <unknown> | 2007-04-19 04:38:53 +0000 |
commit | 476e533e7277cfd122f3ca3472783831c9e47ca5 (patch) | |
tree | 1f7678e4a7fa68f9f761bd4650b9a84339841db8 /src/UniConversion.cxx | |
parent | 101ccc292a2a2623d6680e8f488f762bd5c9a091 (diff) | |
download | scintilla-mirror-476e533e7277cfd122f3ca3472783831c9e47ca5.tar.gz |
All Unicode planes supported, not just the Basic Multilingual Plane.
Diffstat (limited to 'src/UniConversion.cxx')
-rw-r--r-- | src/UniConversion.cxx | 67 |
1 files changed, 55 insertions, 12 deletions
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 363db90f4..863eb82cd 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -9,49 +9,80 @@
 
 #include "UniConversion.h"
 
+enum { SURROGATE_LEAD_FIRST = 0xD800 };
+enum { SURROGATE_TRAIL_FIRST = 0xDC00 };
+enum { SURROGATE_TRAIL_LAST = 0xDFFF };
+
 unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen) {
 	unsigned int len = 0;
-	for (unsigned int i = 0; i < tlen && uptr[i]; i++) {
+	for (unsigned int i = 0; i < tlen && uptr[i];) {
 		unsigned int uch = uptr[i];
-		if (uch < 0x80)
+		if (uch < 0x80) {
 			len++;
-		else if (uch < 0x800)
+		} else if (uch < 0x800) {
 			len += 2;
-		else
-			len +=3;
+		} else if ((uch >= SURROGATE_LEAD_FIRST) &&
+			(uch <= SURROGATE_TRAIL_LAST)) {
+			len += 4;
+			i++;
+		} else {
+			len += 3;
+		}
+		i++;
 	}
 	return len;
 }
 
-void UTF8FromUCS2(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) {
+void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) {
 	int k = 0;
-	for (unsigned int i = 0; i < tlen && uptr[i]; i++) {
+	for (unsigned int i = 0; i < tlen && uptr[i];) {
 		unsigned int uch = uptr[i];
 		if (uch < 0x80) {
 			putf[k++] = static_cast<char>(uch);
 		} else if (uch < 0x800) {
 			putf[k++] = static_cast<char>(0xC0 | (uch >> 6));
 			putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
+		} else if ((uch >= SURROGATE_LEAD_FIRST) &&
+			(uch <= SURROGATE_TRAIL_LAST)) {
+			// Half a surrogate pair
+			i++;
+			unsigned int xch = 0x10000 + ((uch & 0x3ff) << 10) + (uptr[i] & 0x3ff);
+			putf[k++] = static_cast<char>(0xF0 | (xch >> 18));
+			putf[k++] = static_cast<char>(0x80 | (xch >> 12) & 0x3f);
+			putf[k++] = static_cast<char>(0x80 | ((xch >> 6) & 0x3f));
+			putf[k++] = static_cast<char>(0x80 | (xch & 0x3f));
 		} else {
 			putf[k++] = static_cast<char>(0xE0 | (uch >> 12));
 			putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
 			putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
 		}
+		i++;
 	}
 	putf[len] = '\0';
 }
 
-unsigned int UCS2Length(const char *s, unsigned int len) {
+unsigned int UTF16Length(const char *s, unsigned int len) {
 	unsigned int ulen = 0;
-	for (unsigned int i=0;i<len;i++) {
+	unsigned int charLen;
+	for (unsigned int i=0;i<len;) {
 		unsigned char ch = static_cast<unsigned char>(s[i]);
-		if ((ch < 0x80) || (ch > (0x80 + 0x40)))
+		if (ch < 0x80) {
+			charLen = 1;
+		} else if (ch < 0x80 + 0x40 + 0x20) {
+			charLen = 2;
+		} else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
+			charLen = 3;
+		} else {
+			charLen = 4;
 			ulen++;
+		}
+		i += charLen;
+		ulen++;
 	}
 	return ulen;
 }
 
-unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) {
+unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) {
 	unsigned int ui=0;
 	const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
 	unsigned int i=0;
@@ -63,12 +94,24 @@ unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsign
 			tbuf[ui] = static_cast<wchar_t>((ch & 0x1F) << 6);
 			ch = us[i++];
 			tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F));
-		} else {
+		} else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
 			tbuf[ui] = static_cast<wchar_t>((ch & 0xF) << 12);
 			ch = us[i++];
 			tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + ((ch & 0x7F) << 6));
 			ch = us[i++];
 			tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F));
+		} else {
+			// Outside the BMP so need two surrogates
+			int val = (ch & 0x7) << 18;
+			ch = us[i++];
+			val += (ch & 0x3F) << 12;
+			ch = us[i++];
+			val += (ch & 0x3F) << 6;
+			ch = us[i++];
+			val += (ch & 0x3F);
+			tbuf[ui] = static_cast<wchar_t>(((val - 0x10000) >> 10) + SURROGATE_LEAD_FIRST);
+			ui++;
+			tbuf[ui] = static_cast<wchar_t>((val & 0x3ff) + SURROGATE_TRAIL_FIRST);
 		}
 		ui++;
 	}