diff options
| author | nyamatongwe <unknown> | 2007-04-19 04:38:53 +0000 | 
|---|---|---|
| committer | nyamatongwe <unknown> | 2007-04-19 04:38:53 +0000 | 
| commit | 476e533e7277cfd122f3ca3472783831c9e47ca5 (patch) | |
| tree | 1f7678e4a7fa68f9f761bd4650b9a84339841db8 /src/UniConversion.cxx | |
| parent | 101ccc292a2a2623d6680e8f488f762bd5c9a091 (diff) | |
| download | scintilla-mirror-476e533e7277cfd122f3ca3472783831c9e47ca5.tar.gz | |
All Unicode planes supported, not just the Basic Multilingual Plane.
Diffstat (limited to 'src/UniConversion.cxx')
| -rw-r--r-- | src/UniConversion.cxx | 67 | 
1 file changed, 55 insertions, 12 deletions
```diff
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 363db90f4..863eb82cd 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -9,49 +9,80 @@
 
 #include "UniConversion.h"
 
+enum { SURROGATE_LEAD_FIRST = 0xD800 };
+enum { SURROGATE_TRAIL_FIRST = 0xDC00 };
+enum { SURROGATE_TRAIL_LAST = 0xDFFF };
+
 unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen) {
 	unsigned int len = 0;
-	for (unsigned int i = 0; i < tlen && uptr[i]; i++) {
+	for (unsigned int i = 0; i < tlen && uptr[i];) {
 		unsigned int uch = uptr[i];
-		if (uch < 0x80)
+		if (uch < 0x80) {
 			len++;
-		else if (uch < 0x800)
+		} else if (uch < 0x800) {
 			len += 2;
-		else
-			len +=3;
+		} else if ((uch >= SURROGATE_LEAD_FIRST) &&
+			(uch <= SURROGATE_TRAIL_LAST)) {
+			len += 4;
+			i++;
+		} else {
+			len += 3;
+		}
+		i++;
 	}
 	return len;
 }
 
-void UTF8FromUCS2(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) {
+void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) {
 	int k = 0;
-	for (unsigned int i = 0; i < tlen && uptr[i]; i++) {
+	for (unsigned int i = 0; i < tlen && uptr[i];) {
 		unsigned int uch = uptr[i];
 		if (uch < 0x80) {
 			putf[k++] = static_cast<char>(uch);
 		} else if (uch < 0x800) {
 			putf[k++] = static_cast<char>(0xC0 | (uch >> 6));
 			putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
+		} else if ((uch >= SURROGATE_LEAD_FIRST) &&
+			(uch <= SURROGATE_TRAIL_LAST)) {
+			// Half a surrogate pair
+			i++;
+			unsigned int xch = 0x10000 + ((uch & 0x3ff) << 10) + (uptr[i] & 0x3ff);
+			putf[k++] = static_cast<char>(0xF0 | (xch >> 18));
+			putf[k++] = static_cast<char>(0x80 | (xch >> 12) & 0x3f);
+			putf[k++] = static_cast<char>(0x80 | ((xch >> 6) & 0x3f));
+			putf[k++] = static_cast<char>(0x80 | (xch & 0x3f));
 		} else {
 			putf[k++] = static_cast<char>(0xE0 | (uch >> 12));
 			putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
 			putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
 		}
+		i++;
 	}
 	putf[len] = '\0';
 }
 
-unsigned int UCS2Length(const char *s, unsigned int len) {
+unsigned int UTF16Length(const char *s, unsigned int len) {
 	unsigned int ulen = 0;
-	for (unsigned int i=0;i<len;i++) {
+	unsigned int charLen;
+	for (unsigned int i=0;i<len;) {
 		unsigned char ch = static_cast<unsigned char>(s[i]);
-		if ((ch < 0x80) || (ch > (0x80 + 0x40)))
+		if (ch < 0x80) {
+			charLen = 1;
+		} else if (ch < 0x80 + 0x40 + 0x20) {
+			charLen = 2;
+		} else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
+			charLen = 3;
+		} else {
+			charLen = 4;
 			ulen++;
+		}
+		i += charLen;
+		ulen++;
 	}
 	return ulen;
 }
 
-unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) {
+unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) {
 	unsigned int ui=0;
 	const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
 	unsigned int i=0;
@@ -63,12 +94,24 @@ unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsign
 			tbuf[ui] = static_cast<wchar_t>((ch & 0x1F) << 6);
 			ch = us[i++];
 			tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F));
-		} else {
+		} else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
 			tbuf[ui] = static_cast<wchar_t>((ch & 0xF) << 12);
 			ch = us[i++];
 			tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + ((ch & 0x7F) << 6));
 			ch = us[i++];
 			tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F));
+		} else {
+			// Outside the BMP so need two surrogates
+			int val = (ch & 0x7) << 18;
+			ch = us[i++];
+			val += (ch & 0x3F) << 12;
+			ch = us[i++];
+			val += (ch & 0x3F) << 6;
+			ch = us[i++];
+			val += (ch & 0x3F);
+			tbuf[ui] = static_cast<wchar_t>(((val - 0x10000) >> 10) + SURROGATE_LEAD_FIRST);
+			ui++;
+			tbuf[ui] = static_cast<wchar_t>((val & 0x3ff) + SURROGATE_TRAIL_FIRST);
 		}
 		ui++;
 	}
```
