diff options
| author | nyamatongwe <devnull@localhost> | 2007-04-19 04:38:53 +0000 | 
|---|---|---|
| committer | nyamatongwe <devnull@localhost> | 2007-04-19 04:38:53 +0000 | 
| commit | 6f02bfd7333bd67d7e89531c9e80ee3b6d0915c7 (patch) | |
| tree | 1f7678e4a7fa68f9f761bd4650b9a84339841db8 /src | |
| parent | 1237ee3c6a123b5b6bd6270bbadc3ba569a10854 (diff) | |
| download | scintilla-mirror-6f02bfd7333bd67d7e89531c9e80ee3b6d0915c7.tar.gz | |
All Unicode planes supported, not just the Basic Multilingual Plane.
Diffstat (limited to 'src')
| -rw-r--r-- | src/Document.cxx | 4 | ||||
| -rw-r--r-- | src/UniConversion.cxx | 67 | ||||
| -rw-r--r-- | src/UniConversion.h | 6 | 
3 files changed, 61 insertions, 16 deletions
```diff
diff --git a/src/Document.cxx b/src/Document.cxx
index a25e3070d..3061bbc37 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -266,7 +266,9 @@ int Document::LenChar(int pos) {
 		if (ch < 0x80)
 			return 1;
 		int len = 2;
-		if (ch >= (0x80 + 0x40 + 0x20))
+		if (ch >= (0x80 + 0x40 + 0x20 + 0x10))
+			len = 4;
+		else if (ch >= (0x80 + 0x40 + 0x20))
 			len = 3;
 		int lengthDoc = Length();
 		if ((pos + len) > lengthDoc)
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 363db90f4..863eb82cd 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -9,49 +9,80 @@
 
 #include "UniConversion.h"
 
+enum { SURROGATE_LEAD_FIRST = 0xD800 };
+enum { SURROGATE_TRAIL_FIRST = 0xDC00 };
+enum { SURROGATE_TRAIL_LAST = 0xDFFF };
+
 unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen) {
 	unsigned int len = 0;
-	for (unsigned int i = 0; i < tlen && uptr[i]; i++) {
+	for (unsigned int i = 0; i < tlen && uptr[i];) {
 		unsigned int uch = uptr[i];
-		if (uch < 0x80)
+		if (uch < 0x80) {
 			len++;
-		else if (uch < 0x800)
+		} else if (uch < 0x800) {
 			len += 2;
-		else
-			len +=3;
+		} else if ((uch >= SURROGATE_LEAD_FIRST) &&
+			(uch <= SURROGATE_TRAIL_LAST)) {
+			len += 4;
+			i++;
+		} else {
+			len += 3;
+		}
+		i++;
 	}
 	return len;
 }
 
-void UTF8FromUCS2(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) {
+void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) {
 	int k = 0;
-	for (unsigned int i = 0; i < tlen && uptr[i]; i++) {
+	for (unsigned int i = 0; i < tlen && uptr[i];) {
 		unsigned int uch = uptr[i];
 		if (uch < 0x80) {
 			putf[k++] = static_cast<char>(uch);
 		} else if (uch < 0x800) {
 			putf[k++] = static_cast<char>(0xC0 | (uch >> 6));
 			putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
+		} else if ((uch >= SURROGATE_LEAD_FIRST) &&
+			(uch <= SURROGATE_TRAIL_LAST)) {
+			// Half a surrogate pair
+			i++;
+			unsigned int xch = 0x10000 + ((uch & 0x3ff) << 10) + (uptr[i] & 0x3ff);
+			putf[k++] = static_cast<char>(0xF0 | (xch >> 18));
+			putf[k++] = static_cast<char>(0x80 | (xch >> 12) & 0x3f);
+			putf[k++] = static_cast<char>(0x80 | ((xch >> 6) & 0x3f));
+			putf[k++] = static_cast<char>(0x80 | (xch & 0x3f));
 		} else {
 			putf[k++] = static_cast<char>(0xE0 | (uch >> 12));
 			putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
 			putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
 		}
+		i++;
 	}
 	putf[len] = '\0';
 }
 
-unsigned int UCS2Length(const char *s, unsigned int len) {
+unsigned int UTF16Length(const char *s, unsigned int len) {
 	unsigned int ulen = 0;
-	for (unsigned int i=0;i<len;i++) {
+	unsigned int charLen;
+	for (unsigned int i=0;i<len;) {
 		unsigned char ch = static_cast<unsigned char>(s[i]);
-		if ((ch < 0x80) || (ch > (0x80 + 0x40)))
+		if (ch < 0x80) {
+			charLen = 1;
+		} else if (ch < 0x80 + 0x40 + 0x20) {
+			charLen = 2;
+		} else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
+			charLen = 3;
+		} else {
+			charLen = 4;
 			ulen++;
+		}
+		i += charLen;
+		ulen++;
 	}
 	return ulen;
 }
 
-unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) {
+unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) {
 	unsigned int ui=0;
 	const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
 	unsigned int i=0;
@@ -63,12 +94,24 @@ unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsign
 			tbuf[ui] = static_cast<wchar_t>((ch & 0x1F) << 6);
 			ch = us[i++];
 			tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F));
-		} else {
+		} else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
 			tbuf[ui] = static_cast<wchar_t>((ch & 0xF) << 12);
 			ch = us[i++];
 			tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + ((ch & 0x7F) << 6));
 			ch = us[i++];
 			tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F));
+		} else {
+			// Outside the BMP so need two surrogates
+			int val = (ch & 0x7) << 18;
+			ch = us[i++];
+			val += (ch & 0x3F) << 12;
+			ch = us[i++];
+			val += (ch & 0x3F) << 6;
+			ch = us[i++];
+			val += (ch & 0x3F);
+			tbuf[ui] = static_cast<wchar_t>(((val - 0x10000) >> 10) + SURROGATE_LEAD_FIRST);
+			ui++;
+			tbuf[ui] = static_cast<wchar_t>((val & 0x3ff) + SURROGATE_TRAIL_FIRST);
 		}
 		ui++;
 	}
diff --git a/src/UniConversion.h b/src/UniConversion.h
index bd1d7754d..fd420a688 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -6,7 +6,7 @@
 // The License.txt file describes the conditions under which this software may be distributed.
 
 unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen);
-void UTF8FromUCS2(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len);
-unsigned int UCS2Length(const char *s, unsigned int len);
-unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen);
+void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len);
+unsigned int UTF16Length(const char *s, unsigned int len);
+unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen);
 
```
