diff options
Diffstat (limited to 'src/UniConversion.cxx')
| -rw-r--r-- | src/UniConversion.cxx | 77 | 
1 files changed, 77 insertions, 0 deletions
// UniConversion.cxx - functions to handle UTF-8 and UCS-2 strings
// Copyright 1998-2000 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.

#include <stdlib.h>

// The header only needs to be visible when it is on the include path; the
// definitions below do not depend on it to compile.
#if defined(__has_include)
#if __has_include("UniConversion.h")
#include "UniConversion.h"
#endif
#else
#include "UniConversion.h"
#endif

// Return the number of bytes needed to hold the UTF-8 form of a UCS-2 string.
//   uptr  source code units
//   tlen  maximum number of units to examine; scanning also stops at the
//         first zero unit
// The count does not include a terminating NUL.
unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen) {
	unsigned int len = 0;
	for (unsigned int i = 0; i < tlen && uptr[i]; i++) {
		const unsigned int uch = uptr[i];
		if (uch < 0x80) {
			len += 1;	// 0xxxxxxx
		} else if (uch < 0x800) {
			len += 2;	// 110xxxxx 10xxxxxx
		} else {
			len += 3;	// 1110xxxx 10xxxxxx 10xxxxxx
		}
	}
	return len;
}

// Encode a UCS-2 string as UTF-8.
//   uptr  source code units
//   tlen  maximum number of units to convert; conversion also stops at the
//         first zero unit
//   putf  destination, which must have room for len + 1 bytes
//   len   maximum number of encoded bytes to write (normally the value
//         returned by UTF8Length for the same input)
// At most len encoded bytes are written. A character whose encoding would not
// fit completely in the remaining space ends the conversion, so the output
// never finishes in the middle of a multi-byte sequence. A NUL is stored
// directly after the last encoded byte and, as before, at putf[len].
void UTF8FromUCS2(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) {
	unsigned int k = 0;	// bytes written so far; never exceeds len
	for (unsigned int i = 0; i < tlen && uptr[i]; i++) {
		const unsigned int uch = uptr[i];
		const unsigned int need = (uch < 0x80) ? 1 : ((uch < 0x800) ? 2 : 3);
		if (need > len - k) {
			// Not enough room left for the whole character.
			break;
		}
		if (need == 1) {
			putf[k++] = static_cast<char>(uch);
		} else if (need == 2) {
			putf[k++] = static_cast<char>(0xC0 | (uch >> 6));
			putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
		} else {
			putf[k++] = static_cast<char>(0xE0 | (uch >> 12));
			putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
			putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
		}
	}
	// Terminate right after the text so no stale bytes follow it when fewer
	// than len bytes were produced ...
	putf[k] = '\0';
	// ... and keep the terminator at putf[len] for callers relying on it.
	putf[len] = '\0';
}

// Return the number of UCS-2 units that UCS2FromUTF8 produces for a UTF-8
// buffer.
//   s    UTF-8 bytes
//   len  number of bytes in s
// Every byte that is not a continuation byte (10xxxxxx) starts a character
// and is counted once. This includes the lead byte 0xC0, which the converter
// decodes as a two-byte lead.
unsigned int UCS2Length(const char *s, unsigned int len) {
	unsigned int ulen = 0;
	for (unsigned int i = 0; i < len; i++) {
		const unsigned char ch = static_cast<unsigned char>(s[i]);
		if ((ch & 0xC0) != 0x80) {
			ulen++;
		}
	}
	return ulen;
}

// Decode UTF-8 into UCS-2.
//   s     UTF-8 bytes
//   len   number of bytes in s
//   tbuf  destination for the decoded units
//   tlen  capacity of tbuf in units
// Returns the number of units stored in tbuf. No terminator is appended.
// When USE_API is defined the work is delegated to ::MultiByteToWideChar.
// Otherwise bytes below 0x80 are copied, bytes below 0xE0 start a two-byte
// sequence and all other bytes start a three-byte sequence. If the input ends
// in the middle of a sequence, U+FFFD is stored for it and decoding stops;
// bytes beyond s[len - 1] are never read.
unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) {
#ifdef USE_API
	return ::MultiByteToWideChar(CP_UTF8, 0, s, len, tbuf, tlen);
#else
	const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
	unsigned int ui = 0;	// units written
	unsigned int i = 0;	// bytes consumed
	while ((i < len) && (ui < tlen)) {
		const unsigned char lead = us[i++];
		unsigned int value;
		unsigned int trail;	// continuation bytes still expected
		if (lead < 0x80) {
			value = lead;
			trail = 0;
		} else if (lead < 0x80 + 0x40 + 0x20) {
			value = lead & 0x1F;
			trail = 1;
		} else {
			value = lead & 0xF;
			trail = 2;
		}
		if (trail > len - i) {
			// Sequence is cut off by the end of the input.
			tbuf[ui++] = static_cast<wchar_t>(0xFFFD);
			break;
		}
		for (; trail > 0; trail--) {
			// Each continuation byte contributes its low six bits.
			value = (value << 6) + (us[i++] & 0x3F);
		}
		tbuf[ui++] = static_cast<wchar_t>(value);
	}
	return ui;
#endif
}
