diff options
author | nyamatongwe <unknown> | 2007-04-19 04:38:53 +0000 |
---|---|---|
committer | nyamatongwe <unknown> | 2007-04-19 04:38:53 +0000 |
commit | 476e533e7277cfd122f3ca3472783831c9e47ca5 (patch) | |
tree | 1f7678e4a7fa68f9f761bd4650b9a84339841db8 | |
parent | 101ccc292a2a2623d6680e8f488f762bd5c9a091 (diff) | |
download | scintilla-mirror-476e533e7277cfd122f3ca3472783831c9e47ca5.tar.gz |
All Unicode planes supported, not just the Basic Multilingual Plane.
-rw-r--r-- | gtk/PlatGTK.cxx | 6 | ||||
-rw-r--r-- | src/Document.cxx | 4 | ||||
-rw-r--r-- | src/UniConversion.cxx | 67 | ||||
-rw-r--r-- | src/UniConversion.h | 6 | ||||
-rw-r--r-- | win32/PlatWin.cxx | 29 | ||||
-rw-r--r-- | win32/ScintillaWin.cxx | 24 |
6 files changed, 92 insertions, 44 deletions
diff --git a/gtk/PlatGTK.cxx b/gtk/PlatGTK.cxx index 59e4d9dca..e361b3e17 100644 --- a/gtk/PlatGTK.cxx +++ b/gtk/PlatGTK.cxx @@ -1293,7 +1293,7 @@ void SurfaceImpl::DrawTextBase(PRectangle rc, Font &font_, int ybase, const char len = maxLengthTextRun-1; int wclen; if (et == UTF8) { - wclen = UCS2FromUTF8(s, len, + wclen = UTF16FromUTF8(s, len, static_cast<wchar_t *>(static_cast<void *>(wctext)), maxLengthTextRun - 1); } else { // dbcs, so convert using current locale char sMeasure[maxLengthTextRun]; @@ -1468,7 +1468,7 @@ void SurfaceImpl::MeasureWidths(Font &font_, const char *s, int len, int *positi len = maxLengthTextRun-1; int wclen; if (et == UTF8) { - wclen = UCS2FromUTF8(s, len, + wclen = UTF16FromUTF8(s, len, static_cast<wchar_t *>(static_cast<void *>(wctext)), maxLengthTextRun - 1); } else { // dbcsMode, so convert using current locale char sDraw[maxLengthTextRun]; @@ -1554,7 +1554,7 @@ int SurfaceImpl::WidthText(Font &font_, const char *s, int len) { #endif if (et == UTF8) { GdkWChar wctext[maxLengthTextRun]; - size_t wclen = UCS2FromUTF8(s, len, static_cast<wchar_t *>(static_cast<void *>(wctext)), + size_t wclen = UTF16FromUTF8(s, len, static_cast<wchar_t *>(static_cast<void *>(wctext)), sizeof(wctext) / sizeof(GdkWChar) - 1); wctext[wclen] = L'\0'; return gdk_text_width_wc(PFont(font_)->pfont, wctext, wclen); diff --git a/src/Document.cxx b/src/Document.cxx index a25e3070d..3061bbc37 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -266,7 +266,9 @@ int Document::LenChar(int pos) { if (ch < 0x80) return 1; int len = 2; - if (ch >= (0x80 + 0x40 + 0x20)) + if (ch >= (0x80 + 0x40 + 0x20 + 0x10)) + len = 4; + else if (ch >= (0x80 + 0x40 + 0x20)) len = 3; int lengthDoc = Length(); if ((pos + len) > lengthDoc) diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx index 363db90f4..863eb82cd 100644 --- a/src/UniConversion.cxx +++ b/src/UniConversion.cxx @@ -9,49 +9,80 @@ #include "UniConversion.h" +enum { SURROGATE_LEAD_FIRST = 0xD800 }; +enum { SURROGATE_TRAIL_FIRST = 0xDC00 }; +enum { SURROGATE_TRAIL_LAST = 0xDFFF }; + unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen) { unsigned int len = 0; - for (unsigned int i = 0; i < tlen && uptr[i]; i++) { + for (unsigned int i = 0; i < tlen && uptr[i];) { unsigned int uch = uptr[i]; - if (uch < 0x80) + if (uch < 0x80) { len++; - else if (uch < 0x800) + } else if (uch < 0x800) { len += 2; - else - len +=3; + } else if ((uch >= SURROGATE_LEAD_FIRST) && + (uch <= SURROGATE_TRAIL_LAST)) { + len += 4; + i++; + } else { + len += 3; + } + i++; } return len; } -void UTF8FromUCS2(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) { +void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) { int k = 0; - for (unsigned int i = 0; i < tlen && uptr[i]; i++) { + for (unsigned int i = 0; i < tlen && uptr[i];) { unsigned int uch = uptr[i]; if (uch < 0x80) { putf[k++] = static_cast<char>(uch); } else if (uch < 0x800) { putf[k++] = static_cast<char>(0xC0 | (uch >> 6)); putf[k++] = static_cast<char>(0x80 | (uch & 0x3f)); + } else if ((uch >= SURROGATE_LEAD_FIRST) && + (uch <= SURROGATE_TRAIL_LAST)) { + // Half a surrogate pair + i++; + unsigned int xch = 0x10000 + ((uch & 0x3ff) << 10) + (uptr[i] & 0x3ff); + putf[k++] = static_cast<char>(0xF0 | (xch >> 18)); + putf[k++] = static_cast<char>(0x80 | (xch >> 12) & 0x3f); + putf[k++] = static_cast<char>(0x80 | ((xch >> 6) & 0x3f)); + putf[k++] = static_cast<char>(0x80 | (xch & 0x3f)); } else { putf[k++] = static_cast<char>(0xE0 | (uch >> 12)); putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f)); putf[k++] = static_cast<char>(0x80 | (uch & 0x3f)); } + i++; } putf[len] = '\0'; } -unsigned int UCS2Length(const char *s, unsigned int len) { +unsigned int UTF16Length(const char *s, unsigned int len) { unsigned int ulen = 0; - for (unsigned int i=0;i<len;i++) { + unsigned int charLen; + for (unsigned int i=0;i<len;) { unsigned char ch = static_cast<unsigned char>(s[i]); - if ((ch < 0x80) || (ch > (0x80 + 0x40))) + if (ch < 0x80) { + charLen = 1; + } else if (ch < 0x80 + 0x40 + 0x20) { + charLen = 2; + } else if (ch < 0x80 + 0x40 + 0x20 + 0x10) { + charLen = 3; + } else { + charLen = 4; ulen++; + } + i += charLen; + ulen++; } return ulen; } -unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) { +unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) { unsigned int ui=0; const unsigned char *us = reinterpret_cast<const unsigned char *>(s); unsigned int i=0; @@ -63,12 +94,24 @@ unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsign tbuf[ui] = static_cast<wchar_t>((ch & 0x1F) << 6); ch = us[i++]; tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F)); - } else { + } else if (ch < 0x80 + 0x40 + 0x20 + 0x10) { tbuf[ui] = static_cast<wchar_t>((ch & 0xF) << 12); ch = us[i++]; tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + ((ch & 0x7F) << 6)); ch = us[i++]; tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F)); + } else { + // Outside the BMP so need two surrogates + int val = (ch & 0x7) << 18; + ch = us[i++]; + val += (ch & 0x3F) << 12; + ch = us[i++]; + val += (ch & 0x3F) << 6; + ch = us[i++]; + val += (ch & 0x3F); + tbuf[ui] = static_cast<wchar_t>(((val - 0x10000) >> 10) + SURROGATE_LEAD_FIRST); + ui++; + tbuf[ui] = static_cast<wchar_t>((val & 0x3ff) + SURROGATE_TRAIL_FIRST); } ui++; } diff --git a/src/UniConversion.h b/src/UniConversion.h index bd1d7754d..fd420a688 100644 --- a/src/UniConversion.h +++ b/src/UniConversion.h @@ -6,7 +6,7 @@ // The License.txt file describes the conditions under which this software may be distributed. unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen); -void UTF8FromUCS2(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len); -unsigned int UCS2Length(const char *s, unsigned int len); -unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen); +void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len); +unsigned int UTF16Length(const char *s, unsigned int len); +unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen); diff --git a/win32/PlatWin.cxx b/win32/PlatWin.cxx index b2cc554bb..dd3213bf9 100644 --- a/win32/PlatWin.cxx +++ b/win32/PlatWin.cxx @@ -684,7 +684,7 @@ void SurfaceImpl::DrawTextCommon(PRectangle rc, Font &font_, int ybase, const ch wchar_t tbuf[MAX_US_LEN]; int tlen; if (unicodeMode) { - tlen = UCS2FromUTF8(s, len, tbuf, MAX_US_LEN); + tlen = UTF16FromUTF8(s, len, tbuf, MAX_US_LEN); } else { // Support Asian string display in 9x English tlen = ::MultiByteToWideChar(codePage, 0, s, len, NULL, 0); @@ -740,7 +740,7 @@ int SurfaceImpl::WidthText(Font &font_, const char *s, int len) { SIZE sz={0,0}; if (unicodeMode) { wchar_t tbuf[MAX_US_LEN]; - int tlen = UCS2FromUTF8(s, len, tbuf, MAX_US_LEN); + int tlen = UTF16FromUTF8(s, len, tbuf, MAX_US_LEN); ::GetTextExtentPoint32W(hdc, tbuf, tlen, &sz); } else if (IsNT() || (codePage==0) || win9xACPSame) { ::GetTextExtentPoint32A(hdc, s, Platform::Minimum(len, maxLenText), &sz); @@ -760,7 +760,7 @@ void SurfaceImpl::MeasureWidths(Font &font_, const char *s, int len, int *positi int fit = 0; if (unicodeMode) { wchar_t tbuf[MAX_US_LEN]; - int tlen = UCS2FromUTF8(s, len, tbuf, MAX_US_LEN); + int tlen = UTF16FromUTF8(s, len, tbuf, MAX_US_LEN); int poses[MAX_US_LEN]; fit = tlen; if (!::GetTextExtentExPointW(hdc, tbuf, tlen, maxWidthMeasure, &fit, poses, &sz)) { @@ -778,14 +778,17 @@ void SurfaceImpl::MeasureWidths(Font &font_, const char *s, int len, int *positi int i=0; while (ui<fit) { unsigned char uch = us[i]; - positions[i++] = poses[ui]; - if (uch >= 0x80) { - if (uch < (0x80 + 0x40 + 0x20)) { - positions[i++] = poses[ui]; - } else { - positions[i++] = poses[ui]; - positions[i++] = poses[ui]; - } + unsigned int lenChar = 1; + if (uch >= (0x80 + 0x40 + 0x20 + 0x10)) { + lenChar = 4; + ui++; + } else if (uch >= (0x80 + 0x40 + 0x20)) { + lenChar = 3; + } else if (uch >= (0x80)) { + lenChar = 2; + } + for (unsigned int bytePos=0; bytePos<lenChar; bytePos++) { + positions[i++] = poses[ui]; } ui++; } @@ -1312,7 +1315,7 @@ PRectangle ListBoxX::GetDesiredRect() { int len = widestItem ? strlen(widestItem) : 0; if (unicodeMode) { wchar_t tbuf[MAX_US_LEN]; - len = UCS2FromUTF8(widestItem, len, tbuf, sizeof(tbuf)/sizeof(wchar_t)-1); + len = UTF16FromUTF8(widestItem, len, tbuf, sizeof(tbuf)/sizeof(wchar_t)-1); tbuf[len] = L'\0'; ::GetTextExtentPoint32W(hdc, tbuf, len, &textSize); } else { @@ -1431,7 +1434,7 @@ void ListBoxX::Draw(DRAWITEMSTRUCT *pDrawItem) { if (unicodeMode) { wchar_t tbuf[MAX_US_LEN]; - int tlen = UCS2FromUTF8(text, len, tbuf, sizeof(tbuf)/sizeof(wchar_t)-1); + int tlen = UTF16FromUTF8(text, len, tbuf, sizeof(tbuf)/sizeof(wchar_t)-1); tbuf[tlen] = L'\0'; ::DrawTextW(pDrawItem->hDC, tbuf, tlen, &rcText, DT_NOPREFIX|DT_END_ELLIPSIS|DT_SINGLELINE|DT_NOCLIP); } else { diff --git a/win32/ScintillaWin.cxx b/win32/ScintillaWin.cxx index 046da18a6..acca53d6a 100644 --- a/win32/ScintillaWin.cxx +++ b/win32/ScintillaWin.cxx @@ -487,7 +487,7 @@ sptr_t ScintillaWin::HandleComposition(uptr_t wParam, sptr_t lParam) { if (IsUnicodeMode()) { char utfval[maxLenInputIME * 3]; unsigned int len = UTF8Length(wcs, wides); - UTF8FromUCS2(wcs, wides, utfval, len); + UTF8FromUTF16(wcs, wides, utfval, len); utfval[len] = '\0'; AddCharUTF(utfval, len); } else { @@ -725,7 +725,7 @@ sptr_t ScintillaWin::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam //char utfval[4]; //wchar_t wcs[2] = {wParam, 0}; //unsigned int len = UTF8Length(wcs, 1); - //UTF8FromUCS2(wcs, 1, utfval, len); + //UTF8FromUTF16(wcs, 1, utfval, len); //AddCharUTF(utfval, len); AddCharBytes('\0', LOBYTE(wParam)); } else { @@ -744,7 +744,7 @@ sptr_t ScintillaWin::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam char utfval[4]; wchar_t wcs[2] = {static_cast<wchar_t>(wParam), 0}; unsigned int len = UTF8Length(wcs, 1); - UTF8FromUCS2(wcs, 1, utfval, len); + UTF8FromUTF16(wcs, 1, utfval, len); AddCharUTF(utfval, len); return 1; } else { @@ -1301,7 +1301,7 @@ void ScintillaWin::Paste() { len = UTF8Length(uptr, bytes / 2); putf = new char[len + 1]; if (putf) { - UTF8FromUCS2(uptr, bytes / 2, putf, len); + UTF8FromUTF16(uptr, bytes / 2, putf, len); } } else { // CF_UNICODETEXT available, but not in Unicode mode @@ -1346,8 +1346,8 @@ void ScintillaWin::Paste() { unsigned int mlen = UTF8Length(uptr, ulen); char *putf = new char[mlen + 1]; if (putf) { - // CP_UTF8 not available on Windows 95, so use UTF8FromUCS2() - UTF8FromUCS2(uptr, ulen, putf, mlen); + // CP_UTF8 not available on Windows 95, so use UTF8FromUTF16() + UTF8FromUTF16(uptr, ulen, putf, mlen); } delete []uptr; @@ -1775,7 +1775,7 @@ void ScintillaWin::AddCharBytes(char b0, char b1) { ::MultiByteToWideChar(inputCodePage, 0, ansiChars, 1, wcs, 1); } unsigned int len = UTF8Length(wcs, 1); - UTF8FromUCS2(wcs, 1, utfval, len); + UTF8FromUTF16(wcs, 1, utfval, len); utfval[len] = '\0'; AddCharUTF(utfval, len ? len : 1); } else if (b0) { @@ -1803,10 +1803,10 @@ void ScintillaWin::CopyToClipboard(const SelectionText &selectedText) { // Default Scintilla behaviour in Unicode mode if (IsUnicodeMode()) { - int uchars = UCS2Length(selectedText.s, selectedText.len); + int uchars = UTF16Length(selectedText.s, selectedText.len); uniText.Allocate(2 * uchars); if (uniText) { - UCS2FromUTF8(selectedText.s, selectedText.len, static_cast<wchar_t *>(uniText.ptr), uchars); + UTF16FromUTF8(selectedText.s, selectedText.len, static_cast<wchar_t *>(uniText.ptr), uchars); } } else { // Not Unicode mode @@ -2093,7 +2093,7 @@ STDMETHODIMP ScintillaWin::Drop(LPDATAOBJECT pIDataSource, DWORD grfKeyState, int dataLen = UTF8Length(udata, tlen/2); data = new char[dataLen+1]; if (data) { - UTF8FromUCS2(udata, tlen/2, data, dataLen); + UTF8FromUTF16(udata, tlen/2, data, dataLen); dataAllocated = true; } } @@ -2153,10 +2153,10 @@ STDMETHODIMP ScintillaWin::GetData(FORMATETC *pFEIn, STGMEDIUM *pSTM) { GlobalMemory text; if (pFEIn->cfFormat == CF_UNICODETEXT) { - int uchars = UCS2Length(drag.s, drag.len); + int uchars = UTF16Length(drag.s, drag.len); text.Allocate(2 * uchars); if (text) { - UCS2FromUTF8(drag.s, drag.len, static_cast<wchar_t *>(text.ptr), uchars); + UTF16FromUTF8(drag.s, drag.len, static_cast<wchar_t *>(text.ptr), uchars); } } else { text.Allocate(drag.len); |