about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- gtk/PlatGTK.cxx 6
-rw-r--r-- src/Document.cxx 4
-rw-r--r-- src/UniConversion.cxx 67
-rw-r--r-- src/UniConversion.h 6
-rw-r--r-- win32/PlatWin.cxx 29
-rw-r--r-- win32/ScintillaWin.cxx 24
6 files changed, 92 insertions, 44 deletions
diff --git a/gtk/PlatGTK.cxx b/gtk/PlatGTK.cxx
index 59e4d9dca..e361b3e17 100644
--- a/gtk/PlatGTK.cxx
+++ b/gtk/PlatGTK.cxx
@@ -1293,7 +1293,7 @@ void SurfaceImpl::DrawTextBase(PRectangle rc, Font &font_, int ybase, const char
len = maxLengthTextRun-1;
int wclen;
if (et == UTF8) {
- wclen = UCS2FromUTF8(s, len,
+ wclen = UTF16FromUTF8(s, len,
static_cast<wchar_t *>(static_cast<void *>(wctext)), maxLengthTextRun - 1);
} else { // dbcs, so convert using current locale
char sMeasure[maxLengthTextRun];
@@ -1468,7 +1468,7 @@ void SurfaceImpl::MeasureWidths(Font &font_, const char *s, int len, int *positi
len = maxLengthTextRun-1;
int wclen;
if (et == UTF8) {
- wclen = UCS2FromUTF8(s, len,
+ wclen = UTF16FromUTF8(s, len,
static_cast<wchar_t *>(static_cast<void *>(wctext)), maxLengthTextRun - 1);
} else { // dbcsMode, so convert using current locale
char sDraw[maxLengthTextRun];
@@ -1554,7 +1554,7 @@ int SurfaceImpl::WidthText(Font &font_, const char *s, int len) {
#endif
if (et == UTF8) {
GdkWChar wctext[maxLengthTextRun];
- size_t wclen = UCS2FromUTF8(s, len, static_cast<wchar_t *>(static_cast<void *>(wctext)),
+ size_t wclen = UTF16FromUTF8(s, len, static_cast<wchar_t *>(static_cast<void *>(wctext)),
sizeof(wctext) / sizeof(GdkWChar) - 1);
wctext[wclen] = L'\0';
return gdk_text_width_wc(PFont(font_)->pfont, wctext, wclen);
diff --git a/src/Document.cxx b/src/Document.cxx
index a25e3070d..3061bbc37 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -266,7 +266,9 @@ int Document::LenChar(int pos) {
if (ch < 0x80)
return 1;
int len = 2;
- if (ch >= (0x80 + 0x40 + 0x20))
+ if (ch >= (0x80 + 0x40 + 0x20 + 0x10))
+ len = 4;
+ else if (ch >= (0x80 + 0x40 + 0x20))
len = 3;
int lengthDoc = Length();
if ((pos + len) > lengthDoc)
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 363db90f4..863eb82cd 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -9,49 +9,80 @@
#include "UniConversion.h"
+enum { SURROGATE_LEAD_FIRST = 0xD800 };
+enum { SURROGATE_TRAIL_FIRST = 0xDC00 };
+enum { SURROGATE_TRAIL_LAST = 0xDFFF };
+
unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen) {
unsigned int len = 0;
- for (unsigned int i = 0; i < tlen && uptr[i]; i++) {
+ for (unsigned int i = 0; i < tlen && uptr[i];) {
unsigned int uch = uptr[i];
- if (uch < 0x80)
+ if (uch < 0x80) {
len++;
- else if (uch < 0x800)
+ } else if (uch < 0x800) {
len += 2;
- else
- len +=3;
+ } else if ((uch >= SURROGATE_LEAD_FIRST) &&
+ (uch <= SURROGATE_TRAIL_LAST)) {
+ len += 4;
+ i++;
+ } else {
+ len += 3;
+ }
+ i++;
}
return len;
}
-void UTF8FromUCS2(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) {
+void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) {
int k = 0;
- for (unsigned int i = 0; i < tlen && uptr[i]; i++) {
+ for (unsigned int i = 0; i < tlen && uptr[i];) {
unsigned int uch = uptr[i];
if (uch < 0x80) {
putf[k++] = static_cast<char>(uch);
} else if (uch < 0x800) {
putf[k++] = static_cast<char>(0xC0 | (uch >> 6));
putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
+ } else if ((uch >= SURROGATE_LEAD_FIRST) &&
+ (uch <= SURROGATE_TRAIL_LAST)) {
+ // Half a surrogate pair
+ i++;
+ unsigned int xch = 0x10000 + ((uch & 0x3ff) << 10) + (uptr[i] & 0x3ff);
+ putf[k++] = static_cast<char>(0xF0 | (xch >> 18));
+ putf[k++] = static_cast<char>(0x80 | (xch >> 12) & 0x3f);
+ putf[k++] = static_cast<char>(0x80 | ((xch >> 6) & 0x3f));
+ putf[k++] = static_cast<char>(0x80 | (xch & 0x3f));
} else {
putf[k++] = static_cast<char>(0xE0 | (uch >> 12));
putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
}
+ i++;
}
putf[len] = '\0';
}
-unsigned int UCS2Length(const char *s, unsigned int len) {
+unsigned int UTF16Length(const char *s, unsigned int len) {
unsigned int ulen = 0;
- for (unsigned int i=0;i<len;i++) {
+ unsigned int charLen;
+ for (unsigned int i=0;i<len;) {
unsigned char ch = static_cast<unsigned char>(s[i]);
- if ((ch < 0x80) || (ch > (0x80 + 0x40)))
+ if (ch < 0x80) {
+ charLen = 1;
+ } else if (ch < 0x80 + 0x40 + 0x20) {
+ charLen = 2;
+ } else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
+ charLen = 3;
+ } else {
+ charLen = 4;
ulen++;
+ }
+ i += charLen;
+ ulen++;
}
return ulen;
}
-unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) {
+unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) {
unsigned int ui=0;
const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
unsigned int i=0;
@@ -63,12 +94,24 @@ unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsign
tbuf[ui] = static_cast<wchar_t>((ch & 0x1F) << 6);
ch = us[i++];
tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F));
- } else {
+ } else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
tbuf[ui] = static_cast<wchar_t>((ch & 0xF) << 12);
ch = us[i++];
tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + ((ch & 0x7F) << 6));
ch = us[i++];
tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F));
+ } else {
+ // Outside the BMP so need two surrogates
+ int val = (ch & 0x7) << 18;
+ ch = us[i++];
+ val += (ch & 0x3F) << 12;
+ ch = us[i++];
+ val += (ch & 0x3F) << 6;
+ ch = us[i++];
+ val += (ch & 0x3F);
+ tbuf[ui] = static_cast<wchar_t>(((val - 0x10000) >> 10) + SURROGATE_LEAD_FIRST);
+ ui++;
+ tbuf[ui] = static_cast<wchar_t>((val & 0x3ff) + SURROGATE_TRAIL_FIRST);
}
ui++;
}
diff --git a/src/UniConversion.h b/src/UniConversion.h
index bd1d7754d..fd420a688 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -6,7 +6,7 @@
// The License.txt file describes the conditions under which this software may be distributed.
unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen);
-void UTF8FromUCS2(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len);
-unsigned int UCS2Length(const char *s, unsigned int len);
-unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen);
+void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len);
+unsigned int UTF16Length(const char *s, unsigned int len);
+unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen);
diff --git a/win32/PlatWin.cxx b/win32/PlatWin.cxx
index b2cc554bb..dd3213bf9 100644
--- a/win32/PlatWin.cxx
+++ b/win32/PlatWin.cxx
@@ -684,7 +684,7 @@ void SurfaceImpl::DrawTextCommon(PRectangle rc, Font &font_, int ybase, const ch
wchar_t tbuf[MAX_US_LEN];
int tlen;
if (unicodeMode) {
- tlen = UCS2FromUTF8(s, len, tbuf, MAX_US_LEN);
+ tlen = UTF16FromUTF8(s, len, tbuf, MAX_US_LEN);
} else {
// Support Asian string display in 9x English
tlen = ::MultiByteToWideChar(codePage, 0, s, len, NULL, 0);
@@ -740,7 +740,7 @@ int SurfaceImpl::WidthText(Font &font_, const char *s, int len) {
SIZE sz={0,0};
if (unicodeMode) {
wchar_t tbuf[MAX_US_LEN];
- int tlen = UCS2FromUTF8(s, len, tbuf, MAX_US_LEN);
+ int tlen = UTF16FromUTF8(s, len, tbuf, MAX_US_LEN);
::GetTextExtentPoint32W(hdc, tbuf, tlen, &sz);
} else if (IsNT() || (codePage==0) || win9xACPSame) {
::GetTextExtentPoint32A(hdc, s, Platform::Minimum(len, maxLenText), &sz);
@@ -760,7 +760,7 @@ void SurfaceImpl::MeasureWidths(Font &font_, const char *s, int len, int *positi
int fit = 0;
if (unicodeMode) {
wchar_t tbuf[MAX_US_LEN];
- int tlen = UCS2FromUTF8(s, len, tbuf, MAX_US_LEN);
+ int tlen = UTF16FromUTF8(s, len, tbuf, MAX_US_LEN);
int poses[MAX_US_LEN];
fit = tlen;
if (!::GetTextExtentExPointW(hdc, tbuf, tlen, maxWidthMeasure, &fit, poses, &sz)) {
@@ -778,14 +778,17 @@ void SurfaceImpl::MeasureWidths(Font &font_, const char *s, int len, int *positi
int i=0;
while (ui<fit) {
unsigned char uch = us[i];
- positions[i++] = poses[ui];
- if (uch >= 0x80) {
- if (uch < (0x80 + 0x40 + 0x20)) {
- positions[i++] = poses[ui];
- } else {
- positions[i++] = poses[ui];
- positions[i++] = poses[ui];
- }
+ unsigned int lenChar = 1;
+ if (uch >= (0x80 + 0x40 + 0x20 + 0x10)) {
+ lenChar = 4;
+ ui++;
+ } else if (uch >= (0x80 + 0x40 + 0x20)) {
+ lenChar = 3;
+ } else if (uch >= (0x80)) {
+ lenChar = 2;
+ }
+ for (unsigned int bytePos=0; bytePos<lenChar; bytePos++) {
+ positions[i++] = poses[ui];
}
ui++;
}
@@ -1312,7 +1315,7 @@ PRectangle ListBoxX::GetDesiredRect() {
int len = widestItem ? strlen(widestItem) : 0;
if (unicodeMode) {
wchar_t tbuf[MAX_US_LEN];
- len = UCS2FromUTF8(widestItem, len, tbuf, sizeof(tbuf)/sizeof(wchar_t)-1);
+ len = UTF16FromUTF8(widestItem, len, tbuf, sizeof(tbuf)/sizeof(wchar_t)-1);
tbuf[len] = L'\0';
::GetTextExtentPoint32W(hdc, tbuf, len, &textSize);
} else {
@@ -1431,7 +1434,7 @@ void ListBoxX::Draw(DRAWITEMSTRUCT *pDrawItem) {
if (unicodeMode) {
wchar_t tbuf[MAX_US_LEN];
- int tlen = UCS2FromUTF8(text, len, tbuf, sizeof(tbuf)/sizeof(wchar_t)-1);
+ int tlen = UTF16FromUTF8(text, len, tbuf, sizeof(tbuf)/sizeof(wchar_t)-1);
tbuf[tlen] = L'\0';
::DrawTextW(pDrawItem->hDC, tbuf, tlen, &rcText, DT_NOPREFIX|DT_END_ELLIPSIS|DT_SINGLELINE|DT_NOCLIP);
} else {
diff --git a/win32/ScintillaWin.cxx b/win32/ScintillaWin.cxx
index 046da18a6..acca53d6a 100644
--- a/win32/ScintillaWin.cxx
+++ b/win32/ScintillaWin.cxx
@@ -487,7 +487,7 @@ sptr_t ScintillaWin::HandleComposition(uptr_t wParam, sptr_t lParam) {
if (IsUnicodeMode()) {
char utfval[maxLenInputIME * 3];
unsigned int len = UTF8Length(wcs, wides);
- UTF8FromUCS2(wcs, wides, utfval, len);
+ UTF8FromUTF16(wcs, wides, utfval, len);
utfval[len] = '\0';
AddCharUTF(utfval, len);
} else {
@@ -725,7 +725,7 @@ sptr_t ScintillaWin::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam
//char utfval[4];
//wchar_t wcs[2] = {wParam, 0};
//unsigned int len = UTF8Length(wcs, 1);
- //UTF8FromUCS2(wcs, 1, utfval, len);
+ //UTF8FromUTF16(wcs, 1, utfval, len);
//AddCharUTF(utfval, len);
AddCharBytes('\0', LOBYTE(wParam));
} else {
@@ -744,7 +744,7 @@ sptr_t ScintillaWin::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam
char utfval[4];
wchar_t wcs[2] = {static_cast<wchar_t>(wParam), 0};
unsigned int len = UTF8Length(wcs, 1);
- UTF8FromUCS2(wcs, 1, utfval, len);
+ UTF8FromUTF16(wcs, 1, utfval, len);
AddCharUTF(utfval, len);
return 1;
} else {
@@ -1301,7 +1301,7 @@ void ScintillaWin::Paste() {
len = UTF8Length(uptr, bytes / 2);
putf = new char[len + 1];
if (putf) {
- UTF8FromUCS2(uptr, bytes / 2, putf, len);
+ UTF8FromUTF16(uptr, bytes / 2, putf, len);
}
} else {
// CF_UNICODETEXT available, but not in Unicode mode
@@ -1346,8 +1346,8 @@ void ScintillaWin::Paste() {
unsigned int mlen = UTF8Length(uptr, ulen);
char *putf = new char[mlen + 1];
if (putf) {
- // CP_UTF8 not available on Windows 95, so use UTF8FromUCS2()
- UTF8FromUCS2(uptr, ulen, putf, mlen);
+ // CP_UTF8 not available on Windows 95, so use UTF8FromUTF16()
+ UTF8FromUTF16(uptr, ulen, putf, mlen);
}
delete []uptr;
@@ -1775,7 +1775,7 @@ void ScintillaWin::AddCharBytes(char b0, char b1) {
::MultiByteToWideChar(inputCodePage, 0, ansiChars, 1, wcs, 1);
}
unsigned int len = UTF8Length(wcs, 1);
- UTF8FromUCS2(wcs, 1, utfval, len);
+ UTF8FromUTF16(wcs, 1, utfval, len);
utfval[len] = '\0';
AddCharUTF(utfval, len ? len : 1);
} else if (b0) {
@@ -1803,10 +1803,10 @@ void ScintillaWin::CopyToClipboard(const SelectionText &selectedText) {
// Default Scintilla behaviour in Unicode mode
if (IsUnicodeMode()) {
- int uchars = UCS2Length(selectedText.s, selectedText.len);
+ int uchars = UTF16Length(selectedText.s, selectedText.len);
uniText.Allocate(2 * uchars);
if (uniText) {
- UCS2FromUTF8(selectedText.s, selectedText.len, static_cast<wchar_t *>(uniText.ptr), uchars);
+ UTF16FromUTF8(selectedText.s, selectedText.len, static_cast<wchar_t *>(uniText.ptr), uchars);
}
} else {
// Not Unicode mode
@@ -2093,7 +2093,7 @@ STDMETHODIMP ScintillaWin::Drop(LPDATAOBJECT pIDataSource, DWORD grfKeyState,
int dataLen = UTF8Length(udata, tlen/2);
data = new char[dataLen+1];
if (data) {
- UTF8FromUCS2(udata, tlen/2, data, dataLen);
+ UTF8FromUTF16(udata, tlen/2, data, dataLen);
dataAllocated = true;
}
}
@@ -2153,10 +2153,10 @@ STDMETHODIMP ScintillaWin::GetData(FORMATETC *pFEIn, STGMEDIUM *pSTM) {
GlobalMemory text;
if (pFEIn->cfFormat == CF_UNICODETEXT) {
- int uchars = UCS2Length(drag.s, drag.len);
+ int uchars = UTF16Length(drag.s, drag.len);
text.Allocate(2 * uchars);
if (text) {
- UCS2FromUTF8(drag.s, drag.len, static_cast<wchar_t *>(text.ptr), uchars);
+ UTF16FromUTF8(drag.s, drag.len, static_cast<wchar_t *>(text.ptr), uchars);
}
} else {
text.Allocate(drag.len);