aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Sam Hocevar <sam@hocevar.net> 2015-11-20 10:43:26 +1100
committer Sam Hocevar <sam@hocevar.net> 2015-11-20 10:43:26 +1100
commit 1f9bc0d42bc53fd04cc848da1da52635dcf7a2c0 (patch)
tree c3a81badf03c26ae81cffe258df9dec094966fa7
parent 5bd63ddfe65136ace6d11ec363b4f33e10fdbb49 (diff)
download scintilla-mirror-1f9bc0d42bc53fd04cc848da1da52635dcf7a2c0.tar.gz
Bug [#1779]. Better Unicode input support on Windows systems.
- support surrogate pairs in WM_CHAR messages
- support characters from supplementary planes in WM_UNICHAR messages
- support WM_UNICHAR messages in non-Unicode mode
- fix some code duplication

Also, do not return FALSE upon receiving a WM_UNICHAR message with a UNICODE_NOCHAR parameter, since WM_UNICHAR can actually be handled just fine (at least with the exact same level of support as WM_CHAR).
-rw-r--r-- doc/ScintillaHistory.html 6
-rw-r--r-- src/UniConversion.cxx 4
-rw-r--r-- src/UniConversion.h 4
-rw-r--r-- win32/ScintillaWin.cxx 73
4 files changed, 58 insertions, 29 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index d554633db..1c7943845 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -478,6 +478,7 @@
<td>Jordan Jueckstock</td>
</tr><tr>
<td>Yury Dubinsky</td>
+ <td>Sam Hocevar</td>
</tr>
</table>
<p>
@@ -507,6 +508,11 @@
selection.
</li>
<li>
+ On Windows, fix non-BMP input through WM_CHAR and allow WM_UNICHAR to work
+ with non-BMP characters and on non-Unicode documents.
+ <a href="http://sourceforge.net/p/scintilla/bugs/1779/">Bug #1779</a>.
+ </li>
+ <li>
On GTK+ on OS X, fix warning during destruction.
<a href="http://sourceforge.net/p/scintilla/bugs/1777/">Bug #1777</a>.
</li>
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index c12ca34c2..4da9e102a 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -19,10 +19,6 @@ using namespace Scintilla;
namespace Scintilla {
#endif
-enum { SURROGATE_TRAIL_FIRST = 0xDC00 };
-enum { SURROGATE_TRAIL_LAST = 0xDFFF };
-enum { SUPPLEMENTAL_PLANE_FIRST = 0x10000 };
-
unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen) {
unsigned int len = 0;
for (unsigned int i = 0; i < tlen && uptr[i];) {
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 08898cac3..aeb13f0c2 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -57,6 +57,10 @@ inline bool UTF8IsNEL(const unsigned char *us) {
enum { SURROGATE_LEAD_FIRST = 0xD800 };
enum { SURROGATE_LEAD_LAST = 0xDBFF };
+enum { SURROGATE_TRAIL_FIRST = 0xDC00 };
+enum { SURROGATE_TRAIL_LAST = 0xDFFF };
+enum { SUPPLEMENTAL_PLANE_FIRST = 0x10000 };
+
inline unsigned int UTF16CharLength(wchar_t uch) {
return ((uch >= SURROGATE_LEAD_FIRST) && (uch <= SURROGATE_LEAD_LAST)) ? 2 : 1;
}
diff --git a/win32/ScintillaWin.cxx b/win32/ScintillaWin.cxx
index d0ea62a31..92df2aad6 100644
--- a/win32/ScintillaWin.cxx
+++ b/win32/ScintillaWin.cxx
@@ -100,6 +100,14 @@
#define UNICODE_NOCHAR 0xFFFF
#endif
+#ifndef IS_HIGH_SURROGATE // define only if not already available (presumably from the Windows headers - verify)
+#define IS_HIGH_SURROGATE(x) ((x) >= SURROGATE_LEAD_FIRST && (x) <= SURROGATE_LEAD_LAST) // true for UTF-16 lead units in [SURROGATE_LEAD_FIRST, SURROGATE_LEAD_LAST]
+#endif
+
+#ifndef IS_LOW_SURROGATE // define only if not already available (presumably from the Windows headers - verify)
+#define IS_LOW_SURROGATE(x) ((x) >= SURROGATE_TRAIL_FIRST && (x) <= SURROGATE_TRAIL_LAST) // true for UTF-16 trail units in [SURROGATE_TRAIL_FIRST, SURROGATE_TRAIL_LAST]
+#endif
+
#ifndef MK_ALT
#define MK_ALT 32
#endif
@@ -212,6 +220,7 @@ class ScintillaWin :
public ScintillaBase {
bool lastKeyDownConsumed;
+ wchar_t lastHighSurrogateChar;
bool capturedMouse;
bool trackedMouseLeave;
@@ -269,6 +278,7 @@ class ScintillaWin :
virtual bool DragThreshold(Point ptStart, Point ptNow);
virtual void StartDrag();
int TargetAsUTF8(char *text);
+ void AddCharUTF16(wchar_t const *wcs, unsigned int wclen);
int EncodedFromUTF8(char *utf8, char *encoded) const;
sptr_t WndPaint(uptr_t wParam);
@@ -383,6 +393,7 @@ ATOM ScintillaWin::callClassAtom = 0;
ScintillaWin::ScintillaWin(HWND hwnd) {
lastKeyDownConsumed = false;
+ lastHighSurrogateChar = 0;
capturedMouse = false;
trackedMouseLeave = false;
@@ -729,6 +740,26 @@ int ScintillaWin::EncodedFromUTF8(char *utf8, char *encoded) const {
}
}
+// Add one character, given as wclen UTF-16 code units at wcs, to the document: it is converted to UTF-8 when
+// IsUnicodeMode() is true, otherwise to the document's code page. Code is similar to HandleCompositionWindowed().
+void ScintillaWin::AddCharUTF16(wchar_t const *wcs, unsigned int wclen) {
+ if (IsUnicodeMode()) {
+ char utfval[maxLenInputIME * 3]; // fixed-size buffer; wclen is not checked against it here, so callers must pass short input
+ unsigned int len = UTF8Length(wcs, wclen); // number of UTF-8 bytes the conversion will produce
+ UTF8FromUTF16(wcs, wclen, utfval, len);
+ utfval[len] = '\0'; // NUL-terminate the converted bytes
+ AddCharUTF(utfval, len); // pass the whole UTF-8 sequence in one call
+ } else {
+ UINT cpDest = CodePageOfDocument(); // target code page for the non-Unicode document
+ char inBufferCP[maxLenInputIME * 2];
+ int size = ::WideCharToMultiByte(cpDest,
+ 0, wcs, wclen, inBufferCP, sizeof(inBufferCP) - 1, 0, 0); // size is the byte count written; 0 means failure, so the loop below adds nothing
+ for (int i=0; i<size; i++) {
+ AddChar(inBufferCP[i]); // bytes are added one at a time
+ }
+ }
+}
+
sptr_t ScintillaWin::WndPaint(uptr_t wParam) {
//ElapsedTime et;
@@ -1414,40 +1445,32 @@ sptr_t ScintillaWin::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam
case WM_CHAR:
if (((wParam >= 128) || !iscntrl(static_cast<int>(wParam))) || !lastKeyDownConsumed) {
- wchar_t wcs[2] = {static_cast<wchar_t>(wParam), 0};
- if (IsUnicodeMode()) {
- // For a wide character version of the window:
- char utfval[UTF8MaxBytes];
- unsigned int len = UTF8Length(wcs, 1);
- UTF8FromUTF16(wcs, 1, utfval, len);
- AddCharUTF(utfval, len);
- } else {
- UINT cpDest = CodePageOfDocument();
- char inBufferCP[20];
- int size = ::WideCharToMultiByte(cpDest,
- 0, wcs, 1, inBufferCP, sizeof(inBufferCP) - 1, 0, 0);
- inBufferCP[size] = '\0';
- AddCharUTF(inBufferCP, size);
+ wchar_t wcs[3] = {static_cast<wchar_t>(wParam), 0};
+ unsigned int wclen = 1;
+ if (IS_HIGH_SURROGATE(wcs[0])) {
+ // If this is a high surrogate character, we need a second one
+ lastHighSurrogateChar = wcs[0];
+ return 0;
+ } else if (IS_LOW_SURROGATE(wcs[0])) {
+ wcs[1] = wcs[0];
+ wcs[0] = lastHighSurrogateChar;
+ lastHighSurrogateChar = 0;
+ wclen = 2;
}
+ AddCharUTF16(wcs, wclen);
}
return 0;
case WM_UNICHAR:
if (wParam == UNICODE_NOCHAR) {
- return IsUnicodeMode() ? 1 : 0;
+ return TRUE;
} else if (lastKeyDownConsumed) {
return 1;
} else {
- if (IsUnicodeMode()) {
- char utfval[UTF8MaxBytes];
- wchar_t wcs[2] = {static_cast<wchar_t>(wParam), 0};
- unsigned int len = UTF8Length(wcs, 1);
- UTF8FromUTF16(wcs, 1, utfval, len);
- AddCharUTF(utfval, len);
- return 1;
- } else {
- return 0;
- }
+ wchar_t wcs[3] = {0};
+ unsigned int wclen = UTF16FromUTF32Character(wParam, wcs);
+ AddCharUTF16(wcs, wclen);
+ return FALSE;
}
case WM_SYSKEYDOWN: