diff options
author | Neil <nyamatongwe@gmail.com> | 2015-02-24 09:30:42 +1100 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2015-02-24 09:30:42 +1100 |
commit | 59a6f6fd788f0491a26f4e39ab536ee5e5be87ea (patch) | |
tree | 0af3920a1c0a91e44cdfc4852c344d678e69b401 | |
parent | c194249102cea0df80bcbe2d154dfb4bc582aa10 (diff) | |
download | scintilla-mirror-59a6f6fd788f0491a26f4e39ab536ee5e5be87ea.tar.gz |
Notify container of non-BMP characters correctly.
-rw-r--r-- | src/Editor.cxx | 25 | ||||
-rw-r--r-- | src/UniConversion.cxx | 2 |
2 files changed, 4 insertions, 23 deletions
```diff
diff --git a/src/Editor.cxx b/src/Editor.cxx
index 6c80b7511..585acb086 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -1872,28 +1872,9 @@ void Editor::AddCharUTF(const char *s, unsigned int len, bool treatAsDBCS) {
             // Also treats \0 and naked trail bytes 0x80 to 0xBF as valid
             // characters representing themselves.
         } else {
-            // Unroll 1 to 3 byte UTF-8 sequences. See reference data at:
-            // http://www.cl.cam.ac.uk/~mgk25/unicode.html
-            // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
-            if (byte < 0xE0) {
-                int byte2 = static_cast<unsigned char>(s[1]);
-                if ((byte2 & 0xC0) == 0x80) {
-                    // Two-byte-character lead-byte followed by a trail-byte.
-                    byte = (((byte & 0x1F) << 6) | (byte2 & 0x3F));
-                }
-                // A two-byte-character lead-byte not followed by trail-byte
-                // represents itself.
-            } else if (byte < 0xF0) {
-                int byte2 = static_cast<unsigned char>(s[1]);
-                int byte3 = static_cast<unsigned char>(s[2]);
-                if (((byte2 & 0xC0) == 0x80) && ((byte3 & 0xC0) == 0x80)) {
-                    // Three-byte-character lead byte followed by two trail bytes.
-                    byte = (((byte & 0x0F) << 12) | ((byte2 & 0x3F) << 6) |
-                        (byte3 & 0x3F));
-                }
-                // A three-byte-character lead-byte not followed by two trail-bytes
-                // represents itself.
-            }
+            unsigned int utf32[1] = { 0 };
+            UTF32FromUTF8(s, len, utf32, ELEMENTS(utf32));
+            byte = utf32[0];
         }
         NotifyChar(byte);
     }
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 0d69d9969..04053b057 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -145,7 +145,7 @@ unsigned int UTF32FromUTF8(const char *s, unsigned int len, unsigned int *tbuf,
     unsigned int i=0;
     while ((i<len) && (ui<tlen)) {
         unsigned char ch = us[i++];
-        wchar_t value = 0;
+        unsigned int value = 0;
         if (ch < 0x80) {
             value = ch;
         } else if (((len-i) >= 1) && (ch < 0x80 + 0x40 + 0x20)) {
```