about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Neil <nyamatongwe@gmail.com> 2015-02-24 09:30:42 +1100
committer Neil <nyamatongwe@gmail.com> 2015-02-24 09:30:42 +1100
commit f18873c71e55526c8dd606b5979da7df127fa418 (patch)
tree bb42dc9a2a14fc5c19086391f367ccc8ccf68a70
parent c4c875b860d9a447967b625f67748e962b697652 (diff)
download scintilla-mirror-f18873c71e55526c8dd606b5979da7df127fa418.tar.gz
Notify container of non-BMP characters correctly.
-rw-r--r-- src/Editor.cxx 25
-rw-r--r-- src/UniConversion.cxx 2
2 files changed, 4 insertions, 23 deletions
diff --git a/src/Editor.cxx b/src/Editor.cxx
index 6c80b7511..585acb086 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -1872,28 +1872,9 @@ void Editor::AddCharUTF(const char *s, unsigned int len, bool treatAsDBCS) {
// Also treats \0 and naked trail bytes 0x80 to 0xBF as valid
// characters representing themselves.
} else {
- // Unroll 1 to 3 byte UTF-8 sequences. See reference data at:
- // http://www.cl.cam.ac.uk/~mgk25/unicode.html
- // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
- if (byte < 0xE0) {
- int byte2 = static_cast<unsigned char>(s[1]);
- if ((byte2 & 0xC0) == 0x80) {
- // Two-byte-character lead-byte followed by a trail-byte.
- byte = (((byte & 0x1F) << 6) | (byte2 & 0x3F));
- }
- // A two-byte-character lead-byte not followed by trail-byte
- // represents itself.
- } else if (byte < 0xF0) {
- int byte2 = static_cast<unsigned char>(s[1]);
- int byte3 = static_cast<unsigned char>(s[2]);
- if (((byte2 & 0xC0) == 0x80) && ((byte3 & 0xC0) == 0x80)) {
- // Three-byte-character lead byte followed by two trail bytes.
- byte = (((byte & 0x0F) << 12) | ((byte2 & 0x3F) << 6) |
- (byte3 & 0x3F));
- }
- // A three-byte-character lead-byte not followed by two trail-bytes
- // represents itself.
- }
+ unsigned int utf32[1] = { 0 };
+ UTF32FromUTF8(s, len, utf32, ELEMENTS(utf32));
+ byte = utf32[0];
}
NotifyChar(byte);
}
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 0d69d9969..04053b057 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -145,7 +145,7 @@ unsigned int UTF32FromUTF8(const char *s, unsigned int len, unsigned int *tbuf,
unsigned int i=0;
while ((i<len) && (ui<tlen)) {
unsigned char ch = us[i++];
- wchar_t value = 0;
+ unsigned int value = 0;
if (ch < 0x80) {
value = ch;
} else if (((len-i) >= 1) && (ch < 0x80 + 0x40 + 0x20)) {