about | summary | refs | log | tree | commit | diff | homepage
path: root/src/Editor.cxx
diff options
context:
space:
mode:
author Neil <nyamatongwe@gmail.com> 2015-02-24 09:30:42 +1100
committer Neil <nyamatongwe@gmail.com> 2015-02-24 09:30:42 +1100
commit f18873c71e55526c8dd606b5979da7df127fa418 (patch)
tree bb42dc9a2a14fc5c19086391f367ccc8ccf68a70 /src/Editor.cxx
parent c4c875b860d9a447967b625f67748e962b697652 (diff)
download scintilla-mirror-f18873c71e55526c8dd606b5979da7df127fa418.tar.gz
Notify container of non-BMP characters correctly.
Diffstat (limited to 'src/Editor.cxx')
-rw-r--r-- src/Editor.cxx 25
1 files changed, 3 insertions, 22 deletions
diff --git a/src/Editor.cxx b/src/Editor.cxx
index 6c80b7511..585acb086 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -1872,28 +1872,9 @@ void Editor::AddCharUTF(const char *s, unsigned int len, bool treatAsDBCS) {
// Also treats \0 and naked trail bytes 0x80 to 0xBF as valid
// characters representing themselves.
} else {
- // Unroll 1 to 3 byte UTF-8 sequences. See reference data at:
- // http://www.cl.cam.ac.uk/~mgk25/unicode.html
- // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
- if (byte < 0xE0) {
- int byte2 = static_cast<unsigned char>(s[1]);
- if ((byte2 & 0xC0) == 0x80) {
- // Two-byte-character lead-byte followed by a trail-byte.
- byte = (((byte & 0x1F) << 6) | (byte2 & 0x3F));
- }
- // A two-byte-character lead-byte not followed by trail-byte
- // represents itself.
- } else if (byte < 0xF0) {
- int byte2 = static_cast<unsigned char>(s[1]);
- int byte3 = static_cast<unsigned char>(s[2]);
- if (((byte2 & 0xC0) == 0x80) && ((byte3 & 0xC0) == 0x80)) {
- // Three-byte-character lead byte followed by two trail bytes.
- byte = (((byte & 0x0F) << 12) | ((byte2 & 0x3F) << 6) |
- (byte3 & 0x3F));
- }
- // A three-byte-character lead-byte not followed by two trail-bytes
- // represents itself.
- }
+ unsigned int utf32[1] = { 0 };
+ UTF32FromUTF8(s, len, utf32, ELEMENTS(utf32));
+ byte = utf32[0];
}
NotifyChar(byte);
}