diff options
author | Neil <nyamatongwe@gmail.com> | 2018-03-14 10:05:30 +1100 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2018-03-14 10:05:30 +1100 |
commit | 49daf8127b8d8df7a307be5c4a2b65a6a6708678 (patch) | |
tree | abbd9ed3acece317bf86db5a2d74c698aedad323 /src/UniConversion.cxx | |
parent | aae86a999fe82ab85660991892253040126386b8 (diff) | |
download | scintilla-mirror-49daf8127b8d8df7a307be5c4a2b65a6a6708678.tar.gz |
Bug [#2001]. Make masking and comparison code clearer.
Diffstat (limited to 'src/UniConversion.cxx')
-rw-r--r-- | src/UniConversion.cxx | 41 |
1 files changed, 25 insertions, 16 deletions
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx index e4eade7dc..8e537c689 100644 --- a/src/UniConversion.cxx +++ b/src/UniConversion.cxx @@ -98,6 +98,15 @@ size_t UTF16Length(const char *s, size_t len) { return ulen; } +constexpr unsigned char TrailByteValue(unsigned char c) { + // The top 2 bits are 0b10 to indicate a trail byte. + // The lower 6 bits contain the value. + return c & 0b0011'1111; +} + +const unsigned char utf8Start3 = 0b1110'0000; +const unsigned char utf8Start4 = 0b1111'0000; + size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen) { size_t ui = 0; const unsigned char *us = reinterpret_cast<const unsigned char *>(s); @@ -106,25 +115,25 @@ size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen) { unsigned char ch = us[i++]; if (ch < 0x80) { tbuf[ui] = ch; - } else if (ch < 0x80 + 0x40 + 0x20) { + } else if (ch < utf8Start3) { tbuf[ui] = static_cast<wchar_t>((ch & 0x1F) << 6); ch = us[i++]; - tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F)); - } else if (ch < 0x80 + 0x40 + 0x20 + 0x10) { + tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + TrailByteValue(ch)); + } else if (ch < utf8Start4) { tbuf[ui] = static_cast<wchar_t>((ch & 0xF) << 12); ch = us[i++]; - tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + ((ch & 0x7F) << 6)); + tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (TrailByteValue(ch) << 6)); ch = us[i++]; - tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F)); + tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + TrailByteValue(ch)); } else { // Outside the BMP so need two surrogates int val = (ch & 0x7) << 18; ch = us[i++]; - val += (ch & 0x3F) << 12; + val += TrailByteValue(ch) << 12; ch = us[i++]; - val += (ch & 0x3F) << 6; + val += TrailByteValue(ch) << 6; ch = us[i++]; - val += (ch & 0x3F); + val += TrailByteValue(ch); tbuf[ui] = static_cast<wchar_t>(((val - 0x10000) >> 10) + SURROGATE_LEAD_FIRST); ui++; tbuf[ui] = static_cast<wchar_t>((val & 0x3ff) + SURROGATE_TRAIL_FIRST); @@ -143,24 +152,24 @@ size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen) unsigned int value = 0; if (ch < 0x80) { value = ch; - } else if (((len-i) >= 1) && (ch < 0x80 + 0x40 + 0x20)) { + } else if (((len-i) >= 1) && (ch < utf8Start3)) { value = (ch & 0x1F) << 6; ch = us[i++]; - value += ch & 0x7F; - } else if (((len-i) >= 2) && (ch < 0x80 + 0x40 + 0x20 + 0x10)) { + value += TrailByteValue(ch); + } else if (((len-i) >= 2) && (ch < utf8Start4)) { value = (ch & 0xF) << 12; ch = us[i++]; - value += (ch & 0x7F) << 6; + value += TrailByteValue(ch) << 6; ch = us[i++]; - value += ch & 0x7F; + value += TrailByteValue(ch); } else if ((len-i) >= 3) { value = (ch & 0x7) << 18; ch = us[i++]; - value += (ch & 0x3F) << 12; + value += TrailByteValue(ch) << 12; ch = us[i++]; - value += (ch & 0x3F) << 6; + value += TrailByteValue(ch) << 6; ch = us[i++]; - value += ch & 0x3F; + value += TrailByteValue(ch); } tbuf[ui] = value; ui++; |