diff options
author | nyamatongwe <unknown> | 2012-05-26 12:17:54 +1000 |
---|---|---|
committer | nyamatongwe <unknown> | 2012-05-26 12:17:54 +1000 |
commit | 477a06c700990e4b646472ce1682a8e68a93383d (patch) | |
tree | 53c15b811862eb874a98d071ac854724da52ba1b /src/UniConversion.cxx | |
parent | c725c015867e59efd1ebe66e0247b62e38e04ac9 (diff) | |
download | scintilla-mirror-477a06c700990e4b646472ce1682a8e68a93383d.tar.gz |
Optimize UTF-8 character length calculations by using an array.
Diffstat (limited to 'src/UniConversion.cxx')
-rw-r--r-- | src/UniConversion.cxx | 28 |
1 files changed, 28 insertions, 0 deletions
// Added to src/UniConversion.cxx by hunk @@ -130,6 +130,34 @@
// (index e1ad99563..40ac982c9), immediately after the end of UTF16FromUTF8().

// Table indexed by byte value: the number of bytes in the UTF-8 sequence that
// the byte would introduce. All entries are zero until
// UTF8BytesOfLeadInitialise() has run.
int UTF8BytesOfLead[256];
// Set once the table above has been filled in.
static bool initialisedBytesOfLead = false;

// Work out the sequence length implied by a single lead byte.
// Values that cannot begin a multi-byte sequence are reported as length 1.
static int BytesFromLead(int leadByte) {
	if (leadByte < 0xC2) {
		// Single byte or invalid
		return 1;
	}
	if (leadByte < 0xE0) {
		return 2;
	}
	if (leadByte < 0xF0) {
		return 3;
	}
	if (leadByte < 0xF5) {
		return 4;
	}
	// Characters longer than 4 bytes not possible in current UTF-8
	return 1;
}

// Fill UTF8BytesOfLead on the first call; later calls return without
// touching the table.
void UTF8BytesOfLeadInitialise() {
	if (initialisedBytesOfLead) {
		return;
	}
	for (int lead = 0; lead < 256; ++lead) {
		UTF8BytesOfLead[lead] = BytesFromLead(lead);
	}
	initialisedBytesOfLead = true;
}

// Return both the width of the first character in the string and a status
// saying whether it is valid or invalid.
// Most invalid sequences return a width of 1 so are treated as isolated bytes but