diff options
author | nyamatongwe <devnull@localhost> | 2012-05-26 12:17:54 +1000 |
---|---|---|
committer | nyamatongwe <devnull@localhost> | 2012-05-26 12:17:54 +1000 |
commit | 032a0017a6e992fc40790214c738dbc59c084dea (patch) | |
tree | e04ee892cef4668f4e70d3857760348613e70021 /src/UniConversion.cxx | |
parent | a6598d01d00e24f8c0ee2f4e9cc9036dd447c15f (diff) | |
download | scintilla-mirror-032a0017a6e992fc40790214c738dbc59c084dea.tar.gz |
Optimize UTF-8 character length calculations by using an array.
Diffstat (limited to 'src/UniConversion.cxx')
-rw-r--r-- | src/UniConversion.cxx | 28 |
1 file changed, 28 insertions, 0 deletions
// UTF8BytesOfLead[b] is the number of bytes occupied by a UTF-8 sequence whose
// first byte has the value b (0..255).
//
//   0x00..0xC1  1  single byte, or a byte that cannot start a sequence
//   0xC2..0xDF  2
//   0xE0..0xEF  3
//   0xF0..0xF4  4
//   0xF5..0xFF  1  sequences longer than 4 bytes are not possible in current UTF-8
//
// Bytes that cannot start a sequence report a length of 1 so that they are
// stepped over as isolated bytes.
//
// The table is initialised statically so that it is valid from program load:
// a reader can never observe the all-zero state that a lazily filled global
// would expose before its initialiser has been called, and no run-time
// "already initialised" flag has to be maintained.
int UTF8BytesOfLead[256] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x00 - 0x0F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x10 - 0x1F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x20 - 0x2F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x30 - 0x3F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x40 - 0x4F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x50 - 0x5F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x60 - 0x6F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x70 - 0x7F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x80 - 0x8F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x90 - 0x9F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0xA0 - 0xAF
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0xB0 - 0xBF
	1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	// 0xC0 - 0xCF
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	// 0xD0 - 0xDF
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,	// 0xE0 - 0xEF
	4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1	// 0xF0 - 0xFF
};

// Retained so that existing callers continue to compile and link.
// UTF8BytesOfLead is fully populated by its static initialiser, so there is
// nothing left to do here; calling this any number of times is harmless.
void UTF8BytesOfLeadInitialise() {
}