aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/UniConversion.cxx
diff options
context:
space:
mode:
author nyamatongwe <unknown> 2012-05-26 12:17:54 +1000
committer nyamatongwe <unknown> 2012-05-26 12:17:54 +1000
commit 477a06c700990e4b646472ce1682a8e68a93383d (patch)
tree 53c15b811862eb874a98d071ac854724da52ba1b /src/UniConversion.cxx
parent c725c015867e59efd1ebe66e0247b62e38e04ac9 (diff)
download scintilla-mirror-477a06c700990e4b646472ce1682a8e68a93383d.tar.gz
Optimize UTF-8 character length calculations by using an array.
Diffstat (limited to 'src/UniConversion.cxx')
-rw-r--r-- src/UniConversion.cxx 28
1 files changed, 28 insertions, 0 deletions
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index e1ad99563..40ac982c9 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -130,6 +130,34 @@ unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsig
return ui;
}
+int UTF8BytesOfLead[256]; // Lookup table indexed by byte value 0..255; each entry is set to BytesFromLead(index) by UTF8BytesOfLeadInitialise().
+static bool initialisedBytesOfLead = false; // Becomes true once UTF8BytesOfLeadInitialise() has filled the table, so later calls skip the fill.
+
+static int BytesFromLead(int leadByte) { // Returns the byte count (1, 2, 3 or 4) assigned to a character starting with leadByte.
+ if (leadByte < 0xC2) {
+ // Below 0xC2: a single-byte character or a value that is not a valid lead byte; counted as 1 byte.
+ return 1;
+ } else if (leadByte < 0xE0) { // 0xC2..0xDF
+ return 2;
+ } else if (leadByte < 0xF0) { // 0xE0..0xEF
+ return 3;
+ } else if (leadByte < 0xF5) { // 0xF0..0xF4
+ return 4;
+ } else {
+ // 0xF5 and above: characters longer than 4 bytes are not possible in current UTF-8, so counted as 1 byte.
+ return 1;
+ }
+}
+
+void UTF8BytesOfLeadInitialise() { // Fills UTF8BytesOfLead on the first call; later calls return without touching the table.
+ if (!initialisedBytesOfLead) { // NOTE(review): plain bool with no locking visible here - confirm callers initialise from a single thread.
+ for (int i=0;i<256;i++) { // one entry for every possible byte value
+ UTF8BytesOfLead[i] = BytesFromLead(i);
+ }
+ initialisedBytesOfLead = true; // record completion so the loop is not repeated
+ }
+}
+
// Return both the width of the first character in the string and a status
// saying whether it is valid or invalid.
// Most invalid sequences return a width of 1 so are treated as isolated bytes but