diff options
| author | nyamatongwe <unknown> | 2012-05-26 12:17:54 +1000 | 
|---|---|---|
| committer | nyamatongwe <unknown> | 2012-05-26 12:17:54 +1000 | 
| commit | 477a06c700990e4b646472ce1682a8e68a93383d (patch) | |
| tree | 53c15b811862eb874a98d071ac854724da52ba1b /src/UniConversion.cxx | |
| parent | c725c015867e59efd1ebe66e0247b62e38e04ac9 (diff) | |
| download | scintilla-mirror-477a06c700990e4b646472ce1682a8e68a93383d.tar.gz | |
Optimize UTF-8 character length calculations by using an array.
Diffstat (limited to 'src/UniConversion.cxx')
| -rw-r--r-- | src/UniConversion.cxx | 28 | 
1 files changed, 28 insertions, 0 deletions
// Number of bytes in the UTF-8 sequence introduced by each possible lead byte,
// indexed by the unsigned value of that byte (0..255):
//   0x00-0xC1  1  single byte, or not valid as a lead byte
//   0xC2-0xDF  2
//   0xE0-0xEF  3
//   0xF0-0xF4  4
//   0xF5-0xFF  1  characters longer than 4 bytes are not possible in current UTF-8
// Invalid lead bytes map to 1 so that they are stepped over as isolated bytes.
//
// The table is initialised statically rather than filled in on first use, so
// a lookup can never observe a width of 0 and no initialisation flag has to be
// shared between callers.
int UTF8BytesOfLead[256] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x00-0x0F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x10-0x1F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x20-0x2F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x30-0x3F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x40-0x4F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x50-0x5F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x60-0x6F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x70-0x7F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x80-0x8F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0x90-0x9F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0xA0-0xAF
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	// 0xB0-0xBF
	1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	// 0xC0-0xCF
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	// 0xD0-0xDF
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,	// 0xE0-0xEF
	4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1	// 0xF0-0xFF
};

// Retained so that existing callers keep compiling and linking.
// UTF8BytesOfLead is complete at load time, so there is nothing left to do
// here and the call may be made any number of times.
void UTF8BytesOfLeadInitialise() {
}
