diff options
| author | nyamatongwe <devnull@localhost> | 2012-05-26 12:17:54 +1000 | 
|---|---|---|
| committer | nyamatongwe <devnull@localhost> | 2012-05-26 12:17:54 +1000 | 
| commit | 032a0017a6e992fc40790214c738dbc59c084dea (patch) | |
| tree | e04ee892cef4668f4e70d3857760348613e70021 /src/Document.cxx | |
| parent | a6598d01d00e24f8c0ee2f4e9cc9036dd447c15f (diff) | |
| download | scintilla-mirror-032a0017a6e992fc40790214c738dbc59c084dea.tar.gz | |
Optimize UTF-8 character length calculations by using an array.
Diffstat (limited to 'src/Document.cxx')
| -rw-r--r-- | src/Document.cxx | 25 | 
1 file changed, 8 insertions, 17 deletions
```diff
diff --git a/src/Document.cxx b/src/Document.cxx
index 6cae14e8a..d427d636d 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -112,6 +112,8 @@ Document::Document() {
 	matchesValid = false;
 	regex = 0;
 
+	UTF8BytesOfLeadInitialise();
+
 	perLineData[ldMarkers] = new LineMarkers();
 	perLineData[ldLevels] = new LineLevels();
 	perLineData[ldState] = new LineState();
@@ -449,19 +451,13 @@ int Document::LenChar(int pos) {
 	} else if (IsCrLf(pos)) {
 		return 2;
 	} else if (SC_CP_UTF8 == dbcsCodePage) {
-		unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
-		if (ch < 0x80)
-			return 1;
-		int len = 2;
-		if (ch >= (0x80 + 0x40 + 0x20 + 0x10))
-			len = 4;
-		else if (ch >= (0x80 + 0x40 + 0x20))
-			len = 3;
+		const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
+		const int widthCharBytes = UTF8BytesOfLead[leadByte];
 		int lengthDoc = Length();
-		if ((pos + len) > lengthDoc)
-			return lengthDoc -pos;
+		if ((pos + widthCharBytes) > lengthDoc)
+			return lengthDoc - pos;
 		else
-			return len;
+			return widthCharBytes;
 	} else if (dbcsCodePage) {
 		return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 	} else {
@@ -720,12 +716,7 @@ int Document::SafeSegment(const char *text, int length, int lengthSegment) {
 		lastEncodingAllowedBreak = j;
 
 		if (dbcsCodePage == SC_CP_UTF8) {
-			if (ch < 0x80) {
-				j++;
-			} else {
-				int bytes = BytesFromLead(ch);
-				j += bytes ? bytes : 1;
-			}
+			j += UTF8BytesOfLead[ch];
 		} else if (dbcsCodePage) {
 			j += IsDBCSLeadByte(ch) ? 2 : 1;
 		} else {
```
