diff options
| author | nyamatongwe <unknown> | 2013-01-19 12:33:20 +1100 | 
|---|---|---|
| committer | nyamatongwe <unknown> | 2013-01-19 12:33:20 +1100 | 
| commit | 5d17740fdedcea321a23ffd3350aa7adbf4c2329 (patch) | |
| tree | 1512465a2bbf066e96eb1ae2d10fdf3fc7dbd42b /lexlib/StyleContext.h | |
| parent | f46c96ecb682ad736453f78f6709fca6c6911886 (diff) | |
| download | scintilla-mirror-5d17740fdedcea321a23ffd3350aa7adbf4c2329.tar.gz | |
Implement generic support for Unicode line ends and sub styles in lexer support classes.
Diffstat (limited to 'lexlib/StyleContext.h')
| -rw-r--r-- | lexlib/StyleContext.h | 97 | 
1 files changed, 83 insertions, 14 deletions
| diff --git a/lexlib/StyleContext.h b/lexlib/StyleContext.h index c2d223e3f..9f1818f21 100644 --- a/lexlib/StyleContext.h +++ b/lexlib/StyleContext.h @@ -19,6 +19,30 @@ static inline int MakeLowerCase(int ch) {  		return ch - 'A' + 'a';  } +inline int UnicodeCodePoint(const unsigned char *us) { +	if (us[0] < 0xC2) { +		return us[0]; +	} else if (us[0] < 0xE0) { +		return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F); +	} else if (us[0] < 0xF0) { +		return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F); +	} else if (us[0] < 0xF5) { +		return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F); +	} +	return us[0]; +} + +inline int BytesInUnicodeCodePoint(int codePoint) { +	if (codePoint < 0x80) +		return 1; +	else if (codePoint < 0x800) +		return 2; +	else if (codePoint < 0x10000) +		return 3; +	else +		return 4; +} +  // All languages handled so far can treat all characters >= 0x80 as one class  // which just continues the current token or starts an identifier if in default.  // DBCS treated specially as the second character can be < 0x80 and hence @@ -27,22 +51,40 @@ class StyleContext {  	LexAccessor &styler;  	unsigned int endPos;  	StyleContext &operator=(const StyleContext &); +  	void GetNextChar(unsigned int pos) {  		chNext = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1)); -		if (styler.IsLeadByte(static_cast<char>(chNext))) { -			chNext = chNext << 8; -			chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(pos+2)); +		if (styler.Encoding() == encUnicode) { +			if (chNext >= 0x80) { +				unsigned char bytes[4] = { static_cast<unsigned char>(chNext), 0, 0, 0 }; +				for (int trail=1; trail<3; trail++) { +					bytes[trail] = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1+trail)); +					if (!((bytes[trail] >= 0x80) && (bytes[trail] < 0xc0))) { +						bytes[trail] = 0; +						break; +					} +				} +				chNext = UnicodeCodePoint(bytes); +			} +		} else if (styler.Encoding() == encDBCS) { +			if (styler.IsLeadByte(static_cast<char>(chNext))) { +				chNext = chNext << 8; +				chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(pos+2)); +			}  		}  		// End of line?  		// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win)  		// or on LF alone (Unix). Avoid triggering two times on Dos/Win. -		atLineEnd = (ch == '\r' && chNext != '\n') || -					(ch == '\n') || -					(currentPos >= endPos); +		if (lineStartNext < styler.Length()) +			atLineEnd = static_cast<int>(pos) >= (lineStartNext-1); +		else // Last line +			atLineEnd = static_cast<int>(pos) >= lineStartNext;  	}  public:  	unsigned int currentPos; +	int currentLine; +	int lineStartNext;  	bool atLineStart;  	bool atLineEnd;  	int state; @@ -55,6 +97,8 @@ public:  		styler(styler_),  		endPos(startPos + length),  		currentPos(startPos), +		currentLine(-1), +		lineStartNext(-1),  		atLineEnd(false),  		state(initStyle & chMask), // Mask off all bits which aren't in the chMask.  		chPrev(0), @@ -62,13 +106,22 @@ public:  		chNext(0) {  		styler.StartAt(startPos, chMask);  		styler.StartSegment(startPos); -		atLineStart = static_cast<unsigned int>(styler.LineStart(styler.GetLine(startPos))) == startPos; +		currentLine = styler.GetLine(startPos); +		lineStartNext = styler.LineStart(currentLine+1); +		atLineStart = static_cast<unsigned int>(styler.LineStart(currentLine)) == startPos;  		unsigned int pos = currentPos;  		ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos)); -		if (styler.IsLeadByte(static_cast<char>(ch))) { -			pos++; -			ch = ch << 8; -			ch |= static_cast<unsigned char>(styler.SafeGetCharAt(pos)); +		if (styler.Encoding() == encUnicode) { +			// Get the current char +			GetNextChar(pos-1); +			ch = chNext; +			pos += BytesInUnicodeCodePoint(ch) - 1; +		} else if (styler.Encoding() == encDBCS) { +			if (styler.IsLeadByte(static_cast<char>(ch))) { +				pos++; +				ch = ch << 8; +				ch |= static_cast<unsigned char>(styler.SafeGetCharAt(pos)); +			}  		}  		GetNextChar(pos);  	} @@ -82,12 +135,28 @@ public:  	void Forward() {  		if (currentPos < endPos) {  			atLineStart = atLineEnd; +			if (atLineStart) { +				currentLine++; +				lineStartNext = styler.LineStart(currentLine+1); +			}  			chPrev = ch; -			currentPos++; -			if (ch >= 0x100) +			if (styler.Encoding() == encUnicode) { +				currentPos += BytesInUnicodeCodePoint(ch); +			} else if (styler.Encoding() == encDBCS) { +				currentPos++; +				if (ch >= 0x100) +					currentPos++; +			} else {  				currentPos++; +			}  			ch = chNext; -			GetNextChar(currentPos + ((ch >= 0x100) ? 1 : 0)); +			if (styler.Encoding() == encUnicode) { +				GetNextChar(currentPos + BytesInUnicodeCodePoint(ch)-1); +			} else if (styler.Encoding() == encDBCS) { +				GetNextChar(currentPos + ((ch >= 0x100) ? 1 : 0)); +			} else { +				GetNextChar(currentPos); +			}  		} else {  			atLineStart = false;  			chPrev = ' '; | 
