diff options
Diffstat (limited to 'src/LexCPP.cxx')
| -rw-r--r-- | src/LexCPP.cxx | 483 | 
1 files changed, 271 insertions, 212 deletions
| diff --git a/src/LexCPP.cxx b/src/LexCPP.cxx index b261e621a..b7ea9f47d 100644 --- a/src/LexCPP.cxx +++ b/src/LexCPP.cxx @@ -19,254 +19,313 @@  #include "Scintilla.h"  #include "SciLexer.h" -static bool classifyWordCpp(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { -	PLATFORM_ASSERT(end >= start); -	char s[100]; -	for (unsigned int i = 0; (i < end - start + 1) && (i < 30); i++) { +static bool IsOKBeforeRE(int ch) { +	return (ch == '(') || (ch == '=') || (ch == ','); +} + +static void getRange(unsigned int start, +		unsigned int end, +		Accessor &styler, +		char *s, +		unsigned int len) { +	unsigned int i = 0; +	while ((i < end - start + 1) && (i < len-1)) {  		s[i] = styler[start + i]; -		s[i + 1] = '\0'; +		i++;  	} -	bool wordIsUUID = false; -	char chAttr = SCE_C_IDENTIFIER; -	if (isdigit(s[0]) || (s[0] == '.')) -		chAttr = SCE_C_NUMBER; -	else { -		if (keywords.InList(s)) { -			chAttr = SCE_C_WORD; -			wordIsUUID = strcmp(s, "uuid") == 0; -		} -	} -	styler.ColourTo(end, chAttr); -	return wordIsUUID; +	s[i] = '\0';  } -static bool isOKBeforeRE(char ch) { -	return (ch == '(') || (ch == '=') || (ch == ','); +inline bool IsASpace(int ch) { +    return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d)); +} + +inline bool IsAWordChar(int  ch) { +	return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_'); +} + +inline bool IsAWordStart(int ch) { +	return (ch < 0x80) && (isalnum(ch) || ch == '_'); +} + +inline bool IsADigit(int ch) { +	return (ch >= '0') && (ch <= '9');  } +// All languages handled so far can treat all characters >= 0x80 as one class +// which just continues the current token or starts an identifier if in default. +// DBCS treated specially as the second character can be < 0x80 and hence  +// syntactically significant. UTF-8 avoids this as all trail bytes are >= 0x80 +class ColouriseContext { +	Accessor &styler; +	int lengthDoc; +	int currentPos; +	ColouriseContext& operator=(const ColouriseContext&) { +		return *this; +	} +public: +	bool atEOL; +	int state; +	int chPrev; +	int ch; +	int chNext; + +	ColouriseContext(unsigned int startPos, int length, +                        int initStyle, Accessor &styler_) :  +		styler(styler_), +		lengthDoc(startPos + length), +		currentPos(startPos),  +		atEOL(false), +		state(initStyle),  +		chPrev(0), +		ch(0),  +		chNext(0) { +		styler.StartAt(startPos); +		styler.StartSegment(startPos); +		int pos = currentPos; +		ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos)); +		if (styler.IsLeadByte(static_cast<char>(ch))) { +			pos++; +			ch = ch << 8; +			ch |= static_cast<unsigned char>(styler.SafeGetCharAt(pos)); +		} +		chNext = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1)); +		if (styler.IsLeadByte(static_cast<char>(chNext))) { +			chNext = chNext << 8; +			chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(pos+2)); +		} +		atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); +	} +	void Complete() { +		styler.ColourTo(currentPos - 1, state); +	} +	bool More() { +		return currentPos <= lengthDoc; +	} +	void Forward() { +		// A lot of this is repeated from the constructor - TODO: merge code +		chPrev = ch; +		currentPos++; +		if (ch >= 0x100) +			currentPos++; +		ch = chNext; +		chNext = static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+1)); +		if (styler.IsLeadByte(static_cast<char>(chNext))) { +			chNext = chNext << 8; +			chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + 2)); +		} +		// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix) +		// Avoid triggering two times on Dos/Win +		// End of line +		atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); +	} +	void ChangeState(int state_) { +		state = state_; +	} +	void SetState(int state_) { +		styler.ColourTo(currentPos - 1, state); +		state = state_; +	} +	void ForwardSetState(int state_) { +		Forward(); +		styler.ColourTo(currentPos - 1, state); +		state = state_; +	} +	void GetCurrent(char *s, int len) { +		getRange(styler.GetStartSegment(), currentPos - 1, styler, s, len); +	} +	int LengthCurrent() { +		return currentPos - styler.GetStartSegment(); +	} +	bool Match(char ch0) { +		return ch == ch0; +	} +	bool Match(char ch0, char ch1) { +		return (ch == ch0) && (chNext == ch1); +	} +	bool Match(const char *s) { +		if (ch != *s) +			return false; +		s++; +		if (chNext != *s) +			return false; +		s++; +		for (int n=2; *s; n++) { +			if (*s != styler.SafeGetCharAt(currentPos+n)) +				return false; +			s++; +		} +		return true; +	} +}; +  static void ColouriseCppDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],                              Accessor &styler) {  	WordList &keywords = *keywordlists[0]; - -	styler.StartAt(startPos); +	WordList &keywords2 = *keywordlists[1];  	bool stylingWithinPreprocessor = styler.GetPropertyInt("styling.within.preprocessor"); -	//int lineCurrent = styler.GetLine(startPos); -	int state = initStyle; -	int styleBeforeLineStart = initStyle; -	if (state == SCE_C_STRINGEOL)	// Does not leak onto next line -		state = SCE_C_DEFAULT; -	char chPrev = ' '; -	char chNext = styler[startPos]; +	if (initStyle == SCE_C_STRINGEOL)	// Does not leak onto next line +		initStyle = SCE_C_DEFAULT; +  	char chPrevNonWhite = ' '; -	unsigned int lengthDoc = startPos + length;  	int visibleChars = 0; -	styler.StartSegment(startPos);  	bool lastWordWasUUID = false; -	for (unsigned int i = startPos; i < lengthDoc; i++) { -		char ch = chNext; -		chNext = styler.SafeGetCharAt(i + 1); -		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); -		if (atEOL) { -			// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix) -			// Avoid triggering two times on Dos/Win -			// End of line -			if (state == SCE_C_STRINGEOL) { -				styler.ColourTo(i, state); -				state = SCE_C_DEFAULT; -			} -		} +	ColouriseContext cc(startPos, length, initStyle, styler); -		if (styler.IsLeadByte(ch)) { -			chNext = styler.SafeGetCharAt(i + 2); -			chPrev = ' '; -			i += 1; -			continue; -		} +	for (; cc.More(); cc.Forward()) { -		if (state == SCE_C_DEFAULT) { -			if (ch == '@' && chNext == '\"') { -				styler.ColourTo(i - 1, state); -				state = SCE_C_VERBATIM; -				i++; -				ch = chNext; -				chNext = styler.SafeGetCharAt(i + 1); -			} else if (iswordstart(ch) || (ch == '@')) { -				styler.ColourTo(i - 1, state); -				if (lastWordWasUUID) { -					state = SCE_C_UUID; -					lastWordWasUUID = false; -				} else { -					state = SCE_C_IDENTIFIER; -				} -			} else if (ch == '/' && chNext == '*') { -				styler.ColourTo(i - 1, state); -				if (styler.SafeGetCharAt(i + 2) == '*' || -				        styler.SafeGetCharAt(i + 2) == '!')	// Support of Qt/Doxygen doc. style -					state = SCE_C_COMMENTDOC; -				else -					state = SCE_C_COMMENT; -			} else if (ch == '/' && chNext == '/') { -				styler.ColourTo(i - 1, state); -				if (styler.SafeGetCharAt(i + 2) == '/' || -				        styler.SafeGetCharAt(i + 2) == '!')	// Support of Qt/Doxygen doc. style -					state = SCE_C_COMMENTLINEDOC; -				else -					state = SCE_C_COMMENTLINE; -			} else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) { -				styler.ColourTo(i - 1, state); -				state = SCE_C_REGEX; -			} else if (ch == '\"') { -				styler.ColourTo(i - 1, state); -				state = SCE_C_STRING; -			} else if (ch == '\'') { -				styler.ColourTo(i - 1, state); -				state = SCE_C_CHARACTER; -			} else if (ch == '#' && visibleChars == 0) { -				// Preprocessor commands are alone on their line -				styler.ColourTo(i - 1, state); -				state = SCE_C_PREPROCESSOR; -				// Skip whitespace between # and preprocessor word -				do { -					i++; -					ch = chNext; -					chNext = styler.SafeGetCharAt(i + 1); -				} while (isspacechar(ch) && (i < lengthDoc)); -			} else if (isoperator(ch)) { -				styler.ColourTo(i-1, state); -				styler.ColourTo(i, SCE_C_OPERATOR); +		if (cc.state == SCE_C_STRINGEOL) { +			if (cc.atEOL) { +				cc.SetState(SCE_C_DEFAULT);  			} -		} else if (state == SCE_C_IDENTIFIER) { -			if (!iswordchar(ch)) { -				lastWordWasUUID = classifyWordCpp(styler.GetStartSegment(), i - 1, keywords, styler); -				state = SCE_C_DEFAULT; -				if (ch == '/' && chNext == '*') { -					if (styler.SafeGetCharAt(i + 2) == '*') -						state = SCE_C_COMMENTDOC; -					else -						state = SCE_C_COMMENT; -				} else if (ch == '/' && chNext == '/') { -					state = SCE_C_COMMENTLINE; -				} else if (ch == '\"') { -					state = SCE_C_STRING; -				} else if (ch == '\'') { -					state = SCE_C_CHARACTER; -				} else if (isoperator(ch)) { -					styler.ColourTo(i, SCE_C_OPERATOR); -				} +		} else if (cc.state == SCE_C_OPERATOR) { +			cc.SetState(SCE_C_DEFAULT); +		} else if (cc.state == SCE_C_NUMBER) { +			if (!IsAWordChar(cc.ch)) { +				cc.SetState(SCE_C_DEFAULT);  			} -		} else { -			if (state == SCE_C_PREPROCESSOR) { -				if (stylingWithinPreprocessor) { -					if (isspacechar(ch)) { -						styler.ColourTo(i - 1, state); -						state = SCE_C_DEFAULT; -					} -				} else { -					if (atEOL && (chPrev != '\\')) { -						styler.ColourTo(i - 1, state); -						state = SCE_C_DEFAULT; -					} +		} else if (cc.state == SCE_C_IDENTIFIER) { +			if (!IsAWordChar(cc.ch) || (cc.ch == '.')) { +				char s[100]; +				cc.GetCurrent(s, sizeof(s)); +				if (keywords.InList(s)) { +					lastWordWasUUID = strcmp(s, "uuid") == 0; +					cc.ChangeState(SCE_C_WORD); +				} else if (keywords2.InList(s)) { +					cc.ChangeState(SCE_C_WORD2);  				} -			} else if (state == SCE_C_COMMENT) { -				if (ch == '/' && chPrev == '*') { -					if (((i > styler.GetStartSegment() + 2) || ( -					            (styleBeforeLineStart == SCE_C_COMMENT) && -					            (i > styler.GetStartSegment())))) { -						styler.ColourTo(i, state); -						state = SCE_C_DEFAULT; -					} +				cc.SetState(SCE_C_DEFAULT); +			} +		} if (cc.state == SCE_C_PREPROCESSOR) { +			if (stylingWithinPreprocessor) { +				if (IsASpace(cc.ch)) { +					cc.SetState(SCE_C_DEFAULT);  				} -			} else if (state == SCE_C_COMMENTDOC) { -				if (ch == '/' && chPrev == '*') { -					if (((i > styler.GetStartSegment() + 2) || ( -					            (styleBeforeLineStart == SCE_C_COMMENTDOC) && -					            (i > styler.GetStartSegment())))) { -						styler.ColourTo(i, state); -						state = SCE_C_DEFAULT; -					} +			} else { +				if (cc.atEOL && (cc.chPrev != '\\')) { +					cc.SetState(SCE_C_DEFAULT);  				} -			} else if (state == SCE_C_COMMENTLINE || state == SCE_C_COMMENTLINEDOC) { -				if (ch == '\r' || ch == '\n') { -					styler.ColourTo(i - 1, state); -					state = SCE_C_DEFAULT; +			} +		} else if (cc.state == SCE_C_COMMENT) { +			if (cc.Match('*', '/')) { +				cc.Forward(); +				cc.ForwardSetState(SCE_C_DEFAULT); +			} +		} else if (cc.state == SCE_C_COMMENTDOC) { +			if (cc.Match('*', '/')) { +				cc.Forward(); +				cc.ForwardSetState(SCE_C_DEFAULT); +			} +		} else if (cc.state == SCE_C_COMMENTLINE || cc.state == SCE_C_COMMENTLINEDOC) { +			if (cc.ch == '\r' || cc.ch == '\n') { +				cc.SetState(SCE_C_DEFAULT); +			} +		} else if (cc.state == SCE_C_STRING) { +			if (cc.ch == '\\') { +				if (cc.chNext == '\"' || cc.chNext == '\'' || cc.chNext == '\\') { +					cc.Forward();  				} -			} else if (state == SCE_C_STRING) { -				if (ch == '\\') { -					if (chNext == '\"' || chNext == '\'' || chNext == '\\') { -						i++; -						ch = chNext; -						chNext = styler.SafeGetCharAt(i + 1); -					} -				} else if (ch == '\"') { -					styler.ColourTo(i, state); -					state = SCE_C_DEFAULT; -				} else if ((chNext == '\r' || chNext == '\n') && (chPrev != '\\')) { -					styler.ColourTo(i - 1, SCE_C_STRINGEOL); -					state = SCE_C_STRINGEOL; +			} else if (cc.ch == '\"') { +				cc.ForwardSetState(SCE_C_DEFAULT); +			} else if ((cc.atEOL) && (cc.chPrev != '\\')) { +				cc.ChangeState(SCE_C_STRINGEOL); +			} +		} else if (cc.state == SCE_C_CHARACTER) { +			if ((cc.ch == '\r' || cc.ch == '\n') && (cc.chPrev != '\\')) { +				cc.ChangeState(SCE_C_STRINGEOL); +			} else if (cc.ch == '\\') { +				if (cc.chNext == '\"' || cc.chNext == '\'' || cc.chNext == '\\') { +					cc.Forward();  				} -			} else if (state == SCE_C_CHARACTER) { -				if ((ch == '\r' || ch == '\n') && (chPrev != '\\')) { -					styler.ColourTo(i - 1, SCE_C_STRINGEOL); -					state = SCE_C_STRINGEOL; -				} else if (ch == '\\') { -					if (chNext == '\"' || chNext == '\'' || chNext == '\\') { -						i++; -						ch = chNext; -						chNext = styler.SafeGetCharAt(i + 1); -					} -				} else if (ch == '\'') { -					styler.ColourTo(i, state); -					state = SCE_C_DEFAULT; +			} else if (cc.ch == '\'') { +				cc.ForwardSetState(SCE_C_DEFAULT); +			} +		} else if (cc.state == SCE_C_REGEX) { +			if (cc.ch == '\r' || cc.ch == '\n' || cc.ch == '/') { +				cc.ForwardSetState(SCE_C_DEFAULT); +			} else if (cc.ch == '\\') { +				// Gobble up the quoted character +				if (cc.chNext == '\\' || cc.chNext == '/') { +					cc.Forward();  				} -			} else if (state == SCE_C_REGEX) { -				if (ch == '\r' || ch == '\n' || ch == '/') { -					styler.ColourTo(i, state); -					state = SCE_C_DEFAULT; -				} else if (ch == '\\') { -					// Gobble up the quoted character -					if (chNext == '\\' || chNext == '/') { -						i++; -						ch = chNext; -						chNext = styler.SafeGetCharAt(i + 1); -					} +			} +		} else if (cc.state == SCE_C_VERBATIM) { +			if (cc.ch == '\"') { +				if (cc.chNext == '\"') { +					cc.Forward(); +				} else { +					cc.ForwardSetState(SCE_C_DEFAULT);  				} -			} else if (state == SCE_C_VERBATIM) { -				if (ch == '\"') { -					if (chNext == '\"') { -						i++; -						ch = chNext; -						chNext = styler.SafeGetCharAt(i + 1); -					} else { -						styler.ColourTo(i, state); -						state = SCE_C_DEFAULT; -					} +			} +		} else if (cc.state == SCE_C_UUID) { +			if (cc.ch == '\r' || cc.ch == '\n' || cc.ch == ')') { +				cc.SetState(SCE_C_DEFAULT); +			} +		} + +		if (cc.state == SCE_C_DEFAULT) { +			if (cc.Match('@', '\"')) { +				cc.SetState(SCE_C_VERBATIM); +				cc.Forward(); +			} else if (IsADigit(cc.ch) || (cc.ch == '.' && IsADigit(cc.chNext))) { +				if (lastWordWasUUID) { +					cc.SetState(SCE_C_UUID); +					lastWordWasUUID = false; +				} else { +					cc.SetState(SCE_C_NUMBER);  				} -			} else if (state == SCE_C_UUID) { -				if (ch == '\r' || ch == '\n' || ch == ')') { -					styler.ColourTo(i - 1, state); -					if (ch == ')') -						styler.ColourTo(i, SCE_C_OPERATOR); -					state = SCE_C_DEFAULT; +			} else if (IsAWordStart(cc.ch) || (cc.ch == '@')) { +				if (lastWordWasUUID) { +					cc.SetState(SCE_C_UUID); +					lastWordWasUUID = false; +				} else { +					cc.SetState(SCE_C_IDENTIFIER);  				} +			} else if (cc.Match('/', '*')) { +				if (cc.Match("/**") || cc.Match("/*!"))	// Support of Qt/Doxygen doc. style +					cc.SetState(SCE_C_COMMENTDOC); +				else +					cc.SetState(SCE_C_COMMENT); +				cc.Forward();	// Eat the * so it isn't used for the end of the comment +			} else if (cc.Match('/', '/')) { +				if (cc.Match("///") || cc.Match("//!"))	// Support of Qt/Doxygen doc. style +					cc.SetState(SCE_C_COMMENTLINEDOC); +				else +					cc.SetState(SCE_C_COMMENTLINE); +			} else if (cc.ch == '/' && IsOKBeforeRE(chPrevNonWhite)) { +				cc.SetState(SCE_C_REGEX); +			} else if (cc.ch == '\"') { +				cc.SetState(SCE_C_STRING); +			} else if (cc.ch == '\'') { +				cc.SetState(SCE_C_CHARACTER); +			} else if (cc.ch == '#' && visibleChars == 0) { +				// Preprocessor commands are alone on their line +				cc.SetState(SCE_C_PREPROCESSOR); +				// Skip whitespace between # and preprocessor word +				do { +					cc.Forward(); +				} while (IsASpace(cc.ch) && cc.More()); +			} else if (isoperator(cc.ch)) { +				cc.SetState(SCE_C_OPERATOR);  			}  		} - -		if (atEOL) { -			styleBeforeLineStart = state; +		if (cc.atEOL) { +			// Reset states to begining of colourise so no surprises  +			// if different sets of lines lexed. +			chPrevNonWhite = ' ';  			visibleChars = 0; +			lastWordWasUUID = false;  		} -		if (!isspacechar(ch)) +		if (!IsASpace(cc.ch)) { +			chPrevNonWhite = cc.ch;  			visibleChars++; - -		chPrev = ch; -		if (ch != ' ' && ch != '\t') -			chPrevNonWhite = ch; +		}  	} -	styler.ColourTo(lengthDoc - 1, state); +	cc.Complete();  }  static void FoldCppDoc(unsigned int startPos, int length, int initStyle, WordList *[], | 
