diff options
| author | nyamatongwe <devnull@localhost> | 2001-05-31 07:59:13 +0000 | 
|---|---|---|
| committer | nyamatongwe <devnull@localhost> | 2001-05-31 07:59:13 +0000 | 
| commit | 997a00b76b70b5f5ca2ba8918d0f70d9748834b1 (patch) | |
| tree | 2d4c39c9fbd6df3afd5935a123d6c709b5062fcb | |
| parent | f2564bf50771ff7a5b32fa5336fd0065dcd884a6 (diff) | |
| download | scintilla-mirror-997a00b76b70b5f5ca2ba8918d0f70d9748834b1.tar.gz | |
Updated to use new ColouriseContext class.
More accurate handling of comments.
Reduced the possibility of lexing results differing depending on which
sets of lines are lexed together.
A second set of keywords is available for tasks such as highlighting an API.
| -rw-r--r-- | include/SciLexer.h | 1 | ||||
| -rw-r--r-- | include/Scintilla.iface | 1 | ||||
| -rw-r--r-- | src/LexCPP.cxx | 483 | 
3 files changed, 273 insertions, 212 deletions
| diff --git a/include/SciLexer.h b/include/SciLexer.h index b55c5a375..24a9b8978 100644 --- a/include/SciLexer.h +++ b/include/SciLexer.h @@ -70,6 +70,7 @@  #define SCE_C_VERBATIM 13  #define SCE_C_REGEX 14  #define SCE_C_COMMENTLINEDOC 15 +#define SCE_C_WORD2 16  #define SCE_H_DEFAULT 0  #define SCE_H_TAG 1  #define SCE_H_TAGUNKNOWN 2 diff --git a/include/Scintilla.iface b/include/Scintilla.iface index 1431095c3..80579f27c 100644 --- a/include/Scintilla.iface +++ b/include/Scintilla.iface @@ -1262,6 +1262,7 @@ val SCE_C_STRINGEOL=12  val SCE_C_VERBATIM=13  val SCE_C_REGEX=14  val SCE_C_COMMENTLINEDOC=15 +val SCE_C_WORD2=16  # Lexical states for SCLEX_HTML, SCLEX_XML  val SCE_H_DEFAULT=0  val SCE_H_TAG=1 diff --git a/src/LexCPP.cxx b/src/LexCPP.cxx index b261e621a..b7ea9f47d 100644 --- a/src/LexCPP.cxx +++ b/src/LexCPP.cxx @@ -19,254 +19,313 @@  #include "Scintilla.h"  #include "SciLexer.h" -static bool classifyWordCpp(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { -	PLATFORM_ASSERT(end >= start); -	char s[100]; -	for (unsigned int i = 0; (i < end - start + 1) && (i < 30); i++) { +static bool IsOKBeforeRE(int ch) { +	return (ch == '(') || (ch == '=') || (ch == ','); +} + +static void getRange(unsigned int start, +		unsigned int end, +		Accessor &styler, +		char *s, +		unsigned int len) { +	unsigned int i = 0; +	while ((i < end - start + 1) && (i < len-1)) {  		s[i] = styler[start + i]; -		s[i + 1] = '\0'; +		i++;  	} -	bool wordIsUUID = false; -	char chAttr = SCE_C_IDENTIFIER; -	if (isdigit(s[0]) || (s[0] == '.')) -		chAttr = SCE_C_NUMBER; -	else { -		if (keywords.InList(s)) { -			chAttr = SCE_C_WORD; -			wordIsUUID = strcmp(s, "uuid") == 0; -		} -	} -	styler.ColourTo(end, chAttr); -	return wordIsUUID; +	s[i] = '\0';  } -static bool isOKBeforeRE(char ch) { -	return (ch == '(') || (ch == '=') || (ch == ','); +inline bool IsASpace(int ch) { +    return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d)); +} + +inline bool IsAWordChar(int  
ch) { +	return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_'); +} + +inline bool IsAWordStart(int ch) { +	return (ch < 0x80) && (isalnum(ch) || ch == '_'); +} + +inline bool IsADigit(int ch) { +	return (ch >= '0') && (ch <= '9');  } +// All languages handled so far can treat all characters >= 0x80 as one class +// which just continues the current token or starts an identifier if in default. +// DBCS treated specially as the second character can be < 0x80 and hence  +// syntactically significant. UTF-8 avoids this as all trail bytes are >= 0x80 +class ColouriseContext { +	Accessor &styler; +	int lengthDoc; +	int currentPos; +	ColouriseContext& operator=(const ColouriseContext&) { +		return *this; +	} +public: +	bool atEOL; +	int state; +	int chPrev; +	int ch; +	int chNext; + +	ColouriseContext(unsigned int startPos, int length, +                        int initStyle, Accessor &styler_) :  +		styler(styler_), +		lengthDoc(startPos + length), +		currentPos(startPos),  +		atEOL(false), +		state(initStyle),  +		chPrev(0), +		ch(0),  +		chNext(0) { +		styler.StartAt(startPos); +		styler.StartSegment(startPos); +		int pos = currentPos; +		ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos)); +		if (styler.IsLeadByte(static_cast<char>(ch))) { +			pos++; +			ch = ch << 8; +			ch |= static_cast<unsigned char>(styler.SafeGetCharAt(pos)); +		} +		chNext = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1)); +		if (styler.IsLeadByte(static_cast<char>(chNext))) { +			chNext = chNext << 8; +			chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(pos+2)); +		} +		atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); +	} +	void Complete() { +		styler.ColourTo(currentPos - 1, state); +	} +	bool More() { +		return currentPos <= lengthDoc; +	} +	void Forward() { +		// A lot of this is repeated from the constructor - TODO: merge code +		chPrev = ch; +		currentPos++; +		if (ch >= 0x100) +			currentPos++; +		ch = chNext; +		chNext = static_cast<unsigned 
char>(styler.SafeGetCharAt(currentPos+1)); +		if (styler.IsLeadByte(static_cast<char>(chNext))) { +			chNext = chNext << 8; +			chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + 2)); +		} +		// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix) +		// Avoid triggering two times on Dos/Win +		// End of line +		atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); +	} +	void ChangeState(int state_) { +		state = state_; +	} +	void SetState(int state_) { +		styler.ColourTo(currentPos - 1, state); +		state = state_; +	} +	void ForwardSetState(int state_) { +		Forward(); +		styler.ColourTo(currentPos - 1, state); +		state = state_; +	} +	void GetCurrent(char *s, int len) { +		getRange(styler.GetStartSegment(), currentPos - 1, styler, s, len); +	} +	int LengthCurrent() { +		return currentPos - styler.GetStartSegment(); +	} +	bool Match(char ch0) { +		return ch == ch0; +	} +	bool Match(char ch0, char ch1) { +		return (ch == ch0) && (chNext == ch1); +	} +	bool Match(const char *s) { +		if (ch != *s) +			return false; +		s++; +		if (chNext != *s) +			return false; +		s++; +		for (int n=2; *s; n++) { +			if (*s != styler.SafeGetCharAt(currentPos+n)) +				return false; +			s++; +		} +		return true; +	} +}; +  static void ColouriseCppDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],                              Accessor &styler) {  	WordList &keywords = *keywordlists[0]; - -	styler.StartAt(startPos); +	WordList &keywords2 = *keywordlists[1];  	bool stylingWithinPreprocessor = styler.GetPropertyInt("styling.within.preprocessor"); -	//int lineCurrent = styler.GetLine(startPos); -	int state = initStyle; -	int styleBeforeLineStart = initStyle; -	if (state == SCE_C_STRINGEOL)	// Does not leak onto next line -		state = SCE_C_DEFAULT; -	char chPrev = ' '; -	char chNext = styler[startPos]; +	if (initStyle == SCE_C_STRINGEOL)	// Does not leak onto next line +		initStyle = SCE_C_DEFAULT; +  	char 
chPrevNonWhite = ' '; -	unsigned int lengthDoc = startPos + length;  	int visibleChars = 0; -	styler.StartSegment(startPos);  	bool lastWordWasUUID = false; -	for (unsigned int i = startPos; i < lengthDoc; i++) { -		char ch = chNext; -		chNext = styler.SafeGetCharAt(i + 1); -		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); -		if (atEOL) { -			// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix) -			// Avoid triggering two times on Dos/Win -			// End of line -			if (state == SCE_C_STRINGEOL) { -				styler.ColourTo(i, state); -				state = SCE_C_DEFAULT; -			} -		} +	ColouriseContext cc(startPos, length, initStyle, styler); -		if (styler.IsLeadByte(ch)) { -			chNext = styler.SafeGetCharAt(i + 2); -			chPrev = ' '; -			i += 1; -			continue; -		} +	for (; cc.More(); cc.Forward()) { -		if (state == SCE_C_DEFAULT) { -			if (ch == '@' && chNext == '\"') { -				styler.ColourTo(i - 1, state); -				state = SCE_C_VERBATIM; -				i++; -				ch = chNext; -				chNext = styler.SafeGetCharAt(i + 1); -			} else if (iswordstart(ch) || (ch == '@')) { -				styler.ColourTo(i - 1, state); -				if (lastWordWasUUID) { -					state = SCE_C_UUID; -					lastWordWasUUID = false; -				} else { -					state = SCE_C_IDENTIFIER; -				} -			} else if (ch == '/' && chNext == '*') { -				styler.ColourTo(i - 1, state); -				if (styler.SafeGetCharAt(i + 2) == '*' || -				        styler.SafeGetCharAt(i + 2) == '!')	// Support of Qt/Doxygen doc. style -					state = SCE_C_COMMENTDOC; -				else -					state = SCE_C_COMMENT; -			} else if (ch == '/' && chNext == '/') { -				styler.ColourTo(i - 1, state); -				if (styler.SafeGetCharAt(i + 2) == '/' || -				        styler.SafeGetCharAt(i + 2) == '!')	// Support of Qt/Doxygen doc. 
style -					state = SCE_C_COMMENTLINEDOC; -				else -					state = SCE_C_COMMENTLINE; -			} else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) { -				styler.ColourTo(i - 1, state); -				state = SCE_C_REGEX; -			} else if (ch == '\"') { -				styler.ColourTo(i - 1, state); -				state = SCE_C_STRING; -			} else if (ch == '\'') { -				styler.ColourTo(i - 1, state); -				state = SCE_C_CHARACTER; -			} else if (ch == '#' && visibleChars == 0) { -				// Preprocessor commands are alone on their line -				styler.ColourTo(i - 1, state); -				state = SCE_C_PREPROCESSOR; -				// Skip whitespace between # and preprocessor word -				do { -					i++; -					ch = chNext; -					chNext = styler.SafeGetCharAt(i + 1); -				} while (isspacechar(ch) && (i < lengthDoc)); -			} else if (isoperator(ch)) { -				styler.ColourTo(i-1, state); -				styler.ColourTo(i, SCE_C_OPERATOR); +		if (cc.state == SCE_C_STRINGEOL) { +			if (cc.atEOL) { +				cc.SetState(SCE_C_DEFAULT);  			} -		} else if (state == SCE_C_IDENTIFIER) { -			if (!iswordchar(ch)) { -				lastWordWasUUID = classifyWordCpp(styler.GetStartSegment(), i - 1, keywords, styler); -				state = SCE_C_DEFAULT; -				if (ch == '/' && chNext == '*') { -					if (styler.SafeGetCharAt(i + 2) == '*') -						state = SCE_C_COMMENTDOC; -					else -						state = SCE_C_COMMENT; -				} else if (ch == '/' && chNext == '/') { -					state = SCE_C_COMMENTLINE; -				} else if (ch == '\"') { -					state = SCE_C_STRING; -				} else if (ch == '\'') { -					state = SCE_C_CHARACTER; -				} else if (isoperator(ch)) { -					styler.ColourTo(i, SCE_C_OPERATOR); -				} +		} else if (cc.state == SCE_C_OPERATOR) { +			cc.SetState(SCE_C_DEFAULT); +		} else if (cc.state == SCE_C_NUMBER) { +			if (!IsAWordChar(cc.ch)) { +				cc.SetState(SCE_C_DEFAULT);  			} -		} else { -			if (state == SCE_C_PREPROCESSOR) { -				if (stylingWithinPreprocessor) { -					if (isspacechar(ch)) { -						styler.ColourTo(i - 1, state); -						state = SCE_C_DEFAULT; -					} -				} else { -					if 
(atEOL && (chPrev != '\\')) { -						styler.ColourTo(i - 1, state); -						state = SCE_C_DEFAULT; -					} +		} else if (cc.state == SCE_C_IDENTIFIER) { +			if (!IsAWordChar(cc.ch) || (cc.ch == '.')) { +				char s[100]; +				cc.GetCurrent(s, sizeof(s)); +				if (keywords.InList(s)) { +					lastWordWasUUID = strcmp(s, "uuid") == 0; +					cc.ChangeState(SCE_C_WORD); +				} else if (keywords2.InList(s)) { +					cc.ChangeState(SCE_C_WORD2);  				} -			} else if (state == SCE_C_COMMENT) { -				if (ch == '/' && chPrev == '*') { -					if (((i > styler.GetStartSegment() + 2) || ( -					            (styleBeforeLineStart == SCE_C_COMMENT) && -					            (i > styler.GetStartSegment())))) { -						styler.ColourTo(i, state); -						state = SCE_C_DEFAULT; -					} +				cc.SetState(SCE_C_DEFAULT); +			} +		} if (cc.state == SCE_C_PREPROCESSOR) { +			if (stylingWithinPreprocessor) { +				if (IsASpace(cc.ch)) { +					cc.SetState(SCE_C_DEFAULT);  				} -			} else if (state == SCE_C_COMMENTDOC) { -				if (ch == '/' && chPrev == '*') { -					if (((i > styler.GetStartSegment() + 2) || ( -					            (styleBeforeLineStart == SCE_C_COMMENTDOC) && -					            (i > styler.GetStartSegment())))) { -						styler.ColourTo(i, state); -						state = SCE_C_DEFAULT; -					} +			} else { +				if (cc.atEOL && (cc.chPrev != '\\')) { +					cc.SetState(SCE_C_DEFAULT);  				} -			} else if (state == SCE_C_COMMENTLINE || state == SCE_C_COMMENTLINEDOC) { -				if (ch == '\r' || ch == '\n') { -					styler.ColourTo(i - 1, state); -					state = SCE_C_DEFAULT; +			} +		} else if (cc.state == SCE_C_COMMENT) { +			if (cc.Match('*', '/')) { +				cc.Forward(); +				cc.ForwardSetState(SCE_C_DEFAULT); +			} +		} else if (cc.state == SCE_C_COMMENTDOC) { +			if (cc.Match('*', '/')) { +				cc.Forward(); +				cc.ForwardSetState(SCE_C_DEFAULT); +			} +		} else if (cc.state == SCE_C_COMMENTLINE || cc.state == SCE_C_COMMENTLINEDOC) { +			if (cc.ch == '\r' || cc.ch == '\n') { +				cc.SetState(SCE_C_DEFAULT); 
+			} +		} else if (cc.state == SCE_C_STRING) { +			if (cc.ch == '\\') { +				if (cc.chNext == '\"' || cc.chNext == '\'' || cc.chNext == '\\') { +					cc.Forward();  				} -			} else if (state == SCE_C_STRING) { -				if (ch == '\\') { -					if (chNext == '\"' || chNext == '\'' || chNext == '\\') { -						i++; -						ch = chNext; -						chNext = styler.SafeGetCharAt(i + 1); -					} -				} else if (ch == '\"') { -					styler.ColourTo(i, state); -					state = SCE_C_DEFAULT; -				} else if ((chNext == '\r' || chNext == '\n') && (chPrev != '\\')) { -					styler.ColourTo(i - 1, SCE_C_STRINGEOL); -					state = SCE_C_STRINGEOL; +			} else if (cc.ch == '\"') { +				cc.ForwardSetState(SCE_C_DEFAULT); +			} else if ((cc.atEOL) && (cc.chPrev != '\\')) { +				cc.ChangeState(SCE_C_STRINGEOL); +			} +		} else if (cc.state == SCE_C_CHARACTER) { +			if ((cc.ch == '\r' || cc.ch == '\n') && (cc.chPrev != '\\')) { +				cc.ChangeState(SCE_C_STRINGEOL); +			} else if (cc.ch == '\\') { +				if (cc.chNext == '\"' || cc.chNext == '\'' || cc.chNext == '\\') { +					cc.Forward();  				} -			} else if (state == SCE_C_CHARACTER) { -				if ((ch == '\r' || ch == '\n') && (chPrev != '\\')) { -					styler.ColourTo(i - 1, SCE_C_STRINGEOL); -					state = SCE_C_STRINGEOL; -				} else if (ch == '\\') { -					if (chNext == '\"' || chNext == '\'' || chNext == '\\') { -						i++; -						ch = chNext; -						chNext = styler.SafeGetCharAt(i + 1); -					} -				} else if (ch == '\'') { -					styler.ColourTo(i, state); -					state = SCE_C_DEFAULT; +			} else if (cc.ch == '\'') { +				cc.ForwardSetState(SCE_C_DEFAULT); +			} +		} else if (cc.state == SCE_C_REGEX) { +			if (cc.ch == '\r' || cc.ch == '\n' || cc.ch == '/') { +				cc.ForwardSetState(SCE_C_DEFAULT); +			} else if (cc.ch == '\\') { +				// Gobble up the quoted character +				if (cc.chNext == '\\' || cc.chNext == '/') { +					cc.Forward();  				} -			} else if (state == SCE_C_REGEX) { -				if (ch == '\r' || ch == '\n' || ch == '/') { -					
styler.ColourTo(i, state); -					state = SCE_C_DEFAULT; -				} else if (ch == '\\') { -					// Gobble up the quoted character -					if (chNext == '\\' || chNext == '/') { -						i++; -						ch = chNext; -						chNext = styler.SafeGetCharAt(i + 1); -					} +			} +		} else if (cc.state == SCE_C_VERBATIM) { +			if (cc.ch == '\"') { +				if (cc.chNext == '\"') { +					cc.Forward(); +				} else { +					cc.ForwardSetState(SCE_C_DEFAULT);  				} -			} else if (state == SCE_C_VERBATIM) { -				if (ch == '\"') { -					if (chNext == '\"') { -						i++; -						ch = chNext; -						chNext = styler.SafeGetCharAt(i + 1); -					} else { -						styler.ColourTo(i, state); -						state = SCE_C_DEFAULT; -					} +			} +		} else if (cc.state == SCE_C_UUID) { +			if (cc.ch == '\r' || cc.ch == '\n' || cc.ch == ')') { +				cc.SetState(SCE_C_DEFAULT); +			} +		} + +		if (cc.state == SCE_C_DEFAULT) { +			if (cc.Match('@', '\"')) { +				cc.SetState(SCE_C_VERBATIM); +				cc.Forward(); +			} else if (IsADigit(cc.ch) || (cc.ch == '.' && IsADigit(cc.chNext))) { +				if (lastWordWasUUID) { +					cc.SetState(SCE_C_UUID); +					lastWordWasUUID = false; +				} else { +					cc.SetState(SCE_C_NUMBER);  				} -			} else if (state == SCE_C_UUID) { -				if (ch == '\r' || ch == '\n' || ch == ')') { -					styler.ColourTo(i - 1, state); -					if (ch == ')') -						styler.ColourTo(i, SCE_C_OPERATOR); -					state = SCE_C_DEFAULT; +			} else if (IsAWordStart(cc.ch) || (cc.ch == '@')) { +				if (lastWordWasUUID) { +					cc.SetState(SCE_C_UUID); +					lastWordWasUUID = false; +				} else { +					cc.SetState(SCE_C_IDENTIFIER);  				} +			} else if (cc.Match('/', '*')) { +				if (cc.Match("/**") || cc.Match("/*!"))	// Support of Qt/Doxygen doc. style +					cc.SetState(SCE_C_COMMENTDOC); +				else +					cc.SetState(SCE_C_COMMENT); +				cc.Forward();	// Eat the * so it isn't used for the end of the comment +			} else if (cc.Match('/', '/')) { +				if (cc.Match("///") || cc.Match("//!"))	// Support of Qt/Doxygen doc. 
style +					cc.SetState(SCE_C_COMMENTLINEDOC); +				else +					cc.SetState(SCE_C_COMMENTLINE); +			} else if (cc.ch == '/' && IsOKBeforeRE(chPrevNonWhite)) { +				cc.SetState(SCE_C_REGEX); +			} else if (cc.ch == '\"') { +				cc.SetState(SCE_C_STRING); +			} else if (cc.ch == '\'') { +				cc.SetState(SCE_C_CHARACTER); +			} else if (cc.ch == '#' && visibleChars == 0) { +				// Preprocessor commands are alone on their line +				cc.SetState(SCE_C_PREPROCESSOR); +				// Skip whitespace between # and preprocessor word +				do { +					cc.Forward(); +				} while (IsASpace(cc.ch) && cc.More()); +			} else if (isoperator(cc.ch)) { +				cc.SetState(SCE_C_OPERATOR);  			}  		} - -		if (atEOL) { -			styleBeforeLineStart = state; +		if (cc.atEOL) { +			// Reset states to begining of colourise so no surprises  +			// if different sets of lines lexed. +			chPrevNonWhite = ' ';  			visibleChars = 0; +			lastWordWasUUID = false;  		} -		if (!isspacechar(ch)) +		if (!IsASpace(cc.ch)) { +			chPrevNonWhite = cc.ch;  			visibleChars++; - -		chPrev = ch; -		if (ch != ' ' && ch != '\t') -			chPrevNonWhite = ch; +		}  	} -	styler.ColourTo(lengthDoc - 1, state); +	cc.Complete();  }  static void FoldCppDoc(unsigned int startPos, int length, int initStyle, WordList *[], | 
