diff options
-rw-r--r-- | include/SciLexer.h | 1 | ||||
-rw-r--r-- | include/Scintilla.iface | 1 | ||||
-rw-r--r-- | src/LexCPP.cxx | 483 |
3 files changed, 273 insertions, 212 deletions
diff --git a/include/SciLexer.h b/include/SciLexer.h index b55c5a375..24a9b8978 100644 --- a/include/SciLexer.h +++ b/include/SciLexer.h @@ -70,6 +70,7 @@ #define SCE_C_VERBATIM 13 #define SCE_C_REGEX 14 #define SCE_C_COMMENTLINEDOC 15 +#define SCE_C_WORD2 16 #define SCE_H_DEFAULT 0 #define SCE_H_TAG 1 #define SCE_H_TAGUNKNOWN 2 diff --git a/include/Scintilla.iface b/include/Scintilla.iface index 1431095c3..80579f27c 100644 --- a/include/Scintilla.iface +++ b/include/Scintilla.iface @@ -1262,6 +1262,7 @@ val SCE_C_STRINGEOL=12 val SCE_C_VERBATIM=13 val SCE_C_REGEX=14 val SCE_C_COMMENTLINEDOC=15 +val SCE_C_WORD2=16 # Lexical states for SCLEX_HTML, SCLEX_XML val SCE_H_DEFAULT=0 val SCE_H_TAG=1 diff --git a/src/LexCPP.cxx b/src/LexCPP.cxx index b261e621a..b7ea9f47d 100644 --- a/src/LexCPP.cxx +++ b/src/LexCPP.cxx @@ -19,254 +19,313 @@ #include "Scintilla.h" #include "SciLexer.h" -static bool classifyWordCpp(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { - PLATFORM_ASSERT(end >= start); - char s[100]; - for (unsigned int i = 0; (i < end - start + 1) && (i < 30); i++) { +static bool IsOKBeforeRE(int ch) { + return (ch == '(') || (ch == '=') || (ch == ','); +} + +static void getRange(unsigned int start, + unsigned int end, + Accessor &styler, + char *s, + unsigned int len) { + unsigned int i = 0; + while ((i < end - start + 1) && (i < len-1)) { s[i] = styler[start + i]; - s[i + 1] = '\0'; + i++; } - bool wordIsUUID = false; - char chAttr = SCE_C_IDENTIFIER; - if (isdigit(s[0]) || (s[0] == '.')) - chAttr = SCE_C_NUMBER; - else { - if (keywords.InList(s)) { - chAttr = SCE_C_WORD; - wordIsUUID = strcmp(s, "uuid") == 0; - } - } - styler.ColourTo(end, chAttr); - return wordIsUUID; + s[i] = '\0'; } -static bool isOKBeforeRE(char ch) { - return (ch == '(') || (ch == '=') || (ch == ','); +inline bool IsASpace(int ch) { + return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d)); +} + +inline bool IsAWordChar(int ch) { + return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_'); +} + +inline bool IsAWordStart(int ch) { + return (ch < 0x80) && (isalnum(ch) || ch == '_'); +} + +inline bool IsADigit(int ch) { + return (ch >= '0') && (ch <= '9'); } +// All languages handled so far can treat all characters >= 0x80 as one class +// which just continues the current token or starts an identifier if in default. +// DBCS treated specially as the second character can be < 0x80 and hence +// syntactically significant. UTF-8 avoids this as all trail bytes are >= 0x80 +class ColouriseContext { + Accessor &styler; + int lengthDoc; + int currentPos; + ColouriseContext& operator=(const ColouriseContext&) { + return *this; + } +public: + bool atEOL; + int state; + int chPrev; + int ch; + int chNext; + + ColouriseContext(unsigned int startPos, int length, + int initStyle, Accessor &styler_) : + styler(styler_), + lengthDoc(startPos + length), + currentPos(startPos), + atEOL(false), + state(initStyle), + chPrev(0), + ch(0), + chNext(0) { + styler.StartAt(startPos); + styler.StartSegment(startPos); + int pos = currentPos; + ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos)); + if (styler.IsLeadByte(static_cast<char>(ch))) { + pos++; + ch = ch << 8; + ch |= static_cast<unsigned char>(styler.SafeGetCharAt(pos)); + } + chNext = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1)); + if (styler.IsLeadByte(static_cast<char>(chNext))) { + chNext = chNext << 8; + chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(pos+2)); + } + atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); + } + void Complete() { + styler.ColourTo(currentPos - 1, state); + } + bool More() { + return currentPos <= lengthDoc; + } + void Forward() { + // A lot of this is repeated from the constructor - TODO: merge code + chPrev = ch; + currentPos++; + if (ch >= 0x100) + currentPos++; + ch = chNext; + chNext = static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+1)); + if (styler.IsLeadByte(static_cast<char>(chNext))) { + chNext = chNext << 8; + chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + 2)); + } + // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix) + // Avoid triggering two times on Dos/Win + // End of line + atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); + } + void ChangeState(int state_) { + state = state_; + } + void SetState(int state_) { + styler.ColourTo(currentPos - 1, state); + state = state_; + } + void ForwardSetState(int state_) { + Forward(); + styler.ColourTo(currentPos - 1, state); + state = state_; + } + void GetCurrent(char *s, int len) { + getRange(styler.GetStartSegment(), currentPos - 1, styler, s, len); + } + int LengthCurrent() { + return currentPos - styler.GetStartSegment(); + } + bool Match(char ch0) { + return ch == ch0; + } + bool Match(char ch0, char ch1) { + return (ch == ch0) && (chNext == ch1); + } + bool Match(const char *s) { + if (ch != *s) + return false; + s++; + if (chNext != *s) + return false; + s++; + for (int n=2; *s; n++) { + if (*s != styler.SafeGetCharAt(currentPos+n)) + return false; + s++; + } + return true; + } +}; + static void ColouriseCppDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[], Accessor &styler) { WordList &keywords = *keywordlists[0]; - - styler.StartAt(startPos); + WordList &keywords2 = *keywordlists[1]; bool stylingWithinPreprocessor = styler.GetPropertyInt("styling.within.preprocessor"); - //int lineCurrent = styler.GetLine(startPos); - int state = initStyle; - int styleBeforeLineStart = initStyle; - if (state == SCE_C_STRINGEOL) // Does not leak onto next line - state = SCE_C_DEFAULT; - char chPrev = ' '; - char chNext = styler[startPos]; + if (initStyle == SCE_C_STRINGEOL) // Does not leak onto next line + initStyle = SCE_C_DEFAULT; + char chPrevNonWhite = ' '; - unsigned int lengthDoc = startPos + length; int visibleChars = 0; - styler.StartSegment(startPos); bool lastWordWasUUID = false; - for (unsigned int i = startPos; i < lengthDoc; i++) { - char ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); - if (atEOL) { - // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix) - // Avoid triggering two times on Dos/Win - // End of line - if (state == SCE_C_STRINGEOL) { - styler.ColourTo(i, state); - state = SCE_C_DEFAULT; - } - } + ColouriseContext cc(startPos, length, initStyle, styler); - if (styler.IsLeadByte(ch)) { - chNext = styler.SafeGetCharAt(i + 2); - chPrev = ' '; - i += 1; - continue; - } + for (; cc.More(); cc.Forward()) { - if (state == SCE_C_DEFAULT) { - if (ch == '@' && chNext == '\"') { - styler.ColourTo(i - 1, state); - state = SCE_C_VERBATIM; - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - } else if (iswordstart(ch) || (ch == '@')) { - styler.ColourTo(i - 1, state); - if (lastWordWasUUID) { - state = SCE_C_UUID; - lastWordWasUUID = false; - } else { - state = SCE_C_IDENTIFIER; - } - } else if (ch == '/' && chNext == '*') { - styler.ColourTo(i - 1, state); - if (styler.SafeGetCharAt(i + 2) == '*' || - styler.SafeGetCharAt(i + 2) == '!') // Support of Qt/Doxygen doc. style - state = SCE_C_COMMENTDOC; - else - state = SCE_C_COMMENT; - } else if (ch == '/' && chNext == '/') { - styler.ColourTo(i - 1, state); - if (styler.SafeGetCharAt(i + 2) == '/' || - styler.SafeGetCharAt(i + 2) == '!') // Support of Qt/Doxygen doc. style - state = SCE_C_COMMENTLINEDOC; - else - state = SCE_C_COMMENTLINE; - } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) { - styler.ColourTo(i - 1, state); - state = SCE_C_REGEX; - } else if (ch == '\"') { - styler.ColourTo(i - 1, state); - state = SCE_C_STRING; - } else if (ch == '\'') { - styler.ColourTo(i - 1, state); - state = SCE_C_CHARACTER; - } else if (ch == '#' && visibleChars == 0) { - // Preprocessor commands are alone on their line - styler.ColourTo(i - 1, state); - state = SCE_C_PREPROCESSOR; - // Skip whitespace between # and preprocessor word - do { - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - } while (isspacechar(ch) && (i < lengthDoc)); - } else if (isoperator(ch)) { - styler.ColourTo(i-1, state); - styler.ColourTo(i, SCE_C_OPERATOR); + if (cc.state == SCE_C_STRINGEOL) { + if (cc.atEOL) { + cc.SetState(SCE_C_DEFAULT); } - } else if (state == SCE_C_IDENTIFIER) { - if (!iswordchar(ch)) { - lastWordWasUUID = classifyWordCpp(styler.GetStartSegment(), i - 1, keywords, styler); - state = SCE_C_DEFAULT; - if (ch == '/' && chNext == '*') { - if (styler.SafeGetCharAt(i + 2) == '*') - state = SCE_C_COMMENTDOC; - else - state = SCE_C_COMMENT; - } else if (ch == '/' && chNext == '/') { - state = SCE_C_COMMENTLINE; - } else if (ch == '\"') { - state = SCE_C_STRING; - } else if (ch == '\'') { - state = SCE_C_CHARACTER; - } else if (isoperator(ch)) { - styler.ColourTo(i, SCE_C_OPERATOR); - } + } else if (cc.state == SCE_C_OPERATOR) { + cc.SetState(SCE_C_DEFAULT); + } else if (cc.state == SCE_C_NUMBER) { + if (!IsAWordChar(cc.ch)) { + cc.SetState(SCE_C_DEFAULT); } - } else { - if (state == SCE_C_PREPROCESSOR) { - if (stylingWithinPreprocessor) { - if (isspacechar(ch)) { - styler.ColourTo(i - 1, state); - state = SCE_C_DEFAULT; - } - } else { - if (atEOL && (chPrev != '\\')) { - styler.ColourTo(i - 1, state); - state = SCE_C_DEFAULT; - } + } else if (cc.state == SCE_C_IDENTIFIER) { + if (!IsAWordChar(cc.ch) || (cc.ch == '.')) { + char s[100]; + cc.GetCurrent(s, sizeof(s)); + if (keywords.InList(s)) { + lastWordWasUUID = strcmp(s, "uuid") == 0; + cc.ChangeState(SCE_C_WORD); + } else if (keywords2.InList(s)) { + cc.ChangeState(SCE_C_WORD2); } - } else if (state == SCE_C_COMMENT) { - if (ch == '/' && chPrev == '*') { - if (((i > styler.GetStartSegment() + 2) || ( - (styleBeforeLineStart == SCE_C_COMMENT) && - (i > styler.GetStartSegment())))) { - styler.ColourTo(i, state); - state = SCE_C_DEFAULT; - } + cc.SetState(SCE_C_DEFAULT); + } + } if (cc.state == SCE_C_PREPROCESSOR) { + if (stylingWithinPreprocessor) { + if (IsASpace(cc.ch)) { + cc.SetState(SCE_C_DEFAULT); } - } else if (state == SCE_C_COMMENTDOC) { - if (ch == '/' && chPrev == '*') { - if (((i > styler.GetStartSegment() + 2) || ( - (styleBeforeLineStart == SCE_C_COMMENTDOC) && - (i > styler.GetStartSegment())))) { - styler.ColourTo(i, state); - state = SCE_C_DEFAULT; - } + } else { + if (cc.atEOL && (cc.chPrev != '\\')) { + cc.SetState(SCE_C_DEFAULT); } - } else if (state == SCE_C_COMMENTLINE || state == SCE_C_COMMENTLINEDOC) { - if (ch == '\r' || ch == '\n') { - styler.ColourTo(i - 1, state); - state = SCE_C_DEFAULT; + } + } else if (cc.state == SCE_C_COMMENT) { + if (cc.Match('*', '/')) { + cc.Forward(); + cc.ForwardSetState(SCE_C_DEFAULT); + } + } else if (cc.state == SCE_C_COMMENTDOC) { + if (cc.Match('*', '/')) { + cc.Forward(); + cc.ForwardSetState(SCE_C_DEFAULT); + } + } else if (cc.state == SCE_C_COMMENTLINE || cc.state == SCE_C_COMMENTLINEDOC) { + if (cc.ch == '\r' || cc.ch == '\n') { + cc.SetState(SCE_C_DEFAULT); + } + } else if (cc.state == SCE_C_STRING) { + if (cc.ch == '\\') { + if (cc.chNext == '\"' || cc.chNext == '\'' || cc.chNext == '\\') { + cc.Forward(); } - } else if (state == SCE_C_STRING) { - if (ch == '\\') { - if (chNext == '\"' || chNext == '\'' || chNext == '\\') { - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - } - } else if (ch == '\"') { - styler.ColourTo(i, state); - state = SCE_C_DEFAULT; - } else if ((chNext == '\r' || chNext == '\n') && (chPrev != '\\')) { - styler.ColourTo(i - 1, SCE_C_STRINGEOL); - state = SCE_C_STRINGEOL; + } else if (cc.ch == '\"') { + cc.ForwardSetState(SCE_C_DEFAULT); + } else if ((cc.atEOL) && (cc.chPrev != '\\')) { + cc.ChangeState(SCE_C_STRINGEOL); + } + } else if (cc.state == SCE_C_CHARACTER) { + if ((cc.ch == '\r' || cc.ch == '\n') && (cc.chPrev != '\\')) { + cc.ChangeState(SCE_C_STRINGEOL); + } else if (cc.ch == '\\') { + if (cc.chNext == '\"' || cc.chNext == '\'' || cc.chNext == '\\') { + cc.Forward(); } - } else if (state == SCE_C_CHARACTER) { - if ((ch == '\r' || ch == '\n') && (chPrev != '\\')) { - styler.ColourTo(i - 1, SCE_C_STRINGEOL); - state = SCE_C_STRINGEOL; - } else if (ch == '\\') { - if (chNext == '\"' || chNext == '\'' || chNext == '\\') { - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - } - } else if (ch == '\'') { - styler.ColourTo(i, state); - state = SCE_C_DEFAULT; + } else if (cc.ch == '\'') { + cc.ForwardSetState(SCE_C_DEFAULT); + } + } else if (cc.state == SCE_C_REGEX) { + if (cc.ch == '\r' || cc.ch == '\n' || cc.ch == '/') { + cc.ForwardSetState(SCE_C_DEFAULT); + } else if (cc.ch == '\\') { + // Gobble up the quoted character + if (cc.chNext == '\\' || cc.chNext == '/') { + cc.Forward(); } - } else if (state == SCE_C_REGEX) { - if (ch == '\r' || ch == '\n' || ch == '/') { - styler.ColourTo(i, state); - state = SCE_C_DEFAULT; - } else if (ch == '\\') { - // Gobble up the quoted character - if (chNext == '\\' || chNext == '/') { - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - } + } + } else if (cc.state == SCE_C_VERBATIM) { + if (cc.ch == '\"') { + if (cc.chNext == '\"') { + cc.Forward(); + } else { + cc.ForwardSetState(SCE_C_DEFAULT); } - } else if (state == SCE_C_VERBATIM) { - if (ch == '\"') { - if (chNext == '\"') { - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - } else { - styler.ColourTo(i, state); - state = SCE_C_DEFAULT; - } + } + } else if (cc.state == SCE_C_UUID) { + if (cc.ch == '\r' || cc.ch == '\n' || cc.ch == ')') { + cc.SetState(SCE_C_DEFAULT); + } + } + + if (cc.state == SCE_C_DEFAULT) { + if (cc.Match('@', '\"')) { + cc.SetState(SCE_C_VERBATIM); + cc.Forward(); + } else if (IsADigit(cc.ch) || (cc.ch == '.' && IsADigit(cc.chNext))) { + if (lastWordWasUUID) { + cc.SetState(SCE_C_UUID); + lastWordWasUUID = false; + } else { + cc.SetState(SCE_C_NUMBER); } - } else if (state == SCE_C_UUID) { - if (ch == '\r' || ch == '\n' || ch == ')') { - styler.ColourTo(i - 1, state); - if (ch == ')') - styler.ColourTo(i, SCE_C_OPERATOR); - state = SCE_C_DEFAULT; + } else if (IsAWordStart(cc.ch) || (cc.ch == '@')) { + if (lastWordWasUUID) { + cc.SetState(SCE_C_UUID); + lastWordWasUUID = false; + } else { + cc.SetState(SCE_C_IDENTIFIER); } + } else if (cc.Match('/', '*')) { + if (cc.Match("/**") || cc.Match("/*!")) // Support of Qt/Doxygen doc. style + cc.SetState(SCE_C_COMMENTDOC); + else + cc.SetState(SCE_C_COMMENT); + cc.Forward(); // Eat the * so it isn't used for the end of the comment + } else if (cc.Match('/', '/')) { + if (cc.Match("///") || cc.Match("//!")) // Support of Qt/Doxygen doc. style + cc.SetState(SCE_C_COMMENTLINEDOC); + else + cc.SetState(SCE_C_COMMENTLINE); + } else if (cc.ch == '/' && IsOKBeforeRE(chPrevNonWhite)) { + cc.SetState(SCE_C_REGEX); + } else if (cc.ch == '\"') { + cc.SetState(SCE_C_STRING); + } else if (cc.ch == '\'') { + cc.SetState(SCE_C_CHARACTER); + } else if (cc.ch == '#' && visibleChars == 0) { + // Preprocessor commands are alone on their line + cc.SetState(SCE_C_PREPROCESSOR); + // Skip whitespace between # and preprocessor word + do { + cc.Forward(); + } while (IsASpace(cc.ch) && cc.More()); + } else if (isoperator(cc.ch)) { + cc.SetState(SCE_C_OPERATOR); } } - - if (atEOL) { - styleBeforeLineStart = state; + if (cc.atEOL) { + // Reset states to begining of colourise so no surprises + // if different sets of lines lexed. + chPrevNonWhite = ' '; visibleChars = 0; + lastWordWasUUID = false; } - if (!isspacechar(ch)) + if (!IsASpace(cc.ch)) { + chPrevNonWhite = cc.ch; visibleChars++; - - chPrev = ch; - if (ch != ' ' && ch != '\t') - chPrevNonWhite = ch; + } } - styler.ColourTo(lengthDoc - 1, state); + cc.Complete(); } static void FoldCppDoc(unsigned int startPos, int length, int initStyle, WordList *[], |