diff options
-rw-r--r-- | lexers/LexBash.cxx | 220 |
1 files changed, 183 insertions, 37 deletions
diff --git a/lexers/LexBash.cxx b/lexers/LexBash.cxx index 5038bd1ef..987249dbc 100644 --- a/lexers/LexBash.cxx +++ b/lexers/LexBash.cxx @@ -2,7 +2,7 @@ /** @file LexBash.cxx ** Lexer for Bash. **/ -// Copyright 2004-2008 by Neil Hodgson <neilh@scintilla.org> +// Copyright 2004-2010 by Neil Hodgson <neilh@scintilla.org> // Adapted from LexPerl by Kein-Hong Man 2004 // The License.txt file describes the conditions under which this software may be distributed. @@ -28,20 +28,28 @@ using namespace Scintilla; #endif -#define HERE_DELIM_MAX 256 +#define HERE_DELIM_MAX 256 // define this if you want 'invalid octals' to be marked as errors // usually, this is not a good idea, permissive lexing is better #undef PEDANTIC_OCTAL -#define BASH_BASE_ERROR 65 -#define BASH_BASE_DECIMAL 66 -#define BASH_BASE_HEX 67 +#define BASH_BASE_ERROR 65 +#define BASH_BASE_DECIMAL 66 +#define BASH_BASE_HEX 67 #ifdef PEDANTIC_OCTAL -#define BASH_BASE_OCTAL 68 -#define BASH_BASE_OCTAL_ERROR 69 +#define BASH_BASE_OCTAL 68 +#define BASH_BASE_OCTAL_ERROR 69 #endif +// state constants for parts of a bash command segment +#define BASH_CMD_BODY 0 +#define BASH_CMD_START 1 +#define BASH_CMD_WORD 2 +#define BASH_CMD_TEST 3 +#define BASH_CMD_ARITH 4 +#define BASH_CMD_DELIM 5 + static inline int translateBashDigit(int ch) { if (ch >= '0' && ch <= '9') { return ch - '0'; @@ -82,11 +90,15 @@ static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[], Accessor &styler) { WordList &keywords = *keywordlists[0]; + WordList cmdDelimiter, bashStruct, bashStruct_in; + cmdDelimiter.Set("| || |& & && ; ;; ( ) { }"); + bashStruct.Set("if elif fi while until else then do done esac eval"); + bashStruct_in.Set("for case select"); CharacterSet setWordStart(CharacterSet::setAlpha, "_"); // note that [+-] are often parts of identifiers in shell scripts CharacterSet setWord(CharacterSet::setAlphaNum, "._+-"); - CharacterSet setBashOperator(CharacterSet::setNone, "^&\\%()-+=|{}[]:;>,*/<?!.~@"); + CharacterSet setBashOperator(CharacterSet::setNone, "^&%()-+=|{}[]:;>,*/<?!.~@"); CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn"); CharacterSet setParam(CharacterSet::setAlphaNum, "$_"); CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!"); @@ -146,46 +158,115 @@ static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle, int numBase = 0; int digit; unsigned int endPos = startPos + length; + int cmdState = BASH_CMD_START; + int testExprType = 0; - // Backtrack to beginning of style if required... - // If in a long distance lexical state, backtrack to find quote characters - if (initStyle == SCE_SH_HERE_Q) { - while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_SH_HERE_DELIM)) { - startPos--; - } - startPos = styler.LineStart(styler.GetLine(startPos)); - initStyle = styler.StyleAt(startPos - 1); - } - // Bash strings can be multi-line with embedded newlines, so backtrack. - // Bash numbers have additional state during lexing, so backtrack too. - if (initStyle == SCE_SH_STRING - || initStyle == SCE_SH_BACKTICKS - || initStyle == SCE_SH_CHARACTER - || initStyle == SCE_SH_NUMBER - || initStyle == SCE_SH_IDENTIFIER - || initStyle == SCE_SH_COMMENTLINE) { - while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) { - startPos--; - } - initStyle = SCE_SH_DEFAULT; + // Always backtracks to the start of a line that is not a continuation + // of the previous line (i.e. start of a bash command segment) + int ln = styler.GetLine(startPos); + for (;;) { + startPos = styler.LineStart(ln); + if (ln == 0 || styler.GetLineState(ln) == BASH_CMD_START) + break; + ln--; } + initStyle = SCE_SH_DEFAULT; StyleContext sc(startPos, endPos - startPos, initStyle, styler); for (; sc.More(); sc.Forward()) { + // handle line continuation, updates per-line stored state + if (sc.atLineStart) { + ln = styler.GetLine(sc.currentPos); + if (sc.state == SCE_SH_STRING + || sc.state == SCE_SH_BACKTICKS + || sc.state == SCE_SH_CHARACTER + || sc.state == SCE_SH_HERE_Q + || sc.state == SCE_SH_COMMENTLINE + || sc.state == SCE_SH_PARAM) { + // force backtrack while retaining cmdState + styler.SetLineState(ln, BASH_CMD_BODY); + } else { + if (ln > 0) { + if ((sc.GetRelative(-3) == '\\' && sc.GetRelative(-2) == '\r' && sc.chPrev == '\n') + || sc.GetRelative(-2) == '\\') { // handle '\' line continuation + // retain last line's state + } else + cmdState = BASH_CMD_START; + } + styler.SetLineState(ln, cmdState); + } + } + + // controls change of cmdState at the end of a non-whitespace element + // states BODY|TEST|ARITH persist until the end of a command segment + // state WORD persist, but ends with 'in' or 'do' construct keywords + int cmdStateNew = BASH_CMD_BODY; + if (cmdState == BASH_CMD_TEST || cmdState == BASH_CMD_ARITH || cmdState == BASH_CMD_WORD) + cmdStateNew = cmdState; + int stylePrev = sc.state; + // Determine if the current state should terminate. switch (sc.state) { case SCE_SH_OPERATOR: sc.SetState(SCE_SH_DEFAULT); + if (cmdState == BASH_CMD_DELIM) // if command delimiter, start new command + cmdStateNew = BASH_CMD_START; + else if (sc.chPrev == '\\') // propagate command state if line continued + cmdStateNew = cmdState; break; case SCE_SH_WORD: // "." never used in Bash variable names but used in file names if (!setWord.Contains(sc.ch)) { - char s[1000]; + char s[500]; + char s2[10]; sc.GetCurrent(s, sizeof(s)); - if (s[0] != '-' && // for file operators - !keywords.InList(s)) { + // allow keywords ending in a whitespace or command delimiter + s2[0] = sc.ch; + s2[1] = '\0'; + bool keywordEnds = IsASpace(sc.ch) || cmdDelimiter.InList(s2); + // 'in' or 'do' may be construct keywords + if (cmdState == BASH_CMD_WORD) { + if (strcmp(s, "in") == 0 && keywordEnds) + cmdStateNew = BASH_CMD_BODY; + else if (strcmp(s, "do") == 0 && keywordEnds) + cmdStateNew = BASH_CMD_START; + else + sc.ChangeState(SCE_SH_IDENTIFIER); + sc.SetState(SCE_SH_DEFAULT); + break; + } + // a 'test' keyword starts a test expression + if (strcmp(s, "test") == 0) { + if (cmdState == BASH_CMD_START && keywordEnds) { + cmdStateNew = BASH_CMD_TEST; + testExprType = 0; + } else + sc.ChangeState(SCE_SH_IDENTIFIER); + } + // detect bash construct keywords + else if (bashStruct.InList(s)) { + if (cmdState == BASH_CMD_START && keywordEnds) + cmdStateNew = BASH_CMD_START; + else + sc.ChangeState(SCE_SH_IDENTIFIER); + } + // 'for'|'case'|'select' needs 'in'|'do' to be highlighted later + else if (bashStruct_in.InList(s)) { + if (cmdState == BASH_CMD_START && keywordEnds) + cmdStateNew = BASH_CMD_WORD; + else + sc.ChangeState(SCE_SH_IDENTIFIER); + } + // disambiguate option items and file test operators + else if (s[0] == '-') { + if (cmdState != BASH_CMD_TEST) + sc.ChangeState(SCE_SH_IDENTIFIER); + } + // disambiguate keywords and identifiers + else if (cmdState != BASH_CMD_START + || !(keywords.InList(s) && keywordEnds)) { sc.ChangeState(SCE_SH_IDENTIFIER); } sc.SetState(SCE_SH_DEFAULT); @@ -378,10 +459,17 @@ static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle, sc.SetState(SCE_SH_HERE_Q); } + // update cmdState about the current command segment + if (stylePrev != SCE_SH_DEFAULT && sc.state == SCE_SH_DEFAULT) { + cmdState = cmdStateNew; + } // Determine if a new state should be entered. if (sc.state == SCE_SH_DEFAULT) { - if (sc.ch == '\\') { // escaped character + if (sc.ch == '\\') { + // Bash can escape any non-newline as a literal sc.SetState(SCE_SH_IDENTIFIER); + if (sc.chNext == '\r' || sc.chNext == '\n') + sc.SetState(SCE_SH_OPERATOR); } else if (IsADigit(sc.ch)) { sc.SetState(SCE_SH_NUMBER); numBase = BASH_BASE_DECIMAL; @@ -411,6 +499,10 @@ static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle, sc.SetState(SCE_SH_BACKTICKS); Quote.Start(sc.ch); } else if (sc.ch == '$') { + if (sc.Match("$((")) { + sc.SetState(SCE_SH_OPERATOR); // handle '((' later + continue; + } sc.SetState(SCE_SH_SCALAR); sc.Forward(); if (sc.ch == '{') { @@ -421,9 +513,6 @@ static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle, sc.ChangeState(SCE_SH_STRING); } else if (sc.ch == '(' || sc.ch == '`') { sc.ChangeState(SCE_SH_BACKTICKS); - if (sc.chNext == '(') { // $(( is lexed as operator - sc.ChangeState(SCE_SH_OPERATOR); - } } else { continue; // scalar has no delimiter pair } @@ -440,9 +529,66 @@ static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle, sc.SetState(SCE_SH_WORD); sc.Forward(); } else if (setBashOperator.Contains(sc.ch)) { + char s[10]; + bool isCmdDelim = false; sc.SetState(SCE_SH_OPERATOR); + // handle opening delimiters for test/arithmetic expressions - ((,[[,[ + if (cmdState == BASH_CMD_START + || cmdState == BASH_CMD_BODY) { + if (sc.Match('(', '(')) { + cmdState = BASH_CMD_ARITH; + sc.Forward(); + } else if (sc.Match('[', '[') && IsASpace(sc.GetRelative(2))) { + cmdState = BASH_CMD_TEST; + testExprType = 1; + sc.Forward(); + } else if (sc.ch == '[' && IsASpace(sc.chNext)) { + cmdState = BASH_CMD_TEST; + testExprType = 2; + } + } + // special state -- for ((x;y;z)) in ... looping + if (cmdState == BASH_CMD_WORD && sc.Match('(', '(')) { + cmdState = BASH_CMD_ARITH; + sc.Forward(); + continue; + } + // handle command delimiters in command START|BODY|WORD state, also TEST if 'test' + if (cmdState == BASH_CMD_START + || cmdState == BASH_CMD_BODY + || cmdState == BASH_CMD_WORD + || (cmdState == BASH_CMD_TEST && testExprType == 0)) { + s[0] = sc.ch; + if (setBashOperator.Contains(sc.chNext)) { + s[1] = sc.chNext; + s[2] = '\0'; + isCmdDelim = cmdDelimiter.InList(s); + if (isCmdDelim) + sc.Forward(); + } + if (!isCmdDelim) { + s[1] = '\0'; + isCmdDelim = cmdDelimiter.InList(s); + } + if (isCmdDelim) { + cmdState = BASH_CMD_DELIM; + continue; + } + } + // handle closing delimiters for test/arithmetic expressions - )),]],] + if (cmdState == BASH_CMD_ARITH && sc.Match(')', ')')) { + cmdState = BASH_CMD_BODY; + sc.Forward(); + } else if (cmdState == BASH_CMD_TEST && IsASpace(sc.chPrev)) { + if (sc.Match(']', ']') && testExprType == 1) { + sc.Forward(); + cmdState = BASH_CMD_BODY; + } else if (sc.ch == ']' && testExprType == 2) { + cmdState = BASH_CMD_BODY; + } + } } - } + }// sc.state } sc.Complete(); } |