diff options
author | nyamatongwe <unknown> | 2008-07-26 12:20:46 +0000 |
---|---|---|
committer | nyamatongwe <unknown> | 2008-07-26 12:20:46 +0000 |
commit | 74bf9b9fc6186ad580d333823fb0bfb9db43f083 (patch) | |
tree | edada5e4e0418b45f89bc344370954bdc7955d20 /src | |
parent | cb440abffbe31914c1af455a5731d2c12f68edaa (diff) | |
download | scintilla-mirror-74bf9b9fc6186ad580d333823fb0bfb9db43f083.tar.gz |
Update from Kein-Hong Man simplifies the code.
Diffstat (limited to 'src')
-rw-r--r-- | src/LexBash.cxx | 753 |
1 files changed, 301 insertions, 452 deletions
diff --git a/src/LexBash.cxx b/src/LexBash.cxx index 0797e68a9..7b475a7de 100644 --- a/src/LexBash.cxx +++ b/src/LexBash.cxx @@ -2,8 +2,8 @@ /** @file LexBash.cxx ** Lexer for Bash. **/ -// Copyright 2004-2007 by Neil Hodgson <neilh@scintilla.org> -// Adapted from LexPerl by Kein-Hong Man <mkh@pl.jaring.my> 2004 +// Copyright 2004-2008 by Neil Hodgson <neilh@scintilla.org> +// Adapted from LexPerl by Kein-Hong Man 2004 // The License.txt file describes the conditions under which this software may be distributed. #include <stdlib.h> @@ -16,9 +16,17 @@ #include "PropSet.h" #include "Accessor.h" +#include "StyleContext.h" #include "KeyWords.h" #include "Scintilla.h" #include "SciLexer.h" +#include "CharacterSet.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +#define HERE_DELIM_MAX 256 // define this if you want 'invalid octals' to be marked as errors // usually, this is not a good idea, permissive lexing is better @@ -32,13 +40,7 @@ #define BASH_BASE_OCTAL_ERROR 69 #endif -#define HERE_DELIM_MAX 256 - -#ifdef SCI_NAMESPACE -using namespace Scintilla; -#endif - -static inline int translateBashDigit(char ch) { +static inline int translateBashDigit(int ch) { if (ch >= '0' && ch <= '9') { return ch - '0'; } else if (ch >= 'a' && ch <= 'z') { @@ -53,407 +55,210 @@ static inline int translateBashDigit(char ch) { return BASH_BASE_ERROR; } -static inline bool isEOLChar(char ch) { - return (ch == '\r') || (ch == '\n'); -} - -static bool isSingleCharOp(char ch) { - char strCharSet[2]; - strCharSet[0] = ch; - strCharSet[1] = '\0'; - return (NULL != strstr("rwxoRWXOezsfdlpSbctugkTBMACahGLNn", strCharSet)); -} - -static inline bool isBashOperator(char ch) { - if (ch == '^' || ch == '&' || ch == '\\' || ch == '%' || - ch == '(' || ch == ')' || ch == '-' || ch == '+' || - ch == '=' || ch == '|' || ch == '{' || ch == '}' || - ch == '[' || ch == ']' || ch == ':' || ch == ';' || - ch == '>' || ch == ',' || ch == '/' || ch == '<' || - ch == '?' || ch == '!' || ch == '.' || ch == '~' || - ch == '@') - return true; - return false; -} - -static int classifyWordBash(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { - char s[100]; - for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) { - s[i] = styler[start + i]; - s[i + 1] = '\0'; - } - char chAttr = SCE_SH_IDENTIFIER; - if (keywords.InList(s)) - chAttr = SCE_SH_WORD; - styler.ColourTo(end, chAttr); - return chAttr; -} - -static inline int getBashNumberBase(unsigned int start, unsigned int end, Accessor &styler) { +static inline int getBashNumberBase(char *s) { + int i = 0; int base = 0; - for (unsigned int i = 0; i < end - start + 1 && i < 10; i++) { - base = base * 10 + (styler[start + i] - '0'); + while (*s) { + base = base * 10 + (*s++ - '0'); + i++; } - if (base > 64 || (end - start) > 1) { + if (base > 64 || i > 2) { return BASH_BASE_ERROR; } return base; } -static inline bool isEndVar(char ch) { - return !isalnum(ch) && ch != '$' && ch != '_'; -} - -static inline bool isNonQuote(char ch) { - return isalnum(ch) || ch == '_'; -} - -static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) { - if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) { - return false; - } - while (*val) { - if (*val != styler[pos++]) { - return false; - } - val++; - } - return true; -} - -static char opposite(char ch) { - if (ch == '(') - return ')'; - if (ch == '[') - return ']'; - if (ch == '{') - return '}'; - if (ch == '<') - return '>'; +static int opposite(int ch) { + if (ch == '(') return ')'; + if (ch == '[') return ']'; + if (ch == '{') return '}'; + if (ch == '<') return '>'; return ch; } static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle, - WordList *keywordlists[], Accessor &styler) { - - // Lexer for bash often has to backtrack to start of current style to determine - // which characters are being used as quotes, how deeply nested is the - // start position and what the termination string is for here documents + WordList *keywordlists[], Accessor &styler) { WordList &keywords = *keywordlists[0]; - class HereDocCls { + CharacterSet setWordStart(CharacterSet::setAlpha, "_"); + // note that [+-] are often parts of identifiers in shell scripts + CharacterSet setWord(CharacterSet::setAlphaNum, "._+-"); + CharacterSet setBashOperator(CharacterSet::setNone, "^&\\%()-+=|{}[]:;>,*/<?!.~@"); + CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn"); + CharacterSet setParam(CharacterSet::setAlphaNum, "$_"); + CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!"); + CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!"); + CharacterSet setLeftShift(CharacterSet::setDigits, "=$"); + + class HereDocCls { // Class to manage HERE document elements public: int State; // 0: '<<' encountered // 1: collect the delimiter // 2: here doc text (lines after the delimiter) - char Quote; // the char after '<<' + int Quote; // the char after '<<' bool Quoted; // true if Quote in ('\'','"','`') bool Indent; // indented delimiter (for <<-) int DelimiterLength; // strlen(Delimiter) char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf HereDocCls() { State = 0; - Quote = 0; - Quoted = false; - Indent = 0; + Quote = 0; + Quoted = false; + Indent = 0; DelimiterLength = 0; Delimiter = new char[HERE_DELIM_MAX]; Delimiter[0] = '\0'; } + void Append(int ch) { + Delimiter[DelimiterLength++] = static_cast<char>(ch); + Delimiter[DelimiterLength] = '\0'; + } ~HereDocCls() { delete []Delimiter; } }; HereDocCls HereDoc; - class QuoteCls { + class QuoteCls { // Class to manage quote pairs (simplified vs LexPerl) public: - int Rep; - int Count; - char Up; - char Down; + int Count; + int Up, Down; QuoteCls() { - this->New(1); - } - void New(int r) { - Rep = r; Count = 0; Up = '\0'; Down = '\0'; } - void Open(char u) { + void Open(int u) { Count++; Up = u; Down = opposite(Up); } + void Start(int u) { + Count = 0; + Open(u); + } }; QuoteCls Quote; - int state = initStyle; int numBase = 0; - unsigned int lengthDoc = startPos + length; + int digit; + unsigned int endPos = startPos + length; - // If in a long distance lexical state, seek to the beginning to find quote characters - // Bash strings can be multi-line with embedded newlines, so backtrack. - // Bash numbers have additional state during lexing, so backtrack too. - if (state == SCE_SH_HERE_Q) { + // Backtrack to beginning of style if required... + // If in a long distance lexical state, backtrack to find quote characters + if (initStyle == SCE_SH_HERE_Q) { while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_SH_HERE_DELIM)) { startPos--; } startPos = styler.LineStart(styler.GetLine(startPos)); - state = styler.StyleAt(startPos - 1); + initStyle = styler.StyleAt(startPos - 1); } - if (state == SCE_SH_STRING - || state == SCE_SH_BACKTICKS - || state == SCE_SH_CHARACTER - || state == SCE_SH_NUMBER - || state == SCE_SH_IDENTIFIER - || state == SCE_SH_COMMENTLINE - ) { - while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) { + // Bash strings can be multi-line with embedded newlines, so backtrack. + // Bash numbers have additional state during lexing, so backtrack too. + if (initStyle == SCE_SH_STRING + || initStyle == SCE_SH_BACKTICKS + || initStyle == SCE_SH_CHARACTER + || initStyle == SCE_SH_NUMBER + || initStyle == SCE_SH_IDENTIFIER + || initStyle == SCE_SH_COMMENTLINE) { + while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) { startPos--; } - state = SCE_SH_DEFAULT; + initStyle = SCE_SH_DEFAULT; } - styler.StartAt(startPos); - char chPrev = styler.SafeGetCharAt(startPos - 1); - if (startPos == 0) - chPrev = '\n'; - char chNext = styler[startPos]; - styler.StartSegment(startPos); - - for (unsigned int i = startPos; i < lengthDoc; i++) { - char ch = chNext; - // if the current character is not consumed due to the completion of an - // earlier style, lexing can be restarted via a simple goto - restartLexer: - chNext = styler.SafeGetCharAt(i + 1); - char chNext2 = styler.SafeGetCharAt(i + 2); - - if (styler.IsLeadByte(ch)) { - chNext = styler.SafeGetCharAt(i + 2); - chPrev = ' '; - i += 1; - continue; - } - - if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows - styler.ColourTo(i, state); - chPrev = ch; - continue; - } - - if (HereDoc.State == 1 && isEOLChar(ch)) { - // Begin of here-doc (the line after the here-doc delimiter): - // Lexically, the here-doc starts from the next line after the >>, but the - // first line of here-doc seem to follow the style of the last EOL sequence - HereDoc.State = 2; - if (HereDoc.Quoted) { - if (state == SCE_SH_HERE_DELIM) { - // Missing quote at end of string! We are stricter than bash. - // Colour here-doc anyway while marking this bit as an error. - state = SCE_SH_ERROR; - } - styler.ColourTo(i - 1, state); - // HereDoc.Quote always == '\'' - state = SCE_SH_HERE_Q; - } else { - styler.ColourTo(i - 1, state); - // always switch - state = SCE_SH_HERE_Q; - } - } - - if (state == SCE_SH_DEFAULT) { - if (ch == '\\') { // escaped character - if (i < lengthDoc - 1) - i++; - ch = chNext; - chNext = chNext2; - styler.ColourTo(i, SCE_SH_IDENTIFIER); - } else if (isascii(ch) && isdigit(ch)) { - state = SCE_SH_NUMBER; - numBase = BASH_BASE_DECIMAL; - if (ch == '0') { // hex,octal - if (chNext == 'x' || chNext == 'X') { - numBase = BASH_BASE_HEX; - i++; - ch = chNext; - chNext = chNext2; - } else if (isdigit(chNext)) { -#ifdef PEDANTIC_OCTAL - numBase = BASH_BASE_OCTAL; -#else - numBase = BASH_BASE_HEX; -#endif + StyleContext sc(startPos, endPos - startPos, initStyle, styler); + + for (; sc.More(); sc.Forward()) { + + // Determine if the current state should terminate. + switch (sc.state) { + case SCE_SH_OPERATOR: + sc.SetState(SCE_SH_DEFAULT); + break; + case SCE_SH_WORD: + // "." never used in Bash variable names but used in file names + if (!setWord.Contains(sc.ch)) { + char s[1000]; + sc.GetCurrent(s, sizeof(s)); + if (s[0] != '-' && // for file operators + !keywords.InList(s)) { + sc.ChangeState(SCE_SH_IDENTIFIER); } + sc.SetState(SCE_SH_DEFAULT); } - } else if (iswordstart(ch)) { - state = SCE_SH_WORD; - if (!iswordchar(chNext) && chNext != '+' && chNext != '-') { - // We need that if length of word == 1! - // This test is copied from the SCE_SH_WORD handler. - classifyWordBash(styler.GetStartSegment(), i, keywords, styler); - state = SCE_SH_DEFAULT; + break; + case SCE_SH_IDENTIFIER: + if (sc.chPrev == '\\') { // for escaped chars + sc.ForwardSetState(SCE_SH_DEFAULT); + } else if (!setWord.Contains(sc.ch)) { + sc.SetState(SCE_SH_DEFAULT); } - } else if (ch == '#') { - state = SCE_SH_COMMENTLINE; - } else if (ch == '\"') { - state = SCE_SH_STRING; - Quote.New(1); - Quote.Open(ch); - } else if (ch == '\'') { - state = SCE_SH_CHARACTER; - Quote.New(1); - Quote.Open(ch); - } else if (ch == '`') { - state = SCE_SH_BACKTICKS; - Quote.New(1); - Quote.Open(ch); - } else if (ch == '$') { - if (chNext == '{') { - state = SCE_SH_PARAM; - goto startQuote; - } else if (chNext == '\'') { - state = SCE_SH_CHARACTER; - goto startQuote; - } else if (chNext == '"') { - state = SCE_SH_STRING; - goto startQuote; - } else if (chNext == '(' && chNext2 == '(') { - styler.ColourTo(i, SCE_SH_OPERATOR); - state = SCE_SH_DEFAULT; - goto skipChar; - } else if (chNext == '(' || chNext == '`') { - state = SCE_SH_BACKTICKS; - startQuote: - Quote.New(1); - Quote.Open(chNext); - goto skipChar; - } else { - state = SCE_SH_SCALAR; - skipChar: - i++; - ch = chNext; - chNext = chNext2; - } - } else if (ch == '*') { - if (chNext == '*') { // exponentiation - i++; - ch = chNext; - chNext = chNext2; - } - styler.ColourTo(i, SCE_SH_OPERATOR); - } else if (ch == '<' && chNext == '<') { - state = SCE_SH_HERE_DELIM; - HereDoc.State = 0; - HereDoc.Indent = false; - } else if (ch == '-' // file test operators - && isSingleCharOp(chNext) - && !isalnum((chNext2 = styler.SafeGetCharAt(i+2))) - && isspace(chPrev)) { - styler.ColourTo(i + 1, SCE_SH_WORD); - state = SCE_SH_DEFAULT; - i++; - ch = chNext; - chNext = chNext2; - } else if (isBashOperator(ch)) { - styler.ColourTo(i, SCE_SH_OPERATOR); - } else { - // keep colouring defaults to make restart easier - styler.ColourTo(i, SCE_SH_DEFAULT); - } - } else if (state == SCE_SH_NUMBER) { - int digit = translateBashDigit(ch); - if (numBase == BASH_BASE_DECIMAL) { - if (ch == '#') { - numBase = getBashNumberBase(styler.GetStartSegment(), i - 1, styler); - if (numBase == BASH_BASE_ERROR) // take the rest as comment - goto numAtEnd; - } else if (!isdigit(ch)) - goto numAtEnd; - } else if (numBase == BASH_BASE_HEX) { - if ((digit < 16) || (digit >= 36 && digit <= 41)) { - // hex digit 0-9a-fA-F - } else - goto numAtEnd; + break; + case SCE_SH_NUMBER: + digit = translateBashDigit(sc.ch); + if (numBase == BASH_BASE_DECIMAL) { + if (sc.ch == '#') { + char s[10]; + sc.GetCurrent(s, sizeof(s)); + numBase = getBashNumberBase(s); + if (numBase != BASH_BASE_ERROR) + break; + } else if (IsADigit(sc.ch)) + break; + } else if (numBase == BASH_BASE_HEX) { + if (IsADigit(sc.ch, 16)) + break; #ifdef PEDANTIC_OCTAL - } else if (numBase == BASH_BASE_OCTAL || - numBase == BASH_BASE_OCTAL_ERROR) { - if (digit > 7) { + } else if (numBase == BASH_BASE_OCTAL || + numBase == BASH_BASE_OCTAL_ERROR) { + if (digit <= 7) + break; if (digit <= 9) { - numBase = BASH_BASE_OCTAL_ERROR; - } else - goto numAtEnd; - } -#endif - } else if (numBase == BASH_BASE_ERROR) { - if (digit > 9) - goto numAtEnd; - } else { // DD#DDDD number style handling - if (digit != BASH_BASE_ERROR) { - if (numBase <= 36) { - // case-insensitive if base<=36 - if (digit >= 36) digit -= 26; + numBase = BASH_BASE_OCTAL_ERROR; + break; } - if (digit >= numBase) { +#endif + } else if (numBase == BASH_BASE_ERROR) { + if (digit <= 9) + break; + } else { // DD#DDDD number style handling + if (digit != BASH_BASE_ERROR) { + if (numBase <= 36) { + // case-insensitive if base<=36 + if (digit >= 36) digit -= 26; + } + if (digit < numBase) + break; if (digit <= 9) { numBase = BASH_BASE_ERROR; - } else - goto numAtEnd; + break; + } } - } else { - numAtEnd: - if (numBase == BASH_BASE_ERROR + } + // fallthrough when number is at an end or error + if (numBase == BASH_BASE_ERROR #ifdef PEDANTIC_OCTAL - || numBase == BASH_BASE_OCTAL_ERROR + || numBase == BASH_BASE_OCTAL_ERROR #endif - ) - state = SCE_SH_ERROR; - styler.ColourTo(i - 1, state); - state = SCE_SH_DEFAULT; - goto restartLexer; + ) { + sc.ChangeState(SCE_SH_ERROR); } - } - } else if (state == SCE_SH_WORD) { - if (!iswordchar(chNext) && chNext != '+' && chNext != '-') { - // "." never used in Bash variable names - // but used in file names - classifyWordBash(styler.GetStartSegment(), i, keywords, styler); - state = SCE_SH_DEFAULT; - ch = ' '; - } - } else if (state == SCE_SH_IDENTIFIER) { - if (!iswordchar(chNext) && chNext != '+' && chNext != '-') { - styler.ColourTo(i, SCE_SH_IDENTIFIER); - state = SCE_SH_DEFAULT; - ch = ' '; - } - } else { - if (state == SCE_SH_COMMENTLINE) { - if (ch == '\\' && isEOLChar(chNext)) { + sc.SetState(SCE_SH_DEFAULT); + break; + case SCE_SH_COMMENTLINE: + if (sc.ch == '\\' && (sc.chNext == '\r' || sc.chNext == '\n')) { // comment continuation - if (chNext == '\r' && chNext2 == '\n') { - i += 2; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - } else { - i++; - ch = chNext; - chNext = chNext2; + sc.Forward(); + if (sc.ch == '\r' && sc.chNext == '\n') { + sc.Forward(); } - } else if (isEOLChar(ch)) { - styler.ColourTo(i - 1, state); - state = SCE_SH_DEFAULT; - goto restartLexer; - } else if (isEOLChar(chNext)) { - styler.ColourTo(i, state); - state = SCE_SH_DEFAULT; + } else if (sc.atLineEnd) { + sc.ForwardSetState(SCE_SH_DEFAULT); } - } else if (state == SCE_SH_HERE_DELIM) { - // + break; + case SCE_SH_HERE_DELIM: // From Bash info: // --------------- // Specifier format is: <<[-]WORD @@ -461,150 +266,194 @@ static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle, // Whitespace acceptable after <<[-] operator // if (HereDoc.State == 0) { // '<<' encountered - HereDoc.State = 1; - HereDoc.Quote = chNext; + HereDoc.Quote = sc.chNext; HereDoc.Quoted = false; HereDoc.DelimiterLength = 0; HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; - if (chNext == '\'' || chNext == '\"') { // a quoted here-doc delimiter (' or ") - i++; - ch = chNext; - chNext = chNext2; + if (sc.chNext == '\'' || sc.chNext == '\"') { // a quoted here-doc delimiter (' or ") + sc.Forward(); HereDoc.Quoted = true; - } else if (!HereDoc.Indent && chNext == '-') { // <<- indent case + HereDoc.State = 1; + } else if (!HereDoc.Indent && sc.chNext == '-') { // <<- indent case HereDoc.Indent = true; - HereDoc.State = 0; - } else if (isalpha(chNext) || chNext == '_' || chNext == '\\' - || chNext == '-' || chNext == '+' || chNext == '!') { + } else if (setHereDoc.Contains(sc.chNext)) { // an unquoted here-doc delimiter, no special handling - // TODO check what exactly bash considers part of the delim - } else if (chNext == '<') { // HERE string <<< - i++; - ch = chNext; - chNext = chNext2; - styler.ColourTo(i, SCE_SH_HERE_DELIM); - state = SCE_SH_DEFAULT; - HereDoc.State = 0; - } else if (isspacechar(chNext)) { + // TODO check what exactly bash considers part of the delim + HereDoc.State = 1; + } else if (sc.chNext == '<') { // HERE string <<< + sc.Forward(); + sc.ForwardSetState(SCE_SH_DEFAULT); + } else if (IsASpace(sc.chNext)) { // eat whitespace - HereDoc.State = 0; - } else if (isdigit(chNext) || chNext == '=' || chNext == '$') { + } else if (setLeftShift.Contains(sc.chNext)) { // left shift << or <<= operator cases - styler.ColourTo(i, SCE_SH_OPERATOR); - state = SCE_SH_DEFAULT; - HereDoc.State = 0; + sc.ChangeState(SCE_SH_OPERATOR); + sc.ForwardSetState(SCE_SH_DEFAULT); } else { // symbols terminates; deprecated zero-length delimiter + HereDoc.State = 1; } } else if (HereDoc.State == 1) { // collect the delimiter if (HereDoc.Quoted) { // a quoted here-doc delimiter - if (ch == HereDoc.Quote) { // closing quote => end of delimiter - styler.ColourTo(i, state); - state = SCE_SH_DEFAULT; + if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter + sc.ForwardSetState(SCE_SH_DEFAULT); } else { - if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote - i++; - ch = chNext; - chNext = chNext2; + if (sc.ch == '\\' && sc.chNext == HereDoc.Quote) { // escaped quote + sc.Forward(); } - HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; - HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; + HereDoc.Append(sc.ch); } } else { // an unquoted here-doc delimiter - if (isalnum(ch) || ch == '_' || ch == '-' || ch == '+' || ch == '!') { - HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; - HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; - } else if (ch == '\\') { + if (setHereDoc2.Contains(sc.ch)) { + HereDoc.Append(sc.ch); + } else if (sc.ch == '\\') { // skip escape prefix } else { - styler.ColourTo(i - 1, state); - state = SCE_SH_DEFAULT; - goto restartLexer; + sc.SetState(SCE_SH_DEFAULT); } } - if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { - styler.ColourTo(i - 1, state); - state = SCE_SH_ERROR; - goto restartLexer; + if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { // force blowup + sc.SetState(SCE_SH_ERROR); + HereDoc.State = 0; } } - } else if (HereDoc.State == 2) { - // state == SCE_SH_HERE_Q - if (isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) { - if (!HereDoc.Indent && isEOLChar(chPrev)) { - endHereDoc: - // standard HERE delimiter - i += HereDoc.DelimiterLength; - chPrev = styler.SafeGetCharAt(i - 1); - ch = styler.SafeGetCharAt(i); - if (isEOLChar(ch)) { - styler.ColourTo(i - 1, state); - state = SCE_SH_DEFAULT; - HereDoc.State = 0; - goto restartLexer; - } - chNext = styler.SafeGetCharAt(i + 1); - } else if (HereDoc.Indent) { - // indented HERE delimiter - unsigned int bk = (i > 0)? i - 1: 0; - while (i > 0) { - ch = styler.SafeGetCharAt(bk--); - if (isEOLChar(ch)) { - goto endHereDoc; - } else if (!isspacechar(ch)) { - break; // got leading non-whitespace - } + break; + case SCE_SH_HERE_Q: + // HereDoc.State == 2 + if (sc.atLineStart) { + sc.SetState(SCE_SH_HERE_Q); + int prefixws = 0; + while (IsASpace(sc.ch) && !sc.atLineEnd) { // whitespace prefix + sc.Forward(); + prefixws++; + } + if (prefixws > 0) + sc.SetState(SCE_SH_HERE_Q); + while (!sc.atLineEnd) { + sc.Forward(); + } + char s[HERE_DELIM_MAX]; + sc.GetCurrent(s, sizeof(s)); + if (strcmp(HereDoc.Delimiter, s) == 0) { + if ((prefixws > 0 && HereDoc.Indent) || // indentation rule + (prefixws == 0 && !HereDoc.Indent)) { + sc.SetState(SCE_SH_DEFAULT); + break; } } } - } else if (state == SCE_SH_SCALAR) { // variable names - if (isEndVar(ch)) { - if ((state == SCE_SH_SCALAR) - && i == (styler.GetStartSegment() + 1)) { + break; + case SCE_SH_SCALAR: // variable names + if (!setParam.Contains(sc.ch)) { + if (sc.LengthCurrent() == 1) { // Special variable: $(, $_ etc. - styler.ColourTo(i, state); - state = SCE_SH_DEFAULT; + sc.ForwardSetState(SCE_SH_DEFAULT); } else { - styler.ColourTo(i - 1, state); - state = SCE_SH_DEFAULT; - goto restartLexer; + sc.SetState(SCE_SH_DEFAULT); } } - } else if (state == SCE_SH_STRING - || state == SCE_SH_CHARACTER - || state == SCE_SH_BACKTICKS - || state == SCE_SH_PARAM - ) { - if (!Quote.Down && !isspacechar(ch)) { - Quote.Open(ch); - } else if (ch == '\\' && Quote.Up != '\\') { - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - } else if (ch == Quote.Down) { + break; + case SCE_SH_STRING: // delimited styles + case SCE_SH_CHARACTER: + case SCE_SH_BACKTICKS: + case SCE_SH_PARAM: + if (sc.ch == '\\' && Quote.Up != '\\') { + sc.Forward(); + } else if (sc.ch == Quote.Down) { Quote.Count--; if (Quote.Count == 0) { - Quote.Rep--; - if (Quote.Rep <= 0) { - styler.ColourTo(i, state); - state = SCE_SH_DEFAULT; - ch = ' '; - } - if (Quote.Up == Quote.Down) { - Quote.Count++; - } + sc.ForwardSetState(SCE_SH_DEFAULT); } - } else if (ch == Quote.Up) { + } else if (sc.ch == Quote.Up) { Quote.Count++; } + break; + } + + // Must check end of HereDoc state 1 before default state is handled + if (HereDoc.State == 1 && sc.atLineEnd) { + // Begin of here-doc (the line after the here-doc delimiter): + // Lexically, the here-doc starts from the next line after the >>, but the + // first line of here-doc seem to follow the style of the last EOL sequence + HereDoc.State = 2; + if (HereDoc.Quoted) { + if (sc.state == SCE_SH_HERE_DELIM) { + // Missing quote at end of string! We are stricter than bash. + // Colour here-doc anyway while marking this bit as an error. + sc.ChangeState(SCE_SH_ERROR); + } + // HereDoc.Quote always == '\'' } + sc.SetState(SCE_SH_HERE_Q); } - if (state == SCE_SH_ERROR) { - break; + + // Determine if a new state should be entered. + if (sc.state == SCE_SH_DEFAULT) { + if (sc.ch == '\\') { // escaped character + sc.SetState(SCE_SH_IDENTIFIER); + } else if (IsADigit(sc.ch)) { + sc.SetState(SCE_SH_NUMBER); + numBase = BASH_BASE_DECIMAL; + if (sc.ch == '0') { // hex,octal + if (sc.chNext == 'x' || sc.chNext == 'X') { + numBase = BASH_BASE_HEX; + sc.Forward(); + } else if (IsADigit(sc.chNext)) { +#ifdef PEDANTIC_OCTAL + numBase = BASH_BASE_OCTAL; +#else + numBase = BASH_BASE_HEX; +#endif + } + } + } else if (setWordStart.Contains(sc.ch)) { + sc.SetState(SCE_SH_WORD); + } else if (sc.ch == '#') { + sc.SetState(SCE_SH_COMMENTLINE); + } else if (sc.ch == '\"') { + sc.SetState(SCE_SH_STRING); + Quote.Start(sc.ch); + } else if (sc.ch == '\'') { + sc.SetState(SCE_SH_CHARACTER); + Quote.Start(sc.ch); + } else if (sc.ch == '`') { + sc.SetState(SCE_SH_BACKTICKS); + Quote.Start(sc.ch); + } else if (sc.ch == '$') { + sc.SetState(SCE_SH_SCALAR); + sc.Forward(); + if (sc.ch == '{') { + sc.ChangeState(SCE_SH_PARAM); + } else if (sc.ch == '\'') { + sc.ChangeState(SCE_SH_CHARACTER); + } else if (sc.ch == '"') { + sc.ChangeState(SCE_SH_STRING); + } else if (sc.ch == '(' || sc.ch == '`') { + sc.ChangeState(SCE_SH_BACKTICKS); + if (sc.chNext == '(') { // $(( is lexed as operator + sc.ChangeState(SCE_SH_OPERATOR); + } + } else { + continue; // scalar has no delimiter pair + } + // fallthrough, open delim for $[{'"(`] + Quote.Start(sc.ch); + } else if (sc.Match('<', '<')) { + sc.SetState(SCE_SH_HERE_DELIM); + HereDoc.State = 0; + HereDoc.Indent = false; + } else if (sc.ch == '-' && // one-char file test operators + setSingleCharOp.Contains(sc.chNext) && + !setWord.Contains(sc.GetRelative(2)) && + IsASpace(sc.chPrev)) { + sc.SetState(SCE_SH_WORD); + sc.Forward(); + } else if (setBashOperator.Contains(sc.ch)) { + sc.SetState(SCE_SH_OPERATOR); + } } - chPrev = ch; } - styler.ColourTo(lengthDoc - 1, state); + sc.Complete(); } static bool IsCommentLine(int line, Accessor &styler) { @@ -621,7 +470,7 @@ static bool IsCommentLine(int line, Accessor &styler) { } static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[], - Accessor &styler) { + Accessor &styler) { bool foldComment = styler.GetPropertyInt("fold.comment") != 0; bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; unsigned int endPos = startPos + length; @@ -637,16 +486,16 @@ static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[], int style = styleNext; styleNext = styler.StyleAt(i + 1); bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); - // Comment folding + // Comment folding if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) - { - if (!IsCommentLine(lineCurrent - 1, styler) - && IsCommentLine(lineCurrent + 1, styler)) - levelCurrent++; - else if (IsCommentLine(lineCurrent - 1, styler) - && !IsCommentLine(lineCurrent+1, styler)) - levelCurrent--; - } + { + if (!IsCommentLine(lineCurrent - 1, styler) + && IsCommentLine(lineCurrent + 1, styler)) + levelCurrent++; + else if (IsCommentLine(lineCurrent - 1, styler) + && !IsCommentLine(lineCurrent + 1, styler)) + levelCurrent--; + } if (style == SCE_SH_OPERATOR) { if (ch == '{') { levelCurrent++; |