diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/LexBash.cxx | 753 | 
1 files changed, 301 insertions, 452 deletions
diff --git a/src/LexBash.cxx b/src/LexBash.cxx index 0797e68a9..7b475a7de 100644 --- a/src/LexBash.cxx +++ b/src/LexBash.cxx @@ -2,8 +2,8 @@  /** @file LexBash.cxx   ** Lexer for Bash.   **/ -// Copyright 2004-2007 by Neil Hodgson <neilh@scintilla.org> -// Adapted from LexPerl by Kein-Hong Man <mkh@pl.jaring.my> 2004 +// Copyright 2004-2008 by Neil Hodgson <neilh@scintilla.org> +// Adapted from LexPerl by Kein-Hong Man 2004  // The License.txt file describes the conditions under which this software may be distributed.  #include <stdlib.h> @@ -16,9 +16,17 @@  #include "PropSet.h"  #include "Accessor.h" +#include "StyleContext.h"  #include "KeyWords.h"  #include "Scintilla.h"  #include "SciLexer.h" +#include "CharacterSet.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +#define HERE_DELIM_MAX 256  // define this if you want 'invalid octals' to be marked as errors  // usually, this is not a good idea, permissive lexing is better @@ -32,13 +40,7 @@  #define BASH_BASE_OCTAL_ERROR	69  #endif -#define HERE_DELIM_MAX 256 - -#ifdef SCI_NAMESPACE -using namespace Scintilla; -#endif - -static inline int translateBashDigit(char ch) { +static inline int translateBashDigit(int ch) {  	if (ch >= '0' && ch <= '9') {  		return ch - '0';  	} else if (ch >= 'a' && ch <= 'z') { @@ -53,407 +55,210 @@ static inline int translateBashDigit(char ch) {  	return BASH_BASE_ERROR;  } -static inline bool isEOLChar(char ch) { -	return (ch == '\r') || (ch == '\n'); -} - -static bool isSingleCharOp(char ch) { -	char strCharSet[2]; -	strCharSet[0] = ch; -	strCharSet[1] = '\0'; -	return (NULL != strstr("rwxoRWXOezsfdlpSbctugkTBMACahGLNn", strCharSet)); -} - -static inline bool isBashOperator(char ch) { -	if (ch == '^' || ch == '&' || ch == '\\' || ch == '%' || -	        ch == '(' || ch == ')' || ch == '-' || ch == '+' || -	        ch == '=' || ch == '|' || ch == '{' || ch == '}' || -	        ch == '[' || ch == ']' || ch == ':' || ch == ';' || -	        ch == '>' || ch == ',' || ch == '/' || ch == '<' || -	        ch == '?' || ch == '!' || ch == '.' || ch == '~' || -		ch == '@') -		return true; -	return false; -} - -static int classifyWordBash(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { -	char s[100]; -	for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) { -		s[i] = styler[start + i]; -		s[i + 1] = '\0'; -	} -	char chAttr = SCE_SH_IDENTIFIER; -	if (keywords.InList(s)) -		chAttr = SCE_SH_WORD; -	styler.ColourTo(end, chAttr); -	return chAttr; -} - -static inline int getBashNumberBase(unsigned int start, unsigned int end, Accessor &styler) { +static inline int getBashNumberBase(char *s) { +	int i = 0;  	int base = 0; -	for (unsigned int i = 0; i < end - start + 1 && i < 10; i++) { -		base = base * 10 + (styler[start + i] - '0'); +	while (*s) { +		base = base * 10 + (*s++ - '0'); +		i++;  	} -	if (base > 64 || (end - start) > 1) { +	if (base > 64 || i > 2) {  		return BASH_BASE_ERROR;  	}  	return base;  } -static inline bool isEndVar(char ch) { -	return !isalnum(ch) && ch != '$' && ch != '_'; -} - -static inline bool isNonQuote(char ch) { -	return isalnum(ch) || ch == '_'; -} - -static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) { -	if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) { -		return false; -	} -	while (*val) { -		if (*val != styler[pos++]) { -			return false; -		} -		val++; -	} -	return true; -} - -static char opposite(char ch) { -	if (ch == '(') -		return ')'; -	if (ch == '[') -		return ']'; -	if (ch == '{') -		return '}'; -	if (ch == '<') -		return '>'; +static int opposite(int ch) { +	if (ch == '(') return ')'; +	if (ch == '[') return ']'; +	if (ch == '{') return '}'; +	if (ch == '<') return '>';  	return ch;  }  static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle, -                             WordList *keywordlists[], Accessor &styler) { - -	// Lexer for bash often has to backtrack to start of current style to determine -	// which characters are being used as quotes, how deeply nested is the -	// start position and what the termination string is for here documents +							 WordList *keywordlists[], Accessor &styler) {  	WordList &keywords = *keywordlists[0]; -	class HereDocCls { +	CharacterSet setWordStart(CharacterSet::setAlpha, "_"); +	// note that [+-] are often parts of identifiers in shell scripts +	CharacterSet setWord(CharacterSet::setAlphaNum, "._+-"); +	CharacterSet setBashOperator(CharacterSet::setNone, "^&\\%()-+=|{}[]:;>,*/<?!.~@"); +	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn"); +	CharacterSet setParam(CharacterSet::setAlphaNum, "$_"); +	CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!"); +	CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!"); +	CharacterSet setLeftShift(CharacterSet::setDigits, "=$"); + +	class HereDocCls {	// Class to manage HERE document elements  	public:  		int State;		// 0: '<<' encountered  		// 1: collect the delimiter  		// 2: here doc text (lines after the delimiter) -		char Quote;		// the char after '<<' +		int Quote;		// the char after '<<'  		bool Quoted;		// true if Quote in ('\'','"','`')  		bool Indent;		// indented delimiter (for <<-)  		int DelimiterLength;	// strlen(Delimiter)  		char *Delimiter;	// the Delimiter, 256: sizeof PL_tokenbuf  		HereDocCls() {  			State = 0; -            Quote = 0; -            Quoted = false; -            Indent = 0; +			Quote = 0; +			Quoted = false; +			Indent = 0;  			DelimiterLength = 0;  			Delimiter = new char[HERE_DELIM_MAX];  			Delimiter[0] = '\0';  		} +		void Append(int ch) { +			Delimiter[DelimiterLength++] = static_cast<char>(ch); +			Delimiter[DelimiterLength] = '\0'; +		}  		~HereDocCls() {  			delete []Delimiter;  		}  	};  	HereDocCls HereDoc; -	class QuoteCls { +	class QuoteCls {	// Class to manage quote pairs (simplified vs LexPerl)  		public: -		int  Rep; -		int  Count; -		char Up; -		char Down; +		int Count; +		int Up, Down;  		QuoteCls() { -			this->New(1); -		} -		void New(int r) { -			Rep   = r;  			Count = 0;  			Up    = '\0';  			Down  = '\0';  		} -		void Open(char u) { +		void Open(int u) {  			Count++;  			Up    = u;  			Down  = opposite(Up);  		} +		void Start(int u) { +			Count = 0; +			Open(u); +		}  	};  	QuoteCls Quote; -	int state = initStyle;  	int numBase = 0; -	unsigned int lengthDoc = startPos + length; +	int digit; +	unsigned int endPos = startPos + length; -	// If in a long distance lexical state, seek to the beginning to find quote characters -	// Bash strings can be multi-line with embedded newlines, so backtrack. -	// Bash numbers have additional state during lexing, so backtrack too. -	if (state == SCE_SH_HERE_Q) { +	// Backtrack to beginning of style if required... +	// If in a long distance lexical state, backtrack to find quote characters +	if (initStyle == SCE_SH_HERE_Q) {  		while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_SH_HERE_DELIM)) {  			startPos--;  		}  		startPos = styler.LineStart(styler.GetLine(startPos)); -		state = styler.StyleAt(startPos - 1); +		initStyle = styler.StyleAt(startPos - 1);  	} -	if (state == SCE_SH_STRING -	 || state == SCE_SH_BACKTICKS -	 || state == SCE_SH_CHARACTER -	 || state == SCE_SH_NUMBER -	 || state == SCE_SH_IDENTIFIER -	 || state == SCE_SH_COMMENTLINE -	) { -		while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) { +	// Bash strings can be multi-line with embedded newlines, so backtrack. +	// Bash numbers have additional state during lexing, so backtrack too. +	if (initStyle == SCE_SH_STRING +	 || initStyle == SCE_SH_BACKTICKS +	 || initStyle == SCE_SH_CHARACTER +	 || initStyle == SCE_SH_NUMBER +	 || initStyle == SCE_SH_IDENTIFIER +	 || initStyle == SCE_SH_COMMENTLINE) { +		while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {  			startPos--;  		} -		state = SCE_SH_DEFAULT; +		initStyle = SCE_SH_DEFAULT;  	} -	styler.StartAt(startPos); -	char chPrev = styler.SafeGetCharAt(startPos - 1); -	if (startPos == 0) -		chPrev = '\n'; -	char chNext = styler[startPos]; -	styler.StartSegment(startPos); - -	for (unsigned int i = startPos; i < lengthDoc; i++) { -		char ch = chNext; -		// if the current character is not consumed due to the completion of an -		// earlier style, lexing can be restarted via a simple goto -	restartLexer: -		chNext = styler.SafeGetCharAt(i + 1); -		char chNext2 = styler.SafeGetCharAt(i + 2); - -		if (styler.IsLeadByte(ch)) { -			chNext = styler.SafeGetCharAt(i + 2); -			chPrev = ' '; -			i += 1; -			continue; -		} - -		if ((chPrev == '\r' && ch == '\n')) {	// skip on DOS/Windows -			styler.ColourTo(i, state); -			chPrev = ch; -			continue; -		} - -		if (HereDoc.State == 1 && isEOLChar(ch)) { -			// Begin of here-doc (the line after the here-doc delimiter): -			// Lexically, the here-doc starts from the next line after the >>, but the -			// first line of here-doc seem to follow the style of the last EOL sequence -			HereDoc.State = 2; -			if (HereDoc.Quoted) { -				if (state == SCE_SH_HERE_DELIM) { -					// Missing quote at end of string! We are stricter than bash. -					// Colour here-doc anyway while marking this bit as an error. -					state = SCE_SH_ERROR; -				} -				styler.ColourTo(i - 1, state); -				// HereDoc.Quote always == '\'' -				state = SCE_SH_HERE_Q; -			} else { -				styler.ColourTo(i - 1, state); -				// always switch -				state = SCE_SH_HERE_Q; -			} -		} - -		if (state == SCE_SH_DEFAULT) { -			if (ch == '\\') {	// escaped character -				if (i < lengthDoc - 1) -					i++; -				ch = chNext; -				chNext = chNext2; -				styler.ColourTo(i, SCE_SH_IDENTIFIER); -			} else if (isascii(ch) && isdigit(ch)) { -				state = SCE_SH_NUMBER; -				numBase = BASH_BASE_DECIMAL; -				if (ch == '0') {	// hex,octal -					if (chNext == 'x' || chNext == 'X') { -						numBase = BASH_BASE_HEX; -						i++; -						ch = chNext; -						chNext = chNext2; -					} else if (isdigit(chNext)) { -#ifdef PEDANTIC_OCTAL -						numBase = BASH_BASE_OCTAL; -#else -						numBase = BASH_BASE_HEX; -#endif +	StyleContext sc(startPos, endPos - startPos, initStyle, styler); + +	for (; sc.More(); sc.Forward()) { + +		// Determine if the current state should terminate. +		switch (sc.state) { +			case SCE_SH_OPERATOR: +				sc.SetState(SCE_SH_DEFAULT); +				break; +			case SCE_SH_WORD: +				// "." never used in Bash variable names but used in file names +				if (!setWord.Contains(sc.ch)) { +					char s[1000]; +					sc.GetCurrent(s, sizeof(s)); +					if (s[0] != '-' &&	// for file operators +						!keywords.InList(s)) { +						sc.ChangeState(SCE_SH_IDENTIFIER);  					} +					sc.SetState(SCE_SH_DEFAULT);  				} -			} else if (iswordstart(ch)) { -				state = SCE_SH_WORD; -				if (!iswordchar(chNext) && chNext != '+' && chNext != '-') { -					// We need that if length of word == 1! -					// This test is copied from the SCE_SH_WORD handler. -					classifyWordBash(styler.GetStartSegment(), i, keywords, styler); -					state = SCE_SH_DEFAULT; +				break; +			case SCE_SH_IDENTIFIER: +				if (sc.chPrev == '\\') {	// for escaped chars +					sc.ForwardSetState(SCE_SH_DEFAULT); +				} else if (!setWord.Contains(sc.ch)) { +					sc.SetState(SCE_SH_DEFAULT);  				} -			} else if (ch == '#') { -				state = SCE_SH_COMMENTLINE; -			} else if (ch == '\"') { -				state = SCE_SH_STRING; -				Quote.New(1); -				Quote.Open(ch); -			} else if (ch == '\'') { -				state = SCE_SH_CHARACTER; -				Quote.New(1); -				Quote.Open(ch); -			} else if (ch == '`') { -				state = SCE_SH_BACKTICKS; -				Quote.New(1); -				Quote.Open(ch); -			} else if (ch == '$') { -				if (chNext == '{') { -					state = SCE_SH_PARAM; -					goto startQuote; -				} else if (chNext == '\'') { -					state = SCE_SH_CHARACTER; -					goto startQuote; -				} else if (chNext == '"') { -					state = SCE_SH_STRING; -					goto startQuote; -				} else if (chNext == '(' && chNext2 == '(') { -					styler.ColourTo(i, SCE_SH_OPERATOR); -					state = SCE_SH_DEFAULT; -					goto skipChar; -				} else if (chNext == '(' || chNext == '`') { -					state = SCE_SH_BACKTICKS; -				startQuote: -					Quote.New(1); -					Quote.Open(chNext); -					goto skipChar; -				} else { -					state = SCE_SH_SCALAR; -				skipChar: -					i++; -					ch = chNext; -					chNext = chNext2; -				} -			} else if (ch == '*') { -				if (chNext == '*') {	// exponentiation -					i++; -					ch = chNext; -					chNext = chNext2; -				} -				styler.ColourTo(i, SCE_SH_OPERATOR); -			} else if (ch == '<' && chNext == '<') { -				state = SCE_SH_HERE_DELIM; -				HereDoc.State = 0; -				HereDoc.Indent = false; -			} else if (ch == '-'	// file test operators -			           && isSingleCharOp(chNext) -			           && !isalnum((chNext2 = styler.SafeGetCharAt(i+2))) -			           && isspace(chPrev)) { -				styler.ColourTo(i + 1, SCE_SH_WORD); -				state = SCE_SH_DEFAULT; -				i++; -				ch = chNext; -				chNext = chNext2; -			} else if (isBashOperator(ch)) { -				styler.ColourTo(i, SCE_SH_OPERATOR); -			} else { -				// keep colouring defaults to make restart easier -				styler.ColourTo(i, SCE_SH_DEFAULT); -			} -		} else if (state == SCE_SH_NUMBER) { -			int digit = translateBashDigit(ch); -			if (numBase == BASH_BASE_DECIMAL) { -				if (ch == '#') { -					numBase = getBashNumberBase(styler.GetStartSegment(), i - 1, styler); -					if (numBase == BASH_BASE_ERROR)	// take the rest as comment -						goto numAtEnd; -				} else if (!isdigit(ch)) -					goto numAtEnd; -			} else if (numBase == BASH_BASE_HEX) { -				if ((digit < 16) || (digit >= 36 && digit <= 41)) { -					// hex digit 0-9a-fA-F -				} else -					goto numAtEnd; +				break; +			case SCE_SH_NUMBER: +				digit = translateBashDigit(sc.ch); +				if (numBase == BASH_BASE_DECIMAL) { +					if (sc.ch == '#') { +						char s[10]; +						sc.GetCurrent(s, sizeof(s)); +						numBase = getBashNumberBase(s); +						if (numBase != BASH_BASE_ERROR) +							break; +					} else if (IsADigit(sc.ch)) +						break; +				} else if (numBase == BASH_BASE_HEX) { +					if (IsADigit(sc.ch, 16)) +						break;  #ifdef PEDANTIC_OCTAL -			} else if (numBase == BASH_BASE_OCTAL || -				   numBase == BASH_BASE_OCTAL_ERROR) { -				if (digit > 7) { +				} else if (numBase == BASH_BASE_OCTAL || +						   numBase == BASH_BASE_OCTAL_ERROR) { +					if (digit <= 7) +						break;  					if (digit <= 9) { -                                                numBase = BASH_BASE_OCTAL_ERROR; -					} else -						goto numAtEnd; -				} -#endif -			} else if (numBase == BASH_BASE_ERROR) { -				if (digit > 9) -					goto numAtEnd; -			} else {	// DD#DDDD number style handling -				if (digit != BASH_BASE_ERROR) { -					if (numBase <= 36) { -						// case-insensitive if base<=36 -						if (digit >= 36) digit -= 26; +						numBase = BASH_BASE_OCTAL_ERROR; +						break;  					} -					if (digit >= numBase) { +#endif +				} else if (numBase == BASH_BASE_ERROR) { +					if (digit <= 9) +						break; +				} else {	// DD#DDDD number style handling +					if (digit != BASH_BASE_ERROR) { +						if (numBase <= 36) { +							// case-insensitive if base<=36 +							if (digit >= 36) digit -= 26; +						} +						if (digit < numBase) +							break;  						if (digit <= 9) {  							numBase = BASH_BASE_ERROR; -						} else -							goto numAtEnd; +							break; +						}  					} -				} else { -			numAtEnd: -					if (numBase == BASH_BASE_ERROR +				} +				// fallthrough when number is at an end or error +				if (numBase == BASH_BASE_ERROR  #ifdef PEDANTIC_OCTAL -					    || numBase == BASH_BASE_OCTAL_ERROR +					|| numBase == BASH_BASE_OCTAL_ERROR  #endif -                                           ) -						state = SCE_SH_ERROR; -					styler.ColourTo(i - 1, state); -					state = SCE_SH_DEFAULT; -					goto restartLexer; +				) { +					sc.ChangeState(SCE_SH_ERROR);  				} -			} -		} else if (state == SCE_SH_WORD) { -			if (!iswordchar(chNext) && chNext != '+' && chNext != '-') { -				// "." never used in Bash variable names -				// but used in file names -				classifyWordBash(styler.GetStartSegment(), i, keywords, styler); -				state = SCE_SH_DEFAULT; -				ch = ' '; -			} -		} else if (state == SCE_SH_IDENTIFIER) { -			if (!iswordchar(chNext) && chNext != '+' && chNext != '-') { -				styler.ColourTo(i, SCE_SH_IDENTIFIER); -				state = SCE_SH_DEFAULT; -				ch = ' '; -			} -		} else { -			if (state == SCE_SH_COMMENTLINE) { -				if (ch == '\\' && isEOLChar(chNext)) { +				sc.SetState(SCE_SH_DEFAULT); +				break; +			case SCE_SH_COMMENTLINE: +				if (sc.ch == '\\' && (sc.chNext == '\r' || sc.chNext == '\n')) {  					// comment continuation -					if (chNext == '\r' && chNext2 == '\n') { -						i += 2; -						ch = styler.SafeGetCharAt(i); -						chNext = styler.SafeGetCharAt(i + 1); -					} else { -						i++; -						ch = chNext; -						chNext = chNext2; +					sc.Forward(); +					if (sc.ch == '\r' && sc.chNext == '\n') { +						sc.Forward();  					} -				} else if (isEOLChar(ch)) { -					styler.ColourTo(i - 1, state); -					state = SCE_SH_DEFAULT; -					goto restartLexer; -				} else if (isEOLChar(chNext)) { -					styler.ColourTo(i, state); -					state = SCE_SH_DEFAULT; +				} else if (sc.atLineEnd) { +					sc.ForwardSetState(SCE_SH_DEFAULT);  				} -			} else if (state == SCE_SH_HERE_DELIM) { -				// +				break; +			case SCE_SH_HERE_DELIM:  				// From Bash info:  				// ---------------  				// Specifier format is: <<[-]WORD @@ -461,150 +266,194 @@ static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle,  				// Whitespace acceptable after <<[-] operator  				//  				if (HereDoc.State == 0) { // '<<' encountered -					HereDoc.State = 1; -					HereDoc.Quote = chNext; +					HereDoc.Quote = sc.chNext;  					HereDoc.Quoted = false;  					HereDoc.DelimiterLength = 0;  					HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; -					if (chNext == '\'' || chNext == '\"') {	// a quoted here-doc delimiter (' or ") -						i++; -						ch = chNext; -						chNext = chNext2; +					if (sc.chNext == '\'' || sc.chNext == '\"') {	// a quoted here-doc delimiter (' or ") +						sc.Forward();  						HereDoc.Quoted = true; -					} else if (!HereDoc.Indent && chNext == '-') {	// <<- indent case +						HereDoc.State = 1; +					} else if (!HereDoc.Indent && sc.chNext == '-') {	// <<- indent case  						HereDoc.Indent = true; -						HereDoc.State = 0; -					} else if (isalpha(chNext) || chNext == '_' || chNext == '\\' -						|| chNext == '-' || chNext == '+' || chNext == '!') { +					} else if (setHereDoc.Contains(sc.chNext)) {  						// an unquoted here-doc delimiter, no special handling -                        // TODO check what exactly bash considers part of the delim -					} else if (chNext == '<') {	// HERE string <<< -						i++; -						ch = chNext; -						chNext = chNext2; -						styler.ColourTo(i, SCE_SH_HERE_DELIM); -						state = SCE_SH_DEFAULT; -						HereDoc.State = 0; -					} else if (isspacechar(chNext)) { +						// TODO check what exactly bash considers part of the delim +						HereDoc.State = 1; +					} else if (sc.chNext == '<') {	// HERE string <<< +						sc.Forward(); +						sc.ForwardSetState(SCE_SH_DEFAULT); +					} else if (IsASpace(sc.chNext)) {  						// eat whitespace -						HereDoc.State = 0; -					} else if (isdigit(chNext) || chNext == '=' || chNext == '$') { +					} else if (setLeftShift.Contains(sc.chNext)) {  						// left shift << or <<= operator cases -						styler.ColourTo(i, SCE_SH_OPERATOR); -						state = SCE_SH_DEFAULT; -						HereDoc.State = 0; +						sc.ChangeState(SCE_SH_OPERATOR); +						sc.ForwardSetState(SCE_SH_DEFAULT);  					} else {  						// symbols terminates; deprecated zero-length delimiter +						HereDoc.State = 1;  					}  				} else if (HereDoc.State == 1) { // collect the delimiter  					if (HereDoc.Quoted) { // a quoted here-doc delimiter -						if (ch == HereDoc.Quote) { // closing quote => end of delimiter -							styler.ColourTo(i, state); -							state = SCE_SH_DEFAULT; +						if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter +							sc.ForwardSetState(SCE_SH_DEFAULT);  						} else { -							if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote -								i++; -								ch = chNext; -								chNext = chNext2; +							if (sc.ch == '\\' && sc.chNext == HereDoc.Quote) { // escaped quote +								sc.Forward();  							} -							HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; -							HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; +							HereDoc.Append(sc.ch);  						}  					} else { // an unquoted here-doc delimiter -						if (isalnum(ch) || ch == '_' || ch == '-' || ch == '+' || ch == '!') { -							HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; -							HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; -						} else if (ch == '\\') { +						if (setHereDoc2.Contains(sc.ch)) { +							HereDoc.Append(sc.ch); +						} else if (sc.ch == '\\') {  							// skip escape prefix  						} else { -							styler.ColourTo(i - 1, state); -							state = SCE_SH_DEFAULT; -							goto restartLexer; +							sc.SetState(SCE_SH_DEFAULT);  						}  					} -					if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { -						styler.ColourTo(i - 1, state); -						state = SCE_SH_ERROR; -						goto restartLexer; +					if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {	// force blowup +						sc.SetState(SCE_SH_ERROR); +						HereDoc.State = 0;  					}  				} -			} else if (HereDoc.State == 2) { -				// state == SCE_SH_HERE_Q -				if (isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) { -					if (!HereDoc.Indent && isEOLChar(chPrev)) { -					endHereDoc: -						// standard HERE delimiter -						i += HereDoc.DelimiterLength; -						chPrev = styler.SafeGetCharAt(i - 1); -						ch = styler.SafeGetCharAt(i); -						if (isEOLChar(ch)) { -							styler.ColourTo(i - 1, state); -							state = SCE_SH_DEFAULT; -							HereDoc.State = 0; -							goto restartLexer; -						} -						chNext = styler.SafeGetCharAt(i + 1); -					} else if (HereDoc.Indent) { -						// indented HERE delimiter -						unsigned int bk = (i > 0)? i - 1: 0; -						while (i > 0) { -							ch = styler.SafeGetCharAt(bk--); -							if (isEOLChar(ch)) { -								goto endHereDoc; -							} else if (!isspacechar(ch)) { -								break;	// got leading non-whitespace -							} +				break; +			case SCE_SH_HERE_Q: +				// HereDoc.State == 2 +				if (sc.atLineStart) { +					sc.SetState(SCE_SH_HERE_Q); +					int prefixws = 0; +					while (IsASpace(sc.ch) && !sc.atLineEnd) {	// whitespace prefix +						sc.Forward(); +						prefixws++; +					} +					if (prefixws > 0) +						sc.SetState(SCE_SH_HERE_Q); +					while (!sc.atLineEnd) { +						sc.Forward(); +					} +					char s[HERE_DELIM_MAX]; +					sc.GetCurrent(s, sizeof(s)); +					if (strcmp(HereDoc.Delimiter, s) == 0) { +						if ((prefixws > 0 && HereDoc.Indent) ||	// indentation rule +							(prefixws == 0 && !HereDoc.Indent)) { +							sc.SetState(SCE_SH_DEFAULT); +							break;  						}  					}  				} -			} else if (state == SCE_SH_SCALAR) {	// variable names -				if (isEndVar(ch)) { -					if ((state == SCE_SH_SCALAR) -					    && i == (styler.GetStartSegment() + 1)) { +				break; +			case SCE_SH_SCALAR:	// variable names +				if (!setParam.Contains(sc.ch)) { +					if (sc.LengthCurrent() == 1) {  						// Special variable: $(, $_ etc. -						styler.ColourTo(i, state); -						state = SCE_SH_DEFAULT; +						sc.ForwardSetState(SCE_SH_DEFAULT);  					} else { -						styler.ColourTo(i - 1, state); -						state = SCE_SH_DEFAULT; -						goto restartLexer; +						sc.SetState(SCE_SH_DEFAULT);  					}  				} -			} else if (state == SCE_SH_STRING -				|| state == SCE_SH_CHARACTER -				|| state == SCE_SH_BACKTICKS -				|| state == SCE_SH_PARAM -				) { -				if (!Quote.Down && !isspacechar(ch)) { -					Quote.Open(ch); -				} else if (ch == '\\' && Quote.Up != '\\') { -					i++; -					ch = chNext; -					chNext = styler.SafeGetCharAt(i + 1); -				} else if (ch == Quote.Down) { +				break; +			case SCE_SH_STRING:	// delimited styles +			case SCE_SH_CHARACTER: +			case SCE_SH_BACKTICKS: +			case SCE_SH_PARAM: +				if (sc.ch == '\\' && Quote.Up != '\\') { +					sc.Forward(); +				} else if (sc.ch == Quote.Down) {  					Quote.Count--;  					if (Quote.Count == 0) { -						Quote.Rep--; -						if (Quote.Rep <= 0) { -							styler.ColourTo(i, state); -							state = SCE_SH_DEFAULT; -							ch = ' '; -						} -						if (Quote.Up == Quote.Down) { -							Quote.Count++; -						} +						sc.ForwardSetState(SCE_SH_DEFAULT);  					} -				} else if (ch == Quote.Up) { +				} else if (sc.ch == Quote.Up) {  					Quote.Count++;  				} +				break; +		} + +		// Must check end of HereDoc state 1 before default state is handled +		if (HereDoc.State == 1 && sc.atLineEnd) { +			// Begin of here-doc (the line after the here-doc delimiter): +			// Lexically, the here-doc starts from the next line after the >>, but the +			// first line of here-doc seem to follow the style of the last EOL sequence +			HereDoc.State = 2; +			if (HereDoc.Quoted) { +				if (sc.state == SCE_SH_HERE_DELIM) { +					// Missing quote at end of string! We are stricter than bash. +					// Colour here-doc anyway while marking this bit as an error. +					sc.ChangeState(SCE_SH_ERROR); +				} +				// HereDoc.Quote always == '\''  			} +			sc.SetState(SCE_SH_HERE_Q);  		} -		if (state == SCE_SH_ERROR) { -			break; + +		// Determine if a new state should be entered. +		if (sc.state == SCE_SH_DEFAULT) { +			if (sc.ch == '\\') {	// escaped character +				sc.SetState(SCE_SH_IDENTIFIER); +			} else if (IsADigit(sc.ch)) { +				sc.SetState(SCE_SH_NUMBER); +				numBase = BASH_BASE_DECIMAL; +				if (sc.ch == '0') {	// hex,octal +					if (sc.chNext == 'x' || sc.chNext == 'X') { +						numBase = BASH_BASE_HEX; +						sc.Forward(); +					} else if (IsADigit(sc.chNext)) { +#ifdef PEDANTIC_OCTAL +						numBase = BASH_BASE_OCTAL; +#else +						numBase = BASH_BASE_HEX; +#endif +					} +				} +			} else if (setWordStart.Contains(sc.ch)) { +				sc.SetState(SCE_SH_WORD); +			} else if (sc.ch == '#') { +				sc.SetState(SCE_SH_COMMENTLINE); +			} else if (sc.ch == '\"') { +				sc.SetState(SCE_SH_STRING); +				Quote.Start(sc.ch); +			} else if (sc.ch == '\'') { +				sc.SetState(SCE_SH_CHARACTER); +				Quote.Start(sc.ch); +			} else if (sc.ch == '`') { +				sc.SetState(SCE_SH_BACKTICKS); +				Quote.Start(sc.ch); +			} else if (sc.ch == '$') { +				sc.SetState(SCE_SH_SCALAR); +				sc.Forward(); +				if (sc.ch == '{') { +					sc.ChangeState(SCE_SH_PARAM); +				} else if (sc.ch == '\'') { +					sc.ChangeState(SCE_SH_CHARACTER); +				} else if (sc.ch == '"') { +					sc.ChangeState(SCE_SH_STRING); +				} else if (sc.ch == '(' || sc.ch == '`') { +					sc.ChangeState(SCE_SH_BACKTICKS); +					if (sc.chNext == '(') {	// $(( is lexed as operator +						sc.ChangeState(SCE_SH_OPERATOR); +					} +				} else { +					continue;	// scalar has no delimiter pair +				} +				// fallthrough, open delim for $[{'"(`] +				Quote.Start(sc.ch); +			} else if (sc.Match('<', '<')) { +				sc.SetState(SCE_SH_HERE_DELIM); +				HereDoc.State = 0; +				HereDoc.Indent = false; +			} else if (sc.ch == '-'	&&	// one-char file test operators +					   setSingleCharOp.Contains(sc.chNext) && +					   !setWord.Contains(sc.GetRelative(2)) && +					   IsASpace(sc.chPrev)) { +				sc.SetState(SCE_SH_WORD); +				sc.Forward(); +			} else if (setBashOperator.Contains(sc.ch)) { +				sc.SetState(SCE_SH_OPERATOR); +			}  		} -		chPrev = ch;  	} -	styler.ColourTo(lengthDoc - 1, state); +	sc.Complete();  }  static bool IsCommentLine(int line, Accessor &styler) { @@ -621,7 +470,7 @@ static bool IsCommentLine(int line, Accessor &styler) {  }  static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[], -                            Accessor &styler) { +						Accessor &styler) {  	bool foldComment = styler.GetPropertyInt("fold.comment") != 0;  	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;  	unsigned int endPos = startPos + length; @@ -637,16 +486,16 @@ static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[],  		int style = styleNext;  		styleNext = styler.StyleAt(i + 1);  		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); -        // Comment folding +		// Comment folding  		if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) -        { -            if (!IsCommentLine(lineCurrent - 1, styler) -                && IsCommentLine(lineCurrent + 1, styler)) -                levelCurrent++; -            else if (IsCommentLine(lineCurrent - 1, styler) -                     && !IsCommentLine(lineCurrent+1, styler)) -                levelCurrent--; -        } +		{ +			if (!IsCommentLine(lineCurrent - 1, styler) +				&& IsCommentLine(lineCurrent + 1, styler)) +				levelCurrent++; +			else if (IsCommentLine(lineCurrent - 1, styler) +					 && !IsCommentLine(lineCurrent + 1, styler)) +				levelCurrent--; +		}  		if (style == SCE_SH_OPERATOR) {  			if (ch == '{') {  				levelCurrent++;  | 
