diff options
| -rw-r--r-- | src/LexPerl.cxx | 2097 | 
1 files changed, 1000 insertions, 1097 deletions
| diff --git a/src/LexPerl.cxx b/src/LexPerl.cxx index ff8d771ce..f57f73c29 100644 --- a/src/LexPerl.cxx +++ b/src/LexPerl.cxx @@ -1,6 +1,6 @@  // Scintilla source code edit control  /** @file LexPerl.cxx - ** Lexer for subset of Perl. + ** Lexer for Perl.   **/  // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>  // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my> @@ -16,166 +16,336 @@  #include "PropSet.h"  #include "Accessor.h" +#include "StyleContext.h"  #include "KeyWords.h"  #include "Scintilla.h"  #include "SciLexer.h" +#include "CharacterSet.h"  #ifdef SCI_NAMESPACE  using namespace Scintilla;  #endif -#define PERLNUM_BINARY 1    // order is significant: 1-4 cannot have a dot -#define PERLNUM_HEX 2 -#define PERLNUM_OCTAL 3 -#define PERLNUM_FLOAT 4     // actually exponent part -#define PERLNUM_DECIMAL 5   // 1-5 are numbers; 6-7 are strings -#define PERLNUM_VECTOR 6 -#define PERLNUM_V_VECTOR 7 -#define PERLNUM_BAD 8 +// Info for HERE document handling from perldata.pod (reformatted): +// ---------------------------------------------------------------- +// A line-oriented form of quoting is based on the shell ``here-doc'' syntax. +// Following a << you specify a string to terminate the quoted material, and +// all lines following the current line down to the terminating string are +// the value of the item. +// * The terminating string may be either an identifier (a word), or some +//   quoted text. +// * If quoted, the type of quotes you use determines the treatment of the +//   text, just as in regular quoting. +// * An unquoted identifier works like double quotes. +// * There must be no space between the << and the identifier. +//   (If you put a space it will be treated as a null identifier, +//    which is valid, and matches the first empty line.) +//   (This is deprecated, -w warns of this syntax) +// * The terminating string must appear by itself (unquoted and +//   with no surrounding whitespace) on the terminating line. -#define BACK_NONE 0         // lookback state for bareword disambiguation: -#define BACK_OPERATOR 1     // whitespace/comments are insignificant -#define BACK_KEYWORD 2      // operators/keywords are needed for disambiguation +#define HERE_DELIM_MAX 256		// maximum length of HERE doc delimiter -#define HERE_DELIM_MAX 256 +#define PERLNUM_BINARY		1	// order is significant: 1-4 cannot have a dot +#define PERLNUM_HEX			2 +#define PERLNUM_OCTAL		3 +#define PERLNUM_FLOAT_EXP	4	// exponent part only +#define PERLNUM_DECIMAL		5	// 1-5 are numbers; 6-7 are strings +#define PERLNUM_VECTOR		6 +#define PERLNUM_V_VECTOR	7 +#define PERLNUM_BAD			8 -static inline bool isEOLChar(char ch) { -	return (ch == '\r') || (ch == '\n'); -} - -static bool isSingleCharOp(char ch) { -	char strCharSet[2]; -	strCharSet[0] = ch; -	strCharSet[1] = '\0'; -	return (NULL != strstr("rwxoRWXOezsfdlpSbctugkTBMAC", strCharSet)); -} +#define BACK_NONE		0	// lookback state for bareword disambiguation: +#define BACK_OPERATOR	1	// whitespace/comments are insignificant +#define BACK_KEYWORD	2	// operators/keywords are needed for disambiguation -static inline bool isPerlOperator(char ch) { -	if (ch == '^' || ch == '&' || ch == '\\' || -	        ch == '(' || ch == ')' || ch == '-' || ch == '+' || -	        ch == '=' || ch == '|' || ch == '{' || ch == '}' || -	        ch == '[' || ch == ']' || ch == ':' || ch == ';' || -	        ch == '>' || ch == ',' || -	        ch == '?' || ch == '!' || ch == '.' || ch == '~') -		return true; -	// these chars are already tested before this call -	// ch == '%' || ch == '*' || ch == '<' || ch == '/' || -	return false; -} - -static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { +static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) +{ +	// old-style keyword matcher; needed because GetCurrent() needs +	// current segment to be committed, but we may abandon early...  	char s[100]; -    unsigned int i, len = end - start; -    if (len > 30) { len = 30; } +	unsigned int i, len = end - start; +	if (len > 30) { len = 30; }  	for (i = 0; i < len; i++, start++) s[i] = styler[start]; -    s[i] = '\0'; +	s[i] = '\0';  	return keywords.InList(s);  } -// Note: as lexer uses chars, UTF-8 bytes are considered as <0 values -// Note: iswordchar() was used in only one place in LexPerl, it is -// unnecessary as '.' is processed as the concatenation operator, so -// only isWordStart() is used in LexPerl - -static inline bool isWordStart(char ch) { -	return !isascii(ch) || isalnum(ch) || ch == '_'; +static int disambiguateBareword(Accessor &styler, unsigned int bk, unsigned int fw, +                                int backFlag, unsigned int backPos, unsigned int endPos) +{ +	// identifiers are recognized by Perl as barewords under some +	// conditions, the following attempts to do the disambiguation +	// by looking backward and forward; result in 2 LSB +	int result = 0; +	bool moreback = false;		// true if passed newline/comments +	bool brace = false;			// true if opening brace found +	// if BACK_NONE, neither operator nor keyword, so skip test +	if (backFlag == BACK_NONE) +		return result; +	// first look backwards past whitespace/comments to set EOL flag +	// (some disambiguation patterns must be on a single line) +	if (backPos <= static_cast<unsigned int>(styler.LineStart(styler.GetLine(bk)))) +		moreback = true; +	// look backwards at last significant lexed item for disambiguation +	bk = backPos - 1; +	int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); +	if (ch == '{' && !moreback) { +		// {bareword: possible variable spec +		brace = true; +	} else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&') +			// &bareword: subroutine call +			   || styler.Match(bk - 1, "->") +			// ->bareword: part of variable spec +			   || styler.Match(bk - 2, "sub")) { +			// sub bareword: subroutine declaration +			// (implied BACK_KEYWORD, no keywords end in 'sub'!) +		result |= 1; +	} +	// next, scan forward after word past tab/spaces only; +	// if ch isn't one of '[{(,' we can skip the test +	if ((ch == '{' || ch == '(' || ch == '['|| ch == ',') +		&& fw < endPos) { +		while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)), +			   IsASpaceOrTab(ch) && fw < endPos) { +			fw++; +		} +		if ((ch == '}' && brace) +			// {bareword}: variable spec +			|| styler.Match(fw, "=>")) { +			// [{(, bareword=>: hash literal +			result |= 2; +		} +	} +	return result;  } -static inline bool isEndVar(char ch) { -	return isascii(ch) && !isalnum(ch) && ch != '#' && ch != '$' && -	       ch != '_' && ch != '\''; +static void skipWhitespaceComment(Accessor &styler, unsigned int &p) +{ +	// when backtracking, we need to skip whitespace and comments +	int style; +	while ((p > 0) && (style = styler.StyleAt(p), +		   style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE)) +		p--;  } -static inline bool isNonQuote(char ch) { -	return !isascii(ch) || isalnum(ch) || ch == '_'; +static int styleBeforeBracePair(Accessor &styler, unsigned int bk) +{ +	// backtrack to find open '{' corresponding to a '}', balanced +	// return significant style to be tested for '/' disambiguation +	int braceCount = 1; +	if (bk == 0) +		return SCE_PL_DEFAULT; +	while (--bk > 0) { +		if (styler.StyleAt(bk) == SCE_PL_OPERATOR) { +			int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); +			if (bkch == ';') {	// early out +				break; +			} else if (bkch == '}') { +				braceCount++; +			} else if (bkch == '{') { +				if (--braceCount == 0) break; +			} +		} +	} +	if (bk > 0 && braceCount == 0) { +		// balanced { found, bk > 0, skip more whitespace/comments +		bk--; +		skipWhitespaceComment(styler, bk); +		return styler.StyleAt(bk); +	} +	return SCE_PL_DEFAULT;  } -static inline char actualNumStyle(int numberStyle) { -    if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) { -        return SCE_PL_STRING; -    } else if (numberStyle == PERLNUM_BAD) { -        return SCE_PL_ERROR; -    } -    return SCE_PL_NUMBER; +static int styleCheckIdentifier(Accessor &styler, unsigned int bk) +{ +	// backtrack to classify sub-styles of identifier under test +	// return sub-style to be tested for '/' disambiguation +	if (styler.SafeGetCharAt(bk) == '>')	// inputsymbol, like <foo> +		return 1; +	// backtrack to check for possible "->" or "::" before identifier +	while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) { +		bk--; +	} +	while (bk > 0) { +		int bkstyle = styler.StyleAt(bk); +		if (bkstyle == SCE_PL_DEFAULT +			|| bkstyle == SCE_PL_COMMENTLINE) { +			// skip whitespace, comments +		} else if (bkstyle == SCE_PL_OPERATOR) { +			// test for "->" and "::" +			if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::")) +				return 2; +		} else +			return 3;	// bare identifier +		bk--; +	} +	return 0;  } -static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) { -	if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) { -		return false; +static int inputsymbolScan(Accessor &styler, unsigned int pos, unsigned int endPos) +{ +	// looks forward for matching > on same line; a bit ugly +	unsigned int fw = pos; +	while (++fw < endPos) { +		int fwch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)); +		if (fwch == '\r' || fwch == '\n') { +			return 0; +		} else if (fwch == '>') { +			if (styler.Match(fw - 2, "<=>"))	// '<=>' case +				return 0; +			return fw - pos; +		}  	} -	while (*val) { -		if (*val != styler[pos++]) { -			return false; +	return 0; +} + +static int podLineScan(Accessor &styler, unsigned int &pos, unsigned int endPos) +{ +	// forward scan the current line to classify line for POD style +	int state = -1; +	while (pos <= endPos) { +		int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos)); +		if (ch == '\n' || ch == '\r' || pos >= endPos) { +			if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++; +			break;  		} -		val++; +		if (IsASpaceOrTab(ch)) {	// whitespace, take note +			if (state == -1) +				state = SCE_PL_DEFAULT; +		} else if (state == SCE_PL_DEFAULT) {	// verbatim POD line +			state = SCE_PL_POD_VERB; +		} else if (state != SCE_PL_POD_VERB) {	// regular POD line +			state = SCE_PL_POD; +		} +		pos++; +	} +	if (state == -1) +		state = SCE_PL_DEFAULT; +	return state; +} + +static bool styleCheckSubPrototype(Accessor &styler, unsigned int bk) +{ +	// backtrack to identify if we're starting a subroutine prototype +	// we also need to ignore whitespace/comments: +	// 'sub' [whitespace|comment] <identifier> [whitespace|comment] +	styler.Flush(); +	skipWhitespaceComment(styler, bk); +	if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER)	// check identifier +		return false; +	while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) { +		bk--;  	} +	skipWhitespaceComment(styler, bk); +	if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD	// check "sub" keyword +		|| !styler.Match(bk - 2, "sub"))	// assume suffix is unique! +		return false;  	return true;  } -static char opposite(char ch) { -	if (ch == '(') -		return ')'; -	if (ch == '[') -		return ']'; -	if (ch == '{') -		return '}'; -	if (ch == '<') -		return '>'; +static bool isMatch(const char *sref, char *s) +{ +	// match per-line delimiter - must kill trailing CR if CRLF +	if (s[strlen(s) - 1] == '\r') +		s[strlen(s) - 1] = '\0'; +	return (strcmp(sref, s) == 0); +} + +static int actualNumStyle(int numberStyle) { +	if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) { +		return SCE_PL_STRING; +	} else if (numberStyle == PERLNUM_BAD) { +		return SCE_PL_ERROR; +	} +	return SCE_PL_NUMBER; +} + +static int opposite(int ch) { +	if (ch == '(') return ')'; +	if (ch == '[') return ']'; +	if (ch == '{') return '}'; +	if (ch == '<') return '>';  	return ch;  }  static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,                               WordList *keywordlists[], Accessor &styler) { -	// Lexer for perl often has to backtrack to start of current style to determine -	// which characters are being used as quotes, how deeply nested is the -	// start position and what the termination string is for here documents -  	WordList &keywords = *keywordlists[0]; -    // keywords that forces /PATTERN/ at all times -    WordList reWords; -    reWords.Set("elsif if split while"); +	// keywords that forces /PATTERN/ at all times; should track vim's behaviour +	WordList reWords; +	reWords.Set("elsif if split while"); -	class HereDocCls { +	// charset classes +	CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true); +	CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true); +	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC"); +	// lexing of "%*</" operators is non-trivial; these are missing in the set below +	CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~"); +	CharacterSet setQDelim(CharacterSet::setNone, "qrwx"); +	CharacterSet setModifiers(CharacterSet::setAlpha); +	CharacterSet setPreferRE(CharacterSet::setNone, "*/<%"); +	// setArray and setHash also accepts chars for special vars like $_, +	// which are then truncated when the next char does not match setVar +	CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true); +	CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true); +	CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true); +	CharacterSet &setPOD = setModifiers; +	CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@"); +	CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_"); +	CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*];"); +	// for format identifiers +	CharacterSet setFormatStart(CharacterSet::setAlpha, "_="); +	CharacterSet &setFormat = setHereDocDelim; + +	// Lexer for perl often has to backtrack to start of current style to determine +	// which characters are being used as quotes, how deeply nested is the +	// start position and what the termination string is for HERE documents. + +	class HereDocCls {	// Class to manage HERE doc sequence  	public:  		int State;		// 0: '<<' encountered -		// 1: collect the delimiter -		// 2: here doc text (lines after the delimiter) -		char Quote;		// the char after '<<' +						// 1: collect the delimiter +						// 2: here doc text (lines after the delimiter) +		int Quote;		// the char after '<<'  		bool Quoted;		// true if Quote in ('\'','"','`')  		int DelimiterLength;	// strlen(Delimiter)  		char *Delimiter;	// the Delimiter, 256: sizeof PL_tokenbuf  		HereDocCls() {  			State = 0; -            Quote = 0; -            Quoted = false; +			Quote = 0; +			Quoted = false;  			DelimiterLength = 0;  			Delimiter = new char[HERE_DELIM_MAX];  			Delimiter[0] = '\0';  		} +		void Append(int ch) { +			Delimiter[DelimiterLength++] = static_cast<char>(ch); +			Delimiter[DelimiterLength] = '\0'; +		}  		~HereDocCls() {  			delete []Delimiter;  		}  	}; -	HereDocCls HereDoc;	// TODO: FIFO for stacked here-docs +	HereDocCls HereDoc;		// TODO: FIFO for stacked here-docs -	class QuoteCls { +	class QuoteCls {	// Class to manage quote pairs  		public: -		int  Rep; -		int  Count; -		char Up; -		char Down; +		int Rep; +		int Count; +		int Up, Down;  		QuoteCls() {  			this->New(1);  		} -		void New(int r) { +		void New(int r = 1) {  			Rep   = r;  			Count = 0;  			Up    = '\0';  			Down  = '\0';  		} -		void Open(char u) { +		void Open(int u) {  			Count++;  			Up    = u;  			Down  = opposite(Up); @@ -183,525 +353,700 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  	};  	QuoteCls Quote; -	int state = initStyle; -	char numState = PERLNUM_DECIMAL; +	// additional state for number lexing +	int numState = PERLNUM_DECIMAL;  	int dotCount = 0; -	unsigned int lengthDoc = startPos + length; -	//int sookedpos = 0; // these have no apparent use, see POD state -	//char sooked[100]; -	//sooked[sookedpos] = '\0'; -	styler.StartAt(startPos, static_cast<char>(STYLE_MAX)); -	// If in a long distance lexical state, seek to the beginning to find quote characters -	// Perl strings can be multi-line with embedded newlines, so backtrack. -	// Perl numbers have additional state during lexing, so backtrack too. -	if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) { -		while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) { +	unsigned int endPos = startPos + length; + +	// Backtrack to beginning of style if required... +	// If in a long distance lexical state, backtrack to find quote characters. +	// Includes strings (may be multi-line), numbers (additional state), format +	// bodies, as well as POD sections. +	if (initStyle == SCE_PL_HERE_Q +		|| initStyle == SCE_PL_HERE_QQ +		|| initStyle == SCE_PL_HERE_QX +		|| initStyle == SCE_PL_FORMAT +	) { +		int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM; +		while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {  			startPos--;  		}  		startPos = styler.LineStart(styler.GetLine(startPos)); -		state = styler.StyleAt(startPos - 1); +		initStyle = styler.StyleAt(startPos - 1);  	} -    // Backtrack for format body. -	if (state == SCE_PL_FORMAT) { -		while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_FORMAT_IDENT)) { +	if (initStyle == SCE_PL_STRING_Q +		|| initStyle == SCE_PL_STRING_QQ +		|| initStyle == SCE_PL_STRING_QX +		|| initStyle == SCE_PL_STRING_QR +		|| initStyle == SCE_PL_STRING_QW +		|| initStyle == SCE_PL_REGEX +		|| initStyle == SCE_PL_REGSUBST +		|| initStyle == SCE_PL_STRING +		|| initStyle == SCE_PL_BACKTICKS +		|| initStyle == SCE_PL_CHARACTER +		|| initStyle == SCE_PL_NUMBER +		|| initStyle == SCE_PL_IDENTIFIER +		|| initStyle == SCE_PL_ERROR +		|| initStyle == SCE_PL_SUB_PROTOTYPE +	) { +		while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {  			startPos--;  		} -		startPos = styler.LineStart(styler.GetLine(startPos)); -		state = styler.StyleAt(startPos - 1); -	} -	if ( state == SCE_PL_STRING_Q -	|| state == SCE_PL_STRING_QQ -	|| state == SCE_PL_STRING_QX -	|| state == SCE_PL_STRING_QR -	|| state == SCE_PL_STRING_QW -	|| state == SCE_PL_REGEX -	|| state == SCE_PL_REGSUBST -	|| state == SCE_PL_STRING -	|| state == SCE_PL_BACKTICKS -	|| state == SCE_PL_CHARACTER -	|| state == SCE_PL_NUMBER -	|| state == SCE_PL_IDENTIFIER -    || state == SCE_PL_ERROR -    || state == SCE_PL_SUB_PROTOTYPE +		initStyle = SCE_PL_DEFAULT; +	} else if (initStyle == SCE_PL_POD +			   || initStyle == SCE_PL_POD_VERB  	) { -		while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) { -			startPos--; +		// POD backtracking finds preceeding blank lines and goes back past them +		int ln = styler.GetLine(startPos); +		if (ln > 0) { +			initStyle = styler.StyleAt(styler.LineStart(--ln)); +			if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) { +				while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT) +					ln--; +			} +			startPos = styler.LineStart(++ln); +			initStyle = styler.StyleAt(startPos - 1); +		} else { +			startPos = 0; +			initStyle = SCE_PL_DEFAULT;  		} -		state = SCE_PL_DEFAULT;  	} -    // lookback at start of lexing to set proper state for backflag -    // after this, they are updated when elements are lexed -    int backflag = BACK_NONE; -    unsigned int backPos = startPos; -    if (backPos > 0) { -        backPos--; -        int sty = SCE_PL_DEFAULT; -        while ((backPos > 0) && (sty = styler.StyleAt(backPos), -               sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE)) -            backPos--; -        if (sty == SCE_PL_OPERATOR) -            backflag = BACK_OPERATOR; -        else if (sty == SCE_PL_WORD) -            backflag = BACK_KEYWORD; -    } +	// backFlag, backPos are additional state to aid identifier corner cases. +	// Look backwards past whitespace and comments in order to detect either +	// operator or keyword. Later updated as we go along. +	int backFlag = BACK_NONE; +	unsigned int backPos = startPos; +	if (backPos > 0) { +		backPos--; +		skipWhitespaceComment(styler, backPos); +		if (styler.StyleAt(backPos) == SCE_PL_OPERATOR) +			backFlag = BACK_OPERATOR; +		else if (styler.StyleAt(backPos) == SCE_PL_WORD) +			backFlag = BACK_KEYWORD; +		backPos++; +	} -	styler.StartAt(startPos, static_cast<char>(STYLE_MAX)); -	char chPrev = styler.SafeGetCharAt(startPos - 1); -	if (startPos == 0) -		chPrev = '\n'; -	char chNext = styler[startPos]; -	styler.StartSegment(startPos); +	StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX)); -	for (unsigned int i = startPos; i < lengthDoc; i++) { -		char ch = chNext; -		// if the current character is not consumed due to the completion of an -		// earlier style, lexing can be restarted via a simple goto -	restartLexer: -		chNext = styler.SafeGetCharAt(i + 1); -		char chNext2 = styler.SafeGetCharAt(i + 2); +	for (; sc.More(); sc.Forward()) { -		if (styler.IsLeadByte(ch)) { -			chNext = styler.SafeGetCharAt(i + 2); -			chPrev = ' '; -			i += 1; -			continue; +		// Determine if the current state should terminate. +		switch (sc.state) { +			case SCE_PL_OPERATOR: +				sc.SetState(SCE_PL_DEFAULT); +				backFlag = BACK_OPERATOR; +				backPos = sc.currentPos; +				break; +			case SCE_PL_IDENTIFIER:		// identifier, bareword, inputsymbol +				if ((!setWord.Contains(sc.ch) && sc.ch != '\'') +					|| sc.Match('.', '.') +					|| sc.chPrev == '>') {	// end of inputsymbol +					sc.SetState(SCE_PL_DEFAULT); +				} +				break; +			case SCE_PL_WORD:		// keyword, plus special cases +				if (!setWord.Contains(sc.ch)) { +					char s[100]; +					sc.GetCurrent(s, sizeof(s)); +					if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) { +						sc.ChangeState(SCE_PL_DATASECTION); +					} else { +						if ((strcmp(s, "format") == 0)) { +							sc.SetState(SCE_PL_FORMAT_IDENT); +							HereDoc.State = 0; +						} else { +							sc.SetState(SCE_PL_DEFAULT); +						} +						backFlag = BACK_KEYWORD; +						backPos = sc.currentPos; +					} +				} +				break; +			case SCE_PL_SCALAR: +			case SCE_PL_ARRAY: +			case SCE_PL_HASH: +			case SCE_PL_SYMBOLTABLE: +				if (sc.Match(':', ':')) {	// skip :: +					sc.Forward(); +				} else if (!setVar.Contains(sc.ch)) { +					if (sc.LengthCurrent() == 1) { +						// Special variable: $(, $_ etc. +						sc.Forward(); +					} +					sc.SetState(SCE_PL_DEFAULT); +				} +				break; +			case SCE_PL_NUMBER: +				// if no early break, number style is terminated at "(go through)" +				if (sc.ch == '.') { +					if (sc.chNext == '.') { +						// double dot is always an operator (go through) +					} else if (numState <= PERLNUM_FLOAT_EXP) { +						// non-decimal number or float exponent, consume next dot +						sc.SetState(SCE_PL_OPERATOR); +						break; +					} else {	// decimal or vectors allows dots +						dotCount++; +						if (numState == PERLNUM_DECIMAL) { +							if (dotCount <= 1)	// number with one dot in it +								break; +							if (IsADigit(sc.chNext)) {	// really a vector +								numState = PERLNUM_VECTOR; +								break; +							} +							// number then dot (go through) +						} else if (IsADigit(sc.chNext))	// vectors +							break; +						// vector then dot (go through) +					} +				} else if (sc.ch == '_') { +					// permissive underscoring for number and vector literals +					break; +				} else if (numState == PERLNUM_DECIMAL) { +					if (sc.ch == 'E' || sc.ch == 'e') {	// exponent, sign +						numState = PERLNUM_FLOAT_EXP; +						if (sc.chNext == '+' || sc.chNext == '-') { +							sc.Forward(); +						} +						break; +					} else if (IsADigit(sc.ch)) +						break; +					// number then word (go through) +				} else if (numState == PERLNUM_HEX) { +					if (IsADigit(sc.ch, 16)) +						break; +				} else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { +					if (IsADigit(sc.ch))	// vector +						break; +					if (setWord.Contains(sc.ch) && dotCount == 0) {	// change to word +						sc.ChangeState(SCE_PL_IDENTIFIER); +						break; +					} +					// vector then word (go through) +				} else if (IsADigit(sc.ch)) { +					if (numState == PERLNUM_FLOAT_EXP) { +						break; +					} else if (numState == PERLNUM_OCTAL) { +						if (sc.ch <= '7') break; +					} else if (numState == PERLNUM_BINARY) { +						if (sc.ch <= '1') break; +					} +					// mark invalid octal, binary numbers (go through) +					numState = PERLNUM_BAD; +					break; +				} +				// complete current number or vector +				sc.ChangeState(actualNumStyle(numState)); +				sc.SetState(SCE_PL_DEFAULT); +				break; +			case SCE_PL_COMMENTLINE: +				if (sc.atLineEnd) { +					sc.SetState(SCE_PL_DEFAULT); +				} +				break; +			case SCE_PL_HERE_DELIM: +				if (HereDoc.State == 0) { // '<<' encountered +					int delim_ch = sc.chNext; +					int ws_skip = 0; +					HereDoc.State = 1;	// pre-init HERE doc class +					HereDoc.Quote = sc.chNext; +					HereDoc.Quoted = false; +					HereDoc.DelimiterLength = 0; +					HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; +					if (IsASpaceOrTab(delim_ch)) { +						// skip whitespace; legal only for quoted delimiters +						unsigned int i = sc.currentPos + 1; +						while ((i < endPos) && IsASpaceOrTab(delim_ch)) { +							i++; +							delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i)); +						} +						ws_skip = i - sc.currentPos - 1; +					} +					if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') { +						// a quoted here-doc delimiter; skip any whitespace +						sc.Forward(ws_skip + 1); +						HereDoc.Quote = delim_ch; +						HereDoc.Quoted = true; +					} else if (ws_skip == 0 && setNonHereDoc.Contains(sc.chNext) +							   || ws_skip > 0) { +						// left shift << or <<= operator cases +						// restore position if operator +						sc.ChangeState(SCE_PL_OPERATOR); +						sc.ForwardSetState(SCE_PL_DEFAULT); +						backFlag = BACK_OPERATOR; +						backPos = sc.currentPos; +						HereDoc.State = 0; +					} else { +						// specially handle initial '\' for identifier +						if (ws_skip == 0 && HereDoc.Quote == '\\') +							sc.Forward(); +						// an unquoted here-doc delimiter, no special handling +						// (cannot be prefixed by spaces/tabs), or +						// symbols terminates; deprecated zero-length delimiter +					} +				} else if (HereDoc.State == 1) { // collect the delimiter +					backFlag = BACK_NONE; +					if (HereDoc.Quoted) { // a quoted here-doc delimiter +						if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter +							sc.ForwardSetState(SCE_PL_DEFAULT); +						} else if (!sc.atLineEnd) { +							if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote +								sc.Forward(); +							} +							if (sc.ch != '\r') {	// skip CR if CRLF +								HereDoc.Append(sc.ch); +							} +						} +					} else { // an unquoted here-doc delimiter +						if (setHereDocDelim.Contains(sc.ch)) { +							HereDoc.Append(sc.ch); +						} else { +							sc.SetState(SCE_PL_DEFAULT); +						} +					} +					if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { +						sc.SetState(SCE_PL_ERROR); +						HereDoc.State = 0; +					} +				} +				break; +			case SCE_PL_HERE_Q: +			case SCE_PL_HERE_QQ: +			case SCE_PL_HERE_QX: { +				// also implies HereDoc.State == 2 +				sc.Complete(); +				while (!sc.atLineEnd) +					sc.Forward(); +				char s[HERE_DELIM_MAX]; +				sc.GetCurrent(s, sizeof(s)); +				if (isMatch(HereDoc.Delimiter, s)) { +					sc.SetState(SCE_PL_DEFAULT); +					backFlag = BACK_NONE; +					HereDoc.State = 0; +				} +				} break; +			case SCE_PL_POD: +			case SCE_PL_POD_VERB: { +				unsigned int fw = sc.currentPos; +				int ln = styler.GetLine(fw); +				if (sc.atLineStart && sc.Match("=cut")) {	// end of POD +					sc.SetState(SCE_PL_POD); +					sc.Forward(4); +					sc.SetState(SCE_PL_DEFAULT); +					styler.SetLineState(ln, SCE_PL_POD); +					break; +				} +				int pod = podLineScan(styler, fw, endPos);	// classify POD line +				styler.SetLineState(ln, pod); +				if (pod == SCE_PL_DEFAULT) { +					if (sc.state == SCE_PL_POD_VERB) { +						unsigned int fw2 = fw; +						while (fw2 <= endPos && pod == SCE_PL_DEFAULT) { +							fw = fw2++;	// penultimate line (last blank line) +							pod = podLineScan(styler, fw2, endPos); +							styler.SetLineState(styler.GetLine(fw2), pod); +						} +						if (pod == SCE_PL_POD) {	// truncate verbatim POD early +							sc.SetState(SCE_PL_POD); +						} else +							fw = fw2; +					} else +						pod = SCE_PL_POD; +				} else { +					if (pod == SCE_PL_POD_VERB	// still part of current paragraph +					    && (styler.GetLineState(ln - 1) == SCE_PL_POD)) { +						pod = SCE_PL_POD; +						styler.SetLineState(ln, pod); +					} else if (pod == SCE_PL_POD +							   && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) { +						pod = SCE_PL_POD_VERB; +						styler.SetLineState(ln, pod); +					} +					sc.SetState(pod); +				} +				sc.Forward(fw - sc.currentPos);	// commit style +				} break; +			case SCE_PL_REGEX: +			case SCE_PL_STRING_QR: +				if (Quote.Rep <= 0) { +					if (!setModifiers.Contains(sc.ch)) +						sc.SetState(SCE_PL_DEFAULT); +				} else if (!Quote.Up && !IsASpace(sc.ch)) { +					Quote.Open(sc.ch); +				} else if (sc.ch == '\\' && Quote.Up != '\\') { +					sc.Forward(); +				} else if (sc.ch == Quote.Down) { +					Quote.Count--; +					if (Quote.Count == 0) +						Quote.Rep--; +				} else if (sc.ch == Quote.Up) { +					Quote.Count++; +				} +				break; +			case SCE_PL_REGSUBST: +				if (Quote.Rep <= 0) { +					if (!setModifiers.Contains(sc.ch)) +						sc.SetState(SCE_PL_DEFAULT); +				} else if (!Quote.Up && !IsASpace(sc.ch)) { +					Quote.Open(sc.ch); +				} else if (sc.ch == '\\' && Quote.Up != '\\') { +					sc.Forward(); +				} else if (Quote.Count == 0 && Quote.Rep == 1) { +					// We matched something like s(...) or tr{...}, Perl 5.10 +					// appears to allow almost any character for use as the +					// next delimiters. Whitespace and comments are accepted in +					// between, but we'll limit to whitespace here. +					// For '#', if no whitespace in between, it's a delimiter. +					if (IsASpace(sc.ch)) { +						// Keep going +					} else if (sc.ch == '#' && IsASpaceOrTab(sc.chPrev)) { +						sc.SetState(SCE_PL_DEFAULT); +					} else { +						Quote.Open(sc.ch); +					} +				} else if (sc.ch == Quote.Down) { +					Quote.Count--; +					if (Quote.Count == 0) +						Quote.Rep--; +					if (Quote.Up == Quote.Down) +						Quote.Count++; +				} else if (sc.ch == Quote.Up) { +					Quote.Count++; +				} +				break; +			case SCE_PL_STRING_Q: +			case SCE_PL_STRING_QQ: +			case SCE_PL_STRING_QX: +			case SCE_PL_STRING_QW: +			case SCE_PL_STRING: +			case SCE_PL_CHARACTER: +			case SCE_PL_BACKTICKS: +				if (!Quote.Down && !IsASpace(sc.ch)) { +					Quote.Open(sc.ch); +				} else if (sc.ch == '\\' && Quote.Up != '\\') { +					sc.Forward(); +				} else if (sc.ch == Quote.Down) { +					Quote.Count--; +					if (Quote.Count == 0) +						sc.ForwardSetState(SCE_PL_DEFAULT); +				} else if (sc.ch == Quote.Up) { +					Quote.Count++; +				} +				break; +			case SCE_PL_SUB_PROTOTYPE: { +				int i = 0; +				// forward scan; must all be valid proto characters +				while (setSubPrototype.Contains(sc.GetRelative(i))) +					i++; +				if (sc.GetRelative(i) == ')') {	// valid sub prototype +					sc.Forward(i); +					sc.ForwardSetState(SCE_PL_DEFAULT); +				} else { +					// abandon prototype, restart from '(' +					sc.ChangeState(SCE_PL_OPERATOR); +					sc.SetState(SCE_PL_DEFAULT); +				} +				} break; +			case SCE_PL_FORMAT: { +				sc.Complete(); +				while (!sc.atLineEnd) +					sc.Forward(); +				char s[10]; +				sc.GetCurrent(s, sizeof(s)); +				if (isMatch(".", s)) +					sc.SetState(SCE_PL_DEFAULT); +				} break; +			case SCE_PL_ERROR: +				break;  		} -		if ((chPrev == '\r' && ch == '\n')) {	// skip on DOS/Windows -			styler.ColourTo(i, state); -			chPrev = ch; -			continue; +		// Needed for specific continuation styles (one follows the other) +		switch (sc.state) { +			// continued from SCE_PL_WORD +			case SCE_PL_FORMAT_IDENT: +				// occupies HereDoc state 3 to avoid clashing with HERE docs +				if (IsASpaceOrTab(sc.ch)) {		// skip whitespace +					sc.ChangeState(SCE_PL_DEFAULT); +					while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) +						sc.Forward(); +					sc.SetState(SCE_PL_FORMAT_IDENT); +				} +				if (setFormatStart.Contains(sc.ch)) {	// identifier or '=' +					if (sc.ch != '=') { +						do { +							sc.Forward(); +						} while (setFormat.Contains(sc.ch)); +					} +					while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) +						sc.Forward(); +					if (sc.ch == '=') { +						sc.ForwardSetState(SCE_PL_DEFAULT); +						HereDoc.State = 3; +					} else { +						// invalid indentifier; inexact fallback, but hey +						sc.ChangeState(SCE_PL_IDENTIFIER); +						sc.SetState(SCE_PL_DEFAULT); +					} +				} else { +					sc.ChangeState(SCE_PL_DEFAULT);	// invalid indentifier +				} +				backFlag = BACK_NONE; +				break;  		} -		if (HereDoc.State == 1 && isEOLChar(ch)) { +		// Must check end of HereDoc states here before default state is handled +		if (HereDoc.State == 1 && sc.atLineEnd) {  			// Begin of here-doc (the line after the here-doc delimiter):  			// Lexically, the here-doc starts from the next line after the >>, but the  			// first line of here-doc seem to follow the style of the last EOL sequence +			int st_new = SCE_PL_HERE_QQ;  			HereDoc.State = 2;  			if (HereDoc.Quoted) { -				if (state == SCE_PL_HERE_DELIM) { +				if (sc.state == SCE_PL_HERE_DELIM) {  					// Missing quote at end of string! We are stricter than perl.  					// Colour here-doc anyway while marking this bit as an error. -					state = SCE_PL_ERROR; +					sc.ChangeState(SCE_PL_ERROR);  				} -				styler.ColourTo(i - 1, state);  				switch (HereDoc.Quote) { -				case '\'': -					state = SCE_PL_HERE_Q ; -					break; -				case '"': -					state = SCE_PL_HERE_QQ; -					break; -				case '`': -					state = SCE_PL_HERE_QX; -					break; +					case '\'': st_new = SCE_PL_HERE_Q ; break; +					case '"' : st_new = SCE_PL_HERE_QQ; break; +					case '`' : st_new = SCE_PL_HERE_QX; break;  				}  			} else { -				styler.ColourTo(i - 1, state); -				switch (HereDoc.Quote) { -				case '\\': -					state = SCE_PL_HERE_Q ; -					break; -				default : -					state = SCE_PL_HERE_QQ; -				} +				if (HereDoc.Quote == '\\') +					st_new = SCE_PL_HERE_Q;  			} +			sc.SetState(st_new); +		} +		if (HereDoc.State == 3 && sc.atLineEnd) { +			// Start of format body. +			HereDoc.State = 0; +			sc.SetState(SCE_PL_FORMAT);  		} -        if (HereDoc.State == 4 && isEOLChar(ch)) { -            // Start of format body. -            HereDoc.State = 0; -            styler.ColourTo(i - 1, state); -            state = SCE_PL_FORMAT; -        } -		if (state == SCE_PL_DEFAULT) { -			if ((isascii(ch) && isdigit(ch)) || (isascii(chNext) && isdigit(chNext) && -				(ch == '.' || ch == 'v'))) { -				state = SCE_PL_NUMBER; -                backflag = BACK_NONE; +		// Determine if a new state should be entered. +		if (sc.state == SCE_PL_DEFAULT) { +			if (IsADigit(sc.ch) || +				(IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) { +				sc.SetState(SCE_PL_NUMBER); +				backFlag = BACK_NONE;  				numState = PERLNUM_DECIMAL;  				dotCount = 0; -				if (ch == '0') {	// hex,bin,octal -					if (chNext == 'x') { +				if (sc.ch == '0') {		// hex,bin,octal +					if (sc.chNext == 'x') {  						numState = PERLNUM_HEX; -					} else if (chNext == 'b') { -                        numState = PERLNUM_BINARY; -                    } else if (isascii(chNext) && isdigit(chNext)) { -                        numState = PERLNUM_OCTAL; -                    } -                    if (numState != PERLNUM_DECIMAL) { -						i++; -						ch = chNext; -						chNext = chNext2; -                    } -				} else if (ch == 'v') {	// vector +					} else if (sc.chNext == 'b') { +						numState = PERLNUM_BINARY; +					} else if (IsADigit(sc.chNext)) { +						numState = PERLNUM_OCTAL; +					} +					if (numState != PERLNUM_DECIMAL) { +						sc.Forward(); +					} +				} else if (sc.ch == 'v') {		// vector  					numState = PERLNUM_V_VECTOR;  				} -			} else if (isWordStart(ch)) { -                // if immediately prefixed by '::', always a bareword -                state = SCE_PL_WORD; -                if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') { -                    state = SCE_PL_IDENTIFIER; -                } -                unsigned int kw = i + 1; -                // first check for possible quote-like delimiter -				if (ch == 's' && !isNonQuote(chNext)) { -					state = SCE_PL_REGSUBST; +			} else if (setWord.Contains(sc.ch)) { +				// if immediately prefixed by '::', always a bareword +				sc.SetState(SCE_PL_WORD); +				if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') { +					sc.ChangeState(SCE_PL_IDENTIFIER); +				} +				unsigned int bk = sc.currentPos; +				unsigned int fw = sc.currentPos + 1; +				// first check for possible quote-like delimiter +				if (sc.ch == 's' && !setWord.Contains(sc.chNext)) { +					sc.ChangeState(SCE_PL_REGSUBST);  					Quote.New(2); -				} else if (ch == 'm' && !isNonQuote(chNext)) { -					state = SCE_PL_REGEX; -					Quote.New(1); -				} else if (ch == 'q' && !isNonQuote(chNext)) { -					state = SCE_PL_STRING_Q; -					Quote.New(1); -				} else if (ch == 'y' && !isNonQuote(chNext)) { -					state = SCE_PL_REGSUBST; +				} else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) { +					sc.ChangeState(SCE_PL_REGEX); +					Quote.New(); +				} else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) { +					sc.ChangeState(SCE_PL_STRING_Q); +					Quote.New(); +				} else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) { +					sc.ChangeState(SCE_PL_REGSUBST);  					Quote.New(2); -				} else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) { -					state = SCE_PL_REGSUBST; +				} else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) { +					sc.ChangeState(SCE_PL_REGSUBST);  					Quote.New(2); -                    kw++; -				} else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) { -					if      (chNext == 'q') state = SCE_PL_STRING_QQ; -					else if (chNext == 'x') state = SCE_PL_STRING_QX; -					else if (chNext == 'r') state = SCE_PL_STRING_QR; -					else if (chNext == 'w') state = SCE_PL_STRING_QW; -					Quote.New(1); -                    kw++; -				} else if (ch == 'x' && (chNext == '=' ||	// repetition -                           !isWordStart(chNext) || -                           (isdigit(chPrev) && isdigit(chNext)))) { -                    state = SCE_PL_OPERATOR; -                } -                // if potentially a keyword, scan forward and grab word, then check -                // if it's really one; if yes, disambiguation test is performed -                // otherwise it is always a bareword and we skip a lot of scanning -                // note: keywords assumed to be limited to [_a-zA-Z] only -                if (state == SCE_PL_WORD) { -                    while (isWordStart(styler.SafeGetCharAt(kw))) kw++; -                    if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) { -                        state = SCE_PL_IDENTIFIER; -                    } -                } -                // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this -                // for quote-like delimiters/keywords, attempt to disambiguate -                // to select for bareword, change state -> SCE_PL_IDENTIFIER -                if (state != SCE_PL_IDENTIFIER && i > 0) { -                    unsigned int j = i; -                    bool moreback = false;      // true if passed newline/comments -                    bool brace = false;         // true if opening brace found -                    char ch2; -                    // first look backwards past whitespace/comments for EOLs -                    // if BACK_NONE, neither operator nor keyword, so skip test -                    if (backflag != BACK_NONE) { -                        while (--j > backPos) { -                            if (isEOLChar(styler.SafeGetCharAt(j))) -                                moreback = true; -                        } -                        ch2 = styler.SafeGetCharAt(j); -                        if (ch2 == '{' && !moreback) { -                            // {bareword: possible variable spec -                            brace = true; -                        } else if ((ch2 == '&' && styler.SafeGetCharAt(j - 1) != '&') -                                // &bareword: subroutine call -                                || (ch2 == '>' && styler.SafeGetCharAt(j - 1) == '-') -                                // ->bareword: part of variable spec -                                || (ch2 == 'b' && styler.Match(j - 2, "su"))) { -                                // sub bareword: subroutine declaration -                                // (implied BACK_KEYWORD, no keywords end in 'sub'!) -                            state = SCE_PL_IDENTIFIER; -                        } -                        // if status still ambiguous, look forward after word past -                        // tabs/spaces only; if ch2 isn't one of '[{(,' it can never -                        // match anything, so skip the whole thing -                        j = kw; -                        if (state != SCE_PL_IDENTIFIER -                            && (ch2 == '{' || ch2 == '(' || ch2 == '['|| ch2 == ',') -                            && kw < lengthDoc) { -                            while (ch2 = styler.SafeGetCharAt(j), -                                   (ch2 == ' ' || ch2 == '\t') && j < lengthDoc) { -                                j++; -                            } -                            if ((ch2 == '}' && brace) -                             // {bareword}: variable spec -                             || (ch2 == '=' && styler.SafeGetCharAt(j + 1) == '>')) { -                             // [{(, bareword=>: hash literal -                                state = SCE_PL_IDENTIFIER; -                            } -                        } -                    } -                } -                backflag = BACK_NONE; -                // an identifier or bareword -                if (state == SCE_PL_IDENTIFIER) { -                    if ((!isWordStart(chNext) && chNext != '\'') -                        || (chNext == '.' && chNext2 == '.')) { -                        // We need that if length of word == 1! -                        // This test is copied from the SCE_PL_WORD handler. -                        styler.ColourTo(i, SCE_PL_IDENTIFIER); -                        state = SCE_PL_DEFAULT; -                    } -                // a keyword -                } else if (state == SCE_PL_WORD) { -                    i = kw - 1; -                    if (ch == '_' && chNext == '_' && -                        (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__") -                      || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__"))) { -                        styler.ColourTo(i, SCE_PL_DATASECTION); -                        state = SCE_PL_DATASECTION; -                    } else { -                        if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "format")) { -                            state = SCE_PL_FORMAT_IDENT; -                            HereDoc.State = 0; -                        } else { -                            state = SCE_PL_DEFAULT; -                        } -                        styler.ColourTo(i, SCE_PL_WORD); -                        backflag = BACK_KEYWORD; -                        backPos = i; -                    } -                    ch = styler.SafeGetCharAt(i); -                    chNext = styler.SafeGetCharAt(i + 1); -                // a repetition operator 'x' -                } else if (state == SCE_PL_OPERATOR) { -                    state = SCE_PL_DEFAULT; -                    goto handleOperator; -                // quote-like delimiter, skip one char if double-char delimiter -                } else { -                    i = kw - 1; -                    chNext = styler.SafeGetCharAt(i + 1); -                } -			} else if (ch == '#') { -				state = SCE_PL_COMMENTLINE; -			} else if (ch == '\"') { -				state = SCE_PL_STRING; -				Quote.New(1); -				Quote.Open(ch); -                backflag = BACK_NONE; -			} else if (ch == '\'') { -				if (chPrev == '&') { -					// Archaic call -					styler.ColourTo(i, state); -				} else { -					state = SCE_PL_CHARACTER; -					Quote.New(1); -					Quote.Open(ch); +					sc.Forward(); +					fw++; +				} else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext) +						   && !setWord.Contains(sc.GetRelative(2))) { +					if      (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ); +					else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX); +					else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR); +					else sc.ChangeState(SCE_PL_STRING_QW);	// sc.chNext == 'w' +					Quote.New(); +					sc.Forward(); +					fw++; +				} else if (sc.ch == 'x' && (sc.chNext == '=' ||	// repetition +						   !setWord.Contains(sc.chNext) || +						   (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) { +					sc.ChangeState(SCE_PL_OPERATOR);  				} -                backflag = BACK_NONE; -			} else if (ch == '`') { -				state = SCE_PL_BACKTICKS; -				Quote.New(1); -				Quote.Open(ch); -                backflag = BACK_NONE; -			} else if (ch == '$') { -				if ((chNext == '{') || isspacechar(chNext)) { -					styler.ColourTo(i, SCE_PL_SCALAR); -				} else { -					state = SCE_PL_SCALAR; -					if ((chNext == '`' && chNext2 == '`') -                     || (chNext == ':' && chNext2 == ':')) { -						i += 2; -						ch = styler.SafeGetCharAt(i); -						chNext = styler.SafeGetCharAt(i + 1); -					} else { -						i++; -						ch = chNext; -						chNext = chNext2; +				// if potentially a keyword, scan forward and grab word, then check +				// if it's really one; if yes, disambiguation test is performed +				// otherwise it is always a bareword and we skip a lot of scanning +				if (sc.state == SCE_PL_WORD) { +					while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw)))) +						fw++; +					if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) { +						sc.ChangeState(SCE_PL_IDENTIFIER);  					}  				} -                backflag = BACK_NONE; -			} else if (ch == '@') { -				if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$' -					|| chNext == '_' || chNext == '+' || chNext == '-') { -					state = SCE_PL_ARRAY; -                } else if (chNext == ':' && chNext2 == ':') { -                    state = SCE_PL_ARRAY; -                    i += 2; -                    ch = styler.SafeGetCharAt(i); -                    chNext = styler.SafeGetCharAt(i + 1); -				} else if (chNext != '{' && chNext != '[') { -					styler.ColourTo(i, SCE_PL_ARRAY); -				} else { -					styler.ColourTo(i, SCE_PL_ARRAY); +				// if already SCE_PL_IDENTIFIER, then no ambiguity, skip this +				// for quote-like delimiters/keywords, attempt to disambiguate +				// to select for bareword, change state -> SCE_PL_IDENTIFIER +				if (sc.state != SCE_PL_IDENTIFIER && bk > 0) { +					if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos)) +						sc.ChangeState(SCE_PL_IDENTIFIER);  				} -                backflag = BACK_NONE; -			} else if (ch == '%') { -                backflag = BACK_NONE; -				if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$' -                    || chNext == '_' || chNext == '!' || chNext == '^') { -					state = SCE_PL_HASH; -                    i++; -                    ch = chNext; -                    chNext = chNext2; -                } else if (chNext == ':' && chNext2 == ':') { -                    state = SCE_PL_HASH; -                    i += 2; -                    ch = styler.SafeGetCharAt(i); -                    chNext = styler.SafeGetCharAt(i + 1); -				} else if (chNext == '{') { -					styler.ColourTo(i, SCE_PL_HASH); +				backFlag = BACK_NONE; +			} else if (sc.ch == '#') { +				sc.SetState(SCE_PL_COMMENTLINE); +			} else if (sc.ch == '\"') { +				sc.SetState(SCE_PL_STRING); +				Quote.New(); +				Quote.Open(sc.ch); +				backFlag = BACK_NONE; +			} else if (sc.ch == '\'') { +				if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) { +					// Archaic call +					sc.SetState(SCE_PL_IDENTIFIER);  				} else { -                    goto handleOperator; +					sc.SetState(SCE_PL_CHARACTER); +					Quote.New(); +					Quote.Open(sc.ch);  				} -			} else if (ch == '*') { -                backflag = BACK_NONE; -                char strch[2]; -                strch[0] = chNext; -                strch[1] = '\0'; -                if (chNext == ':' && chNext2 == ':') { -                    state = SCE_PL_SYMBOLTABLE; -                    i += 2; -                    ch = styler.SafeGetCharAt(i); -                    chNext = styler.SafeGetCharAt(i + 1); -				} else if (!isascii(chNext) || isalpha(chNext) || chNext == '_' -                        || NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) { -					state = SCE_PL_SYMBOLTABLE; -                    i++; -                    ch = chNext; -                    chNext = chNext2; -				} else if (chNext == '{') { -					styler.ColourTo(i, SCE_PL_SYMBOLTABLE); +				backFlag = BACK_NONE; +			} else if (sc.ch == '`') { +				sc.SetState(SCE_PL_BACKTICKS); +				Quote.New(); +				Quote.Open(sc.ch); +				backFlag = BACK_NONE; +			} else if (sc.ch == '$') { +				sc.SetState(SCE_PL_SCALAR); +				if (sc.chNext == '{') { +					sc.ForwardSetState(SCE_PL_OPERATOR); +				} else if (IsASpace(sc.chNext)) { +					sc.ForwardSetState(SCE_PL_DEFAULT);  				} else { -					if (chNext == '*') {	// exponentiation -						i++; -						ch = chNext; -						chNext = chNext2; +					sc.Forward(); +					if (sc.Match('`', '`') || sc.Match(':', ':')) { +						sc.Forward();  					} -                    goto handleOperator;  				} -			} else if (ch == '/' || (ch == '<' && chNext == '<')) { +				backFlag = BACK_NONE; +			} else if (sc.ch == '@') { +				sc.SetState(SCE_PL_ARRAY); +				if (setArray.Contains(sc.chNext)) { +					// no special treatment +				} else if (sc.chNext == ':' && sc.GetRelative(2) == ':') { +					sc.Forward(2); +				} else if (sc.chNext == '{' || sc.chNext == '[') { +					sc.ForwardSetState(SCE_PL_OPERATOR); +				} else { +					sc.ChangeState(SCE_PL_OPERATOR); +				} +				backFlag = BACK_NONE; +			} else if (setPreferRE.Contains(sc.ch)) {  				// Explicit backward peeking to set a consistent preferRE for  				// any slash found, so no longer need to track preferRE state.  				// Find first previous significant lexed element and interpret. -                // Test for HERE doc start '<<' shares this code, helps to -                // determine if it should be an operator. +				// A few symbols shares this code for disambiguation.  				bool preferRE = false; -                bool isHereDoc = (ch == '<'); -                bool hereDocSpace = false;      // these are for corner case: -                bool hereDocScalar = false;     // SCALAR [whitespace] '<<' -				unsigned int bk = (i > 0)? i - 1: 0; -                unsigned int bkend; -				char bkch; +				bool isHereDoc = sc.Match('<', '<'); +				bool hereDocSpace = false;		// for: SCALAR [whitespace] '<<' +				unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0; +				unsigned int bkend;  				styler.Flush(); -                if (styler.StyleAt(bk) == SCE_PL_DEFAULT) -                    hereDocSpace = true; -				while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT || -					styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) { -					bk--; -				} +				if (styler.StyleAt(bk) == SCE_PL_DEFAULT) +					hereDocSpace = true; +				skipWhitespaceComment(styler, bk);  				if (bk == 0) { -					// position 0 won't really be checked; rarely happens -					// hard to fix due to an unsigned index i +					// avoid backward scanning breakage  					preferRE = true;  				} else {  					int bkstyle = styler.StyleAt(bk); -					bkch = styler.SafeGetCharAt(bk); +					int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));  					switch(bkstyle) {  					case SCE_PL_OPERATOR:  						preferRE = true;  						if (bkch == ')' || bkch == ']') {  							preferRE = false;  						} else if (bkch == '}') { -							// backtrack further, count balanced brace pairs -							// if a brace pair found, see if it's a variable -							int braceCount = 1; -							while (--bk > 0) { -								bkstyle = styler.StyleAt(bk); -								if (bkstyle == SCE_PL_OPERATOR) { -									bkch = styler.SafeGetCharAt(bk); -									if (bkch == ';') {	// early out -										break; -									} else if (bkch == '}') { -										braceCount++; -									} else if (bkch == '{') { -										if (--braceCount == 0) -											break; -									} -								} -							} -							if (bk == 0) { -								// at beginning, true -							} else if (braceCount == 0) { -								// balanced { found, bk>0, skip more whitespace -								if (styler.StyleAt(--bk) == SCE_PL_DEFAULT) { -									while (bk > 0) { -										bkstyle = styler.StyleAt(--bk); -										if (bkstyle != SCE_PL_DEFAULT) -											break; -									} -								} -								bkstyle = styler.StyleAt(bk); -								if (bkstyle == SCE_PL_SCALAR -								 || bkstyle == SCE_PL_ARRAY -								 || bkstyle == SCE_PL_HASH -								 || bkstyle == SCE_PL_SYMBOLTABLE -								 || bkstyle == SCE_PL_OPERATOR) { -									preferRE = false; -								} +							// backtrack by counting balanced brace pairs +							// needed to test for variables like ${}, @{} etc. +							bkstyle = styleBeforeBracePair(styler, bk); +							if (bkstyle == SCE_PL_SCALAR +								|| bkstyle == SCE_PL_ARRAY +								|| bkstyle == SCE_PL_HASH +								|| bkstyle == SCE_PL_SYMBOLTABLE +								|| bkstyle == SCE_PL_OPERATOR) { +								preferRE = false;  							} +						} else if (bkch == '+' || bkch == '-') { +							if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1)) +								&& bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2))) +							// exceptions for operators: unary suffixes ++, -- +							preferRE = false;  						}  						break;  					case SCE_PL_IDENTIFIER:  						preferRE = true; -						if (bkch == '>') {	// inputsymbol +						bkstyle = styleCheckIdentifier(styler, bk); +						if ((bkstyle == 1) || (bkstyle == 2)) { +							// inputsymbol or var with "->" or "::" before identifier  							preferRE = false; -							break; -						} -						// backtrack to find "->" or "::" before identifier -						while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) { -							bk--; -						} -						while (bk > 0) { -							bkstyle = styler.StyleAt(bk); -							if (bkstyle == SCE_PL_DEFAULT || -							    bkstyle == SCE_PL_COMMENTLINE) { -							} else if (bkstyle == SCE_PL_OPERATOR) { -								bkch = styler.SafeGetCharAt(bk); -                                // test for "->" and "::" -								if ((bkch == '>' && styler.SafeGetCharAt(bk - 1) == '-') -                                 || (bkch == ':' && styler.SafeGetCharAt(bk - 1) == ':')) { -                                    preferRE = false; -                                    break; -								} -							} else { -                                // bare identifier, if '/', /PATTERN/ unless digit/space immediately after '/' -                                // if '//', always expect defined-or operator to follow identifier -								if (!isHereDoc && -                                    (isspacechar(chNext) || isdigit(chNext) || chNext == '/')) -                                    preferRE = false; -								// HERE docs cannot have a space after the >> -                                if (isspacechar(chNext)) -                                    preferRE = false; -								break; +						} else if (bkstyle == 3) { +							// bare identifier, test cases follows: +							if (sc.ch == '/') { +								// if '/', /PATTERN/ unless digit/space immediately after '/' +								// if '//', always expect defined-or operator to follow identifier +								if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/') +									preferRE = false; +							} else if (sc.ch == '*' || sc.ch == '%') { +								if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*')) +									preferRE = false; +							} else if (sc.ch == '<') { +								if (IsASpace(sc.chNext) || sc.chNext == '=') +									preferRE = false;  							} -							bk--;  						}  						break; -                    case SCE_PL_SCALAR:     // for $var<< case -                        hereDocScalar = true; -                        break; -                    // for HERE docs, always true for preferRE +					case SCE_PL_SCALAR:		// for $var<< case: +						if (isHereDoc && hereDocSpace)	// if SCALAR whitespace '<<', *always* a HERE doc +							preferRE = true; +						break;  					case SCE_PL_WORD: -                        preferRE = true; -                        if (isHereDoc) -                            break; -                        // adopt heuristics similar to vim-style rules: -                        // keywords always forced as /PATTERN/: split, if, elsif, while -                        // everything else /PATTERN/ unless digit/space immediately after '/' -                        // for '//', defined-or favoured unless special keywords -                        bkend = bk + 1; -						while (bk > 0 && styler.StyleAt(bk-1) == SCE_PL_WORD) { -							bk--; +						preferRE = true; +						// for HERE docs, always true +						if (sc.ch == '/') { +							// adopt heuristics similar to vim-style rules: +							// keywords always forced as /PATTERN/: split, if, elsif, while +							// everything else /PATTERN/ unless digit/space immediately after '/' +							// for '//', defined-or favoured unless special keywords +							bkend = bk + 1; +							while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) { +								bk--; +							} +							if (isPerlKeyword(bk, bkend, reWords, styler)) +								break; +							if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/') +								preferRE = false; +						} else if (sc.ch == '*' || sc.ch == '%') { +							if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*')) +								preferRE = false; +						} else if (sc.ch == '<') { +							if (IsASpace(sc.chNext) || sc.chNext == '=') +								preferRE = false;  						} -                        if (isPerlKeyword(bk, bkend, reWords, styler)) -                            break; -                        if (isspacechar(chNext) || isdigit(chNext) || chNext == '/') -                            preferRE = false; -                        break; +						break;  					// other styles uses the default, preferRE=false  					case SCE_PL_POD: -					case SCE_PL_POD_VERB:  					case SCE_PL_HERE_Q:  					case SCE_PL_HERE_QQ:  					case SCE_PL_HERE_QX: @@ -709,555 +1054,114 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  						break;  					}  				} -                backflag = BACK_NONE; -                if (isHereDoc) {    // handle HERE doc -                    // if SCALAR whitespace '<<', *always* a HERE doc -                    if (preferRE || (hereDocSpace && hereDocScalar)) { -                        state = SCE_PL_HERE_DELIM; -                        HereDoc.State = 0; -                    } else {        // << operator -						i++; -						ch = chNext; -						chNext = chNext2; -                        goto handleOperator; -                    } -                } else {            // handle regexp -                    if (preferRE) { -                        state = SCE_PL_REGEX; -                        Quote.New(1); -                        Quote.Open(ch); -                    } else {        // / and // operators -                        if (chNext == '/') { -                            i++; -                            ch = chNext; -                            chNext = chNext2; -                        } -                        goto handleOperator; -                    } -                } -			} else if (ch == '<') { -				// looks forward for matching > on same line -				unsigned int fw = i + 1; -				while (fw < lengthDoc) { -					char fwch = styler.SafeGetCharAt(fw); -					if (fwch == ' ') { -						if (styler.SafeGetCharAt(fw-1) != '\\' || -						    styler.SafeGetCharAt(fw-2) != '\\') -						goto handleOperator; -					} else if (isEOLChar(fwch) || isspacechar(fwch)) { -						goto handleOperator; -					} else if (fwch == '>') { -						if ((fw - i) == 2 &&	// '<=>' case -						    styler.SafeGetCharAt(fw-1) == '=') { -                            goto handleOperator; -						} -                        styler.ColourTo(fw, SCE_PL_IDENTIFIER); -						i = fw; -						ch = fwch; -						chNext = styler.SafeGetCharAt(i+1); -					} -					fw++; -				} -                if (fw == lengthDoc) -                    goto handleOperator; -			} else if (ch == '='	// POD -			           && isalpha(chNext) -			           && (isEOLChar(chPrev))) { -				state = SCE_PL_POD; -                backflag = BACK_NONE; -				//sookedpos = 0; -				//sooked[sookedpos] = '\0'; -			} else if (ch == '-'	// file test operators -			           && isSingleCharOp(chNext) -			           && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) { -				styler.ColourTo(i + 1, SCE_PL_WORD); -				state = SCE_PL_DEFAULT; -				i++; -				ch = chNext; -				chNext = chNext2; -                backflag = BACK_NONE; -            } else if (ch == '-'    // bareword promotion (-FOO cases) -                       && ((isascii(chNext) && isalpha(chNext)) || chNext == '_') -                       && backflag != BACK_NONE) { -				state = SCE_PL_IDENTIFIER; -                backflag = BACK_NONE; -            } else if (ch == '(' && i > 0) { -                // backtrack to identify if we're starting a sub prototype -                // for generality, we need to ignore whitespace/comments -				unsigned int bk = i - 1;    // i > 0 tested above -				styler.Flush(); -				while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT || -                       styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) { -					bk--; -				} -                if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier -                    goto handleOperator; -				while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) { -					bk--; -				} -				while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT || -                       styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) { -					bk--; -				} -                if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword -                    || !styler.Match(bk - 2, "sub"))    // assume suffix is unique! -                    goto handleOperator; -                state = SCE_PL_SUB_PROTOTYPE; -                backflag = BACK_NONE; -                backPos = i;    // needed for restart -			} else if (isPerlOperator(ch)) { -				if (ch == '.' && chNext == '.') { // .. and ... -					i++; -					if (chNext2 == '.') { i++; } -					state = SCE_PL_DEFAULT; -					ch = styler.SafeGetCharAt(i); -					chNext = styler.SafeGetCharAt(i + 1); -				} -        handleOperator: -				styler.ColourTo(i, SCE_PL_OPERATOR); -                backflag = BACK_OPERATOR; -                backPos = i; -			} else if (ch == 4 || ch == 26) {    // ^D and ^Z ends valid perl source -                styler.ColourTo(i, SCE_PL_DATASECTION); -                state = SCE_PL_DATASECTION; -            } else { -				// keep colouring defaults to make restart easier -				styler.ColourTo(i, SCE_PL_DEFAULT); -			} -		} else if (state == SCE_PL_NUMBER) { -			if (ch == '.') { -				if (chNext == '.') { -					// double dot is always an operator -					goto numAtEnd; -				} else if (numState <= PERLNUM_FLOAT) { -					// non-decimal number or float exponent, consume next dot -					styler.ColourTo(i - 1, SCE_PL_NUMBER); -					state = SCE_PL_DEFAULT; -                    goto handleOperator; -				} else { // decimal or vectors allows dots -					dotCount++; -					if (numState == PERLNUM_DECIMAL) { -						if (dotCount > 1) { -							if (isdigit(chNext)) { // really a vector -								numState = PERLNUM_VECTOR; -							} else	// number then dot -								goto numAtEnd; -						} -					} else { // vectors -						if (!isdigit(chNext))	// vector then dot -							goto numAtEnd; -					} -				} -			} else if (ch == '_') { -                // permissive underscoring for number and vector literals -			} else if (!isascii(ch) || isalnum(ch)) { -				if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { -					if (!isascii(ch) || isalpha(ch)) { -						if (dotCount == 0) { // change to word -							state = SCE_PL_IDENTIFIER; -						} else { // vector then word -							goto numAtEnd; -						} +				backFlag = BACK_NONE; +				if (isHereDoc) {	// handle '<<', HERE doc +					if (preferRE) { +						sc.SetState(SCE_PL_HERE_DELIM); +						HereDoc.State = 0; +					} else {		// << operator +						sc.SetState(SCE_PL_OPERATOR); +						sc.Forward();  					} -				} else if (numState == PERLNUM_DECIMAL) { -					if (ch == 'E' || ch == 'e') { // exponent -						numState = PERLNUM_FLOAT; -						if (chNext == '+' || chNext == '-') { -							i++; -							ch = chNext; -							chNext = chNext2; +				} else if (sc.ch == '*') {	// handle '*', typeglob +					if (preferRE) { +						sc.SetState(SCE_PL_SYMBOLTABLE); +						if (sc.chNext == ':' && sc.GetRelative(2) == ':') { +							sc.Forward(2); +						} else if (sc.chNext == '{') { +							sc.ForwardSetState(SCE_PL_OPERATOR); +						} else { +							sc.Forward();  						} -					} else if (!isascii(ch) || !isdigit(ch)) { // number then word -						goto numAtEnd; -					} -				} else if (numState == PERLNUM_FLOAT) { -					if (!isdigit(ch)) { // float then word -						goto numAtEnd; -					} -				} else if (numState == PERLNUM_OCTAL) { -                    if (!isdigit(ch)) -                        goto numAtEnd; -                    else if (ch > '7') -                        numState = PERLNUM_BAD; -                } else if (numState == PERLNUM_BINARY) { -                    if (!isdigit(ch)) -                        goto numAtEnd; -                    else if (ch > '1') -                        numState = PERLNUM_BAD; -                } else if (numState == PERLNUM_HEX) { -                    int ch2 = toupper(ch); -                    if (!isdigit(ch) && !(ch2 >= 'A' && ch2 <= 'F')) -                        goto numAtEnd; -				} else {//(numState == PERLNUM_BAD) { -                    if (!isdigit(ch)) -                        goto numAtEnd; -                } -			} else { -				// complete current number or vector -			numAtEnd: -				styler.ColourTo(i - 1, actualNumStyle(numState)); -				state = SCE_PL_DEFAULT; -				goto restartLexer; -			} -		} else if (state == SCE_PL_IDENTIFIER) { -			if (!isWordStart(chNext) && chNext != '\'') { -				styler.ColourTo(i, SCE_PL_IDENTIFIER); -				state = SCE_PL_DEFAULT; -				ch = ' '; -			} -		} else { -			if (state == SCE_PL_COMMENTLINE) { -				if (isEOLChar(ch)) { -					styler.ColourTo(i - 1, state); -					state = SCE_PL_DEFAULT; -					goto restartLexer; -				} else if (isEOLChar(chNext)) { -					styler.ColourTo(i, state); -					state = SCE_PL_DEFAULT; -				} -			} else if (state == SCE_PL_HERE_DELIM) { -				// -				// From perldata.pod: -				// ------------------ -				// A line-oriented form of quoting is based on the shell ``here-doc'' -				// syntax. -				// Following a << you specify a string to terminate the quoted material, -				// and all lines following the current line down to the terminating -				// string are the value of the item. -				// The terminating string may be either an identifier (a word), -				// or some quoted text. -				// If quoted, the type of quotes you use determines the treatment of -				// the text, just as in regular quoting. -				// An unquoted identifier works like double quotes. -				// There must be no space between the << and the identifier. -				// (If you put a space it will be treated as a null identifier, -				// which is valid, and matches the first empty line.) -				// (This is deprecated, -w warns of this syntax) -				// The terminating string must appear by itself (unquoted and with no -				// surrounding whitespace) on the terminating line. -				// -				// From Bash info: -				// --------------- -				// Specifier format is: <<[-]WORD -				// Optional '-' is for removal of leading tabs from here-doc. -				// Whitespace acceptable after <<[-] operator. -				// -				if (HereDoc.State == 0) { // '<<' encountered -                    bool gotspace = false; -                    unsigned int oldi = i; -                    if (chNext == ' ' || chNext == '\t') { -                        // skip whitespace; legal for quoted delimiters -                        gotspace = true; -                        do { -                            i++; -                            chNext = styler.SafeGetCharAt(i + 1); -                        } while ((i + 1 < lengthDoc) && (chNext == ' ' || chNext == '\t')); -                        chNext2 = styler.SafeGetCharAt(i + 2); -                    } -					HereDoc.State = 1; -					HereDoc.Quote = chNext; -					HereDoc.Quoted = false; -					HereDoc.DelimiterLength = 0; -					HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; -					if (chNext == '\'' || chNext == '"' || chNext == '`') { -                        // a quoted here-doc delimiter -						i++; -						ch = chNext; -						chNext = chNext2; -						HereDoc.Quoted = true; -					} else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\' -						|| chNext == '=' || chNext == '$' || chNext == '@' -                        || ((isalpha(chNext) || chNext == '_') && gotspace)) { -						// left shift << or <<= operator cases -                        // restore position if operator -                        i = oldi; -						styler.ColourTo(i, SCE_PL_OPERATOR); -						state = SCE_PL_DEFAULT; -                        backflag = BACK_OPERATOR; -                        backPos = i; -						HereDoc.State = 0; -                        goto restartLexer;  					} else { -						// an unquoted here-doc delimiter, no special handling -                        // (cannot be prefixed by spaces/tabs), or -						// symbols terminates; deprecated zero-length delimiter +						sc.SetState(SCE_PL_OPERATOR); +						if (sc.chNext == '*') 	// exponentiation +							sc.Forward();  					} - -				} else if (HereDoc.State == 1) { // collect the delimiter -                    backflag = BACK_NONE; -					if (HereDoc.Quoted) { // a quoted here-doc delimiter -						if (ch == HereDoc.Quote) { // closing quote => end of delimiter -							styler.ColourTo(i, state); -							state = SCE_PL_DEFAULT; +				} else if (sc.ch == '%') {	// handle '%', hash +					if (preferRE) { +						sc.SetState(SCE_PL_HASH); +						if (setHash.Contains(sc.chNext)) { +							sc.Forward(); +						} else if (sc.chNext == ':' && sc.GetRelative(2) == ':') { +							sc.Forward(2); +						} else if (sc.chNext == '{') { +							sc.ForwardSetState(SCE_PL_OPERATOR);  						} else { -							if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote -								i++; -								ch = chNext; -								chNext = chNext2; -							} -							HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; -							HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; +							sc.ChangeState(SCE_PL_OPERATOR);  						} -					} else { // an unquoted here-doc delimiter -						if (isalnum(ch) || ch == '_') { -							HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; -							HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; +					} else { +						sc.SetState(SCE_PL_OPERATOR); +					} +				} else if (sc.ch == '<') {	// handle '<', inputsymbol +					if (preferRE) { +						// forward scan +						int i = inputsymbolScan(styler, sc.currentPos, endPos); +						if (i > 0) { +							sc.SetState(SCE_PL_IDENTIFIER); +							sc.Forward(i);  						} else { -							styler.ColourTo(i - 1, state); -							state = SCE_PL_DEFAULT; -							goto restartLexer; +							sc.SetState(SCE_PL_OPERATOR);  						} -					} -					if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { -						styler.ColourTo(i - 1, state); -						state = SCE_PL_ERROR; -						goto restartLexer; -					} -				} -			} else if (HereDoc.State == 2) { -				// state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX -				if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) { -					i += HereDoc.DelimiterLength; -					chPrev = styler.SafeGetCharAt(i - 1); -					ch = styler.SafeGetCharAt(i); -					if (isEOLChar(ch)) { -						styler.ColourTo(i - 1, state); -						state = SCE_PL_DEFAULT; -                        backflag = BACK_NONE; -						HereDoc.State = 0; -						goto restartLexer; -					} -					chNext = styler.SafeGetCharAt(i + 1); -				} -			} else if (state == SCE_PL_POD -				|| state == SCE_PL_POD_VERB) { -				if (isEOLChar(chPrev)) { -					if (ch == ' ' || ch == '\t') { -						styler.ColourTo(i - 1, state); -						state = SCE_PL_POD_VERB;  					} else { -						styler.ColourTo(i - 1, state); -						state = SCE_PL_POD; -						if (ch == '=') { -							if (isMatch(styler, lengthDoc, i, "=cut")) { -								styler.ColourTo(i - 1 + 4, state); -								i += 4; -								state = SCE_PL_DEFAULT; -								ch = styler.SafeGetCharAt(i); -								//chNext = styler.SafeGetCharAt(i + 1); -								goto restartLexer; -							} -						} +						sc.SetState(SCE_PL_OPERATOR);  					} -				} -			} else if (state == SCE_PL_SCALAR	// variable names -				|| state == SCE_PL_ARRAY -				|| state == SCE_PL_HASH -				|| state == SCE_PL_SYMBOLTABLE) { -				if (ch == ':' && chNext == ':') {	// skip :: -					i++; -					ch = chNext; -					chNext = chNext2; -				} -				else if (isEndVar(ch)) { -					if (i == (styler.GetStartSegment() + 1)) { -						// Special variable: $(, $_ etc. -						styler.ColourTo(i, state); -						state = SCE_PL_DEFAULT; -					} else { -						styler.ColourTo(i - 1, state); -						state = SCE_PL_DEFAULT; -						goto restartLexer; +				} else {			// handle '/', regexp +					if (preferRE) { +						sc.SetState(SCE_PL_REGEX); +						Quote.New(); +						Quote.Open(sc.ch); +					} else {		// / and // operators +						sc.SetState(SCE_PL_OPERATOR); +						if (sc.chNext == '/') { +							sc.Forward(); +						}  					}  				} -			} else if (state == SCE_PL_REGEX -				|| state == SCE_PL_STRING_QR -				) { -				if (!Quote.Up && !isspacechar(ch)) { -					Quote.Open(ch); -				} else if (ch == '\\' && Quote.Up != '\\') { -					// SG: Is it save to skip *every* escaped char? -					i++; -					ch = chNext; -					chNext = styler.SafeGetCharAt(i + 1); +			} else if (sc.ch == '='		// POD +					   && setPOD.Contains(sc.chNext) +					   && sc.atLineStart) { +				sc.SetState(SCE_PL_POD); +				backFlag = BACK_NONE; +			} else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) {	// extended '-' cases +				unsigned int bk = sc.currentPos; +				unsigned int fw = 2; +				if (setSingleCharOp.Contains(sc.chNext) &&	// file test operators +					!setWord.Contains(sc.GetRelative(2))) { +					sc.SetState(SCE_PL_WORD);  				} else { -					if (ch == Quote.Down /*&& chPrev != '\\'*/) { -						Quote.Count--; -						if (Quote.Count == 0) { -							Quote.Rep--; -							if (Quote.Up == Quote.Down) { -								Quote.Count++; -							} -						} -						if (!isalpha(chNext)) { -							if (Quote.Rep <= 0) { -								styler.ColourTo(i, state); -								state = SCE_PL_DEFAULT; -								ch = ' '; -							} -						} -					} else if (ch == Quote.Up /*&& chPrev != '\\'*/) { -						Quote.Count++; -					} else if (!isascii(chNext) || !isalpha(chNext)) { -						if (Quote.Rep <= 0) { -							styler.ColourTo(i, state); -							state = SCE_PL_DEFAULT; -							ch = ' '; -						} -					} +					// nominally a minus and bareword; find extent of bareword +					while (setWord.Contains(sc.GetRelative(fw))) +						fw++; +					sc.SetState(SCE_PL_OPERATOR);  				} -			} else if (state == SCE_PL_REGSUBST) { -				if (!Quote.Up && !isspacechar(ch)) { -					Quote.Open(ch); -				} else if (ch == '\\' && Quote.Up != '\\') { -					// SG: Is it save to skip *every* escaped char? -					i++; -					ch = chNext; -					chNext = styler.SafeGetCharAt(i + 1); +				// force to bareword for hash key => or {variable literal} cases +				if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) { +					sc.ChangeState(SCE_PL_IDENTIFIER); +				} +				backFlag = BACK_NONE; +			} else if (sc.ch == '(' && sc.currentPos > 0) {	// '(' or subroutine prototype +				if (styleCheckSubPrototype(styler, sc.currentPos - 1)) { +					sc.SetState(SCE_PL_SUB_PROTOTYPE); +					backFlag = BACK_NONE;  				} else { -					if (Quote.Count == 0 && Quote.Rep == 1) { -						/* We matched something like s(...) or tr{...} -						* and are looking for the next matcher characters, -						* which could be either bracketed ({...}) or non-bracketed -						* (/.../). -						* -						* Number-signs are problematic.  If they occur after -						* the close of the first part, treat them like -						* a Quote.Up char, even if they actually start comments. -						* -						* If we find an alnum, we end the regsubst, and punt. -						* -						* Eric Promislow   ericp@activestate.com  Aug 9,2000 -						*/ -						if (isspacechar(ch)) { -							// Keep going -						} -						else if (!isascii(ch) || isalnum(ch)) { -							styler.ColourTo(i, state); -							state = SCE_PL_DEFAULT; -							ch = ' '; -						} else { -							Quote.Open(ch); -						} -					} else if (ch == Quote.Down /*&& chPrev != '\\'*/) { -						Quote.Count--; -						if (Quote.Count == 0) { -							Quote.Rep--; -						} -						if (!isascii(chNext) || !isalpha(chNext)) { -							if (Quote.Rep <= 0) { -								styler.ColourTo(i, state); -								state = SCE_PL_DEFAULT; -								ch = ' '; -							} -						} -						if (Quote.Up == Quote.Down) { -							Quote.Count++; -						} -					} else if (ch == Quote.Up /*&& chPrev != '\\'*/) { -						Quote.Count++; -					} else if (!isascii(chNext) || !isalpha(chNext)) { -						if (Quote.Rep <= 0) { -							styler.ColourTo(i, state); -							state = SCE_PL_DEFAULT; -							ch = ' '; -						} -					} +					sc.SetState(SCE_PL_OPERATOR);  				} -			} else if (state == SCE_PL_STRING_Q -				|| state == SCE_PL_STRING_QQ -				|| state == SCE_PL_STRING_QX -				|| state == SCE_PL_STRING_QW -				|| state == SCE_PL_STRING -				|| state == SCE_PL_CHARACTER -				|| state == SCE_PL_BACKTICKS -				) { -				if (!Quote.Down && !isspacechar(ch)) { -					Quote.Open(ch); -				} else if (ch == '\\' && Quote.Up != '\\') { -					i++; -					ch = chNext; -					chNext = styler.SafeGetCharAt(i + 1); -				} else if (ch == Quote.Down) { -					Quote.Count--; -					if (Quote.Count == 0) { -						Quote.Rep--; -						if (Quote.Rep <= 0) { -							styler.ColourTo(i, state); -							state = SCE_PL_DEFAULT; -							ch = ' '; -						} -						if (Quote.Up == Quote.Down) { -							Quote.Count++; -						} -					} -				} else if (ch == Quote.Up) { -					Quote.Count++; +			} else if (setPerlOperator.Contains(sc.ch)) {	// operators +				sc.SetState(SCE_PL_OPERATOR); +				if (sc.Match('.', '.')) {	// .. and ... +					sc.Forward(); +					if (sc.chNext == '.') sc.Forward();  				} -			} else if (state == SCE_PL_SUB_PROTOTYPE) { -                char strch[2]; -                strch[0] = ch; -                strch[1] = '\0'; -                if (NULL != strstr("\\[$@%&*];", strch)) { -                    // keep going -                } else if (ch == ')') { -                    styler.ColourTo(i, state); -                    state = SCE_PL_DEFAULT; -                } else { -                    // abandon prototype, restart from '(' -                    i = backPos; -                    styler.ColourTo(i, SCE_PL_OPERATOR); -					ch = styler.SafeGetCharAt(i); -					chNext = styler.SafeGetCharAt(i + 1); -                    state = SCE_PL_DEFAULT; -                } -            } else if (state == SCE_PL_FORMAT_IDENT) { -                // occupies different HereDoc states to avoid clashing with HERE docs -                if (HereDoc.State == 0) { -                    if ((isascii(ch) && isalpha(ch)) || ch == '_'   // probable identifier -                        || ch == '=') {                             // no identifier -                        HereDoc.State = 3; -                        HereDoc.Quoted = false; // whitespace flag -                    } else if (ch == ' ' || ch == '\t') { -                        styler.ColourTo(i, SCE_PL_DEFAULT); -                    } else { -                        state = SCE_PL_DEFAULT; -                        HereDoc.State = 0; -                        goto restartLexer; -                    } -                } -                if (HereDoc.State == 3) {   // with just a '=', state goes 0->3->4 -                    if (ch == '=') { -                        styler.ColourTo(i, SCE_PL_FORMAT_IDENT); -                        state = SCE_PL_DEFAULT; -                        HereDoc.State = 4; -                    } else if (ch == ' ' || ch == '\t') { -                        HereDoc.Quoted = true; -                    } else if (isEOLChar(ch) || (HereDoc.Quoted && ch != '=')) { -                        // abandon format, restart from after 'format' -                        i = backPos + 1; -                        ch = styler.SafeGetCharAt(i); -                        chNext = styler.SafeGetCharAt(i + 1); -                        state = SCE_PL_DEFAULT; -                        HereDoc.State = 0; -                    } -                } -            } else if (state == SCE_PL_FORMAT) { -                if (isEOLChar(chPrev)) { -                    styler.ColourTo(i - 1, state); -                    if (ch == '.' && isEOLChar(chNext)) { -                        styler.ColourTo(i, state); -                        state = SCE_PL_DEFAULT; -                    } -                } -            } -		} -		if (state == SCE_PL_ERROR) { -			break; +			} else if (sc.ch == 4 || sc.ch == 26) {		// ^D and ^Z ends valid perl source +				sc.SetState(SCE_PL_DATASECTION); +			} else { +				// keep colouring defaults +				sc.Complete(); +			}  		} -		chPrev = ch;  	} -	styler.ColourTo(lengthDoc - 1, state); +	sc.Complete();  }  static bool IsCommentLine(int line, Accessor &styler) { @@ -1265,17 +1169,17 @@ static bool IsCommentLine(int line, Accessor &styler) {  	int eol_pos = styler.LineStart(line + 1) - 1;  	for (int i = pos; i < eol_pos; i++) {  		char ch = styler[i]; -        int style = styler.StyleAt(i); +		int style = styler.StyleAt(i);  		if (ch == '#' && style == SCE_PL_COMMENTLINE)  			return true; -		else if (ch != ' ' && ch != '\t') +		else if (!IsASpaceOrTab(ch))  			return false;  	}  	return false;  }  static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[], -                            Accessor &styler) { +                        Accessor &styler) {  	bool foldComment = styler.GetPropertyInt("fold.comment") != 0;  	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;  	// Custom folding of POD and packages @@ -1300,18 +1204,18 @@ static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],  		int style = styleNext;  		styleNext = styler.StyleAt(i + 1);  		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); -		bool atLineStart = isEOLChar(chPrev) || i == 0; -        // Comment folding +		bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0; +		// Comment folding  		if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) -        { -            if (!IsCommentLine(lineCurrent - 1, styler) -                && IsCommentLine(lineCurrent + 1, styler)) -                levelCurrent++; -            else if (IsCommentLine(lineCurrent - 1, styler) -                     && !IsCommentLine(lineCurrent+1, styler)) -                levelCurrent--; -        } -		if (style == SCE_C_OPERATOR) { +		{ +			if (!IsCommentLine(lineCurrent - 1, styler) +				&& IsCommentLine(lineCurrent + 1, styler)) +				levelCurrent++; +			else if (IsCommentLine(lineCurrent - 1, styler) +					 && !IsCommentLine(lineCurrent+1, styler)) +				levelCurrent--; +		} +		if (style == SCE_PL_OPERATOR) {  			if (ch == '{') {  				levelCurrent++;  			} else if (ch == '}') { @@ -1329,17 +1233,17 @@ static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],  				else if (styler.Match(i, "=head"))  					isPodHeading = true;  			} else if (style == SCE_PL_DATASECTION) { -                if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE) -                    levelCurrent++; -                else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE) -                    levelCurrent--; -                else if (styler.Match(i, "=head")) +				if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE) +					levelCurrent++; +				else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE) +					levelCurrent--; +				else if (styler.Match(i, "=head"))  					isPodHeading = true; -                // if package used or unclosed brace, level > SC_FOLDLEVELBASE! -                // reset needed as level test is vs. SC_FOLDLEVELBASE -                else if (styler.Match(i, "__END__")) -                    levelCurrent = SC_FOLDLEVELBASE; -            } +				// if package used or unclosed brace, level > SC_FOLDLEVELBASE! +				// reset needed as level test is vs. SC_FOLDLEVELBASE +				else if (styler.Match(i, "__END__")) +					levelCurrent = SC_FOLDLEVELBASE; +			}  		}  		// Custom package folding  		if (foldPackage && atLineStart) { @@ -1351,9 +1255,9 @@ static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],  		if (atEOL) {  			int lev = levelPrev;  			if (isPodHeading) { -                lev = levelPrev - 1; -                lev |= SC_FOLDLEVELHEADERFLAG; -                isPodHeading = false; +				lev = levelPrev - 1; +				lev |= SC_FOLDLEVELHEADERFLAG; +				isPodHeading = false;  			}  			// Check if line was a package declaration  			// because packages need "special" treatment @@ -1362,7 +1266,7 @@ static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],  				levelCurrent = SC_FOLDLEVELBASE + 1;  				isPackageLine = false;  			} -            lev |= levelCurrent << 16; +			lev |= levelCurrent << 16;  			if (visibleChars == 0 && foldCompact)  				lev |= SC_FOLDLEVELWHITEFLAG;  			if ((levelCurrent > levelPrev) && (visibleChars > 0)) @@ -1389,4 +1293,3 @@ static const char * const perlWordListDesc[] = {  };  LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc, 8); - | 
