diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/LexPerl.cxx | 164 | 
1 files changed, 106 insertions, 58 deletions
| diff --git a/src/LexPerl.cxx b/src/LexPerl.cxx index 0514c93f1..4edb3ccde 100644 --- a/src/LexPerl.cxx +++ b/src/LexPerl.cxx @@ -2,6 +2,7 @@  /** @file LexPerl.cxx   ** Lexer for subset of Perl.   **/ +// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my> 20031020  // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>  // The License.txt file describes the conditions under which this software may be distributed. @@ -25,6 +26,8 @@  #define PERLNUM_VECTOR 4  #define PERLNUM_V_VECTOR 5 +#define HERE_DELIM_MAX 256 +  static inline bool isEOLChar(char ch) {  	return (ch == '\r') || (ch == '\n');  } @@ -37,9 +40,6 @@ static bool isSingleCharOp(char ch) {  }  static inline bool isPerlOperator(char ch) { -	if (isalnum(ch)) -		return false; -	// '.' left out as it is used to make up numbers  	if (ch == '%' || ch == '^' || ch == '&' || ch == '*' || ch == '\\' ||  	        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||  	        ch == '=' || ch == '|' || ch == '{' || ch == '}' || @@ -70,14 +70,14 @@ static inline bool isEndVar(char ch) {  static inline char actualNumStyle(int numberStyle) {  	switch (numberStyle) { -		case PERLNUM_VECTOR: -		case PERLNUM_V_VECTOR: -			return SCE_PL_STRING; -		case PERLNUM_DECIMAL: -		case PERLNUM_NON_DEC: -		case PERLNUM_FLOAT: -		default: -			return SCE_PL_NUMBER; +	case PERLNUM_VECTOR: +	case PERLNUM_V_VECTOR: +		return SCE_PL_STRING; +	case PERLNUM_DECIMAL: +	case PERLNUM_NON_DEC: +	case PERLNUM_FLOAT: +	default: +		return SCE_PL_NUMBER;  	}  } @@ -123,12 +123,16 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  		char Quote;		// the char after '<<'  		bool Quoted;		// true if Quote in ('\'','"','`')  		int DelimiterLength;	// strlen(Delimiter) -		char Delimiter[256];	// the Delimiter, 256: sizeof PL_tokenbuf +		char *Delimiter;	// the Delimiter, 256: sizeof PL_tokenbuf  		HereDocCls() {  			State = 0;  			DelimiterLength = 0; +			Delimiter = new char[HERE_DELIM_MAX];  			Delimiter[0] = '\0';  		} +		~HereDocCls() { +			delete []Delimiter; +		}  	};  	HereDocCls HereDoc;	// TODO: FIFO for stacked here-docs @@ -155,17 +159,17 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  	};  	QuoteCls Quote; -	char sooked[100]; -	int sookedpos = 0; -	bool preferRE = true; -	sooked[sookedpos] = '\0';  	int state = initStyle; -	int numState = PERLNUM_DECIMAL; +	char numState = PERLNUM_DECIMAL;  	int dotCount = 0;  	unsigned int lengthDoc = startPos + length; +	//int sookedpos = 0; // these have no apparent use, see POD state +	//char sooked[100]; +	//sooked[sookedpos] = '\0';  	// If in a long distance lexical state, seek to the beginning to find quote characters -	// Perl strings can be multi-line with embedded newlines. +	// Perl strings can be multi-line with embedded newlines, so backtrack. +	// Perl numbers have additional state during lexing, so backtrack too.  	if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) {  		while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) {  			startPos--; @@ -259,7 +263,6 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				state = SCE_PL_NUMBER;  				numState = PERLNUM_DECIMAL;  				dotCount = 0; -				preferRE = false;  				if (ch == '0') {	// hex,bin,octal  					if (chNext == 'x' || chNext == 'b' || isdigit(chNext)) {  						numState = PERLNUM_NON_DEC; @@ -268,7 +271,6 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					numState = PERLNUM_V_VECTOR;  				}  			} else if (iswordstart(ch)) { -				styler.ColourTo(i - 1, state);  				if (ch == 's' && !isalnum(chNext)) {  					state = SCE_PL_REGSUBST;  					Quote.New(2); @@ -295,12 +297,11 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					chNext = chNext2;  					Quote.New(1);  				} else if (ch == 'x' && (chNext == '=' ||	// repetition -					   (chNext != '_' && !isalnum(chNext)))) { -					preferRE = true; +					   (chNext != '_' && !isalnum(chNext)) || +					   (isdigit(chPrev) && isdigit(chNext)))) {  					styler.ColourTo(i, SCE_PL_OPERATOR);  				} else {  					state = SCE_PL_WORD; -					preferRE = false;  					if ((!iswordchar(chNext) && chNext != '\'')  						|| (chNext == '.' && chNext2 == '.')) {  						// We need that if length of word == 1! @@ -329,13 +330,11 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				Quote.New(1);  				Quote.Open(ch);  			} else if (ch == '$') { -				preferRE = false;  				if ((chNext == '{') || isspacechar(chNext)) {  					styler.ColourTo(i, SCE_PL_SCALAR);  				} else {  					state = SCE_PL_SCALAR;  					if (chNext == '`' && chNext2 == '`') { -						styler.ColourTo(i - 1, SCE_PL_SCALAR);  						i += 2;  						ch = styler.SafeGetCharAt(i);  						chNext = styler.SafeGetCharAt(i + 1); @@ -346,7 +345,6 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					}  				}  			} else if (ch == '@') { -				preferRE = false;  				if (isalpha(chNext) || chNext == '#' || chNext == '$'  						    || chNext == '_' || chNext == '+') {  					state = SCE_PL_ARRAY; @@ -358,7 +356,6 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					styler.ColourTo(i, SCE_PL_ARRAY);  				}  			} else if (ch == '%') { -				preferRE = false;  				if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {  					state = SCE_PL_HASH;  				} else if (chNext == '{') { @@ -375,13 +372,78 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  						ch = chNext;  						chNext = chNext2;  					} +					styler.ColourTo(i, SCE_PL_OPERATOR); +				} +			} else if (ch == '/') { +				// Explicit backward peeking to set a consistent preferRE for +				// any slash found, so no longer need to track preferRE state. +				// Find first previous significant lexed element and interpret. +				bool preferRE = false; +				unsigned int bk = i - 1; +				char bkch; +				styler.Flush(); +				while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT || +					styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) { +					bk--; +				} +				if (bk == 0) {  					preferRE = true; +				} else { +					int bkstyle = styler.StyleAt(bk); +					switch(bkstyle) { +					case SCE_PL_OPERATOR: +						preferRE = true; +						bkch = styler.SafeGetCharAt(bk); +						if (bkch == ')' || bkch == ']') { +							preferRE = false; +						} else if (bkch == '}') { +							// backtrack further, count balanced brace pairs +							// if a brace pair found, see if it's a variable +							int braceCount = 1; +							while (--bk > 0) { +								bkstyle = styler.StyleAt(bk); +								if (bkstyle == SCE_PL_OPERATOR) { +									bkch = styler.SafeGetCharAt(bk); +									if (bkch == '}') { +										braceCount++; +									} else if (bkch == '{') { +										if (--braceCount == 0) +											break; +									} +								} +							} +							if (bk == 0) { +								// at beginning, true +							} else if (braceCount == 0) { +								// balanced { found, check for variable +								bkstyle = styler.StyleAt(bk - 1); +								if (bkstyle == SCE_PL_SCALAR +								 || bkstyle == SCE_PL_ARRAY +								 || bkstyle == SCE_PL_HASH +								 || bkstyle == SCE_PL_SYMBOLTABLE) { +									preferRE = false; +								} +							} +						} +						break; +					// other styles uses the default, preferRE=false +					case SCE_PL_IDENTIFIER: +					case SCE_PL_POD: +					case SCE_PL_WORD: +					case SCE_PL_HERE_Q: +					case SCE_PL_HERE_QQ: +					case SCE_PL_HERE_QX: +						preferRE = true; +						break; +					} +				} +				if (preferRE) { +					state = SCE_PL_REGEX; +					Quote.New(1); +					Quote.Open(ch); +				} else {  					styler.ColourTo(i, SCE_PL_OPERATOR);  				} -			} else if (ch == '/' && preferRE) { -				state = SCE_PL_REGEX; -				Quote.New(1); -				Quote.Open(ch);  			} else if (ch == '<' && chNext == '<') {  				state = SCE_PL_HERE_DELIM;  				HereDoc.State = 0; @@ -389,14 +451,13 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  			           && isalpha(chNext)  			           && (isEOLChar(chPrev))) {  				state = SCE_PL_POD; -				sookedpos = 0; -				sooked[sookedpos] = '\0'; +				//sookedpos = 0; +				//sooked[sookedpos] = '\0';  			} else if (ch == '-'	// file test operators  			           && isSingleCharOp(chNext)  			           && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) {  				styler.ColourTo(i + 1, SCE_PL_WORD);  				state = SCE_PL_DEFAULT; -				preferRE = false;  				i++;  				ch = chNext;  				chNext = chNext2; @@ -407,11 +468,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					state = SCE_PL_DEFAULT;  					ch = styler.SafeGetCharAt(i);  					chNext = styler.SafeGetCharAt(i + 1); -					preferRE = true; -				} else if (ch == ')' || ch == ']') { -					preferRE = false; -				} else -					preferRE = true; +				}  				styler.ColourTo(i, SCE_PL_OPERATOR);  			} else {  				// keep colouring defaults to make restart easier @@ -426,7 +483,6 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					// non-decimal number or float exponent, consume next dot  					styler.ColourTo(i - 1, SCE_PL_NUMBER);  					styler.ColourTo(i, SCE_PL_OPERATOR); -					preferRE = true;  					state = SCE_PL_DEFAULT;  				} else { // decimal or vectors allows dots  					dotCount++; @@ -446,7 +502,6 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {  					if (isalpha(ch)) {  						if (dotCount == 0) { // change to word -							preferRE = false;  							state = SCE_PL_WORD;  						} else { // vector then word  							goto numAtEnd; @@ -467,14 +522,13 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					if (!isdigit(ch)) { // float then word  						goto numAtEnd;  					} -				} else {// PERLNUM_NON_DEC +				} else {// (numState == PERLNUM_NON_DEC)  					// allow alphanum for bin,hex,oct for now  				}  			} else {  				// complete current number or vector  			numAtEnd:  				styler.ColourTo(i - 1, actualNumStyle(numState)); -				preferRE = false;  				state = SCE_PL_DEFAULT;  				goto restartLexer;  			} @@ -488,8 +542,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					styler.ColourTo(i, SCE_PL_DATASECTION);  					state = SCE_PL_DATASECTION;  				} else { -					if (classifyWordPerl(styler.GetStartSegment(), i, keywords, styler) == SCE_PL_WORD) -						preferRE = true; +					classifyWordPerl(styler.GetStartSegment(), i, keywords, styler);  					state = SCE_PL_DEFAULT;  					ch = ' ';  				} @@ -542,21 +595,16 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  						ch = chNext;  						chNext = chNext2;  						HereDoc.Quoted = true; -					} else if (chNext == '\\') { // ref? -						i++; -						ch = chNext; -						chNext = chNext2; -					} else if (isdigit(chNext) || chNext == '=') { // left shift << or <<= operator +					} else if (isalpha(chNext) || chNext == '_') { +						// an unquoted here-doc delimiter, no special handling +					} else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\' +						|| chNext == '=' || chNext == '$' || chNext == '@') { +						// left shift << or <<= operator cases  						styler.ColourTo(i, SCE_PL_OPERATOR);  						state = SCE_PL_DEFAULT;  						HereDoc.State = 0; -					} else if (isalpha(chNext) || chNext == '_') { // an unquoted here-doc delimiter -						// single word identifier, no special handling -					} else if (isspacechar(chNext)) { // deprecated here-doc delimiter || left shift operator -						styler.ColourTo(i, SCE_PL_OPERATOR); -						state = SCE_PL_DEFAULT; -						HereDoc.State = 0; -					} else { // TODO: ??? +					} else { +						// symbols terminates; deprecated zero-length delimiter  					}  				} else if (HereDoc.State == 1) { // collect the delimiter @@ -583,7 +631,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  							goto restartLexer;  						}  					} -					if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) { +					if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {  						styler.ColourTo(i - 1, state);  						state = SCE_PL_ERROR;  						goto restartLexer; | 
