diff options
Diffstat (limited to 'lexers/LexPerl.cxx')
| -rw-r--r-- | lexers/LexPerl.cxx | 371 | 
1 files changed, 293 insertions, 78 deletions
| diff --git a/lexers/LexPerl.cxx b/lexers/LexPerl.cxx index 7f0cbcf62..8a0f6422e 100644 --- a/lexers/LexPerl.cxx +++ b/lexers/LexPerl.cxx @@ -69,6 +69,10 @@ using namespace Scintilla;  #define BACK_OPERATOR	1	// whitespace/comments are insignificant  #define BACK_KEYWORD	2	// operators/keywords are needed for disambiguation +// all interpolated styles are different from their parent styles by a constant difference +// we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value +#define	INTERPOLATE_SHIFT	(SCE_PL_STRING_VAR - SCE_PL_STRING) +  static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, LexAccessor &styler) {  	// old-style keyword matcher; needed because GetCurrent() needs  	// current segment to be committed, but we may abandon early... @@ -246,14 +250,6 @@ static bool styleCheckSubPrototype(LexAccessor &styler, unsigned int bk) {  	return true;  } -static bool isMatch(const char *sref, char *s) { -	// match per-line delimiter - must kill trailing CR if CRLF -	int i = strlen(s); -	if (i != 0 && s[i - 1] == '\r') -		s[i - 1] = '\0'; -	return (strcmp(sref, s) == 0); -} -  static int actualNumStyle(int numberStyle) {  	if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {  		return SCE_PL_STRING; @@ -360,11 +356,19 @@ struct OptionSetPerl : public OptionSet<OptionsPerl> {  };  class LexerPerl : public ILexer { +	CharacterSet setWordStart; +	CharacterSet setWord; +	CharacterSet setSpecialVar; +	CharacterSet setControlVar;  	WordList keywords;  	OptionsPerl options;  	OptionSetPerl osPerl;  public: -	LexerPerl() { +	LexerPerl() : +		setWordStart(CharacterSet::setAlpha, "_", 0x80, true), +		setWord(CharacterSet::setAlphaNum, "_", 0x80, true), +		setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"), +		setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {  	}  	~LexerPerl() {  	} @@ -398,6 +402,7 @@ public:  	static ILexer *LexerFactoryPerl() {  		return new LexerPerl();  	} +	void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);  };  int SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) { @@ -426,6 +431,90 @@ int SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {  	return firstModification;  } +void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) { +	// interpolate a segment (with no active backslashes or delimiters within) +	// switch in or out of an interpolation style or continue current style +	// commit variable patterns if found, trim segment, repeat until done +	while (maxSeg > 0) { +		bool isVar = false; +		int sLen = 0; +		if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) { +			// $#[$]*word [$@][$]*word (where word or {word} is always present) +			bool braces = false; +			sLen = 1; +			if (sc.ch == '$' && sc.chNext == '#') {	// starts with $# +				sLen++; +			} +			while ((maxSeg > sLen) && (sc.GetRelative(sLen) == '$'))	// >0 $ dereference within +				sLen++; +			if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '{')) {	// { start for {word} +				sLen++; +				braces = true; +			} +			if (maxSeg > sLen) { +				int c = sc.GetRelative(sLen); +				if (setWordStart.Contains(c)) {	// word (various) +					sLen++; +					isVar = true; +					while ((maxSeg > sLen) && setWord.Contains(sc.GetRelative(sLen))) +						sLen++; +				} else if (braces && IsADigit(c) && (sLen == 2)) {	// digit for ${digit} +					sLen++; +					isVar = true; +				} +			} +			if (braces) { +				if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '}')) {	// } end for {word} +					sLen++; +				} else +					isVar = false; +			} +		} +		if (!isVar && (maxSeg > 1)) {	// $- or @-specific variable patterns +			sLen = 1; +			int c = sc.chNext; +			if (sc.ch == '$') { +				if (IsADigit(c)) {	// $[0-9] and slurp trailing digits +					sLen++; +					isVar = true; +					while ((maxSeg > sLen) && IsADigit(sc.GetRelative(sLen))) +						sLen++; +				} else if (setSpecialVar.Contains(c)) {	// $ special variables +					sLen++; +					isVar = true; +				} else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) {	// $ additional +					sLen++; +					isVar = true; +				} else if (c == '^') {	// $^A control-char style +					sLen++; +					if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelative(sLen))) { +						sLen++; +						isVar = true; +					} +				} +			} else if (sc.ch == '@') { +				if (!isPattern && ((c == '+') || (c == '-'))) {	// @ specials non-pattern +					sLen++; +					isVar = true; +				} +			} +		} +		if (isVar) {	// commit as interpolated variable or normal character +			if (sc.state < SCE_PL_STRING_VAR) +				sc.SetState(sc.state + INTERPOLATE_SHIFT); +			sc.Forward(sLen); +			maxSeg -= sLen; +		} else { +			if (sc.state >= SCE_PL_STRING_VAR) +				sc.SetState(sc.state - INTERPOLATE_SHIFT); +			sc.Forward(); +			maxSeg--; +		} +	} +	if (sc.state >= SCE_PL_STRING_VAR) +		sc.SetState(sc.state - INTERPOLATE_SHIFT); +} +  void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {  	LexAccessor styler(pAccess); @@ -434,8 +523,6 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  	reWords.Set("elsif if split while");  	// charset classes -	CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true); -	CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true);  	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");  	// lexing of "%*</" operators is non-trivial; these are missing in the set below  	CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~"); @@ -450,7 +537,7 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  	CharacterSet &setPOD = setModifiers;  	CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");  	CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_"); -	CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*];"); +	CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];");  	// for format identifiers  	CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");  	CharacterSet &setFormat = setHereDocDelim; @@ -520,10 +607,13 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  	// Includes strings (may be multi-line), numbers (additional state), format  	// bodies, as well as POD sections.  	if (initStyle == SCE_PL_HERE_Q -	        || initStyle == SCE_PL_HERE_QQ -	        || initStyle == SCE_PL_HERE_QX -	        || initStyle == SCE_PL_FORMAT +	    || initStyle == SCE_PL_HERE_QQ +	    || initStyle == SCE_PL_HERE_QX +	    || initStyle == SCE_PL_FORMAT +	    || initStyle == SCE_PL_HERE_QQ_VAR +	    || initStyle == SCE_PL_HERE_QX_VAR  	   ) { +		// backtrack through multiple styles to reach the delimiter start  		int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;  		while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {  			startPos--; @@ -531,15 +621,34 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  		startPos = styler.LineStart(styler.GetLine(startPos));  		initStyle = styler.StyleAt(startPos - 1);  	} -	if (initStyle == SCE_PL_STRING_Q -	        || initStyle == SCE_PL_STRING_QQ -	        || initStyle == SCE_PL_STRING_QX -	        || initStyle == SCE_PL_STRING_QR +	if (initStyle == SCE_PL_STRING +	    || initStyle == SCE_PL_STRING_QQ +	    || initStyle == SCE_PL_BACKTICKS +	    || initStyle == SCE_PL_STRING_QX +	    || initStyle == SCE_PL_REGEX +	    || initStyle == SCE_PL_STRING_QR +	    || initStyle == SCE_PL_REGSUBST +	    || initStyle == SCE_PL_STRING_VAR +	    || initStyle == SCE_PL_STRING_QQ_VAR +	    || initStyle == SCE_PL_BACKTICKS_VAR +	    || initStyle == SCE_PL_STRING_QX_VAR +	    || initStyle == SCE_PL_REGEX_VAR +	    || initStyle == SCE_PL_STRING_QR_VAR +	    || initStyle == SCE_PL_REGSUBST_VAR +	   ) { +		// for interpolation, must backtrack through a mix of two different styles +		int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ? +			initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT; +		while (startPos > 1) { +			int st = styler.StyleAt(startPos - 1); +			if ((st != initStyle) && (st != otherStyle)) +				break; +			startPos--; +		} +		initStyle = SCE_PL_DEFAULT; +	} else if (initStyle == SCE_PL_STRING_Q  	        || initStyle == SCE_PL_STRING_QW -	        || initStyle == SCE_PL_REGEX -	        || initStyle == SCE_PL_REGSUBST -	        || initStyle == SCE_PL_STRING -	        || initStyle == SCE_PL_BACKTICKS +	        || initStyle == SCE_PL_XLAT  	        || initStyle == SCE_PL_CHARACTER  	        || initStyle == SCE_PL_NUMBER  	        || initStyle == SCE_PL_IDENTIFIER @@ -770,17 +879,48 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  			break;  		case SCE_PL_HERE_Q:  		case SCE_PL_HERE_QQ: -		case SCE_PL_HERE_QX: { -				// also implies HereDoc.State == 2 -				sc.Complete(); -				while (!sc.atLineEnd) -					sc.Forward(); -				char s[HERE_DELIM_MAX]; -				sc.GetCurrent(s, sizeof(s)); -				if (isMatch(HereDoc.Delimiter, s)) { +		case SCE_PL_HERE_QX: +			// also implies HereDoc.State == 2 +			sc.Complete(); +			if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) { +				int c = sc.GetRelative(HereDoc.DelimiterLength); +				if (c == '\r' || c == '\n') {	// peek first, do not consume match +					sc.Forward(HereDoc.DelimiterLength);  					sc.SetState(SCE_PL_DEFAULT);  					backFlag = BACK_NONE;  					HereDoc.State = 0; +					if (!sc.atLineEnd) +						sc.Forward(); +					break; +				} +			} +			if (sc.state == SCE_PL_HERE_Q) {	// \EOF and 'EOF' non-interpolated +				while (!sc.atLineEnd) +					sc.Forward(); +				break; +			} +			while (!sc.atLineEnd) {		// "EOF" and `EOF` interpolated +				int s = 0, endType = 0; +				int maxSeg = endPos - sc.currentPos; +				while (s < maxSeg) {	// scan to break string into segments +					int c = sc.GetRelative(s); +					if (c == '\\') { +						endType = 1; break; +					} else if (c == '\r' || c == '\n') { +						endType = 2; break; +					} +					s++; +				} +				if (s > 0)	// process non-empty segments +					InterpolateSegment(sc, s); +				if (endType == 1) { +					sc.Forward(); +					// \ at end-of-line does not appear to have any effect, skip +					if (sc.ch != '\r' && sc.ch != '\n') +						sc.Forward(); +				} else if (endType == 2) { +					if (!sc.atLineEnd) +						sc.Forward();  				}  			}  			break; @@ -833,45 +973,89 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  					sc.SetState(SCE_PL_DEFAULT);  			} else if (!Quote.Up && !IsASpace(sc.ch)) {  				Quote.Open(sc.ch); -			} else if (sc.ch == '\\' && Quote.Up != '\\') { -				sc.Forward(); -			} else if (sc.ch == Quote.Down) { -				Quote.Count--; -				if (Quote.Count == 0) -					Quote.Rep--; -			} else if (sc.ch == Quote.Up) { -				Quote.Count++; +			} else { +				int s = 0, endType = 0; +				int maxSeg = endPos - sc.currentPos; +				while (s < maxSeg) {	// scan to break string into segments +					int c = sc.GetRelative(s); +					if (IsASpace(c)) { +						break; +					} else if (c == '\\' && Quote.Up != '\\') { +						endType = 1; break; +					} else if (c == Quote.Down) { +						Quote.Count--; +						if (Quote.Count == 0) { +							Quote.Rep--; +							break; +						} +					} else if (c == Quote.Up) +						Quote.Count++; +					s++; +				} +				if (s > 0) {	// process non-empty segments +					if (Quote.Up != '\'') { +						InterpolateSegment(sc, s, true); +					} else		// non-interpolated path +						sc.Forward(s); +				} +				if (endType == 1) +					sc.Forward();  			}  			break;  		case SCE_PL_REGSUBST: +		case SCE_PL_XLAT:  			if (Quote.Rep <= 0) {  				if (!setModifiers.Contains(sc.ch))  					sc.SetState(SCE_PL_DEFAULT);  			} else if (!Quote.Up && !IsASpace(sc.ch)) {  				Quote.Open(sc.ch); -			} else if (sc.ch == '\\' && Quote.Up != '\\') { -				sc.Forward(); -			} else if (Quote.Count == 0 && Quote.Rep == 1) { -				// We matched something like s(...) or tr{...}, Perl 5.10 -				// appears to allow almost any character for use as the -				// next delimiters. Whitespace and comments are accepted in -				// between, but we'll limit to whitespace here. -				// For '#', if no whitespace in between, it's a delimiter. -				if (IsASpace(sc.ch)) { -					// Keep going -				} else if (sc.ch == '#' && IsASpaceOrTab(sc.chPrev)) { -					sc.SetState(SCE_PL_DEFAULT); -				} else { -					Quote.Open(sc.ch); +			} else { +				int s = 0, endType = 0; +				int maxSeg = endPos - sc.currentPos; +				bool isPattern = (Quote.Rep == 2); +				while (s < maxSeg) {	// scan to break string into segments +					int c = sc.GetRelative(s); +					if (c == '\\' && Quote.Up != '\\') { +						endType = 2; break; +					} else if (Quote.Count == 0 && Quote.Rep == 1) { +						// We matched something like s(...) or tr{...}, Perl 5.10 +						// appears to allow almost any character for use as the +						// next delimiters. Whitespace and comments are accepted in +						// between, but we'll limit to whitespace here. +						// For '#', if no whitespace in between, it's a delimiter. +						if (IsASpace(c)) { +							// Keep going +						} else if (c == '#' && IsASpaceOrTab(sc.GetRelative(s - 1))) { +							endType = 3; +						} else +							Quote.Open(c); +						break; +					} else if (c == Quote.Down) { +						Quote.Count--; +						if (Quote.Count == 0) { +							Quote.Rep--; +							endType = 1; +						} +						if (Quote.Up == Quote.Down) +							Quote.Count++; +						if (endType == 1) +							break; +					} else if (c == Quote.Up) { +						Quote.Count++; +					} else if (IsASpace(c)) +						break; +					s++; +				} +				if (s > 0) {	// process non-empty segments +					if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') { +						InterpolateSegment(sc, s, isPattern); +					} else		// non-interpolated path +						sc.Forward(s);  				} -			} else if (sc.ch == Quote.Down) { -				Quote.Count--; -				if (Quote.Count == 0) -					Quote.Rep--; -				if (Quote.Up == Quote.Down) -					Quote.Count++; -			} else if (sc.ch == Quote.Up) { -				Quote.Count++; +				if (endType == 2) { +					sc.Forward(); +				} else if (endType == 3) +					sc.SetState(SCE_PL_DEFAULT);  			}  			break;  		case SCE_PL_STRING_Q: @@ -883,14 +1067,45 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  		case SCE_PL_BACKTICKS:  			if (!Quote.Down && !IsASpace(sc.ch)) {  				Quote.Open(sc.ch); -			} else if (sc.ch == '\\' && Quote.Up != '\\') { -				sc.Forward(); -			} else if (sc.ch == Quote.Down) { -				Quote.Count--; -				if (Quote.Count == 0) +			} else { +				int s = 0, endType = 0; +				int maxSeg = endPos - sc.currentPos; +				while (s < maxSeg) {	// scan to break string into segments +					int c = sc.GetRelative(s); +					if (IsASpace(c)) { +						break; +					} else if (c == '\\' && Quote.Up != '\\') { +						endType = 2; break; +					} else if (c == Quote.Down) { +						Quote.Count--; +						if (Quote.Count == 0) { +							endType = 3; break; +						} +					} else if (c == Quote.Up) +						Quote.Count++; +					s++; +				} +				if (s > 0) {	// process non-empty segments +					switch (sc.state) { +					case SCE_PL_STRING: +					case SCE_PL_STRING_QQ: +					case SCE_PL_BACKTICKS: +						InterpolateSegment(sc, s); +						break; +					case SCE_PL_STRING_QX: +						if (Quote.Up != '\'') { +							InterpolateSegment(sc, s); +							break; +						} +						// (continued for ' delim) +					default:	// non-interpolated path +						sc.Forward(s); +					} +				} +				if (endType == 2) { +					sc.Forward(); +				} else if (endType == 3)  					sc.ForwardSetState(SCE_PL_DEFAULT); -			} else if (sc.ch == Quote.Up) { -				Quote.Count++;  			}  			break;  		case SCE_PL_SUB_PROTOTYPE: { @@ -910,12 +1125,13 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  			break;  		case SCE_PL_FORMAT: {  				sc.Complete(); +				if (sc.Match('.')) { +					sc.Forward(); +					if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n'))) +						sc.SetState(SCE_PL_DEFAULT); +				}  				while (!sc.atLineEnd)  					sc.Forward(); -				char s[10]; -				sc.GetCurrent(s, sizeof(s)); -				if (isMatch(".", s)) -					sc.SetState(SCE_PL_DEFAULT);  			}  			break;  		case SCE_PL_ERROR: @@ -1000,9 +1216,9 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  				numState = PERLNUM_DECIMAL;  				dotCount = 0;  				if (sc.ch == '0') {		// hex,bin,octal -					if (sc.chNext == 'x') { +					if (sc.chNext == 'x' || sc.chNext == 'X') {  						numState = PERLNUM_HEX; -					} else if (sc.chNext == 'b') { +					} else if (sc.chNext == 'b' || sc.chNext == 'B') {  						numState = PERLNUM_BINARY;  					} else if (IsADigit(sc.chNext)) {  						numState = PERLNUM_OCTAL; @@ -1032,10 +1248,10 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  					sc.ChangeState(SCE_PL_STRING_Q);  					Quote.New();  				} else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) { -					sc.ChangeState(SCE_PL_REGSUBST); +					sc.ChangeState(SCE_PL_XLAT);  					Quote.New(2);  				} else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) { -					sc.ChangeState(SCE_PL_REGSUBST); +					sc.ChangeState(SCE_PL_XLAT);  					Quote.New(2);  					sc.Forward();  					fw++; @@ -1127,7 +1343,6 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  				bool isHereDoc = sc.Match('<', '<');  				bool hereDocSpace = false;		// for: SCALAR [whitespace] '<<'  				unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0; -				unsigned int bkend;  				sc.Complete();  				styler.Flush();  				if (styler.StyleAt(bk) == SCE_PL_DEFAULT) @@ -1196,7 +1411,7 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  							// keywords always forced as /PATTERN/: split, if, elsif, while  							// everything else /PATTERN/ unless digit/space immediately after '/'  							// for '//', defined-or favoured unless special keywords -							bkend = bk + 1; +							unsigned int bkend = bk + 1;  							while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {  								bk--;  							} | 
