diff options
Diffstat (limited to 'lexers/LexPerl.cxx')
| -rw-r--r-- | lexers/LexPerl.cxx | 342 | 
1 files changed, 282 insertions, 60 deletions
| diff --git a/lexers/LexPerl.cxx b/lexers/LexPerl.cxx index 5efc1d636..8a0f6422e 100644 --- a/lexers/LexPerl.cxx +++ b/lexers/LexPerl.cxx @@ -69,6 +69,10 @@ using namespace Scintilla;  #define BACK_OPERATOR	1	// whitespace/comments are insignificant  #define BACK_KEYWORD	2	// operators/keywords are needed for disambiguation +// all interpolated styles are different from their parent styles by a constant difference +// we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value +#define	INTERPOLATE_SHIFT	(SCE_PL_STRING_VAR - SCE_PL_STRING) +  static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, LexAccessor &styler) {  	// old-style keyword matcher; needed because GetCurrent() needs  	// current segment to be committed, but we may abandon early... @@ -352,11 +356,19 @@ struct OptionSetPerl : public OptionSet<OptionsPerl> {  };  class LexerPerl : public ILexer { +	CharacterSet setWordStart; +	CharacterSet setWord; +	CharacterSet setSpecialVar; +	CharacterSet setControlVar;  	WordList keywords;  	OptionsPerl options;  	OptionSetPerl osPerl;  public: -	LexerPerl() { +	LexerPerl() : +		setWordStart(CharacterSet::setAlpha, "_", 0x80, true), +		setWord(CharacterSet::setAlphaNum, "_", 0x80, true), +		setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"), +		setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {  	}  	~LexerPerl() {  	} @@ -390,6 +402,7 @@ public:  	static ILexer *LexerFactoryPerl() {  		return new LexerPerl();  	} +	void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);  };  int SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) { @@ -418,6 +431,90 @@ int SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {  	return firstModification;  } +void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) { +	// interpolate a segment (with no active backslashes or delimiters within) +	// switch in or out of an interpolation style or continue current style +	// commit variable patterns if found, trim segment, repeat until done +	while (maxSeg > 0) { +		bool isVar = false; +		int sLen = 0; +		if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) { +			// $#[$]*word [$@][$]*word (where word or {word} is always present) +			bool braces = false; +			sLen = 1; +			if (sc.ch == '$' && sc.chNext == '#') {	// starts with $# +				sLen++; +			} +			while ((maxSeg > sLen) && (sc.GetRelative(sLen) == '$'))	// >0 $ dereference within +				sLen++; +			if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '{')) {	// { start for {word} +				sLen++; +				braces = true; +			} +			if (maxSeg > sLen) { +				int c = sc.GetRelative(sLen); +				if (setWordStart.Contains(c)) {	// word (various) +					sLen++; +					isVar = true; +					while ((maxSeg > sLen) && setWord.Contains(sc.GetRelative(sLen))) +						sLen++; +				} else if (braces && IsADigit(c) && (sLen == 2)) {	// digit for ${digit} +					sLen++; +					isVar = true; +				} +			} +			if (braces) { +				if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '}')) {	// } end for {word} +					sLen++; +				} else +					isVar = false; +			} +		} +		if (!isVar && (maxSeg > 1)) {	// $- or @-specific variable patterns +			sLen = 1; +			int c = sc.chNext; +			if (sc.ch == '$') { +				if (IsADigit(c)) {	// $[0-9] and slurp trailing digits +					sLen++; +					isVar = true; +					while ((maxSeg > sLen) && IsADigit(sc.GetRelative(sLen))) +						sLen++; +				} else if (setSpecialVar.Contains(c)) {	// $ special variables +					sLen++; +					isVar = true; +				} else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) {	// $ additional +					sLen++; +					isVar = true; +				} else if (c == '^') {	// $^A control-char style +					sLen++; +					if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelative(sLen))) { +						sLen++; +						isVar = true; +					} +				} +			} else if (sc.ch == '@') { +				if (!isPattern && ((c == '+') || (c == '-'))) {	// @ specials non-pattern +					sLen++; +					isVar = true; +				} +			} +		} +		if (isVar) {	// commit as interpolated variable or normal character +			if (sc.state < SCE_PL_STRING_VAR) +				sc.SetState(sc.state + INTERPOLATE_SHIFT); +			sc.Forward(sLen); +			maxSeg -= sLen; +		} else { +			if (sc.state >= SCE_PL_STRING_VAR) +				sc.SetState(sc.state - INTERPOLATE_SHIFT); +			sc.Forward(); +			maxSeg--; +		} +	} +	if (sc.state >= SCE_PL_STRING_VAR) +		sc.SetState(sc.state - INTERPOLATE_SHIFT); +} +  void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {  	LexAccessor styler(pAccess); @@ -426,8 +523,6 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  	reWords.Set("elsif if split while");  	// charset classes -	CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true); -	CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true);  	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");  	// lexing of "%*</" operators is non-trivial; these are missing in the set below  	CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~"); @@ -512,10 +607,13 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  	// Includes strings (may be multi-line), numbers (additional state), format  	// bodies, as well as POD sections.  	if (initStyle == SCE_PL_HERE_Q -	        || initStyle == SCE_PL_HERE_QQ -	        || initStyle == SCE_PL_HERE_QX -	        || initStyle == SCE_PL_FORMAT +	    || initStyle == SCE_PL_HERE_QQ +	    || initStyle == SCE_PL_HERE_QX +	    || initStyle == SCE_PL_FORMAT +	    || initStyle == SCE_PL_HERE_QQ_VAR +	    || initStyle == SCE_PL_HERE_QX_VAR  	   ) { +		// backtrack through multiple styles to reach the delimiter start  		int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;  		while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {  			startPos--; @@ -523,15 +621,34 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  		startPos = styler.LineStart(styler.GetLine(startPos));  		initStyle = styler.StyleAt(startPos - 1);  	} -	if (initStyle == SCE_PL_STRING_Q -	        || initStyle == SCE_PL_STRING_QQ -	        || initStyle == SCE_PL_STRING_QX -	        || initStyle == SCE_PL_STRING_QR +	if (initStyle == SCE_PL_STRING +	    || initStyle == SCE_PL_STRING_QQ +	    || initStyle == SCE_PL_BACKTICKS +	    || initStyle == SCE_PL_STRING_QX +	    || initStyle == SCE_PL_REGEX +	    || initStyle == SCE_PL_STRING_QR +	    || initStyle == SCE_PL_REGSUBST +	    || initStyle == SCE_PL_STRING_VAR +	    || initStyle == SCE_PL_STRING_QQ_VAR +	    || initStyle == SCE_PL_BACKTICKS_VAR +	    || initStyle == SCE_PL_STRING_QX_VAR +	    || initStyle == SCE_PL_REGEX_VAR +	    || initStyle == SCE_PL_STRING_QR_VAR +	    || initStyle == SCE_PL_REGSUBST_VAR +	   ) { +		// for interpolation, must backtrack through a mix of two different styles +		int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ? +			initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT; +		while (startPos > 1) { +			int st = styler.StyleAt(startPos - 1); +			if ((st != initStyle) && (st != otherStyle)) +				break; +			startPos--; +		} +		initStyle = SCE_PL_DEFAULT; +	} else if (initStyle == SCE_PL_STRING_Q  	        || initStyle == SCE_PL_STRING_QW -	        || initStyle == SCE_PL_REGEX -	        || initStyle == SCE_PL_REGSUBST -	        || initStyle == SCE_PL_STRING -	        || initStyle == SCE_PL_BACKTICKS +	        || initStyle == SCE_PL_XLAT  	        || initStyle == SCE_PL_CHARACTER  	        || initStyle == SCE_PL_NUMBER  	        || initStyle == SCE_PL_IDENTIFIER @@ -762,19 +879,49 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  			break;  		case SCE_PL_HERE_Q:  		case SCE_PL_HERE_QQ: -		case SCE_PL_HERE_QX: { -				// also implies HereDoc.State == 2 -				sc.Complete(); -				if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) { +		case SCE_PL_HERE_QX: +			// also implies HereDoc.State == 2 +			sc.Complete(); +			if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) { +				int c = sc.GetRelative(HereDoc.DelimiterLength); +				if (c == '\r' || c == '\n') {	// peek first, do not consume match  					sc.Forward(HereDoc.DelimiterLength); -					if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n'))) {  					sc.SetState(SCE_PL_DEFAULT);  					backFlag = BACK_NONE;  					HereDoc.State = 0; +					if (!sc.atLineEnd) +						sc.Forward(); +					break;  				}  			} +			if (sc.state == SCE_PL_HERE_Q) {	// \EOF and 'EOF' non-interpolated  				while (!sc.atLineEnd)  					sc.Forward(); +				break; +			} +			while (!sc.atLineEnd) {		// "EOF" and `EOF` interpolated +				int s = 0, endType = 0; +				int maxSeg = endPos - sc.currentPos; +				while (s < maxSeg) {	// scan to break string into segments +					int c = sc.GetRelative(s); +					if (c == '\\') { +						endType = 1; break; +					} else if (c == '\r' || c == '\n') { +						endType = 2; break; +					} +					s++; +				} +				if (s > 0)	// process non-empty segments +					InterpolateSegment(sc, s); +				if (endType == 1) { +					sc.Forward(); +					// \ at end-of-line does not appear to have any effect, skip +					if (sc.ch != '\r' && sc.ch != '\n') +						sc.Forward(); +				} else if (endType == 2) { +					if (!sc.atLineEnd) +						sc.Forward(); +				}  			}  			break;  		case SCE_PL_POD: @@ -826,45 +973,89 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  					sc.SetState(SCE_PL_DEFAULT);  			} else if (!Quote.Up && !IsASpace(sc.ch)) {  				Quote.Open(sc.ch); -			} else if (sc.ch == '\\' && Quote.Up != '\\') { -				sc.Forward(); -			} else if (sc.ch == Quote.Down) { -				Quote.Count--; -				if (Quote.Count == 0) -					Quote.Rep--; -			} else if (sc.ch == Quote.Up) { -				Quote.Count++; +			} else { +				int s = 0, endType = 0; +				int maxSeg = endPos - sc.currentPos; +				while (s < maxSeg) {	// scan to break string into segments +					int c = sc.GetRelative(s); +					if (IsASpace(c)) { +						break; +					} else if (c == '\\' && Quote.Up != '\\') { +						endType = 1; break; +					} else if (c == Quote.Down) { +						Quote.Count--; +						if (Quote.Count == 0) { +							Quote.Rep--; +							break; +						} +					} else if (c == Quote.Up) +						Quote.Count++; +					s++; +				} +				if (s > 0) {	// process non-empty segments +					if (Quote.Up != '\'') { +						InterpolateSegment(sc, s, true); +					} else		// non-interpolated path +						sc.Forward(s); +				} +				if (endType == 1) +					sc.Forward();  			}  			break;  		case SCE_PL_REGSUBST: +		case SCE_PL_XLAT:  			if (Quote.Rep <= 0) {  				if (!setModifiers.Contains(sc.ch))  					sc.SetState(SCE_PL_DEFAULT);  			} else if (!Quote.Up && !IsASpace(sc.ch)) {  				Quote.Open(sc.ch); -			} else if (sc.ch == '\\' && Quote.Up != '\\') { -				sc.Forward(); -			} else if (Quote.Count == 0 && Quote.Rep == 1) { -				// We matched something like s(...) or tr{...}, Perl 5.10 -				// appears to allow almost any character for use as the -				// next delimiters. Whitespace and comments are accepted in -				// between, but we'll limit to whitespace here. -				// For '#', if no whitespace in between, it's a delimiter. -				if (IsASpace(sc.ch)) { -					// Keep going -				} else if (sc.ch == '#' && IsASpaceOrTab(sc.chPrev)) { -					sc.SetState(SCE_PL_DEFAULT); -				} else { -					Quote.Open(sc.ch); +			} else { +				int s = 0, endType = 0; +				int maxSeg = endPos - sc.currentPos; +				bool isPattern = (Quote.Rep == 2); +				while (s < maxSeg) {	// scan to break string into segments +					int c = sc.GetRelative(s); +					if (c == '\\' && Quote.Up != '\\') { +						endType = 2; break; +					} else if (Quote.Count == 0 && Quote.Rep == 1) { +						// We matched something like s(...) or tr{...}, Perl 5.10 +						// appears to allow almost any character for use as the +						// next delimiters. Whitespace and comments are accepted in +						// between, but we'll limit to whitespace here. +						// For '#', if no whitespace in between, it's a delimiter. +						if (IsASpace(c)) { +							// Keep going +						} else if (c == '#' && IsASpaceOrTab(sc.GetRelative(s - 1))) { +							endType = 3; +						} else +							Quote.Open(c); +						break; +					} else if (c == Quote.Down) { +						Quote.Count--; +						if (Quote.Count == 0) { +							Quote.Rep--; +							endType = 1; +						} +						if (Quote.Up == Quote.Down) +							Quote.Count++; +						if (endType == 1) +							break; +					} else if (c == Quote.Up) { +						Quote.Count++; +					} else if (IsASpace(c)) +						break; +					s++;  				} -			} else if (sc.ch == Quote.Down) { -				Quote.Count--; -				if (Quote.Count == 0) -					Quote.Rep--; -				if (Quote.Up == Quote.Down) -					Quote.Count++; -			} else if (sc.ch == Quote.Up) { -				Quote.Count++; +				if (s > 0) {	// process non-empty segments +					if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') { +						InterpolateSegment(sc, s, isPattern); +					} else		// non-interpolated path +						sc.Forward(s); +				} +				if (endType == 2) { +					sc.Forward(); +				} else if (endType == 3) +					sc.SetState(SCE_PL_DEFAULT);  			}  			break;  		case SCE_PL_STRING_Q: @@ -876,14 +1067,45 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  		case SCE_PL_BACKTICKS:  			if (!Quote.Down && !IsASpace(sc.ch)) {  				Quote.Open(sc.ch); -			} else if (sc.ch == '\\' && Quote.Up != '\\') { -				sc.Forward(); -			} else if (sc.ch == Quote.Down) { -				Quote.Count--; -				if (Quote.Count == 0) +			} else { +				int s = 0, endType = 0; +				int maxSeg = endPos - sc.currentPos; +				while (s < maxSeg) {	// scan to break string into segments +					int c = sc.GetRelative(s); +					if (IsASpace(c)) { +						break; +					} else if (c == '\\' && Quote.Up != '\\') { +						endType = 2; break; +					} else if (c == Quote.Down) { +						Quote.Count--; +						if (Quote.Count == 0) { +							endType = 3; break; +						} +					} else if (c == Quote.Up) +						Quote.Count++; +					s++; +				} +				if (s > 0) {	// process non-empty segments +					switch (sc.state) { +					case SCE_PL_STRING: +					case SCE_PL_STRING_QQ: +					case SCE_PL_BACKTICKS: +						InterpolateSegment(sc, s); +						break; +					case SCE_PL_STRING_QX: +						if (Quote.Up != '\'') { +							InterpolateSegment(sc, s); +							break; +						} +						// (continued for ' delim) +					default:	// non-interpolated path +						sc.Forward(s); +					} +				} +				if (endType == 2) { +					sc.Forward(); +				} else if (endType == 3)  					sc.ForwardSetState(SCE_PL_DEFAULT); -			} else if (sc.ch == Quote.Up) { -				Quote.Count++;  			}  			break;  		case SCE_PL_SUB_PROTOTYPE: { @@ -906,8 +1128,8 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  				if (sc.Match('.')) {  					sc.Forward();  					if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n'))) -					sc.SetState(SCE_PL_DEFAULT); -			} +						sc.SetState(SCE_PL_DEFAULT); +				}  				while (!sc.atLineEnd)  					sc.Forward();  			} @@ -1026,10 +1248,10 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,  					sc.ChangeState(SCE_PL_STRING_Q);  					Quote.New();  				} else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) { -					sc.ChangeState(SCE_PL_REGSUBST); +					sc.ChangeState(SCE_PL_XLAT);  					Quote.New(2);  				} else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) { -					sc.ChangeState(SCE_PL_REGSUBST); +					sc.ChangeState(SCE_PL_XLAT);  					Quote.New(2);  					sc.Forward();  					fw++; | 
