diff options
| -rw-r--r-- | src/LexPerl.cxx | 199 | 
1 files changed, 113 insertions, 86 deletions
| diff --git a/src/LexPerl.cxx b/src/LexPerl.cxx index 8acfdbb1f..cb9152eca 100644 --- a/src/LexPerl.cxx +++ b/src/LexPerl.cxx @@ -29,6 +29,10 @@  #define PERLNUM_V_VECTOR 7  #define PERLNUM_BAD 8 +#define BACK_NONE 0         // lookback state for bareword disambiguation: +#define BACK_OPERATOR 1     // whitespace/comments are insignificant +#define BACK_KEYWORD 2      // operators/keywords are needed for disambiguation +  #define HERE_DELIM_MAX 256  static inline bool isEOLChar(char ch) { @@ -55,17 +59,13 @@ static inline bool isPerlOperator(char ch) {  	return false;  } -static int classifyWordPerl(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { +static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {  	char s[100]; -	for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) { -		s[i] = styler[start + i]; -		s[i + 1] = '\0'; -	} -	char chAttr = SCE_PL_IDENTIFIER; -	if (keywords.InList(s)) -		chAttr = SCE_PL_WORD; -	styler.ColourTo(end, chAttr); -	return chAttr; +    unsigned int i, len = end - start; +    if (len > 30) { len = 30; } +	for (i = 0; i < len; i++, start++) s[i] = styler[start]; +    s[i] = '\0'; +	return keywords.InList(s);  }  static inline bool isEndVar(char ch) { @@ -205,6 +205,22 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  		state = SCE_PL_DEFAULT;  	} +    // lookback at start of lexing to set proper state for backflag +    // after this, they are updated when elements are lexed +    int backflag = BACK_NONE; +    unsigned int backPos = startPos; +    if (backPos > 0) { +        backPos--; +        int sty = SCE_PL_DEFAULT; +        while ((backPos > 0) && (sty = styler.StyleAt(backPos), +               sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE)) +            backPos--; +        if (sty == SCE_PL_OPERATOR) +            backflag = BACK_OPERATOR; +        else if (sty == SCE_PL_WORD) +            backflag = BACK_KEYWORD; +    } +  	styler.StartAt(startPos);  	char chPrev = styler.SafeGetCharAt(startPos - 1);  	if (startPos == 0) @@ -271,6 +287,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  			if (isdigit(ch) || (isdigit(chNext) &&  				(ch == '.' || ch == 'v'))) {  				state = SCE_PL_NUMBER; +                backflag = BACK_NONE;  				numState = PERLNUM_DECIMAL;  				dotCount = 0;  				if (ch == '0') {	// hex,bin,octal @@ -292,11 +309,11 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  			} else if (iswordstart(ch)) {                  // if immediately prefixed by '::', always a bareword                  state = SCE_PL_WORD; -                if (styler.SafeGetCharAt(i - 1) == ':' && styler.SafeGetCharAt(i - 2) == ':') { +                if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') {                      state = SCE_PL_IDENTIFIER;                  } +                unsigned int kw = i + 1;                  // first check for possible quote-like delimiter -                bool dblchar = false;  				if (ch == 's' && !isNonQuote(chNext)) {  					state = SCE_PL_REGSUBST;  					Quote.New(2); @@ -312,99 +329,111 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				} else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) {  					state = SCE_PL_REGSUBST;  					Quote.New(2); -                    dblchar = true; +                    kw++;  				} else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) {  					if      (chNext == 'q') state = SCE_PL_STRING_QQ;  					else if (chNext == 'x') state = SCE_PL_STRING_QX;  					else if (chNext == 'r') state = SCE_PL_STRING_QR;  					else if (chNext == 'w') state = SCE_PL_STRING_QW;  					Quote.New(1); -                    dblchar = true; +                    kw++;  				} else if (ch == 'x' && (chNext == '=' ||	// repetition                             (chNext != '_' && !isalnum(chNext)) ||                             (isdigit(chPrev) && isdigit(chNext)))) {                      state = SCE_PL_OPERATOR;                  } -                // for quote-like delimiters, attempt to disambiguate +                // if potentially a keyword, scan forward and grab word, then check +                // if it's really one; if yes, disambiguation test is performed +                // otherwise it is always a bareword and we skip a lot of scanning +                // note: keywords assumed to be limited to [_a-zA-Z] only +                if (state == SCE_PL_WORD) { +                    while (iswordstart(styler.SafeGetCharAt(kw))) kw++; +                    if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) { +                        state = SCE_PL_IDENTIFIER; +                    } +                } +                // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this +                // for quote-like delimiters/keywords, attempt to disambiguate                  // to select for bareword, change state -> SCE_PL_IDENTIFIER -                // also helps to disambiguate keywords/barewords -                if (i > 0) { -                    unsigned int j = i - 1; +                if (state != SCE_PL_IDENTIFIER && i > 0) { +                    unsigned int j = i;                      bool moreback = false;      // true if passed newline/comments                      bool brace = false;         // true if opening brace found                      char ch2; -                    styler.Flush(); -                    // first look backwards past whitespace/comments -                    while ((j > 0) && (styler.StyleAt(j) == SCE_PL_DEFAULT -                           || styler.StyleAt(j) == SCE_PL_COMMENTLINE)) { -                        if (isEOLChar(styler.SafeGetCharAt(j)) -                            || styler.StyleAt(j) == SCE_PL_COMMENTLINE) -                            moreback = true; -                        j--; -                    } -                    if (styler.StyleAt(j) != SCE_PL_DEFAULT -                        && styler.StyleAt(j) != SCE_PL_COMMENTLINE) { +                    // first look backwards past whitespace/comments for EOLs +                    // if BACK_NONE, neither operator nor keyword, so skip test +                    if (backflag != BACK_NONE) { +                        while (--j > backPos) { +                            if (isEOLChar(styler.SafeGetCharAt(j))) +                                moreback = true; +                        }                          ch2 = styler.SafeGetCharAt(j); -                        if (!moreback && ch2 == '{') { +                        if (ch2 == '{' && !moreback) {                              // {bareword: possible variable spec                              brace = true;                          } else if ((ch2 == '&')                                  // &bareword: subroutine call -                                || (ch2 == '>' && j >= 1 -                                    && styler.SafeGetCharAt(j - 1) == '-') +                                || (ch2 == '>' && styler.SafeGetCharAt(j - 1) == '-')                                  // ->bareword: part of variable spec -                                || (styler.StyleAt(j) == SCE_PL_WORD && j >= 2 -                                    && styler.Match(j - 2, "sub"))) { +                                || (ch2 == 'b' && styler.Match(j - 2, "su"))) {                                  // sub bareword: subroutine declaration +                                // (implied BACK_KEYWORD, no keywords end in 'sub'!)                              state = SCE_PL_IDENTIFIER;                          } -                    } -                    // next look forward past tabs/spaces only -                    // skip past word first -                    j = i; -                    while (iswordchar(styler.SafeGetCharAt(j + 1))) j++; -                    if (j + 1 < lengthDoc) { -                        do { -                            ch2 = styler.SafeGetCharAt(++j); -                        } while ((j + 1 < lengthDoc) && (ch2 == ' ' || ch2 == '\t')); -                        if (ch2 != ' ' && ch2 != '\t') { -                            if ((brace && ch2 == '}') +                        // if status still ambiguous, look forward after word past +                        // tabs/spaces only; if ch2 isn't one of '[{(,' it can never +                        // match anything, so skip the whole thing +                        j = kw; +                        if (state != SCE_PL_IDENTIFIER +                            && (ch2 == '{' || ch2 == '(' || ch2 == '['|| ch2 == ',') +                            && kw < lengthDoc) { +                            while (ch2 = styler.SafeGetCharAt(j), +                                   (ch2 == ' ' || ch2 == '\t') && j < lengthDoc) { +                                j++; +                            } +                            if ((ch2 == '}' && brace)                               // {bareword}: variable spec -                             || (ch2 == '=' && j + 1 < lengthDoc -                                 && styler.SafeGetCharAt(j + 1) == '>')) { -                             // bareword=>: hash literal +                             || (ch2 == '=' && styler.SafeGetCharAt(j + 1) == '>')) { +                             // [{(, bareword=>: hash literal                                  state = SCE_PL_IDENTIFIER;                              }                          }                      }                  } -                // if enters with a state of SCE_PL_IDENTIFIER, it has no chance -                // of becoming a keyword; otherwise it might be a keyword -                if (state == SCE_PL_IDENTIFIER || state == SCE_PL_WORD) { +                backflag = BACK_NONE; +                // an identifier or bareword +                if (state == SCE_PL_IDENTIFIER) {                      if ((!iswordchar(chNext) && chNext != '\'')                          || (chNext == '.' && chNext2 == '.')) {                          // We need that if length of word == 1!                          // This test is copied from the SCE_PL_WORD handler. -                        if (state == SCE_PL_WORD) { -                            classifyWordPerl(styler.GetStartSegment(), i, keywords, styler); -                        } else { -                            styler.ColourTo(i, SCE_PL_IDENTIFIER); -                        } +                        styler.ColourTo(i, SCE_PL_IDENTIFIER);                          state = SCE_PL_DEFAULT;                      } -                // either quote-like operators or repetition operator -                } else { -                    if (state == SCE_PL_OPERATOR) { -                        // repetition operator 'x' -                        styler.ColourTo(i, SCE_PL_OPERATOR); -                        state = SCE_PL_DEFAULT; +                // a keyword +                } else if (state == SCE_PL_WORD) { +                    i = kw - 1; +                    if (ch == '_' && chNext == '_' && +                        (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__") +                      || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__"))) { +                        styler.ColourTo(i, SCE_PL_DATASECTION); +                        state = SCE_PL_DATASECTION;                      } else { -                        // quote-like delimiter, skip one char if double-char delimiter -                        if (dblchar) { -                            i++; -                            chNext = chNext2; -                        } +                        styler.ColourTo(i, SCE_PL_WORD); +                        state = SCE_PL_DEFAULT; +                        backflag = BACK_KEYWORD; +                        backPos = i;                      } +                    ch = styler.SafeGetCharAt(i); +                    chNext = styler.SafeGetCharAt(i + 1); +                // a repetition operator 'x' +                } else if (state == SCE_PL_OPERATOR) { +                    styler.ColourTo(i, SCE_PL_OPERATOR); +                    state = SCE_PL_DEFAULT; +                // quote-like delimiter, skip one char if double-char delimiter +                } else { +                    i = kw - 1; +                    chNext = styler.SafeGetCharAt(i + 1);                  }  			} else if (ch == '#') {  				state = SCE_PL_COMMENTLINE; @@ -412,6 +441,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				state = SCE_PL_STRING;  				Quote.New(1);  				Quote.Open(ch); +                backflag = BACK_NONE;  			} else if (ch == '\'') {  				if (chPrev == '&') {  					// Archaic call @@ -421,10 +451,12 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					Quote.New(1);  					Quote.Open(ch);  				} +                backflag = BACK_NONE;  			} else if (ch == '`') {  				state = SCE_PL_BACKTICKS;  				Quote.New(1);  				Quote.Open(ch); +                backflag = BACK_NONE;  			} else if (ch == '$') {  				if ((chNext == '{') || isspacechar(chNext)) {  					styler.ColourTo(i, SCE_PL_SCALAR); @@ -440,6 +472,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  						chNext = chNext2;  					}  				} +                backflag = BACK_NONE;  			} else if (ch == '@') {  				if (isalpha(chNext) || chNext == '#' || chNext == '$'  					|| chNext == '_' || chNext == '+' || chNext == '-') { @@ -449,6 +482,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				} else {  					styler.ColourTo(i, SCE_PL_ARRAY);  				} +                backflag = BACK_NONE;  			} else if (ch == '%') {  				if (isalpha(chNext) || chNext == '#' || chNext == '$'                      || chNext == '_' || chNext == '!' || chNext == '^') { @@ -461,6 +495,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				} else {  					styler.ColourTo(i, SCE_PL_OPERATOR);  				} +                backflag = BACK_NONE;  			} else if (ch == '*') {                  char strch[2];                  strch[0] = chNext; @@ -481,6 +516,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					}  					styler.ColourTo(i, SCE_PL_OPERATOR);  				} +                backflag = BACK_NONE;  			} else if (ch == '/' || (ch == '<' && chNext == '<')) {  				// Explicit backward peeking to set a consistent preferRE for  				// any slash found, so no longer need to track preferRE state. @@ -626,6 +662,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,                          styler.ColourTo(i, SCE_PL_OPERATOR);                      }                  } +                backflag = BACK_NONE;  			} else if (ch == '<') {  				// looks forward for matching > on same line  				unsigned int fw = i + 1; @@ -651,10 +688,12 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					fw++;  				}  				styler.ColourTo(i, SCE_PL_OPERATOR); +                backflag = BACK_NONE;  			} else if (ch == '='	// POD  			           && isalpha(chNext)  			           && (isEOLChar(chPrev))) {  				state = SCE_PL_POD; +                backflag = BACK_NONE;  				//sookedpos = 0;  				//sooked[sookedpos] = '\0';  			} else if (ch == '-'	// file test operators @@ -665,6 +704,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				i++;  				ch = chNext;  				chNext = chNext2; +                backflag = BACK_NONE;  			} else if (isPerlOperator(ch)) {  				if (ch == '.' && chNext == '.') { // .. and ...  					i++; @@ -674,6 +714,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					chNext = styler.SafeGetCharAt(i + 1);  				}  				styler.ColourTo(i, SCE_PL_OPERATOR); +                backflag = BACK_OPERATOR; +                backPos = i;  			} else {  				// keep colouring defaults to make restart easier  				styler.ColourTo(i, SCE_PL_DEFAULT); @@ -710,7 +752,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {  					if (isalpha(ch)) {  						if (dotCount == 0) { // change to word -							state = SCE_PL_WORD; +							state = SCE_PL_IDENTIFIER;  						} else { // vector then word  							goto numAtEnd;  						} @@ -755,25 +797,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				state = SCE_PL_DEFAULT;  				goto restartLexer;  			} -		} else if (state == SCE_PL_WORD) { -			if ((!iswordchar(chNext) && chNext != '\'') -				|| chNext == '.') { -				// ".." is always an operator if preceded by a SCE_PL_WORD. -				// "." never used in Perl variable names -				// Archaic Perl has quotes inside names -				if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__") -				 || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__")) { -					styler.ColourTo(i, SCE_PL_DATASECTION); -					state = SCE_PL_DATASECTION; -				} else { -					classifyWordPerl(styler.GetStartSegment(), i, keywords, styler); -					state = SCE_PL_DEFAULT; -					ch = ' '; -				} -			}  		} else if (state == SCE_PL_IDENTIFIER) { -			if ((!iswordchar(chNext) && chNext != '\'') -				|| chNext == '.') { +			if (!iswordstart(chNext) && chNext != '\'') {  				styler.ColourTo(i, SCE_PL_IDENTIFIER);  				state = SCE_PL_DEFAULT;  				ch = ' '; @@ -855,6 +880,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					}  				} else if (HereDoc.State == 1) { // collect the delimiter +                    backflag = BACK_NONE;  					if (HereDoc.Quoted) { // a quoted here-doc delimiter  						if (ch == HereDoc.Quote) { // closing quote => end of delimiter  							styler.ColourTo(i, state); @@ -893,6 +919,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					if (isEOLChar(ch)) {  						styler.ColourTo(i - 1, state);  						state = SCE_PL_DEFAULT; +                        backflag = BACK_NONE;  						HereDoc.State = 0;  						goto restartLexer;  					} | 
