diff options
| -rw-r--r-- | src/LexPerl.cxx | 41 | 
1 files changed, 26 insertions, 15 deletions
| diff --git a/src/LexPerl.cxx b/src/LexPerl.cxx index 975fa79ad..0c10f4ee0 100644 --- a/src/LexPerl.cxx +++ b/src/LexPerl.cxx @@ -68,14 +68,22 @@ static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywor  	return keywords.InList(s);  } +// Note: as lexer uses chars, UTF-8 bytes are considered as <0 values +// Note: iswordchar() was used in only one place in LexPerl, it is +// unnecessary as '.' is processed as the concatenation operator, so +// only isWordStart() is used in LexPerl + +static inline bool isWordStart(char ch) { +	return ch < 0 || isalnum(ch) || ch == '_'; +} +  static inline bool isEndVar(char ch) { -	return !isalnum(ch) && ch != '#' && ch != '$' && +	return !(ch < 0) && !isalnum(ch) && ch != '#' && ch != '$' &&  	       ch != '_' && ch != '\'';  } -  static inline bool isNonQuote(char ch) { -	return isalnum(ch) || ch == '_'; +	return ch < 0 || isalnum(ch) || ch == '_';  }  static inline char actualNumStyle(int numberStyle) { @@ -306,7 +314,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				} else if (ch == 'v') {	// vector  					numState = PERLNUM_V_VECTOR;  				} -			} else if (iswordstart(ch)) { +			} else if (isWordStart(ch)) {                  // if immediately prefixed by '::', always a bareword                  state = SCE_PL_WORD;                  if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') { @@ -338,7 +346,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  					Quote.New(1);                      kw++;  				} else if (ch == 'x' && (chNext == '=' ||	// repetition -                           (chNext != '_' && !isalnum(chNext)) || +                           !isWordStart(chNext) ||                             (isdigit(chPrev) && isdigit(chNext)))) {                      state = SCE_PL_OPERATOR;                  } @@ -347,7 +355,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,                  // otherwise it is always a bareword and we skip a lot of scanning                  // note: keywords assumed to be limited to [_a-zA-Z] only                  if (state == SCE_PL_WORD) { -                    while (iswordstart(styler.SafeGetCharAt(kw))) kw++; +                    while (isWordStart(styler.SafeGetCharAt(kw))) kw++;                      if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) {                          state = SCE_PL_IDENTIFIER;                      } @@ -403,7 +411,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,                  backflag = BACK_NONE;                  // an identifier or bareword                  if (state == SCE_PL_IDENTIFIER) { -                    if ((!iswordchar(chNext) && chNext != '\'') +                    if ((!isWordStart(chNext) && chNext != '\'')                          || (chNext == '.' && chNext2 == '.')) {                          // We need that if length of word == 1!                          // This test is copied from the SCE_PL_WORD handler. @@ -476,7 +484,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,                  backflag = BACK_NONE;  			} else if (ch == '@') {  				if (isalpha(chNext) || chNext == '#' || chNext == '$' -					|| chNext == '_' || chNext == '+' || chNext == '-') { +					|| chNext == '_' || chNext == '+' || chNext == '-' +                    || chNext < 0) {  					state = SCE_PL_ARRAY;                  } else if (chNext == ':' && chNext2 == ':') {                      state = SCE_PL_ARRAY; @@ -491,7 +500,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,                  backflag = BACK_NONE;  			} else if (ch == '%') {  				if (isalpha(chNext) || chNext == '#' || chNext == '$' -                    || chNext == '_' || chNext == '!' || chNext == '^') { +                    || chNext == '_' || chNext == '!' || chNext == '^' +                    || chNext < 0) {  					state = SCE_PL_HASH;                      i++;                      ch = chNext; @@ -516,8 +526,9 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,                      i += 2;                      ch = styler.SafeGetCharAt(i);                      chNext = styler.SafeGetCharAt(i + 1); -				} else if (isalpha(chNext) || chNext == '_' || -                    NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) { +				} else if (isalpha(chNext) || chNext == '_' +                        || NULL != strstr("^/|,\\\";#%^:?<>)[]", strch) +                        || chNext < 0) {  					state = SCE_PL_SYMBOLTABLE;                      i++;                      ch = chNext; @@ -764,9 +775,9 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				if (!isdigit(chNext)) {  					goto numAtEnd;  				} -			} else if (isalnum(ch)) { +			} else if (isalnum(ch) || ch < 0) {  				if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { -					if (isalpha(ch)) { +					if (isalpha(ch) || ch < 0) {  						if (dotCount == 0) { // change to word  							state = SCE_PL_IDENTIFIER;  						} else { // vector then word @@ -814,7 +825,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  				goto restartLexer;  			}  		} else if (state == SCE_PL_IDENTIFIER) { -			if (!iswordstart(chNext) && chNext != '\'') { +			if (!isWordStart(chNext) && chNext != '\'') {  				styler.ColourTo(i, SCE_PL_IDENTIFIER);  				state = SCE_PL_DEFAULT;  				ch = ' '; @@ -1044,7 +1055,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,  						if (isspacechar(ch)) {  							// Keep going  						} -						else if (isalnum(ch)) { +						else if (isalnum(ch) || ch < 0) {  							styler.ColourTo(i, state);  							state = SCE_PL_DEFAULT;  							ch = ' '; | 
