diff options
author | nyamatongwe <unknown> | 2006-08-12 13:09:21 +0000 |
---|---|---|
committer | nyamatongwe <unknown> | 2006-08-12 13:09:21 +0000 |
commit | a535be72948b03726dda1b1cba03bcdae28dfe36 (patch) | |
tree | 9558720cc51f80c24a0b8fce4e124937055ac8ab /src/LexPerl.cxx | |
parent | 70a65327fb77d24d757e22016c3219dbf418d29e (diff) | |
download | scintilla-mirror-a535be72948b03726dda1b1cba03bcdae28dfe36.tar.gz |
Identifiers allowed to contain UTF-8.
Diffstat (limited to 'src/LexPerl.cxx')
-rw-r--r-- | src/LexPerl.cxx | 41 |
1 file changed, 26 insertions, 15 deletions
diff --git a/src/LexPerl.cxx b/src/LexPerl.cxx index 975fa79ad..0c10f4ee0 100644 --- a/src/LexPerl.cxx +++ b/src/LexPerl.cxx @@ -68,14 +68,22 @@ static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywor return keywords.InList(s); } +// Note: as lexer uses chars, UTF-8 bytes are considered as <0 values +// Note: iswordchar() was used in only one place in LexPerl, it is +// unnecessary as '.' is processed as the concatenation operator, so +// only isWordStart() is used in LexPerl + +static inline bool isWordStart(char ch) { + return ch < 0 || isalnum(ch) || ch == '_'; +} + static inline bool isEndVar(char ch) { - return !isalnum(ch) && ch != '#' && ch != '$' && + return !(ch < 0) && !isalnum(ch) && ch != '#' && ch != '$' && ch != '_' && ch != '\''; } - static inline bool isNonQuote(char ch) { - return isalnum(ch) || ch == '_'; + return ch < 0 || isalnum(ch) || ch == '_'; } static inline char actualNumStyle(int numberStyle) { @@ -306,7 +314,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, } else if (ch == 'v') { // vector numState = PERLNUM_V_VECTOR; } - } else if (iswordstart(ch)) { + } else if (isWordStart(ch)) { // if immediately prefixed by '::', always a bareword state = SCE_PL_WORD; if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') { @@ -338,7 +346,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, Quote.New(1); kw++; } else if (ch == 'x' && (chNext == '=' || // repetition - (chNext != '_' && !isalnum(chNext)) || + !isWordStart(chNext) || (isdigit(chPrev) && isdigit(chNext)))) { state = SCE_PL_OPERATOR; } @@ -347,7 +355,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, // otherwise it is always a bareword and we skip a lot of scanning // note: keywords assumed to be limited to [_a-zA-Z] only if (state == SCE_PL_WORD) { - while (iswordstart(styler.SafeGetCharAt(kw))) kw++; + while (isWordStart(styler.SafeGetCharAt(kw))) kw++; if 
(!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) { state = SCE_PL_IDENTIFIER; } @@ -403,7 +411,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, backflag = BACK_NONE; // an identifier or bareword if (state == SCE_PL_IDENTIFIER) { - if ((!iswordchar(chNext) && chNext != '\'') + if ((!isWordStart(chNext) && chNext != '\'') || (chNext == '.' && chNext2 == '.')) { // We need that if length of word == 1! // This test is copied from the SCE_PL_WORD handler. @@ -476,7 +484,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, backflag = BACK_NONE; } else if (ch == '@') { if (isalpha(chNext) || chNext == '#' || chNext == '$' - || chNext == '_' || chNext == '+' || chNext == '-') { + || chNext == '_' || chNext == '+' || chNext == '-' + || chNext < 0) { state = SCE_PL_ARRAY; } else if (chNext == ':' && chNext2 == ':') { state = SCE_PL_ARRAY; @@ -491,7 +500,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, backflag = BACK_NONE; } else if (ch == '%') { if (isalpha(chNext) || chNext == '#' || chNext == '$' - || chNext == '_' || chNext == '!' || chNext == '^') { + || chNext == '_' || chNext == '!' 
|| chNext == '^' + || chNext < 0) { state = SCE_PL_HASH; i++; ch = chNext; @@ -516,8 +526,9 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, i += 2; ch = styler.SafeGetCharAt(i); chNext = styler.SafeGetCharAt(i + 1); - } else if (isalpha(chNext) || chNext == '_' || - NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) { + } else if (isalpha(chNext) || chNext == '_' + || NULL != strstr("^/|,\\\";#%^:?<>)[]", strch) + || chNext < 0) { state = SCE_PL_SYMBOLTABLE; i++; ch = chNext; @@ -764,9 +775,9 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, if (!isdigit(chNext)) { goto numAtEnd; } - } else if (isalnum(ch)) { + } else if (isalnum(ch) || ch < 0) { if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { - if (isalpha(ch)) { + if (isalpha(ch) || ch < 0) { if (dotCount == 0) { // change to word state = SCE_PL_IDENTIFIER; } else { // vector then word @@ -814,7 +825,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, goto restartLexer; } } else if (state == SCE_PL_IDENTIFIER) { - if (!iswordstart(chNext) && chNext != '\'') { + if (!isWordStart(chNext) && chNext != '\'') { styler.ColourTo(i, SCE_PL_IDENTIFIER); state = SCE_PL_DEFAULT; ch = ' '; @@ -1044,7 +1055,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, if (isspacechar(ch)) { // Keep going } - else if (isalnum(ch)) { + else if (isalnum(ch) || ch < 0) { styler.ColourTo(i, state); state = SCE_PL_DEFAULT; ch = ' '; |