aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authornyamatongwe <unknown>2006-08-12 13:09:21 +0000
committernyamatongwe <unknown>2006-08-12 13:09:21 +0000
commita535be72948b03726dda1b1cba03bcdae28dfe36 (patch)
tree9558720cc51f80c24a0b8fce4e124937055ac8ab /src
parent70a65327fb77d24d757e22016c3219dbf418d29e (diff)
downloadscintilla-mirror-a535be72948b03726dda1b1cba03bcdae28dfe36.tar.gz
Identifiers allowed to contain UTF-8.
Diffstat (limited to 'src')
-rw-r--r--src/LexPerl.cxx41
1 files changed, 26 insertions, 15 deletions
diff --git a/src/LexPerl.cxx b/src/LexPerl.cxx
index 975fa79ad..0c10f4ee0 100644
--- a/src/LexPerl.cxx
+++ b/src/LexPerl.cxx
@@ -68,14 +68,22 @@ static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywor
return keywords.InList(s);
}
+// Note: as lexer uses chars, UTF-8 bytes are considered as <0 values
+// Note: iswordchar() was used in only one place in LexPerl, it is
+// unnecessary as '.' is processed as the concatenation operator, so
+// only isWordStart() is used in LexPerl
+
+static inline bool isWordStart(char ch) {
+ return ch < 0 || isalnum(ch) || ch == '_';
+}
+
static inline bool isEndVar(char ch) {
- return !isalnum(ch) && ch != '#' && ch != '$' &&
+ return !(ch < 0) && !isalnum(ch) && ch != '#' && ch != '$' &&
ch != '_' && ch != '\'';
}
-
static inline bool isNonQuote(char ch) {
- return isalnum(ch) || ch == '_';
+ return ch < 0 || isalnum(ch) || ch == '_';
}
static inline char actualNumStyle(int numberStyle) {
@@ -306,7 +314,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
} else if (ch == 'v') { // vector
numState = PERLNUM_V_VECTOR;
}
- } else if (iswordstart(ch)) {
+ } else if (isWordStart(ch)) {
// if immediately prefixed by '::', always a bareword
state = SCE_PL_WORD;
if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') {
@@ -338,7 +346,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
Quote.New(1);
kw++;
} else if (ch == 'x' && (chNext == '=' || // repetition
- (chNext != '_' && !isalnum(chNext)) ||
+ !isWordStart(chNext) ||
(isdigit(chPrev) && isdigit(chNext)))) {
state = SCE_PL_OPERATOR;
}
@@ -347,7 +355,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
// otherwise it is always a bareword and we skip a lot of scanning
// note: keywords assumed to be limited to [_a-zA-Z] only
if (state == SCE_PL_WORD) {
- while (iswordstart(styler.SafeGetCharAt(kw))) kw++;
+ while (isWordStart(styler.SafeGetCharAt(kw))) kw++;
if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) {
state = SCE_PL_IDENTIFIER;
}
@@ -403,7 +411,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
backflag = BACK_NONE;
// an identifier or bareword
if (state == SCE_PL_IDENTIFIER) {
- if ((!iswordchar(chNext) && chNext != '\'')
+ if ((!isWordStart(chNext) && chNext != '\'')
|| (chNext == '.' && chNext2 == '.')) {
// We need that if length of word == 1!
// This test is copied from the SCE_PL_WORD handler.
@@ -476,7 +484,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
backflag = BACK_NONE;
} else if (ch == '@') {
if (isalpha(chNext) || chNext == '#' || chNext == '$'
- || chNext == '_' || chNext == '+' || chNext == '-') {
+ || chNext == '_' || chNext == '+' || chNext == '-'
+ || chNext < 0) {
state = SCE_PL_ARRAY;
} else if (chNext == ':' && chNext2 == ':') {
state = SCE_PL_ARRAY;
@@ -491,7 +500,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
backflag = BACK_NONE;
} else if (ch == '%') {
if (isalpha(chNext) || chNext == '#' || chNext == '$'
- || chNext == '_' || chNext == '!' || chNext == '^') {
+ || chNext == '_' || chNext == '!' || chNext == '^'
+ || chNext < 0) {
state = SCE_PL_HASH;
i++;
ch = chNext;
@@ -516,8 +526,9 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
i += 2;
ch = styler.SafeGetCharAt(i);
chNext = styler.SafeGetCharAt(i + 1);
- } else if (isalpha(chNext) || chNext == '_' ||
- NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) {
+ } else if (isalpha(chNext) || chNext == '_'
+ || NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)
+ || chNext < 0) {
state = SCE_PL_SYMBOLTABLE;
i++;
ch = chNext;
@@ -764,9 +775,9 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
if (!isdigit(chNext)) {
goto numAtEnd;
}
- } else if (isalnum(ch)) {
+ } else if (isalnum(ch) || ch < 0) {
if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
- if (isalpha(ch)) {
+ if (isalpha(ch) || ch < 0) {
if (dotCount == 0) { // change to word
state = SCE_PL_IDENTIFIER;
} else { // vector then word
@@ -814,7 +825,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
goto restartLexer;
}
} else if (state == SCE_PL_IDENTIFIER) {
- if (!iswordstart(chNext) && chNext != '\'') {
+ if (!isWordStart(chNext) && chNext != '\'') {
styler.ColourTo(i, SCE_PL_IDENTIFIER);
state = SCE_PL_DEFAULT;
ch = ' ';
@@ -1044,7 +1055,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
if (isspacechar(ch)) {
// Keep going
}
- else if (isalnum(ch)) {
+ else if (isalnum(ch) || ch < 0) {
styler.ColourTo(i, state);
state = SCE_PL_DEFAULT;
ch = ' ';