about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- src/LexPerl.cxx 272
1 files changed, 183 insertions, 89 deletions
diff --git a/src/LexPerl.cxx b/src/LexPerl.cxx
index e8434e0cc..0514c93f1 100644
--- a/src/LexPerl.cxx
+++ b/src/LexPerl.cxx
@@ -2,7 +2,7 @@
/** @file LexPerl.cxx
** Lexer for subset of Perl.
**/
-// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
+// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#include <stdlib.h>
@@ -19,6 +19,12 @@
#include "Scintilla.h"
#include "SciLexer.h"
+#define PERLNUM_DECIMAL 1
+#define PERLNUM_NON_DEC 2
+#define PERLNUM_FLOAT 3
+#define PERLNUM_VECTOR 4
+#define PERLNUM_V_VECTOR 5
+
static inline bool isEOLChar(char ch) {
return (ch == '\r') || (ch == '\n');
}
@@ -46,18 +52,13 @@ static inline bool isPerlOperator(char ch) {
static int classifyWordPerl(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
char s[100];
- bool wordIsNumber = isdigit(styler[start]) || (styler[start] == '.');
for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
s[i] = styler[start + i];
s[i + 1] = '\0';
}
char chAttr = SCE_PL_IDENTIFIER;
- if (wordIsNumber)
- chAttr = SCE_PL_NUMBER;
- else {
- if (keywords.InList(s))
- chAttr = SCE_PL_WORD;
- }
+ if (keywords.InList(s))
+ chAttr = SCE_PL_WORD;
styler.ColourTo(end, chAttr);
return chAttr;
}
@@ -67,6 +68,19 @@ static inline bool isEndVar(char ch) {
ch != '_' && ch != '\'';
}
+static inline char actualNumStyle(int numberStyle) {
+ switch (numberStyle) {
+ case PERLNUM_VECTOR:
+ case PERLNUM_V_VECTOR:
+ return SCE_PL_STRING;
+ case PERLNUM_DECIMAL:
+ case PERLNUM_NON_DEC:
+ case PERLNUM_FLOAT:
+ default:
+ return SCE_PL_NUMBER;
+ }
+}
+
static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
return false;
@@ -146,9 +160,12 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
bool preferRE = true;
sooked[sookedpos] = '\0';
int state = initStyle;
+ int numState = PERLNUM_DECIMAL;
+ int dotCount = 0;
unsigned int lengthDoc = startPos + length;
- // If in a long distance lexical state, seek to the beginning to find quote characters
+ // If in a long distance lexical state, seek to the beginning to find quote characters
+ // Perl strings can be multi-line with embedded newlines.
if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) {
while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) {
startPos--;
@@ -163,6 +180,10 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
|| state == SCE_PL_STRING_QW
|| state == SCE_PL_REGEX
|| state == SCE_PL_REGSUBST
+ || state == SCE_PL_STRING
+ || state == SCE_PL_BACKTICKS
+ || state == SCE_PL_CHARACTER
+ || state == SCE_PL_NUMBER
) {
while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
startPos--;
@@ -179,6 +200,9 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
for (unsigned int i = startPos; i < lengthDoc; i++) {
char ch = chNext;
+ // if the current character is not consumed due to the completion of an
+ // earlier style, lexing can be restarted via a simple goto
+ restartLexer:
chNext = styler.SafeGetCharAt(i + 1);
char chNext2 = styler.SafeGetCharAt(i + 2);
@@ -189,6 +213,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
continue;
}
if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows
+ styler.ColourTo(i, state);
chPrev = ch;
continue;
}
@@ -229,7 +254,20 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
}
if (state == SCE_PL_DEFAULT) {
- if (iswordstart(ch)) {
+ if (isdigit(ch) || (isdigit(chNext) &&
+ (ch == '.' || ch == 'v'))) {
+ state = SCE_PL_NUMBER;
+ numState = PERLNUM_DECIMAL;
+ dotCount = 0;
+ preferRE = false;
+ if (ch == '0') { // hex,bin,octal
+ if (chNext == 'x' || chNext == 'b' || isdigit(chNext)) {
+ numState = PERLNUM_NON_DEC;
+ }
+ } else if (ch == 'v') { // vector
+ numState = PERLNUM_V_VECTOR;
+ }
+ } else if (iswordstart(ch)) {
styler.ColourTo(i - 1, state);
if (ch == 's' && !isalnum(chNext)) {
state = SCE_PL_REGSUBST;
@@ -256,6 +294,10 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
i++;
chNext = chNext2;
Quote.New(1);
+ } else if (ch == 'x' && (chNext == '=' || // repetition
+ (chNext != '_' && !isalnum(chNext)))) {
+ preferRE = true;
+ styler.ColourTo(i, SCE_PL_OPERATOR);
} else {
state = SCE_PL_WORD;
preferRE = false;
@@ -268,10 +310,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
}
}
} else if (ch == '#') {
- styler.ColourTo(i - 1, state);
state = SCE_PL_COMMENTLINE;
} else if (ch == '\"') {
- styler.ColourTo(i - 1, state);
state = SCE_PL_STRING;
Quote.New(1);
Quote.Open(ch);
@@ -280,31 +320,35 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
// Archaic call
styler.ColourTo(i, state);
} else {
- styler.ColourTo(i - 1, state);
state = SCE_PL_CHARACTER;
Quote.New(1);
Quote.Open(ch);
}
} else if (ch == '`') {
- styler.ColourTo(i - 1, state);
state = SCE_PL_BACKTICKS;
Quote.New(1);
Quote.Open(ch);
} else if (ch == '$') {
preferRE = false;
- styler.ColourTo(i - 1, state);
if ((chNext == '{') || isspacechar(chNext)) {
styler.ColourTo(i, SCE_PL_SCALAR);
} else {
state = SCE_PL_SCALAR;
- i++;
- ch = chNext;
- chNext = chNext2;
+ if (chNext == '`' && chNext2 == '`') {
+ styler.ColourTo(i - 1, SCE_PL_SCALAR);
+ i += 2;
+ ch = styler.SafeGetCharAt(i);
+ chNext = styler.SafeGetCharAt(i + 1);
+ } else {
+ i++;
+ ch = chNext;
+ chNext = chNext2;
+ }
}
} else if (ch == '@') {
preferRE = false;
- styler.ColourTo(i - 1, state);
- if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
+ if (isalpha(chNext) || chNext == '#' || chNext == '$'
+ || chNext == '_' || chNext == '+') {
state = SCE_PL_ARRAY;
} else if (chNext != '{' && chNext != '[') {
styler.ColourTo(i, SCE_PL_ARRAY);
@@ -315,7 +359,6 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
}
} else if (ch == '%') {
preferRE = false;
- styler.ColourTo(i - 1, state);
if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
state = SCE_PL_HASH;
} else if (chNext == '{') {
@@ -324,51 +367,124 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
styler.ColourTo(i, SCE_PL_OPERATOR);
}
} else if (ch == '*') {
- styler.ColourTo(i - 1, state);
- state = SCE_PL_SYMBOLTABLE;
+ if (isalpha(chNext) || chNext == '_' || chNext == '{') {
+ state = SCE_PL_SYMBOLTABLE;
+ } else {
+ if (chNext == '*') { // exponentiation
+ i++;
+ ch = chNext;
+ chNext = chNext2;
+ }
+ preferRE = true;
+ styler.ColourTo(i, SCE_PL_OPERATOR);
+ }
} else if (ch == '/' && preferRE) {
- styler.ColourTo(i - 1, state);
state = SCE_PL_REGEX;
Quote.New(1);
Quote.Open(ch);
} else if (ch == '<' && chNext == '<') {
- styler.ColourTo(i - 1, state);
state = SCE_PL_HERE_DELIM;
HereDoc.State = 0;
- } else if (ch == '='
+ } else if (ch == '=' // POD
&& isalpha(chNext)
&& (isEOLChar(chPrev))) {
- styler.ColourTo(i - 1, state);
state = SCE_PL_POD;
sookedpos = 0;
sooked[sookedpos] = '\0';
- } else if (ch == '-'
+ } else if (ch == '-' // file test operators
&& isSingleCharOp(chNext)
&& !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) {
- styler.ColourTo(i - 1, state);
styler.ColourTo(i + 1, SCE_PL_WORD);
state = SCE_PL_DEFAULT;
preferRE = false;
- i += 2;
- ch = chNext2;
- chNext = chNext2 = styler.SafeGetCharAt(i + 1);
+ i++;
+ ch = chNext;
+ chNext = chNext2;
} else if (isPerlOperator(ch)) {
- if (ch == ')' || ch == ']')
+ if (ch == '.' && chNext == '.') { // .. and ...
+ i++;
+ if (chNext2 == '.') { i++; }
+ state = SCE_PL_DEFAULT;
+ ch = styler.SafeGetCharAt(i);
+ chNext = styler.SafeGetCharAt(i + 1);
+ preferRE = true;
+ } else if (ch == ')' || ch == ']') {
preferRE = false;
- else
+ } else
preferRE = true;
- styler.ColourTo(i - 1, state);
styler.ColourTo(i, SCE_PL_OPERATOR);
+ } else {
+ // keep colouring defaults to make restart easier
+ styler.ColourTo(i, SCE_PL_DEFAULT);
+ }
+ } else if (state == SCE_PL_NUMBER) {
+ if (ch == '.') {
+ if (chNext == '.') {
+ // double dot is always an operator
+ goto numAtEnd;
+ } else if (numState == PERLNUM_NON_DEC || numState == PERLNUM_FLOAT) {
+ // non-decimal number or float exponent, consume next dot
+ styler.ColourTo(i - 1, SCE_PL_NUMBER);
+ styler.ColourTo(i, SCE_PL_OPERATOR);
+ preferRE = true;
+ state = SCE_PL_DEFAULT;
+ } else { // decimal or vectors allows dots
+ dotCount++;
+ if (numState == PERLNUM_DECIMAL) {
+ if (dotCount > 1) {
+ if (isdigit(chNext)) { // really a vector
+ numState = PERLNUM_VECTOR;
+ } else // number then dot
+ goto numAtEnd;
+ }
+ } else { // vectors
+ if (!isdigit(chNext)) // vector then dot
+ goto numAtEnd;
+ }
+ }
+ } else if (isalnum(ch)) {
+ if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
+ if (isalpha(ch)) {
+ if (dotCount == 0) { // change to word
+ preferRE = false;
+ state = SCE_PL_WORD;
+ } else { // vector then word
+ goto numAtEnd;
+ }
+ }
+ } else if (numState == PERLNUM_DECIMAL) {
+ if (ch == 'E' || ch == 'e') { // exponent
+ numState = PERLNUM_FLOAT;
+ if (chNext == '+' || chNext == '-') {
+ i++;
+ ch = chNext;
+ chNext = chNext2;
+ }
+ } else if (!isdigit(ch)) { // number then word
+ goto numAtEnd;
+ }
+ } else if (numState == PERLNUM_FLOAT) {
+ if (!isdigit(ch)) { // float then word
+ goto numAtEnd;
+ }
+ } else {// PERLNUM_NON_DEC
+ // allow alphanum for bin,hex,oct for now
+ }
+ } else {
+ // complete current number or vector
+ numAtEnd:
+ styler.ColourTo(i - 1, actualNumStyle(numState));
+ preferRE = false;
+ state = SCE_PL_DEFAULT;
+ goto restartLexer;
}
} else if (state == SCE_PL_WORD) {
if ((!iswordchar(chNext) && chNext != '\'')
|| (chNext == '.' && chNext2 == '.')) {
// ".." is always an operator if preceded by a SCE_PL_WORD.
// Archaic Perl has quotes inside names
- if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")) {
- styler.ColourTo(i, SCE_PL_DATASECTION);
- state = SCE_PL_DATASECTION;
- } else if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__")) {
+ if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")
+ || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__")) {
styler.ColourTo(i, SCE_PL_DATASECTION);
state = SCE_PL_DATASECTION;
} else {
@@ -383,6 +499,10 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
if (isEOLChar(ch)) {
styler.ColourTo(i - 1, state);
state = SCE_PL_DEFAULT;
+ goto restartLexer;
+ } else if (isEOLChar(chNext)) {
+ styler.ColourTo(i, state);
+ state = SCE_PL_DEFAULT;
}
} else if (state == SCE_PL_HERE_DELIM) {
//
@@ -426,7 +546,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
i++;
ch = chNext;
chNext = chNext2;
- } else if (isdigit(chNext)) { // left shift operator if next char is a digit
+ } else if (isdigit(chNext) || chNext == '=') { // left shift << or <<= operator
styler.ColourTo(i, SCE_PL_OPERATOR);
state = SCE_PL_DEFAULT;
HereDoc.State = 0;
@@ -444,9 +564,6 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
if (ch == HereDoc.Quote) { // closing quote => end of delimiter
styler.ColourTo(i, state);
state = SCE_PL_DEFAULT;
- i++;
- ch = chNext;
- chNext = chNext2;
} else {
if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote
i++;
@@ -463,24 +580,27 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
} else {
styler.ColourTo(i - 1, state);
state = SCE_PL_DEFAULT;
+ goto restartLexer;
}
}
if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
styler.ColourTo(i - 1, state);
state = SCE_PL_ERROR;
+ goto restartLexer;
}
}
} else if (HereDoc.State == 2) {
// state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX
if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
i += HereDoc.DelimiterLength;
- chNext = styler.SafeGetCharAt(i);
- if (isEOLChar(chNext)) {
+ chPrev = styler.SafeGetCharAt(i - 1);
+ ch = styler.SafeGetCharAt(i);
+ if (isEOLChar(ch)) {
styler.ColourTo(i - 1, state);
state = SCE_PL_DEFAULT;
HereDoc.State = 0;
+ goto restartLexer;
}
- ch = chNext;
chNext = styler.SafeGetCharAt(i + 1);
}
} else if (state == SCE_PL_POD) {
@@ -490,33 +610,30 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
i += 4;
state = SCE_PL_DEFAULT;
ch = styler.SafeGetCharAt(i);
- chNext = styler.SafeGetCharAt(i + 1);
+ //chNext = styler.SafeGetCharAt(i + 1);
+ goto restartLexer;
}
}
- } else if (state == SCE_PL_SCALAR) {
- if (isEndVar(ch)) {
- if (i == (styler.GetStartSegment() + 1)) {
+ } else if (state == SCE_PL_SCALAR // variable names
+ || state == SCE_PL_ARRAY
+ || state == SCE_PL_HASH
+ || state == SCE_PL_SYMBOLTABLE) {
+ if (ch == ':' && chNext == ':') { // skip ::
+ i++;
+ ch = chNext;
+ chNext = chNext2;
+ }
+ else if (isEndVar(ch)) {
+ if ((state == SCE_PL_SCALAR || state == SCE_PL_ARRAY)
+ && i == (styler.GetStartSegment() + 1)) {
// Special variable: $(, $_ etc.
styler.ColourTo(i, state);
+ state = SCE_PL_DEFAULT;
} else {
styler.ColourTo(i - 1, state);
+ state = SCE_PL_DEFAULT;
+ goto restartLexer;
}
- state = SCE_PL_DEFAULT;
- }
- } else if (state == SCE_PL_ARRAY) {
- if (isEndVar(ch)) {
- styler.ColourTo(i - 1, state);
- state = SCE_PL_DEFAULT;
- }
- } else if (state == SCE_PL_HASH) {
- if (isEndVar(ch)) {
- styler.ColourTo(i - 1, state);
- state = SCE_PL_DEFAULT;
- }
- } else if (state == SCE_PL_SYMBOLTABLE) {
- if (isEndVar(ch)) {
- styler.ColourTo(i - 1, state);
- state = SCE_PL_DEFAULT;
}
} else if (state == SCE_PL_REGEX
|| state == SCE_PL_STRING_QR
@@ -643,29 +760,6 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
Quote.Count++;
}
}
-
- if (state == SCE_PL_DEFAULT) { // One of the above succeeded
- if (ch == '#') {
- state = SCE_PL_COMMENTLINE;
- } else if (ch == '\"') {
- state = SCE_PL_STRING;
- Quote.New(1);
- Quote.Open(ch);
- } else if (ch == '\'') {
- state = SCE_PL_CHARACTER;
- Quote.New(1);
- Quote.Open(ch);
- } else if (iswordstart(ch)) {
- state = SCE_PL_WORD;
- preferRE = false;
- } else if (isPerlOperator(ch)) {
- if (ch == ')' || ch == ']')
- preferRE = false;
- else
- preferRE = true;
- styler.ColourTo(i, SCE_PL_OPERATOR);
- }
- }
}
if (state == SCE_PL_ERROR) {
break;