aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authornyamatongwe <unknown>2007-07-14 10:58:32 +0000
committernyamatongwe <unknown>2007-07-14 10:58:32 +0000
commit665de67a3e1e045af6e0131a0e917df8a171d2d5 (patch)
tree95ff937f63be879b9e37d0bd7caa05a7956a99f7
parent79692af6c51bb115e4a8ddb657eda3d713152730 (diff)
downloadscintilla-mirror-665de67a3e1e045af6e0131a0e917df8a171d2d5.tar.gz
Changes from Kein-Hong Man: fix bareword issues, support underscores
in numeric literals, and handle ^D and ^Z, subroutine prototypes, and formats.
-rw-r--r--include/SciLexer.h3
-rw-r--r--include/Scintilla.iface3
-rw-r--r--src/LexPerl.cxx173
3 files changed, 142 insertions, 37 deletions
diff --git a/include/SciLexer.h b/include/SciLexer.h
index 85fdfe36f..692cc2198 100644
--- a/include/SciLexer.h
+++ b/include/SciLexer.h
@@ -317,6 +317,9 @@
#define SCE_PL_STRING_QR 29
#define SCE_PL_STRING_QW 30
#define SCE_PL_POD_VERB 31
+#define SCE_PL_SUB_PROTOTYPE 40
+#define SCE_PL_FORMAT_IDENT 41
+#define SCE_PL_FORMAT 42
#define SCE_RB_DEFAULT 0
#define SCE_RB_ERROR 1
#define SCE_RB_COMMENTLINE 2
diff --git a/include/Scintilla.iface b/include/Scintilla.iface
index 6d05e437e..8e2910cb9 100644
--- a/include/Scintilla.iface
+++ b/include/Scintilla.iface
@@ -2237,6 +2237,9 @@ val SCE_PL_STRING_QX=28
val SCE_PL_STRING_QR=29
val SCE_PL_STRING_QW=30
val SCE_PL_POD_VERB=31
+val SCE_PL_SUB_PROTOTYPE=40
+val SCE_PL_FORMAT_IDENT=41
+val SCE_PL_FORMAT=42
# Lexical states for SCLEX_RUBY
lex Ruby=SCLEX_RUBY SCE_RB_
val SCE_RB_DEFAULT=0
diff --git a/src/LexPerl.cxx b/src/LexPerl.cxx
index bcc74d972..22b79d711 100644
--- a/src/LexPerl.cxx
+++ b/src/LexPerl.cxx
@@ -2,7 +2,7 @@
/** @file LexPerl.cxx
** Lexer for subset of Perl.
**/
-// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
+// Copyright 1998-2007 by Neil Hodgson <neilh@scintilla.org>
// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
// The License.txt file describes the conditions under which this software may be distributed.
@@ -187,6 +187,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
//char sooked[100];
//sooked[sookedpos] = '\0';
+ styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
// If in a long distance lexical state, seek to the beginning to find quote characters
// Perl strings can be multi-line with embedded newlines, so backtrack.
// Perl numbers have additional state during lexing, so backtrack too.
@@ -197,6 +198,14 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
startPos = styler.LineStart(styler.GetLine(startPos));
state = styler.StyleAt(startPos - 1);
}
+ // Backtrack for format body.
+ if (state == SCE_PL_FORMAT) {
+ while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_FORMAT_IDENT)) {
+ startPos--;
+ }
+ startPos = styler.LineStart(styler.GetLine(startPos));
+ state = styler.StyleAt(startPos - 1);
+ }
if ( state == SCE_PL_STRING_Q
|| state == SCE_PL_STRING_QQ
|| state == SCE_PL_STRING_QX
@@ -210,6 +219,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
|| state == SCE_PL_NUMBER
|| state == SCE_PL_IDENTIFIER
|| state == SCE_PL_ERROR
+ || state == SCE_PL_SUB_PROTOTYPE
) {
while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
startPos--;
@@ -233,7 +243,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
backflag = BACK_KEYWORD;
}
- styler.StartAt(startPos);
+ styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
char chPrev = styler.SafeGetCharAt(startPos - 1);
if (startPos == 0)
chPrev = '\n';
@@ -294,6 +304,12 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
}
}
}
+ if (HereDoc.State == 4 && isEOLChar(ch)) {
+ // Start of format body.
+ HereDoc.State = 0;
+ styler.ColourTo(i - 1, state);
+ state = SCE_PL_FORMAT;
+ }
if (state == SCE_PL_DEFAULT) {
if ((isascii(ch) && isdigit(ch)) || (isascii(chNext) && isdigit(chNext) &&
@@ -431,8 +447,13 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
styler.ColourTo(i, SCE_PL_DATASECTION);
state = SCE_PL_DATASECTION;
} else {
+ if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "format")) {
+ state = SCE_PL_FORMAT_IDENT;
+ HereDoc.State = 0;
+ } else {
+ state = SCE_PL_DEFAULT;
+ }
styler.ColourTo(i, SCE_PL_WORD);
- state = SCE_PL_DEFAULT;
backflag = BACK_KEYWORD;
backPos = i;
}
@@ -440,8 +461,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
chNext = styler.SafeGetCharAt(i + 1);
// a repetition operator 'x'
} else if (state == SCE_PL_OPERATOR) {
- styler.ColourTo(i, SCE_PL_OPERATOR);
state = SCE_PL_DEFAULT;
+ goto handleOperator;
// quote-like delimiter, skip one char if double-char delimiter
} else {
i = kw - 1;
@@ -502,6 +523,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
}
backflag = BACK_NONE;
} else if (ch == '%') {
+ backflag = BACK_NONE;
if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$'
|| chNext == '_' || chNext == '!' || chNext == '^') {
state = SCE_PL_HASH;
@@ -516,10 +538,10 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
} else if (chNext == '{') {
styler.ColourTo(i, SCE_PL_HASH);
} else {
- styler.ColourTo(i, SCE_PL_OPERATOR);
+ goto handleOperator;
}
- backflag = BACK_NONE;
} else if (ch == '*') {
+ backflag = BACK_NONE;
char strch[2];
strch[0] = chNext;
strch[1] = '\0';
@@ -542,9 +564,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
ch = chNext;
chNext = chNext2;
}
- styler.ColourTo(i, SCE_PL_OPERATOR);
+ goto handleOperator;
}
- backflag = BACK_NONE;
} else if (ch == '/' || (ch == '<' && chNext == '<')) {
// Explicit backward peeking to set a consistent preferRE for
// any slash found, so no longer need to track preferRE state.
@@ -631,18 +652,12 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
if (bkstyle == SCE_PL_DEFAULT ||
bkstyle == SCE_PL_COMMENTLINE) {
} else if (bkstyle == SCE_PL_OPERATOR) {
- // gcc 3.2.3 bloats if more compact form used
bkch = styler.SafeGetCharAt(bk);
- if (bkch == '>') { // "->"
- if (styler.SafeGetCharAt(bk - 1) == '-') {
- preferRE = false;
- break;
- }
- } else if (bkch == ':') { // "::"
- if (styler.SafeGetCharAt(bk - 1) == ':') {
- preferRE = false;
- break;
- }
+ // test for "->" and "::"
+ if ((bkch == '>' && styler.SafeGetCharAt(bk - 1) == '-')
+ || (bkch == ':' && styler.SafeGetCharAt(bk - 1) == ':')) {
+ preferRE = false;
+ break;
}
} else {// bare identifier, usually a function call but Perl
// optimizes them as pseudo-constants, then the next
@@ -670,6 +685,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
break;
}
}
+ backflag = BACK_NONE;
if (isHereDoc) { // handle HERE doc
// if SCALAR whitespace '<<', *always* a HERE doc
if (preferRE || (hereDocSpace && hereDocScalar)) {
@@ -679,7 +695,7 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
i++;
ch = chNext;
chNext = chNext2;
- styler.ColourTo(i, SCE_PL_OPERATOR);
+ goto handleOperator;
}
} else { // handle regexp
if (preferRE) {
@@ -687,10 +703,9 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
Quote.New(1);
Quote.Open(ch);
} else { // / operator
- styler.ColourTo(i, SCE_PL_OPERATOR);
+ goto handleOperator;
}
}
- backflag = BACK_NONE;
} else if (ch == '<') {
// looks forward for matching > on same line
unsigned int fw = i + 1;
@@ -699,24 +714,23 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
if (fwch == ' ') {
if (styler.SafeGetCharAt(fw-1) != '\\' ||
styler.SafeGetCharAt(fw-2) != '\\')
- break;
+ goto handleOperator;
} else if (isEOLChar(fwch) || isspacechar(fwch)) {
- break;
+ goto handleOperator;
} else if (fwch == '>') {
if ((fw - i) == 2 && // '<=>' case
styler.SafeGetCharAt(fw-1) == '=') {
- styler.ColourTo(fw, SCE_PL_OPERATOR);
- } else {
- styler.ColourTo(fw, SCE_PL_IDENTIFIER);
+ goto handleOperator;
}
+ styler.ColourTo(fw, SCE_PL_IDENTIFIER);
i = fw;
ch = fwch;
chNext = styler.SafeGetCharAt(i+1);
}
fw++;
}
- styler.ColourTo(i, SCE_PL_OPERATOR);
- backflag = BACK_NONE;
+ if (fw == lengthDoc)
+ goto handleOperator;
} else if (ch == '=' // POD
&& isalpha(chNext)
&& (isEOLChar(chPrev))) {
@@ -733,6 +747,35 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
ch = chNext;
chNext = chNext2;
backflag = BACK_NONE;
+ } else if (ch == '-' // bareword promotion (-FOO cases)
+ && ((isascii(chNext) && isalpha(chNext)) || chNext == '_')
+ && backflag != BACK_NONE) {
+ state = SCE_PL_IDENTIFIER;
+ backflag = BACK_NONE;
+ } else if (ch == '(' && i > 0) {
+ // backtrack to identify if we're starting a sub prototype
+ // for generality, we need to ignore whitespace/comments
+ unsigned int bk = i - 1; // i > 0 tested above
+ styler.Flush();
+ while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
+ styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
+ bk--;
+ }
+ if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier
+ goto handleOperator;
+ while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
+ bk--;
+ }
+ while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
+ styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
+ bk--;
+ }
+ if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword
+ || !styler.Match(bk - 2, "sub")) // assume suffix is unique!
+ goto handleOperator;
+ state = SCE_PL_SUB_PROTOTYPE;
+ backflag = BACK_NONE;
+ backPos = i; // needed for restart
} else if (isPerlOperator(ch)) {
if (ch == '.' && chNext == '.') { // .. and ...
i++;
@@ -741,10 +784,14 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
ch = styler.SafeGetCharAt(i);
chNext = styler.SafeGetCharAt(i + 1);
}
+ handleOperator:
styler.ColourTo(i, SCE_PL_OPERATOR);
backflag = BACK_OPERATOR;
backPos = i;
- } else {
+ } else if (ch == 4 || ch == 26) { // ^D and ^Z ends valid perl source
+ styler.ColourTo(i, SCE_PL_DATASECTION);
+ state = SCE_PL_DATASECTION;
+ } else {
// keep colouring defaults to make restart easier
styler.ColourTo(i, SCE_PL_DEFAULT);
}
@@ -756,8 +803,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
} else if (numState <= PERLNUM_FLOAT) {
// non-decimal number or float exponent, consume next dot
styler.ColourTo(i - 1, SCE_PL_NUMBER);
- styler.ColourTo(i, SCE_PL_OPERATOR);
state = SCE_PL_DEFAULT;
+ goto handleOperator;
} else { // decimal or vectors allows dots
dotCount++;
if (numState == PERLNUM_DECIMAL) {
@@ -772,10 +819,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
goto numAtEnd;
}
}
- } else if (ch == '_' && numState == PERLNUM_DECIMAL) {
- if (!isdigit(chNext)) {
- goto numAtEnd;
- }
+ } else if (ch == '_') {
+ // permissive underscoring for number and vector literals
} else if (!isascii(ch) || isalnum(ch)) {
if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
if (!isascii(ch) || isalpha(ch)) {
@@ -899,6 +944,8 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
i = oldi;
styler.ColourTo(i, SCE_PL_OPERATOR);
state = SCE_PL_DEFAULT;
+ backflag = BACK_OPERATOR;
+ backPos = i;
HereDoc.State = 0;
goto restartLexer;
} else {
@@ -1118,7 +1165,59 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
} else if (ch == Quote.Up) {
Quote.Count++;
}
- }
+ } else if (state == SCE_PL_SUB_PROTOTYPE) {
+ char strch[2];
+ strch[0] = ch;
+ strch[1] = '\0';
+ if (NULL != strstr("\\[$@%&*];", strch)) {
+ // keep going
+ } else if (ch == ')') {
+ styler.ColourTo(i, state);
+ state = SCE_PL_DEFAULT;
+ } else {
+ // abandon prototype, restart from '('
+ i = backPos;
+ styler.ColourTo(i, SCE_PL_OPERATOR);
+ ch = styler.SafeGetCharAt(i);
+ chNext = styler.SafeGetCharAt(i + 1);
+ state = SCE_PL_DEFAULT;
+ }
+ } else if (state == SCE_PL_FORMAT_IDENT) {
+ // occupies different HereDoc states to avoid clashing with HERE docs
+ if (HereDoc.State == 0) {
+ if (chNext != ' ' && chNext != '\t') {
+ styler.ColourTo(i, SCE_PL_DEFAULT);
+ if ((isascii(chNext) && isalpha(chNext)) || chNext == '_' // probable identifier
+ || chNext == '=') { // no identifier
+ HereDoc.State = 3;
+ } else {
+ state = SCE_PL_DEFAULT;
+ HereDoc.State = 0;
+ }
+ }
+ } else if (HereDoc.State == 3) {
+ if (ch == '=') {
+ styler.ColourTo(i, SCE_PL_FORMAT_IDENT);
+ state = SCE_PL_DEFAULT;
+ HereDoc.State = 4;
+ } else if (isEOLChar(ch)) {
+ // abandon format, restart from after 'format'
+ i = backPos + 1;
+ ch = styler.SafeGetCharAt(i);
+ chNext = styler.SafeGetCharAt(i + 1);
+ state = SCE_PL_DEFAULT;
+ HereDoc.State = 0;
+ }
+ }
+ } else if (state == SCE_PL_FORMAT) {
+ if (isEOLChar(chPrev)) {
+ styler.ColourTo(i - 1, state);
+ if (ch == '.' && isEOLChar(chNext)) {
+ styler.ColourTo(i, state);
+ state = SCE_PL_DEFAULT;
+ }
+ }
+ }
}
if (state == SCE_PL_ERROR) {
break;
@@ -1256,5 +1355,5 @@ static const char * const perlWordListDesc[] = {
0
};
-LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc);
+LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc, 8);