aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--include/SciLexer.h10
-rw-r--r--include/Scintilla.iface10
-rw-r--r--lexers/LexPerl.cxx342
3 files changed, 302 insertions, 60 deletions
diff --git a/include/SciLexer.h b/include/SciLexer.h
index b265012cb..e8805f2d6 100644
--- a/include/SciLexer.h
+++ b/include/SciLexer.h
@@ -344,6 +344,16 @@
#define SCE_PL_SUB_PROTOTYPE 40
#define SCE_PL_FORMAT_IDENT 41
#define SCE_PL_FORMAT 42
+#define SCE_PL_STRING_VAR 43
+#define SCE_PL_XLAT 44
+#define SCE_PL_REGEX_VAR 54
+#define SCE_PL_REGSUBST_VAR 55
+#define SCE_PL_BACKTICKS_VAR 57
+#define SCE_PL_HERE_QQ_VAR 61
+#define SCE_PL_HERE_QX_VAR 62
+#define SCE_PL_STRING_QQ_VAR 64
+#define SCE_PL_STRING_QX_VAR 65
+#define SCE_PL_STRING_QR_VAR 66
#define SCE_RB_DEFAULT 0
#define SCE_RB_ERROR 1
#define SCE_RB_COMMENTLINE 2
diff --git a/include/Scintilla.iface b/include/Scintilla.iface
index 7f637ef0b..9530f4292 100644
--- a/include/Scintilla.iface
+++ b/include/Scintilla.iface
@@ -2683,6 +2683,16 @@ val SCE_PL_POD_VERB=31
val SCE_PL_SUB_PROTOTYPE=40
val SCE_PL_FORMAT_IDENT=41
val SCE_PL_FORMAT=42
+val SCE_PL_STRING_VAR=43
+val SCE_PL_XLAT=44
+val SCE_PL_REGEX_VAR=54
+val SCE_PL_REGSUBST_VAR=55
+val SCE_PL_BACKTICKS_VAR=57
+val SCE_PL_HERE_QQ_VAR=61
+val SCE_PL_HERE_QX_VAR=62
+val SCE_PL_STRING_QQ_VAR=64
+val SCE_PL_STRING_QX_VAR=65
+val SCE_PL_STRING_QR_VAR=66
# Lexical states for SCLEX_RUBY
lex Ruby=SCLEX_RUBY SCE_RB_
val SCE_RB_DEFAULT=0
diff --git a/lexers/LexPerl.cxx b/lexers/LexPerl.cxx
index 5efc1d636..8a0f6422e 100644
--- a/lexers/LexPerl.cxx
+++ b/lexers/LexPerl.cxx
@@ -69,6 +69,10 @@ using namespace Scintilla;
#define BACK_OPERATOR 1 // whitespace/comments are insignificant
#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
+// all interpolated styles are different from their parent styles by a constant difference
+// we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
+#define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING)
+
static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, LexAccessor &styler) {
// old-style keyword matcher; needed because GetCurrent() needs
// current segment to be committed, but we may abandon early...
@@ -352,11 +356,19 @@ struct OptionSetPerl : public OptionSet<OptionsPerl> {
};
class LexerPerl : public ILexer {
+ CharacterSet setWordStart;
+ CharacterSet setWord;
+ CharacterSet setSpecialVar;
+ CharacterSet setControlVar;
WordList keywords;
OptionsPerl options;
OptionSetPerl osPerl;
public:
- LexerPerl() {
+ LexerPerl() :
+ setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
+ setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
+ setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
+ setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
}
~LexerPerl() {
}
@@ -390,6 +402,7 @@ public:
static ILexer *LexerFactoryPerl() {
return new LexerPerl();
}
+ void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
};
int SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
@@ -418,6 +431,90 @@ int SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
return firstModification;
}
+void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
+ // interpolate a segment (with no active backslashes or delimiters within)
+ // switch in or out of an interpolation style or continue current style
+ // commit variable patterns if found, trim segment, repeat until done
+ while (maxSeg > 0) {
+ bool isVar = false;
+ int sLen = 0;
+ if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
+ // $#[$]*word [$@][$]*word (where word or {word} is always present)
+ bool braces = false;
+ sLen = 1;
+ if (sc.ch == '$' && sc.chNext == '#') { // starts with $#
+ sLen++;
+ }
+ while ((maxSeg > sLen) && (sc.GetRelative(sLen) == '$')) // >0 $ dereference within
+ sLen++;
+ if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '{')) { // { start for {word}
+ sLen++;
+ braces = true;
+ }
+ if (maxSeg > sLen) {
+ int c = sc.GetRelative(sLen);
+ if (setWordStart.Contains(c)) { // word (various)
+ sLen++;
+ isVar = true;
+ while ((maxSeg > sLen) && setWord.Contains(sc.GetRelative(sLen)))
+ sLen++;
+ } else if (braces && IsADigit(c) && (sLen == 2)) { // digit for ${digit}
+ sLen++;
+ isVar = true;
+ }
+ }
+ if (braces) {
+ if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '}')) { // } end for {word}
+ sLen++;
+ } else
+ isVar = false;
+ }
+ }
+ if (!isVar && (maxSeg > 1)) { // $- or @-specific variable patterns
+ sLen = 1;
+ int c = sc.chNext;
+ if (sc.ch == '$') {
+ if (IsADigit(c)) { // $[0-9] and slurp trailing digits
+ sLen++;
+ isVar = true;
+ while ((maxSeg > sLen) && IsADigit(sc.GetRelative(sLen)))
+ sLen++;
+ } else if (setSpecialVar.Contains(c)) { // $ special variables
+ sLen++;
+ isVar = true;
+ } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) { // $ additional
+ sLen++;
+ isVar = true;
+ } else if (c == '^') { // $^A control-char style
+ sLen++;
+ if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelative(sLen))) {
+ sLen++;
+ isVar = true;
+ }
+ }
+ } else if (sc.ch == '@') {
+ if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern
+ sLen++;
+ isVar = true;
+ }
+ }
+ }
+ if (isVar) { // commit as interpolated variable or normal character
+ if (sc.state < SCE_PL_STRING_VAR)
+ sc.SetState(sc.state + INTERPOLATE_SHIFT);
+ sc.Forward(sLen);
+ maxSeg -= sLen;
+ } else {
+ if (sc.state >= SCE_PL_STRING_VAR)
+ sc.SetState(sc.state - INTERPOLATE_SHIFT);
+ sc.Forward();
+ maxSeg--;
+ }
+ }
+ if (sc.state >= SCE_PL_STRING_VAR)
+ sc.SetState(sc.state - INTERPOLATE_SHIFT);
+}
+
void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
LexAccessor styler(pAccess);
@@ -426,8 +523,6 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
reWords.Set("elsif if split while");
// charset classes
- CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
- CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true);
CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
// lexing of "%*</" operators is non-trivial; these are missing in the set below
CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
@@ -512,10 +607,13 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
// Includes strings (may be multi-line), numbers (additional state), format
// bodies, as well as POD sections.
if (initStyle == SCE_PL_HERE_Q
- || initStyle == SCE_PL_HERE_QQ
- || initStyle == SCE_PL_HERE_QX
- || initStyle == SCE_PL_FORMAT
+ || initStyle == SCE_PL_HERE_QQ
+ || initStyle == SCE_PL_HERE_QX
+ || initStyle == SCE_PL_FORMAT
+ || initStyle == SCE_PL_HERE_QQ_VAR
+ || initStyle == SCE_PL_HERE_QX_VAR
) {
+ // backtrack through multiple styles to reach the delimiter start
int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
startPos--;
@@ -523,15 +621,34 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
startPos = styler.LineStart(styler.GetLine(startPos));
initStyle = styler.StyleAt(startPos - 1);
}
- if (initStyle == SCE_PL_STRING_Q
- || initStyle == SCE_PL_STRING_QQ
- || initStyle == SCE_PL_STRING_QX
- || initStyle == SCE_PL_STRING_QR
+ if (initStyle == SCE_PL_STRING
+ || initStyle == SCE_PL_STRING_QQ
+ || initStyle == SCE_PL_BACKTICKS
+ || initStyle == SCE_PL_STRING_QX
+ || initStyle == SCE_PL_REGEX
+ || initStyle == SCE_PL_STRING_QR
+ || initStyle == SCE_PL_REGSUBST
+ || initStyle == SCE_PL_STRING_VAR
+ || initStyle == SCE_PL_STRING_QQ_VAR
+ || initStyle == SCE_PL_BACKTICKS_VAR
+ || initStyle == SCE_PL_STRING_QX_VAR
+ || initStyle == SCE_PL_REGEX_VAR
+ || initStyle == SCE_PL_STRING_QR_VAR
+ || initStyle == SCE_PL_REGSUBST_VAR
+ ) {
+ // for interpolation, must backtrack through a mix of two different styles
+ int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
+ initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
+ while (startPos > 1) {
+ int st = styler.StyleAt(startPos - 1);
+ if ((st != initStyle) && (st != otherStyle))
+ break;
+ startPos--;
+ }
+ initStyle = SCE_PL_DEFAULT;
+ } else if (initStyle == SCE_PL_STRING_Q
|| initStyle == SCE_PL_STRING_QW
- || initStyle == SCE_PL_REGEX
- || initStyle == SCE_PL_REGSUBST
- || initStyle == SCE_PL_STRING
- || initStyle == SCE_PL_BACKTICKS
+ || initStyle == SCE_PL_XLAT
|| initStyle == SCE_PL_CHARACTER
|| initStyle == SCE_PL_NUMBER
|| initStyle == SCE_PL_IDENTIFIER
@@ -762,19 +879,49 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
break;
case SCE_PL_HERE_Q:
case SCE_PL_HERE_QQ:
- case SCE_PL_HERE_QX: {
- // also implies HereDoc.State == 2
- sc.Complete();
- if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
+ case SCE_PL_HERE_QX:
+ // also implies HereDoc.State == 2
+ sc.Complete();
+ if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
+ int c = sc.GetRelative(HereDoc.DelimiterLength);
+ if (c == '\r' || c == '\n') { // peek first, do not consume match
sc.Forward(HereDoc.DelimiterLength);
- if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n'))) {
sc.SetState(SCE_PL_DEFAULT);
backFlag = BACK_NONE;
HereDoc.State = 0;
+ if (!sc.atLineEnd)
+ sc.Forward();
+ break;
}
}
+ if (sc.state == SCE_PL_HERE_Q) { // \EOF and 'EOF' non-interpolated
while (!sc.atLineEnd)
sc.Forward();
+ break;
+ }
+ while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated
+ int s = 0, endType = 0;
+ int maxSeg = endPos - sc.currentPos;
+ while (s < maxSeg) { // scan to break string into segments
+ int c = sc.GetRelative(s);
+ if (c == '\\') {
+ endType = 1; break;
+ } else if (c == '\r' || c == '\n') {
+ endType = 2; break;
+ }
+ s++;
+ }
+ if (s > 0) // process non-empty segments
+ InterpolateSegment(sc, s);
+ if (endType == 1) {
+ sc.Forward();
+ // \ at end-of-line does not appear to have any effect, skip
+ if (sc.ch != '\r' && sc.ch != '\n')
+ sc.Forward();
+ } else if (endType == 2) {
+ if (!sc.atLineEnd)
+ sc.Forward();
+ }
}
break;
case SCE_PL_POD:
@@ -826,45 +973,89 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
sc.SetState(SCE_PL_DEFAULT);
} else if (!Quote.Up && !IsASpace(sc.ch)) {
Quote.Open(sc.ch);
- } else if (sc.ch == '\\' && Quote.Up != '\\') {
- sc.Forward();
- } else if (sc.ch == Quote.Down) {
- Quote.Count--;
- if (Quote.Count == 0)
- Quote.Rep--;
- } else if (sc.ch == Quote.Up) {
- Quote.Count++;
+ } else {
+ int s = 0, endType = 0;
+ int maxSeg = endPos - sc.currentPos;
+ while (s < maxSeg) { // scan to break string into segments
+ int c = sc.GetRelative(s);
+ if (IsASpace(c)) {
+ break;
+ } else if (c == '\\' && Quote.Up != '\\') {
+ endType = 1; break;
+ } else if (c == Quote.Down) {
+ Quote.Count--;
+ if (Quote.Count == 0) {
+ Quote.Rep--;
+ break;
+ }
+ } else if (c == Quote.Up)
+ Quote.Count++;
+ s++;
+ }
+ if (s > 0) { // process non-empty segments
+ if (Quote.Up != '\'') {
+ InterpolateSegment(sc, s, true);
+ } else // non-interpolated path
+ sc.Forward(s);
+ }
+ if (endType == 1)
+ sc.Forward();
}
break;
case SCE_PL_REGSUBST:
+ case SCE_PL_XLAT:
if (Quote.Rep <= 0) {
if (!setModifiers.Contains(sc.ch))
sc.SetState(SCE_PL_DEFAULT);
} else if (!Quote.Up && !IsASpace(sc.ch)) {
Quote.Open(sc.ch);
- } else if (sc.ch == '\\' && Quote.Up != '\\') {
- sc.Forward();
- } else if (Quote.Count == 0 && Quote.Rep == 1) {
- // We matched something like s(...) or tr{...}, Perl 5.10
- // appears to allow almost any character for use as the
- // next delimiters. Whitespace and comments are accepted in
- // between, but we'll limit to whitespace here.
- // For '#', if no whitespace in between, it's a delimiter.
- if (IsASpace(sc.ch)) {
- // Keep going
- } else if (sc.ch == '#' && IsASpaceOrTab(sc.chPrev)) {
- sc.SetState(SCE_PL_DEFAULT);
- } else {
- Quote.Open(sc.ch);
+ } else {
+ int s = 0, endType = 0;
+ int maxSeg = endPos - sc.currentPos;
+ bool isPattern = (Quote.Rep == 2);
+ while (s < maxSeg) { // scan to break string into segments
+ int c = sc.GetRelative(s);
+ if (c == '\\' && Quote.Up != '\\') {
+ endType = 2; break;
+ } else if (Quote.Count == 0 && Quote.Rep == 1) {
+ // We matched something like s(...) or tr{...}, Perl 5.10
+ // appears to allow almost any character for use as the
+ // next delimiters. Whitespace and comments are accepted in
+ // between, but we'll limit to whitespace here.
+ // For '#', if no whitespace in between, it's a delimiter.
+ if (IsASpace(c)) {
+ // Keep going
+ } else if (c == '#' && IsASpaceOrTab(sc.GetRelative(s - 1))) {
+ endType = 3;
+ } else
+ Quote.Open(c);
+ break;
+ } else if (c == Quote.Down) {
+ Quote.Count--;
+ if (Quote.Count == 0) {
+ Quote.Rep--;
+ endType = 1;
+ }
+ if (Quote.Up == Quote.Down)
+ Quote.Count++;
+ if (endType == 1)
+ break;
+ } else if (c == Quote.Up) {
+ Quote.Count++;
+ } else if (IsASpace(c))
+ break;
+ s++;
}
- } else if (sc.ch == Quote.Down) {
- Quote.Count--;
- if (Quote.Count == 0)
- Quote.Rep--;
- if (Quote.Up == Quote.Down)
- Quote.Count++;
- } else if (sc.ch == Quote.Up) {
- Quote.Count++;
+ if (s > 0) { // process non-empty segments
+ if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
+ InterpolateSegment(sc, s, isPattern);
+ } else // non-interpolated path
+ sc.Forward(s);
+ }
+ if (endType == 2) {
+ sc.Forward();
+ } else if (endType == 3)
+ sc.SetState(SCE_PL_DEFAULT);
}
break;
case SCE_PL_STRING_Q:
@@ -876,14 +1067,45 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
case SCE_PL_BACKTICKS:
if (!Quote.Down && !IsASpace(sc.ch)) {
Quote.Open(sc.ch);
- } else if (sc.ch == '\\' && Quote.Up != '\\') {
- sc.Forward();
- } else if (sc.ch == Quote.Down) {
- Quote.Count--;
- if (Quote.Count == 0)
+ } else {
+ int s = 0, endType = 0;
+ int maxSeg = endPos - sc.currentPos;
+ while (s < maxSeg) { // scan to break string into segments
+ int c = sc.GetRelative(s);
+ if (IsASpace(c)) {
+ break;
+ } else if (c == '\\' && Quote.Up != '\\') {
+ endType = 2; break;
+ } else if (c == Quote.Down) {
+ Quote.Count--;
+ if (Quote.Count == 0) {
+ endType = 3; break;
+ }
+ } else if (c == Quote.Up)
+ Quote.Count++;
+ s++;
+ }
+ if (s > 0) { // process non-empty segments
+ switch (sc.state) {
+ case SCE_PL_STRING:
+ case SCE_PL_STRING_QQ:
+ case SCE_PL_BACKTICKS:
+ InterpolateSegment(sc, s);
+ break;
+ case SCE_PL_STRING_QX:
+ if (Quote.Up != '\'') {
+ InterpolateSegment(sc, s);
+ break;
+ }
+ // (continued for ' delim)
+ default: // non-interpolated path
+ sc.Forward(s);
+ }
+ }
+ if (endType == 2) {
+ sc.Forward();
+ } else if (endType == 3)
sc.ForwardSetState(SCE_PL_DEFAULT);
- } else if (sc.ch == Quote.Up) {
- Quote.Count++;
}
break;
case SCE_PL_SUB_PROTOTYPE: {
@@ -906,8 +1128,8 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
if (sc.Match('.')) {
sc.Forward();
if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
- sc.SetState(SCE_PL_DEFAULT);
- }
+ sc.SetState(SCE_PL_DEFAULT);
+ }
while (!sc.atLineEnd)
sc.Forward();
}
@@ -1026,10 +1248,10 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
sc.ChangeState(SCE_PL_STRING_Q);
Quote.New();
} else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
- sc.ChangeState(SCE_PL_REGSUBST);
+ sc.ChangeState(SCE_PL_XLAT);
Quote.New(2);
} else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
- sc.ChangeState(SCE_PL_REGSUBST);
+ sc.ChangeState(SCE_PL_XLAT);
Quote.New(2);
sc.Forward();
fw++;