about | summary | refs | log | tree | commit | diff | homepage
path: root/lexers/LexPerl.cxx
diff options
context:
space:
mode:
author nyamatongwe <nyamatongwe@gmail.com> 2011-08-20 20:11:21 +1000
committer nyamatongwe <nyamatongwe@gmail.com> 2011-08-20 20:11:21 +1000
commit 88f9a63f05e08fed53c767cad7a72bb6d2f95e88 (patch)
tree 77214f4ac00b2d9e1e30ee1c78780c7a63e0b64e /lexers/LexPerl.cxx
parent 4d5260e36e1353b0d317e4538ca252fa430e14c2 (diff)
download scintilla-mirror-88f9a63f05e08fed53c767cad7a72bb6d2f95e88.tar.gz
Styling of variable interpolation for Perl. Feature #3394258.
From Kein-Hong Man.
Diffstat (limited to 'lexers/LexPerl.cxx')
-rw-r--r-- lexers/LexPerl.cxx 342
1 files changed, 282 insertions, 60 deletions
diff --git a/lexers/LexPerl.cxx b/lexers/LexPerl.cxx
index 5efc1d636..8a0f6422e 100644
--- a/lexers/LexPerl.cxx
+++ b/lexers/LexPerl.cxx
@@ -69,6 +69,10 @@ using namespace Scintilla;
#define BACK_OPERATOR 1 // whitespace/comments are insignificant
#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
+// all interpolated styles are different from their parent styles by a constant difference
+// we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
+#define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING)
+
static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, LexAccessor &styler) {
// old-style keyword matcher; needed because GetCurrent() needs
// current segment to be committed, but we may abandon early...
@@ -352,11 +356,19 @@ struct OptionSetPerl : public OptionSet<OptionsPerl> {
};
class LexerPerl : public ILexer {
+ CharacterSet setWordStart;
+ CharacterSet setWord;
+ CharacterSet setSpecialVar;
+ CharacterSet setControlVar;
WordList keywords;
OptionsPerl options;
OptionSetPerl osPerl;
public:
- LexerPerl() {
+ LexerPerl() :
+ setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
+ setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
+ setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
+ setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
}
~LexerPerl() {
}
@@ -390,6 +402,7 @@ public:
static ILexer *LexerFactoryPerl() {
return new LexerPerl();
}
+ void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
};
int SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
@@ -418,6 +431,90 @@ int SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
return firstModification;
}
+void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
+ // Interpolate a segment of maxSeg characters starting at sc (the segment is expected to
+ // hold no active backslashes or delimiters): switch into or out of an interpolation style,
+ // or continue the current one; commit variable patterns as found, trim the segment, repeat.
+ while (maxSeg > 0) { // each pass consumes one variable pattern or one ordinary character
+ bool isVar = false; // set once a complete variable pattern has been recognised
+ int sLen = 0; // length in characters of the candidate variable pattern
+ if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) { // a sigil plus at least one more character
+ // patterns: $#[$]*word and [$@][$]*word (word or {word} is always present)
+ bool braces = false;
+ sLen = 1;
+ if (sc.ch == '$' && sc.chNext == '#') { // starts with $#
+ sLen++;
+ }
+ while ((maxSeg > sLen) && (sc.GetRelative(sLen) == '$')) // skip any further $ dereferences (zero or more)
+ sLen++;
+ if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '{')) { // opening { of the {word} form
+ sLen++;
+ braces = true;
+ }
+ if (maxSeg > sLen) {
+ int c = sc.GetRelative(sLen);
+ if (setWordStart.Contains(c)) { // word: one setWordStart character, then setWord characters
+ sLen++;
+ isVar = true;
+ while ((maxSeg > sLen) && setWord.Contains(sc.GetRelative(sLen)))
+ sLen++;
+ } else if (braces && IsADigit(c) && (sLen == 2)) { // one digit, only right after sigil and { (sLen == 2)
+ sLen++;
+ isVar = true;
+ }
+ }
+ if (braces) {
+ if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '}')) { // closing } of the {word} form
+ sLen++;
+ } else
+ isVar = false; // brace form without its closing } is not a variable
+ }
+ }
+ if (!isVar && (maxSeg > 1)) { // fall back to the $- or @-specific variable patterns
+ sLen = 1;
+ int c = sc.chNext; // character right after the sigil
+ if (sc.ch == '$') {
+ if (IsADigit(c)) { // $[0-9] and slurp trailing digits
+ sLen++;
+ isVar = true;
+ while ((maxSeg > sLen) && IsADigit(sc.GetRelative(sLen)))
+ sLen++;
+ } else if (setSpecialVar.Contains(c)) { // $ followed by one character from setSpecialVar
+ sLen++;
+ isVar = true;
+ } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) { // $( $) $| are accepted only when isPattern is false
+ sLen++;
+ isVar = true;
+ } else if (c == '^') { // $^A control-char style
+ sLen++;
+ if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelative(sLen))) {
+ sLen++;
+ isVar = true;
+ }
+ }
+ } else if (sc.ch == '@') {
+ if (!isPattern && ((c == '+') || (c == '-'))) { // @+ and @- are accepted only when isPattern is false
+ sLen++;
+ isVar = true;
+ }
+ }
+ }
+ if (isVar) { // commit sLen characters as an interpolated variable, else one normal character
+ if (sc.state < SCE_PL_STRING_VAR) // not yet in an interpolated style: shift up into it
+ sc.SetState(sc.state + INTERPOLATE_SHIFT);
+ sc.Forward(sLen);
+ maxSeg -= sLen;
+ } else {
+ if (sc.state >= SCE_PL_STRING_VAR) // in an interpolated style: shift back down first
+ sc.SetState(sc.state - INTERPOLATE_SHIFT);
+ sc.Forward();
+ maxSeg--;
+ }
+ }
+ if (sc.state >= SCE_PL_STRING_VAR) // segment ended on a variable: restore the non-interpolated style
+ sc.SetState(sc.state - INTERPOLATE_SHIFT);
+}
+
void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
LexAccessor styler(pAccess);
@@ -426,8 +523,6 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
reWords.Set("elsif if split while");
// charset classes
- CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true);
- CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true);
CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
// lexing of "%*</" operators is non-trivial; these are missing in the set below
CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
@@ -512,10 +607,13 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
// Includes strings (may be multi-line), numbers (additional state), format
// bodies, as well as POD sections.
if (initStyle == SCE_PL_HERE_Q
- || initStyle == SCE_PL_HERE_QQ
- || initStyle == SCE_PL_HERE_QX
- || initStyle == SCE_PL_FORMAT
+ || initStyle == SCE_PL_HERE_QQ
+ || initStyle == SCE_PL_HERE_QX
+ || initStyle == SCE_PL_FORMAT
+ || initStyle == SCE_PL_HERE_QQ_VAR
+ || initStyle == SCE_PL_HERE_QX_VAR
) {
+ // backtrack through multiple styles to reach the delimiter start
int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
startPos--;
@@ -523,15 +621,34 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
startPos = styler.LineStart(styler.GetLine(startPos));
initStyle = styler.StyleAt(startPos - 1);
}
- if (initStyle == SCE_PL_STRING_Q
- || initStyle == SCE_PL_STRING_QQ
- || initStyle == SCE_PL_STRING_QX
- || initStyle == SCE_PL_STRING_QR
+ if (initStyle == SCE_PL_STRING
+ || initStyle == SCE_PL_STRING_QQ
+ || initStyle == SCE_PL_BACKTICKS
+ || initStyle == SCE_PL_STRING_QX
+ || initStyle == SCE_PL_REGEX
+ || initStyle == SCE_PL_STRING_QR
+ || initStyle == SCE_PL_REGSUBST
+ || initStyle == SCE_PL_STRING_VAR
+ || initStyle == SCE_PL_STRING_QQ_VAR
+ || initStyle == SCE_PL_BACKTICKS_VAR
+ || initStyle == SCE_PL_STRING_QX_VAR
+ || initStyle == SCE_PL_REGEX_VAR
+ || initStyle == SCE_PL_STRING_QR_VAR
+ || initStyle == SCE_PL_REGSUBST_VAR
+ ) {
+ // for interpolation, must backtrack through a mix of two different styles
+ int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
+ initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
+ while (startPos > 1) {
+ int st = styler.StyleAt(startPos - 1);
+ if ((st != initStyle) && (st != otherStyle))
+ break;
+ startPos--;
+ }
+ initStyle = SCE_PL_DEFAULT;
+ } else if (initStyle == SCE_PL_STRING_Q
|| initStyle == SCE_PL_STRING_QW
- || initStyle == SCE_PL_REGEX
- || initStyle == SCE_PL_REGSUBST
- || initStyle == SCE_PL_STRING
- || initStyle == SCE_PL_BACKTICKS
+ || initStyle == SCE_PL_XLAT
|| initStyle == SCE_PL_CHARACTER
|| initStyle == SCE_PL_NUMBER
|| initStyle == SCE_PL_IDENTIFIER
@@ -762,19 +879,49 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
break;
case SCE_PL_HERE_Q:
case SCE_PL_HERE_QQ:
- case SCE_PL_HERE_QX: {
- // also implies HereDoc.State == 2
- sc.Complete();
- if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
+ case SCE_PL_HERE_QX:
+ // also implies HereDoc.State == 2
+ sc.Complete();
+ if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
+ int c = sc.GetRelative(HereDoc.DelimiterLength);
+ if (c == '\r' || c == '\n') { // peek first, do not consume match
sc.Forward(HereDoc.DelimiterLength);
- if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n'))) {
sc.SetState(SCE_PL_DEFAULT);
backFlag = BACK_NONE;
HereDoc.State = 0;
+ if (!sc.atLineEnd)
+ sc.Forward();
+ break;
}
}
+ if (sc.state == SCE_PL_HERE_Q) { // \EOF and 'EOF' non-interpolated
while (!sc.atLineEnd)
sc.Forward();
+ break;
+ }
+ while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated
+ int s = 0, endType = 0;
+ int maxSeg = endPos - sc.currentPos;
+ while (s < maxSeg) { // scan to break string into segments
+ int c = sc.GetRelative(s);
+ if (c == '\\') {
+ endType = 1; break;
+ } else if (c == '\r' || c == '\n') {
+ endType = 2; break;
+ }
+ s++;
+ }
+ if (s > 0) // process non-empty segments
+ InterpolateSegment(sc, s);
+ if (endType == 1) {
+ sc.Forward();
+ // \ at end-of-line does not appear to have any effect, skip
+ if (sc.ch != '\r' && sc.ch != '\n')
+ sc.Forward();
+ } else if (endType == 2) {
+ if (!sc.atLineEnd)
+ sc.Forward();
+ }
}
break;
case SCE_PL_POD:
@@ -826,45 +973,89 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
sc.SetState(SCE_PL_DEFAULT);
} else if (!Quote.Up && !IsASpace(sc.ch)) {
Quote.Open(sc.ch);
- } else if (sc.ch == '\\' && Quote.Up != '\\') {
- sc.Forward();
- } else if (sc.ch == Quote.Down) {
- Quote.Count--;
- if (Quote.Count == 0)
- Quote.Rep--;
- } else if (sc.ch == Quote.Up) {
- Quote.Count++;
+ } else {
+ int s = 0, endType = 0;
+ int maxSeg = endPos - sc.currentPos;
+ while (s < maxSeg) { // scan to break string into segments
+ int c = sc.GetRelative(s);
+ if (IsASpace(c)) {
+ break;
+ } else if (c == '\\' && Quote.Up != '\\') {
+ endType = 1; break;
+ } else if (c == Quote.Down) {
+ Quote.Count--;
+ if (Quote.Count == 0) {
+ Quote.Rep--;
+ break;
+ }
+ } else if (c == Quote.Up)
+ Quote.Count++;
+ s++;
+ }
+ if (s > 0) { // process non-empty segments
+ if (Quote.Up != '\'') {
+ InterpolateSegment(sc, s, true);
+ } else // non-interpolated path
+ sc.Forward(s);
+ }
+ if (endType == 1)
+ sc.Forward();
}
break;
case SCE_PL_REGSUBST:
+ case SCE_PL_XLAT:
if (Quote.Rep <= 0) {
if (!setModifiers.Contains(sc.ch))
sc.SetState(SCE_PL_DEFAULT);
} else if (!Quote.Up && !IsASpace(sc.ch)) {
Quote.Open(sc.ch);
- } else if (sc.ch == '\\' && Quote.Up != '\\') {
- sc.Forward();
- } else if (Quote.Count == 0 && Quote.Rep == 1) {
- // We matched something like s(...) or tr{...}, Perl 5.10
- // appears to allow almost any character for use as the
- // next delimiters. Whitespace and comments are accepted in
- // between, but we'll limit to whitespace here.
- // For '#', if no whitespace in between, it's a delimiter.
- if (IsASpace(sc.ch)) {
- // Keep going
- } else if (sc.ch == '#' && IsASpaceOrTab(sc.chPrev)) {
- sc.SetState(SCE_PL_DEFAULT);
- } else {
- Quote.Open(sc.ch);
+ } else {
+ int s = 0, endType = 0;
+ int maxSeg = endPos - sc.currentPos;
+ bool isPattern = (Quote.Rep == 2);
+ while (s < maxSeg) { // scan to break string into segments
+ int c = sc.GetRelative(s);
+ if (c == '\\' && Quote.Up != '\\') {
+ endType = 2; break;
+ } else if (Quote.Count == 0 && Quote.Rep == 1) {
+ // We matched something like s(...) or tr{...}, Perl 5.10
+ // appears to allow almost any character for use as the
+ // next delimiters. Whitespace and comments are accepted in
+ // between, but we'll limit to whitespace here.
+ // For '#', if no whitespace in between, it's a delimiter.
+ if (IsASpace(c)) {
+ // Keep going
+ } else if (c == '#' && IsASpaceOrTab(sc.GetRelative(s - 1))) {
+ endType = 3;
+ } else
+ Quote.Open(c);
+ break;
+ } else if (c == Quote.Down) {
+ Quote.Count--;
+ if (Quote.Count == 0) {
+ Quote.Rep--;
+ endType = 1;
+ }
+ if (Quote.Up == Quote.Down)
+ Quote.Count++;
+ if (endType == 1)
+ break;
+ } else if (c == Quote.Up) {
+ Quote.Count++;
+ } else if (IsASpace(c))
+ break;
+ s++;
}
- } else if (sc.ch == Quote.Down) {
- Quote.Count--;
- if (Quote.Count == 0)
- Quote.Rep--;
- if (Quote.Up == Quote.Down)
- Quote.Count++;
- } else if (sc.ch == Quote.Up) {
- Quote.Count++;
+ if (s > 0) { // process non-empty segments
+ if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
+ InterpolateSegment(sc, s, isPattern);
+ } else // non-interpolated path
+ sc.Forward(s);
+ }
+ if (endType == 2) {
+ sc.Forward();
+ } else if (endType == 3)
+ sc.SetState(SCE_PL_DEFAULT);
}
break;
case SCE_PL_STRING_Q:
@@ -876,14 +1067,45 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
case SCE_PL_BACKTICKS:
if (!Quote.Down && !IsASpace(sc.ch)) {
Quote.Open(sc.ch);
- } else if (sc.ch == '\\' && Quote.Up != '\\') {
- sc.Forward();
- } else if (sc.ch == Quote.Down) {
- Quote.Count--;
- if (Quote.Count == 0)
+ } else {
+ int s = 0, endType = 0;
+ int maxSeg = endPos - sc.currentPos;
+ while (s < maxSeg) { // scan to break string into segments
+ int c = sc.GetRelative(s);
+ if (IsASpace(c)) {
+ break;
+ } else if (c == '\\' && Quote.Up != '\\') {
+ endType = 2; break;
+ } else if (c == Quote.Down) {
+ Quote.Count--;
+ if (Quote.Count == 0) {
+ endType = 3; break;
+ }
+ } else if (c == Quote.Up)
+ Quote.Count++;
+ s++;
+ }
+ if (s > 0) { // process non-empty segments
+ switch (sc.state) {
+ case SCE_PL_STRING:
+ case SCE_PL_STRING_QQ:
+ case SCE_PL_BACKTICKS:
+ InterpolateSegment(sc, s);
+ break;
+ case SCE_PL_STRING_QX:
+ if (Quote.Up != '\'') {
+ InterpolateSegment(sc, s);
+ break;
+ }
+ // (continued for ' delim)
+ default: // non-interpolated path
+ sc.Forward(s);
+ }
+ }
+ if (endType == 2) {
+ sc.Forward();
+ } else if (endType == 3)
sc.ForwardSetState(SCE_PL_DEFAULT);
- } else if (sc.ch == Quote.Up) {
- Quote.Count++;
}
break;
case SCE_PL_SUB_PROTOTYPE: {
@@ -906,8 +1128,8 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
if (sc.Match('.')) {
sc.Forward();
if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
- sc.SetState(SCE_PL_DEFAULT);
- }
+ sc.SetState(SCE_PL_DEFAULT);
+ }
while (!sc.atLineEnd)
sc.Forward();
}
@@ -1026,10 +1248,10 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle,
sc.ChangeState(SCE_PL_STRING_Q);
Quote.New();
} else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
- sc.ChangeState(SCE_PL_REGSUBST);
+ sc.ChangeState(SCE_PL_XLAT);
Quote.New(2);
} else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
- sc.ChangeState(SCE_PL_REGSUBST);
+ sc.ChangeState(SCE_PL_XLAT);
Quote.New(2);
sc.Forward();
fw++;