diff options
-rw-r--r-- | src/LexPerl.cxx | 2097 |
1 files changed, 1000 insertions, 1097 deletions
diff --git a/src/LexPerl.cxx b/src/LexPerl.cxx index ff8d771ce..f57f73c29 100644 --- a/src/LexPerl.cxx +++ b/src/LexPerl.cxx @@ -1,6 +1,6 @@ // Scintilla source code edit control /** @file LexPerl.cxx - ** Lexer for subset of Perl. + ** Lexer for Perl. **/ // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org> // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my> @@ -16,166 +16,336 @@ #include "PropSet.h" #include "Accessor.h" +#include "StyleContext.h" #include "KeyWords.h" #include "Scintilla.h" #include "SciLexer.h" +#include "CharacterSet.h" #ifdef SCI_NAMESPACE using namespace Scintilla; #endif -#define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot -#define PERLNUM_HEX 2 -#define PERLNUM_OCTAL 3 -#define PERLNUM_FLOAT 4 // actually exponent part -#define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings -#define PERLNUM_VECTOR 6 -#define PERLNUM_V_VECTOR 7 -#define PERLNUM_BAD 8 +// Info for HERE document handling from perldata.pod (reformatted): +// ---------------------------------------------------------------- +// A line-oriented form of quoting is based on the shell ``here-doc'' syntax. +// Following a << you specify a string to terminate the quoted material, and +// all lines following the current line down to the terminating string are +// the value of the item. +// * The terminating string may be either an identifier (a word), or some +// quoted text. +// * If quoted, the type of quotes you use determines the treatment of the +// text, just as in regular quoting. +// * An unquoted identifier works like double quotes. +// * There must be no space between the << and the identifier. +// (If you put a space it will be treated as a null identifier, +// which is valid, and matches the first empty line.) +// (This is deprecated, -w warns of this syntax) +// * The terminating string must appear by itself (unquoted and +// with no surrounding whitespace) on the terminating line. -#define BACK_NONE 0 // lookback state for bareword disambiguation: -#define BACK_OPERATOR 1 // whitespace/comments are insignificant -#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation +#define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter -#define HERE_DELIM_MAX 256 +#define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot +#define PERLNUM_HEX 2 +#define PERLNUM_OCTAL 3 +#define PERLNUM_FLOAT_EXP 4 // exponent part only +#define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings +#define PERLNUM_VECTOR 6 +#define PERLNUM_V_VECTOR 7 +#define PERLNUM_BAD 8 -static inline bool isEOLChar(char ch) { - return (ch == '\r') || (ch == '\n'); -} - -static bool isSingleCharOp(char ch) { - char strCharSet[2]; - strCharSet[0] = ch; - strCharSet[1] = '\0'; - return (NULL != strstr("rwxoRWXOezsfdlpSbctugkTBMAC", strCharSet)); -} +#define BACK_NONE 0 // lookback state for bareword disambiguation: +#define BACK_OPERATOR 1 // whitespace/comments are insignificant +#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation -static inline bool isPerlOperator(char ch) { - if (ch == '^' || ch == '&' || ch == '\\' || - ch == '(' || ch == ')' || ch == '-' || ch == '+' || - ch == '=' || ch == '|' || ch == '{' || ch == '}' || - ch == '[' || ch == ']' || ch == ':' || ch == ';' || - ch == '>' || ch == ',' || - ch == '?' || ch == '!' || ch == '.' || ch == '~') - return true; - // these chars are already tested before this call - // ch == '%' || ch == '*' || ch == '<' || ch == '/' || - return false; -} - -static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) { +static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) +{ + // old-style keyword matcher; needed because GetCurrent() needs + // current segment to be committed, but we may abandon early... char s[100]; - unsigned int i, len = end - start; - if (len > 30) { len = 30; } + unsigned int i, len = end - start; + if (len > 30) { len = 30; } for (i = 0; i < len; i++, start++) s[i] = styler[start]; - s[i] = '\0'; + s[i] = '\0'; return keywords.InList(s); } -// Note: as lexer uses chars, UTF-8 bytes are considered as <0 values -// Note: iswordchar() was used in only one place in LexPerl, it is -// unnecessary as '.' is processed as the concatenation operator, so -// only isWordStart() is used in LexPerl - -static inline bool isWordStart(char ch) { - return !isascii(ch) || isalnum(ch) || ch == '_'; +static int disambiguateBareword(Accessor &styler, unsigned int bk, unsigned int fw, + int backFlag, unsigned int backPos, unsigned int endPos) +{ + // identifiers are recognized by Perl as barewords under some + // conditions, the following attempts to do the disambiguation + // by looking backward and forward; result in 2 LSB + int result = 0; + bool moreback = false; // true if passed newline/comments + bool brace = false; // true if opening brace found + // if BACK_NONE, neither operator nor keyword, so skip test + if (backFlag == BACK_NONE) + return result; + // first look backwards past whitespace/comments to set EOL flag + // (some disambiguation patterns must be on a single line) + if (backPos <= static_cast<unsigned int>(styler.LineStart(styler.GetLine(bk)))) + moreback = true; + // look backwards at last significant lexed item for disambiguation + bk = backPos - 1; + int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); + if (ch == '{' && !moreback) { + // {bareword: possible variable spec + brace = true; + } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&') + // &bareword: subroutine call + || styler.Match(bk - 1, "->") + // ->bareword: part of variable spec + || styler.Match(bk - 2, "sub")) { + // sub bareword: subroutine declaration + // (implied BACK_KEYWORD, no keywords end in 'sub'!) + result |= 1; + } + // next, scan forward after word past tab/spaces only; + // if ch isn't one of '[{(,' we can skip the test + if ((ch == '{' || ch == '(' || ch == '['|| ch == ',') + && fw < endPos) { + while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)), + IsASpaceOrTab(ch) && fw < endPos) { + fw++; + } + if ((ch == '}' && brace) + // {bareword}: variable spec + || styler.Match(fw, "=>")) { + // [{(, bareword=>: hash literal + result |= 2; + } + } + return result; } -static inline bool isEndVar(char ch) { - return isascii(ch) && !isalnum(ch) && ch != '#' && ch != '$' && - ch != '_' && ch != '\''; +static void skipWhitespaceComment(Accessor &styler, unsigned int &p) +{ + // when backtracking, we need to skip whitespace and comments + int style; + while ((p > 0) && (style = styler.StyleAt(p), + style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE)) + p--; } -static inline bool isNonQuote(char ch) { - return !isascii(ch) || isalnum(ch) || ch == '_'; +static int styleBeforeBracePair(Accessor &styler, unsigned int bk) +{ + // backtrack to find open '{' corresponding to a '}', balanced + // return significant style to be tested for '/' disambiguation + int braceCount = 1; + if (bk == 0) + return SCE_PL_DEFAULT; + while (--bk > 0) { + if (styler.StyleAt(bk) == SCE_PL_OPERATOR) { + int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); + if (bkch == ';') { // early out + break; + } else if (bkch == '}') { + braceCount++; + } else if (bkch == '{') { + if (--braceCount == 0) break; + } + } + } + if (bk > 0 && braceCount == 0) { + // balanced { found, bk > 0, skip more whitespace/comments + bk--; + skipWhitespaceComment(styler, bk); + return styler.StyleAt(bk); + } + return SCE_PL_DEFAULT; } -static inline char actualNumStyle(int numberStyle) { - if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) { - return SCE_PL_STRING; - } else if (numberStyle == PERLNUM_BAD) { - return SCE_PL_ERROR; - } - return SCE_PL_NUMBER; +static int styleCheckIdentifier(Accessor &styler, unsigned int bk) +{ + // backtrack to classify sub-styles of identifier under test + // return sub-style to be tested for '/' disambiguation + if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo> + return 1; + // backtrack to check for possible "->" or "::" before identifier + while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) { + bk--; + } + while (bk > 0) { + int bkstyle = styler.StyleAt(bk); + if (bkstyle == SCE_PL_DEFAULT + || bkstyle == SCE_PL_COMMENTLINE) { + // skip whitespace, comments + } else if (bkstyle == SCE_PL_OPERATOR) { + // test for "->" and "::" + if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::")) + return 2; + } else + return 3; // bare identifier + bk--; + } + return 0; } -static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) { - if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) { - return false; +static int inputsymbolScan(Accessor &styler, unsigned int pos, unsigned int endPos) +{ + // looks forward for matching > on same line; a bit ugly + unsigned int fw = pos; + while (++fw < endPos) { + int fwch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)); + if (fwch == '\r' || fwch == '\n') { + return 0; + } else if (fwch == '>') { + if (styler.Match(fw - 2, "<=>")) // '<=>' case + return 0; + return fw - pos; + } } - while (*val) { - if (*val != styler[pos++]) { - return false; + return 0; +} + +static int podLineScan(Accessor &styler, unsigned int &pos, unsigned int endPos) +{ + // forward scan the current line to classify line for POD style + int state = -1; + while (pos <= endPos) { + int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos)); + if (ch == '\n' || ch == '\r' || pos >= endPos) { + if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++; + break; } - val++; + if (IsASpaceOrTab(ch)) { // whitespace, take note + if (state == -1) + state = SCE_PL_DEFAULT; + } else if (state == SCE_PL_DEFAULT) { // verbatim POD line + state = SCE_PL_POD_VERB; + } else if (state != SCE_PL_POD_VERB) { // regular POD line + state = SCE_PL_POD; + } + pos++; + } + if (state == -1) + state = SCE_PL_DEFAULT; + return state; +} + +static bool styleCheckSubPrototype(Accessor &styler, unsigned int bk) +{ + // backtrack to identify if we're starting a subroutine prototype + // we also need to ignore whitespace/comments: + // 'sub' [whitespace|comment] <identifier> [whitespace|comment] + styler.Flush(); + skipWhitespaceComment(styler, bk); + if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier + return false; + while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) { + bk--; } + skipWhitespaceComment(styler, bk); + if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword + || !styler.Match(bk - 2, "sub")) // assume suffix is unique! + return false; return true; } -static char opposite(char ch) { - if (ch == '(') - return ')'; - if (ch == '[') - return ']'; - if (ch == '{') - return '}'; - if (ch == '<') - return '>'; +static bool isMatch(const char *sref, char *s) +{ + // match per-line delimiter - must kill trailing CR if CRLF + if (s[strlen(s) - 1] == '\r') + s[strlen(s) - 1] = '\0'; + return (strcmp(sref, s) == 0); +} + +static int actualNumStyle(int numberStyle) { + if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) { + return SCE_PL_STRING; + } else if (numberStyle == PERLNUM_BAD) { + return SCE_PL_ERROR; + } + return SCE_PL_NUMBER; +} + +static int opposite(int ch) { + if (ch == '(') return ')'; + if (ch == '[') return ']'; + if (ch == '{') return '}'; + if (ch == '<') return '>'; return ch; } static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[], Accessor &styler) { - // Lexer for perl often has to backtrack to start of current style to determine - // which characters are being used as quotes, how deeply nested is the - // start position and what the termination string is for here documents - WordList &keywords = *keywordlists[0]; - // keywords that forces /PATTERN/ at all times - WordList reWords; - reWords.Set("elsif if split while"); + // keywords that forces /PATTERN/ at all times; should track vim's behaviour + WordList reWords; + reWords.Set("elsif if split while"); - class HereDocCls { + // charset classes + CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true); + CharacterSet setWord(CharacterSet::setAlphaNum, "_", 0x80, true); + CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC"); + // lexing of "%*</" operators is non-trivial; these are missing in the set below + CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~"); + CharacterSet setQDelim(CharacterSet::setNone, "qrwx"); + CharacterSet setModifiers(CharacterSet::setAlpha); + CharacterSet setPreferRE(CharacterSet::setNone, "*/<%"); + // setArray and setHash also accepts chars for special vars like $_, + // which are then truncated when the next char does not match setVar + CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true); + CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true); + CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true); + CharacterSet &setPOD = setModifiers; + CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@"); + CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_"); + CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*];"); + // for format identifiers + CharacterSet setFormatStart(CharacterSet::setAlpha, "_="); + CharacterSet &setFormat = setHereDocDelim; + + // Lexer for perl often has to backtrack to start of current style to determine + // which characters are being used as quotes, how deeply nested is the + // start position and what the termination string is for HERE documents. + + class HereDocCls { // Class to manage HERE doc sequence public: int State; // 0: '<<' encountered - // 1: collect the delimiter - // 2: here doc text (lines after the delimiter) - char Quote; // the char after '<<' + // 1: collect the delimiter + // 2: here doc text (lines after the delimiter) + int Quote; // the char after '<<' bool Quoted; // true if Quote in ('\'','"','`') int DelimiterLength; // strlen(Delimiter) char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf HereDocCls() { State = 0; - Quote = 0; - Quoted = false; + Quote = 0; + Quoted = false; DelimiterLength = 0; Delimiter = new char[HERE_DELIM_MAX]; Delimiter[0] = '\0'; } + void Append(int ch) { + Delimiter[DelimiterLength++] = static_cast<char>(ch); + Delimiter[DelimiterLength] = '\0'; + } ~HereDocCls() { delete []Delimiter; } }; - HereDocCls HereDoc; // TODO: FIFO for stacked here-docs + HereDocCls HereDoc; // TODO: FIFO for stacked here-docs - class QuoteCls { + class QuoteCls { // Class to manage quote pairs public: - int Rep; - int Count; - char Up; - char Down; + int Rep; + int Count; + int Up, Down; QuoteCls() { this->New(1); } - void New(int r) { + void New(int r = 1) { Rep = r; Count = 0; Up = '\0'; Down = '\0'; } - void Open(char u) { + void Open(int u) { Count++; Up = u; Down = opposite(Up); @@ -183,525 +353,700 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, }; QuoteCls Quote; - int state = initStyle; - char numState = PERLNUM_DECIMAL; + // additional state for number lexing + int numState = PERLNUM_DECIMAL; int dotCount = 0; - unsigned int lengthDoc = startPos + length; - //int sookedpos = 0; // these have no apparent use, see POD state - //char sooked[100]; - //sooked[sookedpos] = '\0'; - styler.StartAt(startPos, static_cast<char>(STYLE_MAX)); - // If in a long distance lexical state, seek to the beginning to find quote characters - // Perl strings can be multi-line with embedded newlines, so backtrack. - // Perl numbers have additional state during lexing, so backtrack too. - if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) { - while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) { + unsigned int endPos = startPos + length; + + // Backtrack to beginning of style if required... + // If in a long distance lexical state, backtrack to find quote characters. + // Includes strings (may be multi-line), numbers (additional state), format + // bodies, as well as POD sections. + if (initStyle == SCE_PL_HERE_Q + || initStyle == SCE_PL_HERE_QQ + || initStyle == SCE_PL_HERE_QX + || initStyle == SCE_PL_FORMAT + ) { + int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM; + while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) { startPos--; } startPos = styler.LineStart(styler.GetLine(startPos)); - state = styler.StyleAt(startPos - 1); + initStyle = styler.StyleAt(startPos - 1); } - // Backtrack for format body. - if (state == SCE_PL_FORMAT) { - while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_FORMAT_IDENT)) { + if (initStyle == SCE_PL_STRING_Q + || initStyle == SCE_PL_STRING_QQ + || initStyle == SCE_PL_STRING_QX + || initStyle == SCE_PL_STRING_QR + || initStyle == SCE_PL_STRING_QW + || initStyle == SCE_PL_REGEX + || initStyle == SCE_PL_REGSUBST + || initStyle == SCE_PL_STRING + || initStyle == SCE_PL_BACKTICKS + || initStyle == SCE_PL_CHARACTER + || initStyle == SCE_PL_NUMBER + || initStyle == SCE_PL_IDENTIFIER + || initStyle == SCE_PL_ERROR + || initStyle == SCE_PL_SUB_PROTOTYPE + ) { + while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) { startPos--; } - startPos = styler.LineStart(styler.GetLine(startPos)); - state = styler.StyleAt(startPos - 1); - } - if ( state == SCE_PL_STRING_Q - || state == SCE_PL_STRING_QQ - || state == SCE_PL_STRING_QX - || state == SCE_PL_STRING_QR - || state == SCE_PL_STRING_QW - || state == SCE_PL_REGEX - || state == SCE_PL_REGSUBST - || state == SCE_PL_STRING - || state == SCE_PL_BACKTICKS - || state == SCE_PL_CHARACTER - || state == SCE_PL_NUMBER - || state == SCE_PL_IDENTIFIER - || state == SCE_PL_ERROR - || state == SCE_PL_SUB_PROTOTYPE + initStyle = SCE_PL_DEFAULT; + } else if (initStyle == SCE_PL_POD + || initStyle == SCE_PL_POD_VERB ) { - while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) { - startPos--; + // POD backtracking finds preceeding blank lines and goes back past them + int ln = styler.GetLine(startPos); + if (ln > 0) { + initStyle = styler.StyleAt(styler.LineStart(--ln)); + if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) { + while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT) + ln--; + } + startPos = styler.LineStart(++ln); + initStyle = styler.StyleAt(startPos - 1); + } else { + startPos = 0; + initStyle = SCE_PL_DEFAULT; } - state = SCE_PL_DEFAULT; } - // lookback at start of lexing to set proper state for backflag - // after this, they are updated when elements are lexed - int backflag = BACK_NONE; - unsigned int backPos = startPos; - if (backPos > 0) { - backPos--; - int sty = SCE_PL_DEFAULT; - while ((backPos > 0) && (sty = styler.StyleAt(backPos), - sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE)) - backPos--; - if (sty == SCE_PL_OPERATOR) - backflag = BACK_OPERATOR; - else if (sty == SCE_PL_WORD) - backflag = BACK_KEYWORD; - } + // backFlag, backPos are additional state to aid identifier corner cases. + // Look backwards past whitespace and comments in order to detect either + // operator or keyword. Later updated as we go along. + int backFlag = BACK_NONE; + unsigned int backPos = startPos; + if (backPos > 0) { + backPos--; + skipWhitespaceComment(styler, backPos); + if (styler.StyleAt(backPos) == SCE_PL_OPERATOR) + backFlag = BACK_OPERATOR; + else if (styler.StyleAt(backPos) == SCE_PL_WORD) + backFlag = BACK_KEYWORD; + backPos++; + } - styler.StartAt(startPos, static_cast<char>(STYLE_MAX)); - char chPrev = styler.SafeGetCharAt(startPos - 1); - if (startPos == 0) - chPrev = '\n'; - char chNext = styler[startPos]; - styler.StartSegment(startPos); + StyleContext sc(startPos, endPos - startPos, initStyle, styler, static_cast<char>(STYLE_MAX)); - for (unsigned int i = startPos; i < lengthDoc; i++) { - char ch = chNext; - // if the current character is not consumed due to the completion of an - // earlier style, lexing can be restarted via a simple goto - restartLexer: - chNext = styler.SafeGetCharAt(i + 1); - char chNext2 = styler.SafeGetCharAt(i + 2); + for (; sc.More(); sc.Forward()) { - if (styler.IsLeadByte(ch)) { - chNext = styler.SafeGetCharAt(i + 2); - chPrev = ' '; - i += 1; - continue; + // Determine if the current state should terminate. + switch (sc.state) { + case SCE_PL_OPERATOR: + sc.SetState(SCE_PL_DEFAULT); + backFlag = BACK_OPERATOR; + backPos = sc.currentPos; + break; + case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol + if ((!setWord.Contains(sc.ch) && sc.ch != '\'') + || sc.Match('.', '.') + || sc.chPrev == '>') { // end of inputsymbol + sc.SetState(SCE_PL_DEFAULT); + } + break; + case SCE_PL_WORD: // keyword, plus special cases + if (!setWord.Contains(sc.ch)) { + char s[100]; + sc.GetCurrent(s, sizeof(s)); + if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) { + sc.ChangeState(SCE_PL_DATASECTION); + } else { + if ((strcmp(s, "format") == 0)) { + sc.SetState(SCE_PL_FORMAT_IDENT); + HereDoc.State = 0; + } else { + sc.SetState(SCE_PL_DEFAULT); + } + backFlag = BACK_KEYWORD; + backPos = sc.currentPos; + } + } + break; + case SCE_PL_SCALAR: + case SCE_PL_ARRAY: + case SCE_PL_HASH: + case SCE_PL_SYMBOLTABLE: + if (sc.Match(':', ':')) { // skip :: + sc.Forward(); + } else if (!setVar.Contains(sc.ch)) { + if (sc.LengthCurrent() == 1) { + // Special variable: $(, $_ etc. + sc.Forward(); + } + sc.SetState(SCE_PL_DEFAULT); + } + break; + case SCE_PL_NUMBER: + // if no early break, number style is terminated at "(go through)" + if (sc.ch == '.') { + if (sc.chNext == '.') { + // double dot is always an operator (go through) + } else if (numState <= PERLNUM_FLOAT_EXP) { + // non-decimal number or float exponent, consume next dot + sc.SetState(SCE_PL_OPERATOR); + break; + } else { // decimal or vectors allows dots + dotCount++; + if (numState == PERLNUM_DECIMAL) { + if (dotCount <= 1) // number with one dot in it + break; + if (IsADigit(sc.chNext)) { // really a vector + numState = PERLNUM_VECTOR; + break; + } + // number then dot (go through) + } else if (IsADigit(sc.chNext)) // vectors + break; + // vector then dot (go through) + } + } else if (sc.ch == '_') { + // permissive underscoring for number and vector literals + break; + } else if (numState == PERLNUM_DECIMAL) { + if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign + numState = PERLNUM_FLOAT_EXP; + if (sc.chNext == '+' || sc.chNext == '-') { + sc.Forward(); + } + break; + } else if (IsADigit(sc.ch)) + break; + // number then word (go through) + } else if (numState == PERLNUM_HEX) { + if (IsADigit(sc.ch, 16)) + break; + } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { + if (IsADigit(sc.ch)) // vector + break; + if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word + sc.ChangeState(SCE_PL_IDENTIFIER); + break; + } + // vector then word (go through) + } else if (IsADigit(sc.ch)) { + if (numState == PERLNUM_FLOAT_EXP) { + break; + } else if (numState == PERLNUM_OCTAL) { + if (sc.ch <= '7') break; + } else if (numState == PERLNUM_BINARY) { + if (sc.ch <= '1') break; + } + // mark invalid octal, binary numbers (go through) + numState = PERLNUM_BAD; + break; + } + // complete current number or vector + sc.ChangeState(actualNumStyle(numState)); + sc.SetState(SCE_PL_DEFAULT); + break; + case SCE_PL_COMMENTLINE: + if (sc.atLineEnd) { + sc.SetState(SCE_PL_DEFAULT); + } + break; + case SCE_PL_HERE_DELIM: + if (HereDoc.State == 0) { // '<<' encountered + int delim_ch = sc.chNext; + int ws_skip = 0; + HereDoc.State = 1; // pre-init HERE doc class + HereDoc.Quote = sc.chNext; + HereDoc.Quoted = false; + HereDoc.DelimiterLength = 0; + HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; + if (IsASpaceOrTab(delim_ch)) { + // skip whitespace; legal only for quoted delimiters + unsigned int i = sc.currentPos + 1; + while ((i < endPos) && IsASpaceOrTab(delim_ch)) { + i++; + delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i)); + } + ws_skip = i - sc.currentPos - 1; + } + if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') { + // a quoted here-doc delimiter; skip any whitespace + sc.Forward(ws_skip + 1); + HereDoc.Quote = delim_ch; + HereDoc.Quoted = true; + } else if (ws_skip == 0 && setNonHereDoc.Contains(sc.chNext) + || ws_skip > 0) { + // left shift << or <<= operator cases + // restore position if operator + sc.ChangeState(SCE_PL_OPERATOR); + sc.ForwardSetState(SCE_PL_DEFAULT); + backFlag = BACK_OPERATOR; + backPos = sc.currentPos; + HereDoc.State = 0; + } else { + // specially handle initial '\' for identifier + if (ws_skip == 0 && HereDoc.Quote == '\\') + sc.Forward(); + // an unquoted here-doc delimiter, no special handling + // (cannot be prefixed by spaces/tabs), or + // symbols terminates; deprecated zero-length delimiter + } + } else if (HereDoc.State == 1) { // collect the delimiter + backFlag = BACK_NONE; + if (HereDoc.Quoted) { // a quoted here-doc delimiter + if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter + sc.ForwardSetState(SCE_PL_DEFAULT); + } else if (!sc.atLineEnd) { + if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote + sc.Forward(); + } + if (sc.ch != '\r') { // skip CR if CRLF + HereDoc.Append(sc.ch); + } + } + } else { // an unquoted here-doc delimiter + if (setHereDocDelim.Contains(sc.ch)) { + HereDoc.Append(sc.ch); + } else { + sc.SetState(SCE_PL_DEFAULT); + } + } + if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { + sc.SetState(SCE_PL_ERROR); + HereDoc.State = 0; + } + } + break; + case SCE_PL_HERE_Q: + case SCE_PL_HERE_QQ: + case SCE_PL_HERE_QX: { + // also implies HereDoc.State == 2 + sc.Complete(); + while (!sc.atLineEnd) + sc.Forward(); + char s[HERE_DELIM_MAX]; + sc.GetCurrent(s, sizeof(s)); + if (isMatch(HereDoc.Delimiter, s)) { + sc.SetState(SCE_PL_DEFAULT); + backFlag = BACK_NONE; + HereDoc.State = 0; + } + } break; + case SCE_PL_POD: + case SCE_PL_POD_VERB: { + unsigned int fw = sc.currentPos; + int ln = styler.GetLine(fw); + if (sc.atLineStart && sc.Match("=cut")) { // end of POD + sc.SetState(SCE_PL_POD); + sc.Forward(4); + sc.SetState(SCE_PL_DEFAULT); + styler.SetLineState(ln, SCE_PL_POD); + break; + } + int pod = podLineScan(styler, fw, endPos); // classify POD line + styler.SetLineState(ln, pod); + if (pod == SCE_PL_DEFAULT) { + if (sc.state == SCE_PL_POD_VERB) { + unsigned int fw2 = fw; + while (fw2 <= endPos && pod == SCE_PL_DEFAULT) { + fw = fw2++; // penultimate line (last blank line) + pod = podLineScan(styler, fw2, endPos); + styler.SetLineState(styler.GetLine(fw2), pod); + } + if (pod == SCE_PL_POD) { // truncate verbatim POD early + sc.SetState(SCE_PL_POD); + } else + fw = fw2; + } else + pod = SCE_PL_POD; + } else { + if (pod == SCE_PL_POD_VERB // still part of current paragraph + && (styler.GetLineState(ln - 1) == SCE_PL_POD)) { + pod = SCE_PL_POD; + styler.SetLineState(ln, pod); + } else if (pod == SCE_PL_POD + && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) { + pod = SCE_PL_POD_VERB; + styler.SetLineState(ln, pod); + } + sc.SetState(pod); + } + sc.Forward(fw - sc.currentPos); // commit style + } break; + case SCE_PL_REGEX: + case SCE_PL_STRING_QR: + if (Quote.Rep <= 0) { + if (!setModifiers.Contains(sc.ch)) + sc.SetState(SCE_PL_DEFAULT); + } else if (!Quote.Up && !IsASpace(sc.ch)) { + Quote.Open(sc.ch); + } else if (sc.ch == '\\' && Quote.Up != '\\') { + sc.Forward(); + } else if (sc.ch == Quote.Down) { + Quote.Count--; + if (Quote.Count == 0) + Quote.Rep--; + } else if (sc.ch == Quote.Up) { + Quote.Count++; + } + break; + case SCE_PL_REGSUBST: + if (Quote.Rep <= 0) { + if (!setModifiers.Contains(sc.ch)) + sc.SetState(SCE_PL_DEFAULT); + } else if (!Quote.Up && !IsASpace(sc.ch)) { + Quote.Open(sc.ch); + } else if (sc.ch == '\\' && Quote.Up != '\\') { + sc.Forward(); + } else if (Quote.Count == 0 && Quote.Rep == 1) { + // We matched something like s(...) or tr{...}, Perl 5.10 + // appears to allow almost any character for use as the + // next delimiters. Whitespace and comments are accepted in + // between, but we'll limit to whitespace here. + // For '#', if no whitespace in between, it's a delimiter. + if (IsASpace(sc.ch)) { + // Keep going + } else if (sc.ch == '#' && IsASpaceOrTab(sc.chPrev)) { + sc.SetState(SCE_PL_DEFAULT); + } else { + Quote.Open(sc.ch); + } + } else if (sc.ch == Quote.Down) { + Quote.Count--; + if (Quote.Count == 0) + Quote.Rep--; + if (Quote.Up == Quote.Down) + Quote.Count++; + } else if (sc.ch == Quote.Up) { + Quote.Count++; + } + break; + case SCE_PL_STRING_Q: + case SCE_PL_STRING_QQ: + case SCE_PL_STRING_QX: + case SCE_PL_STRING_QW: + case SCE_PL_STRING: + case SCE_PL_CHARACTER: + case SCE_PL_BACKTICKS: + if (!Quote.Down && !IsASpace(sc.ch)) { + Quote.Open(sc.ch); + } else if (sc.ch == '\\' && Quote.Up != '\\') { + sc.Forward(); + } else if (sc.ch == Quote.Down) { + Quote.Count--; + if (Quote.Count == 0) + sc.ForwardSetState(SCE_PL_DEFAULT); + } else if (sc.ch == Quote.Up) { + Quote.Count++; + } + break; + case SCE_PL_SUB_PROTOTYPE: { + int i = 0; + // forward scan; must all be valid proto characters + while (setSubPrototype.Contains(sc.GetRelative(i))) + i++; + if (sc.GetRelative(i) == ')') { // valid sub prototype + sc.Forward(i); + sc.ForwardSetState(SCE_PL_DEFAULT); + } else { + // abandon prototype, restart from '(' + sc.ChangeState(SCE_PL_OPERATOR); + sc.SetState(SCE_PL_DEFAULT); + } + } break; + case SCE_PL_FORMAT: { + sc.Complete(); + while (!sc.atLineEnd) + sc.Forward(); + char s[10]; + sc.GetCurrent(s, sizeof(s)); + if (isMatch(".", s)) + sc.SetState(SCE_PL_DEFAULT); + } break; + case SCE_PL_ERROR: + break; } - if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows - styler.ColourTo(i, state); - chPrev = ch; - continue; + // Needed for specific continuation styles (one follows the other) + switch (sc.state) { + // continued from SCE_PL_WORD + case SCE_PL_FORMAT_IDENT: + // occupies HereDoc state 3 to avoid clashing with HERE docs + if (IsASpaceOrTab(sc.ch)) { // skip whitespace + sc.ChangeState(SCE_PL_DEFAULT); + while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) + sc.Forward(); + sc.SetState(SCE_PL_FORMAT_IDENT); + } + if (setFormatStart.Contains(sc.ch)) { // identifier or '=' + if (sc.ch != '=') { + do { + sc.Forward(); + } while (setFormat.Contains(sc.ch)); + } + while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) + sc.Forward(); + if (sc.ch == '=') { + sc.ForwardSetState(SCE_PL_DEFAULT); + HereDoc.State = 3; + } else { + // invalid indentifier; inexact fallback, but hey + sc.ChangeState(SCE_PL_IDENTIFIER); + sc.SetState(SCE_PL_DEFAULT); + } + } else { + sc.ChangeState(SCE_PL_DEFAULT); // invalid indentifier + } + backFlag = BACK_NONE; + break; } - if (HereDoc.State == 1 && isEOLChar(ch)) { + // Must check end of HereDoc states here before default state is handled + if (HereDoc.State == 1 && sc.atLineEnd) { // Begin of here-doc (the line after the here-doc delimiter): // Lexically, the here-doc starts from the next line after the >>, but the // first line of here-doc seem to follow the style of the last EOL sequence + int st_new = SCE_PL_HERE_QQ; HereDoc.State = 2; if (HereDoc.Quoted) { - if (state == SCE_PL_HERE_DELIM) { + if (sc.state == SCE_PL_HERE_DELIM) { // Missing quote at end of string! We are stricter than perl. // Colour here-doc anyway while marking this bit as an error. - state = SCE_PL_ERROR; + sc.ChangeState(SCE_PL_ERROR); } - styler.ColourTo(i - 1, state); switch (HereDoc.Quote) { - case '\'': - state = SCE_PL_HERE_Q ; - break; - case '"': - state = SCE_PL_HERE_QQ; - break; - case '`': - state = SCE_PL_HERE_QX; - break; + case '\'': st_new = SCE_PL_HERE_Q ; break; + case '"' : st_new = SCE_PL_HERE_QQ; break; + case '`' : st_new = SCE_PL_HERE_QX; break; } } else { - styler.ColourTo(i - 1, state); - switch (HereDoc.Quote) { - case '\\': - state = SCE_PL_HERE_Q ; - break; - default : - state = SCE_PL_HERE_QQ; - } + if (HereDoc.Quote == '\\') + st_new = SCE_PL_HERE_Q; } + sc.SetState(st_new); + } + if (HereDoc.State == 3 && sc.atLineEnd) { + // Start of format body. + HereDoc.State = 0; + sc.SetState(SCE_PL_FORMAT); } - if (HereDoc.State == 4 && isEOLChar(ch)) { - // Start of format body. - HereDoc.State = 0; - styler.ColourTo(i - 1, state); - state = SCE_PL_FORMAT; - } - if (state == SCE_PL_DEFAULT) { - if ((isascii(ch) && isdigit(ch)) || (isascii(chNext) && isdigit(chNext) && - (ch == '.' || ch == 'v'))) { - state = SCE_PL_NUMBER; - backflag = BACK_NONE; + // Determine if a new state should be entered. + if (sc.state == SCE_PL_DEFAULT) { + if (IsADigit(sc.ch) || + (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) { + sc.SetState(SCE_PL_NUMBER); + backFlag = BACK_NONE; numState = PERLNUM_DECIMAL; dotCount = 0; - if (ch == '0') { // hex,bin,octal - if (chNext == 'x') { + if (sc.ch == '0') { // hex,bin,octal + if (sc.chNext == 'x') { numState = PERLNUM_HEX; - } else if (chNext == 'b') { - numState = PERLNUM_BINARY; - } else if (isascii(chNext) && isdigit(chNext)) { - numState = PERLNUM_OCTAL; - } - if (numState != PERLNUM_DECIMAL) { - i++; - ch = chNext; - chNext = chNext2; - } - } else if (ch == 'v') { // vector + } else if (sc.chNext == 'b') { + numState = PERLNUM_BINARY; + } else if (IsADigit(sc.chNext)) { + numState = PERLNUM_OCTAL; + } + if (numState != PERLNUM_DECIMAL) { + sc.Forward(); + } + } else if (sc.ch == 'v') { // vector numState = PERLNUM_V_VECTOR; } - } else if (isWordStart(ch)) { - // if immediately prefixed by '::', always a bareword - state = SCE_PL_WORD; - if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') { - state = SCE_PL_IDENTIFIER; - } - unsigned int kw = i + 1; - // first check for possible quote-like delimiter - if (ch == 's' && !isNonQuote(chNext)) { - state = SCE_PL_REGSUBST; + } else if (setWord.Contains(sc.ch)) { + // if immediately prefixed by '::', always a bareword + sc.SetState(SCE_PL_WORD); + if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') { + sc.ChangeState(SCE_PL_IDENTIFIER); + } + unsigned int bk = sc.currentPos; + unsigned int fw = sc.currentPos + 1; + // first check for possible quote-like delimiter + if (sc.ch == 's' && !setWord.Contains(sc.chNext)) { + sc.ChangeState(SCE_PL_REGSUBST); Quote.New(2); - } else if (ch == 'm' && !isNonQuote(chNext)) { - state = SCE_PL_REGEX; - Quote.New(1); - } else if (ch == 'q' && !isNonQuote(chNext)) { - state = SCE_PL_STRING_Q; - Quote.New(1); - } else if (ch == 'y' && !isNonQuote(chNext)) { - state = SCE_PL_REGSUBST; + } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) { + sc.ChangeState(SCE_PL_REGEX); + Quote.New(); + } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) { + sc.ChangeState(SCE_PL_STRING_Q); + Quote.New(); + } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) { + sc.ChangeState(SCE_PL_REGSUBST); Quote.New(2); - } else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) { - state = SCE_PL_REGSUBST; + } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) { + sc.ChangeState(SCE_PL_REGSUBST); Quote.New(2); - kw++; - } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) { - if (chNext == 'q') state = SCE_PL_STRING_QQ; - else if (chNext == 'x') state = SCE_PL_STRING_QX; - else if (chNext == 'r') state = SCE_PL_STRING_QR; - else if (chNext == 'w') state = SCE_PL_STRING_QW; - Quote.New(1); - kw++; - } else if (ch == 'x' && (chNext == '=' || // repetition - !isWordStart(chNext) || - (isdigit(chPrev) && isdigit(chNext)))) { - state = SCE_PL_OPERATOR; - } - // if potentially a keyword, scan forward and grab word, then check - // if it's really one; if yes, disambiguation test is performed - // otherwise it is always a bareword and we skip a lot of scanning - // note: keywords assumed to be limited to [_a-zA-Z] only - if (state == SCE_PL_WORD) { - while (isWordStart(styler.SafeGetCharAt(kw))) kw++; - if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) { - state = SCE_PL_IDENTIFIER; - } - } - // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this - // for quote-like delimiters/keywords, attempt to disambiguate - // to select for bareword, change state -> SCE_PL_IDENTIFIER - if (state != SCE_PL_IDENTIFIER && i > 0) { - unsigned int j = i; - bool moreback = false; // true if passed newline/comments - bool brace = false; // true if opening brace found - char ch2; - // first look backwards past whitespace/comments for EOLs - // if BACK_NONE, neither operator nor keyword, so skip test - if (backflag != BACK_NONE) { - while (--j > backPos) { - if (isEOLChar(styler.SafeGetCharAt(j))) - moreback = true; - } - ch2 = styler.SafeGetCharAt(j); - if (ch2 == '{' && !moreback) { - // {bareword: possible variable spec - brace = true; - } else if ((ch2 == '&' && styler.SafeGetCharAt(j - 1) != '&') - // &bareword: subroutine call - || (ch2 == '>' && styler.SafeGetCharAt(j - 1) == '-') - // ->bareword: part of variable spec - || (ch2 == 'b' && styler.Match(j - 2, "su"))) { - // sub bareword: subroutine declaration - // (implied BACK_KEYWORD, no keywords end in 'sub'!) - state = SCE_PL_IDENTIFIER; - } - // if status still ambiguous, look forward after word past - // tabs/spaces only; if ch2 isn't one of '[{(,' it can never - // match anything, so skip the whole thing - j = kw; - if (state != SCE_PL_IDENTIFIER - && (ch2 == '{' || ch2 == '(' || ch2 == '['|| ch2 == ',') - && kw < lengthDoc) { - while (ch2 = styler.SafeGetCharAt(j), - (ch2 == ' ' || ch2 == '\t') && j < lengthDoc) { - j++; - } - if ((ch2 == '}' && brace) - // {bareword}: variable spec - || (ch2 == '=' && styler.SafeGetCharAt(j + 1) == '>')) { - // [{(, bareword=>: hash literal - state = SCE_PL_IDENTIFIER; - } - } - } - } - backflag = BACK_NONE; - // an identifier or bareword - if (state == SCE_PL_IDENTIFIER) { - if ((!isWordStart(chNext) && chNext != '\'') - || (chNext == '.' && chNext2 == '.')) { - // We need that if length of word == 1! - // This test is copied from the SCE_PL_WORD handler. - styler.ColourTo(i, SCE_PL_IDENTIFIER); - state = SCE_PL_DEFAULT; - } - // a keyword - } else if (state == SCE_PL_WORD) { - i = kw - 1; - if (ch == '_' && chNext == '_' && - (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__") - || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__"))) { - styler.ColourTo(i, SCE_PL_DATASECTION); - state = SCE_PL_DATASECTION; - } else { - if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "format")) { - state = SCE_PL_FORMAT_IDENT; - HereDoc.State = 0; - } else { - state = SCE_PL_DEFAULT; - } - styler.ColourTo(i, SCE_PL_WORD); - backflag = BACK_KEYWORD; - backPos = i; - } - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - // a repetition operator 'x' - } else if (state == SCE_PL_OPERATOR) { - state = SCE_PL_DEFAULT; - goto handleOperator; - // quote-like delimiter, skip one char if double-char delimiter - } else { - i = kw - 1; - chNext = styler.SafeGetCharAt(i + 1); - } - } else if (ch == '#') { - state = SCE_PL_COMMENTLINE; - } else if (ch == '\"') { - state = SCE_PL_STRING; - Quote.New(1); - Quote.Open(ch); - backflag = BACK_NONE; - } else if (ch == '\'') { - if (chPrev == '&') { - // Archaic call - styler.ColourTo(i, state); - } else { - state = SCE_PL_CHARACTER; - Quote.New(1); - Quote.Open(ch); + sc.Forward(); + fw++; + } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext) + && !setWord.Contains(sc.GetRelative(2))) { + if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ); + else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX); + else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR); + else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w' + Quote.New(); + sc.Forward(); + fw++; + } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition + !setWord.Contains(sc.chNext) || + (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) { + sc.ChangeState(SCE_PL_OPERATOR); } - backflag = BACK_NONE; - } else if (ch == '`') { - state = SCE_PL_BACKTICKS; - Quote.New(1); - Quote.Open(ch); - backflag = BACK_NONE; - } else if (ch == '$') { - if ((chNext == '{') || isspacechar(chNext)) { - styler.ColourTo(i, SCE_PL_SCALAR); - } else { - state = SCE_PL_SCALAR; - if ((chNext == '`' && chNext2 == '`') - || (chNext == ':' && chNext2 == ':')) { - i += 2; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - } else { - i++; - ch = chNext; - chNext = chNext2; + // if potentially a keyword, scan forward and grab word, then check + // if it's really one; if yes, disambiguation test is performed + // otherwise it is always a bareword and we skip a lot of scanning + if (sc.state == SCE_PL_WORD) { + while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw)))) + fw++; + if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) { + sc.ChangeState(SCE_PL_IDENTIFIER); } } - backflag = BACK_NONE; - } else if (ch == '@') { - if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$' - || chNext == '_' || chNext == '+' || chNext == '-') { - state = SCE_PL_ARRAY; - } else if (chNext == ':' && chNext2 == ':') { - state = SCE_PL_ARRAY; - i += 2; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - } else if (chNext != '{' && chNext != '[') { - styler.ColourTo(i, SCE_PL_ARRAY); - } else { - styler.ColourTo(i, SCE_PL_ARRAY); + // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this + // for quote-like delimiters/keywords, attempt to disambiguate + // to select for bareword, change state -> SCE_PL_IDENTIFIER + if (sc.state != SCE_PL_IDENTIFIER && bk > 0) { + if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos)) + sc.ChangeState(SCE_PL_IDENTIFIER); } - backflag = BACK_NONE; - } else if (ch == '%') { - backflag = BACK_NONE; - if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$' - || chNext == '_' || chNext == '!' || chNext == '^') { - state = SCE_PL_HASH; - i++; - ch = chNext; - chNext = chNext2; - } else if (chNext == ':' && chNext2 == ':') { - state = SCE_PL_HASH; - i += 2; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - } else if (chNext == '{') { - styler.ColourTo(i, SCE_PL_HASH); + backFlag = BACK_NONE; + } else if (sc.ch == '#') { + sc.SetState(SCE_PL_COMMENTLINE); + } else if (sc.ch == '\"') { + sc.SetState(SCE_PL_STRING); + Quote.New(); + Quote.Open(sc.ch); + backFlag = BACK_NONE; + } else if (sc.ch == '\'') { + if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) { + // Archaic call + sc.SetState(SCE_PL_IDENTIFIER); } else { - goto handleOperator; + sc.SetState(SCE_PL_CHARACTER); + Quote.New(); + Quote.Open(sc.ch); } - } else if (ch == '*') { - backflag = BACK_NONE; - char strch[2]; - strch[0] = chNext; - strch[1] = '\0'; - if (chNext == ':' && chNext2 == ':') { - state = SCE_PL_SYMBOLTABLE; - i += 2; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - } else if (!isascii(chNext) || isalpha(chNext) || chNext == '_' - || NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) { - state = SCE_PL_SYMBOLTABLE; - i++; - ch = chNext; - chNext = chNext2; - } else if (chNext == '{') { - styler.ColourTo(i, SCE_PL_SYMBOLTABLE); + backFlag = BACK_NONE; + } else if (sc.ch == '`') { + sc.SetState(SCE_PL_BACKTICKS); + Quote.New(); + Quote.Open(sc.ch); + backFlag = BACK_NONE; + } else if (sc.ch == '$') { + sc.SetState(SCE_PL_SCALAR); + if (sc.chNext == '{') { + sc.ForwardSetState(SCE_PL_OPERATOR); + } else if (IsASpace(sc.chNext)) { + sc.ForwardSetState(SCE_PL_DEFAULT); } else { - if (chNext == '*') { // exponentiation - i++; - ch = chNext; - chNext = chNext2; + sc.Forward(); + if (sc.Match('`', '`') || sc.Match(':', ':')) { + sc.Forward(); } - goto handleOperator; } - } else if (ch == '/' || (ch == '<' && chNext == '<')) { + backFlag = BACK_NONE; + } else if (sc.ch == '@') { + sc.SetState(SCE_PL_ARRAY); + if (setArray.Contains(sc.chNext)) { + // no special treatment + } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') { + sc.Forward(2); + } else if (sc.chNext == '{' || sc.chNext == '[') { + sc.ForwardSetState(SCE_PL_OPERATOR); + } else { + sc.ChangeState(SCE_PL_OPERATOR); + } + backFlag = BACK_NONE; + } else if (setPreferRE.Contains(sc.ch)) { // Explicit backward peeking to set a consistent preferRE for // any slash found, so no longer need to track preferRE state. // Find first previous significant lexed element and interpret. - // Test for HERE doc start '<<' shares this code, helps to - // determine if it should be an operator. + // A few symbols shares this code for disambiguation. bool preferRE = false; - bool isHereDoc = (ch == '<'); - bool hereDocSpace = false; // these are for corner case: - bool hereDocScalar = false; // SCALAR [whitespace] '<<' - unsigned int bk = (i > 0)? i - 1: 0; - unsigned int bkend; - char bkch; + bool isHereDoc = sc.Match('<', '<'); + bool hereDocSpace = false; // for: SCALAR [whitespace] '<<' + unsigned int bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0; + unsigned int bkend; styler.Flush(); - if (styler.StyleAt(bk) == SCE_PL_DEFAULT) - hereDocSpace = true; - while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT || - styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) { - bk--; - } + if (styler.StyleAt(bk) == SCE_PL_DEFAULT) + hereDocSpace = true; + skipWhitespaceComment(styler, bk); if (bk == 0) { - // position 0 won't really be checked; rarely happens - // hard to fix due to an unsigned index i + // avoid backward scanning breakage preferRE = true; } else { int bkstyle = styler.StyleAt(bk); - bkch = styler.SafeGetCharAt(bk); + int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk)); switch(bkstyle) { case SCE_PL_OPERATOR: preferRE = true; if (bkch == ')' || bkch == ']') { preferRE = false; } else if (bkch == '}') { - // backtrack further, count balanced brace pairs - // if a brace pair found, see if it's a variable - int braceCount = 1; - while (--bk > 0) { - bkstyle = styler.StyleAt(bk); - if (bkstyle == SCE_PL_OPERATOR) { - bkch = styler.SafeGetCharAt(bk); - if (bkch == ';') { // early out - break; - } else if (bkch == '}') { - braceCount++; - } else if (bkch == '{') { - if (--braceCount == 0) - break; - } - } - } - if (bk == 0) { - // at beginning, true - } else if (braceCount == 0) { - // balanced { found, bk>0, skip more whitespace - if (styler.StyleAt(--bk) == SCE_PL_DEFAULT) { - while (bk > 0) { - bkstyle = styler.StyleAt(--bk); - if (bkstyle != SCE_PL_DEFAULT) - break; - } - } - bkstyle = styler.StyleAt(bk); - if (bkstyle == SCE_PL_SCALAR - || bkstyle == SCE_PL_ARRAY - || bkstyle == SCE_PL_HASH - || bkstyle == SCE_PL_SYMBOLTABLE - || bkstyle == SCE_PL_OPERATOR) { - preferRE = false; - } + // backtrack by counting balanced brace pairs + // needed to test for variables like ${}, @{} etc. + bkstyle = styleBeforeBracePair(styler, bk); + if (bkstyle == SCE_PL_SCALAR + || bkstyle == SCE_PL_ARRAY + || bkstyle == SCE_PL_HASH + || bkstyle == SCE_PL_SYMBOLTABLE + || bkstyle == SCE_PL_OPERATOR) { + preferRE = false; } + } else if (bkch == '+' || bkch == '-') { + if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1)) + && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2))) + // exceptions for operators: unary suffixes ++, -- + preferRE = false; } break; case SCE_PL_IDENTIFIER: preferRE = true; - if (bkch == '>') { // inputsymbol + bkstyle = styleCheckIdentifier(styler, bk); + if ((bkstyle == 1) || (bkstyle == 2)) { + // inputsymbol or var with "->" or "::" before identifier preferRE = false; - break; - } - // backtrack to find "->" or "::" before identifier - while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) { - bk--; - } - while (bk > 0) { - bkstyle = styler.StyleAt(bk); - if (bkstyle == SCE_PL_DEFAULT || - bkstyle == SCE_PL_COMMENTLINE) { - } else if (bkstyle == SCE_PL_OPERATOR) { - bkch = styler.SafeGetCharAt(bk); - // test for "->" and "::" - if ((bkch == '>' && styler.SafeGetCharAt(bk - 1) == '-') - || (bkch == ':' && styler.SafeGetCharAt(bk - 1) == ':')) { - preferRE = false; - break; - } - } else { - // bare identifier, if '/', /PATTERN/ unless digit/space immediately after '/' - // if '//', always expect defined-or operator to follow identifier - if (!isHereDoc && - (isspacechar(chNext) || isdigit(chNext) || chNext == '/')) - preferRE = false; - // HERE docs cannot have a space after the >> - if (isspacechar(chNext)) - preferRE = false; - break; + } else if (bkstyle == 3) { + // bare identifier, test cases follows: + if (sc.ch == '/') { + // if '/', /PATTERN/ unless digit/space immediately after '/' + // if '//', always expect defined-or operator to follow identifier + if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/') + preferRE = false; + } else if (sc.ch == '*' || sc.ch == '%') { + if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*')) + preferRE = false; + } else if (sc.ch == '<') { + if (IsASpace(sc.chNext) || sc.chNext == '=') + preferRE = false; } - bk--; } break; - case SCE_PL_SCALAR: // for $var<< case - hereDocScalar = true; - break; - // for HERE docs, always true for preferRE + case SCE_PL_SCALAR: // for $var<< case: + if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc + preferRE = true; + break; case SCE_PL_WORD: - preferRE = true; - if (isHereDoc) - break; - // adopt heuristics similar to vim-style rules: - // keywords always forced as /PATTERN/: split, if, elsif, while - // everything else /PATTERN/ unless digit/space immediately after '/' - // for '//', defined-or favoured unless special keywords - bkend = bk + 1; - while (bk > 0 && styler.StyleAt(bk-1) == SCE_PL_WORD) { - bk--; + preferRE = true; + // for HERE docs, always true + if (sc.ch == '/') { + // adopt heuristics similar to vim-style rules: + // keywords always forced as /PATTERN/: split, if, elsif, while + // everything else /PATTERN/ unless digit/space immediately after '/' + // for '//', defined-or favoured unless special keywords + bkend = bk + 1; + while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) { + bk--; + } + if (isPerlKeyword(bk, bkend, reWords, styler)) + break; + if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/') + preferRE = false; + } else if (sc.ch == '*' || sc.ch == '%') { + if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*')) + preferRE = false; + } else if (sc.ch == '<') { + if (IsASpace(sc.chNext) || sc.chNext == '=') + preferRE = false; } - if (isPerlKeyword(bk, bkend, reWords, styler)) - break; - if (isspacechar(chNext) || isdigit(chNext) || chNext == '/') - preferRE = false; - break; + break; // other styles uses the default, preferRE=false case SCE_PL_POD: - case SCE_PL_POD_VERB: case SCE_PL_HERE_Q: case SCE_PL_HERE_QQ: case SCE_PL_HERE_QX: @@ -709,555 +1054,114 @@ static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, break; } } - backflag = BACK_NONE; - if (isHereDoc) { // handle HERE doc - // if SCALAR whitespace '<<', *always* a HERE doc - if (preferRE || (hereDocSpace && hereDocScalar)) { - state = SCE_PL_HERE_DELIM; - HereDoc.State = 0; - } else { // << operator - i++; - ch = chNext; - chNext = chNext2; - goto handleOperator; - } - } else { // handle regexp - if (preferRE) { - state = SCE_PL_REGEX; - Quote.New(1); - Quote.Open(ch); - } else { // / and // operators - if (chNext == '/') { - i++; - ch = chNext; - chNext = chNext2; - } - goto handleOperator; - } - } - } else if (ch == '<') { - // looks forward for matching > on same line - unsigned int fw = i + 1; - while (fw < lengthDoc) { - char fwch = styler.SafeGetCharAt(fw); - if (fwch == ' ') { - if (styler.SafeGetCharAt(fw-1) != '\\' || - styler.SafeGetCharAt(fw-2) != '\\') - goto handleOperator; - } else if (isEOLChar(fwch) || isspacechar(fwch)) { - goto handleOperator; - } else if (fwch == '>') { - if ((fw - i) == 2 && // '<=>' case - styler.SafeGetCharAt(fw-1) == '=') { - goto handleOperator; - } - styler.ColourTo(fw, SCE_PL_IDENTIFIER); - i = fw; - ch = fwch; - chNext = styler.SafeGetCharAt(i+1); - } - fw++; - } - if (fw == lengthDoc) - goto handleOperator; - } else if (ch == '=' // POD - && isalpha(chNext) - && (isEOLChar(chPrev))) { - state = SCE_PL_POD; - backflag = BACK_NONE; - //sookedpos = 0; - //sooked[sookedpos] = '\0'; - } else if (ch == '-' // file test operators - && isSingleCharOp(chNext) - && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) { - styler.ColourTo(i + 1, SCE_PL_WORD); - state = SCE_PL_DEFAULT; - i++; - ch = chNext; - chNext = chNext2; - backflag = BACK_NONE; - } else if (ch == '-' // bareword promotion (-FOO cases) - && ((isascii(chNext) && isalpha(chNext)) || chNext == '_') - && backflag != BACK_NONE) { - state = SCE_PL_IDENTIFIER; - backflag = BACK_NONE; - } else if (ch == '(' && i > 0) { - // backtrack to identify if we're starting a sub prototype - // for generality, we need to ignore whitespace/comments - unsigned int bk = i - 1; // i > 0 tested above - styler.Flush(); - while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT || - styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) { - bk--; - } - if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier - goto handleOperator; - while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) { - bk--; - } - while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT || - styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) { - bk--; - } - if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword - || !styler.Match(bk - 2, "sub")) // assume suffix is unique! - goto handleOperator; - state = SCE_PL_SUB_PROTOTYPE; - backflag = BACK_NONE; - backPos = i; // needed for restart - } else if (isPerlOperator(ch)) { - if (ch == '.' && chNext == '.') { // .. and ... - i++; - if (chNext2 == '.') { i++; } - state = SCE_PL_DEFAULT; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - } - handleOperator: - styler.ColourTo(i, SCE_PL_OPERATOR); - backflag = BACK_OPERATOR; - backPos = i; - } else if (ch == 4 || ch == 26) { // ^D and ^Z ends valid perl source - styler.ColourTo(i, SCE_PL_DATASECTION); - state = SCE_PL_DATASECTION; - } else { - // keep colouring defaults to make restart easier - styler.ColourTo(i, SCE_PL_DEFAULT); - } - } else if (state == SCE_PL_NUMBER) { - if (ch == '.') { - if (chNext == '.') { - // double dot is always an operator - goto numAtEnd; - } else if (numState <= PERLNUM_FLOAT) { - // non-decimal number or float exponent, consume next dot - styler.ColourTo(i - 1, SCE_PL_NUMBER); - state = SCE_PL_DEFAULT; - goto handleOperator; - } else { // decimal or vectors allows dots - dotCount++; - if (numState == PERLNUM_DECIMAL) { - if (dotCount > 1) { - if (isdigit(chNext)) { // really a vector - numState = PERLNUM_VECTOR; - } else // number then dot - goto numAtEnd; - } - } else { // vectors - if (!isdigit(chNext)) // vector then dot - goto numAtEnd; - } - } - } else if (ch == '_') { - // permissive underscoring for number and vector literals - } else if (!isascii(ch) || isalnum(ch)) { - if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { - if (!isascii(ch) || isalpha(ch)) { - if (dotCount == 0) { // change to word - state = SCE_PL_IDENTIFIER; - } else { // vector then word - goto numAtEnd; - } + backFlag = BACK_NONE; + if (isHereDoc) { // handle '<<', HERE doc + if (preferRE) { + sc.SetState(SCE_PL_HERE_DELIM); + HereDoc.State = 0; + } else { // << operator + sc.SetState(SCE_PL_OPERATOR); + sc.Forward(); } - } else if (numState == PERLNUM_DECIMAL) { - if (ch == 'E' || ch == 'e') { // exponent - numState = PERLNUM_FLOAT; - if (chNext == '+' || chNext == '-') { - i++; - ch = chNext; - chNext = chNext2; + } else if (sc.ch == '*') { // handle '*', typeglob + if (preferRE) { + sc.SetState(SCE_PL_SYMBOLTABLE); + if (sc.chNext == ':' && sc.GetRelative(2) == ':') { + sc.Forward(2); + } else if (sc.chNext == '{') { + sc.ForwardSetState(SCE_PL_OPERATOR); + } else { + sc.Forward(); } - } else if (!isascii(ch) || !isdigit(ch)) { // number then word - goto numAtEnd; - } - } else if (numState == PERLNUM_FLOAT) { - if (!isdigit(ch)) { // float then word - goto numAtEnd; - } - } else if (numState == PERLNUM_OCTAL) { - if (!isdigit(ch)) - goto numAtEnd; - else if (ch > '7') - numState = PERLNUM_BAD; - } else if (numState == PERLNUM_BINARY) { - if (!isdigit(ch)) - goto numAtEnd; - else if (ch > '1') - numState = PERLNUM_BAD; - } else if (numState == PERLNUM_HEX) { - int ch2 = toupper(ch); - if (!isdigit(ch) && !(ch2 >= 'A' && ch2 <= 'F')) - goto numAtEnd; - } else {//(numState == PERLNUM_BAD) { - if (!isdigit(ch)) - goto numAtEnd; - } - } else { - // complete current number or vector - numAtEnd: - styler.ColourTo(i - 1, actualNumStyle(numState)); - state = SCE_PL_DEFAULT; - goto restartLexer; - } - } else if (state == SCE_PL_IDENTIFIER) { - if (!isWordStart(chNext) && chNext != '\'') { - styler.ColourTo(i, SCE_PL_IDENTIFIER); - state = SCE_PL_DEFAULT; - ch = ' '; - } - } else { - if (state == SCE_PL_COMMENTLINE) { - if (isEOLChar(ch)) { - styler.ColourTo(i - 1, state); - state = SCE_PL_DEFAULT; - goto restartLexer; - } else if (isEOLChar(chNext)) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - } - } else if (state == SCE_PL_HERE_DELIM) { - // - // From perldata.pod: - // ------------------ - // A line-oriented form of quoting is based on the shell ``here-doc'' - // syntax. - // Following a << you specify a string to terminate the quoted material, - // and all lines following the current line down to the terminating - // string are the value of the item. - // The terminating string may be either an identifier (a word), - // or some quoted text. - // If quoted, the type of quotes you use determines the treatment of - // the text, just as in regular quoting. - // An unquoted identifier works like double quotes. - // There must be no space between the << and the identifier. - // (If you put a space it will be treated as a null identifier, - // which is valid, and matches the first empty line.) - // (This is deprecated, -w warns of this syntax) - // The terminating string must appear by itself (unquoted and with no - // surrounding whitespace) on the terminating line. - // - // From Bash info: - // --------------- - // Specifier format is: <<[-]WORD - // Optional '-' is for removal of leading tabs from here-doc. - // Whitespace acceptable after <<[-] operator. - // - if (HereDoc.State == 0) { // '<<' encountered - bool gotspace = false; - unsigned int oldi = i; - if (chNext == ' ' || chNext == '\t') { - // skip whitespace; legal for quoted delimiters - gotspace = true; - do { - i++; - chNext = styler.SafeGetCharAt(i + 1); - } while ((i + 1 < lengthDoc) && (chNext == ' ' || chNext == '\t')); - chNext2 = styler.SafeGetCharAt(i + 2); - } - HereDoc.State = 1; - HereDoc.Quote = chNext; - HereDoc.Quoted = false; - HereDoc.DelimiterLength = 0; - HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; - if (chNext == '\'' || chNext == '"' || chNext == '`') { - // a quoted here-doc delimiter - i++; - ch = chNext; - chNext = chNext2; - HereDoc.Quoted = true; - } else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\' - || chNext == '=' || chNext == '$' || chNext == '@' - || ((isalpha(chNext) || chNext == '_') && gotspace)) { - // left shift << or <<= operator cases - // restore position if operator - i = oldi; - styler.ColourTo(i, SCE_PL_OPERATOR); - state = SCE_PL_DEFAULT; - backflag = BACK_OPERATOR; - backPos = i; - HereDoc.State = 0; - goto restartLexer; } else { - // an unquoted here-doc delimiter, no special handling - // (cannot be prefixed by spaces/tabs), or - // symbols terminates; deprecated zero-length delimiter + sc.SetState(SCE_PL_OPERATOR); + if (sc.chNext == '*') // exponentiation + sc.Forward(); } - - } else if (HereDoc.State == 1) { // collect the delimiter - backflag = BACK_NONE; - if (HereDoc.Quoted) { // a quoted here-doc delimiter - if (ch == HereDoc.Quote) { // closing quote => end of delimiter - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; + } else if (sc.ch == '%') { // handle '%', hash + if (preferRE) { + sc.SetState(SCE_PL_HASH); + if (setHash.Contains(sc.chNext)) { + sc.Forward(); + } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') { + sc.Forward(2); + } else if (sc.chNext == '{') { + sc.ForwardSetState(SCE_PL_OPERATOR); } else { - if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote - i++; - ch = chNext; - chNext = chNext2; - } - HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; - HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; + sc.ChangeState(SCE_PL_OPERATOR); } - } else { // an unquoted here-doc delimiter - if (isalnum(ch) || ch == '_') { - HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch; - HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; + } else { + sc.SetState(SCE_PL_OPERATOR); + } + } else if (sc.ch == '<') { // handle '<', inputsymbol + if (preferRE) { + // forward scan + int i = inputsymbolScan(styler, sc.currentPos, endPos); + if (i > 0) { + sc.SetState(SCE_PL_IDENTIFIER); + sc.Forward(i); } else { - styler.ColourTo(i - 1, state); - state = SCE_PL_DEFAULT; - goto restartLexer; + sc.SetState(SCE_PL_OPERATOR); } - } - if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { - styler.ColourTo(i - 1, state); - state = SCE_PL_ERROR; - goto restartLexer; - } - } - } else if (HereDoc.State == 2) { - // state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX - if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) { - i += HereDoc.DelimiterLength; - chPrev = styler.SafeGetCharAt(i - 1); - ch = styler.SafeGetCharAt(i); - if (isEOLChar(ch)) { - styler.ColourTo(i - 1, state); - state = SCE_PL_DEFAULT; - backflag = BACK_NONE; - HereDoc.State = 0; - goto restartLexer; - } - chNext = styler.SafeGetCharAt(i + 1); - } - } else if (state == SCE_PL_POD - || state == SCE_PL_POD_VERB) { - if (isEOLChar(chPrev)) { - if (ch == ' ' || ch == '\t') { - styler.ColourTo(i - 1, state); - state = SCE_PL_POD_VERB; } else { - styler.ColourTo(i - 1, state); - state = SCE_PL_POD; - if (ch == '=') { - if (isMatch(styler, lengthDoc, i, "=cut")) { - styler.ColourTo(i - 1 + 4, state); - i += 4; - state = SCE_PL_DEFAULT; - ch = styler.SafeGetCharAt(i); - //chNext = styler.SafeGetCharAt(i + 1); - goto restartLexer; - } - } + sc.SetState(SCE_PL_OPERATOR); } - } - } else if (state == SCE_PL_SCALAR // variable names - || state == SCE_PL_ARRAY - || state == SCE_PL_HASH - || state == SCE_PL_SYMBOLTABLE) { - if (ch == ':' && chNext == ':') { // skip :: - i++; - ch = chNext; - chNext = chNext2; - } - else if (isEndVar(ch)) { - if (i == (styler.GetStartSegment() + 1)) { - // Special variable: $(, $_ etc. - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - } else { - styler.ColourTo(i - 1, state); - state = SCE_PL_DEFAULT; - goto restartLexer; + } else { // handle '/', regexp + if (preferRE) { + sc.SetState(SCE_PL_REGEX); + Quote.New(); + Quote.Open(sc.ch); + } else { // / and // operators + sc.SetState(SCE_PL_OPERATOR); + if (sc.chNext == '/') { + sc.Forward(); + } } } - } else if (state == SCE_PL_REGEX - || state == SCE_PL_STRING_QR - ) { - if (!Quote.Up && !isspacechar(ch)) { - Quote.Open(ch); - } else if (ch == '\\' && Quote.Up != '\\') { - // SG: Is it save to skip *every* escaped char? - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); + } else if (sc.ch == '=' // POD + && setPOD.Contains(sc.chNext) + && sc.atLineStart) { + sc.SetState(SCE_PL_POD); + backFlag = BACK_NONE; + } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases + unsigned int bk = sc.currentPos; + unsigned int fw = 2; + if (setSingleCharOp.Contains(sc.chNext) && // file test operators + !setWord.Contains(sc.GetRelative(2))) { + sc.SetState(SCE_PL_WORD); } else { - if (ch == Quote.Down /*&& chPrev != '\\'*/) { - Quote.Count--; - if (Quote.Count == 0) { - Quote.Rep--; - if (Quote.Up == Quote.Down) { - Quote.Count++; - } - } - if (!isalpha(chNext)) { - if (Quote.Rep <= 0) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - ch = ' '; - } - } - } else if (ch == Quote.Up /*&& chPrev != '\\'*/) { - Quote.Count++; - } else if (!isascii(chNext) || !isalpha(chNext)) { - if (Quote.Rep <= 0) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - ch = ' '; - } - } + // nominally a minus and bareword; find extent of bareword + while (setWord.Contains(sc.GetRelative(fw))) + fw++; + sc.SetState(SCE_PL_OPERATOR); } - } else if (state == SCE_PL_REGSUBST) { - if (!Quote.Up && !isspacechar(ch)) { - Quote.Open(ch); - } else if (ch == '\\' && Quote.Up != '\\') { - // SG: Is it save to skip *every* escaped char? - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); + // force to bareword for hash key => or {variable literal} cases + if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) { + sc.ChangeState(SCE_PL_IDENTIFIER); + } + backFlag = BACK_NONE; + } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype + if (styleCheckSubPrototype(styler, sc.currentPos - 1)) { + sc.SetState(SCE_PL_SUB_PROTOTYPE); + backFlag = BACK_NONE; } else { - if (Quote.Count == 0 && Quote.Rep == 1) { - /* We matched something like s(...) or tr{...} - * and are looking for the next matcher characters, - * which could be either bracketed ({...}) or non-bracketed - * (/.../). - * - * Number-signs are problematic. If they occur after - * the close of the first part, treat them like - * a Quote.Up char, even if they actually start comments. - * - * If we find an alnum, we end the regsubst, and punt. - * - * Eric Promislow ericp@activestate.com Aug 9,2000 - */ - if (isspacechar(ch)) { - // Keep going - } - else if (!isascii(ch) || isalnum(ch)) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - ch = ' '; - } else { - Quote.Open(ch); - } - } else if (ch == Quote.Down /*&& chPrev != '\\'*/) { - Quote.Count--; - if (Quote.Count == 0) { - Quote.Rep--; - } - if (!isascii(chNext) || !isalpha(chNext)) { - if (Quote.Rep <= 0) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - ch = ' '; - } - } - if (Quote.Up == Quote.Down) { - Quote.Count++; - } - } else if (ch == Quote.Up /*&& chPrev != '\\'*/) { - Quote.Count++; - } else if (!isascii(chNext) || !isalpha(chNext)) { - if (Quote.Rep <= 0) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - ch = ' '; - } - } + sc.SetState(SCE_PL_OPERATOR); } - } else if (state == SCE_PL_STRING_Q - || state == SCE_PL_STRING_QQ - || state == SCE_PL_STRING_QX - || state == SCE_PL_STRING_QW - || state == SCE_PL_STRING - || state == SCE_PL_CHARACTER - || state == SCE_PL_BACKTICKS - ) { - if (!Quote.Down && !isspacechar(ch)) { - Quote.Open(ch); - } else if (ch == '\\' && Quote.Up != '\\') { - i++; - ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - } else if (ch == Quote.Down) { - Quote.Count--; - if (Quote.Count == 0) { - Quote.Rep--; - if (Quote.Rep <= 0) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - ch = ' '; - } - if (Quote.Up == Quote.Down) { - Quote.Count++; - } - } - } else if (ch == Quote.Up) { - Quote.Count++; + } else if (setPerlOperator.Contains(sc.ch)) { // operators + sc.SetState(SCE_PL_OPERATOR); + if (sc.Match('.', '.')) { // .. and ... + sc.Forward(); + if (sc.chNext == '.') sc.Forward(); } - } else if (state == SCE_PL_SUB_PROTOTYPE) { - char strch[2]; - strch[0] = ch; - strch[1] = '\0'; - if (NULL != strstr("\\[$@%&*];", strch)) { - // keep going - } else if (ch == ')') { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - } else { - // abandon prototype, restart from '(' - i = backPos; - styler.ColourTo(i, SCE_PL_OPERATOR); - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - state = SCE_PL_DEFAULT; - } - } else if (state == SCE_PL_FORMAT_IDENT) { - // occupies different HereDoc states to avoid clashing with HERE docs - if (HereDoc.State == 0) { - if ((isascii(ch) && isalpha(ch)) || ch == '_' // probable identifier - || ch == '=') { // no identifier - HereDoc.State = 3; - HereDoc.Quoted = false; // whitespace flag - } else if (ch == ' ' || ch == '\t') { - styler.ColourTo(i, SCE_PL_DEFAULT); - } else { - state = SCE_PL_DEFAULT; - HereDoc.State = 0; - goto restartLexer; - } - } - if (HereDoc.State == 3) { // with just a '=', state goes 0->3->4 - if (ch == '=') { - styler.ColourTo(i, SCE_PL_FORMAT_IDENT); - state = SCE_PL_DEFAULT; - HereDoc.State = 4; - } else if (ch == ' ' || ch == '\t') { - HereDoc.Quoted = true; - } else if (isEOLChar(ch) || (HereDoc.Quoted && ch != '=')) { - // abandon format, restart from after 'format' - i = backPos + 1; - ch = styler.SafeGetCharAt(i); - chNext = styler.SafeGetCharAt(i + 1); - state = SCE_PL_DEFAULT; - HereDoc.State = 0; - } - } - } else if (state == SCE_PL_FORMAT) { - if (isEOLChar(chPrev)) { - styler.ColourTo(i - 1, state); - if (ch == '.' && isEOLChar(chNext)) { - styler.ColourTo(i, state); - state = SCE_PL_DEFAULT; - } - } - } - } - if (state == SCE_PL_ERROR) { - break; + } else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source + sc.SetState(SCE_PL_DATASECTION); + } else { + // keep colouring defaults + sc.Complete(); + } } - chPrev = ch; } - styler.ColourTo(lengthDoc - 1, state); + sc.Complete(); } static bool IsCommentLine(int line, Accessor &styler) { @@ -1265,17 +1169,17 @@ static bool IsCommentLine(int line, Accessor &styler) { int eol_pos = styler.LineStart(line + 1) - 1; for (int i = pos; i < eol_pos; i++) { char ch = styler[i]; - int style = styler.StyleAt(i); + int style = styler.StyleAt(i); if (ch == '#' && style == SCE_PL_COMMENTLINE) return true; - else if (ch != ' ' && ch != '\t') + else if (!IsASpaceOrTab(ch)) return false; } return false; } static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[], - Accessor &styler) { + Accessor &styler) { bool foldComment = styler.GetPropertyInt("fold.comment") != 0; bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; // Custom folding of POD and packages @@ -1300,18 +1204,18 @@ static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[], int style = styleNext; styleNext = styler.StyleAt(i + 1); bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); - bool atLineStart = isEOLChar(chPrev) || i == 0; - // Comment folding + bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0; + // Comment folding if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) - { - if (!IsCommentLine(lineCurrent - 1, styler) - && IsCommentLine(lineCurrent + 1, styler)) - levelCurrent++; - else if (IsCommentLine(lineCurrent - 1, styler) - && !IsCommentLine(lineCurrent+1, styler)) - levelCurrent--; - } - if (style == SCE_C_OPERATOR) { + { + if (!IsCommentLine(lineCurrent - 1, styler) + && IsCommentLine(lineCurrent + 1, styler)) + levelCurrent++; + else if (IsCommentLine(lineCurrent - 1, styler) + && !IsCommentLine(lineCurrent+1, styler)) + levelCurrent--; + } + if (style == SCE_PL_OPERATOR) { if (ch == '{') { levelCurrent++; } else if (ch == '}') { @@ -1329,17 +1233,17 @@ static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[], else if (styler.Match(i, "=head")) isPodHeading = true; } else if (style == SCE_PL_DATASECTION) { - if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE) - levelCurrent++; - else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE) - levelCurrent--; - else if (styler.Match(i, "=head")) + if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE) + levelCurrent++; + else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE) + levelCurrent--; + else if (styler.Match(i, "=head")) isPodHeading = true; - // if package used or unclosed brace, level > SC_FOLDLEVELBASE! - // reset needed as level test is vs. SC_FOLDLEVELBASE - else if (styler.Match(i, "__END__")) - levelCurrent = SC_FOLDLEVELBASE; - } + // if package used or unclosed brace, level > SC_FOLDLEVELBASE! + // reset needed as level test is vs. SC_FOLDLEVELBASE + else if (styler.Match(i, "__END__")) + levelCurrent = SC_FOLDLEVELBASE; + } } // Custom package folding if (foldPackage && atLineStart) { @@ -1351,9 +1255,9 @@ static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[], if (atEOL) { int lev = levelPrev; if (isPodHeading) { - lev = levelPrev - 1; - lev |= SC_FOLDLEVELHEADERFLAG; - isPodHeading = false; + lev = levelPrev - 1; + lev |= SC_FOLDLEVELHEADERFLAG; + isPodHeading = false; } // Check if line was a package declaration // because packages need "special" treatment @@ -1362,7 +1266,7 @@ static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[], levelCurrent = SC_FOLDLEVELBASE + 1; isPackageLine = false; } - lev |= levelCurrent << 16; + lev |= levelCurrent << 16; if (visibleChars == 0 && foldCompact) lev |= SC_FOLDLEVELWHITEFLAG; if ((levelCurrent > levelPrev) && (visibleChars > 0)) @@ -1389,4 +1293,3 @@ static const char * const perlWordListDesc[] = { }; LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc, 8); - |