aboutsummaryrefslogtreecommitdiffhomepage
path: root/lexers/LexPerl.cxx
diff options
context:
space:
mode:
authorNeil <nyamatongwe@gmail.com>2021-01-29 20:51:34 +1100
committerNeil <nyamatongwe@gmail.com>2021-01-29 20:51:34 +1100
commit54341053b273c905afa7503d8dadcc4c46a0d2d3 (patch)
treee25fe4ee686d7d59068f46746006d62a8ca30692 /lexers/LexPerl.cxx
parentba8b1a91525dd90f8fdcc75480f37815fecce2d2 (diff)
downloadscintilla-mirror-54341053b273c905afa7503d8dadcc4c46a0d2d3.tar.gz
Remove Lexilla files from Scintilla
Diffstat (limited to 'lexers/LexPerl.cxx')
-rw-r--r--lexers/LexPerl.cxx1848
1 files changed, 0 insertions, 1848 deletions
diff --git a/lexers/LexPerl.cxx b/lexers/LexPerl.cxx
deleted file mode 100644
index 705463991..000000000
--- a/lexers/LexPerl.cxx
+++ /dev/null
@@ -1,1848 +0,0 @@
-// Scintilla source code edit control
-/** @file LexPerl.cxx
- ** Lexer for Perl.
- ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
- **/
-// Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
-// Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
-// The License.txt file describes the conditions under which this software may be distributed.
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <assert.h>
-#include <ctype.h>
-
-#include <string>
-#include <map>
-
-#include "ILexer.h"
-#include "Scintilla.h"
-#include "SciLexer.h"
-
-#include "WordList.h"
-#include "LexAccessor.h"
-#include "StyleContext.h"
-#include "CharacterSet.h"
-#include "LexerModule.h"
-#include "OptionSet.h"
-#include "DefaultLexer.h"
-
-using namespace Scintilla;
-
-// Info for HERE document handling from perldata.pod (reformatted):
-// ----------------------------------------------------------------
-// A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
-// Following a << you specify a string to terminate the quoted material, and
-// all lines following the current line down to the terminating string are
-// the value of the item.
-// Prefixing the terminating string with a "~" specifies that you want to
-// use "Indented Here-docs" (see below).
-// * The terminating string may be either an identifier (a word), or some
-// quoted text.
-// * If quoted, the type of quotes you use determines the treatment of the
-// text, just as in regular quoting.
-// * An unquoted identifier works like double quotes.
-// * There must be no space between the << and the identifier.
-// (If you put a space it will be treated as a null identifier,
-// which is valid, and matches the first empty line.)
-// (This is deprecated, -w warns of this syntax)
-// * The terminating string must appear by itself (unquoted and
-// with no surrounding whitespace) on the terminating line.
-//
-// Indented Here-docs
-// ------------------
-// The here-doc modifier "~" allows you to indent your here-docs to
-// make the code more readable.
-// The delimiter is used to determine the exact whitespace to remove
-// from the beginning of each line. All lines must have at least the
-// same starting whitespace (except lines only containing a newline)
-// or perl will croak. Tabs and spaces can be mixed, but are matched
-// exactly. One tab will not be equal to 8 spaces!
-// Additional beginning whitespace (beyond what preceded the
-// delimiter) will be preserved.
-
-#define HERE_DELIM_MAX 256 // size of the HERE doc delimiter buffer, including the terminating NUL
-
-#define PERLNUM_BINARY 1 // order is significant: 1-3 cannot have a dot
-#define PERLNUM_OCTAL 2
-#define PERLNUM_FLOAT_EXP 3 // exponent part only
-#define PERLNUM_HEX 4 // may be a hex float
-#define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
-#define PERLNUM_VECTOR 6 // styled as SCE_PL_STRING by actualNumStyle()
-#define PERLNUM_V_VECTOR 7 // styled as SCE_PL_STRING by actualNumStyle()
-#define PERLNUM_BAD 8 // invalid literal; styled as SCE_PL_ERROR by actualNumStyle()
-
-#define BACK_NONE 0 // lookback state for bareword disambiguation:
-#define BACK_OPERATOR 1 // whitespace/comments are insignificant
-#define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
-
-#define SUB_BEGIN 0 // states for subroutine prototype scan:
-#define SUB_HAS_PROTO 1 // only 'prototype' attribute allows prototypes
-#define SUB_HAS_ATTRIB 2 // other attributes can exist leftward
-#define SUB_HAS_MODULE 3 // sub name can have a ::identifier part
-#define SUB_HAS_SUB 4 // 'sub' keyword
-
-// all interpolated styles are different from their parent styles by a constant difference
-// we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
-#define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING)
-
-static bool isPerlKeyword(Sci_PositionU start, Sci_PositionU end, WordList &keywords, LexAccessor &styler) {
- // Returns keywords.InList() for the document text in [start, end), copied into a local buffer.
- // Old-style keyword matcher; needed because GetCurrent() needs the current segment to be committed, but we may abandon early...
- char s[100];
- Sci_PositionU i, len = end - start;
- if (len > 30) { len = 30; } // only the first 30 characters are looked up; longer text is truncated
- for (i = 0; i < len; i++, start++) s[i] = styler[start];
- s[i] = '\0'; // i == len here, so the copy is always NUL-terminated
- return keywords.InList(s);
-}
-
-static int disambiguateBareword(LexAccessor &styler, Sci_PositionU bk, Sci_PositionU fw,
- int backFlag, Sci_PositionU backPos, Sci_PositionU endPos) {
- // identifiers are recognized by Perl as barewords under some
- // conditions, the following attempts to do the disambiguation
- // by looking backward (before backPos) and forward (from fw); result in 2 LSB: bit 0 (value 1) = backward pattern matched, bit 1 (value 2) = forward pattern matched
- int result = 0;
- bool moreback = false; // true if passed newline/comments
- bool brace = false; // true if opening brace found
- // if BACK_NONE, neither operator nor keyword, so skip test
- if (backFlag == BACK_NONE)
- return result;
- // first look backwards past whitespace/comments to set EOL flag
- // (some disambiguation patterns must be on a single line)
- if (backPos <= static_cast<Sci_PositionU>(styler.LineStart(styler.GetLine(bk))))
- moreback = true; // backPos is at or before the start of the line containing bk
- // look backwards at last significant lexed item for disambiguation
- bk = backPos - 1; // from here on bk is the character just before backPos
- int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
- if (ch == '{' && !moreback) {
- // {bareword: possible variable spec
- brace = true;
- } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
- // &bareword: subroutine call
- || styler.Match(bk - 1, "->")
- // ->bareword: part of variable spec
- || styler.Match(bk - 1, "::")
- // ::bareword: part of module spec
- || styler.Match(bk - 2, "sub")) {
- // sub bareword: subroutine declaration
- // (implied BACK_KEYWORD, no keywords end in 'sub'!)
- result |= 1;
- }
- // next, scan forward after word past tab/spaces only;
- // if ch isn't one of '[{(,' we can skip the test
- if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
- && fw < endPos) {
- while (IsASpaceOrTab(ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)))
- && fw < endPos) { // note: ch is overwritten with each character read at fw
- fw++;
- }
- if ((ch == '}' && brace)
- // {bareword}: variable spec
- || styler.Match(fw, "=>")) {
- // [{(, bareword=>: hash literal
- result |= 2;
- }
- }
- return result;
-}
-
-static void skipWhitespaceComment(LexAccessor &styler, Sci_PositionU &p) {
- // when backtracking, we need to skip whitespace and comments: p is moved backwards in place while the style at p is SCE_PL_DEFAULT or SCE_PL_COMMENTLINE
- while (p > 0) { // position 0 itself is never examined; p simply stops there
- const int style = styler.StyleAt(p);
- if (style != SCE_PL_DEFAULT && style != SCE_PL_COMMENTLINE)
- break; // p now rests on a character with a significant style
- p--;
- }
-}
-
-static int findPrevLexeme(LexAccessor &styler, Sci_PositionU &bk, int &style) {
- // scan backward past whitespace and comments to find a lexeme; returns its length, leaves bk on its first character and stores its style in 'style'
- skipWhitespaceComment(styler, bk);
- if (bk == 0)
- return 0; // reached the document start: reported as "no lexeme", 'style' is left untouched
- int sz = 1;
- style = styler.StyleAt(bk);
- while (bk > 0) { // find extent of lexeme
- if (styler.StyleAt(bk - 1) == style) { // a lexeme is a run of identically styled characters
- bk--; sz++;
- } else
- break;
- }
- return sz;
-}
-
-static int styleBeforeBracePair(LexAccessor &styler, Sci_PositionU bk) {
- // backtrack to find open '{' corresponding to a '}', balanced
- // return significant style to be tested for '/' disambiguation; SCE_PL_DEFAULT is returned when no balanced '{' is found
- int braceCount = 1; // presumably accounts for the '}' the caller is matching - confirm against caller
- if (bk == 0)
- return SCE_PL_DEFAULT;
- while (--bk > 0) { // position 0 is never examined
- if (styler.StyleAt(bk) == SCE_PL_OPERATOR) { // only operator-styled braces and ';' are considered
- int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
- if (bkch == ';') { // early out
- break;
- } else if (bkch == '}') {
- braceCount++;
- } else if (bkch == '{') {
- if (--braceCount == 0) break;
- }
- }
- }
- if (bk > 0 && braceCount == 0) {
- // balanced { found, bk > 0, skip more whitespace/comments
- bk--;
- skipWhitespaceComment(styler, bk);
- return styler.StyleAt(bk); // style of the last significant character before the '{'
- }
- return SCE_PL_DEFAULT;
-}
-
-static int styleCheckIdentifier(LexAccessor &styler, Sci_PositionU bk) {
- // backtrack to classify sub-styles of identifier under test
- // return sub-style to be tested for '/' disambiguation: 1 = inputsymbol, 2 = preceded by "->" or "::", 3 = bare identifier, 0 = document start reached
- if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo>
- return 1;
- // backtrack to check for possible "->" or "::" before identifier
- while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
- bk--; // step over the identifier itself
- }
- while (bk > 0) {
- int bkstyle = styler.StyleAt(bk);
- if (bkstyle == SCE_PL_DEFAULT
- || bkstyle == SCE_PL_COMMENTLINE) {
- // skip whitespace, comments
- } else if (bkstyle == SCE_PL_OPERATOR) {
- // test for "->" and "::"; any other operator character is skipped and the backward scan continues
- if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
- return 2;
- } else
- return 3; // bare identifier
- bk--;
- }
- return 0;
-}
-
-static int podLineScan(LexAccessor &styler, Sci_PositionU &pos, Sci_PositionU endPos) {
- // forward scan the current line to classify line for POD style; returns SCE_PL_DEFAULT (empty or whitespace only), SCE_PL_POD_VERB (leading whitespace then text) or SCE_PL_POD (text at column 0)
- int state = -1; // -1: nothing seen on this line yet
- while (pos < endPos) {
- int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
- if (ch == '\n' || ch == '\r') {
- if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++; // for CRLF, leave pos on the LF
- break; // pos is left on the last end-of-line character, not past it
- }
- if (IsASpaceOrTab(ch)) { // whitespace, take note
- if (state == -1)
- state = SCE_PL_DEFAULT; // line starts with whitespace
- } else if (state == SCE_PL_DEFAULT) { // verbatim POD line
- state = SCE_PL_POD_VERB;
- } else if (state != SCE_PL_POD_VERB) { // regular POD line
- state = SCE_PL_POD;
- }
- pos++;
- }
- if (state == -1)
- state = SCE_PL_DEFAULT; // empty line (or pos already at endPos)
- return state;
-}
-
-static bool styleCheckSubPrototype(LexAccessor &styler, Sci_PositionU bk) {
- // backtrack to identify if we're starting a subroutine prototype
- // we also need to ignore whitespace/comments, format is like:
- // sub abc::pqr :const :prototype(...)
- // lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc.
- // and a state machine generates legal subroutine syntax matches; returns true only if the scan reaches the 'sub' keyword (SUB_HAS_SUB)
- styler.Flush(); // presumably commits pending styling so the StyleAt() calls below see it - TODO confirm
- int state = SUB_BEGIN;
- do {
- // find two lexemes, lexeme 2 follows lexeme 1
- int style2 = SCE_PL_DEFAULT;
- Sci_PositionU pos2 = bk;
- int len2 = findPrevLexeme(styler, pos2, style2);
- int style1 = SCE_PL_DEFAULT;
- Sci_PositionU pos1 = pos2;
- if (pos1 > 0) pos1--; // step to just before lexeme 2
- int len1 = findPrevLexeme(styler, pos1, style1);
- if (len1 == 0 || len2 == 0) // lexeme pair must exist
- break;
-
- // match parts of syntax, if invalid subroutine syntax, break off
- if (style1 == SCE_PL_OPERATOR && len1 == 1 &&
- styler.SafeGetCharAt(pos1) == ':') { // ':'
- if (style2 == SCE_PL_IDENTIFIER || style2 == SCE_PL_WORD) {
- if (len2 == 9 && styler.Match(pos2, "prototype")) { // ':' 'prototype'
- if (state == SUB_BEGIN) { // ':prototype' is only accepted as the first pair found, i.e. directly before the '('
- state = SUB_HAS_PROTO;
- } else
- break;
- } else { // ':' <attribute>
- if (state == SUB_HAS_PROTO || state == SUB_HAS_ATTRIB) {
- state = SUB_HAS_ATTRIB;
- } else
- break;
- }
- } else
- break;
- } else if (style1 == SCE_PL_OPERATOR && len1 == 2 &&
- styler.Match(pos1, "::")) { // '::'
- if (style2 == SCE_PL_IDENTIFIER) { // '::' <identifier>
- state = SUB_HAS_MODULE;
- } else
- break;
- } else if (style1 == SCE_PL_WORD && len1 == 3 &&
- styler.Match(pos1, "sub")) { // 'sub'
- if (style2 == SCE_PL_IDENTIFIER) { // 'sub' <identifier>
- state = SUB_HAS_SUB;
- } else
- break;
- } else
- break;
- bk = pos1; // set position for finding next lexeme pair
- if (bk > 0) bk--;
- } while (state != SUB_HAS_SUB);
- return (state == SUB_HAS_SUB);
-}
-
-static int actualNumStyle(int numberStyle) { // maps a PERLNUM_* lexing state to the SCE_PL_* style used for the finished literal
- if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
- return SCE_PL_STRING; // vectors are styled as strings
- } else if (numberStyle == PERLNUM_BAD) {
- return SCE_PL_ERROR; // invalid literal
- }
- return SCE_PL_NUMBER; // every other state is an ordinary number
-}
-
-static int opposite(int ch) { // returns the closing delimiter that pairs with opening delimiter ch
- if (ch == '(') return ')';
- if (ch == '[') return ']';
- if (ch == '{') return '}';
- if (ch == '<') return '>';
- return ch; // any other character is returned unchanged, i.e. it closes itself
-}
-
-static bool IsCommentLine(Sci_Position line, LexAccessor &styler) { // true if the first non-space/tab character on 'line' is a '#' styled SCE_PL_COMMENTLINE
- Sci_Position pos = styler.LineStart(line);
- Sci_Position eol_pos = styler.LineStart(line + 1) - 1; // last position of this line; the loop below stops before it
- for (Sci_Position i = pos; i < eol_pos; i++) {
- char ch = styler[i];
- int style = styler.StyleAt(i);
- if (ch == '#' && style == SCE_PL_COMMENTLINE)
- return true;
- else if (!IsASpaceOrTab(ch))
- return false; // something other than a comment starts the line
- }
- return false; // blank or whitespace-only line
-}
-
-static bool IsPackageLine(Sci_Position line, LexAccessor &styler) { // true if 'line' begins, at its very first column, with the keyword-styled text "package"
- Sci_Position pos = styler.LineStart(line);
- int style = styler.StyleAt(pos);
- if (style == SCE_PL_WORD && styler.Match(pos, "package")) { // leading whitespace is not skipped
- return true;
- }
- return false;
-}
-
-static int PodHeadingLevel(Sci_Position pos, LexAccessor &styler) { // returns 1-4 if the character at pos + 5 is a digit '1'..'4', otherwise 0
- int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5)); // presumably pos is the start of "=headN", putting N at offset 5 - confirm against caller
- if (lvl >= '1' && lvl <= '4') {
- return lvl - '0';
- }
- return 0;
-}
-
-// An individual named option for use in an OptionSet
-
-// Options used for LexerPerl
-struct OptionsPerl { // plain bag of boolean settings; each field is bound to a property name in OptionSetPerl
- bool fold; // "fold"
- bool foldComment; // "fold.comment"
- bool foldCompact; // "fold.compact"
- // Custom folding of POD and packages
- bool foldPOD; // fold.perl.pod
- // Enable folding Pod blocks when using the Perl lexer.
- bool foldPackage; // fold.perl.package
- // Enable folding packages when using the Perl lexer.
-
- bool foldCommentExplicit; // "fold.perl.comment.explicit"
-
- bool foldAtElse; // "fold.perl.at.else"
-
- OptionsPerl() { // defaults: fold, foldComment and foldAtElse start off; the remaining options start on
- fold = false;
- foldComment = false;
- foldCompact = true;
- foldPOD = true;
- foldPackage = true;
- foldCommentExplicit = true;
- foldAtElse = false;
- }
-};
-
-static const char *const perlWordListDesc[] = { // null-terminated list of word list descriptions, passed to DefineWordListSets()
- "Keywords",
- 0
-};
-
-struct OptionSetPerl : public OptionSet<OptionsPerl> { // binds property names to the OptionsPerl fields they control
- OptionSetPerl() {
- DefineProperty("fold", &OptionsPerl::fold); // the first three properties are registered without a description
-
- DefineProperty("fold.comment", &OptionsPerl::foldComment);
-
- DefineProperty("fold.compact", &OptionsPerl::foldCompact);
-
- DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
- "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
-
- DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
- "Set to 0 to disable folding packages when using the Perl lexer.");
-
- DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
- "Set to 0 to disable explicit folding.");
-
- DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
- "This option enables Perl folding on a \"} else {\" line of an if statement.");
-
- DefineWordListSets(perlWordListDesc);
- }
-};
-
-class LexerPerl : public DefaultLexer { // lexer object for Perl: property and word-list plumbing is delegated to osPerl, lexing and folding are done in Lex() and Fold()
- CharacterSet setWordStart; // letters and '_' (built with base 0x80 and the 'true' flag - presumably admitting non-ASCII characters, TODO confirm)
- CharacterSet setWord; // letters, digits and '_' (same extra constructor arguments as setWordStart)
- CharacterSet setSpecialVar; // characters accepted after '$' as a special variable in InterpolateSegment()
- CharacterSet setControlVar; // letters accepted after "$^" in InterpolateSegment()
- WordList keywords; // word list 0, replaced through WordListSet()
- OptionsPerl options; // current option values, written by PropertySet()
- OptionSetPerl osPerl; // property name/description table
-public:
- LexerPerl() :
- DefaultLexer("perl", SCLEX_PERL),
- setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
- setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
- setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
- setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
- }
- virtual ~LexerPerl() {
- }
- void SCI_METHOD Release() override {
- delete this; // instances are heap-allocated by LexerFactoryPerl()
- }
- int SCI_METHOD Version() const override {
- return lvRelease5;
- }
- const char *SCI_METHOD PropertyNames() override {
- return osPerl.PropertyNames();
- }
- int SCI_METHOD PropertyType(const char *name) override {
- return osPerl.PropertyType(name);
- }
- const char *SCI_METHOD DescribeProperty(const char *name) override {
- return osPerl.DescribeProperty(name);
- }
- Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
- const char * SCI_METHOD PropertyGet(const char *key) override {
- return osPerl.PropertyGet(key);
- }
- const char *SCI_METHOD DescribeWordListSets() override {
- return osPerl.DescribeWordListSets();
- }
- Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
- void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
- void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
-
- void *SCI_METHOD PrivateCall(int, void *) override {
- return 0; // no private calls are implemented
- }
-
- static ILexer5 *LexerFactoryPerl() {
- return new LexerPerl(); // freed again by Release()
- }
- int InputSymbolScan(StyleContext &sc);
- void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
-};
-
-Sci_Position SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) { // forwards the property to osPerl, which updates 'options'
- if (osPerl.PropertySet(&options, key, val)) {
- return 0; // osPerl.PropertySet() returned true
- }
- return -1; // osPerl.PropertySet() returned false
-}
-
-Sci_Position SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) { // replaces word list n with the words in wl; returns 0 if the list changed, -1 otherwise
- WordList *wordListN = 0;
- switch (n) {
- case 0: // the only word list this lexer has
- wordListN = &keywords;
- break;
- }
- Sci_Position firstModification = -1;
- if (wordListN) {
- WordList wlNew;
- wlNew.Set(wl);
- if (*wordListN != wlNew) { // compare first so an identical list does not report a change
- wordListN->Set(wl);
- firstModification = 0;
- }
- }
- return firstModification;
-}
-
-int LexerPerl::InputSymbolScan(StyleContext &sc) {
- // forward scan for matching > on same line; file handles. Returns the offset of '>' relative to the current position, or 0 if there is none
- int c, sLen = 0;
- while ((c = sc.GetRelativeCharacter(++sLen)) != 0) { // starts at offset 1; a zero character ends the scan
- if (c == '\r' || c == '\n') {
- return 0; // line ended before any '>'
- } else if (c == '>') {
- if (sc.Match("<=>")) // '<=>' case
- return 0;
- return sLen;
- }
- }
- return 0;
-}
-
-void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
- // interpolate a segment (with no active backslashes or delimiters within)
- // switch in or out of an interpolation style or continue current style
- // commit variable patterns if found, trim segment, repeat until done; maxSeg is the number of characters to consume from sc, isPattern disables a few variable forms (see below)
- while (maxSeg > 0) {
- bool isVar = false;
- int sLen = 0; // length of the candidate variable starting at the current position
- if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
- // $#[$]*word [$@][$]*word (where word or {word} is always present)
- bool braces = false;
- sLen = 1;
- if (sc.ch == '$' && sc.chNext == '#') { // starts with $#
- sLen++;
- }
- while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$')) // >0 $ dereference within
- sLen++;
- if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) { // { start for {word}
- sLen++;
- braces = true;
- }
- if (maxSeg > sLen) {
- int c = sc.GetRelativeCharacter(sLen);
- if (setWordStart.Contains(c)) { // word (various)
- sLen++;
- isVar = true;
- while (maxSeg > sLen) {
- if (!setWord.Contains(sc.GetRelativeCharacter(sLen)))
- break;
- sLen++;
- }
- } else if (braces && IsADigit(c) && (sLen == 2)) { // digit for ${digit}
- sLen++;
- isVar = true;
- }
- }
- if (braces) {
- if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) { // } end for {word}
- sLen++;
- } else
- isVar = false; // unterminated '{': not a variable after all
- }
- }
- if (!isVar && (maxSeg > 1)) { // $- or @-specific variable patterns
- int c = sc.chNext;
- if (sc.ch == '$') {
- sLen = 1;
- if (IsADigit(c)) { // $[0-9] and slurp trailing digits
- sLen++;
- isVar = true;
- while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen)))
- sLen++;
- } else if (setSpecialVar.Contains(c)) { // $ special variables
- sLen++;
- isVar = true;
- } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) { // $ additional
- sLen++;
- isVar = true;
- } else if (c == '^') { // $^A control-char style
- sLen++;
- if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
- sLen++;
- isVar = true;
- }
- }
- } else if (sc.ch == '@') {
- sLen = 1;
- if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern
- sLen++;
- isVar = true;
- }
- }
- }
- if (isVar) { // commit as interpolated variable or normal character
- if (sc.state < SCE_PL_STRING_VAR) // still in the base style: switch to its interpolated counterpart
- sc.SetState(sc.state + INTERPOLATE_SHIFT);
- sc.Forward(sLen);
- maxSeg -= sLen;
- } else {
- if (sc.state >= SCE_PL_STRING_VAR) // leaving a variable: back to the base style
- sc.SetState(sc.state - INTERPOLATE_SHIFT);
- sc.Forward();
- maxSeg--;
- }
- }
- if (sc.state >= SCE_PL_STRING_VAR) // never return while still in an interpolated style
- sc.SetState(sc.state - INTERPOLATE_SHIFT);
-}
-
-void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
- LexAccessor styler(pAccess);
-
- // keywords that forces /PATTERN/ at all times; should track vim's behaviour
- WordList reWords;
- reWords.Set("elsif if split while");
-
- // charset classes
- CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
- // lexing of "%*</" operators is non-trivial; these are missing in the set below
- CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
- CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
- CharacterSet setModifiers(CharacterSet::setAlpha);
- CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
- // setArray and setHash also accepts chars for special vars like $_,
- // which are then truncated when the next char does not match setVar
- CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
- CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
- CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
- CharacterSet &setPOD = setModifiers;
- CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
- CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
- CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_ \t");
- CharacterSet setRepetition(CharacterSet::setDigits, ")\"'");
- // for format identifiers
- CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
- CharacterSet &setFormat = setHereDocDelim;
-
- // Lexer for perl often has to backtrack to start of current style to determine
- // which characters are being used as quotes, how deeply nested is the
- // start position and what the termination string is for HERE documents.
-
- class HereDocCls { // Class to manage HERE doc sequence
- public:
- int State;
- // 0: '<<' encountered
- // 1: collect the delimiter
- // 2: here doc text (lines after the delimiter)
- int Quote; // the char after '<<'
- bool Quoted; // true if Quote in ('\'','"','`')
- bool StripIndent; // true if '<<~' requested to strip leading whitespace
- int DelimiterLength; // strlen(Delimiter)
- char Delimiter[HERE_DELIM_MAX]; // the Delimiter, always NUL-terminated
- HereDocCls() {
- State = 0;
- Quote = 0;
- Quoted = false;
- StripIndent = false;
- DelimiterLength = 0;
- Delimiter[0] = '\0';
- }
- void Append(int ch) { // appends one byte; silently drops it once the buffer is full, so a multi-byte character appended byte-by-byte can no longer write past Delimiter[]
- if (DelimiterLength < HERE_DELIM_MAX - 1) Delimiter[DelimiterLength++] = static_cast<char>(ch); // DelimiterLength stops at HERE_DELIM_MAX - 1, which callers use as their "delimiter too long" signal
- Delimiter[DelimiterLength] = '\0';
- }
- ~HereDocCls() {
- }
- };
- HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
-
- class QuoteCls { // Class to manage quote pairs
- public:
- int Rep; // set by New(); presumably the number of delimited parts still expected - confirm against the quote-handling code
- int Count; // incremented by Open(); presumably the current nesting depth of Up/Down pairs - confirm
- int Up, Down; // opening delimiter and its partner as computed by opposite()
- QuoteCls() {
- New(1);
- }
- void New(int r = 1) { // reset all state, with Rep set to r
- Rep = r;
- Count = 0;
- Up = '\0';
- Down = '\0';
- }
- void Open(int u) { // record u as the opening delimiter and derive the closing one
- Count++;
- Up = u;
- Down = opposite(Up);
- }
- };
- QuoteCls Quote;
-
- // additional state for number lexing
- int numState = PERLNUM_DECIMAL;
- int dotCount = 0;
-
- Sci_PositionU endPos = startPos + length;
-
- // Backtrack to beginning of style if required...
- // If in a long distance lexical state, backtrack to find quote characters.
- // Includes strings (may be multi-line), numbers (additional state), format
- // bodies, as well as POD sections.
- if (initStyle == SCE_PL_HERE_Q
- || initStyle == SCE_PL_HERE_QQ
- || initStyle == SCE_PL_HERE_QX
- || initStyle == SCE_PL_FORMAT
- || initStyle == SCE_PL_HERE_QQ_VAR
- || initStyle == SCE_PL_HERE_QX_VAR
- ) {
- // backtrack through multiple styles to reach the delimiter start
- int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
- while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
- startPos--;
- }
- startPos = styler.LineStart(styler.GetLine(startPos));
- initStyle = styler.StyleAt(startPos - 1);
- }
- if (initStyle == SCE_PL_STRING
- || initStyle == SCE_PL_STRING_QQ
- || initStyle == SCE_PL_BACKTICKS
- || initStyle == SCE_PL_STRING_QX
- || initStyle == SCE_PL_REGEX
- || initStyle == SCE_PL_STRING_QR
- || initStyle == SCE_PL_REGSUBST
- || initStyle == SCE_PL_STRING_VAR
- || initStyle == SCE_PL_STRING_QQ_VAR
- || initStyle == SCE_PL_BACKTICKS_VAR
- || initStyle == SCE_PL_STRING_QX_VAR
- || initStyle == SCE_PL_REGEX_VAR
- || initStyle == SCE_PL_STRING_QR_VAR
- || initStyle == SCE_PL_REGSUBST_VAR
- ) {
- // for interpolation, must backtrack through a mix of two different styles
- int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
- initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
- while (startPos > 1) {
- int st = styler.StyleAt(startPos - 1);
- if ((st != initStyle) && (st != otherStyle))
- break;
- startPos--;
- }
- initStyle = SCE_PL_DEFAULT;
- } else if (initStyle == SCE_PL_STRING_Q
- || initStyle == SCE_PL_STRING_QW
- || initStyle == SCE_PL_XLAT
- || initStyle == SCE_PL_CHARACTER
- || initStyle == SCE_PL_NUMBER
- || initStyle == SCE_PL_IDENTIFIER
- || initStyle == SCE_PL_ERROR
- || initStyle == SCE_PL_SUB_PROTOTYPE
- ) {
- while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
- startPos--;
- }
- initStyle = SCE_PL_DEFAULT;
- } else if (initStyle == SCE_PL_POD
- || initStyle == SCE_PL_POD_VERB
- ) {
- // POD backtracking finds preceding blank lines and goes back past them
- Sci_Position ln = styler.GetLine(startPos);
- if (ln > 0) {
- initStyle = styler.StyleAt(styler.LineStart(--ln));
- if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
- while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
- ln--;
- }
- startPos = styler.LineStart(++ln);
- initStyle = styler.StyleAt(startPos - 1);
- } else {
- startPos = 0;
- initStyle = SCE_PL_DEFAULT;
- }
- }
-
- // backFlag, backPos are additional state to aid identifier corner cases.
- // Look backwards past whitespace and comments in order to detect either
- // operator or keyword. Later updated as we go along.
- int backFlag = BACK_NONE;
- Sci_PositionU backPos = startPos;
- if (backPos > 0) {
- backPos--;
- skipWhitespaceComment(styler, backPos);
- if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
- backFlag = BACK_OPERATOR;
- else if (styler.StyleAt(backPos) == SCE_PL_WORD)
- backFlag = BACK_KEYWORD;
- backPos++;
- }
-
- StyleContext sc(startPos, endPos - startPos, initStyle, styler);
-
- for (; sc.More(); sc.Forward()) {
-
- // Determine if the current state should terminate.
- switch (sc.state) {
- case SCE_PL_OPERATOR:
- sc.SetState(SCE_PL_DEFAULT);
- backFlag = BACK_OPERATOR;
- backPos = sc.currentPos;
- break;
- case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol
- if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
- || sc.Match('.', '.')
- || sc.chPrev == '>') { // end of inputsymbol
- sc.SetState(SCE_PL_DEFAULT);
- }
- break;
- case SCE_PL_WORD: // keyword, plus special cases
- if (!setWord.Contains(sc.ch)) {
- char s[100];
- sc.GetCurrent(s, sizeof(s));
- if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
- sc.ChangeState(SCE_PL_DATASECTION);
- } else {
- if ((strcmp(s, "format") == 0)) {
- sc.SetState(SCE_PL_FORMAT_IDENT);
- HereDoc.State = 0;
- } else {
- sc.SetState(SCE_PL_DEFAULT);
- }
- backFlag = BACK_KEYWORD;
- backPos = sc.currentPos;
- }
- }
- break;
- case SCE_PL_SCALAR:
- case SCE_PL_ARRAY:
- case SCE_PL_HASH:
- case SCE_PL_SYMBOLTABLE:
- if (sc.Match(':', ':')) { // skip ::
- sc.Forward();
- } else if (!setVar.Contains(sc.ch)) {
- if (sc.LengthCurrent() == 1) {
- // Special variable: $(, $_ etc.
- sc.Forward();
- }
- sc.SetState(SCE_PL_DEFAULT);
- }
- break;
- case SCE_PL_NUMBER:
- // if no early break, number style is terminated at "(go through)"
- if (sc.ch == '.') {
- if (sc.chNext == '.') {
- // double dot is always an operator (go through)
- } else if (numState <= PERLNUM_FLOAT_EXP) {
- // non-decimal number or float exponent, consume next dot
- sc.SetState(SCE_PL_OPERATOR);
- break;
- } else { // decimal or vectors allows dots
- dotCount++;
- if (numState == PERLNUM_DECIMAL) {
- if (dotCount <= 1) // number with one dot in it
- break;
- if (IsADigit(sc.chNext)) { // really a vector
- numState = PERLNUM_VECTOR;
- break;
- }
- // number then dot (go through)
- } else if (numState == PERLNUM_HEX) {
- if (dotCount <= 1 && IsADigit(sc.chNext, 16)) {
- break; // hex with one dot is a hex float
- } else {
- sc.SetState(SCE_PL_OPERATOR);
- break;
- }
- // hex then dot (go through)
- } else if (IsADigit(sc.chNext)) // vectors
- break;
- // vector then dot (go through)
- }
- } else if (sc.ch == '_') {
- // permissive underscoring for number and vector literals
- break;
- } else if (numState == PERLNUM_DECIMAL) {
- if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign
- numState = PERLNUM_FLOAT_EXP;
- if (sc.chNext == '+' || sc.chNext == '-') {
- sc.Forward();
- }
- break;
- } else if (IsADigit(sc.ch))
- break;
- // number then word (go through)
- } else if (numState == PERLNUM_HEX) {
- if (sc.ch == 'P' || sc.ch == 'p') { // hex float exponent, sign
- numState = PERLNUM_FLOAT_EXP;
- if (sc.chNext == '+' || sc.chNext == '-') {
- sc.Forward();
- }
- break;
- } else if (IsADigit(sc.ch, 16))
- break;
- // hex or hex float then word (go through)
- } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
- if (IsADigit(sc.ch)) // vector
- break;
- if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
- sc.ChangeState(SCE_PL_IDENTIFIER);
- break;
- }
- // vector then word (go through)
- } else if (IsADigit(sc.ch)) {
- if (numState == PERLNUM_FLOAT_EXP) {
- break;
- } else if (numState == PERLNUM_OCTAL) {
- if (sc.ch <= '7') break;
- } else if (numState == PERLNUM_BINARY) {
- if (sc.ch <= '1') break;
- }
- // mark invalid octal, binary numbers (go through)
- numState = PERLNUM_BAD;
- break;
- }
- // complete current number or vector
- sc.ChangeState(actualNumStyle(numState));
- sc.SetState(SCE_PL_DEFAULT);
- break;
- case SCE_PL_COMMENTLINE:
- if (sc.atLineStart) {
- sc.SetState(SCE_PL_DEFAULT);
- }
- break;
- case SCE_PL_HERE_DELIM:
- if (HereDoc.State == 0) { // '<<' encountered
- int delim_ch = sc.chNext;
- Sci_Position ws_skip = 0;
- HereDoc.State = 1; // pre-init HERE doc class
- HereDoc.Quote = sc.chNext;
- HereDoc.Quoted = false;
- HereDoc.StripIndent = false;
- HereDoc.DelimiterLength = 0;
- HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
- if (delim_ch == '~') { // was actually '<<~'
- sc.Forward();
- HereDoc.StripIndent = true;
- HereDoc.Quote = delim_ch = sc.chNext;
- }
- if (IsASpaceOrTab(delim_ch)) {
- // skip whitespace; legal only for quoted delimiters
- Sci_PositionU i = sc.currentPos + 1;
- while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
- i++;
- delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
- }
- ws_skip = i - sc.currentPos - 1;
- }
- if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
- // a quoted here-doc delimiter; skip any whitespace
- sc.Forward(ws_skip + 1);
- HereDoc.Quote = delim_ch;
- HereDoc.Quoted = true;
- } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
- || ws_skip > 0) {
- // left shift << or <<= operator cases
- // restore position if operator
- sc.ChangeState(SCE_PL_OPERATOR);
- sc.ForwardSetState(SCE_PL_DEFAULT);
- backFlag = BACK_OPERATOR;
- backPos = sc.currentPos;
- HereDoc.State = 0;
- } else {
- // specially handle initial '\' for identifier
- if (ws_skip == 0 && HereDoc.Quote == '\\')
- sc.Forward();
- // an unquoted here-doc delimiter, no special handling
- // (cannot be prefixed by spaces/tabs), or
- // symbols terminates; deprecated zero-length delimiter
- }
- } else if (HereDoc.State == 1) { // collect the delimiter
- backFlag = BACK_NONE;
- if (HereDoc.Quoted) { // a quoted here-doc delimiter
- if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
- sc.ForwardSetState(SCE_PL_DEFAULT);
- } else if (!sc.atLineEnd) {
- if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
- sc.Forward();
- }
- if (sc.ch != '\r') { // skip CR if CRLF
- int i = 0; // else append char, possibly an extended char
- while (i < sc.width) {
- HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i)));
- i++;
- }
- }
- }
- } else { // an unquoted here-doc delimiter, no extended charsets
- if (setHereDocDelim.Contains(sc.ch)) {
- HereDoc.Append(sc.ch);
- } else {
- sc.SetState(SCE_PL_DEFAULT);
- }
- }
- if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
- sc.SetState(SCE_PL_ERROR);
- HereDoc.State = 0;
- }
- }
- break;
- case SCE_PL_HERE_Q:
- case SCE_PL_HERE_QQ:
- case SCE_PL_HERE_QX:
- // also implies HereDoc.State == 2
- sc.Complete();
- if (HereDoc.StripIndent) {
- // skip whitespace
- while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
- sc.Forward();
- }
- if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
- int c = sc.GetRelative(HereDoc.DelimiterLength);
- if (c == '\r' || c == '\n') { // peek first, do not consume match
- sc.ForwardBytes(HereDoc.DelimiterLength);
- sc.SetState(SCE_PL_DEFAULT);
- backFlag = BACK_NONE;
- HereDoc.State = 0;
- if (!sc.atLineEnd)
- sc.Forward();
- break;
- }
- }
- if (sc.state == SCE_PL_HERE_Q) { // \EOF and 'EOF' non-interpolated
- while (!sc.atLineEnd)
- sc.Forward();
- break;
- }
- while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated
- int c, sLen = 0, endType = 0;
- while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
- // scan to break string into segments
- if (c == '\\') {
- endType = 1; break;
- } else if (c == '\r' || c == '\n') {
- endType = 2; break;
- }
- sLen++;
- }
- if (sLen > 0) // process non-empty segments
- InterpolateSegment(sc, sLen);
- if (endType == 1) {
- sc.Forward();
- // \ at end-of-line does not appear to have any effect, skip
- if (sc.ch != '\r' && sc.ch != '\n')
- sc.Forward();
- } else if (endType == 2) {
- if (!sc.atLineEnd)
- sc.Forward();
- }
- }
- break;
- case SCE_PL_POD:
- case SCE_PL_POD_VERB: {
- Sci_PositionU fw = sc.currentPos;
- Sci_Position ln = styler.GetLine(fw);
- if (sc.atLineStart && sc.Match("=cut")) { // end of POD
- sc.SetState(SCE_PL_POD);
- sc.Forward(4);
- sc.SetState(SCE_PL_DEFAULT);
- styler.SetLineState(ln, SCE_PL_POD);
- break;
- }
- int pod = podLineScan(styler, fw, endPos); // classify POD line
- styler.SetLineState(ln, pod);
- if (pod == SCE_PL_DEFAULT) {
- if (sc.state == SCE_PL_POD_VERB) {
- Sci_PositionU fw2 = fw;
- while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) {
- fw = fw2++; // penultimate line (last blank line)
- pod = podLineScan(styler, fw2, endPos);
- styler.SetLineState(styler.GetLine(fw2), pod);
- }
- if (pod == SCE_PL_POD) { // truncate verbatim POD early
- sc.SetState(SCE_PL_POD);
- } else
- fw = fw2;
- }
- } else {
- if (pod == SCE_PL_POD_VERB // still part of current paragraph
- && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
- pod = SCE_PL_POD;
- styler.SetLineState(ln, pod);
- } else if (pod == SCE_PL_POD
- && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
- pod = SCE_PL_POD_VERB;
- styler.SetLineState(ln, pod);
- }
- sc.SetState(pod);
- }
- sc.ForwardBytes(fw - sc.currentPos); // commit style
- }
- break;
- case SCE_PL_REGEX:
- case SCE_PL_STRING_QR:
- if (Quote.Rep <= 0) {
- if (!setModifiers.Contains(sc.ch))
- sc.SetState(SCE_PL_DEFAULT);
- } else if (!Quote.Up && !IsASpace(sc.ch)) {
- Quote.Open(sc.ch);
- } else {
- int c, sLen = 0, endType = 0;
- while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
- // scan to break string into segments
- if (IsASpace(c)) {
- break;
- } else if (c == '\\' && Quote.Up != '\\') {
- endType = 1; break;
- } else if (c == Quote.Down) {
- Quote.Count--;
- if (Quote.Count == 0) {
- Quote.Rep--;
- break;
- }
- } else if (c == Quote.Up)
- Quote.Count++;
- sLen++;
- }
- if (sLen > 0) { // process non-empty segments
- if (Quote.Up != '\'') {
- InterpolateSegment(sc, sLen, true);
- } else // non-interpolated path
- sc.Forward(sLen);
- }
- if (endType == 1)
- sc.Forward();
- }
- break;
- case SCE_PL_REGSUBST:
- case SCE_PL_XLAT:
- if (Quote.Rep <= 0) {
- if (!setModifiers.Contains(sc.ch))
- sc.SetState(SCE_PL_DEFAULT);
- } else if (!Quote.Up && !IsASpace(sc.ch)) {
- Quote.Open(sc.ch);
- } else {
- int c, sLen = 0, endType = 0;
- bool isPattern = (Quote.Rep == 2);
- while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
- // scan to break string into segments
- if (c == '\\' && Quote.Up != '\\') {
- endType = 2; break;
- } else if (Quote.Count == 0 && Quote.Rep == 1) {
- // We matched something like s(...) or tr{...}, Perl 5.10
- // appears to allow almost any character for use as the
- // next delimiters. Whitespace and comments are accepted in
- // between, but we'll limit to whitespace here.
- // For '#', if no whitespace in between, it's a delimiter.
- if (IsASpace(c)) {
- // Keep going
- } else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) {
- endType = 3;
- } else
- Quote.Open(c);
- break;
- } else if (c == Quote.Down) {
- Quote.Count--;
- if (Quote.Count == 0) {
- Quote.Rep--;
- endType = 1;
- }
- if (Quote.Up == Quote.Down)
- Quote.Count++;
- if (endType == 1)
- break;
- } else if (c == Quote.Up) {
- Quote.Count++;
- } else if (IsASpace(c))
- break;
- sLen++;
- }
- if (sLen > 0) { // process non-empty segments
- if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
- InterpolateSegment(sc, sLen, isPattern);
- } else // non-interpolated path
- sc.Forward(sLen);
- }
- if (endType == 2) {
- sc.Forward();
- } else if (endType == 3)
- sc.SetState(SCE_PL_DEFAULT);
- }
- break;
- case SCE_PL_STRING_Q:
- case SCE_PL_STRING_QQ:
- case SCE_PL_STRING_QX:
- case SCE_PL_STRING_QW:
- case SCE_PL_STRING:
- case SCE_PL_CHARACTER:
- case SCE_PL_BACKTICKS:
- if (!Quote.Down && !IsASpace(sc.ch)) {
- Quote.Open(sc.ch);
- } else {
- int c, sLen = 0, endType = 0;
- while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
- // scan to break string into segments
- if (IsASpace(c)) {
- break;
- } else if (c == '\\' && Quote.Up != '\\') {
- endType = 2; break;
- } else if (c == Quote.Down) {
- Quote.Count--;
- if (Quote.Count == 0) {
- endType = 3; break;
- }
- } else if (c == Quote.Up)
- Quote.Count++;
- sLen++;
- }
- if (sLen > 0) { // process non-empty segments
- switch (sc.state) {
- case SCE_PL_STRING:
- case SCE_PL_STRING_QQ:
- case SCE_PL_BACKTICKS:
- InterpolateSegment(sc, sLen);
- break;
- case SCE_PL_STRING_QX:
- if (Quote.Up != '\'') {
- InterpolateSegment(sc, sLen);
- break;
- }
- // (continued for ' delim)
- // Falls through.
- default: // non-interpolated path
- sc.Forward(sLen);
- }
- }
- if (endType == 2) {
- sc.Forward();
- } else if (endType == 3)
- sc.ForwardSetState(SCE_PL_DEFAULT);
- }
- break;
- case SCE_PL_SUB_PROTOTYPE: {
- int i = 0;
- // forward scan; must all be valid proto characters
- while (setSubPrototype.Contains(sc.GetRelative(i)))
- i++;
- if (sc.GetRelative(i) == ')') { // valid sub prototype
- sc.ForwardBytes(i);
- sc.ForwardSetState(SCE_PL_DEFAULT);
- } else {
- // abandon prototype, restart from '('
- sc.ChangeState(SCE_PL_OPERATOR);
- sc.SetState(SCE_PL_DEFAULT);
- }
- }
- break;
- case SCE_PL_FORMAT: {
- sc.Complete();
- if (sc.Match('.')) {
- sc.Forward();
- if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
- sc.SetState(SCE_PL_DEFAULT);
- }
- while (!sc.atLineEnd)
- sc.Forward();
- }
- break;
- case SCE_PL_ERROR:
- break;
- }
- // Needed for specific continuation styles (one follows the other)
- switch (sc.state) {
- // continued from SCE_PL_WORD
- case SCE_PL_FORMAT_IDENT:
- // occupies HereDoc state 3 to avoid clashing with HERE docs
- if (IsASpaceOrTab(sc.ch)) { // skip whitespace
- sc.ChangeState(SCE_PL_DEFAULT);
- while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
- sc.Forward();
- sc.SetState(SCE_PL_FORMAT_IDENT);
- }
- if (setFormatStart.Contains(sc.ch)) { // identifier or '='
- if (sc.ch != '=') {
- do {
- sc.Forward();
- } while (setFormat.Contains(sc.ch));
- }
- while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
- sc.Forward();
- if (sc.ch == '=') {
- sc.ForwardSetState(SCE_PL_DEFAULT);
- HereDoc.State = 3;
- } else {
- // invalid identifier; inexact fallback, but hey
- sc.ChangeState(SCE_PL_IDENTIFIER);
- sc.SetState(SCE_PL_DEFAULT);
- }
- } else {
- sc.ChangeState(SCE_PL_DEFAULT); // invalid identifier
- }
- backFlag = BACK_NONE;
- break;
- }
-
- // Must check end of HereDoc states here before default state is handled
- if (HereDoc.State == 1 && sc.atLineEnd) {
- // Begin of here-doc (the line after the here-doc delimiter):
- // Lexically, the here-doc starts from the next line after the >>, but the
- // first line of here-doc seem to follow the style of the last EOL sequence
- int st_new = SCE_PL_HERE_QQ;
- HereDoc.State = 2;
- if (HereDoc.Quoted) {
- if (sc.state == SCE_PL_HERE_DELIM) {
- // Missing quote at end of string! We are stricter than perl.
- // Colour here-doc anyway while marking this bit as an error.
- sc.ChangeState(SCE_PL_ERROR);
- }
- switch (HereDoc.Quote) {
- case '\'':
- st_new = SCE_PL_HERE_Q;
- break;
- case '"' :
- st_new = SCE_PL_HERE_QQ;
- break;
- case '`' :
- st_new = SCE_PL_HERE_QX;
- break;
- }
- } else {
- if (HereDoc.Quote == '\\')
- st_new = SCE_PL_HERE_Q;
- }
- sc.SetState(st_new);
- }
- if (HereDoc.State == 3 && sc.atLineEnd) {
- // Start of format body.
- HereDoc.State = 0;
- sc.SetState(SCE_PL_FORMAT);
- }
-
- // Determine if a new state should be entered.
- if (sc.state == SCE_PL_DEFAULT) {
- if (IsADigit(sc.ch) ||
- (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
- sc.SetState(SCE_PL_NUMBER);
- backFlag = BACK_NONE;
- numState = PERLNUM_DECIMAL;
- dotCount = 0;
- if (sc.ch == '0') { // hex,bin,octal
- if (sc.chNext == 'x' || sc.chNext == 'X') {
- numState = PERLNUM_HEX;
- } else if (sc.chNext == 'b' || sc.chNext == 'B') {
- numState = PERLNUM_BINARY;
- } else if (IsADigit(sc.chNext)) {
- numState = PERLNUM_OCTAL;
- }
- if (numState != PERLNUM_DECIMAL) {
- sc.Forward();
- }
- } else if (sc.ch == 'v') { // vector
- numState = PERLNUM_V_VECTOR;
- }
- } else if (setWord.Contains(sc.ch)) {
- // if immediately prefixed by '::', always a bareword
- sc.SetState(SCE_PL_WORD);
- if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
- sc.ChangeState(SCE_PL_IDENTIFIER);
- }
- Sci_PositionU bk = sc.currentPos;
- Sci_PositionU fw = sc.currentPos + 1;
- // first check for possible quote-like delimiter
- if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
- sc.ChangeState(SCE_PL_REGSUBST);
- Quote.New(2);
- } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
- sc.ChangeState(SCE_PL_REGEX);
- Quote.New();
- } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
- sc.ChangeState(SCE_PL_STRING_Q);
- Quote.New();
- } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
- sc.ChangeState(SCE_PL_XLAT);
- Quote.New(2);
- } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
- sc.ChangeState(SCE_PL_XLAT);
- Quote.New(2);
- sc.Forward();
- fw++;
- } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
- && !setWord.Contains(sc.GetRelative(2))) {
- if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
- else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
- else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
- else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w'
- Quote.New();
- sc.Forward();
- fw++;
- } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
- !setWord.Contains(sc.chNext) ||
- (setRepetition.Contains(sc.chPrev) && IsADigit(sc.chNext)))) {
- sc.ChangeState(SCE_PL_OPERATOR);
- }
- // if potentially a keyword, scan forward and grab word, then check
- // if it's really one; if yes, disambiguation test is performed
- // otherwise it is always a bareword and we skip a lot of scanning
- if (sc.state == SCE_PL_WORD) {
- while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
- fw++;
- if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
- sc.ChangeState(SCE_PL_IDENTIFIER);
- }
- }
- // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
- // for quote-like delimiters/keywords, attempt to disambiguate
- // to select for bareword, change state -> SCE_PL_IDENTIFIER
- if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
- if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
- sc.ChangeState(SCE_PL_IDENTIFIER);
- }
- backFlag = BACK_NONE;
- } else if (sc.ch == '#') {
- sc.SetState(SCE_PL_COMMENTLINE);
- } else if (sc.ch == '\"') {
- sc.SetState(SCE_PL_STRING);
- Quote.New();
- Quote.Open(sc.ch);
- backFlag = BACK_NONE;
- } else if (sc.ch == '\'') {
- if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
- // Archaic call
- sc.SetState(SCE_PL_IDENTIFIER);
- } else {
- sc.SetState(SCE_PL_CHARACTER);
- Quote.New();
- Quote.Open(sc.ch);
- }
- backFlag = BACK_NONE;
- } else if (sc.ch == '`') {
- sc.SetState(SCE_PL_BACKTICKS);
- Quote.New();
- Quote.Open(sc.ch);
- backFlag = BACK_NONE;
- } else if (sc.ch == '$') {
- sc.SetState(SCE_PL_SCALAR);
- if (sc.chNext == '{') {
- sc.ForwardSetState(SCE_PL_OPERATOR);
- } else if (IsASpace(sc.chNext)) {
- sc.ForwardSetState(SCE_PL_DEFAULT);
- } else {
- sc.Forward();
- if (sc.Match('`', '`') || sc.Match(':', ':')) {
- sc.Forward();
- }
- }
- backFlag = BACK_NONE;
- } else if (sc.ch == '@') {
- sc.SetState(SCE_PL_ARRAY);
- if (setArray.Contains(sc.chNext)) {
- // no special treatment
- } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
- sc.ForwardBytes(2);
- } else if (sc.chNext == '{' || sc.chNext == '[') {
- sc.ForwardSetState(SCE_PL_OPERATOR);
- } else {
- sc.ChangeState(SCE_PL_OPERATOR);
- }
- backFlag = BACK_NONE;
- } else if (setPreferRE.Contains(sc.ch)) {
- // Explicit backward peeking to set a consistent preferRE for
- // any slash found, so no longer need to track preferRE state.
- // Find first previous significant lexed element and interpret.
- // A few symbols shares this code for disambiguation.
- bool preferRE = false;
- bool isHereDoc = sc.Match('<', '<');
- bool hereDocSpace = false; // for: SCALAR [whitespace] '<<'
- Sci_PositionU bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
- sc.Complete();
- styler.Flush();
- if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
- hereDocSpace = true;
- skipWhitespaceComment(styler, bk);
- if (bk == 0) {
- // avoid backward scanning breakage
- preferRE = true;
- } else {
- int bkstyle = styler.StyleAt(bk);
- int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
- switch (bkstyle) {
- case SCE_PL_OPERATOR:
- preferRE = true;
- if (bkch == ')' || bkch == ']') {
- preferRE = false;
- } else if (bkch == '}') {
- // backtrack by counting balanced brace pairs
- // needed to test for variables like ${}, @{} etc.
- bkstyle = styleBeforeBracePair(styler, bk);
- if (bkstyle == SCE_PL_SCALAR
- || bkstyle == SCE_PL_ARRAY
- || bkstyle == SCE_PL_HASH
- || bkstyle == SCE_PL_SYMBOLTABLE
- || bkstyle == SCE_PL_OPERATOR) {
- preferRE = false;
- }
- } else if (bkch == '+' || bkch == '-') {
- if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
- && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
- // exceptions for operators: unary suffixes ++, --
- preferRE = false;
- }
- break;
- case SCE_PL_IDENTIFIER:
- preferRE = true;
- bkstyle = styleCheckIdentifier(styler, bk);
- if ((bkstyle == 1) || (bkstyle == 2)) {
- // inputsymbol or var with "->" or "::" before identifier
- preferRE = false;
- } else if (bkstyle == 3) {
- // bare identifier, test cases follows:
- if (sc.ch == '/') {
- // if '/', /PATTERN/ unless digit/space immediately after '/'
- // if '//', always expect defined-or operator to follow identifier
- if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
- preferRE = false;
- } else if (sc.ch == '*' || sc.ch == '%') {
- if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
- preferRE = false;
- } else if (sc.ch == '<') {
- if (IsASpace(sc.chNext) || sc.chNext == '=')
- preferRE = false;
- }
- }
- break;
- case SCE_PL_SCALAR: // for $var<< case:
- if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc
- preferRE = true;
- break;
- case SCE_PL_WORD:
- preferRE = true;
- // for HERE docs, always true
- if (sc.ch == '/') {
- // adopt heuristics similar to vim-style rules:
- // keywords always forced as /PATTERN/: split, if, elsif, while
- // everything else /PATTERN/ unless digit/space immediately after '/'
- // for '//', defined-or favoured unless special keywords
- Sci_PositionU bkend = bk + 1;
- while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
- bk--;
- }
- if (isPerlKeyword(bk, bkend, reWords, styler))
- break;
- if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
- preferRE = false;
- } else if (sc.ch == '*' || sc.ch == '%') {
- if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
- preferRE = false;
- } else if (sc.ch == '<') {
- if (IsASpace(sc.chNext) || sc.chNext == '=')
- preferRE = false;
- }
- break;
-
- // other styles uses the default, preferRE=false
- case SCE_PL_POD:
- case SCE_PL_HERE_Q:
- case SCE_PL_HERE_QQ:
- case SCE_PL_HERE_QX:
- preferRE = true;
- break;
- }
- }
- backFlag = BACK_NONE;
- if (isHereDoc) { // handle '<<', HERE doc
- if (sc.Match("<<>>")) { // double-diamond operator (5.22)
- sc.SetState(SCE_PL_OPERATOR);
- sc.Forward(3);
- } else if (preferRE) {
- sc.SetState(SCE_PL_HERE_DELIM);
- HereDoc.State = 0;
- } else { // << operator
- sc.SetState(SCE_PL_OPERATOR);
- sc.Forward();
- }
- } else if (sc.ch == '*') { // handle '*', typeglob
- if (preferRE) {
- sc.SetState(SCE_PL_SYMBOLTABLE);
- if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
- sc.ForwardBytes(2);
- } else if (sc.chNext == '{') {
- sc.ForwardSetState(SCE_PL_OPERATOR);
- } else {
- sc.Forward();
- }
- } else {
- sc.SetState(SCE_PL_OPERATOR);
- if (sc.chNext == '*') // exponentiation
- sc.Forward();
- }
- } else if (sc.ch == '%') { // handle '%', hash
- if (preferRE) {
- sc.SetState(SCE_PL_HASH);
- if (setHash.Contains(sc.chNext)) {
- sc.Forward();
- } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
- sc.ForwardBytes(2);
- } else if (sc.chNext == '{') {
- sc.ForwardSetState(SCE_PL_OPERATOR);
- } else {
- sc.ChangeState(SCE_PL_OPERATOR);
- }
- } else {
- sc.SetState(SCE_PL_OPERATOR);
- }
- } else if (sc.ch == '<') { // handle '<', inputsymbol
- if (preferRE) {
- // forward scan
- int i = InputSymbolScan(sc);
- if (i > 0) {
- sc.SetState(SCE_PL_IDENTIFIER);
- sc.Forward(i);
- } else {
- sc.SetState(SCE_PL_OPERATOR);
- }
- } else {
- sc.SetState(SCE_PL_OPERATOR);
- }
- } else { // handle '/', regexp
- if (preferRE) {
- sc.SetState(SCE_PL_REGEX);
- Quote.New();
- Quote.Open(sc.ch);
- } else { // / and // operators
- sc.SetState(SCE_PL_OPERATOR);
- if (sc.chNext == '/') {
- sc.Forward();
- }
- }
- }
- } else if (sc.ch == '=' // POD
- && setPOD.Contains(sc.chNext)
- && sc.atLineStart) {
- sc.SetState(SCE_PL_POD);
- backFlag = BACK_NONE;
- } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases
- Sci_PositionU bk = sc.currentPos;
- Sci_PositionU fw = 2;
- if (setSingleCharOp.Contains(sc.chNext) && // file test operators
- !setWord.Contains(sc.GetRelative(2))) {
- sc.SetState(SCE_PL_WORD);
- } else {
- // nominally a minus and bareword; find extent of bareword
- while (setWord.Contains(sc.GetRelative(fw)))
- fw++;
- sc.SetState(SCE_PL_OPERATOR);
- }
- // force to bareword for hash key => or {variable literal} cases
- if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
- sc.ChangeState(SCE_PL_IDENTIFIER);
- }
- backFlag = BACK_NONE;
- } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
- sc.Complete();
- if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
- sc.SetState(SCE_PL_SUB_PROTOTYPE);
- backFlag = BACK_NONE;
- } else {
- sc.SetState(SCE_PL_OPERATOR);
- }
- } else if (setPerlOperator.Contains(sc.ch)) { // operators
- sc.SetState(SCE_PL_OPERATOR);
- if (sc.Match('.', '.')) { // .. and ...
- sc.Forward();
- if (sc.chNext == '.') sc.Forward();
- }
- } else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source
- sc.SetState(SCE_PL_DATASECTION);
- } else {
- // keep colouring defaults
- sc.Complete();
- }
- }
- }
- sc.Complete();
- if (sc.state == SCE_PL_HERE_Q
- || sc.state == SCE_PL_HERE_QQ
- || sc.state == SCE_PL_HERE_QX
- || sc.state == SCE_PL_FORMAT) {
- styler.ChangeLexerState(sc.currentPos, styler.Length());
- }
- sc.Complete();
-}
-
- #define PERL_HEADFOLD_SHIFT 4 // bit offset at which Fold() stores a POD heading number inside a fold level
- #define PERL_HEADFOLD_MASK 0xF0 // bits 7-4 of a fold level: the field Fold() clears and refills for POD headings
-
- void SCI_METHOD LexerPerl::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
- /* Assigns a fold level to every line ending inside [startPos, startPos + length),
- * after first backing up to the start of the previous line when there is one.
- * Returns immediately when options.fold is off. initStyle is not used.
- * For each line the value written with styler.SetLevel() combines:
- * - the level carried in from the previous line (levelPrev) in the low bits,
- * - the level in effect after the line (levelCurrent) shifted left by 16,
- * - SC_FOLDLEVELHEADERFLAG and SC_FOLDLEVELWHITEFLAG where applicable.
- * The level is adjusted by operator-styled braces and brackets, qw lists, POD,
- * package lines, here-documents and, depending on options, comment runs and
- * explicit "#{" and "#}" comment markers.
- */
- if (!options.fold)
- return;
-
- LexAccessor styler(pAccess);
-
- Sci_PositionU endPos = startPos + length;
- int visibleChars = 0; // count of characters on the current line for which isspacechar() is false
- Sci_Position lineCurrent = styler.GetLine(startPos);
-
- // Backtrack to previous line in case need to fix its fold status
- if (startPos > 0) {
- if (lineCurrent > 0) {
- lineCurrent--;
- startPos = styler.LineStart(lineCurrent);
- }
- }
-
- int levelPrev = SC_FOLDLEVELBASE;
- if (lineCurrent > 0)
- levelPrev = styler.LevelAt(lineCurrent - 1) >> 16; // resume from the after-line level the previous line stored in its upper bits
- int levelCurrent = levelPrev;
- char chNext = styler[startPos];
- char chPrev = styler.SafeGetCharAt(startPos - 1);
- int styleNext = styler.StyleAt(startPos);
- // Used at end of line to determine if the line was a package definition
- bool isPackageLine = false;
- int podHeading = 0; // value from PodHeadingLevel() for a "=head" seen on this line; applied and reset at end of line
- for (Sci_PositionU i = startPos; i < endPos; i++) {
- char ch = chNext;
- chNext = styler.SafeGetCharAt(i + 1);
- int style = styleNext;
- styleNext = styler.StyleAt(i + 1);
- int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT; // style of the preceding character; SCE_PL_DEFAULT at position 0
- bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); // true on LF, or on a CR that is not followed by LF
- bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0; // previous character was CR or LF, or this is position 0
- // Comment folding: level goes up on the first line of a run of comment lines and down on the last, as reported by IsCommentLine()
- if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
- if (!IsCommentLine(lineCurrent - 1, styler)
- && IsCommentLine(lineCurrent + 1, styler))
- levelCurrent++;
- else if (IsCommentLine(lineCurrent - 1, styler)
- && !IsCommentLine(lineCurrent + 1, styler))
- levelCurrent--;
- }
- // {} [] block folding (only for characters styled as operators)
- if (style == SCE_PL_OPERATOR) {
- if (ch == '{') {
- if (options.foldAtElse && levelCurrent < levelPrev) // level already dropped on this line (presumably the "} else {" case): lower the line's own level too
- --levelPrev;
- levelCurrent++;
- } else if (ch == '}') {
- levelCurrent--;
- }
- if (ch == '[') {
- if (options.foldAtElse && levelCurrent < levelPrev) // same adjustment as for '{'
- --levelPrev;
- levelCurrent++;
- } else if (ch == ']') {
- levelCurrent--;
- }
- } else if (style == SCE_PL_STRING_QW) {
- // qw: level up where the qw style begins, down where the following character has a different style
- if (stylePrevCh != style)
- levelCurrent++;
- else if (styleNext != style)
- levelCurrent--;
- }
- // POD folding (evaluated only on the first character of a line)
- if (options.foldPOD && atLineStart) {
- if (style == SCE_PL_POD) {
- if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB) // entering POD from a non-POD style
- levelCurrent++;
- else if (styler.Match(i, "=cut")) // clear the heading bits and drop one level
- levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
- else if (styler.Match(i, "=head")) // remember the heading; the level change happens at end of line
- podHeading = PodHeadingLevel(i, styler);
- } else if (style == SCE_PL_DATASECTION) {
- if (ch == '=' && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE) // '=' followed by an ASCII letter while at base level
- levelCurrent++;
- else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
- levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
- else if (styler.Match(i, "=head"))
- podHeading = PodHeadingLevel(i, styler);
- // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
- // reset needed as level test is vs. SC_FOLDLEVELBASE
- else if (stylePrevCh != SCE_PL_DATASECTION)
- levelCurrent = SC_FOLDLEVELBASE;
- }
- }
- // package folding: flag the line when it is a package line and the next line is not
- if (options.foldPackage && atLineStart) {
- if (IsPackageLine(lineCurrent, styler)
- && !IsPackageLine(lineCurrent + 1, styler))
- isPackageLine = true;
- }
-
- //heredoc folding: level up on a transition into a here-doc style, down on a transition out of one
- switch (style) {
- case SCE_PL_HERE_QQ :
- case SCE_PL_HERE_Q :
- case SCE_PL_HERE_QX :
- switch (stylePrevCh) {
- case SCE_PL_HERE_QQ :
- case SCE_PL_HERE_Q :
- case SCE_PL_HERE_QX :
- //do nothing;
- break;
- default :
- levelCurrent++;
- break;
- }
- break;
- default:
- switch (stylePrevCh) {
- case SCE_PL_HERE_QQ :
- case SCE_PL_HERE_Q :
- case SCE_PL_HERE_QX :
- levelCurrent--;
- break;
- default :
- //do nothing;
- break;
- }
- break;
- }
-
- //explicit folding: "#{" in a line comment opens a fold, "#}" closes one when above the base level
- if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
- if (chNext == '{') {
- levelCurrent++;
- } else if (levelCurrent > SC_FOLDLEVELBASE && chNext == '}') {
- levelCurrent--;
- }
- }
-
- if (atEOL) {
- int lev = levelPrev; // level word for this line starts from the level carried in
- // POD headings occupy bits 7-4, leaving some breathing room for
- // non-standard practice -- POD sections stuck in blocks, etc.
- if (podHeading > 0) {
- levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT); // following lines get the heading number in bits 7-4
- lev = levelCurrent - 1; // the heading line itself sits one below that and is marked as a header
- lev |= SC_FOLDLEVELHEADERFLAG;
- podHeading = 0;
- }
- // Check if line was a package declaration
- // because packages need "special" treatment
- if (isPackageLine) {
- lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG; // package line becomes a base-level header
- levelCurrent = SC_FOLDLEVELBASE + 1;
- isPackageLine = false;
- }
- lev |= levelCurrent << 16; // stash the after-line level in the upper bits
- if (visibleChars == 0 && options.foldCompact)
- lev |= SC_FOLDLEVELWHITEFLAG;
- if ((levelCurrent > levelPrev) && (visibleChars > 0))
- lev |= SC_FOLDLEVELHEADERFLAG;
- if (lev != styler.LevelAt(lineCurrent)) { // write only when the stored value differs
- styler.SetLevel(lineCurrent, lev);
- }
- lineCurrent++;
- levelPrev = levelCurrent;
- visibleChars = 0;
- }
- if (!isspacechar(ch))
- visibleChars++;
- chPrev = ch;
- }
- // Fill in the real level of the next line, keeping the current flags as they will be filled in later
- int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
- styler.SetLevel(lineCurrent, levelPrev | flagsNext);
- }
-
- LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc); // LexerModule object for Perl, built from SCLEX_PERL, the LexerPerl factory function, the name "perl" and perlWordListDesc