diff options
Diffstat (limited to 'src/LexPerl.cxx')
-rw-r--r-- | src/LexPerl.cxx | 488 |
1 files changed, 488 insertions, 0 deletions
diff --git a/src/LexPerl.cxx b/src/LexPerl.cxx new file mode 100644 index 000000000..f9170b9c3 --- /dev/null +++ b/src/LexPerl.cxx @@ -0,0 +1,488 @@ +// SciTE - Scintilla based Text Editor +// LexPerl.cxx - lexer for subset of Perl +// Copyright 1998-2000 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <stdio.h> +#include <stdarg.h> + +#include "Platform.h" + +#include "PropSet.h" +#include "Accessor.h" +#include "KeyWords.h" +#include "Scintilla.h" +#include "SciLexer.h" + +inline bool isPerlOperator(char ch) { + if (isalnum(ch)) + return false; + // '.' left out as it is used to make up numbers + if (ch == '%' || ch == '^' || ch == '&' || ch == '*' || ch == '\\' || + ch == '(' || ch == ')' || ch == '-' || ch == '+' || + ch == '=' || ch == '|' || ch == '{' || ch == '}' || + ch == '[' || ch == ']' || ch == ':' || ch == ';' || + ch == '<' || ch == '>' || ch == ',' || ch == '/' || + ch == '?' || ch == '!' || ch == '.' || ch == '~') + return true; + return false; +} + +static int classifyWordPerl(unsigned int start, unsigned int end, WordList &keywords, StylingContext &styler) { + char s[100]; + bool wordIsNumber = isdigit(styler[start]) || (styler[start] == '.'); + for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) { + s[i] = styler[start + i]; + s[i + 1] = '\0'; + } + char chAttr = SCE_PL_IDENTIFIER; + if (wordIsNumber) + chAttr = SCE_PL_NUMBER; + else { + if (keywords.InList(s)) + chAttr = SCE_PL_WORD; + } + styler.ColourTo(end, chAttr); + return chAttr; +} + +static bool isEndVar(char ch) { + return !isalnum(ch) && ch != '#' && ch != '$' && + ch != '_' && ch != '\''; +} + +static bool isMatch(StylingContext &styler, int lengthDoc, int pos, const char *val) { + if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) { + return false; + } + while (*val) { + if (*val != styler[pos++]) { + return false; + } + val++; + } + return true; +} + +static bool isOKQuote(char ch) { + if (isalnum(ch)) + return false; + if (isspace(ch)) + return false; + if (iscntrl(ch)) + return false; + return true; +} + +static char opposite(char ch) { + if (ch == '(') + return ')'; + if (ch == '[') + return ']'; + if (ch == '{') + return '}'; + if (ch == '<') + return '>'; + return ch; +} + +static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle, + WordList *keywordlists[], StylingContext &styler) { + + // Lexer for perl often has to backtrack to start of current style to determine + // which characters are being used as quotes, how deeply nested is the + // start position and what the termination string is for here documents + + WordList &keywords = *keywordlists[0]; + + char sooked[100]; + int quotes = 0; + char quoteDown = 'd'; + char quoteUp = 'd'; + int quoteRep = 1; + int sookedpos = 0; + bool preferRE = true; + sooked[sookedpos] = '\0'; + int state = initStyle; + int lengthDoc = startPos + length; + // If in a long distance lexical state, seek to the beginning to find quote characters + if (state == SCE_PL_HERE || state == SCE_PL_REGEX || + state == SCE_PL_REGSUBST || state == SCE_PL_LONGQUOTE) { + while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) { + startPos--; + } + state = SCE_PL_DEFAULT; + } + styler.StartAt(startPos); + char chPrev = ' '; + char chNext = styler[startPos]; + styler.StartSegment(startPos); + for (int i = startPos; i <= lengthDoc; i++) { + char ch = chNext; + chNext = styler.SafeGetCharAt(i + 1); + char chNext2 = styler.SafeGetCharAt(i + 2); + + if (styler.IsLeadByte(ch)) { + chNext = styler.SafeGetCharAt(i + 2); + chPrev = ' '; + i += 1; + continue; + } + + if (state == SCE_PL_DEFAULT) { + if (iswordstart(ch)) { + styler.ColourTo(i - 1, state); + if (ch == 's' && !isalnum(chNext)) { + state = SCE_PL_REGSUBST; + quotes = 0; + quoteUp = '\0'; + quoteDown = '\0'; + quoteRep = 2; + } else if (ch == 'm' && !isalnum(chNext)) { + state = SCE_PL_REGEX; + quotes = 0; + quoteUp = '\0'; + quoteDown = '\0'; + quoteRep = 1; + } else if (ch == 't' && chNext == 'r' && !isalnum(chNext2)) { + state = SCE_PL_REGSUBST; + quotes = 0; + quoteUp = '\0'; + quoteDown = '\0'; + quoteRep = 2; + i++; + chNext = chNext2; + } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isalnum(chNext2)) { + state = SCE_PL_LONGQUOTE; + i++; + chNext = chNext2; + quotes = 0; + quoteUp = '\0'; + quoteDown = '\0'; + quoteRep = 1; + } else { + state = SCE_PL_WORD; + preferRE = false; + } + } else if (ch == '#') { + styler.ColourTo(i - 1, state); + state = SCE_PL_COMMENTLINE; + } else if (ch == '\"') { + styler.ColourTo(i - 1, state); + state = SCE_PL_STRING; + } else if (ch == '\'') { + if (chPrev == '&') { + // Archaic call + styler.ColourTo(i, state); + } else { + styler.ColourTo(i - 1, state); + state = SCE_PL_CHARACTER; + } + } else if (ch == '`') { + styler.ColourTo(i - 1, state); + state = SCE_PL_BACKTICKS; + } else if (ch == '$') { + preferRE = false; + styler.ColourTo(i - 1, state); + if (isalnum(chNext) || chNext == '#' || chNext == '$' || chNext == '_') { + state = SCE_PL_SCALAR; + } else if (chNext != '{' && chNext != '[') { + styler.ColourTo(i, SCE_PL_SCALAR); + i++; + ch = ' '; + chNext = ' '; + } else { + styler.ColourTo(i, SCE_PL_SCALAR); + } + } else if (ch == '@') { + preferRE = false; + styler.ColourTo(i - 1, state); + if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') { + state = SCE_PL_ARRAY; + } else if (chNext != '{' && chNext != '[') { + styler.ColourTo(i, SCE_PL_ARRAY); + i++; + ch = ' '; + } else { + styler.ColourTo(i, SCE_PL_ARRAY); + } + } else if (ch == '%') { + preferRE = false; + styler.ColourTo(i - 1, state); + if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') { + state = SCE_PL_HASH; + } else if (chNext != '{' && chNext != '[') { + styler.ColourTo(i, SCE_PL_HASH); + i++; + ch = ' '; + } else { + styler.ColourTo(i, SCE_PL_HASH); + } + } else if (ch == '*') { + styler.ColourTo(i - 1, state); + state = SCE_PL_SYMBOLTABLE; + } else if (ch == '/' && preferRE) { + styler.ColourTo(i - 1, state); + state = SCE_PL_REGEX; + quoteUp = '/'; + quoteDown = '/'; + quotes = 1; + quoteRep = 1; + } else if (ch == '<' && chNext == '<') { + styler.ColourTo(i - 1, state); + state = SCE_PL_HERE; + i++; + ch = chNext; + chNext = chNext2; + quotes = 0; + sookedpos = 0; + sooked[sookedpos] = '\0'; + } else if (ch == '=' && isalpha(chNext)) { + styler.ColourTo(i - 1, state); + state = SCE_PL_POD; + quotes = 0; + sookedpos = 0; + sooked[sookedpos] = '\0'; + } else if (isPerlOperator(ch)) { + if (ch == ')' || ch == ']') + preferRE = false; + else + preferRE = true; + styler.ColourTo(i - 1, state); + styler.ColourTo(i, SCE_PL_OPERATOR); + } + } else if (state == SCE_PL_WORD) { + if (!iswordchar(ch) && ch != '\'') { // Archaic Perl has quotes inside names + if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")) { + styler.ColourTo(i, SCE_PL_DATASECTION); + state = SCE_PL_DATASECTION; + } else if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__")) { + styler.ColourTo(i, SCE_PL_DATASECTION); + state = SCE_PL_DATASECTION; + } else { + if (classifyWordPerl(styler.GetStartSegment(), i - 1, keywords, styler) == SCE_PL_WORD) + preferRE = true; + state = SCE_PL_DEFAULT; + if (ch == '#') { + state = SCE_PL_COMMENTLINE; + } else if (ch == '\"') { + state = SCE_PL_STRING; + } else if (ch == '\'') { + state = SCE_PL_CHARACTER; + } else if (ch == '<' && chNext == '<') { + state = SCE_PL_HERE; + quotes = 0; + sookedpos = 0; + sooked[sookedpos] = '\0'; + } else if (isPerlOperator(ch)) { + if (ch == ')' || ch == ']') + preferRE = false; + else + preferRE = true; + styler.ColourTo(i, SCE_PL_OPERATOR); + state = SCE_PL_DEFAULT; + } + } + } + } else { + if (state == SCE_PL_COMMENTLINE) { + if (ch == '\r' || ch == '\n') { + styler.ColourTo(i - 1, state); + state = SCE_PL_DEFAULT; + } + } else if (state == SCE_PL_HERE) { + if (isalnum(ch) && quotes < 2) { + sooked[sookedpos++] = ch; + sooked[sookedpos] = '\0'; + if (quotes == 0) + quotes = 1; + } else { + quotes++; + } + + if (quotes > 1 && isMatch(styler, lengthDoc, i, sooked)) { + styler.ColourTo(i + sookedpos - 1, SCE_PL_HERE); + state = SCE_PL_DEFAULT; + i += sookedpos; + chNext = ' '; + } + } else if (state == SCE_PL_STRING) { + if (ch == '\\') { + if (chNext == '\"' || chNext == '\'' || chNext == '\\') { + i++; + ch = chNext; + chNext = styler.SafeGetCharAt(i + 1); + } + } else if (ch == '\"') { + styler.ColourTo(i, state); + state = SCE_PL_DEFAULT; + i++; + ch = chNext; + chNext = styler.SafeGetCharAt(i + 1); + } + } else if (state == SCE_PL_CHARACTER) { + if (ch == '\\') { + if (chNext == '\"' || chNext == '\'' || chNext == '\\') { + i++; + ch = chNext; + chNext = styler.SafeGetCharAt(i + 1); + } + } else if (ch == '\'') { + styler.ColourTo(i, state); + state = SCE_PL_DEFAULT; + i++; + ch = chNext; + chNext = styler.SafeGetCharAt(i + 1); + } + } else if (state == SCE_PL_BACKTICKS) { + if (ch == '`') { + styler.ColourTo(i, state); + state = SCE_PL_DEFAULT; + i++; + ch = chNext; + chNext = styler.SafeGetCharAt(i + 1); + } + } else if (state == SCE_PL_POD) { + if (ch == '=') { + if (isMatch(styler, lengthDoc, i, "=cut")) { + styler.ColourTo(i - 1 + 4, state); + i += 4; + state = SCE_PL_DEFAULT; + chNext = ' '; + ch = ' '; + } + } + } else if (state == SCE_PL_SCALAR) { + if (isEndVar(ch)) { + styler.ColourTo(i - 1, state); + state = SCE_PL_DEFAULT; + } + } else if (state == SCE_PL_ARRAY) { + if (isEndVar(ch)) { + styler.ColourTo(i - 1, state); + state = SCE_PL_DEFAULT; + } + } else if (state == SCE_PL_HASH) { + if (isEndVar(ch)) { + styler.ColourTo(i - 1, state); + state = SCE_PL_DEFAULT; + } + } else if (state == SCE_PL_SYMBOLTABLE) { + if (isEndVar(ch)) { + styler.ColourTo(i - 1, state); + state = SCE_PL_DEFAULT; + } + } else if (state == SCE_PL_REF) { + if (isEndVar(ch)) { + styler.ColourTo(i - 1, state); + state = SCE_PL_DEFAULT; + } + } else if (state == SCE_PL_REGEX) { + if (!quoteUp && !isspace(ch)) { + quoteUp = ch; + quoteDown = opposite(ch); + quotes++; + } else { + if (ch == quoteDown && chPrev != '\\') { + quotes--; + if (quotes == 0) { + quoteRep--; + if (quoteUp == quoteDown) { + quotes++; + } + } + if (!isalpha(chNext)) { + if (quoteRep <= 0) { + styler.ColourTo(i, state); + state = SCE_PL_DEFAULT; + ch = ' '; + } + } + } else if (ch == quoteUp && chPrev != '\\') { + quotes++; + } else if (!isalpha(chNext)) { + if (quoteRep <= 0) { + styler.ColourTo(i, state); + state = SCE_PL_DEFAULT; + ch = ' '; + } + } + } + } else if (state == SCE_PL_REGSUBST) { + if (!quoteUp && !isspace(ch)) { + quoteUp = ch; + quoteDown = opposite(ch); + quotes++; + } else { + if (ch == quoteDown && chPrev != '\\') { + quotes--; + if (quotes == 0) { + quoteRep--; + } + if (!isalpha(chNext)) { + if (quoteRep <= 0) { + styler.ColourTo(i, state); + state = SCE_PL_DEFAULT; + ch = ' '; + } + } + if (quoteUp == quoteDown) { + quotes++; + } + } else if (ch == quoteUp && chPrev != '\\') { + quotes++; + } else if (!isalpha(chNext)) { + if (quoteRep <= 0) { + styler.ColourTo(i, state); + state = SCE_PL_DEFAULT; + ch = ' '; + } + } + } + } else if (state == SCE_PL_LONGQUOTE) { + if (!quoteDown && !isspace(ch)) { + quoteUp = ch; + quoteDown = opposite(quoteUp); + quotes++; + } else if (ch == quoteDown) { + quotes--; + if (quotes == 0) { + quoteRep--; + if (quoteRep <= 0) { + styler.ColourTo(i, state); + state = SCE_PL_DEFAULT; + ch = ' '; + } + if (quoteUp == quoteDown) { + quotes++; + } + } + } else if (ch == quoteUp) { + quotes++; + } + } + + if (state == SCE_PL_DEFAULT) { // One of the above succeeded + if (ch == '#') { + state = SCE_PL_COMMENTLINE; + } else if (ch == '\"') { + state = SCE_PL_STRING; + } else if (ch == '\'') { + state = SCE_PL_CHARACTER; + } else if (iswordstart(ch)) { + state = SCE_PL_WORD; + preferRE = false; + } else if (isoperator(ch)) { + styler.ColourTo(i, SCE_PL_OPERATOR); + } + } + } + chPrev = ch; + } + styler.ColourTo(lengthDoc, state); +} + +static LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc); |