aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/LexPerl.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'src/LexPerl.cxx')
-rw-r--r--src/LexPerl.cxx488
1 files changed, 488 insertions, 0 deletions
diff --git a/src/LexPerl.cxx b/src/LexPerl.cxx
new file mode 100644
index 000000000..f9170b9c3
--- /dev/null
+++ b/src/LexPerl.cxx
@@ -0,0 +1,488 @@
+// SciTE - Scintilla based Text Editor
+// LexPerl.cxx - lexer for subset of Perl
+// Copyright 1998-2000 by Neil Hodgson <neilh@scintilla.org>
+// The License.txt file describes the conditions under which this software may be distributed.
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "Platform.h"
+
+#include "PropSet.h"
+#include "Accessor.h"
+#include "KeyWords.h"
+#include "Scintilla.h"
+#include "SciLexer.h"
+
+// Returns true when ch is one of the punctuation characters this lexer styles
+// as a Perl operator. Letters and digits are never operators.
+inline bool isPerlOperator(char ch) {
+    // The <ctype.h> classifiers require a value representable as unsigned char
+    // (or EOF); a plain char holding a high-bit byte may be negative.
+    if (isalnum(static_cast<unsigned char>(ch)))
+        return false;
+    // Note: '.' is treated as an operator here even though it also appears in numbers.
+    if (ch == '%' || ch == '^' || ch == '&' || ch == '*' || ch == '\\' ||
+        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
+        ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
+        ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
+        ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
+        ch == '?' || ch == '!' || ch == '.' || ch == '~')
+        return true;
+    return false;
+}
+
+// Styles the word occupying positions [start, end] and returns the style applied.
+// A word whose first character is a digit or '.' is coloured SCE_PL_NUMBER.
+// Otherwise it is SCE_PL_WORD when keywords.InList() accepts it, else SCE_PL_IDENTIFIER.
+// Only the first 30 characters of the word are copied for the keyword lookup.
+static int classifyWordPerl(unsigned int start, unsigned int end, WordList &keywords, StylingContext &styler) {
+    char s[100];
+    // Begin with an empty string so InList() never reads uninitialised bytes
+    // should the copy loop below execute zero times.
+    s[0] = '\0';
+    char chFirst = styler[start];
+    // Cast before calling isdigit(): a negative plain char is not a valid argument.
+    bool wordIsNumber = isdigit(static_cast<unsigned char>(chFirst)) || (chFirst == '.');
+    for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
+        s[i] = styler[start + i];
+        s[i + 1] = '\0';
+    }
+    char chAttr = SCE_PL_IDENTIFIER;
+    if (wordIsNumber)
+        chAttr = SCE_PL_NUMBER;
+    else {
+        if (keywords.InList(s))
+            chAttr = SCE_PL_WORD;
+    }
+    styler.ColourTo(end, chAttr);
+    return chAttr;
+}
+
+// Returns true when ch cannot be part of a variable name, i.e. it is not
+// alphanumeric and is none of '#', '$', '_' or '\''.
+static bool isEndVar(char ch) {
+    // Cast before calling isalnum(): a negative plain char is not a valid argument.
+    return !isalnum(static_cast<unsigned char>(ch)) && ch != '#' && ch != '$' &&
+           ch != '_' && ch != '\'';
+}
+
+// Returns true when the characters of val appear in styler starting at pos.
+// A candidate whose end (pos + strlen(val)) reaches or passes lengthDoc is
+// rejected before any character is read.
+static bool isMatch(StylingContext &styler, int lengthDoc, int pos, const char *val) {
+    const int needed = static_cast<int>(strlen(val));
+    if (pos + needed >= lengthDoc)
+        return false;
+    for (const char *p = val; *p != '\0'; ++p, ++pos) {
+        if (styler[pos] != *p)
+            return false;
+    }
+    return true;
+}
+
+// Returns true when ch is neither alphanumeric, whitespace nor a control character.
+static bool isOKQuote(char ch) {
+    // The <ctype.h> classifiers require a value representable as unsigned char
+    // (or EOF), so convert once before classifying.
+    const unsigned char uch = static_cast<unsigned char>(ch);
+    if (isalnum(uch))
+        return false;
+    if (isspace(uch))
+        return false;
+    if (iscntrl(uch))
+        return false;
+    return true;
+}
+
+// Maps an opening bracket to its closing partner; every other character
+// is returned unchanged.
+static char opposite(char ch) {
+    switch (ch) {
+    case '(':
+        return ')';
+    case '[':
+        return ']';
+    case '{':
+        return '}';
+    case '<':
+        return '>';
+    default:
+        return ch;
+    }
+}
+
+// Colourises a range of Perl source text.
+//   startPos     - position of the first character to style
+//   length       - number of characters to style, counted from startPos
+//   initStyle    - lexical state in effect at startPos
+//   keywordlists - keywordlists[0] is the list used to recognise keywords
+//   styler       - used to read characters and existing styles and to record the new styling
+static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
+                             WordList *keywordlists[], StylingContext &styler) {
+
+    // Lexer for perl often has to backtrack to start of current style to determine
+    // which characters are being used as quotes, how deeply nested is the
+    // start position and what the termination string is for here documents
+
+    WordList &keywords = *keywordlists[0];
+
+    // Alphanumeric text collected after "<<"; used as the here-document terminator
+    char sooked[100];
+    // Current nesting count of the active quote delimiters
+    int quotes = 0;
+    // Closing and opening delimiter of the current quote-like construct ('\0' = not yet seen)
+    char quoteDown = 'd';
+    char quoteUp = 'd';
+    // Number of delimited sections still to be consumed (2 for s and tr, otherwise 1)
+    int quoteRep = 1;
+    int sookedpos = 0;
+    // Whether a '/' met in the default state should start a regular expression
+    bool preferRE = true;
+    sooked[sookedpos] = '\0';
+    int state = initStyle;
+    int lengthDoc = startPos + length;
+    // If in a long distance lexical state, seek to the beginning to find quote characters
+    if (state == SCE_PL_HERE || state == SCE_PL_REGEX ||
+        state == SCE_PL_REGSUBST || state == SCE_PL_LONGQUOTE) {
+        while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
+            startPos--;
+        }
+        state = SCE_PL_DEFAULT;
+    }
+    styler.StartAt(startPos);
+    char chPrev = ' ';
+    char chNext = styler[startPos];
+    styler.StartSegment(startPos);
+    for (int i = startPos; i <= lengthDoc; i++) {
+        char ch = chNext;
+        chNext = styler.SafeGetCharAt(i + 1);
+        char chNext2 = styler.SafeGetCharAt(i + 2);
+
+        // Step over both bytes of a double-byte character without interpreting them
+        if (styler.IsLeadByte(ch)) {
+            chNext = styler.SafeGetCharAt(i + 2);
+            chPrev = ' ';
+            i += 1;
+            continue;
+        }
+
+        if (state == SCE_PL_DEFAULT) {
+            if (iswordstart(ch)) {
+                styler.ColourTo(i - 1, state);
+                // s, m, tr and q/qq/qr/qw/qx followed by a non-alphanumeric start
+                // quote-like constructs whose delimiter is picked up later
+                if (ch == 's' && !isalnum(chNext)) {
+                    state = SCE_PL_REGSUBST;
+                    quotes = 0;
+                    quoteUp = '\0';
+                    quoteDown = '\0';
+                    quoteRep = 2;
+                } else if (ch == 'm' && !isalnum(chNext)) {
+                    state = SCE_PL_REGEX;
+                    quotes = 0;
+                    quoteUp = '\0';
+                    quoteDown = '\0';
+                    quoteRep = 1;
+                } else if (ch == 't' && chNext == 'r' && !isalnum(chNext2)) {
+                    state = SCE_PL_REGSUBST;
+                    quotes = 0;
+                    quoteUp = '\0';
+                    quoteDown = '\0';
+                    quoteRep = 2;
+                    i++;
+                    chNext = chNext2;
+                } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isalnum(chNext2)) {
+                    state = SCE_PL_LONGQUOTE;
+                    i++;
+                    chNext = chNext2;
+                    quotes = 0;
+                    quoteUp = '\0';
+                    quoteDown = '\0';
+                    quoteRep = 1;
+                } else {
+                    state = SCE_PL_WORD;
+                    preferRE = false;
+                }
+            } else if (ch == '#') {
+                styler.ColourTo(i - 1, state);
+                state = SCE_PL_COMMENTLINE;
+            } else if (ch == '\"') {
+                styler.ColourTo(i - 1, state);
+                state = SCE_PL_STRING;
+            } else if (ch == '\'') {
+                if (chPrev == '&') {
+                    // Archaic call
+                    styler.ColourTo(i, state);
+                } else {
+                    styler.ColourTo(i - 1, state);
+                    state = SCE_PL_CHARACTER;
+                }
+            } else if (ch == '`') {
+                styler.ColourTo(i - 1, state);
+                state = SCE_PL_BACKTICKS;
+            } else if (ch == '$') {
+                preferRE = false;
+                styler.ColourTo(i - 1, state);
+                if (isalnum(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
+                    state = SCE_PL_SCALAR;
+                } else if (chNext != '{' && chNext != '[') {
+                    styler.ColourTo(i, SCE_PL_SCALAR);
+                    i++;
+                    ch = ' ';
+                    chNext = ' ';
+                } else {
+                    styler.ColourTo(i, SCE_PL_SCALAR);
+                }
+            } else if (ch == '@') {
+                preferRE = false;
+                styler.ColourTo(i - 1, state);
+                if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
+                    state = SCE_PL_ARRAY;
+                } else if (chNext != '{' && chNext != '[') {
+                    styler.ColourTo(i, SCE_PL_ARRAY);
+                    i++;
+                    ch = ' ';
+                } else {
+                    styler.ColourTo(i, SCE_PL_ARRAY);
+                }
+            } else if (ch == '%') {
+                preferRE = false;
+                styler.ColourTo(i - 1, state);
+                if (isalpha(chNext) || chNext == '#' || chNext == '$' || chNext == '_') {
+                    state = SCE_PL_HASH;
+                } else if (chNext != '{' && chNext != '[') {
+                    styler.ColourTo(i, SCE_PL_HASH);
+                    i++;
+                    ch = ' ';
+                } else {
+                    styler.ColourTo(i, SCE_PL_HASH);
+                }
+            } else if (ch == '*') {
+                styler.ColourTo(i - 1, state);
+                state = SCE_PL_SYMBOLTABLE;
+            } else if (ch == '/' && preferRE) {
+                styler.ColourTo(i - 1, state);
+                state = SCE_PL_REGEX;
+                quoteUp = '/';
+                quoteDown = '/';
+                quotes = 1;
+                quoteRep = 1;
+            } else if (ch == '<' && chNext == '<') {
+                styler.ColourTo(i - 1, state);
+                state = SCE_PL_HERE;
+                i++;
+                ch = chNext;
+                chNext = chNext2;
+                quotes = 0;
+                sookedpos = 0;
+                sooked[sookedpos] = '\0';
+            } else if (ch == '=' && isalpha(chNext)) {
+                styler.ColourTo(i - 1, state);
+                state = SCE_PL_POD;
+                quotes = 0;
+                sookedpos = 0;
+                sooked[sookedpos] = '\0';
+            } else if (isPerlOperator(ch)) {
+                // After ')' or ']' a following '/' is not taken as a regex start
+                if (ch == ')' || ch == ']')
+                    preferRE = false;
+                else
+                    preferRE = true;
+                styler.ColourTo(i - 1, state);
+                styler.ColourTo(i, SCE_PL_OPERATOR);
+            }
+        } else if (state == SCE_PL_WORD) {
+            if (!iswordchar(ch) && ch != '\'') { // Archaic Perl has quotes inside names
+                if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")) {
+                    styler.ColourTo(i, SCE_PL_DATASECTION);
+                    state = SCE_PL_DATASECTION;
+                } else if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__")) {
+                    styler.ColourTo(i, SCE_PL_DATASECTION);
+                    state = SCE_PL_DATASECTION;
+                } else {
+                    // A keyword may be followed by a regular expression
+                    if (classifyWordPerl(styler.GetStartSegment(), i - 1, keywords, styler) == SCE_PL_WORD)
+                        preferRE = true;
+                    state = SCE_PL_DEFAULT;
+                    if (ch == '#') {
+                        state = SCE_PL_COMMENTLINE;
+                    } else if (ch == '\"') {
+                        state = SCE_PL_STRING;
+                    } else if (ch == '\'') {
+                        state = SCE_PL_CHARACTER;
+                    } else if (ch == '<' && chNext == '<') {
+                        state = SCE_PL_HERE;
+                        quotes = 0;
+                        sookedpos = 0;
+                        sooked[sookedpos] = '\0';
+                    } else if (isPerlOperator(ch)) {
+                        if (ch == ')' || ch == ']')
+                            preferRE = false;
+                        else
+                            preferRE = true;
+                        styler.ColourTo(i, SCE_PL_OPERATOR);
+                        state = SCE_PL_DEFAULT;
+                    }
+                }
+            }
+        } else {
+            if (state == SCE_PL_COMMENTLINE) {
+                if (ch == '\r' || ch == '\n') {
+                    styler.ColourTo(i - 1, state);
+                    state = SCE_PL_DEFAULT;
+                }
+            } else if (state == SCE_PL_HERE) {
+                if (isalnum(ch) && quotes < 2) {
+                    // Never write past the end of sooked: an over-long delimiter is
+                    // truncated, always leaving room for the terminating '\0'.
+                    if (sookedpos < static_cast<int>(sizeof(sooked)) - 1) {
+                        sooked[sookedpos++] = ch;
+                        sooked[sookedpos] = '\0';
+                    }
+                    if (quotes == 0)
+                        quotes = 1;
+                } else {
+                    quotes++;
+                }
+
+                // Once the delimiter has been collected, look for its next occurrence
+                if (quotes > 1 && isMatch(styler, lengthDoc, i, sooked)) {
+                    styler.ColourTo(i + sookedpos - 1, SCE_PL_HERE);
+                    state = SCE_PL_DEFAULT;
+                    i += sookedpos;
+                    chNext = ' ';
+                }
+            } else if (state == SCE_PL_STRING) {
+                if (ch == '\\') {
+                    // Skip an escaped quote or backslash so it cannot end the string
+                    if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
+                        i++;
+                        ch = chNext;
+                        chNext = styler.SafeGetCharAt(i + 1);
+                    }
+                } else if (ch == '\"') {
+                    styler.ColourTo(i, state);
+                    state = SCE_PL_DEFAULT;
+                    i++;
+                    ch = chNext;
+                    chNext = styler.SafeGetCharAt(i + 1);
+                }
+            } else if (state == SCE_PL_CHARACTER) {
+                if (ch == '\\') {
+                    if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
+                        i++;
+                        ch = chNext;
+                        chNext = styler.SafeGetCharAt(i + 1);
+                    }
+                } else if (ch == '\'') {
+                    styler.ColourTo(i, state);
+                    state = SCE_PL_DEFAULT;
+                    i++;
+                    ch = chNext;
+                    chNext = styler.SafeGetCharAt(i + 1);
+                }
+            } else if (state == SCE_PL_BACKTICKS) {
+                if (ch == '`') {
+                    styler.ColourTo(i, state);
+                    state = SCE_PL_DEFAULT;
+                    i++;
+                    ch = chNext;
+                    chNext = styler.SafeGetCharAt(i + 1);
+                }
+            } else if (state == SCE_PL_POD) {
+                if (ch == '=') {
+                    if (isMatch(styler, lengthDoc, i, "=cut")) {
+                        styler.ColourTo(i - 1 + 4, state);
+                        i += 4;
+                        state = SCE_PL_DEFAULT;
+                        chNext = ' ';
+                        ch = ' ';
+                    }
+                }
+            } else if (state == SCE_PL_SCALAR) {
+                if (isEndVar(ch)) {
+                    styler.ColourTo(i - 1, state);
+                    state = SCE_PL_DEFAULT;
+                }
+            } else if (state == SCE_PL_ARRAY) {
+                if (isEndVar(ch)) {
+                    styler.ColourTo(i - 1, state);
+                    state = SCE_PL_DEFAULT;
+                }
+            } else if (state == SCE_PL_HASH) {
+                if (isEndVar(ch)) {
+                    styler.ColourTo(i - 1, state);
+                    state = SCE_PL_DEFAULT;
+                }
+            } else if (state == SCE_PL_SYMBOLTABLE) {
+                if (isEndVar(ch)) {
+                    styler.ColourTo(i - 1, state);
+                    state = SCE_PL_DEFAULT;
+                }
+            } else if (state == SCE_PL_REF) {
+                if (isEndVar(ch)) {
+                    styler.ColourTo(i - 1, state);
+                    state = SCE_PL_DEFAULT;
+                }
+            } else if (state == SCE_PL_REGEX) {
+                if (!quoteUp && !isspace(ch)) {
+                    // First non-space character after the introducer is the delimiter
+                    quoteUp = ch;
+                    quoteDown = opposite(ch);
+                    quotes++;
+                } else {
+                    if (ch == quoteDown && chPrev != '\\') {
+                        quotes--;
+                        if (quotes == 0) {
+                            quoteRep--;
+                            if (quoteUp == quoteDown) {
+                                quotes++;
+                            }
+                        }
+                        // Letters directly after the closing delimiter stay in the regex style
+                        if (!isalpha(chNext)) {
+                            if (quoteRep <= 0) {
+                                styler.ColourTo(i, state);
+                                state = SCE_PL_DEFAULT;
+                                ch = ' ';
+                            }
+                        }
+                    } else if (ch == quoteUp && chPrev != '\\') {
+                        quotes++;
+                    } else if (!isalpha(chNext)) {
+                        if (quoteRep <= 0) {
+                            styler.ColourTo(i, state);
+                            state = SCE_PL_DEFAULT;
+                            ch = ' ';
+                        }
+                    }
+                }
+            } else if (state == SCE_PL_REGSUBST) {
+                if (!quoteUp && !isspace(ch)) {
+                    quoteUp = ch;
+                    quoteDown = opposite(ch);
+                    quotes++;
+                } else {
+                    if (ch == quoteDown && chPrev != '\\') {
+                        quotes--;
+                        if (quotes == 0) {
+                            quoteRep--;
+                        }
+                        if (!isalpha(chNext)) {
+                            if (quoteRep <= 0) {
+                                styler.ColourTo(i, state);
+                                state = SCE_PL_DEFAULT;
+                                ch = ' ';
+                            }
+                        }
+                        if (quoteUp == quoteDown) {
+                            quotes++;
+                        }
+                    } else if (ch == quoteUp && chPrev != '\\') {
+                        quotes++;
+                    } else if (!isalpha(chNext)) {
+                        if (quoteRep <= 0) {
+                            styler.ColourTo(i, state);
+                            state = SCE_PL_DEFAULT;
+                            ch = ' ';
+                        }
+                    }
+                }
+            } else if (state == SCE_PL_LONGQUOTE) {
+                if (!quoteDown && !isspace(ch)) {
+                    quoteUp = ch;
+                    quoteDown = opposite(quoteUp);
+                    quotes++;
+                } else if (ch == quoteDown) {
+                    quotes--;
+                    if (quotes == 0) {
+                        quoteRep--;
+                        if (quoteRep <= 0) {
+                            styler.ColourTo(i, state);
+                            state = SCE_PL_DEFAULT;
+                            ch = ' ';
+                        }
+                        if (quoteUp == quoteDown) {
+                            quotes++;
+                        }
+                    }
+                } else if (ch == quoteUp) {
+                    quotes++;
+                }
+            }
+
+            if (state == SCE_PL_DEFAULT) {    // One of the above succeeded
+                if (ch == '#') {
+                    state = SCE_PL_COMMENTLINE;
+                } else if (ch == '\"') {
+                    state = SCE_PL_STRING;
+                } else if (ch == '\'') {
+                    state = SCE_PL_CHARACTER;
+                } else if (iswordstart(ch)) {
+                    state = SCE_PL_WORD;
+                    preferRE = false;
+                } else if (isoperator(ch)) {
+                    styler.ColourTo(i, SCE_PL_OPERATOR);
+                }
+            }
+        }
+        chPrev = ch;
+    }
+    // Colour whatever remains in the state reached at the end of the range
+    styler.ColourTo(lengthDoc, state);
+}
+
+// File-scope LexerModule object pairing the SCLEX_PERL identifier with ColourisePerlDoc.
+// NOTE(review): presumably constructing it is what makes this lexer available to the
+// editor - confirm against the LexerModule declaration.
+static LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc);