about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author nyamatongwe <devnull@localhost> 2008-07-26 12:20:46 +0000
committer nyamatongwe <devnull@localhost> 2008-07-26 12:20:46 +0000
commit 8967b23bcad9622e11787d77708cf157b24e7dd4 (patch)
tree edada5e4e0418b45f89bc344370954bdc7955d20
parent d657cb26e2e7a023a620ecdcae4a79da53f13bc2 (diff)
download scintilla-mirror-8967b23bcad9622e11787d77708cf157b24e7dd4.tar.gz
Update from Kein-Hong Man simplifies the code.
-rw-r--r-- src/LexBash.cxx 753
1 files changed, 301 insertions, 452 deletions
diff --git a/src/LexBash.cxx b/src/LexBash.cxx
index 0797e68a9..7b475a7de 100644
--- a/src/LexBash.cxx
+++ b/src/LexBash.cxx
@@ -2,8 +2,8 @@
/** @file LexBash.cxx
** Lexer for Bash.
**/
-// Copyright 2004-2007 by Neil Hodgson <neilh@scintilla.org>
-// Adapted from LexPerl by Kein-Hong Man <mkh@pl.jaring.my> 2004
+// Copyright 2004-2008 by Neil Hodgson <neilh@scintilla.org>
+// Adapted from LexPerl by Kein-Hong Man 2004
// The License.txt file describes the conditions under which this software may be distributed.
#include <stdlib.h>
@@ -16,9 +16,17 @@
#include "PropSet.h"
#include "Accessor.h"
+#include "StyleContext.h"
#include "KeyWords.h"
#include "Scintilla.h"
#include "SciLexer.h"
+#include "CharacterSet.h"
+
+#ifdef SCI_NAMESPACE
+using namespace Scintilla;
+#endif
+
+#define HERE_DELIM_MAX 256
// define this if you want 'invalid octals' to be marked as errors
// usually, this is not a good idea, permissive lexing is better
@@ -32,13 +40,7 @@
#define BASH_BASE_OCTAL_ERROR 69
#endif
-#define HERE_DELIM_MAX 256
-
-#ifdef SCI_NAMESPACE
-using namespace Scintilla;
-#endif
-
-static inline int translateBashDigit(char ch) {
+static inline int translateBashDigit(int ch) {
if (ch >= '0' && ch <= '9') {
return ch - '0';
} else if (ch >= 'a' && ch <= 'z') {
@@ -53,407 +55,210 @@ static inline int translateBashDigit(char ch) {
return BASH_BASE_ERROR;
}
-static inline bool isEOLChar(char ch) {
- return (ch == '\r') || (ch == '\n');
-}
-
-static bool isSingleCharOp(char ch) {
- char strCharSet[2];
- strCharSet[0] = ch;
- strCharSet[1] = '\0';
- return (NULL != strstr("rwxoRWXOezsfdlpSbctugkTBMACahGLNn", strCharSet));
-}
-
-static inline bool isBashOperator(char ch) {
- if (ch == '^' || ch == '&' || ch == '\\' || ch == '%' ||
- ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
- ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
- ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
- ch == '>' || ch == ',' || ch == '/' || ch == '<' ||
- ch == '?' || ch == '!' || ch == '.' || ch == '~' ||
- ch == '@')
- return true;
- return false;
-}
-
-static int classifyWordBash(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
- char s[100];
- for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
- s[i] = styler[start + i];
- s[i + 1] = '\0';
- }
- char chAttr = SCE_SH_IDENTIFIER;
- if (keywords.InList(s))
- chAttr = SCE_SH_WORD;
- styler.ColourTo(end, chAttr);
- return chAttr;
-}
-
-static inline int getBashNumberBase(unsigned int start, unsigned int end, Accessor &styler) {
+static inline int getBashNumberBase(char *s) {
+ int i = 0;
int base = 0;
- for (unsigned int i = 0; i < end - start + 1 && i < 10; i++) {
- base = base * 10 + (styler[start + i] - '0');
+ while (*s) {
+ base = base * 10 + (*s++ - '0');
+ i++;
}
- if (base > 64 || (end - start) > 1) {
+ if (base > 64 || i > 2) {
return BASH_BASE_ERROR;
}
return base;
}
-static inline bool isEndVar(char ch) {
- return !isalnum(ch) && ch != '$' && ch != '_';
-}
-
-static inline bool isNonQuote(char ch) {
- return isalnum(ch) || ch == '_';
-}
-
-static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
- if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
- return false;
- }
- while (*val) {
- if (*val != styler[pos++]) {
- return false;
- }
- val++;
- }
- return true;
-}
-
-static char opposite(char ch) {
- if (ch == '(')
- return ')';
- if (ch == '[')
- return ']';
- if (ch == '{')
- return '}';
- if (ch == '<')
- return '>';
+static int opposite(int ch) {
+ if (ch == '(') return ')';
+ if (ch == '[') return ']';
+ if (ch == '{') return '}';
+ if (ch == '<') return '>';
return ch;
}
static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle,
- WordList *keywordlists[], Accessor &styler) {
-
- // Lexer for bash often has to backtrack to start of current style to determine
- // which characters are being used as quotes, how deeply nested is the
- // start position and what the termination string is for here documents
+ WordList *keywordlists[], Accessor &styler) {
WordList &keywords = *keywordlists[0];
- class HereDocCls {
+ CharacterSet setWordStart(CharacterSet::setAlpha, "_");
+ // note that [+-] are often parts of identifiers in shell scripts
+ CharacterSet setWord(CharacterSet::setAlphaNum, "._+-");
+ CharacterSet setBashOperator(CharacterSet::setNone, "^&\\%()-+=|{}[]:;>,*/<?!.~@");
+ CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
+ CharacterSet setParam(CharacterSet::setAlphaNum, "$_");
+ CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!");
+ CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!");
+ CharacterSet setLeftShift(CharacterSet::setDigits, "=$");
+
+ class HereDocCls { // Class to manage HERE document elements
public:
int State; // 0: '<<' encountered
// 1: collect the delimiter
// 2: here doc text (lines after the delimiter)
- char Quote; // the char after '<<'
+ int Quote; // the char after '<<'
bool Quoted; // true if Quote in ('\'','"','`')
bool Indent; // indented delimiter (for <<-)
int DelimiterLength; // strlen(Delimiter)
char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
HereDocCls() {
State = 0;
- Quote = 0;
- Quoted = false;
- Indent = 0;
+ Quote = 0;
+ Quoted = false;
+ Indent = 0;
DelimiterLength = 0;
Delimiter = new char[HERE_DELIM_MAX];
Delimiter[0] = '\0';
}
+ void Append(int ch) {
+ Delimiter[DelimiterLength++] = static_cast<char>(ch);
+ Delimiter[DelimiterLength] = '\0';
+ }
~HereDocCls() {
delete []Delimiter;
}
};
HereDocCls HereDoc;
- class QuoteCls {
+ class QuoteCls { // Class to manage quote pairs (simplified vs LexPerl)
public:
- int Rep;
- int Count;
- char Up;
- char Down;
+ int Count;
+ int Up, Down;
QuoteCls() {
- this->New(1);
- }
- void New(int r) {
- Rep = r;
Count = 0;
Up = '\0';
Down = '\0';
}
- void Open(char u) {
+ void Open(int u) {
Count++;
Up = u;
Down = opposite(Up);
}
+ void Start(int u) {
+ Count = 0;
+ Open(u);
+ }
};
QuoteCls Quote;
- int state = initStyle;
int numBase = 0;
- unsigned int lengthDoc = startPos + length;
+ int digit;
+ unsigned int endPos = startPos + length;
- // If in a long distance lexical state, seek to the beginning to find quote characters
- // Bash strings can be multi-line with embedded newlines, so backtrack.
- // Bash numbers have additional state during lexing, so backtrack too.
- if (state == SCE_SH_HERE_Q) {
+ // Backtrack to beginning of style if required...
+ // If in a long distance lexical state, backtrack to find quote characters
+ if (initStyle == SCE_SH_HERE_Q) {
while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_SH_HERE_DELIM)) {
startPos--;
}
startPos = styler.LineStart(styler.GetLine(startPos));
- state = styler.StyleAt(startPos - 1);
+ initStyle = styler.StyleAt(startPos - 1);
}
- if (state == SCE_SH_STRING
- || state == SCE_SH_BACKTICKS
- || state == SCE_SH_CHARACTER
- || state == SCE_SH_NUMBER
- || state == SCE_SH_IDENTIFIER
- || state == SCE_SH_COMMENTLINE
- ) {
- while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
+ // Bash strings can be multi-line with embedded newlines, so backtrack.
+ // Bash numbers have additional state during lexing, so backtrack too.
+ if (initStyle == SCE_SH_STRING
+ || initStyle == SCE_SH_BACKTICKS
+ || initStyle == SCE_SH_CHARACTER
+ || initStyle == SCE_SH_NUMBER
+ || initStyle == SCE_SH_IDENTIFIER
+ || initStyle == SCE_SH_COMMENTLINE) {
+ while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
startPos--;
}
- state = SCE_SH_DEFAULT;
+ initStyle = SCE_SH_DEFAULT;
}
- styler.StartAt(startPos);
- char chPrev = styler.SafeGetCharAt(startPos - 1);
- if (startPos == 0)
- chPrev = '\n';
- char chNext = styler[startPos];
- styler.StartSegment(startPos);
-
- for (unsigned int i = startPos; i < lengthDoc; i++) {
- char ch = chNext;
- // if the current character is not consumed due to the completion of an
- // earlier style, lexing can be restarted via a simple goto
- restartLexer:
- chNext = styler.SafeGetCharAt(i + 1);
- char chNext2 = styler.SafeGetCharAt(i + 2);
-
- if (styler.IsLeadByte(ch)) {
- chNext = styler.SafeGetCharAt(i + 2);
- chPrev = ' ';
- i += 1;
- continue;
- }
-
- if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows
- styler.ColourTo(i, state);
- chPrev = ch;
- continue;
- }
-
- if (HereDoc.State == 1 && isEOLChar(ch)) {
- // Begin of here-doc (the line after the here-doc delimiter):
- // Lexically, the here-doc starts from the next line after the >>, but the
- // first line of here-doc seem to follow the style of the last EOL sequence
- HereDoc.State = 2;
- if (HereDoc.Quoted) {
- if (state == SCE_SH_HERE_DELIM) {
- // Missing quote at end of string! We are stricter than bash.
- // Colour here-doc anyway while marking this bit as an error.
- state = SCE_SH_ERROR;
- }
- styler.ColourTo(i - 1, state);
- // HereDoc.Quote always == '\''
- state = SCE_SH_HERE_Q;
- } else {
- styler.ColourTo(i - 1, state);
- // always switch
- state = SCE_SH_HERE_Q;
- }
- }
-
- if (state == SCE_SH_DEFAULT) {
- if (ch == '\\') { // escaped character
- if (i < lengthDoc - 1)
- i++;
- ch = chNext;
- chNext = chNext2;
- styler.ColourTo(i, SCE_SH_IDENTIFIER);
- } else if (isascii(ch) && isdigit(ch)) {
- state = SCE_SH_NUMBER;
- numBase = BASH_BASE_DECIMAL;
- if (ch == '0') { // hex,octal
- if (chNext == 'x' || chNext == 'X') {
- numBase = BASH_BASE_HEX;
- i++;
- ch = chNext;
- chNext = chNext2;
- } else if (isdigit(chNext)) {
-#ifdef PEDANTIC_OCTAL
- numBase = BASH_BASE_OCTAL;
-#else
- numBase = BASH_BASE_HEX;
-#endif
+ StyleContext sc(startPos, endPos - startPos, initStyle, styler);
+
+ for (; sc.More(); sc.Forward()) {
+
+ // Determine if the current state should terminate.
+ switch (sc.state) {
+ case SCE_SH_OPERATOR:
+ sc.SetState(SCE_SH_DEFAULT);
+ break;
+ case SCE_SH_WORD:
+ // "." never used in Bash variable names but used in file names
+ if (!setWord.Contains(sc.ch)) {
+ char s[1000];
+ sc.GetCurrent(s, sizeof(s));
+ if (s[0] != '-' && // for file operators
+ !keywords.InList(s)) {
+ sc.ChangeState(SCE_SH_IDENTIFIER);
}
+ sc.SetState(SCE_SH_DEFAULT);
}
- } else if (iswordstart(ch)) {
- state = SCE_SH_WORD;
- if (!iswordchar(chNext) && chNext != '+' && chNext != '-') {
- // We need that if length of word == 1!
- // This test is copied from the SCE_SH_WORD handler.
- classifyWordBash(styler.GetStartSegment(), i, keywords, styler);
- state = SCE_SH_DEFAULT;
+ break;
+ case SCE_SH_IDENTIFIER:
+ if (sc.chPrev == '\\') { // for escaped chars
+ sc.ForwardSetState(SCE_SH_DEFAULT);
+ } else if (!setWord.Contains(sc.ch)) {
+ sc.SetState(SCE_SH_DEFAULT);
}
- } else if (ch == '#') {
- state = SCE_SH_COMMENTLINE;
- } else if (ch == '\"') {
- state = SCE_SH_STRING;
- Quote.New(1);
- Quote.Open(ch);
- } else if (ch == '\'') {
- state = SCE_SH_CHARACTER;
- Quote.New(1);
- Quote.Open(ch);
- } else if (ch == '`') {
- state = SCE_SH_BACKTICKS;
- Quote.New(1);
- Quote.Open(ch);
- } else if (ch == '$') {
- if (chNext == '{') {
- state = SCE_SH_PARAM;
- goto startQuote;
- } else if (chNext == '\'') {
- state = SCE_SH_CHARACTER;
- goto startQuote;
- } else if (chNext == '"') {
- state = SCE_SH_STRING;
- goto startQuote;
- } else if (chNext == '(' && chNext2 == '(') {
- styler.ColourTo(i, SCE_SH_OPERATOR);
- state = SCE_SH_DEFAULT;
- goto skipChar;
- } else if (chNext == '(' || chNext == '`') {
- state = SCE_SH_BACKTICKS;
- startQuote:
- Quote.New(1);
- Quote.Open(chNext);
- goto skipChar;
- } else {
- state = SCE_SH_SCALAR;
- skipChar:
- i++;
- ch = chNext;
- chNext = chNext2;
- }
- } else if (ch == '*') {
- if (chNext == '*') { // exponentiation
- i++;
- ch = chNext;
- chNext = chNext2;
- }
- styler.ColourTo(i, SCE_SH_OPERATOR);
- } else if (ch == '<' && chNext == '<') {
- state = SCE_SH_HERE_DELIM;
- HereDoc.State = 0;
- HereDoc.Indent = false;
- } else if (ch == '-' // file test operators
- && isSingleCharOp(chNext)
- && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))
- && isspace(chPrev)) {
- styler.ColourTo(i + 1, SCE_SH_WORD);
- state = SCE_SH_DEFAULT;
- i++;
- ch = chNext;
- chNext = chNext2;
- } else if (isBashOperator(ch)) {
- styler.ColourTo(i, SCE_SH_OPERATOR);
- } else {
- // keep colouring defaults to make restart easier
- styler.ColourTo(i, SCE_SH_DEFAULT);
- }
- } else if (state == SCE_SH_NUMBER) {
- int digit = translateBashDigit(ch);
- if (numBase == BASH_BASE_DECIMAL) {
- if (ch == '#') {
- numBase = getBashNumberBase(styler.GetStartSegment(), i - 1, styler);
- if (numBase == BASH_BASE_ERROR) // take the rest as comment
- goto numAtEnd;
- } else if (!isdigit(ch))
- goto numAtEnd;
- } else if (numBase == BASH_BASE_HEX) {
- if ((digit < 16) || (digit >= 36 && digit <= 41)) {
- // hex digit 0-9a-fA-F
- } else
- goto numAtEnd;
+ break;
+ case SCE_SH_NUMBER:
+ digit = translateBashDigit(sc.ch);
+ if (numBase == BASH_BASE_DECIMAL) {
+ if (sc.ch == '#') {
+ char s[10];
+ sc.GetCurrent(s, sizeof(s));
+ numBase = getBashNumberBase(s);
+ if (numBase != BASH_BASE_ERROR)
+ break;
+ } else if (IsADigit(sc.ch))
+ break;
+ } else if (numBase == BASH_BASE_HEX) {
+ if (IsADigit(sc.ch, 16))
+ break;
#ifdef PEDANTIC_OCTAL
- } else if (numBase == BASH_BASE_OCTAL ||
- numBase == BASH_BASE_OCTAL_ERROR) {
- if (digit > 7) {
+ } else if (numBase == BASH_BASE_OCTAL ||
+ numBase == BASH_BASE_OCTAL_ERROR) {
+ if (digit <= 7)
+ break;
if (digit <= 9) {
- numBase = BASH_BASE_OCTAL_ERROR;
- } else
- goto numAtEnd;
- }
-#endif
- } else if (numBase == BASH_BASE_ERROR) {
- if (digit > 9)
- goto numAtEnd;
- } else { // DD#DDDD number style handling
- if (digit != BASH_BASE_ERROR) {
- if (numBase <= 36) {
- // case-insensitive if base<=36
- if (digit >= 36) digit -= 26;
+ numBase = BASH_BASE_OCTAL_ERROR;
+ break;
}
- if (digit >= numBase) {
+#endif
+ } else if (numBase == BASH_BASE_ERROR) {
+ if (digit <= 9)
+ break;
+ } else { // DD#DDDD number style handling
+ if (digit != BASH_BASE_ERROR) {
+ if (numBase <= 36) {
+ // case-insensitive if base<=36
+ if (digit >= 36) digit -= 26;
+ }
+ if (digit < numBase)
+ break;
if (digit <= 9) {
numBase = BASH_BASE_ERROR;
- } else
- goto numAtEnd;
+ break;
+ }
}
- } else {
- numAtEnd:
- if (numBase == BASH_BASE_ERROR
+ }
+ // fallthrough when number is at an end or error
+ if (numBase == BASH_BASE_ERROR
#ifdef PEDANTIC_OCTAL
- || numBase == BASH_BASE_OCTAL_ERROR
+ || numBase == BASH_BASE_OCTAL_ERROR
#endif
- )
- state = SCE_SH_ERROR;
- styler.ColourTo(i - 1, state);
- state = SCE_SH_DEFAULT;
- goto restartLexer;
+ ) {
+ sc.ChangeState(SCE_SH_ERROR);
}
- }
- } else if (state == SCE_SH_WORD) {
- if (!iswordchar(chNext) && chNext != '+' && chNext != '-') {
- // "." never used in Bash variable names
- // but used in file names
- classifyWordBash(styler.GetStartSegment(), i, keywords, styler);
- state = SCE_SH_DEFAULT;
- ch = ' ';
- }
- } else if (state == SCE_SH_IDENTIFIER) {
- if (!iswordchar(chNext) && chNext != '+' && chNext != '-') {
- styler.ColourTo(i, SCE_SH_IDENTIFIER);
- state = SCE_SH_DEFAULT;
- ch = ' ';
- }
- } else {
- if (state == SCE_SH_COMMENTLINE) {
- if (ch == '\\' && isEOLChar(chNext)) {
+ sc.SetState(SCE_SH_DEFAULT);
+ break;
+ case SCE_SH_COMMENTLINE:
+ if (sc.ch == '\\' && (sc.chNext == '\r' || sc.chNext == '\n')) {
// comment continuation
- if (chNext == '\r' && chNext2 == '\n') {
- i += 2;
- ch = styler.SafeGetCharAt(i);
- chNext = styler.SafeGetCharAt(i + 1);
- } else {
- i++;
- ch = chNext;
- chNext = chNext2;
+ sc.Forward();
+ if (sc.ch == '\r' && sc.chNext == '\n') {
+ sc.Forward();
}
- } else if (isEOLChar(ch)) {
- styler.ColourTo(i - 1, state);
- state = SCE_SH_DEFAULT;
- goto restartLexer;
- } else if (isEOLChar(chNext)) {
- styler.ColourTo(i, state);
- state = SCE_SH_DEFAULT;
+ } else if (sc.atLineEnd) {
+ sc.ForwardSetState(SCE_SH_DEFAULT);
}
- } else if (state == SCE_SH_HERE_DELIM) {
- //
+ break;
+ case SCE_SH_HERE_DELIM:
// From Bash info:
// ---------------
// Specifier format is: <<[-]WORD
@@ -461,150 +266,194 @@ static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle,
// Whitespace acceptable after <<[-] operator
//
if (HereDoc.State == 0) { // '<<' encountered
- HereDoc.State = 1;
- HereDoc.Quote = chNext;
+ HereDoc.Quote = sc.chNext;
HereDoc.Quoted = false;
HereDoc.DelimiterLength = 0;
HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
- if (chNext == '\'' || chNext == '\"') { // a quoted here-doc delimiter (' or ")
- i++;
- ch = chNext;
- chNext = chNext2;
+ if (sc.chNext == '\'' || sc.chNext == '\"') { // a quoted here-doc delimiter (' or ")
+ sc.Forward();
HereDoc.Quoted = true;
- } else if (!HereDoc.Indent && chNext == '-') { // <<- indent case
+ HereDoc.State = 1;
+ } else if (!HereDoc.Indent && sc.chNext == '-') { // <<- indent case
HereDoc.Indent = true;
- HereDoc.State = 0;
- } else if (isalpha(chNext) || chNext == '_' || chNext == '\\'
- || chNext == '-' || chNext == '+' || chNext == '!') {
+ } else if (setHereDoc.Contains(sc.chNext)) {
// an unquoted here-doc delimiter, no special handling
- // TODO check what exactly bash considers part of the delim
- } else if (chNext == '<') { // HERE string <<<
- i++;
- ch = chNext;
- chNext = chNext2;
- styler.ColourTo(i, SCE_SH_HERE_DELIM);
- state = SCE_SH_DEFAULT;
- HereDoc.State = 0;
- } else if (isspacechar(chNext)) {
+ // TODO check what exactly bash considers part of the delim
+ HereDoc.State = 1;
+ } else if (sc.chNext == '<') { // HERE string <<<
+ sc.Forward();
+ sc.ForwardSetState(SCE_SH_DEFAULT);
+ } else if (IsASpace(sc.chNext)) {
// eat whitespace
- HereDoc.State = 0;
- } else if (isdigit(chNext) || chNext == '=' || chNext == '$') {
+ } else if (setLeftShift.Contains(sc.chNext)) {
// left shift << or <<= operator cases
- styler.ColourTo(i, SCE_SH_OPERATOR);
- state = SCE_SH_DEFAULT;
- HereDoc.State = 0;
+ sc.ChangeState(SCE_SH_OPERATOR);
+ sc.ForwardSetState(SCE_SH_DEFAULT);
} else {
// symbols terminates; deprecated zero-length delimiter
+ HereDoc.State = 1;
}
} else if (HereDoc.State == 1) { // collect the delimiter
if (HereDoc.Quoted) { // a quoted here-doc delimiter
- if (ch == HereDoc.Quote) { // closing quote => end of delimiter
- styler.ColourTo(i, state);
- state = SCE_SH_DEFAULT;
+ if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
+ sc.ForwardSetState(SCE_SH_DEFAULT);
} else {
- if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote
- i++;
- ch = chNext;
- chNext = chNext2;
+ if (sc.ch == '\\' && sc.chNext == HereDoc.Quote) { // escaped quote
+ sc.Forward();
}
- HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
- HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
+ HereDoc.Append(sc.ch);
}
} else { // an unquoted here-doc delimiter
- if (isalnum(ch) || ch == '_' || ch == '-' || ch == '+' || ch == '!') {
- HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
- HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
- } else if (ch == '\\') {
+ if (setHereDoc2.Contains(sc.ch)) {
+ HereDoc.Append(sc.ch);
+ } else if (sc.ch == '\\') {
// skip escape prefix
} else {
- styler.ColourTo(i - 1, state);
- state = SCE_SH_DEFAULT;
- goto restartLexer;
+ sc.SetState(SCE_SH_DEFAULT);
}
}
- if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
- styler.ColourTo(i - 1, state);
- state = SCE_SH_ERROR;
- goto restartLexer;
+ if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { // force blowup
+ sc.SetState(SCE_SH_ERROR);
+ HereDoc.State = 0;
}
}
- } else if (HereDoc.State == 2) {
- // state == SCE_SH_HERE_Q
- if (isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
- if (!HereDoc.Indent && isEOLChar(chPrev)) {
- endHereDoc:
- // standard HERE delimiter
- i += HereDoc.DelimiterLength;
- chPrev = styler.SafeGetCharAt(i - 1);
- ch = styler.SafeGetCharAt(i);
- if (isEOLChar(ch)) {
- styler.ColourTo(i - 1, state);
- state = SCE_SH_DEFAULT;
- HereDoc.State = 0;
- goto restartLexer;
- }
- chNext = styler.SafeGetCharAt(i + 1);
- } else if (HereDoc.Indent) {
- // indented HERE delimiter
- unsigned int bk = (i > 0)? i - 1: 0;
- while (i > 0) {
- ch = styler.SafeGetCharAt(bk--);
- if (isEOLChar(ch)) {
- goto endHereDoc;
- } else if (!isspacechar(ch)) {
- break; // got leading non-whitespace
- }
+ break;
+ case SCE_SH_HERE_Q:
+ // HereDoc.State == 2
+ if (sc.atLineStart) {
+ sc.SetState(SCE_SH_HERE_Q);
+ int prefixws = 0;
+ while (IsASpace(sc.ch) && !sc.atLineEnd) { // whitespace prefix
+ sc.Forward();
+ prefixws++;
+ }
+ if (prefixws > 0)
+ sc.SetState(SCE_SH_HERE_Q);
+ while (!sc.atLineEnd) {
+ sc.Forward();
+ }
+ char s[HERE_DELIM_MAX];
+ sc.GetCurrent(s, sizeof(s));
+ if (strcmp(HereDoc.Delimiter, s) == 0) {
+ if ((prefixws > 0 && HereDoc.Indent) || // indentation rule
+ (prefixws == 0 && !HereDoc.Indent)) {
+ sc.SetState(SCE_SH_DEFAULT);
+ break;
}
}
}
- } else if (state == SCE_SH_SCALAR) { // variable names
- if (isEndVar(ch)) {
- if ((state == SCE_SH_SCALAR)
- && i == (styler.GetStartSegment() + 1)) {
+ break;
+ case SCE_SH_SCALAR: // variable names
+ if (!setParam.Contains(sc.ch)) {
+ if (sc.LengthCurrent() == 1) {
// Special variable: $(, $_ etc.
- styler.ColourTo(i, state);
- state = SCE_SH_DEFAULT;
+ sc.ForwardSetState(SCE_SH_DEFAULT);
} else {
- styler.ColourTo(i - 1, state);
- state = SCE_SH_DEFAULT;
- goto restartLexer;
+ sc.SetState(SCE_SH_DEFAULT);
}
}
- } else if (state == SCE_SH_STRING
- || state == SCE_SH_CHARACTER
- || state == SCE_SH_BACKTICKS
- || state == SCE_SH_PARAM
- ) {
- if (!Quote.Down && !isspacechar(ch)) {
- Quote.Open(ch);
- } else if (ch == '\\' && Quote.Up != '\\') {
- i++;
- ch = chNext;
- chNext = styler.SafeGetCharAt(i + 1);
- } else if (ch == Quote.Down) {
+ break;
+ case SCE_SH_STRING: // delimited styles
+ case SCE_SH_CHARACTER:
+ case SCE_SH_BACKTICKS:
+ case SCE_SH_PARAM:
+ if (sc.ch == '\\' && Quote.Up != '\\') {
+ sc.Forward();
+ } else if (sc.ch == Quote.Down) {
Quote.Count--;
if (Quote.Count == 0) {
- Quote.Rep--;
- if (Quote.Rep <= 0) {
- styler.ColourTo(i, state);
- state = SCE_SH_DEFAULT;
- ch = ' ';
- }
- if (Quote.Up == Quote.Down) {
- Quote.Count++;
- }
+ sc.ForwardSetState(SCE_SH_DEFAULT);
}
- } else if (ch == Quote.Up) {
+ } else if (sc.ch == Quote.Up) {
Quote.Count++;
}
+ break;
+ }
+
+ // Must check end of HereDoc state 1 before default state is handled
+ if (HereDoc.State == 1 && sc.atLineEnd) {
+ // Begin of here-doc (the line after the here-doc delimiter):
+ // Lexically, the here-doc starts from the next line after the >>, but the
+ // first line of here-doc seem to follow the style of the last EOL sequence
+ HereDoc.State = 2;
+ if (HereDoc.Quoted) {
+ if (sc.state == SCE_SH_HERE_DELIM) {
+ // Missing quote at end of string! We are stricter than bash.
+ // Colour here-doc anyway while marking this bit as an error.
+ sc.ChangeState(SCE_SH_ERROR);
+ }
+ // HereDoc.Quote always == '\''
}
+ sc.SetState(SCE_SH_HERE_Q);
}
- if (state == SCE_SH_ERROR) {
- break;
+
+ // Determine if a new state should be entered.
+ if (sc.state == SCE_SH_DEFAULT) {
+ if (sc.ch == '\\') { // escaped character
+ sc.SetState(SCE_SH_IDENTIFIER);
+ } else if (IsADigit(sc.ch)) {
+ sc.SetState(SCE_SH_NUMBER);
+ numBase = BASH_BASE_DECIMAL;
+ if (sc.ch == '0') { // hex,octal
+ if (sc.chNext == 'x' || sc.chNext == 'X') {
+ numBase = BASH_BASE_HEX;
+ sc.Forward();
+ } else if (IsADigit(sc.chNext)) {
+#ifdef PEDANTIC_OCTAL
+ numBase = BASH_BASE_OCTAL;
+#else
+ numBase = BASH_BASE_HEX;
+#endif
+ }
+ }
+ } else if (setWordStart.Contains(sc.ch)) {
+ sc.SetState(SCE_SH_WORD);
+ } else if (sc.ch == '#') {
+ sc.SetState(SCE_SH_COMMENTLINE);
+ } else if (sc.ch == '\"') {
+ sc.SetState(SCE_SH_STRING);
+ Quote.Start(sc.ch);
+ } else if (sc.ch == '\'') {
+ sc.SetState(SCE_SH_CHARACTER);
+ Quote.Start(sc.ch);
+ } else if (sc.ch == '`') {
+ sc.SetState(SCE_SH_BACKTICKS);
+ Quote.Start(sc.ch);
+ } else if (sc.ch == '$') {
+ sc.SetState(SCE_SH_SCALAR);
+ sc.Forward();
+ if (sc.ch == '{') {
+ sc.ChangeState(SCE_SH_PARAM);
+ } else if (sc.ch == '\'') {
+ sc.ChangeState(SCE_SH_CHARACTER);
+ } else if (sc.ch == '"') {
+ sc.ChangeState(SCE_SH_STRING);
+ } else if (sc.ch == '(' || sc.ch == '`') {
+ sc.ChangeState(SCE_SH_BACKTICKS);
+ if (sc.chNext == '(') { // $(( is lexed as operator
+ sc.ChangeState(SCE_SH_OPERATOR);
+ }
+ } else {
+ continue; // scalar has no delimiter pair
+ }
+ // fallthrough, open delim for $[{'"(`]
+ Quote.Start(sc.ch);
+ } else if (sc.Match('<', '<')) {
+ sc.SetState(SCE_SH_HERE_DELIM);
+ HereDoc.State = 0;
+ HereDoc.Indent = false;
+ } else if (sc.ch == '-' && // one-char file test operators
+ setSingleCharOp.Contains(sc.chNext) &&
+ !setWord.Contains(sc.GetRelative(2)) &&
+ IsASpace(sc.chPrev)) {
+ sc.SetState(SCE_SH_WORD);
+ sc.Forward();
+ } else if (setBashOperator.Contains(sc.ch)) {
+ sc.SetState(SCE_SH_OPERATOR);
+ }
}
- chPrev = ch;
}
- styler.ColourTo(lengthDoc - 1, state);
+ sc.Complete();
}
static bool IsCommentLine(int line, Accessor &styler) {
@@ -621,7 +470,7 @@ static bool IsCommentLine(int line, Accessor &styler) {
}
static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[],
- Accessor &styler) {
+ Accessor &styler) {
bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
unsigned int endPos = startPos + length;
@@ -637,16 +486,16 @@ static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[],
int style = styleNext;
styleNext = styler.StyleAt(i + 1);
bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
- // Comment folding
+ // Comment folding
if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
- {
- if (!IsCommentLine(lineCurrent - 1, styler)
- && IsCommentLine(lineCurrent + 1, styler))
- levelCurrent++;
- else if (IsCommentLine(lineCurrent - 1, styler)
- && !IsCommentLine(lineCurrent+1, styler))
- levelCurrent--;
- }
+ {
+ if (!IsCommentLine(lineCurrent - 1, styler)
+ && IsCommentLine(lineCurrent + 1, styler))
+ levelCurrent++;
+ else if (IsCommentLine(lineCurrent - 1, styler)
+ && !IsCommentLine(lineCurrent + 1, styler))
+ levelCurrent--;
+ }
if (style == SCE_SH_OPERATOR) {
if (ch == '{') {
levelCurrent++;