diff options
| author | nyamatongwe <unknown> | 2010-07-13 21:16:48 +1000 | 
|---|---|---|
| committer | nyamatongwe <unknown> | 2010-07-13 21:16:48 +1000 | 
| commit | fd7c21165bbc5200aa24ef89560a8bb40fd37255 (patch) | |
| tree | b5cdec307f1b412f0527eb826a14b806b58cf006 /lexers/LexBash.cxx | |
| parent | d47313970ebf3736afffec781f2f9030fa3c8e1a (diff) | |
| download | scintilla-mirror-fd7c21165bbc5200aa24ef89560a8bb40fd37255.tar.gz | |
Moved lexers into lexers directory.
Diffstat (limited to 'lexers/LexBash.cxx')
| -rw-r--r-- | lexers/LexBash.cxx | 521 | 
1 files changed, 521 insertions, 0 deletions
| diff --git a/lexers/LexBash.cxx b/lexers/LexBash.cxx new file mode 100644 index 000000000..1f97e4829 --- /dev/null +++ b/lexers/LexBash.cxx @@ -0,0 +1,521 @@ +// Scintilla source code edit control +/** @file LexBash.cxx + ** Lexer for Bash. + **/ +// Copyright 2004-2008 by Neil Hodgson <neilh@scintilla.org> +// Adapted from LexPerl by Kein-Hong Man 2004 +// The License.txt file describes the conditions under which this software may be distributed. + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <stdio.h> +#include <stdarg.h> + +#include "Platform.h" + +#include "PropSet.h" +#include "Accessor.h" +#include "StyleContext.h" +#include "KeyWords.h" +#include "Scintilla.h" +#include "SciLexer.h" +#include "CharacterSet.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +#define HERE_DELIM_MAX 256 + +// define this if you want 'invalid octals' to be marked as errors +// usually, this is not a good idea, permissive lexing is better +#undef PEDANTIC_OCTAL + +#define BASH_BASE_ERROR		65 +#define BASH_BASE_DECIMAL	66 +#define BASH_BASE_HEX		67 +#ifdef PEDANTIC_OCTAL +#define BASH_BASE_OCTAL		68 +#define BASH_BASE_OCTAL_ERROR	69 +#endif + +static inline int translateBashDigit(int ch) { +	if (ch >= '0' && ch <= '9') { +		return ch - '0'; +	} else if (ch >= 'a' && ch <= 'z') { +		return ch - 'a' + 10; +	} else if (ch >= 'A' && ch <= 'Z') { +		return ch - 'A' + 36; +	} else if (ch == '@') { +		return 62; +	} else if (ch == '_') { +		return 63; +	} +	return BASH_BASE_ERROR; +} + +static inline int getBashNumberBase(char *s) { +	int i = 0; +	int base = 0; +	while (*s) { +		base = base * 10 + (*s++ - '0'); +		i++; +	} +	if (base > 64 || i > 2) { +		return BASH_BASE_ERROR; +	} +	return base; +} + +static int opposite(int ch) { +	if (ch == '(') return ')'; +	if (ch == '[') return ']'; +	if (ch == '{') return '}'; +	if (ch == '<') return '>'; +	return ch; +} + +static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle, +							 WordList *keywordlists[], Accessor &styler) { + +	WordList &keywords = *keywordlists[0]; + +	CharacterSet setWordStart(CharacterSet::setAlpha, "_"); +	// note that [+-] are often parts of identifiers in shell scripts +	CharacterSet setWord(CharacterSet::setAlphaNum, "._+-"); +	CharacterSet setBashOperator(CharacterSet::setNone, "^&\\%()-+=|{}[]:;>,*/<?!.~@"); +	CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn"); +	CharacterSet setParam(CharacterSet::setAlphaNum, "$_"); +	CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!"); +	CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!"); +	CharacterSet setLeftShift(CharacterSet::setDigits, "=$"); + +	class HereDocCls {	// Class to manage HERE document elements +	public: +		int State;		// 0: '<<' encountered +		// 1: collect the delimiter +		// 2: here doc text (lines after the delimiter) +		int Quote;		// the char after '<<' +		bool Quoted;		// true if Quote in ('\'','"','`') +		bool Indent;		// indented delimiter (for <<-) +		int DelimiterLength;	// strlen(Delimiter) +		char *Delimiter;	// the Delimiter, 256: sizeof PL_tokenbuf +		HereDocCls() { +			State = 0; +			Quote = 0; +			Quoted = false; +			Indent = 0; +			DelimiterLength = 0; +			Delimiter = new char[HERE_DELIM_MAX]; +			Delimiter[0] = '\0'; +		} +		void Append(int ch) { +			Delimiter[DelimiterLength++] = static_cast<char>(ch); +			Delimiter[DelimiterLength] = '\0'; +		} +		~HereDocCls() { +			delete []Delimiter; +		} +	}; +	HereDocCls HereDoc; + +	class QuoteCls {	// Class to manage quote pairs (simplified vs LexPerl) +		public: +		int Count; +		int Up, Down; +		QuoteCls() { +			Count = 0; +			Up    = '\0'; +			Down  = '\0'; +		} +		void Open(int u) { +			Count++; +			Up    = u; +			Down  = opposite(Up); +		} +		void Start(int u) { +			Count = 0; +			Open(u); +		} +	}; +	QuoteCls Quote; + +	int numBase = 0; +	int digit; +	unsigned int endPos = startPos + length; + +	// Backtrack to beginning of style if required... +	// If in a long distance lexical state, backtrack to find quote characters +	if (initStyle == SCE_SH_HERE_Q) { +		while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_SH_HERE_DELIM)) { +			startPos--; +		} +		startPos = styler.LineStart(styler.GetLine(startPos)); +		initStyle = styler.StyleAt(startPos - 1); +	} +	// Bash strings can be multi-line with embedded newlines, so backtrack. +	// Bash numbers have additional state during lexing, so backtrack too. +	if (initStyle == SCE_SH_STRING +	 || initStyle == SCE_SH_BACKTICKS +	 || initStyle == SCE_SH_CHARACTER +	 || initStyle == SCE_SH_NUMBER +	 || initStyle == SCE_SH_IDENTIFIER +	 || initStyle == SCE_SH_COMMENTLINE) { +		while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) { +			startPos--; +		} +		initStyle = SCE_SH_DEFAULT; +	} + +	StyleContext sc(startPos, endPos - startPos, initStyle, styler); + +	for (; sc.More(); sc.Forward()) { + +		// Determine if the current state should terminate. +		switch (sc.state) { +			case SCE_SH_OPERATOR: +				sc.SetState(SCE_SH_DEFAULT); +				break; +			case SCE_SH_WORD: +				// "." never used in Bash variable names but used in file names +				if (!setWord.Contains(sc.ch)) { +					char s[1000]; +					sc.GetCurrent(s, sizeof(s)); +					if (s[0] != '-' &&	// for file operators +						!keywords.InList(s)) { +						sc.ChangeState(SCE_SH_IDENTIFIER); +					} +					sc.SetState(SCE_SH_DEFAULT); +				} +				break; +			case SCE_SH_IDENTIFIER: +				if (sc.chPrev == '\\') {	// for escaped chars +					sc.ForwardSetState(SCE_SH_DEFAULT); +				} else if (!setWord.Contains(sc.ch)) { +					sc.SetState(SCE_SH_DEFAULT); +				} +				break; +			case SCE_SH_NUMBER: +				digit = translateBashDigit(sc.ch); +				if (numBase == BASH_BASE_DECIMAL) { +					if (sc.ch == '#') { +						char s[10]; +						sc.GetCurrent(s, sizeof(s)); +						numBase = getBashNumberBase(s); +						if (numBase != BASH_BASE_ERROR) +							break; +					} else if (IsADigit(sc.ch)) +						break; +				} else if (numBase == BASH_BASE_HEX) { +					if (IsADigit(sc.ch, 16)) +						break; +#ifdef PEDANTIC_OCTAL +				} else if (numBase == BASH_BASE_OCTAL || +						   numBase == BASH_BASE_OCTAL_ERROR) { +					if (digit <= 7) +						break; +					if (digit <= 9) { +						numBase = BASH_BASE_OCTAL_ERROR; +						break; +					} +#endif +				} else if (numBase == BASH_BASE_ERROR) { +					if (digit <= 9) +						break; +				} else {	// DD#DDDD number style handling +					if (digit != BASH_BASE_ERROR) { +						if (numBase <= 36) { +							// case-insensitive if base<=36 +							if (digit >= 36) digit -= 26; +						} +						if (digit < numBase) +							break; +						if (digit <= 9) { +							numBase = BASH_BASE_ERROR; +							break; +						} +					} +				} +				// fallthrough when number is at an end or error +				if (numBase == BASH_BASE_ERROR +#ifdef PEDANTIC_OCTAL +					|| numBase == BASH_BASE_OCTAL_ERROR +#endif +				) { +					sc.ChangeState(SCE_SH_ERROR); +				} +				sc.SetState(SCE_SH_DEFAULT); +				break; +			case SCE_SH_COMMENTLINE: +				if (sc.atLineEnd && sc.chPrev != '\\') { +					sc.SetState(SCE_SH_DEFAULT); +				} +				break; +			case SCE_SH_HERE_DELIM: +				// From Bash info: +				// --------------- +				// Specifier format is: <<[-]WORD +				// Optional '-' is for removal of leading tabs from here-doc. +				// Whitespace acceptable after <<[-] operator +				// +				if (HereDoc.State == 0) { // '<<' encountered +					HereDoc.Quote = sc.chNext; +					HereDoc.Quoted = false; +					HereDoc.DelimiterLength = 0; +					HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0'; +					if (sc.chNext == '\'' || sc.chNext == '\"') {	// a quoted here-doc delimiter (' or ") +						sc.Forward(); +						HereDoc.Quoted = true; +						HereDoc.State = 1; +					} else if (!HereDoc.Indent && sc.chNext == '-') {	// <<- indent case +						HereDoc.Indent = true; +					} else if (setHereDoc.Contains(sc.chNext)) { +						// an unquoted here-doc delimiter, no special handling +						// TODO check what exactly bash considers part of the delim +						HereDoc.State = 1; +					} else if (sc.chNext == '<') {	// HERE string <<< +						sc.Forward(); +						sc.ForwardSetState(SCE_SH_DEFAULT); +					} else if (IsASpace(sc.chNext)) { +						// eat whitespace +					} else if (setLeftShift.Contains(sc.chNext)) { +						// left shift << or <<= operator cases +						sc.ChangeState(SCE_SH_OPERATOR); +						sc.ForwardSetState(SCE_SH_DEFAULT); +					} else { +						// symbols terminates; deprecated zero-length delimiter +						HereDoc.State = 1; +					} +				} else if (HereDoc.State == 1) { // collect the delimiter +					if (setHereDoc2.Contains(sc.ch) || sc.chPrev == '\\') { +						HereDoc.Append(sc.ch); +					} else if (HereDoc.Quoted && sc.ch == HereDoc.Quote) {	// closing quote => end of delimiter +						sc.ForwardSetState(SCE_SH_DEFAULT); +					} else if (sc.ch == '\\') { +						// skip escape prefix +					} else { +						sc.SetState(SCE_SH_DEFAULT); +					} +					if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {	// force blowup +						sc.SetState(SCE_SH_ERROR); +						HereDoc.State = 0; +					} +				} +				break; +			case SCE_SH_HERE_Q: +				// HereDoc.State == 2 +				if (sc.atLineStart) { +					sc.SetState(SCE_SH_HERE_Q); +					int prefixws = 0; +					while (IsASpace(sc.ch) && !sc.atLineEnd) {	// whitespace prefix +						sc.Forward(); +						prefixws++; +					} +					if (prefixws > 0) +						sc.SetState(SCE_SH_HERE_Q); +					while (!sc.atLineEnd) { +						sc.Forward(); +					} +					char s[HERE_DELIM_MAX]; +					sc.GetCurrent(s, sizeof(s)); +					if (sc.LengthCurrent() == 0) +						break; +					if (s[strlen(s) - 1] == '\r') +						s[strlen(s) - 1] = '\0'; +					if (strcmp(HereDoc.Delimiter, s) == 0) { +						if ((prefixws > 0 && HereDoc.Indent) ||	// indentation rule +							(prefixws == 0 && !HereDoc.Indent)) { +							sc.SetState(SCE_SH_DEFAULT); +							break; +						} +					} +				} +				break; +			case SCE_SH_SCALAR:	// variable names +				if (!setParam.Contains(sc.ch)) { +					if (sc.LengthCurrent() == 1) { +						// Special variable: $(, $_ etc. +						sc.ForwardSetState(SCE_SH_DEFAULT); +					} else { +						sc.SetState(SCE_SH_DEFAULT); +					} +				} +				break; +			case SCE_SH_STRING:	// delimited styles +			case SCE_SH_CHARACTER: +			case SCE_SH_BACKTICKS: +			case SCE_SH_PARAM: +				if (sc.ch == '\\' && Quote.Up != '\\') { +					sc.Forward(); +				} else if (sc.ch == Quote.Down) { +					Quote.Count--; +					if (Quote.Count == 0) { +						sc.ForwardSetState(SCE_SH_DEFAULT); +					} +				} else if (sc.ch == Quote.Up) { +					Quote.Count++; +				} +				break; +		} + +		// Must check end of HereDoc state 1 before default state is handled +		if (HereDoc.State == 1 && sc.atLineEnd) { +			// Begin of here-doc (the line after the here-doc delimiter): +			// Lexically, the here-doc starts from the next line after the >>, but the +			// first line of here-doc seem to follow the style of the last EOL sequence +			HereDoc.State = 2; +			if (HereDoc.Quoted) { +				if (sc.state == SCE_SH_HERE_DELIM) { +					// Missing quote at end of string! We are stricter than bash. +					// Colour here-doc anyway while marking this bit as an error. +					sc.ChangeState(SCE_SH_ERROR); +				} +				// HereDoc.Quote always == '\'' +			} +			sc.SetState(SCE_SH_HERE_Q); +		} + +		// Determine if a new state should be entered. +		if (sc.state == SCE_SH_DEFAULT) { +			if (sc.ch == '\\') {	// escaped character +				sc.SetState(SCE_SH_IDENTIFIER); +			} else if (IsADigit(sc.ch)) { +				sc.SetState(SCE_SH_NUMBER); +				numBase = BASH_BASE_DECIMAL; +				if (sc.ch == '0') {	// hex,octal +					if (sc.chNext == 'x' || sc.chNext == 'X') { +						numBase = BASH_BASE_HEX; +						sc.Forward(); +					} else if (IsADigit(sc.chNext)) { +#ifdef PEDANTIC_OCTAL +						numBase = BASH_BASE_OCTAL; +#else +						numBase = BASH_BASE_HEX; +#endif +					} +				} +			} else if (setWordStart.Contains(sc.ch)) { +				sc.SetState(SCE_SH_WORD); +			} else if (sc.ch == '#') { +				sc.SetState(SCE_SH_COMMENTLINE); +			} else if (sc.ch == '\"') { +				sc.SetState(SCE_SH_STRING); +				Quote.Start(sc.ch); +			} else if (sc.ch == '\'') { +				sc.SetState(SCE_SH_CHARACTER); +				Quote.Start(sc.ch); +			} else if (sc.ch == '`') { +				sc.SetState(SCE_SH_BACKTICKS); +				Quote.Start(sc.ch); +			} else if (sc.ch == '$') { +				sc.SetState(SCE_SH_SCALAR); +				sc.Forward(); +				if (sc.ch == '{') { +					sc.ChangeState(SCE_SH_PARAM); +				} else if (sc.ch == '\'') { +					sc.ChangeState(SCE_SH_CHARACTER); +				} else if (sc.ch == '"') { +					sc.ChangeState(SCE_SH_STRING); +				} else if (sc.ch == '(' || sc.ch == '`') { +					sc.ChangeState(SCE_SH_BACKTICKS); +					if (sc.chNext == '(') {	// $(( is lexed as operator +						sc.ChangeState(SCE_SH_OPERATOR); +					} +				} else { +					continue;	// scalar has no delimiter pair +				} +				// fallthrough, open delim for $[{'"(`] +				Quote.Start(sc.ch); +			} else if (sc.Match('<', '<')) { +				sc.SetState(SCE_SH_HERE_DELIM); +				HereDoc.State = 0; +				HereDoc.Indent = false; +			} else if (sc.ch == '-'	&&	// one-char file test operators +					   setSingleCharOp.Contains(sc.chNext) && +					   !setWord.Contains(sc.GetRelative(2)) && +					   IsASpace(sc.chPrev)) { +				sc.SetState(SCE_SH_WORD); +				sc.Forward(); +			} else if (setBashOperator.Contains(sc.ch)) { +				sc.SetState(SCE_SH_OPERATOR); +			} +		} +	} +	sc.Complete(); +} + +static bool IsCommentLine(int line, Accessor &styler) { +	int pos = styler.LineStart(line); +	int eol_pos = styler.LineStart(line + 1) - 1; +	for (int i = pos; i < eol_pos; i++) { +		char ch = styler[i]; +		if (ch == '#') +			return true; +		else if (ch != ' ' && ch != '\t') +			return false; +	} +	return false; +} + +static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[], +						Accessor &styler) { +	bool foldComment = styler.GetPropertyInt("fold.comment") != 0; +	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; +	unsigned int endPos = startPos + length; +	int visibleChars = 0; +	int lineCurrent = styler.GetLine(startPos); +	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK; +	int levelCurrent = levelPrev; +	char chNext = styler[startPos]; +	int styleNext = styler.StyleAt(startPos); +	for (unsigned int i = startPos; i < endPos; i++) { +		char ch = chNext; +		chNext = styler.SafeGetCharAt(i + 1); +		int style = styleNext; +		styleNext = styler.StyleAt(i + 1); +		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); +		// Comment folding +		if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) +		{ +			if (!IsCommentLine(lineCurrent - 1, styler) +				&& IsCommentLine(lineCurrent + 1, styler)) +				levelCurrent++; +			else if (IsCommentLine(lineCurrent - 1, styler) +					 && !IsCommentLine(lineCurrent + 1, styler)) +				levelCurrent--; +		} +		if (style == SCE_SH_OPERATOR) { +			if (ch == '{') { +				levelCurrent++; +			} else if (ch == '}') { +				levelCurrent--; +			} +		} +		if (atEOL) { +			int lev = levelPrev; +			if (visibleChars == 0 && foldCompact) +				lev |= SC_FOLDLEVELWHITEFLAG; +			if ((levelCurrent > levelPrev) && (visibleChars > 0)) +				lev |= SC_FOLDLEVELHEADERFLAG; +			if (lev != styler.LevelAt(lineCurrent)) { +				styler.SetLevel(lineCurrent, lev); +			} +			lineCurrent++; +			levelPrev = levelCurrent; +			visibleChars = 0; +		} +		if (!isspacechar(ch)) +			visibleChars++; +	} +	// Fill in the real level of the next line, keeping the current flags as they will be filled in later +	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; +	styler.SetLevel(lineCurrent, levelPrev | flagsNext); +} + +static const char * const bashWordListDesc[] = { +	"Keywords", +	0 +}; + +LexerModule lmBash(SCLEX_BASH, ColouriseBashDoc, "bash", FoldBashDoc, bashWordListDesc); | 
