diff options
Diffstat (limited to 'lexers/LexErlang.cxx')
| -rw-r--r-- | lexers/LexErlang.cxx | 619 | 
1 files changed, 619 insertions, 0 deletions
diff --git a/lexers/LexErlang.cxx b/lexers/LexErlang.cxx new file mode 100644 index 000000000..45577bda4 --- /dev/null +++ b/lexers/LexErlang.cxx @@ -0,0 +1,619 @@ +// Scintilla source code edit control +// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. +/** @file LexErlang.cxx + ** Lexer for Erlang. + ** Enhanced by Etienne 'Lenain' Girondel (lenaing@gmail.com) + ** Originally wrote by Peter-Henry Mander,  + ** based on Matlab lexer by José Fonseca. + **/ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <stdio.h> +#include <stdarg.h> + +#include "Platform.h" +#include "PropSet.h" +#include "Accessor.h" +#include "StyleContext.h" +#include "KeyWords.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +static int is_radix(int radix, int ch) { +	int digit; + +	if (36 < radix || 2 > radix) +		return 0; + +	if (isdigit(ch)) { +		digit = ch - '0'; +	} else if (isalnum(ch)) { +		digit = toupper(ch) - 'A' + 10; +	} else { +		return 0; +	} + +	return (digit < radix); +} + +typedef enum { +	STATE_NULL, +	COMMENT, +	COMMENT_FUNCTION, +	COMMENT_MODULE, +	COMMENT_DOC, +	COMMENT_DOC_MACRO, +	ATOM_UNQUOTED, +	ATOM_QUOTED, +	NODE_NAME_UNQUOTED, +	NODE_NAME_QUOTED, +	MACRO_START, +	MACRO_UNQUOTED, +	MACRO_QUOTED, +	RECORD_START, +	RECORD_UNQUOTED, +	RECORD_QUOTED, +	NUMERAL_START, +	NUMERAL_BASE_VALUE, +	NUMERAL_FLOAT, +	NUMERAL_EXPONENT, +	PREPROCESSOR +} atom_parse_state_t; + +static inline bool IsAWordChar(const int ch) { +	return (ch < 0x80) && (ch != ' ') && (isalnum(ch) || ch == '_'); +} + +static void ColouriseErlangDoc(unsigned int startPos, int length, int initStyle, +								WordList *keywordlists[], Accessor &styler) { + +	StyleContext sc(startPos, length, initStyle, styler); +	WordList &reservedWords = *keywordlists[0]; +	WordList &erlangBIFs = *keywordlists[1]; +	WordList &erlangPreproc = *keywordlists[2]; +	WordList &erlangModulesAtt = *keywordlists[3]; +	WordList &erlangDoc = *keywordlists[4]; +	WordList &erlangDocMacro = *keywordlists[5]; +	int radix_digits = 0; +	int exponent_digits = 0; +	atom_parse_state_t parse_state = STATE_NULL; +	atom_parse_state_t old_parse_state = STATE_NULL; +	bool to_late_to_comment = false; +	char cur[100]; +	int old_style = SCE_ERLANG_DEFAULT; + +	styler.StartAt(startPos); + +	for (; sc.More(); sc.Forward()) { +		int style = SCE_ERLANG_DEFAULT; +		if (STATE_NULL != parse_state) { + +			switch (parse_state) { + +				case STATE_NULL : sc.SetState(SCE_ERLANG_DEFAULT); break; + +			/* COMMENTS ------------------------------------------------------*/ +				case COMMENT : { +					if (sc.ch != '%') { +						to_late_to_comment = true; +					} else if (!to_late_to_comment && sc.ch == '%') { +						// Switch to comment level 2 (Function) +						sc.ChangeState(SCE_ERLANG_COMMENT_FUNCTION); +						old_style = SCE_ERLANG_COMMENT_FUNCTION; +						parse_state = COMMENT_FUNCTION; +						sc.Forward(); +					} +				} +				// V--- Falling through! +				case COMMENT_FUNCTION : { +					if (sc.ch != '%') { +						to_late_to_comment = true; +					} else if (!to_late_to_comment && sc.ch == '%') { +						// Switch to comment level 3 (Module) +						sc.ChangeState(SCE_ERLANG_COMMENT_MODULE); +						old_style = SCE_ERLANG_COMMENT_MODULE; +						parse_state = COMMENT_MODULE; +						sc.Forward(); +					} +				} +				// V--- Falling through! +				case COMMENT_MODULE : { +					if (parse_state != COMMENT) { +						// Search for comment documentation +						if (sc.chNext == '@') { +							old_parse_state = parse_state; +							parse_state = ('{' == sc.ch) +											? COMMENT_DOC_MACRO +											: COMMENT_DOC; +							sc.ForwardSetState(sc.state); +						} +					} + +					// All comments types fall here. +					if (sc.atLineEnd) { +						to_late_to_comment = false; +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +				case COMMENT_DOC : +				// V--- Falling through! +				case COMMENT_DOC_MACRO : { + +					if (!isalnum(sc.ch)) { +						// Try to match documentation comment +						sc.GetCurrent(cur, sizeof(cur)); + +						if (parse_state == COMMENT_DOC_MACRO  +							&& erlangDocMacro.InList(cur)) { +								sc.ChangeState(SCE_ERLANG_COMMENT_DOC_MACRO); +								while (sc.ch != '}' && !sc.atLineEnd) +									sc.Forward(); +						} else if (erlangDoc.InList(cur)) { +							sc.ChangeState(SCE_ERLANG_COMMENT_DOC); +						} else { +							sc.ChangeState(old_style); +						} + +						// Switch back to old state +						sc.SetState(old_style); +						parse_state = old_parse_state; +					} + +					if (sc.atLineEnd) { +						to_late_to_comment = false; +						sc.ChangeState(old_style); +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +			/* -------------------------------------------------------------- */ +			/* Atoms ---------------------------------------------------------*/ +				case ATOM_UNQUOTED : { +					if ('@' == sc.ch){ +						parse_state = NODE_NAME_UNQUOTED; +					} else if (sc.ch == ':') { +						// Searching for module name +						if (sc.chNext == ' ') { +							// error +							sc.ChangeState(SCE_ERLANG_UNKNOWN); +							parse_state = STATE_NULL; +						} else { +							sc.Forward(); +							if (isalnum(sc.ch))  { +								sc.GetCurrent(cur, sizeof(cur)); +								sc.ChangeState(SCE_ERLANG_MODULES); +								sc.SetState(SCE_ERLANG_MODULES); +							} +						} +					} else if (!IsAWordChar(sc.ch)) { + +						sc.GetCurrent(cur, sizeof(cur)); +						if (reservedWords.InList(cur)) { +							style = SCE_ERLANG_KEYWORD; +						} else if (erlangBIFs.InList(cur) +									&& strcmp(cur,"erlang:")){ +							style = SCE_ERLANG_BIFS; +						} else if (sc.ch == '(' || '/' == sc.ch){ +							style = SCE_ERLANG_FUNCTION_NAME; +						} else { +							style = SCE_ERLANG_ATOM; +						} + +						sc.ChangeState(style); +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} + +				} break; + +				case ATOM_QUOTED : { +					if ( '@' == sc.ch ){ +						parse_state = NODE_NAME_QUOTED; +					} else if ('\'' == sc.ch && '\\' != sc.chPrev) { +						sc.ChangeState(SCE_ERLANG_ATOM); +						sc.ForwardSetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +			/* -------------------------------------------------------------- */ +			/* Node names ----------------------------------------------------*/ +				case NODE_NAME_UNQUOTED : { +					if ('@' == sc.ch) { +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} else if (!IsAWordChar(sc.ch)) { +						sc.ChangeState(SCE_ERLANG_NODE_NAME); +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +				case NODE_NAME_QUOTED : { +					if ('@' == sc.ch) { +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} else if ('\'' == sc.ch && '\\' != sc.chPrev) { +						sc.ChangeState(SCE_ERLANG_NODE_NAME_QUOTED); +						sc.ForwardSetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +			/* -------------------------------------------------------------- */ +			/* Records -------------------------------------------------------*/ +				case RECORD_START : { +					if ('\'' == sc.ch) { +						parse_state = RECORD_QUOTED; +					} else if (isalpha(sc.ch) && islower(sc.ch)) { +						parse_state = RECORD_UNQUOTED; +					} else { // error +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +				case RECORD_UNQUOTED : { +					if (!IsAWordChar(sc.ch)) { +						sc.ChangeState(SCE_ERLANG_RECORD); +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +				case RECORD_QUOTED : { +					if ('\'' == sc.ch && '\\' != sc.chPrev) { +						sc.ChangeState(SCE_ERLANG_RECORD_QUOTED); +						sc.ForwardSetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +			/* -------------------------------------------------------------- */ +			/* Macros --------------------------------------------------------*/ +				case MACRO_START : { +					if ('\'' == sc.ch) { +						parse_state = MACRO_QUOTED; +					} else if (isalpha(sc.ch)) { +						parse_state = MACRO_UNQUOTED; +					} else { // error +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +				case MACRO_UNQUOTED : { +					if (!IsAWordChar(sc.ch)) { +						sc.ChangeState(SCE_ERLANG_MACRO); +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +				case MACRO_QUOTED : { +					if ('\'' == sc.ch && '\\' != sc.chPrev) { +						sc.ChangeState(SCE_ERLANG_MACRO_QUOTED); +						sc.ForwardSetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +			/* -------------------------------------------------------------- */ +			/* Numerics ------------------------------------------------------*/ +			/* Simple integer */ +				case NUMERAL_START : { +					if (isdigit(sc.ch)) { +						radix_digits *= 10; +						radix_digits += sc.ch - '0'; // Assuming ASCII here! +					} else if ('#' == sc.ch) { +						if (2 > radix_digits || 36 < radix_digits) { +							sc.SetState(SCE_ERLANG_DEFAULT); +							parse_state = STATE_NULL; +						} else { +							parse_state = NUMERAL_BASE_VALUE; +						} +					} else if ('.' == sc.ch && isdigit(sc.chNext)) { +						radix_digits = 0; +						parse_state = NUMERAL_FLOAT; +					} else if ('e' == sc.ch || 'E' == sc.ch) { +						exponent_digits = 0; +						parse_state = NUMERAL_EXPONENT; +					} else { +						radix_digits = 0; +						sc.ChangeState(SCE_ERLANG_NUMBER); +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +			/* Integer in other base than 10 (x#yyy) */ +				case NUMERAL_BASE_VALUE : { +					if (!is_radix(radix_digits,sc.ch)) { +						radix_digits = 0; +				 +						if (!isalnum(sc.ch)) +							sc.ChangeState(SCE_ERLANG_NUMBER); + +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +			/* Float (x.yyy) */ +				case NUMERAL_FLOAT : { +					if ('e' == sc.ch || 'E' == sc.ch) { +						exponent_digits = 0; +						parse_state = NUMERAL_EXPONENT; +					} else if (!isdigit(sc.ch)) { +						sc.ChangeState(SCE_ERLANG_NUMBER); +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +			/* Exponent, either integer or float (xEyy, x.yyEzzz) */ +				case NUMERAL_EXPONENT : { +					if (('-' == sc.ch || '+' == sc.ch) +							&& (isdigit(sc.chNext))) { +						sc.Forward(); +					} else if (!isdigit(sc.ch)) { +						if (0 < exponent_digits) +							sc.ChangeState(SCE_ERLANG_NUMBER); +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} else { +						++exponent_digits; +					} +				} break; + +			/* -------------------------------------------------------------- */ +			/* Preprocessor --------------------------------------------------*/ +				case PREPROCESSOR : { +					if (!IsAWordChar(sc.ch)) { +						 +						sc.GetCurrent(cur, sizeof(cur)); +						if (erlangPreproc.InList(cur)) { +							style = SCE_ERLANG_PREPROC; +						} else if (erlangModulesAtt.InList(cur)) { +							style = SCE_ERLANG_MODULES_ATT; +						} + +						sc.ChangeState(style); +						sc.SetState(SCE_ERLANG_DEFAULT); +						parse_state = STATE_NULL; +					} +				} break; + +			} + +		} /* End of : STATE_NULL != parse_state */ +		else +		{ +			switch (sc.state) { +				case SCE_ERLANG_VARIABLE : { +					if (!IsAWordChar(sc.ch)) +						sc.SetState(SCE_ERLANG_DEFAULT); +				} break; +				case SCE_ERLANG_STRING : { +					 if (sc.ch == '\"' && sc.chPrev != '\\') +						sc.ForwardSetState(SCE_ERLANG_DEFAULT); +				} break; +				case SCE_ERLANG_COMMENT : { +					 if (sc.atLineEnd) +						sc.SetState(SCE_ERLANG_DEFAULT); +				} break; +				case SCE_ERLANG_CHARACTER : { +					if (sc.chPrev == '\\') { +						sc.ForwardSetState(SCE_ERLANG_DEFAULT); +					} else if (sc.ch != '\\') { +						sc.ForwardSetState(SCE_ERLANG_DEFAULT); +					} +				} break; +				case SCE_ERLANG_OPERATOR : { +					if (sc.chPrev == '.') { +						if (sc.ch == '*' || sc.ch == '/' || sc.ch == '\\'  +							|| sc.ch == '^') { +							sc.ForwardSetState(SCE_ERLANG_DEFAULT); +						} else if (sc.ch == '\'') { +							sc.ForwardSetState(SCE_ERLANG_DEFAULT); +						} else { +							sc.SetState(SCE_ERLANG_DEFAULT); +						} +					} else { +						sc.SetState(SCE_ERLANG_DEFAULT); +					} +				} break; +			} +		} + +		if (sc.state == SCE_ERLANG_DEFAULT) { +			bool no_new_state = false; + +			switch (sc.ch) { +				case '\"' : sc.SetState(SCE_ERLANG_STRING); break; +				case '$' : sc.SetState(SCE_ERLANG_CHARACTER); break; +				case '%' : { +					parse_state = COMMENT; +					sc.SetState(SCE_ERLANG_COMMENT); +				} break; +				case '#' : { +					parse_state = RECORD_START; +					sc.SetState(SCE_ERLANG_UNKNOWN); +				} break; +				case '?' : { +					parse_state = MACRO_START; +					sc.SetState(SCE_ERLANG_UNKNOWN); +				} break; +				case '\'' : { +					parse_state = ATOM_QUOTED; +					sc.SetState(SCE_ERLANG_UNKNOWN); +				} break; +				case '+' : +				case '-' : { +					if (IsADigit(sc.chNext)) { +						parse_state = NUMERAL_START; +						radix_digits = 0; +						sc.SetState(SCE_ERLANG_UNKNOWN); +					} else if (sc.ch != '+') { +						parse_state = PREPROCESSOR; +						sc.SetState(SCE_ERLANG_UNKNOWN); +					} +				} break; +				default : no_new_state = true; +			} + +			if (no_new_state) { +				if (isdigit(sc.ch)) { +					parse_state = NUMERAL_START; +					radix_digits = sc.ch - '0'; +					sc.SetState(SCE_ERLANG_UNKNOWN); +				} else if (isupper(sc.ch) || '_' == sc.ch) { +					sc.SetState(SCE_ERLANG_VARIABLE); +				} else if (isalpha(sc.ch)) { +					parse_state = ATOM_UNQUOTED; +					sc.SetState(SCE_ERLANG_UNKNOWN); +				} else if (isoperator(static_cast<char>(sc.ch)) +							|| sc.ch == '\\') { +					sc.SetState(SCE_ERLANG_OPERATOR); +				} +			} +		} + +	} +	sc.Complete(); +} + +static int ClassifyErlangFoldPoint( +	Accessor &styler, +	int styleNext, +	int keyword_start +) { +	int lev = 0; +	if (styler.Match(keyword_start,"case") +		|| ( +			styler.Match(keyword_start,"fun") +			&& (SCE_ERLANG_FUNCTION_NAME != styleNext) +			) +		|| styler.Match(keyword_start,"if") +		|| styler.Match(keyword_start,"query") +		|| styler.Match(keyword_start,"receive") +	) { +		++lev; +	} else if (styler.Match(keyword_start,"end")) { +		--lev; +	} + +	return lev; +} + +static void FoldErlangDoc( +	unsigned int startPos, int length, int initStyle, +	WordList** /*keywordlists*/, Accessor &styler +) { +	unsigned int endPos = startPos + length; +	int currentLine = styler.GetLine(startPos); +	int lev; +	int previousLevel = styler.LevelAt(currentLine) & SC_FOLDLEVELNUMBERMASK; +	int currentLevel = previousLevel; +	int styleNext = styler.StyleAt(startPos); +	int style = initStyle; +	int stylePrev; +	int keyword_start = 0; +	char ch; +	char chNext = styler.SafeGetCharAt(startPos); +	bool atEOL; + +	for (unsigned int i = startPos; i < endPos; i++) { +		ch = chNext; +		chNext = styler.SafeGetCharAt(i + 1); + +		// Get styles +		stylePrev = style; +		style = styleNext; +		styleNext = styler.StyleAt(i + 1); +		atEOL = ((ch == '\r') && (chNext != '\n')) || (ch == '\n'); + +		if (stylePrev != SCE_ERLANG_KEYWORD +			&& style == SCE_ERLANG_KEYWORD) { +			keyword_start = i; +		} + +		// Fold on keywords +		if (stylePrev == SCE_ERLANG_KEYWORD +			&& style != SCE_ERLANG_KEYWORD +			&& style != SCE_ERLANG_ATOM +		) { +			currentLevel += ClassifyErlangFoldPoint(styler, +													styleNext, +													keyword_start); +		} + +		// Fold on comments +		if (style == SCE_ERLANG_COMMENT +			|| style == SCE_ERLANG_COMMENT_MODULE +			|| style == SCE_ERLANG_COMMENT_FUNCTION) { + +			if (ch == '%' && chNext == '{') { +				currentLevel++; +			} else if (ch == '%' && chNext == '}') { +				currentLevel--; +			} +		} + +		// Fold on braces +		if (style == SCE_ERLANG_OPERATOR) { +			if (ch == '{' || ch == '(' || ch == '[') { +				currentLevel++; +			} else if (ch == '}' || ch == ')' || ch == ']') { +				currentLevel--; +			} +		} + + +		if (atEOL) { +			lev = previousLevel; + +			if (currentLevel > previousLevel) +				lev |= SC_FOLDLEVELHEADERFLAG; + +			if (lev != styler.LevelAt(currentLine)) +				styler.SetLevel(currentLine, lev); + +			currentLine++; +			previousLevel = currentLevel; +		} + +	} + +	// Fill in the real level of the next line, keeping the current flags as they will be filled in later +	styler.SetLevel(currentLine, +					previousLevel +					| (styler.LevelAt(currentLine) & ~SC_FOLDLEVELNUMBERMASK)); +} + +static const char * const erlangWordListDesc[] = { +	"Erlang Reserved words", +	"Erlang BIFs", +	"Erlang Preprocessor", +	"Erlang Module Attributes", +	"Erlang Documentation", +	"Erlang Documentation Macro", +	0 +}; + +LexerModule lmErlang( +	SCLEX_ERLANG, +	ColouriseErlangDoc, +	"erlang", +	FoldErlangDoc, +	erlangWordListDesc);  | 
