diff options
Diffstat (limited to 'lexers/LexErlang.cxx')
-rw-r--r-- | lexers/LexErlang.cxx | 619 |
1 files changed, 619 insertions, 0 deletions
diff --git a/lexers/LexErlang.cxx b/lexers/LexErlang.cxx new file mode 100644 index 000000000..45577bda4 --- /dev/null +++ b/lexers/LexErlang.cxx @@ -0,0 +1,619 @@ +// Scintilla source code edit control +// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. +/** @file LexErlang.cxx + ** Lexer for Erlang. + ** Enhanced by Etienne 'Lenain' Girondel (lenaing@gmail.com) + ** Originally wrote by Peter-Henry Mander, + ** based on Matlab lexer by José Fonseca. + **/ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <stdio.h> +#include <stdarg.h> + +#include "Platform.h" +#include "PropSet.h" +#include "Accessor.h" +#include "StyleContext.h" +#include "KeyWords.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +static int is_radix(int radix, int ch) { + int digit; + + if (36 < radix || 2 > radix) + return 0; + + if (isdigit(ch)) { + digit = ch - '0'; + } else if (isalnum(ch)) { + digit = toupper(ch) - 'A' + 10; + } else { + return 0; + } + + return (digit < radix); +} + +typedef enum { + STATE_NULL, + COMMENT, + COMMENT_FUNCTION, + COMMENT_MODULE, + COMMENT_DOC, + COMMENT_DOC_MACRO, + ATOM_UNQUOTED, + ATOM_QUOTED, + NODE_NAME_UNQUOTED, + NODE_NAME_QUOTED, + MACRO_START, + MACRO_UNQUOTED, + MACRO_QUOTED, + RECORD_START, + RECORD_UNQUOTED, + RECORD_QUOTED, + NUMERAL_START, + NUMERAL_BASE_VALUE, + NUMERAL_FLOAT, + NUMERAL_EXPONENT, + PREPROCESSOR +} atom_parse_state_t; + +static inline bool IsAWordChar(const int ch) { + return (ch < 0x80) && (ch != ' ') && (isalnum(ch) || ch == '_'); +} + +static void ColouriseErlangDoc(unsigned int startPos, int length, int initStyle, + WordList *keywordlists[], Accessor &styler) { + + StyleContext sc(startPos, length, initStyle, styler); + WordList &reservedWords = *keywordlists[0]; + WordList &erlangBIFs = *keywordlists[1]; + WordList &erlangPreproc = *keywordlists[2]; + WordList &erlangModulesAtt = *keywordlists[3]; + WordList &erlangDoc = *keywordlists[4]; + WordList &erlangDocMacro = *keywordlists[5]; + int radix_digits = 0; + int exponent_digits = 0; + atom_parse_state_t parse_state = STATE_NULL; + atom_parse_state_t old_parse_state = STATE_NULL; + bool to_late_to_comment = false; + char cur[100]; + int old_style = SCE_ERLANG_DEFAULT; + + styler.StartAt(startPos); + + for (; sc.More(); sc.Forward()) { + int style = SCE_ERLANG_DEFAULT; + if (STATE_NULL != parse_state) { + + switch (parse_state) { + + case STATE_NULL : sc.SetState(SCE_ERLANG_DEFAULT); break; + + /* COMMENTS ------------------------------------------------------*/ + case COMMENT : { + if (sc.ch != '%') { + to_late_to_comment = true; + } else if (!to_late_to_comment && sc.ch == '%') { + // Switch to comment level 2 (Function) + sc.ChangeState(SCE_ERLANG_COMMENT_FUNCTION); + old_style = SCE_ERLANG_COMMENT_FUNCTION; + parse_state = COMMENT_FUNCTION; + sc.Forward(); + } + } + // V--- Falling through! + case COMMENT_FUNCTION : { + if (sc.ch != '%') { + to_late_to_comment = true; + } else if (!to_late_to_comment && sc.ch == '%') { + // Switch to comment level 3 (Module) + sc.ChangeState(SCE_ERLANG_COMMENT_MODULE); + old_style = SCE_ERLANG_COMMENT_MODULE; + parse_state = COMMENT_MODULE; + sc.Forward(); + } + } + // V--- Falling through! + case COMMENT_MODULE : { + if (parse_state != COMMENT) { + // Search for comment documentation + if (sc.chNext == '@') { + old_parse_state = parse_state; + parse_state = ('{' == sc.ch) + ? COMMENT_DOC_MACRO + : COMMENT_DOC; + sc.ForwardSetState(sc.state); + } + } + + // All comments types fall here. + if (sc.atLineEnd) { + to_late_to_comment = false; + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + case COMMENT_DOC : + // V--- Falling through! + case COMMENT_DOC_MACRO : { + + if (!isalnum(sc.ch)) { + // Try to match documentation comment + sc.GetCurrent(cur, sizeof(cur)); + + if (parse_state == COMMENT_DOC_MACRO + && erlangDocMacro.InList(cur)) { + sc.ChangeState(SCE_ERLANG_COMMENT_DOC_MACRO); + while (sc.ch != '}' && !sc.atLineEnd) + sc.Forward(); + } else if (erlangDoc.InList(cur)) { + sc.ChangeState(SCE_ERLANG_COMMENT_DOC); + } else { + sc.ChangeState(old_style); + } + + // Switch back to old state + sc.SetState(old_style); + parse_state = old_parse_state; + } + + if (sc.atLineEnd) { + to_late_to_comment = false; + sc.ChangeState(old_style); + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + /* -------------------------------------------------------------- */ + /* Atoms ---------------------------------------------------------*/ + case ATOM_UNQUOTED : { + if ('@' == sc.ch){ + parse_state = NODE_NAME_UNQUOTED; + } else if (sc.ch == ':') { + // Searching for module name + if (sc.chNext == ' ') { + // error + sc.ChangeState(SCE_ERLANG_UNKNOWN); + parse_state = STATE_NULL; + } else { + sc.Forward(); + if (isalnum(sc.ch)) { + sc.GetCurrent(cur, sizeof(cur)); + sc.ChangeState(SCE_ERLANG_MODULES); + sc.SetState(SCE_ERLANG_MODULES); + } + } + } else if (!IsAWordChar(sc.ch)) { + + sc.GetCurrent(cur, sizeof(cur)); + if (reservedWords.InList(cur)) { + style = SCE_ERLANG_KEYWORD; + } else if (erlangBIFs.InList(cur) + && strcmp(cur,"erlang:")){ + style = SCE_ERLANG_BIFS; + } else if (sc.ch == '(' || '/' == sc.ch){ + style = SCE_ERLANG_FUNCTION_NAME; + } else { + style = SCE_ERLANG_ATOM; + } + + sc.ChangeState(style); + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + + } break; + + case ATOM_QUOTED : { + if ( '@' == sc.ch ){ + parse_state = NODE_NAME_QUOTED; + } else if ('\'' == sc.ch && '\\' != sc.chPrev) { + sc.ChangeState(SCE_ERLANG_ATOM); + sc.ForwardSetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + /* -------------------------------------------------------------- */ + /* Node names ----------------------------------------------------*/ + case NODE_NAME_UNQUOTED : { + if ('@' == sc.ch) { + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } else if (!IsAWordChar(sc.ch)) { + sc.ChangeState(SCE_ERLANG_NODE_NAME); + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + case NODE_NAME_QUOTED : { + if ('@' == sc.ch) { + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } else if ('\'' == sc.ch && '\\' != sc.chPrev) { + sc.ChangeState(SCE_ERLANG_NODE_NAME_QUOTED); + sc.ForwardSetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + /* -------------------------------------------------------------- */ + /* Records -------------------------------------------------------*/ + case RECORD_START : { + if ('\'' == sc.ch) { + parse_state = RECORD_QUOTED; + } else if (isalpha(sc.ch) && islower(sc.ch)) { + parse_state = RECORD_UNQUOTED; + } else { // error + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + case RECORD_UNQUOTED : { + if (!IsAWordChar(sc.ch)) { + sc.ChangeState(SCE_ERLANG_RECORD); + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + case RECORD_QUOTED : { + if ('\'' == sc.ch && '\\' != sc.chPrev) { + sc.ChangeState(SCE_ERLANG_RECORD_QUOTED); + sc.ForwardSetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + /* -------------------------------------------------------------- */ + /* Macros --------------------------------------------------------*/ + case MACRO_START : { + if ('\'' == sc.ch) { + parse_state = MACRO_QUOTED; + } else if (isalpha(sc.ch)) { + parse_state = MACRO_UNQUOTED; + } else { // error + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + case MACRO_UNQUOTED : { + if (!IsAWordChar(sc.ch)) { + sc.ChangeState(SCE_ERLANG_MACRO); + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + case MACRO_QUOTED : { + if ('\'' == sc.ch && '\\' != sc.chPrev) { + sc.ChangeState(SCE_ERLANG_MACRO_QUOTED); + sc.ForwardSetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + /* -------------------------------------------------------------- */ + /* Numerics ------------------------------------------------------*/ + /* Simple integer */ + case NUMERAL_START : { + if (isdigit(sc.ch)) { + radix_digits *= 10; + radix_digits += sc.ch - '0'; // Assuming ASCII here! + } else if ('#' == sc.ch) { + if (2 > radix_digits || 36 < radix_digits) { + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } else { + parse_state = NUMERAL_BASE_VALUE; + } + } else if ('.' == sc.ch && isdigit(sc.chNext)) { + radix_digits = 0; + parse_state = NUMERAL_FLOAT; + } else if ('e' == sc.ch || 'E' == sc.ch) { + exponent_digits = 0; + parse_state = NUMERAL_EXPONENT; + } else { + radix_digits = 0; + sc.ChangeState(SCE_ERLANG_NUMBER); + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + /* Integer in other base than 10 (x#yyy) */ + case NUMERAL_BASE_VALUE : { + if (!is_radix(radix_digits,sc.ch)) { + radix_digits = 0; + + if (!isalnum(sc.ch)) + sc.ChangeState(SCE_ERLANG_NUMBER); + + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + /* Float (x.yyy) */ + case NUMERAL_FLOAT : { + if ('e' == sc.ch || 'E' == sc.ch) { + exponent_digits = 0; + parse_state = NUMERAL_EXPONENT; + } else if (!isdigit(sc.ch)) { + sc.ChangeState(SCE_ERLANG_NUMBER); + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + /* Exponent, either integer or float (xEyy, x.yyEzzz) */ + case NUMERAL_EXPONENT : { + if (('-' == sc.ch || '+' == sc.ch) + && (isdigit(sc.chNext))) { + sc.Forward(); + } else if (!isdigit(sc.ch)) { + if (0 < exponent_digits) + sc.ChangeState(SCE_ERLANG_NUMBER); + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } else { + ++exponent_digits; + } + } break; + + /* -------------------------------------------------------------- */ + /* Preprocessor --------------------------------------------------*/ + case PREPROCESSOR : { + if (!IsAWordChar(sc.ch)) { + + sc.GetCurrent(cur, sizeof(cur)); + if (erlangPreproc.InList(cur)) { + style = SCE_ERLANG_PREPROC; + } else if (erlangModulesAtt.InList(cur)) { + style = SCE_ERLANG_MODULES_ATT; + } + + sc.ChangeState(style); + sc.SetState(SCE_ERLANG_DEFAULT); + parse_state = STATE_NULL; + } + } break; + + } + + } /* End of : STATE_NULL != parse_state */ + else + { + switch (sc.state) { + case SCE_ERLANG_VARIABLE : { + if (!IsAWordChar(sc.ch)) + sc.SetState(SCE_ERLANG_DEFAULT); + } break; + case SCE_ERLANG_STRING : { + if (sc.ch == '\"' && sc.chPrev != '\\') + sc.ForwardSetState(SCE_ERLANG_DEFAULT); + } break; + case SCE_ERLANG_COMMENT : { + if (sc.atLineEnd) + sc.SetState(SCE_ERLANG_DEFAULT); + } break; + case SCE_ERLANG_CHARACTER : { + if (sc.chPrev == '\\') { + sc.ForwardSetState(SCE_ERLANG_DEFAULT); + } else if (sc.ch != '\\') { + sc.ForwardSetState(SCE_ERLANG_DEFAULT); + } + } break; + case SCE_ERLANG_OPERATOR : { + if (sc.chPrev == '.') { + if (sc.ch == '*' || sc.ch == '/' || sc.ch == '\\' + || sc.ch == '^') { + sc.ForwardSetState(SCE_ERLANG_DEFAULT); + } else if (sc.ch == '\'') { + sc.ForwardSetState(SCE_ERLANG_DEFAULT); + } else { + sc.SetState(SCE_ERLANG_DEFAULT); + } + } else { + sc.SetState(SCE_ERLANG_DEFAULT); + } + } break; + } + } + + if (sc.state == SCE_ERLANG_DEFAULT) { + bool no_new_state = false; + + switch (sc.ch) { + case '\"' : sc.SetState(SCE_ERLANG_STRING); break; + case '$' : sc.SetState(SCE_ERLANG_CHARACTER); break; + case '%' : { + parse_state = COMMENT; + sc.SetState(SCE_ERLANG_COMMENT); + } break; + case '#' : { + parse_state = RECORD_START; + sc.SetState(SCE_ERLANG_UNKNOWN); + } break; + case '?' : { + parse_state = MACRO_START; + sc.SetState(SCE_ERLANG_UNKNOWN); + } break; + case '\'' : { + parse_state = ATOM_QUOTED; + sc.SetState(SCE_ERLANG_UNKNOWN); + } break; + case '+' : + case '-' : { + if (IsADigit(sc.chNext)) { + parse_state = NUMERAL_START; + radix_digits = 0; + sc.SetState(SCE_ERLANG_UNKNOWN); + } else if (sc.ch != '+') { + parse_state = PREPROCESSOR; + sc.SetState(SCE_ERLANG_UNKNOWN); + } + } break; + default : no_new_state = true; + } + + if (no_new_state) { + if (isdigit(sc.ch)) { + parse_state = NUMERAL_START; + radix_digits = sc.ch - '0'; + sc.SetState(SCE_ERLANG_UNKNOWN); + } else if (isupper(sc.ch) || '_' == sc.ch) { + sc.SetState(SCE_ERLANG_VARIABLE); + } else if (isalpha(sc.ch)) { + parse_state = ATOM_UNQUOTED; + sc.SetState(SCE_ERLANG_UNKNOWN); + } else if (isoperator(static_cast<char>(sc.ch)) + || sc.ch == '\\') { + sc.SetState(SCE_ERLANG_OPERATOR); + } + } + } + + } + sc.Complete(); +} + +static int ClassifyErlangFoldPoint( + Accessor &styler, + int styleNext, + int keyword_start +) { + int lev = 0; + if (styler.Match(keyword_start,"case") + || ( + styler.Match(keyword_start,"fun") + && (SCE_ERLANG_FUNCTION_NAME != styleNext) + ) + || styler.Match(keyword_start,"if") + || styler.Match(keyword_start,"query") + || styler.Match(keyword_start,"receive") + ) { + ++lev; + } else if (styler.Match(keyword_start,"end")) { + --lev; + } + + return lev; +} + +static void FoldErlangDoc( + unsigned int startPos, int length, int initStyle, + WordList** /*keywordlists*/, Accessor &styler +) { + unsigned int endPos = startPos + length; + int currentLine = styler.GetLine(startPos); + int lev; + int previousLevel = styler.LevelAt(currentLine) & SC_FOLDLEVELNUMBERMASK; + int currentLevel = previousLevel; + int styleNext = styler.StyleAt(startPos); + int style = initStyle; + int stylePrev; + int keyword_start = 0; + char ch; + char chNext = styler.SafeGetCharAt(startPos); + bool atEOL; + + for (unsigned int i = startPos; i < endPos; i++) { + ch = chNext; + chNext = styler.SafeGetCharAt(i + 1); + + // Get styles + stylePrev = style; + style = styleNext; + styleNext = styler.StyleAt(i + 1); + atEOL = ((ch == '\r') && (chNext != '\n')) || (ch == '\n'); + + if (stylePrev != SCE_ERLANG_KEYWORD + && style == SCE_ERLANG_KEYWORD) { + keyword_start = i; + } + + // Fold on keywords + if (stylePrev == SCE_ERLANG_KEYWORD + && style != SCE_ERLANG_KEYWORD + && style != SCE_ERLANG_ATOM + ) { + currentLevel += ClassifyErlangFoldPoint(styler, + styleNext, + keyword_start); + } + + // Fold on comments + if (style == SCE_ERLANG_COMMENT + || style == SCE_ERLANG_COMMENT_MODULE + || style == SCE_ERLANG_COMMENT_FUNCTION) { + + if (ch == '%' && chNext == '{') { + currentLevel++; + } else if (ch == '%' && chNext == '}') { + currentLevel--; + } + } + + // Fold on braces + if (style == SCE_ERLANG_OPERATOR) { + if (ch == '{' || ch == '(' || ch == '[') { + currentLevel++; + } else if (ch == '}' || ch == ')' || ch == ']') { + currentLevel--; + } + } + + + if (atEOL) { + lev = previousLevel; + + if (currentLevel > previousLevel) + lev |= SC_FOLDLEVELHEADERFLAG; + + if (lev != styler.LevelAt(currentLine)) + styler.SetLevel(currentLine, lev); + + currentLine++; + previousLevel = currentLevel; + } + + } + + // Fill in the real level of the next line, keeping the current flags as they will be filled in later + styler.SetLevel(currentLine, + previousLevel + | (styler.LevelAt(currentLine) & ~SC_FOLDLEVELNUMBERMASK)); +} + +static const char * const erlangWordListDesc[] = { + "Erlang Reserved words", + "Erlang BIFs", + "Erlang Preprocessor", + "Erlang Module Attributes", + "Erlang Documentation", + "Erlang Documentation Macro", + 0 +}; + +LexerModule lmErlang( + SCLEX_ERLANG, + ColouriseErlangDoc, + "erlang", + FoldErlangDoc, + erlangWordListDesc); |