// Scintilla source code edit control /** @file LexNim.cxx ** Lexer for Nim ** Written by Jad Altahan (github.com/xv) ** Nim manual: https://nim-lang.org/docs/manual.html **/ // Copyright 1998-2001 by Neil Hodgson // The License.txt file describes the conditions under which this software may be distributed. #include #include #include #include #include #include #include #include #include #include "ILexer.h" #include "Scintilla.h" #include "SciLexer.h" #include "StringCopy.h" #include "WordList.h" #include "LexAccessor.h" #include "Accessor.h" #include "StyleContext.h" #include "CharacterSet.h" #include "CharacterCategory.h" #include "LexerModule.h" #include "OptionSet.h" #include "DefaultLexer.h" using namespace Scintilla; namespace { // Use an unnamed namespace to protect the functions and classes from name conflicts enum NumType { Binary, Octal, Exponent, Hexadecimal, Decimal, FormatError }; int GetNumStyle(const int numType) { if (numType == NumType::FormatError) { return SCE_NIM_NUMERROR; } return SCE_NIM_NUMBER; } bool IsAWordChar(const int ch) { return ch < 0x80 && (isalnum(ch) || ch == '_' || ch == '.'); } int IsNumHex(const StyleContext &sc) { return sc.chNext == 'x' || sc.chNext == 'X'; } int IsNumBinary(const StyleContext &sc) { return sc.chNext == 'b' || sc.chNext == 'B'; } int IsNumOctal(const StyleContext &sc) { return IsADigit(sc.chNext) || sc.chNext == 'o'; } bool IsNewline(const int ch) { return (ch == '\n' || ch == '\r'); } constexpr bool IsTripleLiteral(const int style) noexcept { return style == SCE_NIM_TRIPLE || style == SCE_NIM_TRIPLEDOUBLE; } constexpr bool IsLineComment(const int style) noexcept { return style == SCE_NIM_COMMENTLINE || style == SCE_NIM_COMMENTLINEDOC; } constexpr bool IsStreamComment(const int style) noexcept { return style == SCE_NIM_COMMENT || style == SCE_NIM_COMMENTDOC; } // Adopted from Accessor.cxx int GetIndent(const Sci_Position line, Accessor &styler) { Sci_Position startPos = styler.LineStart(line); Sci_Position eolPos = styler.LineStart(line + 1) - 1; char ch = styler[startPos]; int style = styler.StyleAt(startPos); int indent = 0; bool inPrevPrefix = line > 0; Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line - 1) : 0; // No fold points inside block comments and triple literals while ((IsASpaceOrTab(ch) || IsStreamComment(style) || IsTripleLiteral(style)) && (startPos < eolPos)) { if (inPrevPrefix) { char chPrev = styler[posPrev++]; if (chPrev != ' ' && chPrev != '\t') { inPrevPrefix = false; } } if (ch == '\t') { indent = (indent / 8 + 1) * 8; } else { indent++; } startPos++; ch = styler[startPos]; style = styler.StyleAt(startPos); } indent += SC_FOLDLEVELBASE; if (styler.LineStart(line) == styler.Length() || IsASpaceOrTab(ch) || IsNewline(ch) || IsLineComment(style)) { return indent | SC_FOLDLEVELWHITEFLAG; } else { return indent; } } int IndentAmount(const Sci_Position line, Accessor &styler) { const int indent = GetIndent(line, styler); const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK; return indentLevel <= SC_FOLDLEVELBASE ? indent : indentLevel | (indent & ~SC_FOLDLEVELNUMBERMASK); } struct OptionsNim { bool fold; bool foldCompact; OptionsNim() { fold = true; foldCompact = true; } }; static const char *const nimWordListDesc[] = { "Keywords", 0 }; struct OptionSetNim : public OptionSet { OptionSetNim() { DefineProperty("fold", &OptionsNim::fold); DefineProperty("fold.compact", &OptionsNim::foldCompact); DefineWordListSets(nimWordListDesc); } }; LexicalClass lexicalClasses[] = { // Lexer Nim SCLEX_NIM SCE_NIM_: 0, "SCE_NIM_DEFAULT", "default", "White space", 1, "SCE_NIM_COMMENT", "comment block", "Block comment", 2, "SCE_NIM_COMMENTDOC", "comment block doc", "Block doc comment", 3, "SCE_NIM_COMMENTLINE", "comment line", "Line comment", 4, "SCE_NIM_COMMENTLINEDOC", "comment doc", "Line doc comment", 5, "SCE_NIM_NUMBER", "literal numeric", "Number", 6, "SCE_NIM_STRING", "literal string", "String", 7, "SCE_NIM_CHARACTER", "literal string", "Single quoted string", 8, "SCE_NIM_WORD", "keyword", "Keyword", 9, "SCE_NIM_TRIPLE", "literal string", "Triple quotes", 10, "SCE_NIM_TRIPLEDOUBLE", "literal string", "Triple double quotes", 11, "SCE_NIM_BACKTICKS", "operator definition", "Identifiers", 12, "SCE_NIM_FUNCNAME", "identifier", "Function name definition", 13, "SCE_NIM_STRINGEOL", "error literal string", "String is not closed", 14, "SCE_NIM_NUMERROR", "numeric error", "Numeric format error", 15, "SCE_NIM_OPERATOR", "operator", "Operators", 16, "SCE_NIM_IDENTIFIER", "identifier", "Identifiers", }; } class LexerNim : public DefaultLexer { CharacterSet setWord; WordList keywords; OptionsNim options; OptionSetNim osNim; public: LexerNim() : DefaultLexer(lexicalClasses, ELEMENTS(lexicalClasses)), setWord(CharacterSet::setAlphaNum, "_", 0x80, true) { } virtual ~LexerNim() { } void SCI_METHOD Release() override { delete this; } int SCI_METHOD Version() const override { return lvMetaData; } const char * SCI_METHOD PropertyNames() override { return osNim.PropertyNames(); } int SCI_METHOD PropertyType(const char *name) override { return osNim.PropertyType(name); } const char * SCI_METHOD DescribeProperty(const char *name) override { return osNim.DescribeProperty(name); } Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; const char * SCI_METHOD DescribeWordListSets() override { return osNim.DescribeWordListSets(); } Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; void * SCI_METHOD PrivateCall(int, void *) override { return 0; } int SCI_METHOD LineEndTypesSupported() override { return SC_LINE_END_TYPE_UNICODE; } int SCI_METHOD PrimaryStyleFromStyle(int style) override { return style; } static ILexer *LexerFactoryNim() { return new LexerNim(); } }; Sci_Position SCI_METHOD LexerNim::PropertySet(const char *key, const char *val) { if (osNim.PropertySet(&options, key, val)) { return 0; } return -1; } Sci_Position SCI_METHOD LexerNim::WordListSet(int n, const char *wl) { WordList *wordListN = 0; switch (n) { case 0: wordListN = &keywords; break; } Sci_Position firstModification = -1; if (wordListN) { WordList wlNew; wlNew.Set(wl); if (*wordListN != wlNew) { wordListN->Set(wl); firstModification = 0; } } return firstModification; } void SCI_METHOD LexerNim::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { // No one likes a leaky string if (initStyle == SCE_NIM_STRINGEOL) { initStyle = SCE_NIM_DEFAULT; } Accessor styler(pAccess, NULL); StyleContext sc(startPos, length, initStyle, styler); // Nim supports nested block comments! Sci_Position lineCurrent = styler.GetLine(startPos); int commentNestLevel = lineCurrent > 0 ? styler.GetLineState(lineCurrent - 1) : 0; int numType = NumType::Decimal; int decimalCount = 0; bool funcNameExists = false; for (; sc.More(); sc.Forward()) { if (sc.atLineStart) { if (sc.state == SCE_NIM_STRING) { sc.SetState(SCE_NIM_STRING); } lineCurrent = styler.GetLine(sc.currentPos); styler.SetLineState(lineCurrent, commentNestLevel); } // Handle string line continuation if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r') && (sc.state == SCE_NIM_STRING || sc.state == SCE_NIM_CHARACTER)) { sc.Forward(); if (sc.ch == '\r' && sc.chNext == '\n') { sc.Forward(); } continue; } switch (sc.state) { case SCE_NIM_OPERATOR: funcNameExists = false; sc.SetState(SCE_NIM_DEFAULT); break; case SCE_NIM_NUMBER: // For a type suffix, such as 0x80'u8 if (sc.ch == '\'') { if (sc.chNext == 'i' || sc.chNext == 'I' || sc.chNext == 'u' || sc.chNext == 'U' || sc.chNext == 'f' || sc.chNext == 'F' || sc.chNext == 'd' || sc.chNext == 'D') { sc.Forward(2); } } else if (sc.ch == '.') { if (sc.chNext == '.') { // Pass } else if (numType <= NumType::Exponent) { sc.SetState(SCE_NIM_OPERATOR); break; } else { decimalCount++; if (numType == NumType::Decimal) { if (decimalCount <= 1 && !IsAWordChar(sc.chNext)) { break; } } else if (numType == NumType::Hexadecimal) { if (decimalCount <= 1 && IsADigit(sc.chNext, 16)) { break; } sc.SetState(SCE_NIM_OPERATOR); break; } } } else if (sc.ch == '_') { break; } else if (numType == NumType::Decimal) { if (sc.chPrev != '\'' && (sc.ch == 'e' || sc.ch == 'E')) { numType = NumType::Exponent; if (sc.chNext == '-' || sc.chNext == '+') { sc.Forward(); } break; } if (IsADigit(sc.ch)) { break; } } else if (numType == NumType::Hexadecimal) { if (IsADigit(sc.ch, 16)) { break; } } else if (IsADigit(sc.ch)) { if (numType == NumType::Exponent) { break; } if (numType == NumType::Octal) { // Accept only 0-7 if (sc.ch <= '7') { break; } } else if (numType == NumType::Binary) { // Accept only 0 and 1 if (sc.ch <= '1') { break; } } numType = NumType::FormatError; break; } sc.ChangeState(GetNumStyle(numType)); sc.SetState(SCE_NIM_DEFAULT); break; case SCE_NIM_IDENTIFIER: if (!IsAWordChar(sc.ch)) { char s[100]; sc.GetCurrent(s, sizeof(s)); int style = SCE_NIM_IDENTIFIER; if (keywords.InList(s) && !funcNameExists) { style = SCE_NIM_WORD; } else if (funcNameExists) { style = SCE_NIM_FUNCNAME; } sc.ChangeState(style); sc.SetState(SCE_NIM_DEFAULT); if (style == SCE_NIM_WORD) { if (0 == strcmp(s, "proc") || 0 == strcmp(s, "func") || 0 == strcmp(s, "macro") || 0 == strcmp(s, "method") || 0 == strcmp(s, "template") || 0 == strcmp(s, "iterator") || 0 == strcmp(s, "converter")) { funcNameExists = true; } else { funcNameExists = false; } } else { funcNameExists = false; } } break; case SCE_NIM_COMMENT: if (sc.Match(']', '#')) { if (commentNestLevel > 0) { commentNestLevel--; } lineCurrent = styler.GetLine(sc.currentPos); styler.SetLineState(lineCurrent, commentNestLevel); sc.Forward(); if (commentNestLevel == 0) { sc.ForwardSetState(SCE_NIM_DEFAULT); } } else if (sc.Match('#', '[')) { commentNestLevel++; lineCurrent = styler.GetLine(sc.currentPos); styler.SetLineState(lineCurrent, commentNestLevel); } break; case SCE_NIM_COMMENTDOC: if (sc.Match("]##")) { if (commentNestLevel > 0) { commentNestLevel--; } lineCurrent = styler.GetLine(sc.currentPos); styler.SetLineState(lineCurrent, commentNestLevel); sc.Forward(2); if (commentNestLevel == 0) { sc.ForwardSetState(SCE_NIM_DEFAULT); } } else if (sc.Match("##[")) { commentNestLevel++; lineCurrent = styler.GetLine(sc.currentPos); styler.SetLineState(lineCurrent, commentNestLevel); } break; case SCE_NIM_COMMENTLINE: case SCE_NIM_COMMENTLINEDOC: if (sc.atLineStart) { sc.SetState(SCE_NIM_DEFAULT); } break; case SCE_NIM_STRING: if (sc.ch == '\\') { if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') { sc.Forward(); } } else if (sc.ch == '\"') { sc.ForwardSetState(SCE_NIM_DEFAULT); } else if (sc.atLineEnd) { sc.ChangeState(SCE_NIM_STRINGEOL); sc.ForwardSetState(SCE_NIM_DEFAULT); } break; case SCE_NIM_CHARACTER: if (sc.ch == '\\') { if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') { sc.Forward(); } } else if (sc.ch == '\'') { sc.ForwardSetState(SCE_NIM_DEFAULT); } else if (sc.atLineEnd) { sc.ChangeState(SCE_NIM_STRINGEOL); sc.ForwardSetState(SCE_NIM_DEFAULT); } break; case SCE_NIM_BACKTICKS: if (sc.ch == '`' || sc.atLineEnd) { sc.ForwardSetState(SCE_NIM_DEFAULT); } break; case SCE_NIM_TRIPLEDOUBLE: if (sc.Match(R"(""")")) { sc.Forward(2); sc.ForwardSetState(SCE_NIM_DEFAULT); } break; case SCE_NIM_TRIPLE: if (sc.Match("'''")) { sc.Forward(2); sc.ForwardSetState(SCE_NIM_DEFAULT); } break; } if (sc.state == SCE_NIM_DEFAULT) { // Number if (IsADigit(sc.ch) || (IsADigit(sc.chNext) && sc.ch == '.')) { sc.SetState(SCE_NIM_NUMBER); numType = NumType::Decimal; decimalCount = 0; if (sc.ch == '0') { if (IsNumHex(sc)) { numType = NumType::Hexadecimal; } else if (IsNumBinary(sc)) { numType = NumType::Binary; } else if (IsNumOctal(sc)) { numType = NumType::Octal; } if (numType != NumType::Decimal) { sc.Forward(); } } } // Raw string else if ((sc.ch == 'r' || sc.ch == 'R') && sc.chNext == '\"') { sc.SetState(SCE_NIM_STRING); sc.Forward(); } // String and triple double literal else if (sc.ch == '\"') { if (sc.Match(R"(""")")) { sc.SetState(SCE_NIM_TRIPLEDOUBLE); } else { sc.SetState(SCE_NIM_STRING); } } // Charecter and triple literal else if (sc.ch == '\'') { if (sc.Match("'''")) { sc.SetState(SCE_NIM_TRIPLE); } else { sc.SetState(SCE_NIM_CHARACTER); } } // Operator definition else if (sc.ch == '`') { sc.SetState(SCE_NIM_BACKTICKS); if (funcNameExists) { funcNameExists = false; } } // Keyword else if (iswordstart(sc.ch)) { sc.SetState(SCE_NIM_IDENTIFIER); } // Comments else if (sc.ch == '#') { if (sc.Match("##[") || sc.Match("#[")) { commentNestLevel++; lineCurrent = styler.GetLine(sc.currentPos); styler.SetLineState(lineCurrent, commentNestLevel); } if (sc.Match("##[")) { sc.SetState(SCE_NIM_COMMENTDOC); sc.Forward(); } else if (sc.Match("#[")) { sc.SetState(SCE_NIM_COMMENT); sc.Forward(); } else if (sc.Match("##")) { sc.SetState(SCE_NIM_COMMENTLINEDOC); } else { sc.SetState(SCE_NIM_COMMENTLINE); } } // Operators else if (strchr("()[]{}:=;-\\/&%$!+<>|^?,.*~@", sc.ch)) { sc.SetState(SCE_NIM_OPERATOR); // Ignore decimal coloring in input like: range[0..5] if (sc.Match('.', '.')) { sc.Forward(); if (sc.chNext == '.') { sc.Forward(); } } } } if (sc.atLineEnd) { funcNameExists = false; } } sc.Complete(); } void SCI_METHOD LexerNim::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess) { if (!options.fold) { return; } Accessor styler(pAccess, NULL); const Sci_Position docLines = styler.GetLine(styler.Length()); const Sci_Position maxPos = startPos + length; const Sci_Position maxLines = styler.GetLine(maxPos == styler.Length() ? maxPos : maxPos - 1); Sci_Position lineCurrent = styler.GetLine(startPos); int indentCurrent = IndentAmount(lineCurrent, styler); while (lineCurrent > 0) { lineCurrent--; indentCurrent = IndentAmount(lineCurrent, styler); if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) { break; } } int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK; indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK); while (lineCurrent <= docLines && lineCurrent <= maxLines) { Sci_Position lineNext = lineCurrent + 1; int indentNext = indentCurrent; int lev = indentCurrent; if (lineNext <= docLines) { indentNext = IndentAmount(lineNext, styler); } if (indentNext & SC_FOLDLEVELWHITEFLAG) { indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel; } while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) { lineNext++; indentNext = IndentAmount(lineNext, styler); } const int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK; indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK); const int levelBeforeComments = std::max(indentCurrentLevel, indentNextLevel); Sci_Position skipLine = lineNext; int skipLevel = indentNextLevel; while (--skipLine > lineCurrent) { int skipLineIndent = IndentAmount(skipLine, styler); if (options.foldCompact) { if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) { skipLevel = levelBeforeComments; } int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG; styler.SetLevel(skipLine, skipLevel | whiteFlag); } else { if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel && !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) { skipLevel = levelBeforeComments; } styler.SetLevel(skipLine, skipLevel); } } if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) { if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK)) { lev |= SC_FOLDLEVELHEADERFLAG; } } styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG); indentCurrent = indentNext; indentCurrentLevel = indentNextLevel; lineCurrent = lineNext; } } LexerModule lmNim(SCLEX_NIM, LexerNim::LexerFactoryNim, "nim", nimWordListDesc);