diff options
Diffstat (limited to 'lexers/LexDataflex.cxx')
-rw-r--r-- | lexers/LexDataflex.cxx | 608 |
1 files changed, 608 insertions, 0 deletions
diff --git a/lexers/LexDataflex.cxx b/lexers/LexDataflex.cxx new file mode 100644 index 000000000..42bd39b6b --- /dev/null +++ b/lexers/LexDataflex.cxx @@ -0,0 +1,608 @@ +// Scintilla source code edit control +/** @file LexDataflex.cxx + ** Lexer for DataFlex. + ** Based on LexPascal.cxx + ** Written by Wil van Antwerpen, June 2019 + **/ + +/* +// The License.txt file describes the conditions under which this software may be distributed. + +A few words about features of LexDataflex... + +Generally speaking LexDataflex tries to support all available DataFlex features (up +to DataFlex 19.1 at this time). + +~ FOLDING: + +Folding is supported in the following cases: + +- Folding of stream-like comments +- Folding of groups of consecutive line comments +- Folding of preprocessor blocks (the following preprocessor blocks are +supported: #IFDEF, #IFNDEF, #ENDIF and #HEADER / #ENDHEADER +blocks), +- Folding of code blocks on appropriate keywords (the following code blocks are +supported: "begin, struct, type, case / end" blocks, class & object +declarations and interface declarations) + +Remarks: + +- We pass 4 arrays to the lexer: +1. The DataFlex keyword list, these are normal DataFlex keywords +2. The Scope Open list, for example, begin / procedure / while +3. The Scope Close list, for example, end / end_procedure / loop +4. Operator list, for ex. + / - / * / Lt / iand +These lists are all mutually exclusive, scope open words should not be in the keyword list and vice versa + +- Folding of code blocks tries to handle all special cases in which folding +should not occur. + +~ KEYWORDS: + +The list of keywords that can be used in dataflex.properties file (up to DataFlex +19.1): + +- Keywords: .. snipped .. see dataflex.properties file. + +*/ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdarg.h> +#include <assert.h> +#include <ctype.h> + +#include "ILexer.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#include "WordList.h" +#include "LexAccessor.h" +#include "Accessor.h" +#include "StyleContext.h" +#include "CharacterSet.h" +#include "LexerModule.h" + +using namespace Scintilla; + + +static void GetRangeLowered(Sci_PositionU start, + Sci_PositionU end, + Accessor &styler, + char *s, + Sci_PositionU len) { + Sci_PositionU i = 0; + while ((i < end - start + 1) && (i < len-1)) { + s[i] = static_cast<char>(tolower(styler[start + i])); + i++; + } + s[i] = '\0'; +} + +static void GetForwardRangeLowered(Sci_PositionU start, + CharacterSet &charSet, + Accessor &styler, + char *s, + Sci_PositionU len) { + Sci_PositionU i = 0; + while ((i < len-1) && charSet.Contains(styler.SafeGetCharAt(start + i))) { + s[i] = static_cast<char>(tolower(styler.SafeGetCharAt(start + i))); + i++; + } + s[i] = '\0'; + +} + +enum { + stateInICode = 0x1000, + stateSingleQuoteOpen = 0x2000, + stateDoubleQuoteOpen = 0x4000, + stateFoldInPreprocessor = 0x0100, + stateFoldInCaseStatement = 0x0200, + stateFoldInPreprocessorLevelMask = 0x00FF, + stateFoldMaskAll = 0x0FFF +}; + + +static bool IsFirstDataFlexWord(Sci_Position pos, Accessor &styler) { + Sci_Position line = styler.GetLine(pos); + Sci_Position start_pos = styler.LineStart(line); + for (Sci_Position i = start_pos; i < pos; i++) { + char ch = styler.SafeGetCharAt(i); + if (!(ch == ' ' || ch == '\t')) + return false; + } + return true; +} + + +inline bool IsADataFlexField(int ch) { + return (ch == '.'); +} + + +static void ClassifyDataFlexWord(WordList *keywordlists[], StyleContext &sc, Accessor &styler) { + WordList& keywords = *keywordlists[0]; + WordList& scopeOpen = *keywordlists[1]; + WordList& scopeClosed = *keywordlists[2]; + WordList& operators = *keywordlists[3]; + + char s[100]; + int oldState; + int newState; + size_t tokenlen; + + oldState = sc.state; + newState = oldState; + sc.GetCurrentLowered(s, sizeof(s)); + tokenlen = strnlen(s,sizeof(s)); + if (keywords.InList(s)) { + // keywords in DataFlex can be used as table column names (file.field) and as such they + // should not be characterized as a keyword. So test for that. + // for ex. somebody using date as field name. + if (!IsADataFlexField(sc.GetRelative(-static_cast<int>(tokenlen+1)))) { + newState = SCE_DF_WORD; + } + } + if (oldState == newState) { + if ((scopeOpen.InList(s) || scopeClosed.InList(s)) && (strcmp(s, "for") != 0) && (strcmp(s, "repeat") != 0)) { + // scope words in DataFlex can be used as table column names (file.field) and as such they + // should not be characterized as a scope word. So test for that. + // for ex. somebody using procedure for field name. + if (!IsADataFlexField(sc.GetRelative(-static_cast<int>(tokenlen+1)))) { + newState = SCE_DF_SCOPEWORD; + } + } + // no code folding on the next words, but just want to paint them like keywords (as they are) (??? doesn't the code to the opposite?) + if (strcmp(s, "if") == 0 || + strcmp(s, "ifnot") == 0 || + strcmp(s, "case") == 0 || + strcmp(s, "else") == 0 ) { + newState = SCE_DF_SCOPEWORD; + } + } + if (oldState != newState && newState == SCE_DF_WORD) { + // a for loop must have for at the start of the line, for is also used in "define abc for 123" + if ( (strcmp(s, "for") == 0) && (IsFirstDataFlexWord(sc.currentPos-3, styler)) ) { + newState = SCE_DF_SCOPEWORD; + } + } + if (oldState != newState && newState == SCE_DF_WORD) { + // a repeat loop must have repeat at the start of the line, repeat is also used in 'move (repeat("d",5)) to sFoo' + if ( (strcmp(s, "repeat") == 0) && (IsFirstDataFlexWord(sc.currentPos-6, styler)) ) { + newState = SCE_DF_SCOPEWORD; + } + } + if (oldState == newState) { + if (operators.InList(s)) { + newState = SCE_DF_OPERATOR; + } + } + + if (oldState != newState) { + sc.ChangeState(newState); + } + sc.SetState(SCE_DF_DEFAULT); +} + +static void ColouriseDataFlexDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], + Accessor &styler) { +// bool bSmartHighlighting = styler.GetPropertyInt("lexer.dataflex.smart.highlighting", 1) != 0; + + CharacterSet setWordStart(CharacterSet::setAlpha, "_$#@", 0x80, true); + CharacterSet setWord(CharacterSet::setAlphaNum, "_$#@", 0x80, true); + CharacterSet setNumber(CharacterSet::setDigits, ".-+eE"); + CharacterSet setHexNumber(CharacterSet::setDigits, "abcdefABCDEF"); + CharacterSet setOperator(CharacterSet::setNone, "*+-/<=>^"); + + Sci_Position curLine = styler.GetLine(startPos); + int curLineState = curLine > 0 ? styler.GetLineState(curLine - 1) : 0; + + StyleContext sc(startPos, length, initStyle, styler); + + for (; sc.More(); sc.Forward()) { + if (sc.atLineEnd) { + // Update the line state, so it can be seen by next line + curLine = styler.GetLine(sc.currentPos); + styler.SetLineState(curLine, curLineState); + } + + // Determine if the current state should terminate. + switch (sc.state) { + case SCE_DF_NUMBER: + if (!setNumber.Contains(sc.ch) || (sc.ch == '.' && sc.chNext == '.')) { + sc.SetState(SCE_DF_DEFAULT); + } else if (sc.ch == '-' || sc.ch == '+') { + if (sc.chPrev != 'E' && sc.chPrev != 'e') { + sc.SetState(SCE_DF_DEFAULT); + } + } + break; + case SCE_DF_IDENTIFIER: + if (!setWord.Contains(sc.ch)) { + ClassifyDataFlexWord(keywordlists, sc, styler); + } + break; + case SCE_DF_HEXNUMBER: + if (!(setHexNumber.Contains(sc.ch) || sc.ch == 'I') ) { // in |CI$22a we also want to color the "I" + sc.SetState(SCE_DF_DEFAULT); + } + break; + case SCE_DF_METATAG: + if (sc.atLineStart || sc.chPrev == '}') { + sc.SetState(SCE_DF_DEFAULT); + } + break; + case SCE_DF_PREPROCESSOR: + if (sc.atLineStart || IsASpaceOrTab(sc.ch)) { + sc.SetState(SCE_DF_DEFAULT); + } + break; + case SCE_DF_IMAGE: + if (sc.atLineStart && sc.Match("/*")) { + sc.Forward(); // these characters are still part of the DF Image + sc.ForwardSetState(SCE_DF_DEFAULT); + } + break; + case SCE_DF_PREPROCESSOR2: + // we don't have inline comments or preprocessor2 commands + //if (sc.Match('*', ')')) { + // sc.Forward(); + // sc.ForwardSetState(SCE_DF_DEFAULT); + //} + break; + case SCE_DF_COMMENTLINE: + if (sc.atLineStart) { + sc.SetState(SCE_DF_DEFAULT); + } + break; + case SCE_DF_STRING: + if (sc.atLineEnd) { + sc.ChangeState(SCE_DF_STRINGEOL); + } else if (sc.ch == '\'' && sc.chNext == '\'') { + sc.Forward(); + } else if (sc.ch == '\"' && sc.chNext == '\"') { + sc.Forward(); + } else if (sc.ch == '\'' || sc.ch == '\"') { + if (sc.ch == '\'' && (curLineState & stateSingleQuoteOpen) ) { + curLineState &= ~(stateSingleQuoteOpen); + sc.ForwardSetState(SCE_DF_DEFAULT); + } + else if (sc.ch == '\"' && (curLineState & stateDoubleQuoteOpen) ) { + curLineState &= ~(stateDoubleQuoteOpen); + sc.ForwardSetState(SCE_DF_DEFAULT); + } + } + break; + case SCE_DF_STRINGEOL: + if (sc.atLineStart) { + sc.SetState(SCE_DF_DEFAULT); + } + break; + case SCE_DF_SCOPEWORD: + //if (!setHexNumber.Contains(sc.ch) && sc.ch != '$') { + // sc.SetState(SCE_DF_DEFAULT); + //} + break; + case SCE_DF_OPERATOR: +// if (bSmartHighlighting && sc.chPrev == ';') { +// curLineState &= ~(stateInProperty | stateInExport); +// } + sc.SetState(SCE_DF_DEFAULT); + break; + case SCE_DF_ICODE: + if (sc.atLineStart || IsASpace(sc.ch) || isoperator(sc.ch)) { + sc.SetState(SCE_DF_DEFAULT); + } + break; + } + + // Determine if a new state should be entered. + if (sc.state == SCE_DF_DEFAULT) { + if (IsADigit(sc.ch)) { + sc.SetState(SCE_DF_NUMBER); + } else if (sc.Match('/', '/') || sc.Match("#REM")) { + sc.SetState(SCE_DF_COMMENTLINE); + } else if ((sc.ch == '#' && !sc.Match("#REM")) && IsFirstDataFlexWord(sc.currentPos, styler)) { + sc.SetState(SCE_DF_PREPROCESSOR); + // || (sc.ch == '|' && sc.chNext == 'C' && sc.GetRelativeCharacter(2) == 'I' && sc.GetRelativeCharacter(3) == '$') ) { + } else if ((sc.ch == '$' && ((!setWord.Contains(sc.chPrev)) || sc.chPrev == 'I' ) ) || (sc.Match("|CI$")) ) { + sc.SetState(SCE_DF_HEXNUMBER); // start with $ and previous character not in a..zA..Z0..9 excluding "I" OR start with |CI$ + } else if (setWordStart.Contains(sc.ch)) { + sc.SetState(SCE_DF_IDENTIFIER); + } else if (sc.ch == '{') { + sc.SetState(SCE_DF_METATAG); + //} else if (sc.Match("(*$")) { + // sc.SetState(SCE_DF_PREPROCESSOR2); + } else if (sc.ch == '/' && setWord.Contains(sc.chNext) && sc.atLineStart) { + sc.SetState(SCE_DF_IMAGE); + // sc.Forward(); // Eat the * so it isn't used for the end of the comment + } else if (sc.ch == '\'' || sc.ch == '\"') { + if (sc.ch == '\'' && !(curLineState & stateDoubleQuoteOpen)) { + curLineState |= stateSingleQuoteOpen; + } else if (sc.ch == '\"' && !(curLineState & stateSingleQuoteOpen)) { + curLineState |= stateDoubleQuoteOpen; + } + sc.SetState(SCE_DF_STRING); + } else if (setOperator.Contains(sc.ch)) { + sc.SetState(SCE_DF_OPERATOR); +// } else if (curLineState & stateInICode) { + // ICode start ! in a string followed by close string mark is not icode + } else if ((sc.ch == '!') && !(sc.ch == '!' && ((sc.chNext == '\"') || (sc.ch == '\'')) )) { + sc.SetState(SCE_DF_ICODE); + } + } + } + + if (sc.state == SCE_DF_IDENTIFIER && setWord.Contains(sc.chPrev)) { + ClassifyDataFlexWord(keywordlists, sc, styler); + } + + sc.Complete(); +} + +static bool IsStreamCommentStyle(int style) { + return style == SCE_DF_IMAGE; +} + +static bool IsCommentLine(Sci_Position line, Accessor &styler) { + Sci_Position pos = styler.LineStart(line); + Sci_Position eolPos = styler.LineStart(line + 1) - 1; + for (Sci_Position i = pos; i < eolPos; i++) { + char ch = styler[i]; + char chNext = styler.SafeGetCharAt(i + 1); + int style = styler.StyleAt(i); + if (ch == '/' && chNext == '/' && style == SCE_DF_COMMENTLINE) { + return true; + } else if (!IsASpaceOrTab(ch)) { + return false; + } + } + return false; +} + + + +static unsigned int GetFoldInPreprocessorLevelFlag(int lineFoldStateCurrent) { + return lineFoldStateCurrent & stateFoldInPreprocessorLevelMask; +} + +static void SetFoldInPreprocessorLevelFlag(int &lineFoldStateCurrent, unsigned int nestLevel) { + lineFoldStateCurrent &= ~stateFoldInPreprocessorLevelMask; + lineFoldStateCurrent |= nestLevel & stateFoldInPreprocessorLevelMask; +} + +static int ClassifyDataFlexPreprocessorFoldPoint(int &levelCurrent, int &lineFoldStateCurrent, + Sci_PositionU startPos, Accessor &styler) { + CharacterSet setWord(CharacterSet::setAlpha); + + char s[100]; // Size of the longest possible keyword + one additional character + null + GetForwardRangeLowered(startPos, setWord, styler, s, sizeof(s)); + size_t iLen = strnlen(s,sizeof(s)); + size_t iWordSize = 0; + + unsigned int nestLevel = GetFoldInPreprocessorLevelFlag(lineFoldStateCurrent); + + if (strcmp(s, "command") == 0 || + // The #if/#ifdef etcetera commands are not currently foldable as it is easy to write code that + // breaks the collaps logic, so we keep things simple and not include that for now. + strcmp(s, "header") == 0) { + nestLevel++; + SetFoldInPreprocessorLevelFlag(lineFoldStateCurrent, nestLevel); + lineFoldStateCurrent |= stateFoldInPreprocessor; + levelCurrent++; + iWordSize = iLen; + } else if (strcmp(s, "endcommand") == 0 || + strcmp(s, "endheader") == 0) { + nestLevel--; + SetFoldInPreprocessorLevelFlag(lineFoldStateCurrent, nestLevel); + if (nestLevel == 0) { + lineFoldStateCurrent &= ~stateFoldInPreprocessor; + } + levelCurrent--; + iWordSize = iLen; + if (levelCurrent < SC_FOLDLEVELBASE) { + levelCurrent = SC_FOLDLEVELBASE; + } + } + return static_cast<int>(iWordSize); +} + + +static void ClassifyDataFlexWordFoldPoint(int &levelCurrent, int &lineFoldStateCurrent, + Sci_PositionU lastStart, Sci_PositionU currentPos, WordList *[], Accessor &styler) { + char s[100]; + + // property fold.dataflex.compilerlist + // Set to 1 for enabling the code folding feature in *.prn files + bool foldPRN = styler.GetPropertyInt("fold.dataflex.compilerlist",0) != 0; + + GetRangeLowered(lastStart, currentPos, styler, s, sizeof(s)); + + if (strcmp(s, "case") == 0) { + lineFoldStateCurrent |= stateFoldInCaseStatement; + } else if (strcmp(s, "begin") == 0) { + levelCurrent++; + } else if (strcmp(s, "for") == 0 || + strcmp(s, "while") == 0 || + strcmp(s, "repeat") == 0 || + strcmp(s, "for_all") == 0 || + strcmp(s, "struct") == 0 || + strcmp(s, "type") == 0 || + strcmp(s, "begin_row") == 0 || + strcmp(s, "item_list") == 0 || + strcmp(s, "begin_constraints") == 0 || + strcmp(s, "begin_transaction") == 0 || + strcmp(s, "enum_list") == 0 || + strcmp(s, "class") == 0 || + strcmp(s, "object") == 0 || + strcmp(s, "cd_popup_object") == 0 || + strcmp(s, "procedure") == 0 || + strcmp(s, "procedure_section") == 0 || + strcmp(s, "function") == 0 ) { + if ((IsFirstDataFlexWord(lastStart, styler )) || foldPRN) { + levelCurrent++; + } + } else if (strcmp(s, "end") == 0) { // end is not always the first keyword, for example "case end" + levelCurrent--; + if (levelCurrent < SC_FOLDLEVELBASE) { + levelCurrent = SC_FOLDLEVELBASE; + } + } else if (strcmp(s, "loop") == 0 || + strcmp(s, "until") == 0 || + strcmp(s, "end_class") == 0 || + strcmp(s, "end_object") == 0 || + strcmp(s, "cd_end_object") == 0 || + strcmp(s, "end_procedure") == 0 || + strcmp(s, "end_function") == 0 || + strcmp(s, "end_for_all") == 0 || + strcmp(s, "end_struct") == 0 || + strcmp(s, "end_type") == 0 || + strcmp(s, "end_row") == 0 || + strcmp(s, "end_item_list") == 0 || + strcmp(s, "end_constraints") == 0 || + strcmp(s, "end_transaction") == 0 || + strcmp(s, "end_enum_list") == 0 ) { + // lineFoldStateCurrent &= ~stateFoldInRecord; + if ((IsFirstDataFlexWord(lastStart, styler )) || foldPRN) { + levelCurrent--; + if (levelCurrent < SC_FOLDLEVELBASE) { + levelCurrent = SC_FOLDLEVELBASE; + } + } + } + +} + + +static void ClassifyDataFlexMetaDataFoldPoint(int &levelCurrent, + Sci_PositionU lastStart, Sci_PositionU currentPos, WordList *[], Accessor &styler) { + char s[100]; + + GetRangeLowered(lastStart, currentPos, styler, s, sizeof(s)); + + if (strcmp(s, "#beginsection") == 0) { + levelCurrent++; + } else if (strcmp(s, "#endsection") == 0) { + levelCurrent--; + if (levelCurrent < SC_FOLDLEVELBASE) { + levelCurrent = SC_FOLDLEVELBASE; + } + } + +} + +static void FoldDataFlexDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], + Accessor &styler) { + bool foldComment = styler.GetPropertyInt("fold.comment") != 0; + bool foldPreprocessor = styler.GetPropertyInt("fold.preprocessor") != 0; + bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; + Sci_PositionU endPos = startPos + length; + int visibleChars = 0; + Sci_Position lineCurrent = styler.GetLine(startPos); + int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK; + int levelCurrent = levelPrev; + int lineFoldStateCurrent = lineCurrent > 0 ? styler.GetLineState(lineCurrent - 1) & stateFoldMaskAll : 0; + char chNext = styler[startPos]; + int styleNext = styler.StyleAt(startPos); + int style = initStyle; + int iWordSize; + + Sci_Position lastStart = 0; + CharacterSet setWord(CharacterSet::setAlphaNum, "_$#@", 0x80, true); + + for (Sci_PositionU i = startPos; i < endPos; i++) { + char ch = chNext; + chNext = styler.SafeGetCharAt(i + 1); + int stylePrev = style; + style = styleNext; + styleNext = styler.StyleAt(i + 1); + bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); + + if (foldComment && IsStreamCommentStyle(style)) { + if (!IsStreamCommentStyle(stylePrev)) { + levelCurrent++; + } else if (!IsStreamCommentStyle(styleNext)) { + levelCurrent--; + } + } + if (foldComment && atEOL && IsCommentLine(lineCurrent, styler)) + { + if (!IsCommentLine(lineCurrent - 1, styler) + && IsCommentLine(lineCurrent + 1, styler)) + levelCurrent++; + else if (IsCommentLine(lineCurrent - 1, styler) + && !IsCommentLine(lineCurrent+1, styler)) + levelCurrent--; + } + if (foldPreprocessor) { + if (style == SCE_DF_PREPROCESSOR) { + iWordSize = ClassifyDataFlexPreprocessorFoldPoint(levelCurrent, lineFoldStateCurrent, i + 1, styler); + //} else if (style == SCE_DF_PREPROCESSOR2 && ch == '(' && chNext == '*' + // && styler.SafeGetCharAt(i + 2) == '$') { + // ClassifyDataFlexPreprocessorFoldPoint(levelCurrent, lineFoldStateCurrent, i + 3, styler); + i = i + iWordSize; + } + } + + if (stylePrev != SCE_DF_SCOPEWORD && style == SCE_DF_SCOPEWORD) + { + // Store last word start point. + lastStart = i; + } + if (stylePrev == SCE_DF_SCOPEWORD) { + if(setWord.Contains(ch) && !setWord.Contains(chNext)) { + ClassifyDataFlexWordFoldPoint(levelCurrent, lineFoldStateCurrent, lastStart, i, keywordlists, styler); + } + } + + if (stylePrev == SCE_DF_METATAG && ch == '#') + { + // Store last word start point. + lastStart = i; + } + if (stylePrev == SCE_DF_METATAG) { + if(setWord.Contains(ch) && !setWord.Contains(chNext)) { + ClassifyDataFlexMetaDataFoldPoint(levelCurrent, lastStart, i, keywordlists, styler); + } + } + + if (!IsASpace(ch)) + visibleChars++; + + if (atEOL) { + int lev = levelPrev; + if (visibleChars == 0 && foldCompact) + lev |= SC_FOLDLEVELWHITEFLAG; + if ((levelCurrent > levelPrev) && (visibleChars > 0)) + lev |= SC_FOLDLEVELHEADERFLAG; + if (lev != styler.LevelAt(lineCurrent)) { + styler.SetLevel(lineCurrent, lev); + } + int newLineState = (styler.GetLineState(lineCurrent) & ~stateFoldMaskAll) | lineFoldStateCurrent; + styler.SetLineState(lineCurrent, newLineState); + lineCurrent++; + levelPrev = levelCurrent; + visibleChars = 0; + } + } + + // If we didn't reach the EOL in previous loop, store line level and whitespace information. + // The rest will be filled in later... + int lev = levelPrev; + if (visibleChars == 0 && foldCompact) + lev |= SC_FOLDLEVELWHITEFLAG; + styler.SetLevel(lineCurrent, lev); +} + +static const char * const dataflexWordListDesc[] = { + "Keywords", + "Scope open", + "Scope close", + "Operators", + 0 +}; + +LexerModule lmDataflex(SCLEX_DATAFLEX, ColouriseDataFlexDoc, "dataflex", FoldDataFlexDoc, dataflexWordListDesc); |