diff options
author | nyamatongwe <devnull@localhost> | 2009-06-12 02:45:14 +0000 |
---|---|---|
committer | nyamatongwe <devnull@localhost> | 2009-06-12 02:45:14 +0000 |
commit | 33e228945100aba33dabd40a8cbe77e621ba1bd3 (patch) | |
tree | 9de84e3380b67c04970baef63013a3c82d29c950 | |
parent | e471d01efe2d893df610ac7e0c5d5a659530fa05 (diff) | |
download | scintilla-mirror-33e228945100aba33dabd40a8cbe77e621ba1bd3.tar.gz |
New FORTH lexer from bug #2804894.
-rw-r--r-- | src/LexForth.cxx | 438 |
1 files changed, 130 insertions, 308 deletions
diff --git a/src/LexForth.cxx b/src/LexForth.cxx index f097b0e00..45d8903e8 100644 --- a/src/LexForth.cxx +++ b/src/LexForth.cxx @@ -1,10 +1,8 @@ // Scintilla source code edit control -/** @file LexCrontab.cxx - ** Lexer to use with extended crontab files used by a powerful - ** Windows scheduler/event monitor/automation manager nnCron. - ** (http://nemtsev.eserv.ru/) +/** @file LexForth.cxx + ** Lexer for FORTH **/ -// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org> +// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org> // The License.txt file describes the conditions under which this software may be distributed. #include <stdlib.h> @@ -17,6 +15,7 @@ #include "PropSet.h" #include "Accessor.h" +#include "StyleContext.h" #include "KeyWords.h" #include "Scintilla.h" #include "SciLexer.h" @@ -25,100 +24,29 @@ using namespace Scintilla; #endif -bool is_whitespace(int ch){ - return ch == '\n' || ch == '\r' || ch == '\t' || ch == ' '; +static inline bool IsAWordChar(int ch) { + return (ch < 0x80) && (isalnum(ch) || ch == '.' || + ch == '_' || ch == '?' || ch == '"' || ch == '@' || + ch == '!' || ch == '[' || ch == ']' || ch == '/' || + ch == '+' || ch == '-' || ch == '*' || ch == '<' || + ch == '>' || ch == '=' || ch == ';' || ch == '(' || + ch == ')' ); } -bool is_blank(int ch){ - return ch == '\t' || ch == ' '; +static inline bool IsAWordStart(int ch) { + return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.'); } -//#define FORTH_DEBUG -#ifdef FORTH_DEBUG -static FILE *f_debug; -#define log(x) fputs(f_debug,x); -#else -#define log(x) -#endif - -#define STATE_LOCALE -#define BL ' ' -static Accessor *st; -static int cur_pos,pos1,pos2,pos0,lengthDoc; -char *buffer; - -char getChar(bool is_bl){ - char ch=st->SafeGetCharAt(cur_pos); - if(is_bl) if(is_whitespace(ch)) ch=BL; - return ch; +static inline bool IsANumChar(int ch) { + return (ch < 0x80) && (isxdigit(ch) || ch == '.' || ch == 'e' || ch == 'E' ); } -char getCharBL(){ - char ch=st->SafeGetCharAt(cur_pos); - return ch; -} -bool is_eol(char ch){ - return ch=='\n' || ch=='\r'; -} - -int parse(char ch, bool skip_eol){ -// pos1 - start pos of word -// pos2 - pos after of word -// pos0 - start pos - char c=0; - int len; - bool is_bl=ch==BL; - pos0=pos1=pos2=cur_pos; - for(;cur_pos<lengthDoc && (c=getChar(is_bl))==ch; cur_pos++){ - if(is_eol(c) && !skip_eol){ - pos2=pos1; - return 0; - } - } - pos1=cur_pos; - pos2=pos1; - if(cur_pos==lengthDoc) return 0; - for(len=0;cur_pos<lengthDoc && (c=getChar(is_bl))!=ch; cur_pos++){ - if(is_eol(c) && !skip_eol) break; - pos2++; - buffer[len++]=c; - } - if(c==ch) pos2--; - buffer[len]='\0'; -#ifdef FORTH_DEBUG - fprintf(f_debug,"parse: %c %s\n",ch,buffer); -#endif - return len; +static inline bool IsASpaceChar(int ch) { + return (ch < 0x80) && isspace(ch); } -bool _is_number(char *s,int base){ - for(;*s;s++){ - int digit=((int)*s)-(int)'0'; -#ifdef FORTH_DEBUG - fprintf(f_debug,"digit: %c %d\n",*s,digit); -#endif - if(digit>9 && base>10) digit-=7; - if(digit<0) return false; - if(digit>=base) return false; - } - return true; -} - -bool is_number(char *s){ - if(strncmp(s,"0x",2)==0) return _is_number(s+2,16); - return _is_number(s,10); -} - -static void ColouriseForthDoc(unsigned int startPos, int length, int, WordList *keywordLists[], Accessor &styler) -{ - st=&styler; - cur_pos=startPos; - lengthDoc = startPos + length; - buffer = new char[length]; - -#ifdef FORTH_DEBUG - f_debug=fopen("c:\\sci.log","at"); -#endif +static void ColouriseForthDoc(unsigned int startPos, int length, int initStyle, WordList *keywordLists[], + Accessor &styler) { WordList &control = *keywordLists[0]; WordList &keyword = *keywordLists[1]; @@ -127,226 +55,120 @@ static void ColouriseForthDoc(unsigned int startPos, int length, int, WordList * WordList &preword2 = *keywordLists[4]; WordList &strings = *keywordLists[5]; - // go through all provided text segment - // using the hand-written state machine shown below - styler.StartAt(startPos); - styler.StartSegment(startPos); - while(parse(BL,true)!=0){ - if(pos0!=pos1){ - styler.ColourTo(pos0,SCE_FORTH_DEFAULT); - styler.ColourTo(pos1-1,SCE_FORTH_DEFAULT); - } - if(strcmp("\\",buffer)==0){ - styler.ColourTo(pos1,SCE_FORTH_COMMENT); - parse(1,false); - styler.ColourTo(pos2,SCE_FORTH_COMMENT); - }else if(strcmp("(",buffer)==0){ - styler.ColourTo(pos1,SCE_FORTH_COMMENT); - parse(')',true); - if(cur_pos<lengthDoc) cur_pos++; - styler.ColourTo(cur_pos,SCE_FORTH_COMMENT); - }else if(strcmp("[",buffer)==0){ - styler.ColourTo(pos1,SCE_FORTH_STRING); - parse(']',true); - if(cur_pos<lengthDoc) cur_pos++; - styler.ColourTo(cur_pos,SCE_FORTH_STRING); - }else if(strcmp("{",buffer)==0){ - styler.ColourTo(pos1,SCE_FORTH_LOCALE); - parse('}',false); - if(cur_pos<lengthDoc) cur_pos++; - styler.ColourTo(cur_pos,SCE_FORTH_LOCALE); - }else if(strings.InList(buffer)) { - styler.ColourTo(pos1,SCE_FORTH_STRING); - parse('"',false); - if(cur_pos<lengthDoc) cur_pos++; - styler.ColourTo(cur_pos,SCE_FORTH_STRING); - }else if(control.InList(buffer)) { - styler.ColourTo(pos1,SCE_FORTH_CONTROL); - styler.ColourTo(pos2,SCE_FORTH_CONTROL); - }else if(keyword.InList(buffer)) { - styler.ColourTo(pos1,SCE_FORTH_KEYWORD); - styler.ColourTo(pos2,SCE_FORTH_KEYWORD); - }else if(defword.InList(buffer)) { - styler.ColourTo(pos1,SCE_FORTH_KEYWORD); - styler.ColourTo(pos2,SCE_FORTH_KEYWORD); - parse(BL,false); - styler.ColourTo(pos1-1,SCE_FORTH_DEFAULT); - styler.ColourTo(pos1,SCE_FORTH_DEFWORD); - styler.ColourTo(pos2,SCE_FORTH_DEFWORD); - }else if(preword1.InList(buffer)) { - styler.ColourTo(pos1,SCE_FORTH_PREWORD1); - parse(BL,false); - styler.ColourTo(pos2,SCE_FORTH_PREWORD1); - }else if(preword2.InList(buffer)) { - styler.ColourTo(pos1,SCE_FORTH_PREWORD2); - parse(BL,false); - styler.ColourTo(pos2,SCE_FORTH_PREWORD2); - parse(BL,false); - styler.ColourTo(pos1,SCE_FORTH_STRING); - styler.ColourTo(pos2,SCE_FORTH_STRING); - }else if(is_number(buffer)){ - styler.ColourTo(pos1,SCE_FORTH_NUMBER); - styler.ColourTo(pos2,SCE_FORTH_NUMBER); - } - } -#ifdef FORTH_DEBUG - fclose(f_debug); -#endif - delete []buffer; - return; -/* - if(control.InList(buffer)) { - styler.ColourTo(i,SCE_FORTH_CONTROL); - } else if(keyword.InList(buffer)) { - styler.ColourTo(i-1,SCE_FORTH_KEYWORD ); - } else if(defword.InList(buffer)) { - styler.ColourTo(i-1,SCE_FORTH_DEFWORD ); -// prev_state=SCE_FORTH_DEFWORD - } else if(preword1.InList(buffer)) { - styler.ColourTo(i-1,SCE_FORTH_PREWORD1 ); -// state=SCE_FORTH_PREWORD1; - } else if(preword2.InList(buffer)) { - styler.ColourTo(i-1,SCE_FORTH_PREWORD2 ); - } else { - styler.ColourTo(i-1,SCE_FORTH_DEFAULT); - } -*/ -/* - chPrev=' '; - for (int i = startPos; i < lengthDoc; i++) { - char ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - if(i!=startPos) chPrev=styler.SafeGetCharAt(i - 1); - - if (styler.IsLeadByte(ch)) { - chNext = styler.SafeGetCharAt(i + 2); - i++; - continue; - } -#ifdef FORTH_DEBUG - fprintf(f_debug,"%c %d ",ch,state); -#endif - switch(state) { - case SCE_FORTH_DEFAULT: - if(is_whitespace(ch)) { - // whitespace is simply ignored here... - styler.ColourTo(i,SCE_FORTH_DEFAULT); - break; - } else if( ch == '\\' && is_blank(chNext)) { - // signals the start of an one line comment... - state = SCE_FORTH_COMMENT; - styler.ColourTo(i,SCE_FORTH_COMMENT); - } else if( is_whitespace(chPrev) && ch == '(' && is_whitespace(chNext)) { - // signals the start of a plain comment... - state = SCE_FORTH_COMMENT_ML; - styler.ColourTo(i,SCE_FORTH_COMMENT_ML); - } else if( isdigit(ch) ) { - // signals the start of a number - bufferCount = 0; - buffer[bufferCount++] = ch; - state = SCE_FORTH_NUMBER; - } else if( !is_whitespace(ch)) { - // signals the start of an identifier - bufferCount = 0; - buffer[bufferCount++] = ch; - state = SCE_FORTH_IDENTIFIER; - } else { - // style it the default style.. - styler.ColourTo(i,SCE_FORTH_DEFAULT); - } - break; - - case SCE_FORTH_COMMENT: - // if we find a newline here, - // we simply go to default state - // else continue to work on it... - if( ch == '\n' || ch == '\r' ) { - state = SCE_FORTH_DEFAULT; - } else { - styler.ColourTo(i,SCE_FORTH_COMMENT); - } - break; - - case SCE_FORTH_COMMENT_ML: - if( ch == ')') { - state = SCE_FORTH_DEFAULT; - } else { - styler.ColourTo(i+1,SCE_FORTH_COMMENT_ML); - } - break; - - case SCE_FORTH_IDENTIFIER: - // stay in CONF_IDENTIFIER state until we find a non-alphanumeric - if( !is_whitespace(ch) ) { - buffer[bufferCount++] = ch; - } else { - state = SCE_FORTH_DEFAULT; - buffer[bufferCount] = '\0'; -#ifdef FORTH_DEBUG - fprintf(f_debug,"\nid %s\n",buffer); -#endif - - // check if the buffer contains a keyword, - // and highlight it if it is a keyword... -// switch(prev_state) -// case SCE_FORTH_DEFAULT: - if(control.InList(buffer)) { - styler.ColourTo(i,SCE_FORTH_CONTROL); - } else if(keyword.InList(buffer)) { - styler.ColourTo(i-1,SCE_FORTH_KEYWORD ); - } else if(defword.InList(buffer)) { - styler.ColourTo(i-1,SCE_FORTH_DEFWORD ); -// prev_state=SCE_FORTH_DEFWORD - } else if(preword1.InList(buffer)) { - styler.ColourTo(i-1,SCE_FORTH_PREWORD1 ); -// state=SCE_FORTH_PREWORD1; - } else if(preword2.InList(buffer)) { - styler.ColourTo(i-1,SCE_FORTH_PREWORD2 ); - } else { - styler.ColourTo(i-1,SCE_FORTH_DEFAULT); - } -// break; -// case - - // push back the faulty character - chNext = styler[i--]; - } - break; - - case SCE_FORTH_NUMBER: - // stay in CONF_NUMBER state until we find a non-numeric - if( isdigit(ch) ) { - buffer[bufferCount++] = ch; - } else { - state = SCE_FORTH_DEFAULT; - buffer[bufferCount] = '\0'; - // Colourize here... (normal number) - styler.ColourTo(i-1,SCE_FORTH_NUMBER); - // push back a character - chNext = styler[i--]; - } - break; - } - } -#ifdef FORTH_DEBUG - fclose(f_debug); -#endif - delete []buffer; -*/ + StyleContext sc(startPos, length, initStyle, styler); + + for (; sc.More(); sc.Forward()) + { + // Determine if the current state should terminate. + if (sc.state == SCE_FORTH_COMMENT) { + if (sc.atLineEnd) { + sc.SetState(SCE_FORTH_DEFAULT); + } + }else if (sc.state == SCE_FORTH_COMMENT_ML) { + if (sc.ch == ')') { + sc.ForwardSetState(SCE_FORTH_DEFAULT); + } + }else if (sc.state == SCE_FORTH_IDENTIFIER || sc.state == SCE_FORTH_NUMBER) { + // handle numbers here too, because what we thought was a number might + // turn out to be a keyword e.g. 2DUP + if (IsASpaceChar(sc.ch) ) { + char s[100]; + sc.GetCurrentLowered(s, sizeof(s)); + int newState = sc.state == SCE_FORTH_NUMBER ? SCE_FORTH_NUMBER : SCE_FORTH_DEFAULT; + if (control.InList(s)) { + sc.ChangeState(SCE_FORTH_CONTROL); + } else if (keyword.InList(s)) { + sc.ChangeState(SCE_FORTH_KEYWORD); + } else if (defword.InList(s)) { + sc.ChangeState(SCE_FORTH_DEFWORD); + } else if (preword1.InList(s)) { + sc.ChangeState(SCE_FORTH_PREWORD1); + } else if (preword2.InList(s)) { + sc.ChangeState(SCE_FORTH_PREWORD2); + } else if (strings.InList(s)) { + sc.ChangeState(SCE_FORTH_STRING); + newState = SCE_FORTH_STRING; + } + sc.SetState(newState); + } + if (sc.state == SCE_FORTH_NUMBER) { + if (IsASpaceChar(sc.ch)) { + sc.SetState(SCE_FORTH_DEFAULT); + } + } + }else if (sc.state == SCE_FORTH_STRING) { + if (sc.ch == '\"') { + sc.ForwardSetState(SCE_FORTH_DEFAULT); + } + }else if (sc.state == SCE_FORTH_LOCALE) { + if (sc.ch == '}') { + sc.ForwardSetState(SCE_FORTH_DEFAULT); + } + }else if (sc.state == SCE_FORTH_DEFWORD) { + if (IsASpaceChar(sc.ch)) { + sc.SetState(SCE_FORTH_DEFAULT); + } + } + + // Determine if a new state should be entered. + if (sc.state == SCE_FORTH_DEFAULT) { + if (sc.ch == '\\'){ + sc.SetState(SCE_FORTH_COMMENT); + } else if (sc.ch == '(' && + (sc.atLineStart || IsASpaceChar(sc.chPrev)) && + (sc.atLineEnd || IsASpaceChar(sc.chNext))) { + sc.SetState(SCE_FORTH_COMMENT_ML); + } else if ( (sc.ch == '$' && (isascii(sc.chNext) && isxdigit(sc.chNext))) ) { + // number starting with $ is a hex number + sc.SetState(SCE_FORTH_NUMBER); + while(sc.More() && isascii(sc.chNext) && isxdigit(sc.chNext)) + sc.Forward(); + } else if ( (sc.ch == '%' && (isascii(sc.chNext) && (sc.chNext == '0' || sc.chNext == '1'))) ) { + // number starting with % is binary + sc.SetState(SCE_FORTH_NUMBER); + while(sc.More() && isascii(sc.chNext) && (sc.chNext == '0' || sc.chNext == '1')) + sc.Forward(); + } else if ( isascii(sc.ch) && + (isxdigit(sc.ch) || ((sc.ch == '.' || sc.ch == '-') && isascii(sc.chNext) && isxdigit(sc.chNext)) ) + ){ + sc.SetState(SCE_FORTH_NUMBER); + } else if (IsAWordStart(sc.ch)) { + sc.SetState(SCE_FORTH_IDENTIFIER); + } else if (sc.ch == '{') { + sc.SetState(SCE_FORTH_LOCALE); + } else if (sc.ch == ':' && isascii(sc.chNext) && isspace(sc.chNext)) { + // highlight word definitions e.g. : GCD ( n n -- n ) ..... ; + // ^ ^^^ + sc.SetState(SCE_FORTH_DEFWORD); + while(sc.More() && isascii(sc.chNext) && isspace(sc.chNext)) + sc.Forward(); + } else if (sc.ch == ';' && + (sc.atLineStart || IsASpaceChar(sc.chPrev)) && + (sc.atLineEnd || IsASpaceChar(sc.chNext)) ) { + // mark the ';' that ends a word + sc.SetState(SCE_FORTH_DEFWORD); + sc.ForwardSetState(SCE_FORTH_DEFAULT); + } + } + + } + sc.Complete(); } static void FoldForthDoc(unsigned int, int, int, WordList *[], - Accessor &) { + Accessor &) { } static const char * const forthWordLists[] = { - "control keywords", - "keywords", - "definition words", - "prewords with one argument", - "prewords with two arguments", - "string definition keywords", - 0, - }; - -LexerModule lmForth(SCLEX_FORTH, ColouriseForthDoc, "forth",FoldForthDoc,forthWordLists); + "control keywords", + "keywords", + "definition words", + "prewords with one argument", + "prewords with two arguments", + "string definition keywords", + 0, + }; + +LexerModule lmForth(SCLEX_FORTH, ColouriseForthDoc, "forth", FoldForthDoc, forthWordLists); + + |