diff options
| author | nyamatongwe <devnull@localhost> | 2009-06-12 02:45:14 +0000 | 
|---|---|---|
| committer | nyamatongwe <devnull@localhost> | 2009-06-12 02:45:14 +0000 | 
| commit | 33e228945100aba33dabd40a8cbe77e621ba1bd3 (patch) | |
| tree | 9de84e3380b67c04970baef63013a3c82d29c950 /src/LexForth.cxx | |
| parent | e471d01efe2d893df610ac7e0c5d5a659530fa05 (diff) | |
| download | scintilla-mirror-33e228945100aba33dabd40a8cbe77e621ba1bd3.tar.gz | |
New FORTH lexer from bug #2804894.
Diffstat (limited to 'src/LexForth.cxx')
| -rw-r--r-- | src/LexForth.cxx | 438 | 
1 files changed, 130 insertions, 308 deletions
| diff --git a/src/LexForth.cxx b/src/LexForth.cxx index f097b0e00..45d8903e8 100644 --- a/src/LexForth.cxx +++ b/src/LexForth.cxx @@ -1,10 +1,8 @@  // Scintilla source code edit control -/** @file LexCrontab.cxx - ** Lexer to use with extended crontab files used by a powerful - ** Windows scheduler/event monitor/automation manager nnCron. - ** (http://nemtsev.eserv.ru/) +/** @file LexForth.cxx + ** Lexer for FORTH   **/ -// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org> +// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>  // The License.txt file describes the conditions under which this software may be distributed.  #include <stdlib.h> @@ -17,6 +15,7 @@  #include "PropSet.h"  #include "Accessor.h" +#include "StyleContext.h"  #include "KeyWords.h"  #include "Scintilla.h"  #include "SciLexer.h" @@ -25,100 +24,29 @@  using namespace Scintilla;  #endif -bool is_whitespace(int ch){ -    return ch == '\n' || ch == '\r' || ch == '\t' || ch == ' '; +static inline bool IsAWordChar(int ch) { +	return (ch < 0x80) && (isalnum(ch) || ch == '.' || +		ch == '_' || ch == '?' || ch == '"' || ch == '@' || +		ch == '!' || ch == '[' || ch == ']' || ch == '/' || +		ch == '+' || ch == '-' || ch == '*' || ch == '<' || +		ch == '>' || ch == '=' || ch == ';' || ch == '(' || +		ch == ')' );  } -bool is_blank(int ch){ -    return ch == '\t' || ch == ' '; +static inline bool IsAWordStart(int ch) { +	return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.');  } -//#define FORTH_DEBUG -#ifdef FORTH_DEBUG -static FILE *f_debug; -#define log(x)  fputs(f_debug,x); -#else -#define log(x) -#endif - -#define STATE_LOCALE -#define BL ' ' -static Accessor *st; -static int cur_pos,pos1,pos2,pos0,lengthDoc; -char *buffer; - -char getChar(bool is_bl){ -    char ch=st->SafeGetCharAt(cur_pos); -    if(is_bl) if(is_whitespace(ch)) ch=BL; -    return ch; +static inline bool IsANumChar(int ch) { +	return (ch < 0x80) && (isxdigit(ch) || ch == '.' || ch == 'e' || ch == 'E' );  } -char getCharBL(){ -    char ch=st->SafeGetCharAt(cur_pos); -    return ch; -} -bool is_eol(char ch){ -    return ch=='\n' || ch=='\r'; -} - -int parse(char ch, bool skip_eol){ -// pos1 - start pos of word -// pos2 - pos after of word -// pos0 - start pos -    char c=0; -    int len; -    bool is_bl=ch==BL; -    pos0=pos1=pos2=cur_pos; -    for(;cur_pos<lengthDoc && (c=getChar(is_bl))==ch; cur_pos++){ -        if(is_eol(c) && !skip_eol){ -            pos2=pos1; -            return 0; -        } -    } -    pos1=cur_pos; -    pos2=pos1; -    if(cur_pos==lengthDoc) return 0; -    for(len=0;cur_pos<lengthDoc && (c=getChar(is_bl))!=ch; cur_pos++){ -        if(is_eol(c) && !skip_eol) break; -        pos2++; -        buffer[len++]=c; -    } -    if(c==ch) pos2--; -    buffer[len]='\0'; -#ifdef FORTH_DEBUG -    fprintf(f_debug,"parse: %c %s\n",ch,buffer); -#endif -    return len; +static inline bool IsASpaceChar(int ch) { +	return (ch < 0x80) && isspace(ch);  } -bool _is_number(char *s,int base){ -    for(;*s;s++){ -        int digit=((int)*s)-(int)'0'; -#ifdef FORTH_DEBUG -    fprintf(f_debug,"digit: %c %d\n",*s,digit); -#endif -        if(digit>9 && base>10) digit-=7; -        if(digit<0) return false; -        if(digit>=base) return false; -    } -    return true; -} - -bool is_number(char *s){ -    if(strncmp(s,"0x",2)==0) return _is_number(s+2,16); -    return _is_number(s,10); -} - -static void ColouriseForthDoc(unsigned int startPos, int length, int, WordList *keywordLists[], Accessor &styler) -{ -    st=&styler; -    cur_pos=startPos; -    lengthDoc = startPos + length; -    buffer = new char[length]; - -#ifdef FORTH_DEBUG -    f_debug=fopen("c:\\sci.log","at"); -#endif +static void ColouriseForthDoc(unsigned int startPos, int length, int initStyle, WordList *keywordLists[], +                            Accessor &styler) {      WordList &control = *keywordLists[0];      WordList &keyword = *keywordLists[1]; @@ -127,226 +55,120 @@ static void ColouriseForthDoc(unsigned int startPos, int length, int, WordList *      WordList &preword2 = *keywordLists[4];      WordList &strings = *keywordLists[5]; -    // go through all provided text segment -    // using the hand-written state machine shown below -    styler.StartAt(startPos); -    styler.StartSegment(startPos); -    while(parse(BL,true)!=0){ -        if(pos0!=pos1){ -            styler.ColourTo(pos0,SCE_FORTH_DEFAULT); -            styler.ColourTo(pos1-1,SCE_FORTH_DEFAULT); -        } -        if(strcmp("\\",buffer)==0){ -            styler.ColourTo(pos1,SCE_FORTH_COMMENT); -            parse(1,false); -            styler.ColourTo(pos2,SCE_FORTH_COMMENT); -        }else if(strcmp("(",buffer)==0){ -            styler.ColourTo(pos1,SCE_FORTH_COMMENT); -            parse(')',true); -            if(cur_pos<lengthDoc) cur_pos++; -            styler.ColourTo(cur_pos,SCE_FORTH_COMMENT); -        }else if(strcmp("[",buffer)==0){ -            styler.ColourTo(pos1,SCE_FORTH_STRING); -            parse(']',true); -            if(cur_pos<lengthDoc) cur_pos++; -            styler.ColourTo(cur_pos,SCE_FORTH_STRING); -        }else if(strcmp("{",buffer)==0){ -            styler.ColourTo(pos1,SCE_FORTH_LOCALE); -            parse('}',false); -            if(cur_pos<lengthDoc) cur_pos++; -            styler.ColourTo(cur_pos,SCE_FORTH_LOCALE); -        }else if(strings.InList(buffer)) { -            styler.ColourTo(pos1,SCE_FORTH_STRING); -            parse('"',false); -            if(cur_pos<lengthDoc) cur_pos++; -            styler.ColourTo(cur_pos,SCE_FORTH_STRING); -        }else if(control.InList(buffer)) { -            styler.ColourTo(pos1,SCE_FORTH_CONTROL); -            styler.ColourTo(pos2,SCE_FORTH_CONTROL); -        }else if(keyword.InList(buffer)) { -            styler.ColourTo(pos1,SCE_FORTH_KEYWORD); -            styler.ColourTo(pos2,SCE_FORTH_KEYWORD); -        }else if(defword.InList(buffer)) { -            styler.ColourTo(pos1,SCE_FORTH_KEYWORD); -            styler.ColourTo(pos2,SCE_FORTH_KEYWORD); -            parse(BL,false); -            styler.ColourTo(pos1-1,SCE_FORTH_DEFAULT); -            styler.ColourTo(pos1,SCE_FORTH_DEFWORD); -            styler.ColourTo(pos2,SCE_FORTH_DEFWORD); -        }else if(preword1.InList(buffer)) { -            styler.ColourTo(pos1,SCE_FORTH_PREWORD1); -            parse(BL,false); -            styler.ColourTo(pos2,SCE_FORTH_PREWORD1); -        }else if(preword2.InList(buffer)) { -            styler.ColourTo(pos1,SCE_FORTH_PREWORD2); -            parse(BL,false); -            styler.ColourTo(pos2,SCE_FORTH_PREWORD2); -            parse(BL,false); -            styler.ColourTo(pos1,SCE_FORTH_STRING); -            styler.ColourTo(pos2,SCE_FORTH_STRING); -        }else if(is_number(buffer)){ -            styler.ColourTo(pos1,SCE_FORTH_NUMBER); -            styler.ColourTo(pos2,SCE_FORTH_NUMBER); -        } -    } -#ifdef FORTH_DEBUG -    fclose(f_debug); -#endif -    delete []buffer; -    return; -/* -                        if(control.InList(buffer)) { -                            styler.ColourTo(i,SCE_FORTH_CONTROL); -                        } else if(keyword.InList(buffer)) { -                            styler.ColourTo(i-1,SCE_FORTH_KEYWORD ); -                        } else if(defword.InList(buffer)) { -                            styler.ColourTo(i-1,SCE_FORTH_DEFWORD ); -//                            prev_state=SCE_FORTH_DEFWORD -                        } else if(preword1.InList(buffer)) { -                            styler.ColourTo(i-1,SCE_FORTH_PREWORD1 ); -//                            state=SCE_FORTH_PREWORD1; -                        } else if(preword2.InList(buffer)) { -                            styler.ColourTo(i-1,SCE_FORTH_PREWORD2 ); -                         } else { -                            styler.ColourTo(i-1,SCE_FORTH_DEFAULT); -                        } -*/ -/* -    chPrev=' '; -    for (int i = startPos; i < lengthDoc; i++) { -        char ch = chNext; -        chNext = styler.SafeGetCharAt(i + 1); -        if(i!=startPos) chPrev=styler.SafeGetCharAt(i - 1); - -        if (styler.IsLeadByte(ch)) { -            chNext = styler.SafeGetCharAt(i + 2); -            i++; -            continue; -        } -#ifdef FORTH_DEBUG -        fprintf(f_debug,"%c %d ",ch,state); -#endif -        switch(state) { -            case SCE_FORTH_DEFAULT: -                if(is_whitespace(ch)) { -                    // whitespace is simply ignored here... -                    styler.ColourTo(i,SCE_FORTH_DEFAULT); -                    break; -                } else if( ch == '\\' && is_blank(chNext)) { -                    // signals the start of an one line comment... -                    state = SCE_FORTH_COMMENT; -                    styler.ColourTo(i,SCE_FORTH_COMMENT); -                } else if( is_whitespace(chPrev) &&  ch == '(' &&  is_whitespace(chNext)) { -                    // signals the start of a plain comment... -                    state = SCE_FORTH_COMMENT_ML; -                    styler.ColourTo(i,SCE_FORTH_COMMENT_ML); -                } else if( isdigit(ch) ) { -                    // signals the start of a number -                    bufferCount = 0; -                    buffer[bufferCount++] = ch; -                    state = SCE_FORTH_NUMBER; -                } else if( !is_whitespace(ch)) { -                    // signals the start of an identifier -                    bufferCount = 0; -                    buffer[bufferCount++] = ch; -                    state = SCE_FORTH_IDENTIFIER; -                } else { -                    // style it the default style.. -                    styler.ColourTo(i,SCE_FORTH_DEFAULT); -                } -                break; - -            case SCE_FORTH_COMMENT: -                // if we find a newline here, -                // we simply go to default state -                // else continue to work on it... -                if( ch == '\n' || ch == '\r' ) { -                    state = SCE_FORTH_DEFAULT; -                } else { -                    styler.ColourTo(i,SCE_FORTH_COMMENT); -                } -                break; - -            case SCE_FORTH_COMMENT_ML: -                if( ch == ')') { -                    state = SCE_FORTH_DEFAULT; -                } else { -                    styler.ColourTo(i+1,SCE_FORTH_COMMENT_ML); -                } -                break; - -            case SCE_FORTH_IDENTIFIER: -                // stay  in CONF_IDENTIFIER state until we find a non-alphanumeric -                if( !is_whitespace(ch) ) { -                    buffer[bufferCount++] = ch; -                } else { -                    state = SCE_FORTH_DEFAULT; -                    buffer[bufferCount] = '\0'; -#ifdef FORTH_DEBUG -        fprintf(f_debug,"\nid %s\n",buffer); -#endif - -                    // check if the buffer contains a keyword, -                    // and highlight it if it is a keyword... -//                    switch(prev_state) -//                    case SCE_FORTH_DEFAULT: -                        if(control.InList(buffer)) { -                            styler.ColourTo(i,SCE_FORTH_CONTROL); -                        } else if(keyword.InList(buffer)) { -                            styler.ColourTo(i-1,SCE_FORTH_KEYWORD ); -                        } else if(defword.InList(buffer)) { -                            styler.ColourTo(i-1,SCE_FORTH_DEFWORD ); -//                            prev_state=SCE_FORTH_DEFWORD -                        } else if(preword1.InList(buffer)) { -                            styler.ColourTo(i-1,SCE_FORTH_PREWORD1 ); -//                            state=SCE_FORTH_PREWORD1; -                        } else if(preword2.InList(buffer)) { -                            styler.ColourTo(i-1,SCE_FORTH_PREWORD2 ); -                         } else { -                            styler.ColourTo(i-1,SCE_FORTH_DEFAULT); -                        } -//                        break; -//                    case - -                    // push back the faulty character -                    chNext = styler[i--]; -                } -                break; - -            case SCE_FORTH_NUMBER: -                // stay  in CONF_NUMBER state until we find a non-numeric -                if( isdigit(ch) ) { -                    buffer[bufferCount++] = ch; -                } else { -                    state = SCE_FORTH_DEFAULT; -                    buffer[bufferCount] = '\0'; -                    // Colourize here... (normal number) -                    styler.ColourTo(i-1,SCE_FORTH_NUMBER); -                    // push back a character -                    chNext = styler[i--]; -                } -                break; -        } -    } -#ifdef FORTH_DEBUG -    fclose(f_debug); -#endif -    delete []buffer; -*/ +	StyleContext sc(startPos, length, initStyle, styler); + +	for (; sc.More(); sc.Forward()) +	{ +		// Determine if the current state should terminate. +		if (sc.state == SCE_FORTH_COMMENT) { +			if (sc.atLineEnd) { +				sc.SetState(SCE_FORTH_DEFAULT); +			} +		}else if (sc.state == SCE_FORTH_COMMENT_ML) { +			if (sc.ch == ')') { +				sc.ForwardSetState(SCE_FORTH_DEFAULT); +			} +		}else if (sc.state == SCE_FORTH_IDENTIFIER || sc.state == SCE_FORTH_NUMBER) { +			// handle numbers here too, because what we thought was a number might +			// turn out to be a keyword e.g. 2DUP +			if (IsASpaceChar(sc.ch) ) { +				char s[100]; +				sc.GetCurrentLowered(s, sizeof(s)); +				int newState = sc.state == SCE_FORTH_NUMBER ? SCE_FORTH_NUMBER : SCE_FORTH_DEFAULT; +				if (control.InList(s)) { +					sc.ChangeState(SCE_FORTH_CONTROL); +				} else if (keyword.InList(s)) { +					sc.ChangeState(SCE_FORTH_KEYWORD); +				} else if (defword.InList(s)) { +					sc.ChangeState(SCE_FORTH_DEFWORD); +				}  else if (preword1.InList(s)) { +					sc.ChangeState(SCE_FORTH_PREWORD1); +				} else if (preword2.InList(s)) { +					sc.ChangeState(SCE_FORTH_PREWORD2); +				} else if (strings.InList(s)) { +					sc.ChangeState(SCE_FORTH_STRING); +					newState = SCE_FORTH_STRING; +				} +				sc.SetState(newState); +			} +			if (sc.state == SCE_FORTH_NUMBER) { +				if (IsASpaceChar(sc.ch)) { +					sc.SetState(SCE_FORTH_DEFAULT); +				} +			} +		}else if (sc.state == SCE_FORTH_STRING) { +			if (sc.ch == '\"') { +				sc.ForwardSetState(SCE_FORTH_DEFAULT); +			} +		}else if (sc.state == SCE_FORTH_LOCALE) { +			if (sc.ch == '}') { +				sc.ForwardSetState(SCE_FORTH_DEFAULT); +			} +		}else if (sc.state == SCE_FORTH_DEFWORD) { +			if (IsASpaceChar(sc.ch)) { +				sc.SetState(SCE_FORTH_DEFAULT); +			} +		} + +		// Determine if a new state should be entered. +		if (sc.state == SCE_FORTH_DEFAULT) { +			if (sc.ch == '\\'){ +				sc.SetState(SCE_FORTH_COMMENT); +			} else if (sc.ch == '(' && +					(sc.atLineStart || IsASpaceChar(sc.chPrev)) && +					(sc.atLineEnd   || IsASpaceChar(sc.chNext))) { +				sc.SetState(SCE_FORTH_COMMENT_ML); +			} else if (	(sc.ch == '$' && (isascii(sc.chNext) && isxdigit(sc.chNext))) ) { +				// number starting with $ is a hex number +				sc.SetState(SCE_FORTH_NUMBER); +				while(sc.More() && isascii(sc.chNext) && isxdigit(sc.chNext)) +					sc.Forward(); +			} else if ( (sc.ch == '%' && (isascii(sc.chNext) && (sc.chNext == '0' || sc.chNext == '1'))) ) { +				// number starting with % is binary +				sc.SetState(SCE_FORTH_NUMBER); +				while(sc.More() && isascii(sc.chNext) && (sc.chNext == '0' || sc.chNext == '1')) +					sc.Forward(); +			} else if (	isascii(sc.ch) &&  +						(isxdigit(sc.ch) || ((sc.ch == '.' || sc.ch == '-') && isascii(sc.chNext) && isxdigit(sc.chNext)) ) +					){ +				sc.SetState(SCE_FORTH_NUMBER); +			} else if (IsAWordStart(sc.ch)) { +				sc.SetState(SCE_FORTH_IDENTIFIER); +			} else if (sc.ch == '{') { +				sc.SetState(SCE_FORTH_LOCALE); +			} else if (sc.ch == ':' && isascii(sc.chNext) && isspace(sc.chNext)) { +				// highlight word definitions e.g.  : GCD ( n n -- n ) ..... ; +				//                                  ^ ^^^ +				sc.SetState(SCE_FORTH_DEFWORD); +				while(sc.More() && isascii(sc.chNext) && isspace(sc.chNext)) +					sc.Forward(); +			} else if (sc.ch == ';' && +					(sc.atLineStart || IsASpaceChar(sc.chPrev)) && +					(sc.atLineEnd   || IsASpaceChar(sc.chNext))	) { +				// mark the ';' that ends a word +				sc.SetState(SCE_FORTH_DEFWORD); +				sc.ForwardSetState(SCE_FORTH_DEFAULT); +			} +		} + +	} +	sc.Complete();  }  static void FoldForthDoc(unsigned int, int, int, WordList *[], -                       Accessor &) { +						Accessor &) {  }  static const char * const forthWordLists[] = { -            "control keywords", -            "keywords", -            "definition words", -            "prewords with one argument", -            "prewords with two arguments", -            "string definition keywords", -            0, -        }; - -LexerModule lmForth(SCLEX_FORTH, ColouriseForthDoc, "forth",FoldForthDoc,forthWordLists); +			"control keywords", +			"keywords", +			"definition words", +			"prewords with one argument", +			"prewords with two arguments", +			"string definition keywords", +			0, +		}; + +LexerModule lmForth(SCLEX_FORTH, ColouriseForthDoc, "forth", FoldForthDoc, forthWordLists); + + 	  	  | 
