about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author nyamatongwe <devnull@localhost> 2009-06-12 02:45:14 +0000
committer nyamatongwe <devnull@localhost> 2009-06-12 02:45:14 +0000
commit 33e228945100aba33dabd40a8cbe77e621ba1bd3 (patch)
tree 9de84e3380b67c04970baef63013a3c82d29c950
parent e471d01efe2d893df610ac7e0c5d5a659530fa05 (diff)
download scintilla-mirror-33e228945100aba33dabd40a8cbe77e621ba1bd3.tar.gz
New FORTH lexer from bug #2804894.
-rw-r--r-- src/LexForth.cxx 438
1 files changed, 130 insertions, 308 deletions
diff --git a/src/LexForth.cxx b/src/LexForth.cxx
index f097b0e00..45d8903e8 100644
--- a/src/LexForth.cxx
+++ b/src/LexForth.cxx
@@ -1,10 +1,8 @@
// Scintilla source code edit control
-/** @file LexCrontab.cxx
- ** Lexer to use with extended crontab files used by a powerful
- ** Windows scheduler/event monitor/automation manager nnCron.
- ** (http://nemtsev.eserv.ru/)
+/** @file LexForth.cxx
+ ** Lexer for FORTH
**/
-// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
+// Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#include <stdlib.h>
@@ -17,6 +15,7 @@
#include "PropSet.h"
#include "Accessor.h"
+#include "StyleContext.h"
#include "KeyWords.h"
#include "Scintilla.h"
#include "SciLexer.h"
@@ -25,100 +24,29 @@
using namespace Scintilla;
#endif
-bool is_whitespace(int ch){
- return ch == '\n' || ch == '\r' || ch == '\t' || ch == ' ';
+static inline bool IsAWordChar(int ch) { // True when ch is below 0x80 and is alphanumeric (isalnum) or one of the punctuation characters tested here.
+ return (ch < 0x80) && (isalnum(ch) || ch == '.' || // NOTE(review): only the upper bound of ch is checked before isalnum() - confirm callers never pass a negative value.
+ ch == '_' || ch == '?' || ch == '"' || ch == '@' ||
+ ch == '!' || ch == '[' || ch == ']' || ch == '/' ||
+ ch == '+' || ch == '-' || ch == '*' || ch == '<' ||
+ ch == '>' || ch == '=' || ch == ';' || ch == '(' ||
+ ch == ')' ); // NOTE(review): no call to IsAWordChar appears in the lines added by this diff - confirm whether it is still needed.
}
-bool is_blank(int ch){
- return ch == '\t' || ch == ' ';
+static inline bool IsAWordStart(int ch) { // Used by the colouriser to decide whether ch may open an identifier.
+ return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.'); // Below 0x80 and alphanumeric, underscore or period.
}
-//#define FORTH_DEBUG
-#ifdef FORTH_DEBUG
-static FILE *f_debug;
-#define log(x) fputs(f_debug,x);
-#else
-#define log(x)
-#endif
-
-#define STATE_LOCALE
-#define BL ' '
-static Accessor *st;
-static int cur_pos,pos1,pos2,pos0,lengthDoc;
-char *buffer;
-
-char getChar(bool is_bl){
- char ch=st->SafeGetCharAt(cur_pos);
- if(is_bl) if(is_whitespace(ch)) ch=BL;
- return ch;
+static inline bool IsANumChar(int ch) { // True when ch is below 0x80 and is a hex digit or a period. NOTE(review): no call appears in the lines added by this diff.
+ return (ch < 0x80) && (isxdigit(ch) || ch == '.' || ch == 'e' || ch == 'E' ); // The explicit e and E tests are already covered by isxdigit().
}
-char getCharBL(){
- char ch=st->SafeGetCharAt(cur_pos);
- return ch;
-}
-bool is_eol(char ch){
- return ch=='\n' || ch=='\r';
-}
-
-int parse(char ch, bool skip_eol){
-// pos1 - start pos of word
-// pos2 - pos after of word
-// pos0 - start pos
- char c=0;
- int len;
- bool is_bl=ch==BL;
- pos0=pos1=pos2=cur_pos;
- for(;cur_pos<lengthDoc && (c=getChar(is_bl))==ch; cur_pos++){
- if(is_eol(c) && !skip_eol){
- pos2=pos1;
- return 0;
- }
- }
- pos1=cur_pos;
- pos2=pos1;
- if(cur_pos==lengthDoc) return 0;
- for(len=0;cur_pos<lengthDoc && (c=getChar(is_bl))!=ch; cur_pos++){
- if(is_eol(c) && !skip_eol) break;
- pos2++;
- buffer[len++]=c;
- }
- if(c==ch) pos2--;
- buffer[len]='\0';
-#ifdef FORTH_DEBUG
- fprintf(f_debug,"parse: %c %s\n",ch,buffer);
-#endif
- return len;
+static inline bool IsASpaceChar(int ch) { // True when ch is below 0x80 and isspace() accepts it; larger values are never treated as whitespace.
+ return (ch < 0x80) && isspace(ch);
}
-bool _is_number(char *s,int base){
- for(;*s;s++){
- int digit=((int)*s)-(int)'0';
-#ifdef FORTH_DEBUG
- fprintf(f_debug,"digit: %c %d\n",*s,digit);
-#endif
- if(digit>9 && base>10) digit-=7;
- if(digit<0) return false;
- if(digit>=base) return false;
- }
- return true;
-}
-
-bool is_number(char *s){
- if(strncmp(s,"0x",2)==0) return _is_number(s+2,16);
- return _is_number(s,10);
-}
-
-static void ColouriseForthDoc(unsigned int startPos, int length, int, WordList *keywordLists[], Accessor &styler)
-{
- st=&styler;
- cur_pos=startPos;
- lengthDoc = startPos + length;
- buffer = new char[length];
-
-#ifdef FORTH_DEBUG
- f_debug=fopen("c:\\sci.log","at");
-#endif
+static void ColouriseForthDoc(unsigned int startPos, int length, int initStyle, WordList *keywordLists[],
+ Accessor &styler) {
WordList &control = *keywordLists[0];
WordList &keyword = *keywordLists[1];
@@ -127,226 +55,120 @@ static void ColouriseForthDoc(unsigned int startPos, int length, int, WordList *
WordList &preword2 = *keywordLists[4];
WordList &strings = *keywordLists[5];
- // go through all provided text segment
- // using the hand-written state machine shown below
- styler.StartAt(startPos);
- styler.StartSegment(startPos);
- while(parse(BL,true)!=0){
- if(pos0!=pos1){
- styler.ColourTo(pos0,SCE_FORTH_DEFAULT);
- styler.ColourTo(pos1-1,SCE_FORTH_DEFAULT);
- }
- if(strcmp("\\",buffer)==0){
- styler.ColourTo(pos1,SCE_FORTH_COMMENT);
- parse(1,false);
- styler.ColourTo(pos2,SCE_FORTH_COMMENT);
- }else if(strcmp("(",buffer)==0){
- styler.ColourTo(pos1,SCE_FORTH_COMMENT);
- parse(')',true);
- if(cur_pos<lengthDoc) cur_pos++;
- styler.ColourTo(cur_pos,SCE_FORTH_COMMENT);
- }else if(strcmp("[",buffer)==0){
- styler.ColourTo(pos1,SCE_FORTH_STRING);
- parse(']',true);
- if(cur_pos<lengthDoc) cur_pos++;
- styler.ColourTo(cur_pos,SCE_FORTH_STRING);
- }else if(strcmp("{",buffer)==0){
- styler.ColourTo(pos1,SCE_FORTH_LOCALE);
- parse('}',false);
- if(cur_pos<lengthDoc) cur_pos++;
- styler.ColourTo(cur_pos,SCE_FORTH_LOCALE);
- }else if(strings.InList(buffer)) {
- styler.ColourTo(pos1,SCE_FORTH_STRING);
- parse('"',false);
- if(cur_pos<lengthDoc) cur_pos++;
- styler.ColourTo(cur_pos,SCE_FORTH_STRING);
- }else if(control.InList(buffer)) {
- styler.ColourTo(pos1,SCE_FORTH_CONTROL);
- styler.ColourTo(pos2,SCE_FORTH_CONTROL);
- }else if(keyword.InList(buffer)) {
- styler.ColourTo(pos1,SCE_FORTH_KEYWORD);
- styler.ColourTo(pos2,SCE_FORTH_KEYWORD);
- }else if(defword.InList(buffer)) {
- styler.ColourTo(pos1,SCE_FORTH_KEYWORD);
- styler.ColourTo(pos2,SCE_FORTH_KEYWORD);
- parse(BL,false);
- styler.ColourTo(pos1-1,SCE_FORTH_DEFAULT);
- styler.ColourTo(pos1,SCE_FORTH_DEFWORD);
- styler.ColourTo(pos2,SCE_FORTH_DEFWORD);
- }else if(preword1.InList(buffer)) {
- styler.ColourTo(pos1,SCE_FORTH_PREWORD1);
- parse(BL,false);
- styler.ColourTo(pos2,SCE_FORTH_PREWORD1);
- }else if(preword2.InList(buffer)) {
- styler.ColourTo(pos1,SCE_FORTH_PREWORD2);
- parse(BL,false);
- styler.ColourTo(pos2,SCE_FORTH_PREWORD2);
- parse(BL,false);
- styler.ColourTo(pos1,SCE_FORTH_STRING);
- styler.ColourTo(pos2,SCE_FORTH_STRING);
- }else if(is_number(buffer)){
- styler.ColourTo(pos1,SCE_FORTH_NUMBER);
- styler.ColourTo(pos2,SCE_FORTH_NUMBER);
- }
- }
-#ifdef FORTH_DEBUG
- fclose(f_debug);
-#endif
- delete []buffer;
- return;
-/*
- if(control.InList(buffer)) {
- styler.ColourTo(i,SCE_FORTH_CONTROL);
- } else if(keyword.InList(buffer)) {
- styler.ColourTo(i-1,SCE_FORTH_KEYWORD );
- } else if(defword.InList(buffer)) {
- styler.ColourTo(i-1,SCE_FORTH_DEFWORD );
-// prev_state=SCE_FORTH_DEFWORD
- } else if(preword1.InList(buffer)) {
- styler.ColourTo(i-1,SCE_FORTH_PREWORD1 );
-// state=SCE_FORTH_PREWORD1;
- } else if(preword2.InList(buffer)) {
- styler.ColourTo(i-1,SCE_FORTH_PREWORD2 );
- } else {
- styler.ColourTo(i-1,SCE_FORTH_DEFAULT);
- }
-*/
-/*
- chPrev=' ';
- for (int i = startPos; i < lengthDoc; i++) {
- char ch = chNext;
- chNext = styler.SafeGetCharAt(i + 1);
- if(i!=startPos) chPrev=styler.SafeGetCharAt(i - 1);
-
- if (styler.IsLeadByte(ch)) {
- chNext = styler.SafeGetCharAt(i + 2);
- i++;
- continue;
- }
-#ifdef FORTH_DEBUG
- fprintf(f_debug,"%c %d ",ch,state);
-#endif
- switch(state) {
- case SCE_FORTH_DEFAULT:
- if(is_whitespace(ch)) {
- // whitespace is simply ignored here...
- styler.ColourTo(i,SCE_FORTH_DEFAULT);
- break;
- } else if( ch == '\\' && is_blank(chNext)) {
- // signals the start of an one line comment...
- state = SCE_FORTH_COMMENT;
- styler.ColourTo(i,SCE_FORTH_COMMENT);
- } else if( is_whitespace(chPrev) && ch == '(' && is_whitespace(chNext)) {
- // signals the start of a plain comment...
- state = SCE_FORTH_COMMENT_ML;
- styler.ColourTo(i,SCE_FORTH_COMMENT_ML);
- } else if( isdigit(ch) ) {
- // signals the start of a number
- bufferCount = 0;
- buffer[bufferCount++] = ch;
- state = SCE_FORTH_NUMBER;
- } else if( !is_whitespace(ch)) {
- // signals the start of an identifier
- bufferCount = 0;
- buffer[bufferCount++] = ch;
- state = SCE_FORTH_IDENTIFIER;
- } else {
- // style it the default style..
- styler.ColourTo(i,SCE_FORTH_DEFAULT);
- }
- break;
-
- case SCE_FORTH_COMMENT:
- // if we find a newline here,
- // we simply go to default state
- // else continue to work on it...
- if( ch == '\n' || ch == '\r' ) {
- state = SCE_FORTH_DEFAULT;
- } else {
- styler.ColourTo(i,SCE_FORTH_COMMENT);
- }
- break;
-
- case SCE_FORTH_COMMENT_ML:
- if( ch == ')') {
- state = SCE_FORTH_DEFAULT;
- } else {
- styler.ColourTo(i+1,SCE_FORTH_COMMENT_ML);
- }
- break;
-
- case SCE_FORTH_IDENTIFIER:
- // stay in CONF_IDENTIFIER state until we find a non-alphanumeric
- if( !is_whitespace(ch) ) {
- buffer[bufferCount++] = ch;
- } else {
- state = SCE_FORTH_DEFAULT;
- buffer[bufferCount] = '\0';
-#ifdef FORTH_DEBUG
- fprintf(f_debug,"\nid %s\n",buffer);
-#endif
-
- // check if the buffer contains a keyword,
- // and highlight it if it is a keyword...
-// switch(prev_state)
-// case SCE_FORTH_DEFAULT:
- if(control.InList(buffer)) {
- styler.ColourTo(i,SCE_FORTH_CONTROL);
- } else if(keyword.InList(buffer)) {
- styler.ColourTo(i-1,SCE_FORTH_KEYWORD );
- } else if(defword.InList(buffer)) {
- styler.ColourTo(i-1,SCE_FORTH_DEFWORD );
-// prev_state=SCE_FORTH_DEFWORD
- } else if(preword1.InList(buffer)) {
- styler.ColourTo(i-1,SCE_FORTH_PREWORD1 );
-// state=SCE_FORTH_PREWORD1;
- } else if(preword2.InList(buffer)) {
- styler.ColourTo(i-1,SCE_FORTH_PREWORD2 );
- } else {
- styler.ColourTo(i-1,SCE_FORTH_DEFAULT);
- }
-// break;
-// case
-
- // push back the faulty character
- chNext = styler[i--];
- }
- break;
-
- case SCE_FORTH_NUMBER:
- // stay in CONF_NUMBER state until we find a non-numeric
- if( isdigit(ch) ) {
- buffer[bufferCount++] = ch;
- } else {
- state = SCE_FORTH_DEFAULT;
- buffer[bufferCount] = '\0';
- // Colourize here... (normal number)
- styler.ColourTo(i-1,SCE_FORTH_NUMBER);
- // push back a character
- chNext = styler[i--];
- }
- break;
- }
- }
-#ifdef FORTH_DEBUG
- fclose(f_debug);
-#endif
- delete []buffer;
-*/
+ StyleContext sc(startPos, length, initStyle, styler); // Styling context built from the requested range and initStyle; every state change below goes through sc.
+
+ for (; sc.More(); sc.Forward())
+ {
+ // Step 1: decide whether the state we are currently in ends at this character.
+ if (sc.state == SCE_FORTH_COMMENT) { // Comment opened by a backslash (see step 2): it lasts until the line end.
+ if (sc.atLineEnd) {
+ sc.SetState(SCE_FORTH_DEFAULT);
+ }
+ }else if (sc.state == SCE_FORTH_COMMENT_ML) { // Parenthesised comment: closed by the next right parenthesis.
+ if (sc.ch == ')') {
+ sc.ForwardSetState(SCE_FORTH_DEFAULT); // Presumably steps past the parenthesis before switching state - confirm in StyleContext.
+ }
+ }else if (sc.state == SCE_FORTH_IDENTIFIER || sc.state == SCE_FORTH_NUMBER) {
+ // handle numbers here too, because what we thought was a number might
+ // turn out to be a keyword e.g. 2DUP
+ if (IsASpaceChar(sc.ch) ) { // The word ends at the first whitespace character.
+ char s[100];
+ sc.GetCurrentLowered(s, sizeof(s)); // s receives the current word (lower-cased, going by the method name) for the list lookups below.
+ int newState = sc.state == SCE_FORTH_NUMBER ? SCE_FORTH_NUMBER : SCE_FORTH_DEFAULT; // State to continue in after the word: NUMBER if we were in NUMBER, otherwise DEFAULT.
+ if (control.InList(s)) { // The lists are tried in this order; the first one containing s decides the style of the word.
+ sc.ChangeState(SCE_FORTH_CONTROL);
+ } else if (keyword.InList(s)) {
+ sc.ChangeState(SCE_FORTH_KEYWORD);
+ } else if (defword.InList(s)) {
+ sc.ChangeState(SCE_FORTH_DEFWORD);
+ } else if (preword1.InList(s)) {
+ sc.ChangeState(SCE_FORTH_PREWORD1);
+ } else if (preword2.InList(s)) {
+ sc.ChangeState(SCE_FORTH_PREWORD2);
+ } else if (strings.InList(s)) {
+ sc.ChangeState(SCE_FORTH_STRING);
+ newState = SCE_FORTH_STRING; // Remain in STRING after the word; the STRING branch below ends it at a double quote.
+ }
+ sc.SetState(newState);
+ }
+ if (sc.state == SCE_FORTH_NUMBER) { // Also reached right after SetState(newState) above when newState is NUMBER.
+ if (IsASpaceChar(sc.ch)) {
+ sc.SetState(SCE_FORTH_DEFAULT); // At whitespace the NUMBER state is left for DEFAULT.
+ }
+ }
+ }else if (sc.state == SCE_FORTH_STRING) { // String text: closed by a double quote.
+ if (sc.ch == '\"') {
+ sc.ForwardSetState(SCE_FORTH_DEFAULT);
+ }
+ }else if (sc.state == SCE_FORTH_LOCALE) { // Opened by a left brace in step 2, closed by a right brace.
+ if (sc.ch == '}') {
+ sc.ForwardSetState(SCE_FORTH_DEFAULT);
+ }
+ }else if (sc.state == SCE_FORTH_DEFWORD) { // A definition word ends at the first whitespace character.
+ if (IsASpaceChar(sc.ch)) {
+ sc.SetState(SCE_FORTH_DEFAULT);
+ }
+ }
+
+ // Step 2: in the default state, decide whether this character starts a new state.
+ if (sc.state == SCE_FORTH_DEFAULT) {
+ if (sc.ch == '\\'){ // Any backslash seen in the default state starts a comment.
+ sc.SetState(SCE_FORTH_COMMENT);
+ } else if (sc.ch == '(' && // A left parenthesis opens a comment only with whitespace or a line boundary on both sides.
+ (sc.atLineStart || IsASpaceChar(sc.chPrev)) &&
+ (sc.atLineEnd || IsASpaceChar(sc.chNext))) {
+ sc.SetState(SCE_FORTH_COMMENT_ML);
+ } else if ( (sc.ch == '$' && (isascii(sc.chNext) && isxdigit(sc.chNext))) ) {
+ // number starting with $ is a hex number
+ sc.SetState(SCE_FORTH_NUMBER);
+ while(sc.More() && isascii(sc.chNext) && isxdigit(sc.chNext)) // Advance while the next character is still a hex digit.
+ sc.Forward();
+ } else if ( (sc.ch == '%' && (isascii(sc.chNext) && (sc.chNext == '0' || sc.chNext == '1'))) ) {
+ // number starting with % is binary
+ sc.SetState(SCE_FORTH_NUMBER);
+ while(sc.More() && isascii(sc.chNext) && (sc.chNext == '0' || sc.chNext == '1')) // Advance while the next character is still 0 or 1.
+ sc.Forward();
+ } else if ( isascii(sc.ch) && // A hex digit, or a period or minus sign directly followed by a hex digit, starts a number.
+ (isxdigit(sc.ch) || ((sc.ch == '.' || sc.ch == '-') && isascii(sc.chNext) && isxdigit(sc.chNext)) )
+ ){
+ sc.SetState(SCE_FORTH_NUMBER); // isxdigit also accepts a-f and A-F, so a word starting with one of those letters enters NUMBER before the IsAWordStart test below.
+ } else if (IsAWordStart(sc.ch)) {
+ sc.SetState(SCE_FORTH_IDENTIFIER);
+ } else if (sc.ch == '{') {
+ sc.SetState(SCE_FORTH_LOCALE);
+ } else if (sc.ch == ':' && isascii(sc.chNext) && isspace(sc.chNext)) {
+ // highlight word definitions e.g. : GCD ( n n -- n ) ..... ;
+ // ^ ^^^
+ sc.SetState(SCE_FORTH_DEFWORD);
+ while(sc.More() && isascii(sc.chNext) && isspace(sc.chNext)) // Skip the whitespace after the colon so DEFWORD continues into the following word.
+ sc.Forward();
+ } else if (sc.ch == ';' && // Same isolation test as for the left parenthesis above.
+ (sc.atLineStart || IsASpaceChar(sc.chPrev)) &&
+ (sc.atLineEnd || IsASpaceChar(sc.chNext)) ) {
+ // mark the ';' that ends a word
+ sc.SetState(SCE_FORTH_DEFWORD);
+ sc.ForwardSetState(SCE_FORTH_DEFAULT);
+ }
+ }
+
+ }
+ sc.Complete(); // Called once after the loop; a word still open at the end of the range has not gone through the list lookup in step 1.
}
static void FoldForthDoc(unsigned int, int, int, WordList *[],
- Accessor &) {
+ Accessor &) { // All parameters are unnamed and the body is empty: this function does nothing.
}
static const char * const forthWordLists[] = {
- "control keywords",
- "keywords",
- "definition words",
- "prewords with one argument",
- "prewords with two arguments",
- "string definition keywords",
- 0,
- };
-
-LexerModule lmForth(SCLEX_FORTH, ColouriseForthDoc, "forth",FoldForthDoc,forthWordLists);
+ "control keywords", // Descriptions of the word lists; the order appears to match the keywordLists[] indices read in ColouriseForthDoc (0 = control ... 5 = strings) - confirm.
+ "keywords",
+ "definition words",
+ "prewords with one argument",
+ "prewords with two arguments",
+ "string definition keywords",
+ 0, // Final 0 entry, presumably the end-of-list marker for whoever reads this array - confirm.
+ };
+
+LexerModule lmForth(SCLEX_FORTH, ColouriseForthDoc, "forth", FoldForthDoc, forthWordLists); // Lexer module object for SCLEX_FORTH: given the colouriser, the name "forth", the fold function and the word list descriptions.
+
+