diff options
| -rw-r--r-- | doc/ScintillaHistory.html | 7 | ||||
| -rw-r--r-- | include/SciLexer.h | 1 | ||||
| -rw-r--r-- | include/Scintilla.iface | 1 | ||||
| -rw-r--r-- | src/LexCaml.cxx | 153 | 
4 files changed, 109 insertions, 53 deletions
| diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index a907a5528..111e0e8fa 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -403,6 +403,9 @@  	Batch file lexer understands variables surrounded by '!'.  	</li>  	<li> +	CAML lexer also supports SML. +	</li> +	<li>  	D lexer handles string and numeric literals more accurately. Feature #2793782.  	</li>  	<li> @@ -416,6 +419,10 @@  	Pascal lexer bug fixed to prevent hang when 'interface' near beginning of file. Bug #2802863.  	</li>  	<li> +	Perl lexer bug fixed where previous lexical states persisted causing "/" special case styling and +	subroutine prototype styling to not be correct. Bug #2809168. +	</li> +	<li>  	XML lexer fixes bug where Unicode entities like '&—' were broken into fragments. Bug #2804760.  	</li>  	<li> diff --git a/include/SciLexer.h b/include/SciLexer.h index f136d9931..e741ff486 100644 --- a/include/SciLexer.h +++ b/include/SciLexer.h @@ -938,6 +938,7 @@  #define SCE_CAML_OPERATOR 7  #define SCE_CAML_NUMBER 8  #define SCE_CAML_CHAR 9 +#define SCE_CAML_WHITE 10  #define SCE_CAML_STRING 11  #define SCE_CAML_COMMENT 12  #define SCE_CAML_COMMENT1 13 diff --git a/include/Scintilla.iface b/include/Scintilla.iface index 66ad42e8a..91d19ef85 100644 --- a/include/Scintilla.iface +++ b/include/Scintilla.iface @@ -3090,6 +3090,7 @@ val SCE_CAML_LINENUM=6  val SCE_CAML_OPERATOR=7  val SCE_CAML_NUMBER=8  val SCE_CAML_CHAR=9 +val SCE_CAML_WHITE=10  val SCE_CAML_STRING=11  val SCE_CAML_COMMENT=12  val SCE_CAML_COMMENT1=13 diff --git a/src/LexCaml.cxx b/src/LexCaml.cxx index 539eee0de..6570dcc07 100644 --- a/src/LexCaml.cxx +++ b/src/LexCaml.cxx @@ -2,7 +2,7 @@  /** @file LexCaml.cxx   ** Lexer for Objective Caml.   **/ -// Copyright 2005 by Robert Roessler <robertr@rftp.com> +// Copyright 2005-2009 by Robert Roessler <robertr@rftp.com>  // The License.txt file describes the conditions under which this software may be distributed.  /*	Release History  	20050204 Initial release. @@ -15,6 +15,7 @@  	20051125 Added 2nd "optional" keywords class.  	20051129 Support "magic" (read-only) comments for RCaml.  	20051204 Swtich to using StyleContext infrastructure. +	20090629 Add full Standard ML '97 support.  */  #include <stdlib.h> @@ -35,7 +36,6 @@  //	Since the Microsoft __iscsym[f] funcs are not ANSI...  inline int  iscaml(int c) {return isalnum(c) || c == '_';}  inline int iscamlf(int c) {return isalpha(c) || c == '_';} -inline int iscamld(int c) {return isdigit(c) || c == '_';}  static const int baseT[24] = {  	0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* A - L */ @@ -179,25 +179,27 @@ void ColouriseCamlDoc(  {  	// initialize styler  	StyleContext sc(startPos, length, initStyle, styler); -	// set up [initial] state info (terminating states that shouldn't "bleed") -	int nesting = 0; -	if (sc.state < SCE_CAML_STRING) -		sc.state = SCE_CAML_DEFAULT; -	if (sc.state >= SCE_CAML_COMMENT) -		nesting = (sc.state & 0x0f) - SCE_CAML_COMMENT;  	int chBase = 0, chToken = 0, chLit = 0;  	WordList& keywords  = *keywordlists[0];  	WordList& keywords2 = *keywordlists[1];  	WordList& keywords3 = *keywordlists[2]; +	const bool isSML = keywords.InList("andalso");  	const int useMagic = styler.GetPropertyInt("lexer.caml.magic", 0); +	// set up [initial] state info (terminating states that shouldn't "bleed") +	const int state_ = sc.state & 0x0f; +	if (state_ <= SCE_CAML_CHAR +		|| (isSML && state_ == SCE_CAML_STRING)) +		sc.state = SCE_CAML_DEFAULT; +	int nesting = (state_ >= SCE_CAML_COMMENT)? (state_ - SCE_CAML_COMMENT): 0; +  	// foreach char in range...  	while (sc.More()) {  		// set up [per-char] state info -		int state2 = -1;		// (ASSUME no state change) +		int state2 = -1;				// (ASSUME no state change)  		int chColor = sc.currentPos - 1;// (ASSUME standard coloring range) -		bool advance = true;	// (ASSUME scanner "eats" 1 char) +		bool advance = true;			// (ASSUME scanner "eats" 1 char)  		// step state machine  		switch (sc.state & 0x0f) { @@ -206,25 +208,38 @@ void ColouriseCamlDoc(  			// it's wide open; what do we have?  			if (iscamlf(sc.ch))  				state2 = SCE_CAML_IDENTIFIER; -			else if (sc.Match('`') && iscamlf(sc.chNext)) +			else if (!isSML && sc.Match('`') && iscamlf(sc.chNext))  				state2 = SCE_CAML_TAGNAME; -			else if (sc.Match('#') && isdigit(sc.chNext)) +			else if (!isSML && sc.Match('#') && isdigit(sc.chNext))  				state2 = SCE_CAML_LINENUM;  			else if (isdigit(sc.ch)) { +				// it's a number, assume base 10  				state2 = SCE_CAML_NUMBER, chBase = 10; -				if (sc.Match('0') && strchr("bBoOxX", sc.chNext)) -					chBase = baseT[tolower(sc.chNext) - 'a'], sc.Forward(); -			} else if (sc.Match('\''))	/* (char literal?) */ +				if (sc.Match('0')) { +					// there MAY be a base specified... +					const char* baseC = "bBoOxX"; +					if (isSML) { +						if (sc.chNext == 'w') +							sc.Forward();	// (consume SML "word" indicator) +						baseC = "x"; +					} +					// ... change to specified base AS REQUIRED +					if (strchr(baseC, sc.chNext)) +						chBase = baseT[tolower(sc.chNext) - 'a'], sc.Forward(); +				} +			} else if (!isSML && sc.Match('\''))	// (Caml char literal?)  				state2 = SCE_CAML_CHAR, chLit = 0; -			else if (sc.Match('\"')) +			else if (isSML && sc.Match('#', '"'))	// (SML char literal?) +				state2 = SCE_CAML_CHAR, sc.Forward(); +			else if (sc.Match('"'))  				state2 = SCE_CAML_STRING;  			else if (sc.Match('(', '*')) -				state2 = SCE_CAML_COMMENT, -					sc.ch = ' ',	// (make SURE "(*)" isn't seen as a closed comment) -					sc.Forward(); -			else if (strchr("!?~"		/* Caml "prefix-symbol" */ -					"=<>@^|&+-*/$%"		/* Caml "infix-symbol" */ -					"()[]{};,:.#", sc.ch))	/* Caml "bracket" or ;,:.# */ +				state2 = SCE_CAML_COMMENT, sc.Forward(), sc.ch = ' '; // (*)... +			else if (strchr("!?~"			/* Caml "prefix-symbol" */ +					"=<>@^|&+-*/$%"			/* Caml "infix-symbol" */ +					"()[]{};,:.#", sc.ch)	// Caml "bracket" or ;,:.# +											// SML "extra" ident chars +				|| (isSML && (sc.Match('\\') || sc.Match('`'))))  				state2 = SCE_CAML_OPERATOR;  			break; @@ -273,9 +288,12 @@ void ColouriseCamlDoc(  		case SCE_CAML_OPERATOR: {  			// [try to] interpret as [additional] operator char  			const char* o = 0; -			if (iscaml(sc.ch) || isspace(sc.ch)		   /* ident or whitespace */ -				|| (o = strchr(")]};,\'\"`#", sc.ch),o)/* "termination" chars */ -				|| !strchr("!$%&*+-./:<=>?@^|~", sc.ch)/* "operator" chars */) { +			if (iscaml(sc.ch) || isspace(sc.ch)			// ident or whitespace +				|| (o = strchr(")]};,\'\"#", sc.ch),o)	// "termination" chars +				|| (!isSML && sc.Match('`'))			// Caml extra term char +				|| (!strchr("!$%&*+-./:<=>?@^|~", sc.ch)// "operator" chars +														// SML extra ident chars +					&& !(isSML && (sc.Match('\\') || sc.Match('`'))))) {  				// check for INCLUSIVE termination  				if (o && strchr(")]};,", sc.ch)) {  					if ((sc.Match(')') && sc.chPrev == '(') @@ -292,24 +310,27 @@ void ColouriseCamlDoc(  		case SCE_CAML_NUMBER:  			// [try to] interpret as [additional] numeric literal char -			// N.B. - improperly accepts "extra" digits in base 2 or 8 literals -			if (iscamld(sc.ch) || IsADigit(sc.ch, chBase)) +			if ((!isSML && sc.Match('_')) || IsADigit(sc.ch, chBase))  				break;  			// how about an integer suffix? -			if ((sc.Match('l') || sc.Match('L') || sc.Match('n')) -				&& (iscamld(sc.chPrev) || IsADigit(sc.chPrev, chBase))) +			if (!isSML && (sc.Match('l') || sc.Match('L') || sc.Match('n')) +				&& (sc.chPrev == '_' || IsADigit(sc.chPrev, chBase)))  				break;  			// or a floating-point literal?  			if (chBase == 10) {  				// with a decimal point? -				if (sc.Match('.') && iscamld(sc.chPrev)) +				if (sc.Match('.') +					&& ((!isSML && sc.chPrev == '_') +						|| IsADigit(sc.chPrev, chBase)))  					break;  				// with an exponent? (I)  				if ((sc.Match('e') || sc.Match('E')) -					&& (iscamld(sc.chPrev) || sc.chPrev == '.')) +					&& ((!isSML && (sc.chPrev == '.' || sc.chPrev == '_')) +						|| IsADigit(sc.chPrev, chBase)))  					break;  				// with an exponent? (II) -				if ((sc.Match('+') || sc.Match('-')) +				if (((!isSML && (sc.Match('+') || sc.Match('-'))) +						|| (isSML && sc.Match('~')))  					&& (sc.chPrev == 'e' || sc.chPrev == 'E'))  					break;  			} @@ -318,29 +339,56 @@ void ColouriseCamlDoc(  			break;  		case SCE_CAML_CHAR: -			// [try to] interpret as [additional] char literal char -			if (sc.Match('\\')) { -				chLit = 1;	// (definitely IS a char literal) -				if (sc.chPrev == '\\') -					sc.ch = ' ';	// (so termination test isn't fooled) +			if (!isSML) { +				// [try to] interpret as [additional] char literal char +				if (sc.Match('\\')) { +					chLit = 1;	// (definitely IS a char literal) +					if (sc.chPrev == '\\') +						sc.ch = ' ';	// (...\\') +				// should we be terminating - one way or another? +				} else if ((sc.Match('\'') && sc.chPrev != '\\') +					|| sc.atLineEnd) { +					state2 = SCE_CAML_DEFAULT; +					if (sc.Match('\'')) +						chColor++; +					else +						sc.ChangeState(SCE_CAML_IDENTIFIER); +				// ... maybe a char literal, maybe not +				} else if (chLit < 1 && sc.currentPos - chToken >= 2) +					sc.ChangeState(SCE_CAML_IDENTIFIER), advance = false; +				break; +			}/* else +				// fall through for SML char literal (handle like string) */ + +		case SCE_CAML_STRING: +			// [try to] interpret as [additional] [SML char/] string literal char +			if (isSML && sc.Match('\\') && sc.chPrev != '\\' && isspace(sc.chNext)) +				state2 = SCE_CAML_WHITE; +			else if (sc.Match('\\') && sc.chPrev == '\\') +				sc.ch = ' ';	// (...\\")  			// should we be terminating - one way or another? -			} else if ((sc.Match('\'') && sc.chPrev != '\\') || sc.atLineEnd) { +			else if ((sc.Match('"') && sc.chPrev != '\\') +				|| (isSML && sc.atLineEnd)) {  				state2 = SCE_CAML_DEFAULT; -				if (sc.Match('\'')) +				if (sc.Match('"'))  					chColor++; -				else -					sc.ChangeState(SCE_CAML_IDENTIFIER); -			// ... maybe a char literal, maybe not -			} else if (chLit < 1 && sc.currentPos - chToken >= 2) -				sc.ChangeState(SCE_CAML_IDENTIFIER), advance = false; +			}  			break; -		case SCE_CAML_STRING: -			// [try to] interpret as [additional] string literal char -			if (sc.Match('\\') && sc.chPrev == '\\') -				sc.ch = ' ';	// (so '\\' doesn't cause us trouble) -			else if (sc.Match('\"') && sc.chPrev != '\\') -				state2 = SCE_CAML_DEFAULT, chColor++; +		case SCE_CAML_WHITE: +			// [try to] interpret as [additional] SML embedded whitespace char +			if (sc.Match('\\')) { +				// style this puppy NOW... +				state2 = SCE_CAML_STRING, sc.ch = ' ' /* (...\") */, chColor++, +					styler.ColourTo(chColor, SCE_CAML_WHITE), styler.Flush(); +				// ... then backtrack to determine original SML literal type +				int p = chColor - 2; +				for (; p >= 0 && styler.StyleAt(p) == SCE_CAML_WHITE; p--) ; +				if (p >= 0) +					state2 = static_cast<int>(styler.StyleAt(p)); +				// take care of state change NOW +				sc.ChangeState(state2), state2 = -1; +			}  			break;  		case SCE_CAML_COMMENT: @@ -350,8 +398,7 @@ void ColouriseCamlDoc(  			// we're IN a comment - does this start a NESTED comment?  			if (sc.Match('(', '*'))  				state2 = sc.state + 1, chToken = sc.currentPos, -					sc.ch = ' ',	// (make SURE "(*)" isn't seen as a closed comment) -					sc.Forward(), nesting++; +					sc.Forward(), sc.ch = ' ' /* (*)... */, nesting++;  			// [try to] interpret as [additional] comment char  			else if (sc.Match(')') && sc.chPrev == '*') {  				if (nesting) @@ -366,7 +413,7 @@ void ColouriseCamlDoc(  			break;  		} -		// handle state change and char coloring as required +		// handle state change and char coloring AS REQUIRED  		if (state2 >= 0)  			styler.ColourTo(chColor, sc.state), sc.ChangeState(state2);  		// move to next char UNLESS re-scanning current char | 
