about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author nyamatongwe <devnull@localhost> 2009-07-01 09:56:09 +0000
committer nyamatongwe <devnull@localhost> 2009-07-01 09:56:09 +0000
commit 8d5a53cffef2184123db292618cbcbbaf2b002d3 (patch)
tree 06dbb515849f45a525d531abb608312d7c0b9422
parent 54778e84b158929f9fa76808772d7ff2e77e0fe3 (diff)
download scintilla-mirror-8d5a53cffef2184123db292618cbcbbaf2b002d3.tar.gz
Update to CAML lexer from Robert Roessler includes support for SML [rel-1-79]
and some bug fixes.
-rw-r--r-- doc/ScintillaHistory.html 7
-rw-r--r-- include/SciLexer.h 1
-rw-r--r-- include/Scintilla.iface 1
-rw-r--r-- src/LexCaml.cxx 153
4 files changed, 109 insertions, 53 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index a907a5528..111e0e8fa 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -403,6 +403,9 @@
Batch file lexer understands variables surrounded by '!'.
</li>
<li>
+ CAML lexer also supports SML.
+ </li>
+ <li>
D lexer handles string and numeric literals more accurately. Feature #2793782.
</li>
<li>
@@ -416,6 +419,10 @@
Pascal lexer bug fixed to prevent hang when 'interface' near beginning of file. Bug #2802863.
</li>
<li>
+ Perl lexer bug fixed where previous lexical states persisted causing "/" special case styling and
+ subroutine prototype styling to not be correct. Bug #2809168.
+ </li>
+ <li>
XML lexer fixes bug where Unicode entities like '&amp;—' were broken into fragments. Bug #2804760.
</li>
<li>
diff --git a/include/SciLexer.h b/include/SciLexer.h
index f136d9931..e741ff486 100644
--- a/include/SciLexer.h
+++ b/include/SciLexer.h
@@ -938,6 +938,7 @@
#define SCE_CAML_OPERATOR 7
#define SCE_CAML_NUMBER 8
#define SCE_CAML_CHAR 9
+#define SCE_CAML_WHITE 10
#define SCE_CAML_STRING 11
#define SCE_CAML_COMMENT 12
#define SCE_CAML_COMMENT1 13
diff --git a/include/Scintilla.iface b/include/Scintilla.iface
index 66ad42e8a..91d19ef85 100644
--- a/include/Scintilla.iface
+++ b/include/Scintilla.iface
@@ -3090,6 +3090,7 @@ val SCE_CAML_LINENUM=6
val SCE_CAML_OPERATOR=7
val SCE_CAML_NUMBER=8
val SCE_CAML_CHAR=9
+val SCE_CAML_WHITE=10
val SCE_CAML_STRING=11
val SCE_CAML_COMMENT=12
val SCE_CAML_COMMENT1=13
diff --git a/src/LexCaml.cxx b/src/LexCaml.cxx
index 539eee0de..6570dcc07 100644
--- a/src/LexCaml.cxx
+++ b/src/LexCaml.cxx
@@ -2,7 +2,7 @@
/** @file LexCaml.cxx
** Lexer for Objective Caml.
**/
-// Copyright 2005 by Robert Roessler <robertr@rftp.com>
+// Copyright 2005-2009 by Robert Roessler <robertr@rftp.com>
// The License.txt file describes the conditions under which this software may be distributed.
/* Release History
20050204 Initial release.
@@ -15,6 +15,7 @@
20051125 Added 2nd "optional" keywords class.
20051129 Support "magic" (read-only) comments for RCaml.
20051204 Swtich to using StyleContext infrastructure.
+ 20090629 Add full Standard ML '97 support.
*/
#include <stdlib.h>
@@ -35,7 +36,6 @@
// Since the Microsoft __iscsym[f] funcs are not ANSI...
inline int iscaml(int c) {return isalnum(c) || c == '_';}
inline int iscamlf(int c) {return isalpha(c) || c == '_';}
-inline int iscamld(int c) {return isdigit(c) || c == '_';}
static const int baseT[24] = {
0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A - L */
@@ -179,25 +179,27 @@ void ColouriseCamlDoc(
{
// initialize styler
StyleContext sc(startPos, length, initStyle, styler);
- // set up [initial] state info (terminating states that shouldn't "bleed")
- int nesting = 0;
- if (sc.state < SCE_CAML_STRING)
- sc.state = SCE_CAML_DEFAULT;
- if (sc.state >= SCE_CAML_COMMENT)
- nesting = (sc.state & 0x0f) - SCE_CAML_COMMENT;
int chBase = 0, chToken = 0, chLit = 0;
WordList& keywords = *keywordlists[0];
WordList& keywords2 = *keywordlists[1];
WordList& keywords3 = *keywordlists[2];
+ const bool isSML = keywords.InList("andalso");
const int useMagic = styler.GetPropertyInt("lexer.caml.magic", 0);
+ // set up [initial] state info (terminating states that shouldn't "bleed")
+ const int state_ = sc.state & 0x0f;
+ if (state_ <= SCE_CAML_CHAR
+ || (isSML && state_ == SCE_CAML_STRING))
+ sc.state = SCE_CAML_DEFAULT;
+ int nesting = (state_ >= SCE_CAML_COMMENT)? (state_ - SCE_CAML_COMMENT): 0;
+
// foreach char in range...
while (sc.More()) {
// set up [per-char] state info
- int state2 = -1; // (ASSUME no state change)
+ int state2 = -1; // (ASSUME no state change)
int chColor = sc.currentPos - 1;// (ASSUME standard coloring range)
- bool advance = true; // (ASSUME scanner "eats" 1 char)
+ bool advance = true; // (ASSUME scanner "eats" 1 char)
// step state machine
switch (sc.state & 0x0f) {
@@ -206,25 +208,38 @@ void ColouriseCamlDoc(
// it's wide open; what do we have?
if (iscamlf(sc.ch))
state2 = SCE_CAML_IDENTIFIER;
- else if (sc.Match('`') && iscamlf(sc.chNext))
+ else if (!isSML && sc.Match('`') && iscamlf(sc.chNext))
state2 = SCE_CAML_TAGNAME;
- else if (sc.Match('#') && isdigit(sc.chNext))
+ else if (!isSML && sc.Match('#') && isdigit(sc.chNext))
state2 = SCE_CAML_LINENUM;
else if (isdigit(sc.ch)) {
+ // it's a number, assume base 10
state2 = SCE_CAML_NUMBER, chBase = 10;
- if (sc.Match('0') && strchr("bBoOxX", sc.chNext))
- chBase = baseT[tolower(sc.chNext) - 'a'], sc.Forward();
- } else if (sc.Match('\'')) /* (char literal?) */
+ if (sc.Match('0')) {
+ // there MAY be a base specified...
+ const char* baseC = "bBoOxX";
+ if (isSML) {
+ if (sc.chNext == 'w')
+ sc.Forward(); // (consume SML "word" indicator)
+ baseC = "x";
+ }
+ // ... change to specified base AS REQUIRED
+ if (strchr(baseC, sc.chNext))
+ chBase = baseT[tolower(sc.chNext) - 'a'], sc.Forward();
+ }
+ } else if (!isSML && sc.Match('\'')) // (Caml char literal?)
state2 = SCE_CAML_CHAR, chLit = 0;
- else if (sc.Match('\"'))
+ else if (isSML && sc.Match('#', '"')) // (SML char literal?)
+ state2 = SCE_CAML_CHAR, sc.Forward();
+ else if (sc.Match('"'))
state2 = SCE_CAML_STRING;
else if (sc.Match('(', '*'))
- state2 = SCE_CAML_COMMENT,
- sc.ch = ' ', // (make SURE "(*)" isn't seen as a closed comment)
- sc.Forward();
- else if (strchr("!?~" /* Caml "prefix-symbol" */
- "=<>@^|&+-*/$%" /* Caml "infix-symbol" */
- "()[]{};,:.#", sc.ch)) /* Caml "bracket" or ;,:.# */
+ state2 = SCE_CAML_COMMENT, sc.Forward(), sc.ch = ' '; // (*)...
+ else if (strchr("!?~" /* Caml "prefix-symbol" */
+ "=<>@^|&+-*/$%" /* Caml "infix-symbol" */
+ "()[]{};,:.#", sc.ch) // Caml "bracket" or ;,:.#
+ // SML "extra" ident chars
+ || (isSML && (sc.Match('\\') || sc.Match('`'))))
state2 = SCE_CAML_OPERATOR;
break;
@@ -273,9 +288,12 @@ void ColouriseCamlDoc(
case SCE_CAML_OPERATOR: {
// [try to] interpret as [additional] operator char
const char* o = 0;
- if (iscaml(sc.ch) || isspace(sc.ch) /* ident or whitespace */
- || (o = strchr(")]};,\'\"`#", sc.ch),o)/* "termination" chars */
- || !strchr("!$%&*+-./:<=>?@^|~", sc.ch)/* "operator" chars */) {
+ if (iscaml(sc.ch) || isspace(sc.ch) // ident or whitespace
+ || (o = strchr(")]};,\'\"#", sc.ch),o) // "termination" chars
+ || (!isSML && sc.Match('`')) // Caml extra term char
+ || (!strchr("!$%&*+-./:<=>?@^|~", sc.ch)// "operator" chars
+ // SML extra ident chars
+ && !(isSML && (sc.Match('\\') || sc.Match('`'))))) {
// check for INCLUSIVE termination
if (o && strchr(")]};,", sc.ch)) {
if ((sc.Match(')') && sc.chPrev == '(')
@@ -292,24 +310,27 @@ void ColouriseCamlDoc(
case SCE_CAML_NUMBER:
// [try to] interpret as [additional] numeric literal char
- // N.B. - improperly accepts "extra" digits in base 2 or 8 literals
- if (iscamld(sc.ch) || IsADigit(sc.ch, chBase))
+ if ((!isSML && sc.Match('_')) || IsADigit(sc.ch, chBase))
break;
// how about an integer suffix?
- if ((sc.Match('l') || sc.Match('L') || sc.Match('n'))
- && (iscamld(sc.chPrev) || IsADigit(sc.chPrev, chBase)))
+ if (!isSML && (sc.Match('l') || sc.Match('L') || sc.Match('n'))
+ && (sc.chPrev == '_' || IsADigit(sc.chPrev, chBase)))
break;
// or a floating-point literal?
if (chBase == 10) {
// with a decimal point?
- if (sc.Match('.') && iscamld(sc.chPrev))
+ if (sc.Match('.')
+ && ((!isSML && sc.chPrev == '_')
+ || IsADigit(sc.chPrev, chBase)))
break;
// with an exponent? (I)
if ((sc.Match('e') || sc.Match('E'))
- && (iscamld(sc.chPrev) || sc.chPrev == '.'))
+ && ((!isSML && (sc.chPrev == '.' || sc.chPrev == '_'))
+ || IsADigit(sc.chPrev, chBase)))
break;
// with an exponent? (II)
- if ((sc.Match('+') || sc.Match('-'))
+ if (((!isSML && (sc.Match('+') || sc.Match('-')))
+ || (isSML && sc.Match('~')))
&& (sc.chPrev == 'e' || sc.chPrev == 'E'))
break;
}
@@ -318,29 +339,56 @@ void ColouriseCamlDoc(
break;
case SCE_CAML_CHAR:
- // [try to] interpret as [additional] char literal char
- if (sc.Match('\\')) {
- chLit = 1; // (definitely IS a char literal)
- if (sc.chPrev == '\\')
- sc.ch = ' '; // (so termination test isn't fooled)
+ if (!isSML) {
+ // [try to] interpret as [additional] char literal char
+ if (sc.Match('\\')) {
+ chLit = 1; // (definitely IS a char literal)
+ if (sc.chPrev == '\\')
+ sc.ch = ' '; // (...\\')
+ // should we be terminating - one way or another?
+ } else if ((sc.Match('\'') && sc.chPrev != '\\')
+ || sc.atLineEnd) {
+ state2 = SCE_CAML_DEFAULT;
+ if (sc.Match('\''))
+ chColor++;
+ else
+ sc.ChangeState(SCE_CAML_IDENTIFIER);
+ // ... maybe a char literal, maybe not
+ } else if (chLit < 1 && sc.currentPos - chToken >= 2)
+ sc.ChangeState(SCE_CAML_IDENTIFIER), advance = false;
+ break;
+ }/* else
+ // fall through for SML char literal (handle like string) */
+
+ case SCE_CAML_STRING:
+ // [try to] interpret as [additional] [SML char/] string literal char
+ if (isSML && sc.Match('\\') && sc.chPrev != '\\' && isspace(sc.chNext))
+ state2 = SCE_CAML_WHITE;
+ else if (sc.Match('\\') && sc.chPrev == '\\')
+ sc.ch = ' '; // (...\\")
// should we be terminating - one way or another?
- } else if ((sc.Match('\'') && sc.chPrev != '\\') || sc.atLineEnd) {
+ else if ((sc.Match('"') && sc.chPrev != '\\')
+ || (isSML && sc.atLineEnd)) {
state2 = SCE_CAML_DEFAULT;
- if (sc.Match('\''))
+ if (sc.Match('"'))
chColor++;
- else
- sc.ChangeState(SCE_CAML_IDENTIFIER);
- // ... maybe a char literal, maybe not
- } else if (chLit < 1 && sc.currentPos - chToken >= 2)
- sc.ChangeState(SCE_CAML_IDENTIFIER), advance = false;
+ }
break;
- case SCE_CAML_STRING:
- // [try to] interpret as [additional] string literal char
- if (sc.Match('\\') && sc.chPrev == '\\')
- sc.ch = ' '; // (so '\\' doesn't cause us trouble)
- else if (sc.Match('\"') && sc.chPrev != '\\')
- state2 = SCE_CAML_DEFAULT, chColor++;
+ case SCE_CAML_WHITE:
+ // [try to] interpret as [additional] SML embedded whitespace char
+ if (sc.Match('\\')) {
+ // style this puppy NOW...
+ state2 = SCE_CAML_STRING, sc.ch = ' ' /* (...\") */, chColor++,
+ styler.ColourTo(chColor, SCE_CAML_WHITE), styler.Flush();
+ // ... then backtrack to determine original SML literal type
+ int p = chColor - 2;
+ for (; p >= 0 && styler.StyleAt(p) == SCE_CAML_WHITE; p--) ;
+ if (p >= 0)
+ state2 = static_cast<int>(styler.StyleAt(p));
+ // take care of state change NOW
+ sc.ChangeState(state2), state2 = -1;
+ }
break;
case SCE_CAML_COMMENT:
@@ -350,8 +398,7 @@ void ColouriseCamlDoc(
// we're IN a comment - does this start a NESTED comment?
if (sc.Match('(', '*'))
state2 = sc.state + 1, chToken = sc.currentPos,
- sc.ch = ' ', // (make SURE "(*)" isn't seen as a closed comment)
- sc.Forward(), nesting++;
+ sc.Forward(), sc.ch = ' ' /* (*)... */, nesting++;
// [try to] interpret as [additional] comment char
else if (sc.Match(')') && sc.chPrev == '*') {
if (nesting)
@@ -366,7 +413,7 @@ void ColouriseCamlDoc(
break;
}
- // handle state change and char coloring as required
+ // handle state change and char coloring AS REQUIRED
if (state2 >= 0)
styler.ColourTo(chColor, sc.state), sc.ChangeState(state2);
// move to next char UNLESS re-scanning current char