diff options
author | rdaneelolivaw <unknown> | 2005-12-13 04:33:02 +0000 |
---|---|---|
committer | rdaneelolivaw <unknown> | 2005-12-13 04:33:02 +0000 |
commit | 6ac21186c4c1def57af868aa7f75dcd98e4ab455 (patch) | |
tree | d6385b0def6278e44850502e51216167f99fbeca /src | |
parent | a1c4062415cb852db67910f63ca3c68a499c9a5b (diff) | |
download | scintilla-mirror-6ac21186c4c1def57af868aa7f75dcd98e4ab455.tar.gz |
RBR - add new keyword class, support "magic" read-only comments for RCaml, and switch to StyleContext infrastructure
Diffstat (limited to 'src')
-rw-r--r-- | src/LexCaml.cxx | 225 |
1 files changed, 113 insertions, 112 deletions
diff --git a/src/LexCaml.cxx b/src/LexCaml.cxx index 2d11ad6cc..e424bec9b 100644 --- a/src/LexCaml.cxx +++ b/src/LexCaml.cxx @@ -12,6 +12,9 @@ 20050306 Fix for 1st-char-in-doc "corner" case. 20050502 Fix for [harmless] one-past-the-end coloring. 20050515 Refined numeric token recognition logic. + 20051125 Added 2nd "optional" keywords class. + 20051129 Support "magic" (read-only) comments for RCaml. + 20051204 Swtich to using StyleContext infrastructure. */ #include <stdlib.h> @@ -34,6 +37,11 @@ inline int iscaml(int c) {return isalnum(c) || c == '_';} inline int iscamlf(int c) {return isalpha(c) || c == '_';} inline int iscamld(int c) {return isdigit(c) || c == '_';} +static const int baseT[24] = { + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A - L */ + 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,16 /* M - X */ +}; + #ifdef BUILD_AS_EXTERNAL_LEXER /* (actually seems to work!) @@ -166,125 +174,113 @@ void ColouriseCamlDoc( Accessor &styler) { // initialize styler - styler.StartAt(startPos); - styler.StartSegment(startPos); + StyleContext sc(startPos, length, initStyle, styler); // set up [initial] state info (terminating states that shouldn't "bleed") - int state = initStyle, nesting = 0; - if (state < SCE_CAML_STRING) - state = SCE_CAML_DEFAULT; - if (state >= SCE_CAML_COMMENT) - nesting = state - SCE_CAML_COMMENT; - int chLast = startPos? static_cast<unsigned char>(styler[startPos - 1]): ' '; - int chNext = static_cast<unsigned char>(styler[startPos]); - - int chBase = 'd', chToken = 0, chLit = 0, chSkip; - WordList& keywords = *keywordlists[0]; + int nesting = 0; + if (sc.state < SCE_CAML_STRING) + sc.state = SCE_CAML_DEFAULT; + if (sc.state >= SCE_CAML_COMMENT) + nesting = (sc.state & 0x0f) - SCE_CAML_COMMENT; + + int chBase = 0, chToken = 0, chLit = 0; + WordList& keywords = *keywordlists[0]; WordList& keywords2 = *keywordlists[1]; + WordList& keywords3 = *keywordlists[2]; + const int useMagic = styler.GetPropertyInt("lexer.caml.magic", 0); // foreach char in range... - unsigned int i = startPos; - const unsigned int endPos = startPos + length; - for (; i < endPos; i += chSkip) { + while (sc.More()) { // set up [per-char] state info - int ch = chNext; - chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1)); - int state2 = -1; // (ASSUME no state change) - int chColor = i - 1;// (ASSUME standard coloring range) - chSkip = 1; // (ASSUME scanner "eats" 1 char) - - // this may be the correct thing to do... or not - if (styler.IsLeadByte(static_cast<char>(ch))) { - chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2)), - chSkip++; - continue; - } + int state2 = -1; // (ASSUME no state change) + int chColor = sc.currentPos - 1;// (ASSUME standard coloring range) + bool advance = true; // (ASSUME scanner "eats" 1 char) // step state machine - switch (state) { + switch (sc.state & 0x0f) { case SCE_CAML_DEFAULT: + chToken = sc.currentPos; // save [possible] token start (JIC) // it's wide open; what do we have? - if (iscamlf(ch)) - state2 = SCE_CAML_IDENTIFIER, chToken = i; - else if (ch == '`') - state2 = SCE_CAML_TAGNAME, chToken = i; - else if (ch == '#' && isdigit(chNext)) - state2 = SCE_CAML_LINENUM, chToken = i; - else if (isdigit(ch)) { - state2 = SCE_CAML_NUMBER, - chBase = strchr("xXoObB", chNext)? chNext: 'd'; - if (chBase != 'd') - ch = chNext, - chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2)), - chSkip++; - } else if (ch == '\'') /* (char literal?) */ - state2 = SCE_CAML_CHAR, chToken = i, chLit = 0; - else if (ch == '\"') + if (iscamlf(sc.ch)) + state2 = SCE_CAML_IDENTIFIER; + else if (sc.Match('`')) + state2 = SCE_CAML_TAGNAME; + else if (sc.Match('#') && isdigit(sc.chNext)) + state2 = SCE_CAML_LINENUM; + else if (isdigit(sc.ch)) { + state2 = SCE_CAML_NUMBER, chBase = 10; + if (sc.ch == '0' && strchr("bBoOxX", sc.chNext)) + chBase = baseT[tolower(sc.chNext) - 'a'], sc.Forward(); + } else if (sc.Match('\'')) /* (char literal?) */ + state2 = SCE_CAML_CHAR, chLit = 0; + else if (sc.Match('\"')) state2 = SCE_CAML_STRING; - else if (ch == '(' && chNext == '*') + else if (sc.Match('(', '*')) state2 = SCE_CAML_COMMENT, - ch = ' ', // (make SURE "(*)" isn't seen as a closed comment) - chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2)), - chSkip++, nesting = 0; + sc.ch = ' ', // (make SURE "(*)" isn't seen as a closed comment) + sc.Forward(); else if (strchr("!?~" /* Caml "prefix-symbol" */ "=<>@^|&+-*/$%" /* Caml "infix-symbol" */ - "()[]{};,:.#", ch)) /* Caml "bracket" or ;,:.# */ - state2 = SCE_CAML_OPERATOR, chToken = i; + "()[]{};,:.#", sc.ch)) /* Caml "bracket" or ;,:.# */ + state2 = SCE_CAML_OPERATOR; break; case SCE_CAML_IDENTIFIER: // [try to] interpret as [additional] identifier char - if (!(iscaml(ch) || ch == '\'')) { - const int n = i - chToken; + if (!(iscaml(sc.ch) || sc.Match('\''))) { + const int n = sc.currentPos - chToken; if (n < 24) { // length is believable as keyword, [re-]construct token char t[24]; - int p = 0; - for (int q = chToken; p < n; p++, q++) - t[p] = styler[q]; - t[p] = '\0'; + for (int i = -n; i < 0; i++) + t[n + i] = sc.GetRelative(i); + t[n] = '\0'; // special-case "_" token as KEYWORD - if ((n == 1 && chLast == '_') || keywords.InList(t)) - state = SCE_CAML_KEYWORD; + if ((n == 1 && sc.chPrev == '_') || keywords.InList(t)) + sc.ChangeState(SCE_CAML_KEYWORD); else if (keywords2.InList(t)) - state = SCE_CAML_KEYWORD2; + sc.ChangeState(SCE_CAML_KEYWORD2); + else if (keywords3.InList(t)) + sc.ChangeState(SCE_CAML_KEYWORD3); } - state2 = SCE_CAML_DEFAULT, chNext = ch, chSkip--; + state2 = SCE_CAML_DEFAULT, advance = false; } break; case SCE_CAML_TAGNAME: // [try to] interpret as [additional] tagname char - if (!(iscaml(ch) || ch == '\'')) - state2 = SCE_CAML_DEFAULT, chNext = ch, chSkip--; + if (!(iscaml(sc.ch) || sc.Match('\''))) + state2 = SCE_CAML_DEFAULT, advance = false; break; /*case SCE_CAML_KEYWORD: case SCE_CAML_KEYWORD2: + case SCE_CAML_KEYWORD3: // [try to] interpret as [additional] keyword char if (!iscaml(ch)) - state2 = SCE_CAML_DEFAULT, chNext = ch, chSkip--; + state2 = SCE_CAML_DEFAULT, advance = false; break;*/ case SCE_CAML_LINENUM: // [try to] interpret as [additional] linenum directive char - if (!isdigit(ch)) - state2 = SCE_CAML_DEFAULT, chNext = ch, chSkip--; + if (!isdigit(sc.ch)) + state2 = SCE_CAML_DEFAULT, advance = false; break; case SCE_CAML_OPERATOR: { // [try to] interpret as [additional] operator char const char* o = 0; - if (iscaml(ch) || isspace(ch) /* ident or whitespace */ - || ((o = strchr(")]};,\'\"`#", ch)) != 0)/* "termination" chars */ - || !strchr("!$%&*+-./:<=>?@^|~", ch)/* "operator" chars */) { + if (iscaml(sc.ch) || isspace(sc.ch) /* ident or whitespace */ + || (o = strchr(")]};,\'\"`#", sc.ch),o)/* "termination" chars */ + || !strchr("!$%&*+-./:<=>?@^|~", sc.ch)/* "operator" chars */) { // check for INCLUSIVE termination - if (o && strchr(")]};,", ch)) { - if ((ch == ')' && chLast == '(') || (ch == ']' && chLast == '[')) + if (o && strchr(")]};,", sc.ch)) { + if ((sc.Match(')') && sc.chPrev == '(') + || (sc.Match(']') && sc.chPrev == '[')) // special-case "()" and "[]" tokens as KEYWORDS - state = SCE_CAML_KEYWORD; + sc.ChangeState(SCE_CAML_KEYWORD); chColor++; } else - chNext = ch, chSkip--; + advance = false; state2 = SCE_CAML_DEFAULT; } break; @@ -293,85 +289,89 @@ void ColouriseCamlDoc( case SCE_CAML_NUMBER: // [try to] interpret as [additional] numeric literal char // N.B. - improperly accepts "extra" digits in base 2 or 8 literals - if (iscamld(ch) || ((chBase == 'x' || chBase == 'X') && isxdigit(ch))) + if (iscamld(sc.ch) || IsADigit(sc.ch, chBase)) break; // how about an integer suffix? - if ((ch == 'l' || ch == 'L' || ch == 'n')&& (iscamld(chLast) - || ((chBase == 'x' || chBase == 'X') && isxdigit(chLast)))) + if ((sc.Match('l') || sc.Match('L') || sc.Match('n')) + && (iscamld(sc.chPrev) || IsADigit(sc.chPrev, chBase))) break; // or a floating-point literal? - if (chBase == 'd') { + if (chBase == 10) { // with a decimal point? - if (ch == '.' && iscamld(chLast)) + if (sc.Match('.') && iscamld(sc.chPrev)) break; // with an exponent? (I) - if ((ch == 'e' || ch == 'E') && (iscamld(chLast) || chLast == '.')) + if ((sc.Match('e') || sc.Match('E')) + && (iscamld(sc.chPrev) || sc.chPrev == '.')) break; // with an exponent? (II) - if ((ch == '+' || ch == '-') && (chLast == 'e' || chLast == 'E')) + if ((sc.Match('+') || sc.Match('-')) + && (sc.chPrev == 'e' || sc.chPrev == 'E')) break; } // it looks like we have run out of number - state2 = SCE_CAML_DEFAULT, chNext = ch, chSkip--; + state2 = SCE_CAML_DEFAULT, advance = false; break; case SCE_CAML_CHAR: // [try to] interpret as [additional] char literal char - if (ch == '\\') { + if (sc.Match('\\')) { chLit = 1; // (definitely IS a char literal) - if (chLast == '\\') - ch = ' '; // (so termination test isn't fooled) + if (sc.chPrev == '\\') + sc.ch = ' '; // (so termination test isn't fooled) // should we be terminating - one way or another? - } else if ((ch == '\'' && chLast != '\\') || ch == '\r' || ch == '\n') { + } else if ((sc.Match('\'') && sc.chPrev != '\\') || sc.atLineEnd) { state2 = SCE_CAML_DEFAULT; - if (ch == '\'') + if (sc.Match('\'')) chColor++; else - state = SCE_CAML_IDENTIFIER; + sc.ChangeState(SCE_CAML_IDENTIFIER); // ... maybe a char literal, maybe not - } else if (chLit < 1 && i - chToken >= 2) - state = SCE_CAML_IDENTIFIER, chNext = ch, chSkip--; + } else if (chLit < 1 && sc.currentPos - chToken >= 2) + sc.ChangeState(SCE_CAML_IDENTIFIER), advance = false; break; case SCE_CAML_STRING: // [try to] interpret as [additional] string literal char - if (ch == '\\' && chLast == '\\') - ch = ' '; // (so '\\' doesn't cause us trouble) - else if (ch == '\"' && chLast != '\\') + if (sc.Match('\\') && sc.chPrev == '\\') + sc.ch = ' '; // (so '\\' doesn't cause us trouble) + else if (sc.Match('\"') && sc.chPrev != '\\') state2 = SCE_CAML_DEFAULT, chColor++; break; case SCE_CAML_COMMENT: - case SCE_CAML_COMMENT+1: - case SCE_CAML_COMMENT+2: - case SCE_CAML_COMMENT+3: + case SCE_CAML_COMMENT1: + case SCE_CAML_COMMENT2: + case SCE_CAML_COMMENT3: // we're IN a comment - does this start a NESTED comment? - if (ch == '(' && chNext == '*') - state2 = state + 1, - ch = ' ', // (make SURE "(*)" isn't seen as a closed comment) - chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2)), - chSkip++, nesting++; + if (sc.Match('(', '*')) + state2 = sc.state + 1, chToken = sc.currentPos, + sc.ch = ' ', // (make SURE "(*)" isn't seen as a closed comment) + sc.Forward(), nesting++; // [try to] interpret as [additional] comment char - else if (ch == ')' && chLast == '*') - state2 = nesting? (state - 1): SCE_CAML_DEFAULT, chColor++, nesting--; + else if (sc.Match(')') && sc.chPrev == '*') { + if (nesting) + state2 = (sc.state & 0x0f) - 1, chToken = 0, nesting--; + else + state2 = SCE_CAML_DEFAULT; + chColor++; + // enable "magic" (read-only) comment AS REQUIRED + } else if (useMagic && sc.currentPos - chToken == 4 + && sc.Match('c') && sc.chPrev == 'r' && sc.GetRelative(-2) == '@') + sc.state |= 0x10; // (switch to read-only comment style) break; } // handle state change and char coloring as required - if (state2 >= 0) { - // (1st char will NOT be colored until AT LEAST 2nd char) - if (chColor >= 0) - styler.ColourTo(chColor, state); - state = state2; - } - chLast = ch; + if (state2 >= 0) + styler.ColourTo(chColor, sc.state), sc.ChangeState(state2); + // move to next char UNLESS re-scanning current char + if (advance) + sc.Forward(); } // do any required terminal char coloring (JIC) - if (i >= endPos) - i = endPos - 1; - styler.ColourTo(i, state); -// styler.Flush(); // (is this always done by calling code?) + sc.Complete(); } #ifdef BUILD_AS_EXTERNAL_LEXER @@ -390,6 +390,7 @@ void FoldCamlDoc( static const char * const camlWordListDesc[] = { "Keywords", // primary Objective Caml keywords "Keywords2", // "optional" keywords (typically from Pervasives) + "Keywords3", // "optional" keywords (typically typenames) 0 }; |