aboutsummaryrefslogtreecommitdiffhomepage
path: root/lexers/LexHaskell.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'lexers/LexHaskell.cxx')
-rw-r--r--lexers/LexHaskell.cxx224
1 files changed, 148 insertions, 76 deletions
diff --git a/lexers/LexHaskell.cxx b/lexers/LexHaskell.cxx
index 8d7b6468d..77297c8b0 100644
--- a/lexers/LexHaskell.cxx
+++ b/lexers/LexHaskell.cxx
@@ -52,35 +52,85 @@ using namespace Scintilla;
#define HA_MODE_MODULE 4
#define HA_MODE_FFI 5
#define HA_MODE_TYPE 6
+#define HA_MODE_PRAGMA 7
#define INDENT_OFFSET 1
-static inline bool IsAlpha(const int ch) {
- return (ch >= 'a' && ch <= 'z')
- || (ch >= 'A' && ch <= 'Z');
+static int u_iswalpha(int);
+static int u_iswalnum(int);
+static int u_iswupper(int);
+static int u_IsHaskellSymbol(int);
+
+// #define HASKELL_UNICODE
+
+#ifndef HASKELL_UNICODE
+
+// Stubs
+
+static int u_iswalpha(int) {
+ return 0;
}
-static inline bool IsAnIdentifierStart(const int ch) {
- return (IsLowerCase(ch) || ch == '_');
+static int u_iswalnum(int) {
+ return 0;
}
-static inline bool IsAWordStart(const int ch) {
- return (IsAlpha(ch) || ch == '_');
+static int u_iswupper(int) {
+ return 0;
}
-static inline bool IsAWordChar(const int ch) {
- return ( IsAlphaNumeric(ch)
- || ch == '_'
- || ch == '\'');
+static int u_IsHaskellSymbol(int) {
+ return 0;
+}
+
+#endif
+
+static inline bool IsHaskellLetter(const int ch) {
+ if (IsASCII(ch)) {
+ return (ch >= 'a' && ch <= 'z')
+ || (ch >= 'A' && ch <= 'Z');
+ } else {
+ return u_iswalpha(ch) != 0;
+ }
+}
+
+static inline bool IsHaskellAlphaNumeric(const int ch) {
+ if (IsASCII(ch)) {
+ return IsAlphaNumeric(ch);
+ } else {
+ return u_iswalnum(ch) != 0;
+ }
+}
+
+static inline bool IsHaskellUpperCase(const int ch) {
+ if (IsASCII(ch)) {
+ return ch >= 'A' && ch <= 'Z';
+ } else {
+ return u_iswupper(ch) != 0;
+ }
}
-static inline bool IsAnOperatorChar(const int ch) {
- return
- ( ch == '!' || ch == '#' || ch == '$' || ch == '%'
- || ch == '&' || ch == '*' || ch == '+' || ch == '-'
- || ch == '.' || ch == '/' || ch == ':' || ch == '<'
- || ch == '=' || ch == '>' || ch == '?' || ch == '@'
- || ch == '^' || ch == '|' || ch == '~' || ch == '\\');
+static inline bool IsAnHaskellOperatorChar(const int ch) {
+ if (IsASCII(ch)) {
+ return
+ ( ch == '!' || ch == '#' || ch == '$' || ch == '%'
+ || ch == '&' || ch == '*' || ch == '+' || ch == '-'
+ || ch == '.' || ch == '/' || ch == ':' || ch == '<'
+ || ch == '=' || ch == '>' || ch == '?' || ch == '@'
+ || ch == '^' || ch == '|' || ch == '~' || ch == '\\');
+ } else {
+ return u_IsHaskellSymbol(ch) != 0;
+ }
+}
+
+static inline bool IsAHaskellWordStart(const int ch) {
+ return IsHaskellLetter(ch) || ch == '_';
+}
+
+static inline bool IsAHaskellWordChar(const int ch) {
+ return ( IsHaskellAlphaNumeric(ch)
+ || ch == '_'
+ || ch == '\'');
}
static inline bool IsCommentBlockStyle(int style) {
@@ -174,6 +224,7 @@ class LexerHaskell : public ILexer {
int firstImportLine;
WordList keywords;
WordList ffi;
+ WordList reserved_operators;
OptionsHaskell options;
OptionSetHaskell osHaskell;
@@ -274,6 +325,9 @@ int SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
case 1:
wordListN = &ffi;
break;
+ case 2:
+ wordListN = &reserved_operators;
+ break;
}
int firstModification = -1;
if (wordListN) {
@@ -292,8 +346,8 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
LexAccessor styler(pAccess);
// Do not leak onto next line
- if (initStyle == SCE_HA_STRINGEOL)
- initStyle = SCE_HA_DEFAULT;
+ if (initStyle == SCE_HA_STRINGEOL)
+ initStyle = SCE_HA_DEFAULT;
StyleContext sc(startPos, length, initStyle, styler);
@@ -306,6 +360,8 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
int base = 10;
bool inDashes = false;
+ assert(!(IsCommentBlockStyle(initStyle) && nestLevel <= 0));
+
while (sc.More()) {
// Check for state end
@@ -320,20 +376,21 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
}
if (sc.atLineStart && (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER)) {
- // Prevent SCE_HA_STRINGEOL from leaking back to previous line
- sc.SetState(sc.state);
- }
+ // Prevent SCE_HA_STRINGEOL from leaking back to previous line
+ sc.SetState(sc.state);
+ }
// Handle line continuation generically.
if (sc.ch == '\\' &&
( sc.state == SCE_HA_STRING
|| sc.state == SCE_HA_PREPROCESSOR)) {
if (sc.chNext == '\n' || sc.chNext == '\r') {
+ sc.Forward();
+
// Remember the line state for future incremental lexing
styler.SetLineState(lineCurrent, (nestLevel << 3) | mode);
lineCurrent++;
- sc.Forward();
if (sc.ch == '\r' && sc.chNext == '\n') {
sc.Forward();
}
@@ -348,42 +405,48 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
if (sc.ch == ':' &&
// except "::"
- !(sc.chNext == ':' && !IsAnOperatorChar(sc.GetRelative(2)))) {
+ !(sc.chNext == ':' && !IsAnHaskellOperatorChar(sc.GetRelative(2)))) {
style = SCE_HA_CAPITAL;
}
- while (IsAnOperatorChar(sc.ch))
+ while (IsAnHaskellOperatorChar(sc.ch))
sc.Forward();
+ char s[100];
+ sc.GetCurrent(s, sizeof(s));
+
+ if (reserved_operators.InList(s))
+ style = SCE_HA_RESERVED_OPERATOR;
+
styler.ColourTo(sc.currentPos - 1, style);
sc.ChangeState(SCE_HA_DEFAULT);
}
// String
else if (sc.state == SCE_HA_STRING) {
- if (sc.ch == '\"') {
+ if (sc.atLineEnd) {
+ sc.ChangeState(SCE_HA_STRINGEOL);
+ sc.ForwardSetState(SCE_HA_DEFAULT);
+ } else if (sc.ch == '\"') {
sc.Forward();
skipMagicHash(sc, false);
sc.SetState(SCE_HA_DEFAULT);
} else if (sc.ch == '\\') {
sc.Forward(2);
- } else if (sc.atLineEnd) {
- sc.ChangeState(SCE_HA_STRINGEOL);
- sc.ForwardSetState(SCE_HA_DEFAULT);
} else {
sc.Forward();
}
}
// Char
else if (sc.state == SCE_HA_CHARACTER) {
- if (sc.ch == '\'') {
+ if (sc.atLineEnd) {
+ sc.ChangeState(SCE_HA_STRINGEOL);
+ sc.ForwardSetState(SCE_HA_DEFAULT);
+ } else if (sc.ch == '\'') {
sc.Forward();
skipMagicHash(sc, false);
sc.SetState(SCE_HA_DEFAULT);
} else if (sc.ch == '\\') {
sc.Forward(2);
- } else if (sc.atLineEnd) {
- sc.ChangeState(SCE_HA_STRINGEOL);
- sc.ForwardSetState(SCE_HA_DEFAULT);
} else {
sc.Forward();
}
@@ -406,27 +469,29 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
}
// Keyword or Identifier
else if (sc.state == SCE_HA_IDENTIFIER) {
- int style = isupper(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
+ int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
+
+ assert(IsAHaskellWordStart(sc.ch));
sc.Forward();
while (sc.More()) {
- if (IsAWordChar(sc.ch)) {
+ if (IsAHaskellWordChar(sc.ch)) {
sc.Forward();
} else if (sc.ch == '#' && options.magicHash) {
sc.Forward();
break;
- } else if (style == SCE_HA_CAPITAL && sc.ch=='.') {
- if (isupper(sc.chNext)) {
+ } else if (sc.ch == '.' && style == SCE_HA_CAPITAL) {
+ if (IsHaskellUpperCase(sc.chNext)) {
sc.Forward();
style = SCE_HA_CAPITAL;
- } else if (IsAWordStart(sc.chNext)) {
+ } else if (IsAHaskellWordStart(sc.chNext)) {
sc.Forward();
style = SCE_HA_IDENTIFIER;
- } else if (IsAnOperatorChar(sc.chNext)) {
+ } else if (IsAnHaskellOperatorChar(sc.chNext)) {
sc.Forward();
style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
- while (IsAnOperatorChar(sc.ch))
+ while (IsAnHaskellOperatorChar(sc.ch))
sc.Forward();
break;
} else {
@@ -498,13 +563,13 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
// Comments
// Oneliner
else if (sc.state == SCE_HA_COMMENTLINE) {
- if (inDashes && sc.ch != '-') {
+ if (sc.atLineEnd) {
+ sc.SetState(mode == HA_MODE_PRAGMA ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
+ sc.Forward(); // prevent double counting a line
+ } else if (inDashes && sc.ch != '-' && mode != HA_MODE_PRAGMA) {
inDashes = false;
- if (IsAnOperatorChar(sc.ch))
+ if (IsAnHaskellOperatorChar(sc.ch))
sc.ChangeState(SCE_HA_OPERATOR);
- } else if (sc.atLineEnd) {
- sc.SetState(SCE_HA_DEFAULT);
- sc.Forward(); // prevent double counting a line
} else {
sc.Forward();
}
@@ -515,12 +580,14 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
sc.SetState(StyleFromNestLevel(nestLevel));
sc.Forward(2);
nestLevel++;
- } else if (sc.Match('-','}')) {
+ } else if (sc.Match('-','}')
+ && !(mode == HA_MODE_PRAGMA && sc.chPrev == '#')) {
sc.Forward(2);
nestLevel--;
+ assert(nestLevel >= 0);
sc.SetState(
- nestLevel == 0
- ? SCE_HA_DEFAULT
+ nestLevel <= 0
+ ? (mode == HA_MODE_PRAGMA ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
: StyleFromNestLevel(nestLevel - 1));
} else {
sc.Forward();
@@ -530,19 +597,28 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
else if (sc.state == SCE_HA_PRAGMA) {
// GHC pragma end should always be indented further than it's start.
if (sc.Match("#-}") && !sc.atLineStart) {
+ mode = HA_MODE_DEFAULT;
sc.Forward(3);
sc.SetState(SCE_HA_DEFAULT);
+ } else if (sc.Match('-','-')) {
+ sc.SetState(SCE_HA_COMMENTLINE);
+ sc.Forward(2);
+ inDashes = false;
+ } else if (sc.Match('{','-')) {
+ sc.SetState(StyleFromNestLevel(nestLevel));
+ sc.Forward(2);
+ nestLevel++;
} else {
sc.Forward();
}
}
// Preprocessor
else if (sc.state == SCE_HA_PREPROCESSOR) {
- if (options.stylingWithinPreprocessor && !IsAlpha(sc.ch)) {
- sc.SetState(SCE_HA_DEFAULT);
- } else if (sc.atLineEnd) {
+ if (sc.atLineEnd) {
sc.SetState(SCE_HA_DEFAULT);
sc.Forward(); // prevent double counting a line
+ } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
+ sc.SetState(SCE_HA_DEFAULT);
} else {
sc.Forward();
}
@@ -557,7 +633,7 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
sc.Forward(2);
base = 16;
} else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
- // Match anything starting with "0x" or "0X", too
+ // Match anything starting with "0o" or "0O", too
sc.Forward(2);
base = 8;
} else {
@@ -568,6 +644,7 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
}
// Pragma
else if (sc.Match("{-#")) {
+ mode = HA_MODE_PRAGMA;
sc.SetState(SCE_HA_PRAGMA);
sc.Forward(3);
}
@@ -590,43 +667,39 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
}
// Character or quoted name
else if (sc.ch == '\'') {
- styler.ColourTo(sc.currentPos - 1, state);
+ sc.SetState(SCE_HA_CHARACTER);
sc.Forward();
- int style = SCE_HA_CHARACTER;
-
if (options.allowQuotes) {
// Quoted type ''T
- if (sc.ch=='\'' && IsAWordStart(sc.chNext)) {
+ if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) {
sc.Forward();
- style=SCE_HA_IDENTIFIER;
+ sc.ChangeState(SCE_HA_IDENTIFIER);
} else if (sc.chNext != '\'') {
// Quoted value or promoted constructor 'N
- if (IsAWordStart(sc.ch)) {
- style=SCE_HA_IDENTIFIER;
+ if (IsAHaskellWordStart(sc.ch)) {
+ sc.ChangeState(SCE_HA_IDENTIFIER);
// Promoted constructor operator ':~>
} else if (sc.ch == ':') {
- style=SCE_HA_OPERATOR;
+ sc.ChangeState(SCE_HA_OPERATOR);
// Promoted list or tuple '[T]
} else if (sc.ch == '[' || sc.ch== '(') {
styler.ColourTo(sc.currentPos - 1, SCE_HA_OPERATOR);
- style=SCE_HA_DEFAULT;
+ sc.ChangeState(SCE_HA_DEFAULT);
}
}
}
-
- sc.ChangeState(style);
}
// Operator starting with '?' or an implicit parameter
else if (sc.ch == '?') {
- int style = SCE_HA_OPERATOR;
+ sc.SetState(SCE_HA_OPERATOR);
- if (options.implicitParams && IsAnIdentifierStart(sc.chNext)) {
- sc.Forward();
- style = SCE_HA_IDENTIFIER;
+ if ( options.implicitParams
+ && IsAHaskellWordStart(sc.chNext)
+ && !IsHaskellUpperCase(sc.chNext)) {
+ sc.Forward();
+ sc.ChangeState(SCE_HA_IDENTIFIER);
}
-
- sc.ChangeState(style);
}
// Preprocessor
else if (sc.atLineStart && sc.ch == '#') {
@@ -635,7 +708,7 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
sc.Forward();
}
// Operator
- else if (IsAnOperatorChar(sc.ch)) {
+ else if (IsAnHaskellOperatorChar(sc.ch)) {
mode = HA_MODE_DEFAULT;
sc.SetState(SCE_HA_OPERATOR);
}
@@ -645,11 +718,10 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
|| sc.ch == '[' || sc.ch == ']'
|| sc.ch == '{' || sc.ch == '}') {
sc.SetState(SCE_HA_OPERATOR);
- sc.Forward();
- sc.SetState(SCE_HA_DEFAULT);
+ sc.ForwardSetState(SCE_HA_DEFAULT);
}
// Keyword or Identifier
- else if (IsAWordStart(sc.ch)) {
+ else if (IsAHaskellWordStart(sc.ch)) {
sc.SetState(SCE_HA_IDENTIFIER);
// Something we don't care about
} else {
@@ -657,6 +729,7 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
}
}
}
+ styler.SetLineState(lineCurrent, (nestLevel << 3) | mode);
sc.Complete();
}
@@ -688,7 +761,6 @@ void SCI_METHOD LexerHaskell::Fold(unsigned int startPos, int length, int // ini
Accessor styler(pAccess, NULL);
-
const int maxPos = startPos + length;
const int maxLines =
maxPos == styler.Length()
@@ -767,7 +839,7 @@ void SCI_METHOD LexerHaskell::Fold(unsigned int startPos, int length, int // ini
int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
int indentNextMask = indentNext & ~SC_FOLDLEVELNUMBERMASK;
-
+
if (indentNextLevel != (SC_FOLDLEVELBASE & SC_FOLDLEVELNUMBERMASK)) {
indentNext = (indentNextLevel + INDENT_OFFSET) | indentNextMask;
}