aboutsummaryrefslogtreecommitdiffhomepage
path: root/lexers/LexHaskell.cxx
diff options
context:
space:
mode:
authorkudah <kudahkukarek@gmail.com>2013-05-09 23:10:04 +0300
committerkudah <kudahkukarek@gmail.com>2013-05-09 23:10:04 +0300
commitfbf53dce4aca781ab5470b13188cbfbc781f3f99 (patch)
tree3a4ba18324eb149f594d57a985e553af0e5d86c7 /lexers/LexHaskell.cxx
parent3c942cdc536135a3b480547ab687038a61183719 (diff)
downloadscintilla-mirror-fbf53dce4aca781ab5470b13188cbfbc781f3f99.tar.gz
* Literate Haskell support
* CPP highlighting is disableable * CPP is now correctly colored when inside comments and strings * Some bugs with incremental lexing fixed * Indented imports folding is now undisableable
Diffstat (limited to 'lexers/LexHaskell.cxx')
-rw-r--r--lexers/LexHaskell.cxx247
1 files changed, 178 insertions, 69 deletions
diff --git a/lexers/LexHaskell.cxx b/lexers/LexHaskell.cxx
index 4e83916e0..7e677e4ff 100644
--- a/lexers/LexHaskell.cxx
+++ b/lexers/LexHaskell.cxx
@@ -129,7 +129,16 @@ static inline bool IsCommentBlockStyle(int style) {
}
static inline bool IsCommentStyle(int style) {
- return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3);
+ return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
+ || ( style == SCE_HA_LITERATE_COMMENT
+ || style == SCE_HA_LITERATE_CODEDELIM);
+}
+
+// styles which do not belong to Haskell, but to external tools
+static inline bool IsExternalStyle(int style) {
+ return ( style == SCE_HA_PREPROCESSOR
+ || style == SCE_HA_LITERATE_COMMENT
+ || style == SCE_HA_LITERATE_CODEDELIM);
}
inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
@@ -141,29 +150,30 @@ struct OptionsHaskell {
bool allowQuotes;
bool implicitParams;
bool highlightSafe;
+ bool cpp;
bool stylingWithinPreprocessor;
bool fold;
bool foldComment;
bool foldCompact;
bool foldImports;
- bool foldIndentedImports;
OptionsHaskell() {
magicHash = true; // Widespread use, enabled by default.
allowQuotes = true; // Widespread use, enabled by default.
implicitParams = false; // Fell out of favor, seldom used, disabled.
highlightSafe = true; // Moderately used, doesn't hurt to enable.
+ cpp = true; // Widespread use, enabled by default.
stylingWithinPreprocessor = false;
fold = false;
foldComment = false;
foldCompact = false;
foldImports = false;
- foldIndentedImports = true;
}
};
static const char * const haskellWordListDesc[] = {
"Keywords",
"FFI",
+ "Reserved operators",
0
};
@@ -188,6 +198,10 @@ struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
"Set to 0 to disallow \"safe\" keyword in imports "
"(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
+ DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
+ "Set to 0 to disable C-preprocessor highlighting "
+ "(-XCPP extension)");
+
DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
"For Haskell code, determines whether all preprocessor code is styled in the "
"preprocessor style (0, the default) or only from the initial # to the end "
@@ -203,15 +217,12 @@ struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
"Set to 1 to enable folding of import declarations");
- DefineProperty("fold.haskell.imports.indented", &OptionsHaskell::foldIndentedImports,
- "Set this property to 0 to disable folding imports not starting at "
- "column 0 when fold.haskell.imports=1");
-
DefineWordListSets(haskellWordListDesc);
}
};
class LexerHaskell : public ILexer {
+ const bool literate;
int firstImportLine;
WordList keywords;
WordList ffi;
@@ -235,21 +246,41 @@ class LexerHaskell : public ILexer {
,HA_MODE_TYPE = 6 // after "type" or "data", before "family"
};
+ enum LiterateMode {
+ LITERATE_BIRD = 0 // if '>' is the first character on the line,
+ // color '>' as a codedelim and the rest of
+ // the line as code.
+ // else if "\begin{code}" is the only word on the
+ // line except whitespace, switch to LITERATE_BLOCK
+ // otherwise color the line as a literate comment.
+ ,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column
+ // 0 ignoring all later characters, color the line
+ // as a codedelim and switch to LITERATE_BIRD
+ // otherwise color the line as code.
+ };
+
struct HaskellLineInfo {
- unsigned int nestLevel;
+ unsigned int nestLevel; // 22 bits ought to be enough for anybody
+ unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
+ // beyond 31.
bool pragma;
+ LiterateMode lmode;
KeywordMode mode;
HaskellLineInfo(int state) :
- nestLevel (state >> 4)
- , pragma (state & 0x8)
+ nestLevel (state >> 10)
+ , nonexternalStyle ((state >> 5) & 0x1F)
+ , pragma ((state >> 4) & 0x1)
+ , lmode (static_cast<LiterateMode>((state >> 3) & 0x1))
, mode (static_cast<KeywordMode>(state & 0x7))
{}
int ToLineState() {
return
- (nestLevel << 4)
- | pragma
+ (nestLevel << 10)
+ | (nonexternalStyle << 5)
+ | (pragma << 4)
+ | (lmode << 3)
| mode;
}
};
@@ -272,19 +303,17 @@ class LexerHaskell : public ILexer {
int currentPos = styler.LineStart(line);
int style = styler.StyleAt(currentPos);
- if (options.foldIndentedImports) {
- int eol_pos = styler.LineStart(line + 1) - 1;
+ int eol_pos = styler.LineStart(line + 1) - 1;
- while (currentPos < eol_pos) {
- int ch = styler[currentPos];
- style = styler.StyleAt(currentPos);
+ while (currentPos < eol_pos) {
+ int ch = styler[currentPos];
+ style = styler.StyleAt(currentPos);
- if (ch == ' ' || ch == '\t'
- || IsCommentBlockStyle(style)) {
- currentPos++;
- } else {
- break;
- }
+ if (ch == ' ' || ch == '\t'
+ || IsCommentBlockStyle(style)) {
+ currentPos++;
+ } else {
+ break;
}
}
@@ -296,7 +325,7 @@ class LexerHaskell : public ILexer {
}
public:
- LexerHaskell() : firstImportLine(-1) {}
+ LexerHaskell(const bool literate_) : literate(literate_), firstImportLine(-1) {}
virtual ~LexerHaskell() {}
void SCI_METHOD Release() {
@@ -336,7 +365,11 @@ public:
}
static ILexer *LexerFactoryHaskell() {
- return new LexerHaskell();
+ return new LexerHaskell(false);
+ }
+
+ static ILexer *LexerFactoryLiterateHaskell() {
+ return new LexerHaskell(true);
}
};
@@ -376,16 +409,18 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
,IDocument *pAccess) {
LexAccessor styler(pAccess);
+ int lineCurrent = styler.GetLine(startPos);
+
+ HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
+
// Do not leak onto next line
if (initStyle == SCE_HA_STRINGEOL)
initStyle = SCE_HA_DEFAULT;
+ else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
+ initStyle = hs.nonexternalStyle;
StyleContext sc(startPos, length, initStyle, styler);
- int lineCurrent = styler.GetLine(startPos);
-
- HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
-
int base = 10;
bool dot = false;
@@ -397,11 +432,14 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
while (sc.More()) {
// Check for state end
- // For line numbering (and by extension, nested comments) to work,
- // states should always forward one character at a time.
- // states should match on line ends using OnLineEnd function.
- // If a state sometimes does _not_ forward a character, it should check
- // first if it's not on a line end and forward otherwise.
+ if (!IsExternalStyle(sc.state)) {
+ hs.nonexternalStyle = sc.state;
+ }
+
+ // For the lexer to work, states should unconditionally forward at least one
+ // character.
+ // If they don't, they should still check whether they are at a line end and
+ // forward if so.
// If a state forwards more than one character, it should check every time
// that it is not a line end and cease forwarding otherwise.
if (sc.atLineEnd) {
@@ -410,32 +448,111 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
lineCurrent++;
}
- if (sc.atLineStart && (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER)) {
- // Prevent SCE_HA_STRINGEOL from leaking back to previous line
- sc.SetState(sc.state);
+ // Handle line continuation generically.
+ if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r')
+ && ( sc.state == SCE_HA_STRING
+ || sc.state == SCE_HA_PREPROCESSOR)) {
+ // Remember the line state for future incremental lexing
+ styler.SetLineState(lineCurrent, hs.ToLineState());
+ lineCurrent++;
+
+ sc.Forward();
+ if (sc.ch == '\r' && sc.chNext == '\n') {
+ sc.Forward();
+ }
+ sc.Forward();
+
+ continue;
}
- // Handle line continuation generically.
- if (sc.ch == '\\' &&
- ( sc.state == SCE_HA_STRING
- || sc.state == SCE_HA_PREPROCESSOR)) {
- if (sc.chNext == '\n' || sc.chNext == '\r') {
+ if (sc.atLineStart) {
+
+ if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) {
+ // Prevent SCE_HA_STRINGEOL from leaking back to previous line
+ sc.SetState(sc.state);
+ }
+
+ if (literate && hs.lmode == LITERATE_BIRD) {
+ if (!IsExternalStyle(sc.state)) {
+ sc.SetState(SCE_HA_LITERATE_COMMENT);
+ }
+ }
+ }
+
+ // External
+ // Literate
+ if ( literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
+ && sc.ch == '>') {
+ sc.SetState(SCE_HA_LITERATE_CODEDELIM);
+ sc.ForwardSetState(hs.nonexternalStyle);
+ }
+ else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
+ && ( sc.ch == ' ' || sc.ch == '\t'
+ || sc.Match("\\begin{code}"))) {
+ sc.SetState(sc.state);
+
+ while ((sc.ch == ' ' || sc.ch == '\t') && sc.More())
sc.Forward();
- // Remember the line state for future incremental lexing
- styler.SetLineState(lineCurrent, hs.ToLineState());
- lineCurrent++;
+ if (sc.Match("\\begin{code}")) {
+ sc.Forward(strlen("\\begin{code}"));
- if (sc.ch == '\r' && sc.chNext == '\n') {
+ bool correct = true;
+
+ while (!sc.atLineEnd && sc.More()) {
+ if (sc.ch != ' ' && sc.ch != '\t') {
+ correct = false;
+ }
sc.Forward();
}
- sc.Forward();
- continue;
+
+ if (correct) {
+ sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
+ hs.lmode = LITERATE_BLOCK;
+ }
}
}
+ else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
+ && sc.Match("\\end{code}")) {
+ sc.SetState(SCE_HA_LITERATE_CODEDELIM);
+
+ sc.Forward(strlen("\\end{code}"));
+ while (!sc.atLineEnd && sc.More()) {
+ sc.Forward();
+ }
+
+ sc.SetState(SCE_HA_LITERATE_COMMENT);
+ hs.lmode = LITERATE_BIRD;
+ }
+ // Preprocessor
+ else if (sc.atLineStart && sc.ch == '#' && options.cpp) {
+ sc.SetState(SCE_HA_PREPROCESSOR);
+ sc.Forward();
+ }
+ // Literate
+ else if (sc.state == SCE_HA_LITERATE_COMMENT) {
+ sc.Forward();
+ }
+ else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
+ sc.ForwardSetState(hs.nonexternalStyle);
+ }
+ // Preprocessor
+ else if (sc.state == SCE_HA_PREPROCESSOR) {
+ if (sc.atLineEnd) {
+ sc.SetState(options.stylingWithinPreprocessor
+ ? SCE_HA_DEFAULT
+ : hs.nonexternalStyle);
+ sc.Forward(); // prevent double counting a line
+ } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
+ sc.SetState(SCE_HA_DEFAULT);
+ } else {
+ sc.Forward();
+ }
+ }
+ // Haskell
// Operator
- if (sc.state == SCE_HA_OPERATOR) {
+ else if (sc.state == SCE_HA_OPERATOR) {
int style = SCE_HA_OPERATOR;
if ( sc.ch == ':'
@@ -492,7 +609,10 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
}
// Number
else if (sc.state == SCE_HA_NUMBER) {
- if (IsADigit(sc.ch, base)) {
+ if (sc.atLineEnd) {
+ sc.SetState(SCE_HA_DEFAULT);
+ sc.Forward(); // prevent double counting a line
+ } else if (IsADigit(sc.ch, base)) {
sc.Forward();
} else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) {
sc.Forward(2);
@@ -653,17 +773,6 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
sc.Forward();
}
}
- // Preprocessor
- else if (sc.state == SCE_HA_PREPROCESSOR) {
- if (sc.atLineEnd) {
- sc.SetState(SCE_HA_DEFAULT);
- sc.Forward(); // prevent double counting a line
- } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
- sc.SetState(SCE_HA_DEFAULT);
- } else {
- sc.Forward();
- }
- }
// New state?
else if (sc.state == SCE_HA_DEFAULT) {
// Digit
@@ -732,8 +841,8 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
sc.ChangeState(SCE_HA_OPERATOR);
// Promoted list or tuple '[T]
} else if (sc.ch == '[' || sc.ch== '(') {
- styler.ColourTo(sc.currentPos - 1, SCE_HA_OPERATOR);
- sc.ChangeState(SCE_HA_DEFAULT);
+ sc.ChangeState(SCE_HA_OPERATOR);
+ sc.ForwardSetState(SCE_HA_DEFAULT);
}
}
}
@@ -742,6 +851,7 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
else if (sc.ch == '?') {
hs.mode = HA_MODE_DEFAULT;
+ alreadyInTheMiddleOfOperator = false;
sc.SetState(SCE_HA_OPERATOR);
if ( options.implicitParams
@@ -751,11 +861,6 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty
sc.ChangeState(SCE_HA_IDENTIFIER);
}
}
- // Preprocessor
- else if (sc.atLineStart && sc.ch == '#') {
- sc.SetState(SCE_HA_PREPROCESSOR);
- sc.Forward();
- }
// Operator
else if (IsAnHaskellOperatorChar(sc.ch)) {
hs.mode = HA_MODE_DEFAULT;
@@ -807,7 +912,10 @@ static int HaskellIndentAmount(Accessor &styler, int line) {
int posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0;
- while ((ch == ' ' || ch == '\t' || IsCommentBlockStyle(style)) && (pos < eol_pos)) {
+ while (( ch == ' ' || ch == '\t'
+ || IsCommentBlockStyle(style)
+ || style == SCE_HA_LITERATE_CODEDELIM)
+ && (pos < eol_pos)) {
if (inPrevPrefix) {
char chPrev = styler[posPrev++];
if (chPrev != ' ' && chPrev != '\t') {
@@ -995,3 +1103,4 @@ void SCI_METHOD LexerHaskell::Fold(unsigned int startPos, int length, int // ini
}
LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
+LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);