diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/KeyWords.cxx | 1 | ||||
| -rw-r--r-- | src/LexMarkdown.cxx | 412 | 
2 files changed, 413 insertions, 0 deletions
| diff --git a/src/KeyWords.cxx b/src/KeyWords.cxx index 366cb47dc..5e4de668d 100644 --- a/src/KeyWords.cxx +++ b/src/KeyWords.cxx @@ -375,6 +375,7 @@ int Scintilla_LinkLexers() {  	LINK_LEXER(lmLua);  	LINK_LEXER(lmMagikSF);  	LINK_LEXER(lmMake); +	LINK_LEXER(lmMarkdown);  	LINK_LEXER(lmMatlab);  	LINK_LEXER(lmMETAPOST);  	LINK_LEXER(lmMMIXAL); diff --git a/src/LexMarkdown.cxx b/src/LexMarkdown.cxx new file mode 100644 index 000000000..f7fc48f40 --- /dev/null +++ b/src/LexMarkdown.cxx @@ -0,0 +1,412 @@ +/****************************************************************** + *  LexMarkdown.cxx + * + *  A simple Markdown lexer for scintilla. + *    + *  Includes highlighting for some extra features from the + *  Pandoc implementation; strikeout, using '#.' as a default  + *  ordered list item marker, and delimited code blocks. + *  + *  Limitations: + *  + *  Standard indented code blocks are not highlighted at all, + *  as it would conflict with other indentation schemes. Use  + *  delimited code blocks for blanket highlighting of an + *  entire code block.  Embedded HTML is not highlighted either. + *  Blanket HTML highlighting has issues, because some Markdown + *  implementations allow Markdown markup inside of the HTML. Also, + *  there is a following blank line issue that can't be ignored,  + *  explained in the next paragraph. Embedded HTML and code  + *  blocks would be better supported with language specific  + *  highlighting. + *  + *  The highlighting aims to accurately reflect correct syntax, + *  but a few restrictions are relaxed. Delimited code blocks are + *  highlighted, even if the line following the code block is not blank.   + *  Requiring a blank line after a block, breaks the highlighting + *  in certain cases, because of the way Scintilla ends up calling + *  the lexer. + *  + *  Written by Jon Strait - jstrait@moonloop.net + * + *  The License.txt file describes the conditions under which this + *  software may be distributed. + * + *****************************************************************/ + +#include <stdlib.h>          +#include <string.h> +#include <ctype.h> +#include <stdio.h> +#include <stdarg.h> + +#include "Platform.h" + +#include "PropSet.h" +#include "Accessor.h" +#include "StyleContext.h" +#include "KeyWords.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +static inline bool IsNewline(const int ch) { +    return (ch == '\n' || ch == '\r'); +} + +// True if can follow ch down to the end with possibly trailing whitespace +static bool FollowToLineEnd(const int ch, const int state, const unsigned int endPos, StyleContext &sc) { +    unsigned int i = 0; +    while (sc.GetRelative(++i) == ch) +        ; +    // Skip over whitespace +    while (IsASpaceOrTab(sc.GetRelative(i)) && sc.currentPos + i < endPos) +        ++i; +    if (IsNewline(sc.GetRelative(i)) || sc.currentPos + i == endPos) { +        sc.Forward(i); +        sc.ChangeState(state); +        sc.SetState(SCE_MARKDOWN_LINE_BEGIN); +        return true; +    } +    else return false; +} + +// Set the state on text section from current to length characters,  +// then set the rest until the newline to default, except for any characters matching token +static void SetStateAndZoom(const int state, const int length, const int token, StyleContext &sc) { +    sc.SetState(state); +    sc.Forward(length); +    sc.SetState(SCE_MARKDOWN_DEFAULT); +    sc.Forward(); +    bool started = false; +    while (sc.More() && !IsNewline(sc.ch)) { +        if (sc.ch == token && !started) { +            sc.SetState(state); +            started = true; +        } +        else if (sc.ch != token) { +            sc.SetState(SCE_MARKDOWN_DEFAULT); +            started = false; +        } +        sc.Forward(); +    } +    sc.SetState(SCE_MARKDOWN_LINE_BEGIN); +} + +// Does the previous line have more than spaces and tabs? +static bool HasPrevLineContent(StyleContext &sc) { +    int i = 0; +    // Go back to the previous newline +    while ((--i + sc.currentPos) && !IsNewline(sc.GetRelative(i)))  +        ; +    while (--i + sc.currentPos) { +        if (IsNewline(sc.GetRelative(i))) +            break; +        if (!IsASpaceOrTab(sc.GetRelative(i))) +            return true; +    } +    return false; +} + +static bool IsValidHrule(const unsigned int endPos, StyleContext &sc) { +    int c, count = 1; +    unsigned int i = 0; +    while (++i) { +        c = sc.GetRelative(i); +        if (c == sc.ch)  +            ++count; +        // hit a terminating character +        else if (!IsASpaceOrTab(c) || sc.currentPos + i == endPos) { +            // Are we a valid HRULE +            if ((IsNewline(c) || sc.currentPos + i == endPos) &&  +                    count >= 3 && !HasPrevLineContent(sc)) { +                sc.SetState(SCE_MARKDOWN_HRULE); +                sc.Forward(i); +                sc.SetState(SCE_MARKDOWN_LINE_BEGIN); +                return true; +            } +            else { +                sc.SetState(SCE_MARKDOWN_DEFAULT); +		return false; +            } +        } +    } +    return false; +} + +static void ColorizeMarkdownDoc(unsigned int startPos, int length, int initStyle, +                               WordList **, Accessor &styler) { +    unsigned int endPos = startPos + length; +    int precharCount = 0; +    // Don't advance on a new loop iteration and retry at the same position. +    // Useful in the corner case of having to start at the beginning file position +    // in the default state. +    bool freezeCursor = false; +     +    StyleContext sc(startPos, length, initStyle, styler); + +    while (sc.More()) { +        // Skip past escaped characters +        if (sc.ch == '\\') { +            sc.Forward(); +            continue; +        } +         +        // A blockquotes resets the line semantics +        if (sc.state == SCE_MARKDOWN_BLOCKQUOTE) +            sc.SetState(SCE_MARKDOWN_LINE_BEGIN); +         +        // Conditional state-based actions +        if (sc.state == SCE_MARKDOWN_CODE2) { +            if (sc.Match("``") && sc.GetRelative(-2) != ' ') { +                sc.Forward(2); +                sc.SetState(SCE_MARKDOWN_DEFAULT); +            } +        }     +        else if (sc.state == SCE_MARKDOWN_CODE) { +            if (sc.ch == '`' && sc.chPrev != ' ') +                sc.ForwardSetState(SCE_MARKDOWN_DEFAULT); +        } +        /* De-activated because it gets in the way of other valid indentation +         * schemes, for example multiple paragraphs inside a list item. +        // Code block +        else if (sc.state == SCE_MARKDOWN_CODEBK) { +            bool d = true; +            if (IsNewline(sc.ch)) { +                if (sc.chNext != '\t') { +                    for (int c = 1; c < 5; ++c) { +                        if (sc.GetRelative(c) != ' ') +                            d = false; +                    } +                } +            } +            else if (sc.atLineStart) { +                if (sc.ch != '\t' ) { +                    for (int i = 0; i < 4; ++i) { +                        if (sc.GetRelative(i) != ' ') +                            d = false; +                    } +                } +            } +            if (!d) +                sc.SetState(SCE_MARKDOWN_LINE_BEGIN); +        } +        */ +        // Strong +        else if (sc.state == SCE_MARKDOWN_STRONG1) { +            if (sc.Match("**") && sc.chPrev != ' ') { +                sc.Forward(2); +                sc.SetState(SCE_MARKDOWN_DEFAULT); +            } +        }  +        else if (sc.state == SCE_MARKDOWN_STRONG2) {   +            if (sc.Match("__") && sc.chPrev != ' ') {   +                sc.Forward(2); +                sc.SetState(SCE_MARKDOWN_DEFAULT); +            } +        } +        // Emphasis     +        else if (sc.state == SCE_MARKDOWN_EM1) { +            if (sc.ch == '*' && sc.chPrev != ' ') +                sc.ForwardSetState(SCE_MARKDOWN_DEFAULT); +        } +        else if (sc.state == SCE_MARKDOWN_EM2) { +            if (sc.ch == '_' && sc.chPrev != ' ') +                sc.ForwardSetState(SCE_MARKDOWN_DEFAULT); +        } +        else if (sc.state == SCE_MARKDOWN_CODEBK) { +            if (sc.atLineStart && sc.Match("~~~")) { +                int i = 1; +                while (!IsNewline(sc.GetRelative(i)) && sc.currentPos + i < endPos) +                    i++; +                sc.Forward(i); +                sc.SetState(SCE_MARKDOWN_DEFAULT); +            } +        } +        else if (sc.state == SCE_MARKDOWN_STRIKEOUT) { +            if (sc.Match("~~") && sc.chPrev != ' ') { +                sc.Forward(2); +                sc.SetState(SCE_MARKDOWN_DEFAULT); +            } +        } +        else if (sc.state == SCE_MARKDOWN_LINE_BEGIN) { +            // Header +            if (sc.Match("######")) +                SetStateAndZoom(SCE_MARKDOWN_HEADER6, 6, '#', sc); +            else if (sc.Match("#####")) +                SetStateAndZoom(SCE_MARKDOWN_HEADER5, 5, '#', sc); +            else if (sc.Match("####")) +                SetStateAndZoom(SCE_MARKDOWN_HEADER4, 4, '#', sc); +            else if (sc.Match("###")) +                SetStateAndZoom(SCE_MARKDOWN_HEADER3, 3, '#', sc); +            else if (sc.Match("##")) +                SetStateAndZoom(SCE_MARKDOWN_HEADER2, 2, '#', sc); +            else if (sc.Match("#")) { +                // Catch the special case of an unordered list +                if (sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) { +                    precharCount = 0; +                    sc.SetState(SCE_MARKDOWN_PRECHAR); +                } +                else +                    SetStateAndZoom(SCE_MARKDOWN_HEADER1, 1, '#', sc); +            } +            // Code block +            else if (sc.Match("~~~")) { +                if (!HasPrevLineContent(sc)) +                    sc.SetState(SCE_MARKDOWN_CODEBK); +                else +                    sc.SetState(SCE_MARKDOWN_DEFAULT); +            } +            else if (sc.ch == '=') { +                if (HasPrevLineContent(sc) && FollowToLineEnd('=', SCE_MARKDOWN_HEADER1, endPos, sc)) +                    ; +                else +                    sc.SetState(SCE_MARKDOWN_DEFAULT); +            } +            else if (sc.ch == '-') { +                if (HasPrevLineContent(sc) && FollowToLineEnd('-', SCE_MARKDOWN_HEADER2, endPos, sc)) +                    ; +                else { +                    precharCount = 0; +                    sc.SetState(SCE_MARKDOWN_PRECHAR); +                } +            } +            else if (IsNewline(sc.ch)) +                sc.SetState(SCE_MARKDOWN_LINE_BEGIN); +            else { +                precharCount = 0; +                sc.SetState(SCE_MARKDOWN_PRECHAR); +            } +        } +                 +        // The header lasts until the newline +        else if (sc.state == SCE_MARKDOWN_HEADER1 || sc.state == SCE_MARKDOWN_HEADER2 || +                sc.state == SCE_MARKDOWN_HEADER3 || sc.state == SCE_MARKDOWN_HEADER4 || +                sc.state == SCE_MARKDOWN_HEADER5 || sc.state == SCE_MARKDOWN_HEADER6) { +            if (IsNewline(sc.ch)) +                sc.SetState(SCE_MARKDOWN_LINE_BEGIN); +        } +         +        // New state only within the initial whitespace +        if (sc.state == SCE_MARKDOWN_PRECHAR) { +            // Blockquote +            if (sc.ch == '>' && precharCount < 5) +                sc.SetState(SCE_MARKDOWN_BLOCKQUOTE); +            /* +            // Begin of code block +            else if (!HasPrevLineContent(sc) && (sc.chPrev == '\t' || precharCount >= 4)) +                sc.SetState(SCE_MARKDOWN_CODEBK); +            */ +            // HRule - Total of three or more hyphens, asterisks, or underscores  +            // on a line by themselves     +            else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '_') && IsValidHrule(endPos, sc)) +                ; +            // Unordered list +            else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '+') && IsASpaceOrTab(sc.chNext)) { +                sc.SetState(SCE_MARKDOWN_ULIST_ITEM); +                sc.ForwardSetState(SCE_MARKDOWN_DEFAULT); +            } +            // Ordered list +            else if (IsADigit(sc.ch)) { +                int digitCount = 0; +                while (IsADigit(sc.GetRelative(++digitCount))) +                    ; +                if (sc.GetRelative(digitCount) == '.' &&  +                        IsASpaceOrTab(sc.GetRelative(digitCount + 1))) { +                    sc.SetState(SCE_MARKDOWN_OLIST_ITEM); +                    sc.Forward(digitCount + 1); +                    sc.SetState(SCE_MARKDOWN_DEFAULT); +                } +            } +            // Alternate Ordered list +            else if (sc.ch == '#' && sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) { +                sc.SetState(SCE_MARKDOWN_OLIST_ITEM); +                sc.Forward(2); +                sc.SetState(SCE_MARKDOWN_DEFAULT); +            } +            else if (sc.ch != ' ' || precharCount > 2) +                sc.SetState(SCE_MARKDOWN_DEFAULT); +            else +                ++precharCount; +        } +         +        // New state anywhere in doc +        if (sc.state == SCE_MARKDOWN_DEFAULT) { +            if (sc.atLineStart && sc.ch == '#') { +                sc.SetState(SCE_MARKDOWN_LINE_BEGIN); +                freezeCursor = true; +            } +            // Links and Images +            if (sc.Match("![") || sc.ch == '[') { +                int i = 0, j = 0, k = 0; +                int len = endPos - sc.currentPos; +                while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\')) +                    ; +                if (sc.GetRelative(i) == ']') { +                    j = i; +                    if (sc.GetRelative(++i) == '(') { +                        while (i < len && (sc.GetRelative(++i) != ')' || sc.GetRelative(i - 1) == '\\')) +                            ; +                        if (sc.GetRelative(i) == ')') +                            k = i; +                    } +                    else if (sc.GetRelative(i) == '[' || sc.GetRelative(++i) == '[') { +                        while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\')) +                            ; +                        if (sc.GetRelative(i) == ']') +                            k = i; +                    } +                } +                // At least a link text +                if (j) { +                    sc.SetState(SCE_MARKDOWN_LINK); +                    sc.Forward(j); +                    // Also has a URL or reference portion +                    if (k) +                        sc.Forward(k - j); +                    sc.ForwardSetState(SCE_MARKDOWN_DEFAULT); +                } +            } +            // Code - also a special case for alternate inside spacing +            if (sc.Match("``") && sc.GetRelative(3) != ' ') { +                sc.SetState(SCE_MARKDOWN_CODE2); +                sc.Forward(); +            } +            else if (sc.ch == '`' && sc.chNext != ' ') { +                sc.SetState(SCE_MARKDOWN_CODE); +            } +            // Strong +            else if (sc.Match("**") && sc.GetRelative(2) != ' ') { +                sc.SetState(SCE_MARKDOWN_STRONG1); +                sc.Forward(); +           } +            else if (sc.Match("__") && sc.GetRelative(2) != ' ') { +                sc.SetState(SCE_MARKDOWN_STRONG2); +                sc.Forward(); +            } +            // Emphasis +            else if (sc.ch == '*' && sc.chNext != ' ') +                sc.SetState(SCE_MARKDOWN_EM1); +            else if (sc.ch == '_' && sc.chNext != ' ') +                sc.SetState(SCE_MARKDOWN_EM2); +            // Strikeout +            else if (sc.Match("~~") && sc.GetRelative(2) != ' ') { +                sc.SetState(SCE_MARKDOWN_STRIKEOUT); +                sc.Forward(); +            } +            // Beginning of line +            else if (IsNewline(sc.ch)) +                sc.SetState(SCE_MARKDOWN_LINE_BEGIN); +        } +        // Advance if not holding back the cursor for this iteration. +        if (!freezeCursor) +            sc.Forward(); +        freezeCursor = false; +    } +    sc.Complete(); +} + +LexerModule lmMarkdown(SCLEX_MARKDOWN, ColorizeMarkdownDoc, "markdown"); | 
