diff options
-rw-r--r-- | doc/ScintillaHistory.html | 6 | ||||
-rw-r--r-- | include/SciLexer.h | 15 | ||||
-rw-r--r-- | include/Scintilla.iface | 17 | ||||
-rw-r--r-- | lexers/LexJSON.cxx | 497 | ||||
-rw-r--r-- | src/Catalogue.cxx | 1 | ||||
-rw-r--r-- | win32/scintilla.mak | 3 |
6 files changed, 538 insertions, 1 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index d1331ab10..50029dd8f 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -500,7 +500,11 @@ </h3> <ul> <li> - Released 18 May 2016. + Released 13 March 2016. + </li> + <li> + JSON lexer added. + <a href="http://sourceforge.net/p/scintilla/feature-requests/1140/">Feature #1140.</a> </li> <li> The Python lexer treats '@' as an operator except when it is the first visible character on a line. diff --git a/include/SciLexer.h b/include/SciLexer.h index 2103b47b9..6b98b09fc 100644 --- a/include/SciLexer.h +++ b/include/SciLexer.h @@ -132,6 +132,7 @@ #define SCLEX_SREC 117 #define SCLEX_IHEX 118 #define SCLEX_TEHEX 119 +#define SCLEX_JSON 120 #define SCLEX_AUTOMATIC 1000 #define SCE_P_DEFAULT 0 #define SCE_P_COMMENTLINE 1 @@ -1787,6 +1788,20 @@ #define SCE_HEX_CHECKSUM 16 #define SCE_HEX_CHECKSUM_WRONG 17 #define SCE_HEX_GARBAGE 18 +#define SCE_JSON_DEFAULT 0 +#define SCE_JSON_NUMBER 1 +#define SCE_JSON_STRING 2 +#define SCE_JSON_STRINGEOL 3 +#define SCE_JSON_PROPERTYNAME 4 +#define SCE_JSON_ESCAPESEQUENCE 5 +#define SCE_JSON_LINECOMMENT 6 +#define SCE_JSON_BLOCKCOMMENT 7 +#define SCE_JSON_OPERATOR 8 +#define SCE_JSON_URI 9 +#define SCE_JSON_COMPACTIRI 10 +#define SCE_JSON_KEYWORD 11 +#define SCE_JSON_LDKEYWORD 12 +#define SCE_JSON_ERROR 13 /* --Autogenerated -- end of section automatically generated from Scintilla.iface */ #endif diff --git a/include/Scintilla.iface b/include/Scintilla.iface index 91b3066b5..71defe09d 100644 --- a/include/Scintilla.iface +++ b/include/Scintilla.iface @@ -2799,6 +2799,7 @@ val SCLEX_BIBTEX=116 val SCLEX_SREC=117 val SCLEX_IHEX=118 val SCLEX_TEHEX=119 +val SCLEX_JSON=120 # When a lexer specifies its language as SCLEX_AUTOMATIC it receives a # value assigned in sequence from SCLEX_AUTOMATIC+1. @@ -4677,6 +4678,22 @@ val SCE_HEX_GARBAGE=18 lex IHex=SCLEX_IHEX SCE_HEX_ # Lexical state for SCLEX_TEHEX (shared with Srec) lex TEHex=SCLEX_TEHEX SCE_HEX_ +# Lexical states for SCLEX_JSON +lex JSON=SCLEX_JSON SCE_JSON_ +val SCE_JSON_DEFAULT=0 +val SCE_JSON_NUMBER=1 +val SCE_JSON_STRING=2 +val SCE_JSON_STRINGEOL=3 +val SCE_JSON_PROPERTYNAME=4 +val SCE_JSON_ESCAPESEQUENCE=5 +val SCE_JSON_LINECOMMENT=6 +val SCE_JSON_BLOCKCOMMENT=7 +val SCE_JSON_OPERATOR=8 +val SCE_JSON_URI=9 +val SCE_JSON_COMPACTIRI=10 +val SCE_JSON_KEYWORD=11 +val SCE_JSON_LDKEYWORD=12 +val SCE_JSON_ERROR=13 # Events diff --git a/lexers/LexJSON.cxx b/lexers/LexJSON.cxx new file mode 100644 index 000000000..9c044e52c --- /dev/null +++ b/lexers/LexJSON.cxx @@ -0,0 +1,497 @@ +// Scintilla source code edit control +/** + * @file LexJSON.cxx + * @date February 19, 2016 + * @brief Lexer for JSON and JSON-LD formats + * @author nkmathew + * + * The License.txt file describes the conditions under which this software may + * be distributed. + * + */ + +#include <cstdlib> +#include <cassert> +#include <cctype> +#include <cstdio> +#include <string> +#include <vector> +#include <map> + +#include "ILexer.h" +#include "Scintilla.h" +#include "SciLexer.h" +#include "WordList.h" +#include "LexAccessor.h" +#include "StyleContext.h" +#include "CharacterSet.h" +#include "LexerModule.h" +#include "OptionSet.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +static const char *const JSONWordListDesc[] = { + "JSON Keywords", + "JSON-LD Keywords", + 0 +}; + +/** + * Used to detect compact IRI/URLs in JSON-LD without first looking ahead for the + * colon separating the prefix and suffix + * + * https://www.w3.org/TR/json-ld/#dfn-compact-iri + */ +struct CompactIRI { + int colonCount; + bool foundInvalidChar; + CharacterSet setCompactIRI; + CompactIRI() { + colonCount = 0; + foundInvalidChar = false; + setCompactIRI = CharacterSet(CharacterSet::setAlpha, "$_-"); + } + void resetState() { + colonCount = 0; + foundInvalidChar = false; + } + void checkChar(int ch) { + if (ch == ':') { + colonCount++; + } else { + foundInvalidChar |= !setCompactIRI.Contains(ch); + } + } + bool shouldHighlight() const { + return !foundInvalidChar && colonCount == 1; + } +}; + +/** + * Keeps track of escaped characters in strings as per: + * + * https://tools.ietf.org/html/rfc7159#section-7 + */ +struct EscapeSequence { + int digitsLeft; + CharacterSet setHexDigits; + CharacterSet setEscapeChars; + EscapeSequence() { + digitsLeft = 0; + setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef"); + setEscapeChars = CharacterSet(CharacterSet::setNone, "\\\"tnbfru/"); + } + // Returns true if the following character is a valid escaped character + bool newSequence(int nextChar) { + digitsLeft = 0; + if (nextChar == 'u') { + digitsLeft = 5; + } else if (!setEscapeChars.Contains(nextChar)) { + return false; + } + return true; + } + bool atEscapeEnd() const { + return digitsLeft <= 0; + } + bool isInvalidChar(int currChar) const { + return !setHexDigits.Contains(currChar); + } +}; + +struct OptionsJSON { + bool foldCompact; + bool fold; + bool allowComments; + bool escapeSequence; + OptionsJSON() { + foldCompact = false; + fold = false; + allowComments = false; + escapeSequence = false; + } +}; + +struct OptionSetJSON : public OptionSet<OptionsJSON> { + OptionSetJSON() { + DefineProperty("lexer.json.escape.sequence", &OptionsJSON::escapeSequence, + "Set to 1 to enable highlighting of escape sequences in strings"); + + DefineProperty("lexer.json.allow.comments", &OptionsJSON::allowComments, + "Set to 1 to enable highlighting of line/block comments in JSON"); + + DefineProperty("fold.compact", &OptionsJSON::foldCompact); + DefineProperty("fold", &OptionsJSON::fold); + DefineWordListSets(JSONWordListDesc); + } +}; + +class LexerJSON : public ILexer { + OptionsJSON options; + OptionSetJSON optSetJSON; + EscapeSequence escapeSeq; + WordList keywordsJSON; + WordList keywordsJSONLD; + CharacterSet setOperators; + CharacterSet setURL; + CharacterSet setKeywordJSONLD; + CharacterSet setKeywordJSON; + CompactIRI compactIRI; + + static bool IsNextNonWhitespace(LexAccessor &styler, Sci_Position start, char ch) { + Sci_Position i = 0; + while (i < 50) { + i++; + char curr = styler.SafeGetCharAt(start+i, '\0'); + char next = styler.SafeGetCharAt(start+i+1, '\0'); + bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n'); + if (curr == ch) { + return true; + } else if (!isspacechar(curr) || atEOL) { + return false; + } + } + return false; + } + + /** + * Looks for the colon following the end quote + * + * Assumes property names of lengths no longer than a 100 characters. + * The colon is also expected to be less than 50 spaces after the end + * quote for the string to be considered a property name + */ + static bool AtPropertyName(LexAccessor &styler, Sci_Position start) { + Sci_Position i = 0; + bool escaped = false; + while (i < 100) { + i++; + char curr = styler.SafeGetCharAt(start+i, '\0'); + if (escaped) { + escaped = false; + continue; + } + escaped = curr == '\\'; + if (curr == '"') { + return IsNextNonWhitespace(styler, start+i, ':'); + } else if (!curr) { + return false; + } + } + return false; + } + + static bool IsNextWordInList(WordList &keywordList, CharacterSet wordSet, + StyleContext &context, LexAccessor &styler) { + char word[51]; + Sci_Position currPos = (Sci_Position) context.currentPos; + int i = 0; + while (i < 50) { + char ch = styler.SafeGetCharAt(currPos + i); + if (!wordSet.Contains(ch)) { + break; + } + word[i] = ch; + i++; + } + word[i] = '\0'; + return keywordList.InList(word); + } + + public: + LexerJSON() : + setOperators(CharacterSet::setNone, "[{}]:,"), + setURL(CharacterSet::setAlphaNum, "-._~:/?#[]@!$&'()*+,),="), + setKeywordJSONLD(CharacterSet::setAlpha, ":@"), + setKeywordJSON(CharacterSet::setAlpha, "$_") { + } + virtual ~LexerJSON() {} + virtual int SCI_METHOD Version() const { + return lvOriginal; + } + virtual void SCI_METHOD Release() { + delete this; + } + virtual const char *SCI_METHOD PropertyNames() { + return optSetJSON.PropertyNames(); + } + virtual int SCI_METHOD PropertyType(const char *name) { + return optSetJSON.PropertyType(name); + } + virtual const char *SCI_METHOD DescribeProperty(const char *name) { + return optSetJSON.DescribeProperty(name); + } + virtual Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) { + if (optSetJSON.PropertySet(&options, key, val)) { + return 0; + } + return -1; + } + virtual Sci_Position SCI_METHOD WordListSet(int n, const char *wl) { + WordList *wordListN = 0; + switch (n) { + case 0: + wordListN = &keywordsJSON; + break; + case 1: + wordListN = &keywordsJSONLD; + break; + } + Sci_Position firstModification = -1; + if (wordListN) { + WordList wlNew; + wlNew.Set(wl); + if (*wordListN != wlNew) { + wordListN->Set(wl); + firstModification = 0; + } + } + return firstModification; + } + virtual void *SCI_METHOD PrivateCall(int, void *) { + return 0; + } + static ILexer *LexerFactoryJSON() { + return new LexerJSON; + } + virtual const char *SCI_METHOD DescribeWordListSets() { + return optSetJSON.DescribeWordListSets(); + } + virtual void SCI_METHOD Lex(Sci_PositionU startPos, + Sci_Position length, + int initStyle, + IDocument *pAccess); + virtual void SCI_METHOD Fold(Sci_PositionU startPos, + Sci_Position length, + int initStyle, + IDocument *pAccess); +}; + +void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos, + Sci_Position length, + int initStyle, + IDocument *pAccess) { + LexAccessor styler(pAccess); + StyleContext context(startPos, length, initStyle, styler); + int stringStyleBefore = SCE_JSON_STRING; + while (context.More()) { + switch (context.state) { + case SCE_JSON_BLOCKCOMMENT: + if (context.Match("*/")) { + context.Forward(); + context.ForwardSetState(SCE_JSON_DEFAULT); + } + break; + case SCE_JSON_LINECOMMENT: + if (context.atLineEnd) { + context.SetState(SCE_JSON_DEFAULT); + } + break; + case SCE_JSON_STRINGEOL: + if (context.atLineStart) { + context.SetState(SCE_JSON_DEFAULT); + } + break; + case SCE_JSON_ESCAPESEQUENCE: + escapeSeq.digitsLeft--; + if (!escapeSeq.atEscapeEnd()) { + if (escapeSeq.isInvalidChar(context.ch)) { + context.SetState(SCE_JSON_ERROR); + } + break; + } + if (context.ch == '"') { + context.SetState(stringStyleBefore); + context.ForwardSetState(SCE_C_DEFAULT); + } else if (context.ch == '\\') { + if (!escapeSeq.newSequence(context.chNext)) { + context.SetState(SCE_JSON_ERROR); + } + context.Forward(); + } else { + context.SetState(stringStyleBefore); + if (context.atLineEnd) { + context.ChangeState(SCE_JSON_STRINGEOL); + } + } + break; + case SCE_JSON_PROPERTYNAME: + case SCE_JSON_STRING: + if (context.ch == '"') { + if (compactIRI.shouldHighlight()) { + context.ChangeState(SCE_JSON_COMPACTIRI); + context.ForwardSetState(SCE_JSON_DEFAULT); + compactIRI.resetState(); + } else { + context.ForwardSetState(SCE_JSON_DEFAULT); + } + } else if (context.atLineEnd) { + context.ChangeState(SCE_JSON_STRINGEOL); + } else if (context.ch == '\\') { + stringStyleBefore = context.state; + if (options.escapeSequence) { + context.SetState(SCE_JSON_ESCAPESEQUENCE); + if (!escapeSeq.newSequence(context.chNext)) { + context.SetState(SCE_JSON_ERROR); + } + } + context.Forward(); + } else if (context.Match("https://") || + context.Match("http://") || + context.Match("ssh://") || + context.Match("git://") || + context.Match("svn://") || + context.Match("ftp://") || + context.Match("mailto:")) { + // Handle most common URI schemes only + stringStyleBefore = context.state; + context.SetState(SCE_JSON_URI); + } else if (context.ch == '@') { + // https://www.w3.org/TR/json-ld/#dfn-keyword + if (IsNextWordInList(keywordsJSONLD, setKeywordJSONLD, context, styler)) { + stringStyleBefore = context.state; + context.SetState(SCE_JSON_LDKEYWORD); + } + } else { + compactIRI.checkChar(context.ch); + } + break; + case SCE_JSON_LDKEYWORD: + case SCE_JSON_URI: + if ((!setKeywordJSONLD.Contains(context.ch) && + (context.state == SCE_JSON_LDKEYWORD)) || + (!setURL.Contains(context.ch))) { + context.SetState(stringStyleBefore); + } + if (context.ch == '"') { + context.ForwardSetState(SCE_JSON_DEFAULT); + } else if (context.atLineEnd) { + context.ChangeState(SCE_JSON_STRINGEOL); + } + break; + case SCE_JSON_OPERATOR: + case SCE_JSON_NUMBER: + context.SetState(SCE_JSON_DEFAULT); + break; + case SCE_JSON_ERROR: + if (context.atLineEnd) { + context.SetState(SCE_JSON_DEFAULT); + } + break; + case SCE_JSON_KEYWORD: + if (!setKeywordJSON.Contains(context.ch)) { + context.SetState(SCE_JSON_DEFAULT); + } + break; + } + if (context.state == SCE_JSON_DEFAULT) { + if (context.ch == '"') { + compactIRI.resetState(); + context.SetState(SCE_JSON_STRING); + Sci_Position currPos = static_cast<Sci_Position>(context.currentPos); + if (AtPropertyName(styler, currPos)) { + context.SetState(SCE_JSON_PROPERTYNAME); + } + } else if (setOperators.Contains(context.ch)) { + context.SetState(SCE_JSON_OPERATOR); + } else if (options.allowComments && context.Match("/*")) { + context.SetState(SCE_JSON_BLOCKCOMMENT); + context.Forward(); + } else if (options.allowComments && context.Match("//")) { + context.SetState(SCE_JSON_LINECOMMENT); + } else if (setKeywordJSON.Contains(context.ch)) { + if (IsNextWordInList(keywordsJSON, setKeywordJSON, context, styler)) { + context.SetState(SCE_JSON_KEYWORD); + } + } + bool numberStart = + IsADigit(context.ch) && (context.chPrev == '+'|| + context.chPrev == '-' || + context.atLineStart || + IsASpace(context.chPrev) || + setOperators.Contains(context.chPrev)); + bool exponentPart = + tolower(context.ch) == 'e' && + IsADigit(context.chPrev) && + (IsADigit(context.chNext) || + context.chNext == '+' || + context.chNext == '-'); + bool signPart = + (context.ch == '-' || context.ch == '+') && + ((tolower(context.chPrev) == 'e' && IsADigit(context.chNext)) || + ((IsASpace(context.chPrev) || setOperators.Contains(context.chPrev)) + && IsADigit(context.chNext))); + bool adjacentDigit = + IsADigit(context.ch) && IsADigit(context.chPrev); + bool afterExponent = IsADigit(context.ch) && tolower(context.chPrev) == 'e'; + bool dotPart = context.ch == '.' && + IsADigit(context.chPrev) && + IsADigit(context.chNext); + bool afterDot = IsADigit(context.ch) && context.chPrev == '.'; + if (numberStart || + exponentPart || + signPart || + adjacentDigit || + dotPart || + afterExponent || + afterDot) { + context.SetState(SCE_JSON_NUMBER); + } else if (context.state == SCE_JSON_DEFAULT && !IsASpace(context.ch)) { + context.SetState(SCE_JSON_ERROR); + } + } + context.Forward(); + } + context.Complete(); +} + +void SCI_METHOD LexerJSON::Fold(Sci_PositionU startPos, + Sci_Position length, + int, + IDocument *pAccess) { + if (!options.fold) { + return; + } + LexAccessor styler(pAccess); + Sci_PositionU currLine = styler.GetLine(startPos); + Sci_PositionU endPos = startPos + length; + int currLevel = styler.LevelAt(currLine) & SC_FOLDLEVELNUMBERMASK; + int nextLevel = currLevel; + int visibleChars = 0; + for (Sci_PositionU i = startPos; i < endPos; i++) { + char curr = styler.SafeGetCharAt(i); + char next = styler.SafeGetCharAt(i+1); + bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n'); + if (styler.StyleAt(i) == SCE_JSON_OPERATOR) { + if (curr == '{' || curr == '[') { + nextLevel++; + } else if (curr == '}' || curr == ']') { + nextLevel--; + } + } + if (atEOL || i == (endPos-1)) { + int level = currLevel; + if (!visibleChars && options.foldCompact) { + level |= SC_FOLDLEVELWHITEFLAG; + } else if (nextLevel > currLevel) { + level |= SC_FOLDLEVELHEADERFLAG; + } + if (level != styler.LevelAt(currLine)) { + styler.SetLevel(currLine, level); + } + currLine++; + currLevel = nextLevel; + visibleChars = 0; + } + if (!isspacechar(curr)) { + visibleChars++; + } + } +} + +LexerModule lmJSON(SCLEX_JSON, + LexerJSON::LexerFactoryJSON, + "json", + JSONWordListDesc); diff --git a/src/Catalogue.cxx b/src/Catalogue.cxx index ed47aa8b7..e6aa2587e 100644 --- a/src/Catalogue.cxx +++ b/src/Catalogue.cxx @@ -126,6 +126,7 @@ int Scintilla_LinkLexers() { LINK_LEXER(lmHTML); LINK_LEXER(lmIHex); LINK_LEXER(lmInno); + LINK_LEXER(lmJSON); LINK_LEXER(lmKix); LINK_LEXER(lmKVIrc); LINK_LEXER(lmLatex); diff --git a/win32/scintilla.mak b/win32/scintilla.mak index e0d16f387..ad72c51c7 100644 --- a/win32/scintilla.mak +++ b/win32/scintilla.mak @@ -149,6 +149,7 @@ LEXOBJS=\ $(DIR_O)\LexHex.obj \ $(DIR_O)\LexHTML.obj \ $(DIR_O)\LexInno.obj \ + $(DIR_O)\LexJSON.obj \ $(DIR_O)\LexKix.obj \ $(DIR_O)\LexKVIrc.obj \ $(DIR_O)\LexLaTeX.obj \ @@ -581,6 +582,8 @@ $(DIR_O)\LexHTML.obj: ..\lexers\LexHTML.cxx $(LEX_HEADERS) $(DIR_O)\LexInno.obj: ..\lexers\LexInno.cxx $(LEX_HEADERS) +$(DIR_O)\LexJSON.obj: ..\lexers\LexJSON.cxx $(LEX_HEADERS) + $(DIR_O)\LexKix.obj: ..\lexers\LexKix.cxx $(LEX_HEADERS) $(DIR_O)\LexKVIrc.obj: ..\lexers\LexKVIrc.cxx $(LEX_HEADERS) |