about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- doc/ScintillaHistory.html 6
-rw-r--r-- include/SciLexer.h 15
-rw-r--r-- include/Scintilla.iface 17
-rw-r--r-- lexers/LexJSON.cxx 497
-rw-r--r-- src/Catalogue.cxx 1
-rw-r--r-- win32/scintilla.mak 3
6 files changed, 538 insertions, 1 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index d1331ab10..50029dd8f 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -500,7 +500,11 @@
</h3>
<ul>
<li>
- Released 18 May 2016.
+ Released 13 March 2016.
+ </li>
+ <li>
+ JSON lexer added.
+ <a href="http://sourceforge.net/p/scintilla/feature-requests/1140/">Feature #1140.</a>
</li>
<li>
The Python lexer treats '@' as an operator except when it is the first visible character on a line.
diff --git a/include/SciLexer.h b/include/SciLexer.h
index 2103b47b9..6b98b09fc 100644
--- a/include/SciLexer.h
+++ b/include/SciLexer.h
@@ -132,6 +132,7 @@
#define SCLEX_SREC 117
#define SCLEX_IHEX 118
#define SCLEX_TEHEX 119
+#define SCLEX_JSON 120
#define SCLEX_AUTOMATIC 1000
#define SCE_P_DEFAULT 0
#define SCE_P_COMMENTLINE 1
@@ -1787,6 +1788,20 @@
#define SCE_HEX_CHECKSUM 16
#define SCE_HEX_CHECKSUM_WRONG 17
#define SCE_HEX_GARBAGE 18
+#define SCE_JSON_DEFAULT 0
+#define SCE_JSON_NUMBER 1
+#define SCE_JSON_STRING 2
+#define SCE_JSON_STRINGEOL 3
+#define SCE_JSON_PROPERTYNAME 4
+#define SCE_JSON_ESCAPESEQUENCE 5
+#define SCE_JSON_LINECOMMENT 6
+#define SCE_JSON_BLOCKCOMMENT 7
+#define SCE_JSON_OPERATOR 8
+#define SCE_JSON_URI 9
+#define SCE_JSON_COMPACTIRI 10
+#define SCE_JSON_KEYWORD 11
+#define SCE_JSON_LDKEYWORD 12
+#define SCE_JSON_ERROR 13
/* --Autogenerated -- end of section automatically generated from Scintilla.iface */
#endif
diff --git a/include/Scintilla.iface b/include/Scintilla.iface
index 91b3066b5..71defe09d 100644
--- a/include/Scintilla.iface
+++ b/include/Scintilla.iface
@@ -2799,6 +2799,7 @@ val SCLEX_BIBTEX=116
val SCLEX_SREC=117
val SCLEX_IHEX=118
val SCLEX_TEHEX=119
+val SCLEX_JSON=120
# When a lexer specifies its language as SCLEX_AUTOMATIC it receives a
# value assigned in sequence from SCLEX_AUTOMATIC+1.
@@ -4677,6 +4678,22 @@ val SCE_HEX_GARBAGE=18
lex IHex=SCLEX_IHEX SCE_HEX_
# Lexical state for SCLEX_TEHEX (shared with Srec)
lex TEHex=SCLEX_TEHEX SCE_HEX_
+# Lexical states for SCLEX_JSON
+lex JSON=SCLEX_JSON SCE_JSON_
+val SCE_JSON_DEFAULT=0
+val SCE_JSON_NUMBER=1
+val SCE_JSON_STRING=2
+val SCE_JSON_STRINGEOL=3
+val SCE_JSON_PROPERTYNAME=4
+val SCE_JSON_ESCAPESEQUENCE=5
+val SCE_JSON_LINECOMMENT=6
+val SCE_JSON_BLOCKCOMMENT=7
+val SCE_JSON_OPERATOR=8
+val SCE_JSON_URI=9
+val SCE_JSON_COMPACTIRI=10
+val SCE_JSON_KEYWORD=11
+val SCE_JSON_LDKEYWORD=12
+val SCE_JSON_ERROR=13
# Events
diff --git a/lexers/LexJSON.cxx b/lexers/LexJSON.cxx
new file mode 100644
index 000000000..9c044e52c
--- /dev/null
+++ b/lexers/LexJSON.cxx
@@ -0,0 +1,497 @@
+// Scintilla source code edit control
+/**
+ * @file LexJSON.cxx
+ * @date February 19, 2016
+ * @brief Lexer for JSON and JSON-LD formats
+ * @author nkmathew
+ *
+ * The License.txt file describes the conditions under which this software may
+ * be distributed.
+ *
+ */
+
+#include <cstdlib>
+#include <cassert>
+#include <cctype>
+#include <cstdio>
+#include <string>
+#include <vector>
+#include <map>
+
+#include "ILexer.h"
+#include "Scintilla.h"
+#include "SciLexer.h"
+#include "WordList.h"
+#include "LexAccessor.h"
+#include "StyleContext.h"
+#include "CharacterSet.h"
+#include "LexerModule.h"
+#include "OptionSet.h"
+
+#ifdef SCI_NAMESPACE
+using namespace Scintilla;
+#endif
+
+// Descriptions of the keyword lists accepted by this lexer, in the index order
+// used by LexerJSON::WordListSet (0 = JSON keywords, 1 = JSON-LD keywords).
+// The last element is a null entry.
+static const char *const JSONWordListDesc[] = {
+ "JSON Keywords",
+ "JSON-LD Keywords",
+ 0
+};
+
+/**
+ * Accumulates facts about the characters of a string while it is being lexed
+ * so that a JSON-LD compact IRI (prefix:suffix) can be recognised when the
+ * closing quote is reached, without looking ahead for the separating colon.
+ *
+ * https://www.w3.org/TR/json-ld/#dfn-compact-iri
+ */
+struct CompactIRI {
+    int colonCount;
+    bool foundInvalidChar;
+    CharacterSet setCompactIRI;
+    CompactIRI() :
+        colonCount(0),
+        foundInvalidChar(false),
+        setCompactIRI(CharacterSet::setAlpha, "$_-") {
+    }
+    // Clear the counters so that a new string can be examined
+    void resetState() {
+        foundInvalidChar = false;
+        colonCount = 0;
+    }
+    // Record one character: colons are counted, any other character must be
+    // a member of setCompactIRI or the string is marked as disqualified
+    void checkChar(int ch) {
+        if (ch != ':') {
+            if (!setCompactIRI.Contains(ch)) {
+                foundInvalidChar = true;
+            }
+            return;
+        }
+        colonCount++;
+    }
+    // True when exactly one colon and no disqualifying character were recorded
+    bool shouldHighlight() const {
+        return colonCount == 1 && !foundInvalidChar;
+    }
+};
+
+/**
+ * Bookkeeping for backslash escapes inside strings, following:
+ *
+ * https://tools.ietf.org/html/rfc7159#section-7
+ */
+struct EscapeSequence {
+    int digitsLeft;
+    CharacterSet setHexDigits;
+    CharacterSet setEscapeChars;
+    EscapeSequence() :
+        digitsLeft(0),
+        setHexDigits(CharacterSet::setDigits, "ABCDEFabcdef"),
+        setEscapeChars(CharacterSet::setNone, "\\\"tnbfru/") {
+    }
+    // Begins a new escape given the character after the backslash.
+    // Returns true if that character may legally follow a backslash.
+    bool newSequence(int nextChar) {
+        if (nextChar == 'u') {
+            // LexerJSON::Lex decrements digitsLeft before it examines each
+            // character of the escape, so 5 leaves room for four hex digits
+            digitsLeft = 5;
+            return true;
+        }
+        digitsLeft = 0;
+        return setEscapeChars.Contains(nextChar);
+    }
+    // True once no further characters of the escape are expected
+    bool atEscapeEnd() const {
+        return digitsLeft < 1;
+    }
+    // True if currChar is not a hexadecimal digit
+    bool isInvalidChar(int currChar) const {
+        const bool isHexDigit = setHexDigits.Contains(currChar);
+        return !isHexDigit;
+    }
+};
+
+// Values of the lexer's properties; every option starts out switched off
+struct OptionsJSON {
+    bool foldCompact;
+    bool fold;
+    bool allowComments;
+    bool escapeSequence;
+    OptionsJSON() :
+        foldCompact(false),
+        fold(false),
+        allowComments(false),
+        escapeSequence(false) {
+    }
+};
+
+// Declares the properties and word list descriptions of the JSON lexer,
+// binding each property name to the OptionsJSON member that stores its value
+struct OptionSetJSON : public OptionSet<OptionsJSON> {
+ OptionSetJSON() {
+ // Lexer-specific switches, each registered with a description string
+ DefineProperty("lexer.json.escape.sequence", &OptionsJSON::escapeSequence,
+ "Set to 1 to enable highlighting of escape sequences in strings");
+
+ DefineProperty("lexer.json.allow.comments", &OptionsJSON::allowComments,
+ "Set to 1 to enable highlighting of line/block comments in JSON");
+
+ // Folding properties, registered without a description string
+ DefineProperty("fold.compact", &OptionsJSON::foldCompact);
+ DefineProperty("fold", &OptionsJSON::fold);
+ DefineWordListSets(JSONWordListDesc);
+ }
+};
+
+/**
+ * Lexer and folder for JSON and JSON-LD documents.
+ *
+ * Holds the option values, the two keyword lists and the character sets used
+ * to classify operators, URI characters and keyword characters.
+ */
+class LexerJSON : public ILexer {
+    OptionsJSON options;
+    OptionSetJSON optSetJSON;
+    EscapeSequence escapeSeq;
+    WordList keywordsJSON;
+    WordList keywordsJSONLD;
+    CharacterSet setOperators;
+    CharacterSet setURL;
+    CharacterSet setKeywordJSONLD;
+    CharacterSet setKeywordJSON;
+    CompactIRI compactIRI;
+
+    /**
+     * Returns true when the first non-whitespace character after position
+     * start is ch and it appears before the end of the line.
+     *
+     * Only the 50 characters following start are examined.
+     */
+    static bool IsNextNonWhitespace(LexAccessor &styler, Sci_Position start, char ch) {
+        Sci_Position i = 0;
+        while (i < 50) {
+            i++;
+            char curr = styler.SafeGetCharAt(start+i, '\0');
+            char next = styler.SafeGetCharAt(start+i+1, '\0');
+            bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n');
+            if (curr == ch) {
+                return true;
+            } else if (!isspacechar(curr) || atEOL) {
+                return false;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Looks for the colon following the end quote of the string whose
+     * opening quote is at position start.
+     *
+     * Assumes property names are no longer than 100 characters.
+     * The colon is also expected to be less than 50 spaces after the end
+     * quote for the string to be considered a property name.
+     */
+    static bool AtPropertyName(LexAccessor &styler, Sci_Position start) {
+        Sci_Position i = 0;
+        bool escaped = false;
+        while (i < 100) {
+            i++;
+            char curr = styler.SafeGetCharAt(start+i, '\0');
+            if (escaped) {
+                // The character after a backslash can never end the string
+                escaped = false;
+                continue;
+            }
+            escaped = curr == '\\';
+            if (curr == '"') {
+                return IsNextNonWhitespace(styler, start+i, ':');
+            } else if (!curr) {
+                // NUL is the default requested from SafeGetCharAt above
+                return false;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Collects the run of wordSet characters starting at the current position
+     * (at most 50 of them) and reports whether that word is in keywordList.
+     *
+     * wordSet is taken by const reference so that the character set is not
+     * copied on every call from the lexing loop.
+     */
+    static bool IsNextWordInList(WordList &keywordList, const CharacterSet &wordSet,
+                                 StyleContext &context, LexAccessor &styler) {
+        char word[51];
+        const Sci_Position currPos = static_cast<Sci_Position>(context.currentPos);
+        int i = 0;
+        while (i < 50) {
+            char ch = styler.SafeGetCharAt(currPos + i);
+            if (!wordSet.Contains(ch)) {
+                break;
+            }
+            word[i] = ch;
+            i++;
+        }
+        word[i] = '\0';
+        return keywordList.InList(word);
+    }
+
+public:
+    LexerJSON() :
+        setOperators(CharacterSet::setNone, "[{}]:,"),
+        setURL(CharacterSet::setAlphaNum, "-._~:/?#[]@!$&'()*+,),="),
+        setKeywordJSONLD(CharacterSet::setAlpha, ":@"),
+        setKeywordJSON(CharacterSet::setAlpha, "$_") {
+    }
+    virtual ~LexerJSON() {}
+    virtual int SCI_METHOD Version() const {
+        return lvOriginal;
+    }
+    virtual void SCI_METHOD Release() {
+        delete this;
+    }
+    virtual const char *SCI_METHOD PropertyNames() {
+        return optSetJSON.PropertyNames();
+    }
+    virtual int SCI_METHOD PropertyType(const char *name) {
+        return optSetJSON.PropertyType(name);
+    }
+    virtual const char *SCI_METHOD DescribeProperty(const char *name) {
+        return optSetJSON.DescribeProperty(name);
+    }
+    // Returns 0 when optSetJSON.PropertySet reports true, otherwise -1
+    virtual Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) {
+        if (optSetJSON.PropertySet(&options, key, val)) {
+            return 0;
+        }
+        return -1;
+    }
+    // Replaces keyword list n (0 = JSON, 1 = JSON-LD) with the words in wl.
+    // Returns 0 if the list changed, -1 if it did not or n is not a known list.
+    virtual Sci_Position SCI_METHOD WordListSet(int n, const char *wl) {
+        WordList *wordListN = 0;
+        switch (n) {
+            case 0:
+                wordListN = &keywordsJSON;
+                break;
+            case 1:
+                wordListN = &keywordsJSONLD;
+                break;
+        }
+        Sci_Position firstModification = -1;
+        if (wordListN) {
+            WordList wlNew;
+            wlNew.Set(wl);
+            if (*wordListN != wlNew) {
+                wordListN->Set(wl);
+                firstModification = 0;
+            }
+        }
+        return firstModification;
+    }
+    virtual void *SCI_METHOD PrivateCall(int, void *) {
+        return 0;
+    }
+    // Factory handed to the LexerModule; the new lexer deletes itself in Release()
+    static ILexer *LexerFactoryJSON() {
+        return new LexerJSON;
+    }
+    virtual const char *SCI_METHOD DescribeWordListSets() {
+        return optSetJSON.DescribeWordListSets();
+    }
+    virtual void SCI_METHOD Lex(Sci_PositionU startPos,
+                                Sci_Position length,
+                                int initStyle,
+                                IDocument *pAccess);
+    virtual void SCI_METHOD Fold(Sci_PositionU startPos,
+                                 Sci_Position length,
+                                 int initStyle,
+                                 IDocument *pAccess);
+};
+
+/**
+ * Assigns SCE_JSON_* styles to the text handed to the StyleContext, which is
+ * built from startPos, length and initStyle.
+ *
+ * Each pass of the loop first lets the current state decide whether it ends
+ * at this character, then, if the state is SCE_JSON_DEFAULT, decides which
+ * state (if any) starts at this character.
+ */
+void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
+                               Sci_Position length,
+                               int initStyle,
+                               IDocument *pAccess) {
+    LexAccessor styler(pAccess);
+    StyleContext context(startPos, length, initStyle, styler);
+    // Style to return to after an escape sequence, URI or JSON-LD keyword
+    int stringStyleBefore = SCE_JSON_STRING;
+    while (context.More()) {
+        switch (context.state) {
+            case SCE_JSON_BLOCKCOMMENT:
+                if (context.Match("*/")) {
+                    context.Forward();
+                    context.ForwardSetState(SCE_JSON_DEFAULT);
+                }
+                break;
+            case SCE_JSON_LINECOMMENT:
+                if (context.atLineEnd) {
+                    context.SetState(SCE_JSON_DEFAULT);
+                }
+                break;
+            case SCE_JSON_STRINGEOL:
+                if (context.atLineStart) {
+                    context.SetState(SCE_JSON_DEFAULT);
+                }
+                break;
+            case SCE_JSON_ESCAPESEQUENCE:
+                // While characters of the escape remain, each must be a hex digit
+                escapeSeq.digitsLeft--;
+                if (!escapeSeq.atEscapeEnd()) {
+                    if (escapeSeq.isInvalidChar(context.ch)) {
+                        context.SetState(SCE_JSON_ERROR);
+                    }
+                    break;
+                }
+                if (context.ch == '"') {
+                    // Closing quote directly after the escape ends the string
+                    context.SetState(stringStyleBefore);
+                    context.ForwardSetState(SCE_JSON_DEFAULT);
+                } else if (context.ch == '\\') {
+                    // Another escape follows immediately
+                    if (!escapeSeq.newSequence(context.chNext)) {
+                        context.SetState(SCE_JSON_ERROR);
+                    }
+                    context.Forward();
+                } else {
+                    context.SetState(stringStyleBefore);
+                    if (context.atLineEnd) {
+                        context.ChangeState(SCE_JSON_STRINGEOL);
+                    }
+                }
+                break;
+            case SCE_JSON_PROPERTYNAME:
+            case SCE_JSON_STRING:
+                if (context.ch == '"') {
+                    if (compactIRI.shouldHighlight()) {
+                        // Restyle the whole string as a compact IRI
+                        context.ChangeState(SCE_JSON_COMPACTIRI);
+                        context.ForwardSetState(SCE_JSON_DEFAULT);
+                        compactIRI.resetState();
+                    } else {
+                        context.ForwardSetState(SCE_JSON_DEFAULT);
+                    }
+                } else if (context.atLineEnd) {
+                    context.ChangeState(SCE_JSON_STRINGEOL);
+                } else if (context.ch == '\\') {
+                    stringStyleBefore = context.state;
+                    if (options.escapeSequence) {
+                        context.SetState(SCE_JSON_ESCAPESEQUENCE);
+                        if (!escapeSeq.newSequence(context.chNext)) {
+                            context.SetState(SCE_JSON_ERROR);
+                        }
+                    }
+                    // Step over the backslash; the loop's Forward() then steps
+                    // over the character that follows it
+                    context.Forward();
+                } else if (context.Match("https://") ||
+                           context.Match("http://") ||
+                           context.Match("ssh://") ||
+                           context.Match("git://") ||
+                           context.Match("svn://") ||
+                           context.Match("ftp://") ||
+                           context.Match("mailto:")) {
+                    // Handle most common URI schemes only
+                    stringStyleBefore = context.state;
+                    context.SetState(SCE_JSON_URI);
+                } else if (context.ch == '@') {
+                    // https://www.w3.org/TR/json-ld/#dfn-keyword
+                    if (IsNextWordInList(keywordsJSONLD, setKeywordJSONLD, context, styler)) {
+                        stringStyleBefore = context.state;
+                        context.SetState(SCE_JSON_LDKEYWORD);
+                    }
+                } else {
+                    compactIRI.checkChar(context.ch);
+                }
+                break;
+            case SCE_JSON_LDKEYWORD:
+            case SCE_JSON_URI:
+                if ((!setKeywordJSONLD.Contains(context.ch) &&
+                     (context.state == SCE_JSON_LDKEYWORD)) ||
+                    (!setURL.Contains(context.ch))) {
+                    context.SetState(stringStyleBefore);
+                }
+                if (context.ch == '"') {
+                    context.ForwardSetState(SCE_JSON_DEFAULT);
+                } else if (context.atLineEnd) {
+                    context.ChangeState(SCE_JSON_STRINGEOL);
+                }
+                break;
+            case SCE_JSON_OPERATOR:
+            case SCE_JSON_NUMBER:
+                // Operators and number parts are styled one character at a
+                // time; the default-state code below re-enters the state
+                context.SetState(SCE_JSON_DEFAULT);
+                break;
+            case SCE_JSON_ERROR:
+                if (context.atLineEnd) {
+                    context.SetState(SCE_JSON_DEFAULT);
+                }
+                break;
+            case SCE_JSON_KEYWORD:
+                if (!setKeywordJSON.Contains(context.ch)) {
+                    context.SetState(SCE_JSON_DEFAULT);
+                }
+                break;
+        }
+        if (context.state == SCE_JSON_DEFAULT) {
+            if (context.ch == '"') {
+                compactIRI.resetState();
+                context.SetState(SCE_JSON_STRING);
+                Sci_Position currPos = static_cast<Sci_Position>(context.currentPos);
+                if (AtPropertyName(styler, currPos)) {
+                    context.SetState(SCE_JSON_PROPERTYNAME);
+                }
+            } else if (setOperators.Contains(context.ch)) {
+                context.SetState(SCE_JSON_OPERATOR);
+            } else if (options.allowComments && context.Match("/*")) {
+                context.SetState(SCE_JSON_BLOCKCOMMENT);
+                context.Forward();
+            } else if (options.allowComments && context.Match("//")) {
+                context.SetState(SCE_JSON_LINECOMMENT);
+            } else if (setKeywordJSON.Contains(context.ch)) {
+                if (IsNextWordInList(keywordsJSON, setKeywordJSON, context, styler)) {
+                    context.SetState(SCE_JSON_KEYWORD);
+                }
+            }
+            // The exponent marker is compared directly instead of through
+            // tolower(), whose behaviour is undefined for values that do not
+            // fit in unsigned char.
+            // NOTE(review): context.ch/chPrev are ints and are understood to
+            // carry whole code points in multi-byte encodings - confirm.
+            const bool chIsExponent = context.ch == 'e' || context.ch == 'E';
+            const bool prevIsExponent = context.chPrev == 'e' || context.chPrev == 'E';
+            bool numberStart =
+                IsADigit(context.ch) && (context.chPrev == '+'||
+                                         context.chPrev == '-' ||
+                                         context.atLineStart ||
+                                         IsASpace(context.chPrev) ||
+                                         setOperators.Contains(context.chPrev));
+            bool exponentPart =
+                chIsExponent &&
+                IsADigit(context.chPrev) &&
+                (IsADigit(context.chNext) ||
+                 context.chNext == '+' ||
+                 context.chNext == '-');
+            bool signPart =
+                (context.ch == '-' || context.ch == '+') &&
+                ((prevIsExponent && IsADigit(context.chNext)) ||
+                 ((IsASpace(context.chPrev) || setOperators.Contains(context.chPrev))
+                  && IsADigit(context.chNext)));
+            bool adjacentDigit =
+                IsADigit(context.ch) && IsADigit(context.chPrev);
+            bool afterExponent = IsADigit(context.ch) && prevIsExponent;
+            bool dotPart = context.ch == '.' &&
+                           IsADigit(context.chPrev) &&
+                           IsADigit(context.chNext);
+            bool afterDot = IsADigit(context.ch) && context.chPrev == '.';
+            if (numberStart ||
+                exponentPart ||
+                signPart ||
+                adjacentDigit ||
+                dotPart ||
+                afterExponent ||
+                afterDot) {
+                context.SetState(SCE_JSON_NUMBER);
+            } else if (context.state == SCE_JSON_DEFAULT && !IsASpace(context.ch)) {
+                // Nothing above claimed this visible character
+                context.SetState(SCE_JSON_ERROR);
+            }
+        }
+        context.Forward();
+    }
+    context.Complete();
+}
+
+/**
+ * Computes fold levels for the lines covered by startPos and length.
+ *
+ * Does nothing unless the "fold" option is set. The level rises on '{' and
+ * '[' and falls on '}' and ']', counting only characters that were styled as
+ * SCE_JSON_OPERATOR.
+ */
+void SCI_METHOD LexerJSON::Fold(Sci_PositionU startPos,
+                                Sci_Position length,
+                                int,
+                                IDocument *pAccess) {
+    if (!options.fold) {
+        return;
+    }
+    LexAccessor styler(pAccess);
+    const Sci_PositionU endPos = startPos + length;
+    Sci_PositionU currLine = styler.GetLine(startPos);
+    int currLevel = styler.LevelAt(currLine) & SC_FOLDLEVELNUMBERMASK;
+    int nextLevel = currLevel;
+    int visibleChars = 0;
+    for (Sci_PositionU pos = startPos; pos < endPos; pos++) {
+        const char curr = styler.SafeGetCharAt(pos);
+        const char next = styler.SafeGetCharAt(pos+1);
+        // A lone CR, or any LF, ends the line; the CR of a CRLF pair does not
+        const bool atEOL = (curr == '\n') || (curr == '\r' && next != '\n');
+        if (styler.StyleAt(pos) == SCE_JSON_OPERATOR) {
+            switch (curr) {
+                case '{':
+                case '[':
+                    nextLevel++;
+                    break;
+                case '}':
+                case ']':
+                    nextLevel--;
+                    break;
+            }
+        }
+        if (atEOL || pos == (endPos-1)) {
+            // Write the finished line's level, flagged as blank or as a header
+            int flags = 0;
+            if (options.foldCompact && visibleChars == 0) {
+                flags = SC_FOLDLEVELWHITEFLAG;
+            } else if (nextLevel > currLevel) {
+                flags = SC_FOLDLEVELHEADERFLAG;
+            }
+            const int level = currLevel | flags;
+            if (styler.LevelAt(currLine) != level) {
+                styler.SetLevel(currLine, level);
+            }
+            currLine++;
+            currLevel = nextLevel;
+            visibleChars = 0;
+        }
+        if (!isspacechar(curr)) {
+            visibleChars++;
+        }
+    }
+}
+
+// Module object for the JSON lexer: ties the id SCLEX_JSON and the name "json"
+// to the LexerJSON factory function and the word list descriptions
+LexerModule lmJSON(SCLEX_JSON,
+ LexerJSON::LexerFactoryJSON,
+ "json",
+ JSONWordListDesc);
diff --git a/src/Catalogue.cxx b/src/Catalogue.cxx
index ed47aa8b7..e6aa2587e 100644
--- a/src/Catalogue.cxx
+++ b/src/Catalogue.cxx
@@ -126,6 +126,7 @@ int Scintilla_LinkLexers() {
LINK_LEXER(lmHTML);
LINK_LEXER(lmIHex);
LINK_LEXER(lmInno);
+ LINK_LEXER(lmJSON);
LINK_LEXER(lmKix);
LINK_LEXER(lmKVIrc);
LINK_LEXER(lmLatex);
diff --git a/win32/scintilla.mak b/win32/scintilla.mak
index e0d16f387..ad72c51c7 100644
--- a/win32/scintilla.mak
+++ b/win32/scintilla.mak
@@ -149,6 +149,7 @@ LEXOBJS=\
$(DIR_O)\LexHex.obj \
$(DIR_O)\LexHTML.obj \
$(DIR_O)\LexInno.obj \
+ $(DIR_O)\LexJSON.obj \
$(DIR_O)\LexKix.obj \
$(DIR_O)\LexKVIrc.obj \
$(DIR_O)\LexLaTeX.obj \
@@ -581,6 +582,8 @@ $(DIR_O)\LexHTML.obj: ..\lexers\LexHTML.cxx $(LEX_HEADERS)
$(DIR_O)\LexInno.obj: ..\lexers\LexInno.cxx $(LEX_HEADERS)
+$(DIR_O)\LexJSON.obj: ..\lexers\LexJSON.cxx $(LEX_HEADERS)
+
$(DIR_O)\LexKix.obj: ..\lexers\LexKix.cxx $(LEX_HEADERS)
$(DIR_O)\LexKVIrc.obj: ..\lexers\LexKVIrc.cxx $(LEX_HEADERS)