diff options
Diffstat (limited to 'lexers/LexJSON.cxx')
| -rw-r--r-- | lexers/LexJSON.cxx | 502 | 
1 files changed, 0 insertions, 502 deletions
| diff --git a/lexers/LexJSON.cxx b/lexers/LexJSON.cxx deleted file mode 100644 index 3712e931f..000000000 --- a/lexers/LexJSON.cxx +++ /dev/null @@ -1,502 +0,0 @@ -// Scintilla source code edit control -/** - * @file LexJSON.cxx - * @date February 19, 2016 - * @brief Lexer for JSON and JSON-LD formats - * @author nkmathew - * - * The License.txt file describes the conditions under which this software may - * be distributed. - * - */ - -#include <cstdlib> -#include <cassert> -#include <cctype> -#include <cstdio> -#include <string> -#include <vector> -#include <map> - -#include "ILexer.h" -#include "Scintilla.h" -#include "SciLexer.h" -#include "WordList.h" -#include "LexAccessor.h" -#include "StyleContext.h" -#include "CharacterSet.h" -#include "LexerModule.h" -#include "OptionSet.h" -#include "DefaultLexer.h" - -using namespace Scintilla; - -static const char *const JSONWordListDesc[] = { -	"JSON Keywords", -	"JSON-LD Keywords", -	0 -}; - -/** - * Used to detect compact IRI/URLs in JSON-LD without first looking ahead for the - * colon separating the prefix and suffix - * - * https://www.w3.org/TR/json-ld/#dfn-compact-iri - */ -struct CompactIRI { -	int colonCount; -	bool foundInvalidChar; -	CharacterSet setCompactIRI; -	CompactIRI() { -		colonCount = 0; -		foundInvalidChar = false; -		setCompactIRI = CharacterSet(CharacterSet::setAlpha, "$_-"); -	} -	void resetState() { -		colonCount = 0; -		foundInvalidChar = false; -	} -	void checkChar(int ch) { -		if (ch == ':') { -			colonCount++; -		} else { -			foundInvalidChar |= !setCompactIRI.Contains(ch); -		} -	} -	bool shouldHighlight() const { -		return !foundInvalidChar && colonCount == 1; -	} -}; - -/** - * Keeps track of escaped characters in strings as per: - * - * https://tools.ietf.org/html/rfc7159#section-7 - */ -struct EscapeSequence { -	int digitsLeft; -	CharacterSet setHexDigits; -	CharacterSet setEscapeChars; -	EscapeSequence() { -		digitsLeft = 0; -		setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef"); -		setEscapeChars = CharacterSet(CharacterSet::setNone, "\\\"tnbfru/"); -	} -	// Returns true if the following character is a valid escaped character -	bool newSequence(int nextChar) { -		digitsLeft = 0; -		if (nextChar == 'u') { -			digitsLeft = 5; -		} else if (!setEscapeChars.Contains(nextChar)) { -			return false; -		} -		return true; -	} -	bool atEscapeEnd() const { -		return digitsLeft <= 0; -	} -	bool isInvalidChar(int currChar) const { -		return !setHexDigits.Contains(currChar); -	} -}; - -struct OptionsJSON { -	bool foldCompact; -	bool fold; -	bool allowComments; -	bool escapeSequence; -	OptionsJSON() { -		foldCompact = false; -		fold = false; -		allowComments = false; -		escapeSequence = false; -	} -}; - -struct OptionSetJSON : public OptionSet<OptionsJSON> { -	OptionSetJSON() { -		DefineProperty("lexer.json.escape.sequence", &OptionsJSON::escapeSequence, -					   "Set to 1 to enable highlighting of escape sequences in strings"); - -		DefineProperty("lexer.json.allow.comments", &OptionsJSON::allowComments, -					   "Set to 1 to enable highlighting of line/block comments in JSON"); - -		DefineProperty("fold.compact", &OptionsJSON::foldCompact); -		DefineProperty("fold", &OptionsJSON::fold); -		DefineWordListSets(JSONWordListDesc); -	} -}; - -class LexerJSON : public DefaultLexer { -	OptionsJSON options; -	OptionSetJSON optSetJSON; -	EscapeSequence escapeSeq; -	WordList keywordsJSON; -	WordList keywordsJSONLD; -	CharacterSet setOperators; -	CharacterSet setURL; -	CharacterSet setKeywordJSONLD; -	CharacterSet setKeywordJSON; -	CompactIRI compactIRI; - -	static bool IsNextNonWhitespace(LexAccessor &styler, Sci_Position start, char ch) { -		Sci_Position i = 0; -		while (i < 50) { -			i++; -			char curr = styler.SafeGetCharAt(start+i, '\0'); -			char next = styler.SafeGetCharAt(start+i+1, '\0'); -			bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n'); -			if (curr == ch) { -				return true; -			} else if (!isspacechar(curr) || atEOL) { -				return false; -			} -		} -		return false; -	} - -	/** -	 * Looks for the colon following the end quote -	 * -	 * Assumes property names of lengths no longer than a 100 characters. -	 * The colon is also expected to be less than 50 spaces after the end -	 * quote for the string to be considered a property name -	 */ -	static bool AtPropertyName(LexAccessor &styler, Sci_Position start) { -		Sci_Position i = 0; -		bool escaped = false; -		while (i < 100) { -			i++; -			char curr = styler.SafeGetCharAt(start+i, '\0'); -			if (escaped) { -				escaped = false; -				continue; -			} -			escaped = curr == '\\'; -			if (curr == '"') { -				return IsNextNonWhitespace(styler, start+i, ':'); -			} else if (!curr) { -				return false; -			} -		} -		return false; -	} - -	static bool IsNextWordInList(WordList &keywordList, CharacterSet wordSet, -								 StyleContext &context, LexAccessor &styler) { -		char word[51]; -		Sci_Position currPos = (Sci_Position) context.currentPos; -		int i = 0; -		while (i < 50) { -			char ch = styler.SafeGetCharAt(currPos + i); -			if (!wordSet.Contains(ch)) { -				break; -			} -			word[i] = ch; -			i++; -		} -		word[i] = '\0'; -		return keywordList.InList(word); -	} - -	public: -	LexerJSON() : -		DefaultLexer("json", SCLEX_JSON), -		setOperators(CharacterSet::setNone, "[{}]:,"), -		setURL(CharacterSet::setAlphaNum, "-._~:/?#[]@!$&'()*+,),="), -		setKeywordJSONLD(CharacterSet::setAlpha, ":@"), -		setKeywordJSON(CharacterSet::setAlpha, "$_") { -	} -	virtual ~LexerJSON() {} -	int SCI_METHOD Version() const override { -		return lvRelease5; -	} -	void SCI_METHOD Release() override { -		delete this; -	} -	const char *SCI_METHOD PropertyNames() override { -		return optSetJSON.PropertyNames(); -	} -	int SCI_METHOD PropertyType(const char *name) override { -		return optSetJSON.PropertyType(name); -	} -	const char *SCI_METHOD DescribeProperty(const char *name) override { -		return optSetJSON.DescribeProperty(name); -	} -	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override { -		if (optSetJSON.PropertySet(&options, key, val)) { -			return 0; -		} -		return -1; -	} -	const char * SCI_METHOD PropertyGet(const char *key) override { -		return optSetJSON.PropertyGet(key); -	} -	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override { -		WordList *wordListN = 0; -		switch (n) { -			case 0: -				wordListN = &keywordsJSON; -				break; -			case 1: -				wordListN = &keywordsJSONLD; -				break; -		} -		Sci_Position firstModification = -1; -		if (wordListN) { -			WordList wlNew; -			wlNew.Set(wl); -			if (*wordListN != wlNew) { -				wordListN->Set(wl); -				firstModification = 0; -			} -		} -		return firstModification; -	} -	void *SCI_METHOD PrivateCall(int, void *) override { -		return 0; -	} -	static ILexer5 *LexerFactoryJSON() { -		return new LexerJSON; -	} -	const char *SCI_METHOD DescribeWordListSets() override { -		return optSetJSON.DescribeWordListSets(); -	} -	void SCI_METHOD Lex(Sci_PositionU startPos, -								Sci_Position length, -								int initStyle, -								IDocument *pAccess) override; -	void SCI_METHOD Fold(Sci_PositionU startPos, -								 Sci_Position length, -								 int initStyle, -								 IDocument *pAccess) override; -}; - -void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos, -							   Sci_Position length, -							   int initStyle, -							   IDocument *pAccess) { -	LexAccessor styler(pAccess); -	StyleContext context(startPos, length, initStyle, styler); -	int stringStyleBefore = SCE_JSON_STRING; -	while (context.More()) { -		switch (context.state) { -			case SCE_JSON_BLOCKCOMMENT: -				if (context.Match("*/")) { -					context.Forward(); -					context.ForwardSetState(SCE_JSON_DEFAULT); -				} -				break; -			case SCE_JSON_LINECOMMENT: -				if (context.atLineEnd) { -					context.SetState(SCE_JSON_DEFAULT); -				} -				break; -			case SCE_JSON_STRINGEOL: -				if (context.atLineStart) { -					context.SetState(SCE_JSON_DEFAULT); -				} -				break; -			case SCE_JSON_ESCAPESEQUENCE: -				escapeSeq.digitsLeft--; -				if (!escapeSeq.atEscapeEnd()) { -					if (escapeSeq.isInvalidChar(context.ch)) { -						context.SetState(SCE_JSON_ERROR); -					} -					break; -				} -				if (context.ch == '"') { -					context.SetState(stringStyleBefore); -					context.ForwardSetState(SCE_C_DEFAULT); -				} else if (context.ch == '\\') { -					if (!escapeSeq.newSequence(context.chNext)) { -						context.SetState(SCE_JSON_ERROR); -					} -					context.Forward(); -				} else { -					context.SetState(stringStyleBefore); -					if (context.atLineEnd) { -						context.ChangeState(SCE_JSON_STRINGEOL); -					} -				} -				break; -			case SCE_JSON_PROPERTYNAME: -			case SCE_JSON_STRING: -				if (context.ch == '"') { -					if (compactIRI.shouldHighlight()) { -						context.ChangeState(SCE_JSON_COMPACTIRI); -						context.ForwardSetState(SCE_JSON_DEFAULT); -						compactIRI.resetState(); -					} else { -						context.ForwardSetState(SCE_JSON_DEFAULT); -					} -				} else if (context.atLineEnd) { -					context.ChangeState(SCE_JSON_STRINGEOL); -				} else if (context.ch == '\\') { -					stringStyleBefore = context.state; -					if (options.escapeSequence) { -						context.SetState(SCE_JSON_ESCAPESEQUENCE); -						if (!escapeSeq.newSequence(context.chNext)) { -							context.SetState(SCE_JSON_ERROR); -						} -					} -					context.Forward(); -				} else if (context.Match("https://") || -						   context.Match("http://") || -						   context.Match("ssh://") || -						   context.Match("git://") || -						   context.Match("svn://") || -						   context.Match("ftp://") || -						   context.Match("mailto:")) { -					// Handle most common URI schemes only -					stringStyleBefore = context.state; -					context.SetState(SCE_JSON_URI); -				} else if (context.ch == '@') { -					// https://www.w3.org/TR/json-ld/#dfn-keyword -					if (IsNextWordInList(keywordsJSONLD, setKeywordJSONLD, context, styler)) { -						stringStyleBefore = context.state; -						context.SetState(SCE_JSON_LDKEYWORD); -					} -				} else { -					compactIRI.checkChar(context.ch); -				} -				break; -			case SCE_JSON_LDKEYWORD: -			case SCE_JSON_URI: -				if ((!setKeywordJSONLD.Contains(context.ch) && -					 (context.state == SCE_JSON_LDKEYWORD)) || -					(!setURL.Contains(context.ch))) { -					context.SetState(stringStyleBefore); -				} -				if (context.ch == '"') { -					context.ForwardSetState(SCE_JSON_DEFAULT); -				} else if (context.atLineEnd) { -					context.ChangeState(SCE_JSON_STRINGEOL); -				} -				break; -			case SCE_JSON_OPERATOR: -			case SCE_JSON_NUMBER: -				context.SetState(SCE_JSON_DEFAULT); -				break; -			case SCE_JSON_ERROR: -				if (context.atLineEnd) { -					context.SetState(SCE_JSON_DEFAULT); -				} -				break; -			case SCE_JSON_KEYWORD: -				if (!setKeywordJSON.Contains(context.ch)) { -					context.SetState(SCE_JSON_DEFAULT); -				} -				break; -		} -		if (context.state == SCE_JSON_DEFAULT) { -			if (context.ch == '"') { -				compactIRI.resetState(); -				context.SetState(SCE_JSON_STRING); -				Sci_Position currPos = static_cast<Sci_Position>(context.currentPos); -				if (AtPropertyName(styler, currPos)) { -					context.SetState(SCE_JSON_PROPERTYNAME); -				} -			} else if (setOperators.Contains(context.ch)) { -				context.SetState(SCE_JSON_OPERATOR); -			} else if (options.allowComments && context.Match("/*")) { -				context.SetState(SCE_JSON_BLOCKCOMMENT); -				context.Forward(); -			} else if (options.allowComments && context.Match("//")) { -				context.SetState(SCE_JSON_LINECOMMENT); -			} else if (setKeywordJSON.Contains(context.ch)) { -				if (IsNextWordInList(keywordsJSON, setKeywordJSON, context, styler)) { -					context.SetState(SCE_JSON_KEYWORD); -				} -			} -			bool numberStart = -				IsADigit(context.ch) && (context.chPrev == '+'|| -										 context.chPrev == '-' || -										 context.atLineStart || -										 IsASpace(context.chPrev) || -										 setOperators.Contains(context.chPrev)); -			bool exponentPart = -				tolower(context.ch) == 'e' && -				IsADigit(context.chPrev) && -				(IsADigit(context.chNext) || -				 context.chNext == '+' || -				 context.chNext == '-'); -			bool signPart = -				(context.ch == '-' || context.ch == '+') && -				((tolower(context.chPrev) == 'e' && IsADigit(context.chNext)) || -				 ((IsASpace(context.chPrev) || setOperators.Contains(context.chPrev)) -				  && IsADigit(context.chNext))); -			bool adjacentDigit = -				IsADigit(context.ch) && IsADigit(context.chPrev); -			bool afterExponent = IsADigit(context.ch) && tolower(context.chPrev) == 'e'; -			bool dotPart = context.ch == '.' && -				IsADigit(context.chPrev) && -				IsADigit(context.chNext); -			bool afterDot = IsADigit(context.ch) && context.chPrev == '.'; -			if (numberStart || -				exponentPart || -				signPart || -				adjacentDigit || -				dotPart || -				afterExponent || -				afterDot) { -				context.SetState(SCE_JSON_NUMBER); -			} else if (context.state == SCE_JSON_DEFAULT && !IsASpace(context.ch)) { -				context.SetState(SCE_JSON_ERROR); -			} -		} -		context.Forward(); -	} -	context.Complete(); -} - -void SCI_METHOD LexerJSON::Fold(Sci_PositionU startPos, -								Sci_Position length, -								int, -								IDocument *pAccess) { -	if (!options.fold) { -		return; -	} -	LexAccessor styler(pAccess); -	Sci_PositionU currLine = styler.GetLine(startPos); -	Sci_PositionU endPos = startPos + length; -	int currLevel = SC_FOLDLEVELBASE; -	if (currLine > 0) -		currLevel = styler.LevelAt(currLine - 1) >> 16; -	int nextLevel = currLevel; -	int visibleChars = 0; -	for (Sci_PositionU i = startPos; i < endPos; i++) { -		char curr = styler.SafeGetCharAt(i); -		char next = styler.SafeGetCharAt(i+1); -		bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n'); -		if (styler.StyleAt(i) == SCE_JSON_OPERATOR) { -			if (curr == '{' || curr == '[') { -				nextLevel++; -			} else if (curr == '}' || curr == ']') { -				nextLevel--; -			} -		} -		if (atEOL || i == (endPos-1)) { -			int level = currLevel | nextLevel << 16; -			if (!visibleChars && options.foldCompact) { -				level |= SC_FOLDLEVELWHITEFLAG; -			} else if (nextLevel > currLevel) { -				level |= SC_FOLDLEVELHEADERFLAG; -			} -			if (level != styler.LevelAt(currLine)) { -				styler.SetLevel(currLine, level); -			} -			currLine++; -			currLevel = nextLevel; -			visibleChars = 0; -		} -		if (!isspacechar(curr)) { -			visibleChars++; -		} -	} -} - -LexerModule lmJSON(SCLEX_JSON, -				   LexerJSON::LexerFactoryJSON, -				   "json", -				   JSONWordListDesc); | 
