diff options
| -rw-r--r-- | doc/ScintillaHistory.html | 5 | ||||
| -rw-r--r-- | include/SciLexer.h | 10 | ||||
| -rw-r--r-- | include/Scintilla.iface | 11 | ||||
| -rw-r--r-- | lexers/LexEDIFACT.cxx | 315 | ||||
| -rw-r--r-- | src/Catalogue.cxx | 1 | ||||
| -rw-r--r-- | win32/scintilla.mak | 3 | 
6 files changed, 345 insertions, 0 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index 40201af00..876587a62 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -505,6 +505,7 @@        </tr><tr>  	<td>Roberto Rossi</td>  	<td>Kenny Liu</td> +	<td>Iain Clarke</td>      </tr>      </table>      <p> @@ -558,6 +559,10 @@  	Baan folder accomodates sections and lexer fixes definition of SCE_BAAN_FUNCDEF.  	</li>  	<li> +	EDIFACT lexer and folder added. +	<a href="http://sourceforge.net/p/scintilla/feature-requests/1166/">Feature #1166.</a> +	</li> +	<li>  	JSON folder fixed where it didn't resume folding with the correct fold level.  	</li>  	<li> diff --git a/include/SciLexer.h b/include/SciLexer.h index 0eb0b2956..44c02a84a 100644 --- a/include/SciLexer.h +++ b/include/SciLexer.h @@ -133,6 +133,7 @@  #define SCLEX_IHEX 118  #define SCLEX_TEHEX 119  #define SCLEX_JSON 120 +#define SCLEX_EDIFACT 121  #define SCLEX_AUTOMATIC 1000  #define SCE_P_DEFAULT 0  #define SCE_P_COMMENTLINE 1 @@ -1797,6 +1798,15 @@  #define SCE_JSON_KEYWORD 11  #define SCE_JSON_LDKEYWORD 12  #define SCE_JSON_ERROR 13 +#define SCE_EDI_DEFAULT 0 +#define SCE_EDI_SEGMENTSTART 1 +#define SCE_EDI_SEGMENTEND 2 +#define SCE_EDI_SEP_ELEMENT 3 +#define SCE_EDI_SEP_COMPOSITE 4 +#define SCE_EDI_SEP_RELEASE 5 +#define SCE_EDI_UNA 6 +#define SCE_EDI_UNH 7 +#define SCE_EDI_BADSEGMENT 8  /* --Autogenerated -- end of section automatically generated from Scintilla.iface */  #endif diff --git a/include/Scintilla.iface b/include/Scintilla.iface index bc7cb01ff..76c548e48 100644 --- a/include/Scintilla.iface +++ b/include/Scintilla.iface @@ -2873,6 +2873,7 @@ val SCLEX_SREC=117  val SCLEX_IHEX=118  val SCLEX_TEHEX=119  val SCLEX_JSON=120 +val SCLEX_EDIFACT=121  # When a lexer specifies its language as SCLEX_AUTOMATIC it receives a  # value assigned in sequence from SCLEX_AUTOMATIC+1. 
@@ -4762,6 +4763,16 @@ val SCE_JSON_COMPACTIRI=10  val SCE_JSON_KEYWORD=11  val SCE_JSON_LDKEYWORD=12  val SCE_JSON_ERROR=13 +lex EDIFACT=SCLEX_EDIFACT SCE_EDI_ +val SCE_EDI_DEFAULT=0 +val SCE_EDI_SEGMENTSTART=1 +val SCE_EDI_SEGMENTEND=2 +val SCE_EDI_SEP_ELEMENT=3 +val SCE_EDI_SEP_COMPOSITE=4 +val SCE_EDI_SEP_RELEASE=5 +val SCE_EDI_UNA=6 +val SCE_EDI_UNH=7 +val SCE_EDI_BADSEGMENT=8  # Events diff --git a/lexers/LexEDIFACT.cxx b/lexers/LexEDIFACT.cxx new file mode 100644 index 000000000..70fd9f8f6 --- /dev/null +++ b/lexers/LexEDIFACT.cxx @@ -0,0 +1,363 @@
+// Scintilla Lexer for EDIFACT
+// Written by Iain Clarke, IMCSoft & Inobiz AB.
+// EDIFACT documented here: https://www.unece.org/cefact/edifact/welcome.html
+// and more readably here: https://en.wikipedia.org/wiki/EDIFACT
+// This code is subject to the same license terms as the rest of the scintilla project:
+// The License.txt file describes the conditions under which this software may be distributed.
+//
+
+// Header order must match order in scripts/HeaderOrder.txt
+#include <cstdlib>
+#include <cassert>
+#include <cstring>
+#include <cctype>
+
+#include "ILexer.h"
+#include "Scintilla.h"
+#include "SciLexer.h"
+
+#include "LexAccessor.h"
+#include "LexerModule.h"
+
+// Lexer and folder for EDIFACT interchanges.
+// Text is styled with the SCE_EDI_* styles. The separator characters start out
+// as : + . ? ' and are re-read from a leading UNA segment, when the document
+// has one, each time Lex() runs.
+class LexerEDIFACT : public ILexer
+{
+public:
+	LexerEDIFACT();
+	virtual ~LexerEDIFACT() {} // virtual destructor, as we inherit from ILexer
+
+	// Creation function passed to the LexerModule registration below.
+	static ILexer *Factory() {
+		return new LexerEDIFACT;
+	}
+
+	virtual int SCI_METHOD Version() const
+	{
+		return lvOriginal;
+	}
+	// Destroys this lexer object.
+	virtual void SCI_METHOD Release()
+	{
+		delete this;
+	}
+
+	// "fold" is the only property this lexer knows.
+	const char * SCI_METHOD PropertyNames()
+	{
+		return "fold";
+	}
+	int SCI_METHOD PropertyType(const char *)
+	{
+		return SC_TYPE_BOOLEAN; // Only one property!
+	}
+	// Returns the description of "fold", or NULL for any other name.
+	const char * SCI_METHOD DescribeProperty(const char *name)
+	{
+		if (strcmp(name, "fold"))
+			return NULL;
+		return "Whether to apply folding to document or not";
+	}
+
+	// Sets "fold": every value except "0" switches folding on.
+	// Returns -1 for an unknown key and 0 otherwise.
+	virtual Sci_Position SCI_METHOD PropertySet(const char *key, const char *val)
+	{
+		if (strcmp(key, "fold"))
+			return -1;
+		m_bFold = strcmp(val, "0") ? true : false;
+		return 0;
+	}
+	// No keyword lists are used by this lexer.
+	const char * SCI_METHOD DescribeWordListSets()
+	{
+		return NULL;
+	}
+	virtual Sci_Position SCI_METHOD WordListSet(int, const char *)
+	{
+		return -1;
+	}
+	virtual void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess);
+	virtual void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess);
+	virtual void * SCI_METHOD PrivateCall(int, void *)
+	{
+		return NULL;
+	}
+
+protected:
+	// Loads the separators from a leading UNA segment (0), or restores the defaults (-1).
+	Sci_Position InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength);
+	// Position of the closest unescaped segment terminator at or before startPos, or 0.
+	Sci_Position FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const;
+	// First position in [startPos, MaxLength) that is not a tab, CR, LF or space, or MaxLength.
+	Sci_Position ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const;
+	// Style for a three-letter segment tag, or SCE_EDI_BADSEGMENT.
+	int DetectSegmentHeader(char SegmentHeader[3]) const;
+
+	bool m_bFold;		// value of the "fold" property
+	char m_chComponent;	// component separator, ':' by default
+	char m_chData;		// data element separator, '+' by default
+	char m_chDecimal;	// decimal mark, '.' by default
+	char m_chRelease;	// release (escape) character, '?' by default
+	char m_chSegment;	// segment terminator, '\'' by default
+};
+
+LexerModule lmEDIFACT(SCLEX_EDIFACT, LexerEDIFACT::Factory, "edifact");
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Starts with folding off and the default separators.
+LexerEDIFACT::LexerEDIFACT() :
+	m_bFold(false),
+	m_chComponent(':'),
+	m_chData('+'),
+	m_chDecimal('.'),
+	m_chRelease('?'),
+	m_chSegment('\'')
+{
+}
+
+// Styles the range [startPos, startPos + lengthDoc).
+// Lexing restarts just after the closest unescaped segment terminator at or
+// before startPos, or at the document beginning. Text of a segment whose
+// terminator is not reached before the end of the range is styled SCE_EDI_BADSEGMENT.
+void LexerEDIFACT::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess)
+{
+	Sci_PositionU posFinish = startPos + lengthDoc;
+	InitialiseFromUNA(pAccess, posFinish);
+
+	// Look backwards for a ' or a document beginning
+	Sci_PositionU posCurrent = FindPreviousEnd(pAccess, startPos);
+	// And jump past the ' if this was not the beginning of the document
+	if (posCurrent != 0)
+		posCurrent++;
+
+	// Style buffer, so we're not issuing loads of notifications
+	LexAccessor styler (pAccess);
+	pAccess->StartStyling(posCurrent, '\377');
+	styler.StartSegment(posCurrent);
+	Sci_Position posSegmentStart = -1;
+
+	while ((posCurrent < posFinish) && (posSegmentStart == -1))
+	{
+		posCurrent = ForwardPastWhitespace(pAccess, posCurrent, posFinish);
+		// Mark whitespace as default
+		styler.ColourTo(posCurrent - 1, SCE_EDI_DEFAULT);
+		if (posCurrent >= posFinish)
+			break;
+
+		// Does it start with 3 characters? ie, UNH
+		char SegmentHeader[4] = { 0 };
+		pAccess->GetCharRange(SegmentHeader, posCurrent, 3);
+
+		int SegmentStyle = DetectSegmentHeader(SegmentHeader);
+		if (SegmentStyle == SCE_EDI_BADSEGMENT)
+			break;
+		if (SegmentStyle == SCE_EDI_UNA)
+		{
+			// UNA is always 9 characters, but never style past the end of the range
+			posCurrent += 9;
+			if (posCurrent > posFinish)
+				posCurrent = posFinish;
+			styler.ColourTo(posCurrent - 1, SCE_EDI_UNA);
+			continue;
+		}
+		posSegmentStart = posCurrent;
+		posCurrent += 3;
+
+		styler.ColourTo(posCurrent - 1, SegmentStyle); // UNH etc
+
+		// Colour in the rest of the segment
+		for (; posCurrent < posFinish; posCurrent++)
+		{
+			char c = 0;
+			pAccess->GetCharRange(&c, posCurrent, 1);
+
+			if (c == m_chRelease) // ? escape character, check first, in case of ?'
+				posCurrent++;
+			else if (c == m_chSegment) // '
+			{
+				// Make sure the whole segment is on one line. styler won't let us go back in time, so we'll settle for marking the ' as bad.
+				Sci_Position lineSegmentStart = pAccess->LineFromPosition(posSegmentStart);
+				Sci_Position lineSegmentEnd = pAccess->LineFromPosition(posCurrent);
+				if (lineSegmentStart == lineSegmentEnd)
+					styler.ColourTo(posCurrent, SCE_EDI_SEGMENTEND);
+				else
+					styler.ColourTo(posCurrent, SCE_EDI_BADSEGMENT);
+				posSegmentStart = -1;
+				posCurrent++;
+				break;
+			}
+			else if (c == m_chComponent) // :
+				styler.ColourTo(posCurrent, SCE_EDI_SEP_COMPOSITE);
+			else if (c == m_chData) // +
+				styler.ColourTo(posCurrent, SCE_EDI_SEP_ELEMENT);
+			else
+				styler.ColourTo(posCurrent, SCE_EDI_DEFAULT);
+		}
+	}
+	styler.Flush();
+
+	// No segment is left open, so there is nothing more to style
+	if (posSegmentStart == -1)
+		return;
+
+	// The last segment never reached its terminator, so flag all of it
+	pAccess->StartStyling(posSegmentStart, -1);
+	pAccess->SetStyleFor(posFinish - posSegmentStart, SCE_EDI_BADSEGMENT);
+}
+
+// Sets fold levels for the lines of [startPos, startPos + lengthDoc); does
+// nothing unless the "fold" property is on.
+void LexerEDIFACT::Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess)
+{
+	if (!m_bFold)
+		return;
+
+	// Fold at UNx lines. ie, UNx segments = 0, other segments = 1.
+	// There's no sub folding, so we can be quite simple.
+	Sci_Position endPos = startPos + lengthDoc;
+
+	int iIndentPrevious = 0;
+	Sci_Position lineLast = pAccess->LineFromPosition(endPos);
+
+	for (Sci_Position lineCurrent = pAccess->LineFromPosition(startPos); lineCurrent <= lineLast; lineCurrent++)
+	{
+		Sci_Position posLineStart = pAccess->LineStart(lineCurrent);
+		posLineStart = ForwardPastWhitespace(pAccess, posLineStart, endPos);
+		Sci_Position lineDataStart = pAccess->LineFromPosition(posLineStart);
+		// Lines skipped over as whitespace keep the previous level number, plus the white flag
+		for (; lineCurrent < lineDataStart; lineCurrent++)
+			pAccess->SetLevel(lineCurrent, SC_FOLDLEVELBASE | SC_FOLDLEVELWHITEFLAG | iIndentPrevious);
+
+		// Cleared for every line, so a read that delivers nothing cannot leave the previous line's header behind
+		char SegmentHeader[4] = { 0 };
+		pAccess->GetCharRange(SegmentHeader, posLineStart, 3);
+
+		int level = 0;
+		if (memcmp(SegmentHeader, "UNH", 3) == 0) // UNH starts blocks
+			level = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
+		// Check for UNA,B and Z. All others are inside messages
+		else if (!memcmp(SegmentHeader, "UNA", 3) || !memcmp(SegmentHeader, "UNB", 3) || !memcmp(SegmentHeader, "UNZ", 3))
+			level = SC_FOLDLEVELBASE;
+		else
+			level = SC_FOLDLEVELBASE | 1;
+		pAccess->SetLevel(lineCurrent, level);
+		iIndentPrevious = level & SC_FOLDLEVELNUMBERMASK;
+	}
+}
+
+// Reads the separator characters from a UNA segment that is the first
+// non-whitespace text of the document and lies completely before MaxLength.
+// Returns 0 when such a segment was found; otherwise restores the default
+// separators and returns -1.
+Sci_Position LexerEDIFACT::InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength)
+{
+	char bufUNA[9] = { 0 }; // room for UNA:+.? ' - zeroed, so a read that delivers nothing cannot match
+
+	// MaxLength is unsigned: only subtract once we know there are at least 9 chars
+	if (MaxLength >= 9)
+	{
+		const Sci_PositionU posLastStart = MaxLength - 9;
+		const Sci_PositionU startPos = ForwardPastWhitespace(pAccess, 0, posLastStart);
+		pAccess->GetCharRange(bufUNA, startPos, 9);
+
+		// Check it's UNA segment
+		if (!memcmp(bufUNA, "UNA", 3))
+		{
+			m_chComponent = bufUNA[3];
+			m_chData = bufUNA[4];
+			m_chDecimal = bufUNA[5];
+			m_chRelease = bufUNA[6];
+			// bufUNA [7] should be space - reserved.
+			m_chSegment = bufUNA[8];
+
+			return 0; // success!
+		}
+	}
+
+	// We failed to find a UNA, so drop to defaults
+	m_chComponent = ':';
+	m_chData = '+';
+	m_chDecimal = '.';
+	m_chRelease = '?';
+	m_chSegment = '\'';
+
+	return -1;
+}
+
+// Returns the first position in [startPos, MaxLength) whose character is
+// not a tab, carriage return, line feed or space, or MaxLength if none.
+Sci_Position LexerEDIFACT::ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const
+{
+	while (startPos < MaxLength)
+	{
+		char c = 0;
+		pAccess->GetCharRange(&c, startPos, 1);
+		switch (c)
+		{
+		case '\t':
+		case '\r':
+		case '\n':
+		case ' ':
+			break;
+		default:
+			return startPos;
+		}
+
+		startPos++;
+	}
+
+	return MaxLength;
+}
+
+// Classifies a segment tag. Returns SCE_EDI_BADSEGMENT unless all three
+// characters are upper case A-Z, SCE_EDI_UNA or SCE_EDI_UNH for those two
+// tags, and SCE_EDI_SEGMENTSTART for every other tag.
+int LexerEDIFACT::DetectSegmentHeader(char SegmentHeader[3]) const
+{
+	if (
+		SegmentHeader[0] < 'A' || SegmentHeader[0] > 'Z' ||
+		SegmentHeader[1] < 'A' || SegmentHeader[1] > 'Z' ||
+		SegmentHeader[2] < 'A' || SegmentHeader[2] > 'Z')
+		return SCE_EDI_BADSEGMENT;
+
+	if (memcmp(SegmentHeader, "UNA", 3) == 0)
+		return SCE_EDI_UNA;
+	if (memcmp(SegmentHeader, "UNH", 3) == 0)
+		return SCE_EDI_UNH;
+
+	return SCE_EDI_SEGMENTSTART;
+}
+
+// Look backwards from startPos for a segment terminator that is not escaped
+// by the release character, or a document beginning.
+Sci_Position LexerEDIFACT::FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const
+{
+	for (; startPos > 0; startPos--)
+	{
+		char c = 0;
+		pAccess->GetCharRange(&c, startPos, 1);
+		if (c != m_chSegment)
+			continue;
+
+		// Count the release characters directly in front. They pair up from the
+		// left, so an odd count means the last one escapes this terminator (?')
+		// while an even count (??') leaves it a real end of segment.
+		Sci_Position posRun = startPos;
+		while (posRun > 0)
+		{
+			char chBefore = 0;
+			pAccess->GetCharRange(&chBefore, posRun - 1, 1);
+			if (chBefore != m_chRelease)
+				break;
+			posRun--;
+		}
+		if (((startPos - posRun) % 2) == 0)
+			return startPos;
+	}
+	// We didn't find a ', so just go with the beginning
+	return 0;
+}
diff --git a/src/Catalogue.cxx b/src/Catalogue.cxx index e6aa2587e..2eadd9509 100644 --- a/src/Catalogue.cxx +++ b/src/Catalogue.cxx @@ -110,6 +110,7 @@ int Scintilla_LinkLexers() {  	LINK_LEXER(lmDMAP);  	LINK_LEXER(lmDMIS);  	LINK_LEXER(lmECL); +	LINK_LEXER(lmEDIFACT);  	LINK_LEXER(lmEiffel);  	LINK_LEXER(lmEiffelkw);  	LINK_LEXER(lmErlang); diff --git a/win32/scintilla.mak b/win32/scintilla.mak index e97308a23..b000092c1 100644 --- a/win32/scintilla.mak +++ b/win32/scintilla.mak @@ -136,6 +136,7 @@ LEXOBJS=\  	$(DIR_O)\LexDMAP.obj \  	$(DIR_O)\LexDMIS.obj \  	$(DIR_O)\LexECL.obj \ +	$(DIR_O)\LexEDIFACT.obj \  	
$(DIR_O)\LexEiffel.obj \  	$(DIR_O)\LexErlang.obj \  	$(DIR_O)\LexErrorList.obj \ @@ -556,6 +557,8 @@ $(DIR_O)\LexDMIS.obj: ..\lexers\LexDMIS.cxx $(LEX_HEADERS)  $(DIR_O)\LexECL.obj: ..\lexers\LexECL.cxx $(LEX_HEADERS) +$(DIR_O)\LexEDIFACT.obj: ..\lexers\LexEDIFACT.cxx $(LEX_HEADERS) +  $(DIR_O)\LexEiffel.obj: ..\lexers\LexEiffel.cxx $(LEX_HEADERS)  $(DIR_O)\LexErlang.obj: ..\lexers\LexErlang.cxx $(LEX_HEADERS)  | 
