6 files changed, 345 insertions, 0 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index 40201af00..876587a62 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -505,6 +505,7 @@
       </tr><tr>
 	<td>Roberto Rossi</td>
 	<td>Kenny Liu</td>
+	<td>Iain Clarke</td>
     </tr>
     </table>
     <p>
@@ -558,6 +559,10 @@
 	Baan folder accomodates sections and lexer fixes definition of SCE_BAAN_FUNCDEF.
 	</li>
 	<li>
+	EDIFACT lexer and folder added.
+	<a href="http://sourceforge.net/p/scintilla/feature-requests/1166/">Feature #1166.</a>
+	</li>
+	<li>
 	JSON folder fixed where it didn't resume folding with the correct fold level.
 	</li>
 	<li>
diff --git a/include/SciLexer.h b/include/SciLexer.h
index 0eb0b2956..44c02a84a 100644
--- a/include/SciLexer.h
+++ b/include/SciLexer.h
@@ -133,6 +133,7 @@
 #define SCLEX_IHEX 118
 #define SCLEX_TEHEX 119
 #define SCLEX_JSON 120
+#define SCLEX_EDIFACT 121
 #define SCLEX_AUTOMATIC 1000
 #define SCE_P_DEFAULT 0
 #define SCE_P_COMMENTLINE 1
@@ -1797,6 +1798,15 @@
 #define SCE_JSON_KEYWORD 11
 #define SCE_JSON_LDKEYWORD 12
 #define SCE_JSON_ERROR 13
+#define SCE_EDI_DEFAULT 0
+#define SCE_EDI_SEGMENTSTART 1
+#define SCE_EDI_SEGMENTEND 2
+#define SCE_EDI_SEP_ELEMENT 3
+#define SCE_EDI_SEP_COMPOSITE 4
+#define SCE_EDI_SEP_RELEASE 5
+#define SCE_EDI_UNA 6
+#define SCE_EDI_UNH 7
+#define SCE_EDI_BADSEGMENT 8
 /* --Autogenerated -- end of section automatically generated from Scintilla.iface */
 
 #endif
diff --git a/include/Scintilla.iface b/include/Scintilla.iface
index bc7cb01ff..76c548e48 100644
--- a/include/Scintilla.iface
+++ b/include/Scintilla.iface
@@ -2873,6 +2873,7 @@ val SCLEX_SREC=117
 val SCLEX_IHEX=118
 val SCLEX_TEHEX=119
 val SCLEX_JSON=120
+val SCLEX_EDIFACT=121
 
 # When a lexer specifies its language as SCLEX_AUTOMATIC it receives a
 # value assigned in sequence from SCLEX_AUTOMATIC+1.
@@ -4762,6 +4763,16 @@ val SCE_JSON_COMPACTIRI=10
 val SCE_JSON_KEYWORD=11
 val SCE_JSON_LDKEYWORD=12
 val SCE_JSON_ERROR=13
+lex EDIFACT=SCLEX_EDIFACT SCE_EDI_
+val SCE_EDI_DEFAULT=0
+val SCE_EDI_SEGMENTSTART=1
+val SCE_EDI_SEGMENTEND=2
+val SCE_EDI_SEP_ELEMENT=3
+val SCE_EDI_SEP_COMPOSITE=4
+val SCE_EDI_SEP_RELEASE=5
+val SCE_EDI_UNA=6
+val SCE_EDI_UNH=7
+val SCE_EDI_BADSEGMENT=8
 
 # Events
 
diff --git a/lexers/LexEDIFACT.cxx b/lexers/LexEDIFACT.cxx
new file mode 100644
index 000000000..70fd9f8f6
--- /dev/null
+++ b/lexers/LexEDIFACT.cxx
@@ -0,0 +1,315 @@
+// Scintilla Lexer for EDIFACT
+// Written by Iain Clarke, IMCSoft & Inobiz AB.
+// EDIFACT documented here: https://www.unece.org/cefact/edifact/welcome.html
+// and more readably here: https://en.wikipedia.org/wiki/EDIFACT
+// This code is subject to the same license terms as the rest of the scintilla project:
+// The License.txt file describes the conditions under which this software may be distributed.
+// 
+
+// Header order must match order in scripts/HeaderOrder.txt
+#include <cstdlib>
+#include <cassert>
+#include <cstring>
+#include <cctype>
+
+#include "ILexer.h"
+#include "Scintilla.h"
+#include "SciLexer.h"
+
+#include "LexAccessor.h"
+#include "LexerModule.h"
+
+// Lexer and folder for EDIFACT documents, exposed to Scintilla through ILexer.
+// The separator members below are set by the constructor and by InitialiseFromUNA.
+class LexerEDIFACT : public ILexer {
+public:
+	LexerEDIFACT();
+	virtual ~LexerEDIFACT() {} // virtual destructor, as we inherit from ILexer
+
+	// Creation hook passed to the LexerModule object for this lexer.
+	static ILexer *Factory() {
+		return new LexerEDIFACT;
+	}
+
+	// Reports lvOriginal as the interface version.
+	virtual int SCI_METHOD Version() const {
+		return lvOriginal;
+	}
+	// Self-destructs; instances are created with new in Factory().
+	virtual void SCI_METHOD Release() {
+		delete this;
+	}
+
+	// "fold" is the only property this lexer publishes.
+	const char * SCI_METHOD PropertyNames() {
+		return "fold";
+	}
+	int SCI_METHOD PropertyType(const char *) {
+		return SC_TYPE_BOOLEAN; // Only one property!
+	}
+	// Returns NULL for any name other than "fold".
+	const char * SCI_METHOD DescribeProperty(const char *name) {
+		if (strcmp(name, "fold") != 0)
+			return NULL;
+		return "Whether to apply folding to document or not";
+	}
+
+	// Returns -1 for an unknown key; otherwise stores the flag and returns 0.
+	// Any value other than the exact string "0" switches folding on.
+	virtual Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) {
+		if (strcmp(key, "fold") != 0)
+			return -1;
+		m_bFold = (strcmp(val, "0") != 0);
+		return 0;
+	}
+	// No word lists are described or accepted by this lexer.
+	const char * SCI_METHOD DescribeWordListSets() {
+		return NULL;
+	}
+	virtual Sci_Position SCI_METHOD WordListSet(int, const char *) {
+		return -1;
+	}
+	virtual void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess);
+	virtual void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess);
+	virtual void * SCI_METHOD PrivateCall(int, void *) {
+		return NULL;
+	}
+
+protected:
+	Sci_Position InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength);
+	Sci_Position FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const;
+	Sci_Position ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const;
+	int DetectSegmentHeader(char SegmentHeader[3]) const;
+
+	bool m_bFold;        // folding enabled through the "fold" property
+	char m_chComponent;  // styled as SCE_EDI_SEP_COMPOSITE, ':' by default
+	char m_chData;       // styled as SCE_EDI_SEP_ELEMENT, '+' by default
+	char m_chDecimal;    // '.' by default; stored but not used for styling
+	char m_chRelease;    // release (escape) character, '?' by default
+	char m_chSegment;    // segment terminator, '\'' by default
+};
+
+LexerModule lmEDIFACT(SCLEX_EDIFACT, LexerEDIFACT::Factory, "edifact"); // module object referenced by LINK_LEXER(lmEDIFACT) in Catalogue.cxx
+
+///////////////////////////////////////////////////////////////////////////////
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Starts with folding off and the same separator defaults InitialiseFromUNA falls back to.
+LexerEDIFACT::LexerEDIFACT() :
+	m_bFold(false),
+	m_chComponent(':'),
+	m_chData('+'),
+	m_chDecimal('.'),
+	m_chRelease('?'),
+	m_chSegment('\'') {
+}
+
+// Styles the range [startPos, startPos + lengthDoc), restarting from the previous segment
+// terminator. A segment left unterminated at the end of the range becomes SCE_EDI_BADSEGMENT.
+void LexerEDIFACT::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess)
+{
+	Sci_PositionU posFinish = startPos + lengthDoc;
+	InitialiseFromUNA(pAccess, posFinish);
+
+	// Look backwards for a ' or a document beginning
+	Sci_PositionU posCurrent = FindPreviousEnd(pAccess, startPos);
+	// And jump past the ' if this was not the beginning of the document
+	if (posCurrent != 0)
+		posCurrent++;
+
+	// Style buffer, so we're not issuing loads of notifications
+	LexAccessor styler (pAccess);
+	pAccess->StartStyling(posCurrent, '\377');
+	styler.StartSegment(posCurrent);
+	Sci_Position posSegmentStart = -1;
+
+	while ((posCurrent < posFinish) && (posSegmentStart == -1))
+	{
+		posCurrent = ForwardPastWhitespace(pAccess, posCurrent, posFinish);
+		// Mark whitespace as default
+		styler.ColourTo(posCurrent - 1, SCE_EDI_DEFAULT);
+		if (posCurrent >= posFinish)
+			break;
+
+		// Does it start with 3 characters? ie, UNH
+		char SegmentHeader[4] = { 0 };
+		pAccess->GetCharRange(SegmentHeader, posCurrent, 3);
+		int SegmentStyle = DetectSegmentHeader(SegmentHeader);
+		if (SegmentStyle == SCE_EDI_BADSEGMENT)
+			break;
+		if (SegmentStyle == SCE_EDI_UNA)
+		{
+			// UNA spans 9 characters, but never colour beyond the requested range
+			posCurrent = (posFinish - posCurrent < 9) ? posFinish : posCurrent + 9;
+			styler.ColourTo(posCurrent - 1, SCE_EDI_UNA);
+			continue;
+		}
+		posSegmentStart = posCurrent;
+		posCurrent += 3;
+		styler.ColourTo(posCurrent - 1, SegmentStyle); // UNH etc
+		// Colour in the rest of the segment
+		for (char c; posCurrent < posFinish; posCurrent++)
+		{
+			pAccess->GetCharRange(&c, posCurrent, 1);
+			if (c == m_chRelease) // ? escape character, check first, in case of ?'
+			{
+				// Colour the pair now, or it would inherit the style of the next ColourTo call
+				styler.ColourTo(posCurrent, SCE_EDI_SEP_RELEASE);
+				posCurrent++; // the escaped character is plain data, whatever it is
+				if (posCurrent < posFinish)
+					styler.ColourTo(posCurrent, SCE_EDI_DEFAULT);
+			}
+			else if (c == m_chSegment) // '
+			{
+				// Make sure the whole segment is on one line. styler won't let us go back in time, so we'll settle for marking the ' as bad.
+				const bool bOneLine = pAccess->LineFromPosition(posSegmentStart) == pAccess->LineFromPosition(posCurrent);
+				styler.ColourTo(posCurrent, bOneLine ? SCE_EDI_SEGMENTEND : SCE_EDI_BADSEGMENT);
+				posSegmentStart = -1;
+				posCurrent++;
+				break;
+			}
+			else if (c == m_chComponent) // :
+				styler.ColourTo(posCurrent, SCE_EDI_SEP_COMPOSITE);
+			else if (c == m_chData) // +
+				styler.ColourTo(posCurrent, SCE_EDI_SEP_ELEMENT);
+			else
+				styler.ColourTo(posCurrent, SCE_EDI_DEFAULT);
+		}
+	}
+	styler.Flush();
+
+	if (posSegmentStart == -1)
+		return;
+	pAccess->StartStyling(posSegmentStart, -1);
+	pAccess->SetStyleFor(posFinish - posSegmentStart, SCE_EDI_BADSEGMENT);
+}
+
+// Sets fold levels for the lines covering [startPos, startPos + lengthDoc); no-op unless "fold" is on.
+void LexerEDIFACT::Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess)
+{
+	if (!m_bFold)
+		return;
+
+	// Fold at UNx lines. ie, UNx segments = 0, other segments = 1.
+	// There's no sub folding, so we can be quite simple.
+	Sci_Position endPos = startPos + lengthDoc;
+	int iIndentPrevious = 0;
+	Sci_Position lineLast = pAccess->LineFromPosition(endPos);
+
+	for (Sci_Position lineCurrent = pAccess->LineFromPosition(startPos); lineCurrent <= lineLast; lineCurrent++)
+	{
+		Sci_Position posLineStart = pAccess->LineStart(lineCurrent);
+		posLineStart = ForwardPastWhitespace(pAccess, posLineStart, endPos);
+		Sci_Position lineDataStart = pAccess->LineFromPosition(posLineStart);
+		// Fill in whitespace lines?
+		for (; lineCurrent < lineDataStart; lineCurrent++)
+			pAccess->SetLevel(lineCurrent, SC_FOLDLEVELBASE | SC_FOLDLEVELWHITEFLAG | iIndentPrevious);
+		// Fresh zeroed buffer for every line, as Lex() does, so a read that yields fewer
+		// than 3 characters cannot leave the previous line's tag behind to be matched again.
+		char SegmentHeader[4] = { 0 };
+		pAccess->GetCharRange(SegmentHeader, posLineStart, 3);
+		int level = 0;
+		if (memcmp(SegmentHeader, "UNH", 3) == 0) // UNH starts blocks
+			level = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
+		// Check for UNA,B and Z. All others are inside messages
+		else if (!memcmp(SegmentHeader, "UNA", 3) || !memcmp(SegmentHeader, "UNB", 3) || !memcmp(SegmentHeader, "UNZ", 3))
+			level = SC_FOLDLEVELBASE;
+		else
+			level = SC_FOLDLEVELBASE | 1;
+		pAccess->SetLevel(lineCurrent, level);
+		iIndentPrevious = level & SC_FOLDLEVELNUMBERMASK;
+	}
+}
+
+// Reads the separator characters from a leading UNA segment, if the first MaxLength
+// characters hold a complete one; otherwise restores the default separators.
+// Returns 0 when a UNA segment was applied, -1 when the defaults were used.
+Sci_Position LexerEDIFACT::InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength)
+{
+	// MaxLength is unsigned: test before subtracting so a short range cannot wrap around
+	if (MaxLength >= 9)
+	{
+		MaxLength -= 9; // drop 9 chars, to give us room for UNA:+.? '
+		// The scan stops at MaxLength at the latest, so a UNA ending exactly at the limit is still read
+		Sci_PositionU startPos = ForwardPastWhitespace(pAccess, 0, MaxLength);
+		char bufUNA[9] = { 0 };
+		pAccess->GetCharRange(bufUNA, startPos, 9);
+		// Check it's UNA segment
+		if (!memcmp(bufUNA, "UNA", 3))
+		{
+			m_chComponent = bufUNA[3];
+			m_chData = bufUNA[4];
+			m_chDecimal = bufUNA[5];
+			m_chRelease = bufUNA[6];
+			// bufUNA [7] should be space - reserved.
+			m_chSegment = bufUNA[8];
+			return 0; // success!
+		}
+	}
+
+	// We failed to find a UNA, so drop to defaults
+	m_chComponent = ':';
+	m_chData = '+';
+	m_chDecimal = '.';
+	m_chRelease = '?';
+	m_chSegment = '\'';
+	return -1;
+}
+
+// Returns the position of the first character at or after startPos that is not a
+// tab, carriage return, line feed or space. Returns MaxLength when no such character
+// lies before MaxLength, including when startPos is already at or beyond it.
+Sci_Position LexerEDIFACT::ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const
+{
+	char ch;
+
+	for (Sci_Position pos = startPos; pos < MaxLength; pos++)
+	{
+		pAccess->GetCharRange(&ch, pos, 1);
+		// Anything other than these four characters counts as data
+		const bool isBlank =
+			(ch == '\t') ||
+			(ch == '\r') ||
+			(ch == '\n') ||
+			(ch == ' ');
+		if (!isBlank)
+			return pos;
+	}
+
+	// Only whitespace (or nothing at all) before MaxLength
+	return MaxLength;
+}
+
+// Classifies a 3 character segment tag: SCE_EDI_BADSEGMENT unless all three are in
+// 'A'..'Z', else SCE_EDI_UNA / SCE_EDI_UNH for those tags, else SCE_EDI_SEGMENTSTART.
+int LexerEDIFACT::DetectSegmentHeader(char SegmentHeader[3]) const
+{
+	for (int i = 0; i < 3; i++)
+	{
+		const char ch = SegmentHeader[i];
+		if (!(ch >= 'A' && ch <= 'Z'))
+			return SCE_EDI_BADSEGMENT;
+	}
+
+	if (!memcmp(SegmentHeader, "UNA", 3))
+		return SCE_EDI_UNA;
+	return memcmp(SegmentHeader, "UNH", 3) ? SCE_EDI_SEGMENTSTART : SCE_EDI_UNH;
+}
+
+// Look backwards from startPos for the segment terminator character, or the document beginning.
+// The match is on the character alone: a preceding release character is not taken into account.
+Sci_Position LexerEDIFACT::FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const
+{
+	char ch;
+	while (startPos > 0)
+	{
+		pAccess->GetCharRange(&ch, startPos, 1);
+		if (ch == m_chSegment)
+			return startPos;
+		startPos--;
+	}
+	return 0; // no terminator found (position 0 itself is never read)
+}
diff --git a/src/Catalogue.cxx b/src/Catalogue.cxx
index e6aa2587e..2eadd9509 100644
--- a/src/Catalogue.cxx
+++ b/src/Catalogue.cxx
@@ -110,6 +110,7 @@ int Scintilla_LinkLexers() {
 	LINK_LEXER(lmDMAP);
 	LINK_LEXER(lmDMIS);
 	LINK_LEXER(lmECL);
+	LINK_LEXER(lmEDIFACT);
 	LINK_LEXER(lmEiffel);
 	LINK_LEXER(lmEiffelkw);
 	LINK_LEXER(lmErlang);
diff --git a/win32/scintilla.mak b/win32/scintilla.mak
index e97308a23..b000092c1 100644
--- a/win32/scintilla.mak
+++ b/win32/scintilla.mak
@@ -136,6 +136,7 @@ LEXOBJS=\
 	$(DIR_O)\LexDMAP.obj \
 	$(DIR_O)\LexDMIS.obj \
 	$(DIR_O)\LexECL.obj \
+	$(DIR_O)\LexEDIFACT.obj \
 	$(DIR_O)\LexEiffel.obj \
 	$(DIR_O)\LexErlang.obj \
 	$(DIR_O)\LexErrorList.obj \
@@ -556,6 +557,8 @@ $(DIR_O)\LexDMIS.obj: ..\lexers\LexDMIS.cxx $(LEX_HEADERS)
 
 $(DIR_O)\LexECL.obj: ..\lexers\LexECL.cxx $(LEX_HEADERS)
 
+$(DIR_O)\LexEDIFACT.obj: ..\lexers\LexEDIFACT.cxx $(LEX_HEADERS)
+
 $(DIR_O)\LexEiffel.obj: ..\lexers\LexEiffel.cxx $(LEX_HEADERS)
 
 $(DIR_O)\LexErlang.obj: ..\lexers\LexErlang.cxx $(LEX_HEADERS)