aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--doc/ScintillaHistory.html5
-rw-r--r--include/SciLexer.h10
-rw-r--r--include/Scintilla.iface11
-rw-r--r--lexers/LexEDIFACT.cxx315
-rw-r--r--src/Catalogue.cxx1
-rw-r--r--win32/scintilla.mak3
6 files changed, 345 insertions, 0 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index 40201af00..876587a62 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -505,6 +505,7 @@
</tr><tr>
<td>Roberto Rossi</td>
<td>Kenny Liu</td>
+ <td>Iain Clarke</td>
</tr>
</table>
<p>
@@ -558,6 +559,10 @@
Baan folder accomodates sections and lexer fixes definition of SCE_BAAN_FUNCDEF.
</li>
<li>
+ EDIFACT lexer and folder added.
+ <a href="http://sourceforge.net/p/scintilla/feature-requests/1166/">Feature #1166.</a>
+ </li>
+ <li>
JSON folder fixed where it didn't resume folding with the correct fold level.
</li>
<li>
diff --git a/include/SciLexer.h b/include/SciLexer.h
index 0eb0b2956..44c02a84a 100644
--- a/include/SciLexer.h
+++ b/include/SciLexer.h
@@ -133,6 +133,7 @@
#define SCLEX_IHEX 118
#define SCLEX_TEHEX 119
#define SCLEX_JSON 120
+#define SCLEX_EDIFACT 121
#define SCLEX_AUTOMATIC 1000
#define SCE_P_DEFAULT 0
#define SCE_P_COMMENTLINE 1
@@ -1797,6 +1798,15 @@
#define SCE_JSON_KEYWORD 11
#define SCE_JSON_LDKEYWORD 12
#define SCE_JSON_ERROR 13
+#define SCE_EDI_DEFAULT 0
+#define SCE_EDI_SEGMENTSTART 1
+#define SCE_EDI_SEGMENTEND 2
+#define SCE_EDI_SEP_ELEMENT 3
+#define SCE_EDI_SEP_COMPOSITE 4
+#define SCE_EDI_SEP_RELEASE 5
+#define SCE_EDI_UNA 6
+#define SCE_EDI_UNH 7
+#define SCE_EDI_BADSEGMENT 8
/* --Autogenerated -- end of section automatically generated from Scintilla.iface */
#endif
diff --git a/include/Scintilla.iface b/include/Scintilla.iface
index bc7cb01ff..76c548e48 100644
--- a/include/Scintilla.iface
+++ b/include/Scintilla.iface
@@ -2873,6 +2873,7 @@ val SCLEX_SREC=117
val SCLEX_IHEX=118
val SCLEX_TEHEX=119
val SCLEX_JSON=120
+val SCLEX_EDIFACT=121
# When a lexer specifies its language as SCLEX_AUTOMATIC it receives a
# value assigned in sequence from SCLEX_AUTOMATIC+1.
@@ -4762,6 +4763,16 @@ val SCE_JSON_COMPACTIRI=10
val SCE_JSON_KEYWORD=11
val SCE_JSON_LDKEYWORD=12
val SCE_JSON_ERROR=13
+lex EDIFACT=SCLEX_EDIFACT SCE_EDI_
+val SCE_EDI_DEFAULT=0
+val SCE_EDI_SEGMENTSTART=1
+val SCE_EDI_SEGMENTEND=2
+val SCE_EDI_SEP_ELEMENT=3
+val SCE_EDI_SEP_COMPOSITE=4
+val SCE_EDI_SEP_RELEASE=5
+val SCE_EDI_UNA=6
+val SCE_EDI_UNH=7
+val SCE_EDI_BADSEGMENT=8
# Events
diff --git a/lexers/LexEDIFACT.cxx b/lexers/LexEDIFACT.cxx
new file mode 100644
index 000000000..70fd9f8f6
--- /dev/null
+++ b/lexers/LexEDIFACT.cxx
@@ -0,0 +1,315 @@
+// Scintilla Lexer for EDIFACT
+// Written by Iain Clarke, IMCSoft & Inobiz AB.
+// EDIFACT documented here: https://www.unece.org/cefact/edifact/welcome.html
+// and more readably here: https://en.wikipedia.org/wiki/EDIFACT
+// This code is subject to the same license terms as the rest of the scintilla project:
+// The License.txt file describes the conditions under which this software may be distributed.
+//
+
+// Header order must match order in scripts/HeaderOrder.txt
+#include <cstdlib>
+#include <cassert>
+#include <cstring>
+#include <cctype>
+
+#include "ILexer.h"
+#include "Scintilla.h"
+#include "SciLexer.h"
+
+#include "LexAccessor.h"
+#include "LexerModule.h"
+
+// ILexer implementation for EDIFACT text.
+// Lex() styles segments (3-letter tag, separators, data, terminator) and Fold()
+// assigns fold levels per line. The separator characters default to the usual
+// EDIFACT set and are re-read from a leading UNA segment when one is present.
+class LexerEDIFACT : public ILexer
+{
+public:
+    LexerEDIFACT();
+    virtual ~LexerEDIFACT() {} // kept virtual because the class derives from ILexer
+
+    // Creates a new lexer instance; passed to the lmEDIFACT LexerModule.
+    static ILexer *Factory() {
+        return new LexerEDIFACT;
+    }
+
+    // Reports the lexer interface version implemented (lvOriginal).
+    virtual int SCI_METHOD Version() const
+    {
+        return lvOriginal;
+    }
+    // Destroys this lexer object.
+    virtual void SCI_METHOD Release()
+    {
+        delete this;
+    }
+
+    // The only property understood by this lexer is "fold".
+    const char * SCI_METHOD PropertyNames()
+    {
+        return "fold";
+    }
+    // Every query is answered as boolean because "fold" is the sole property.
+    int SCI_METHOD PropertyType(const char *)
+    {
+        return SC_TYPE_BOOLEAN;
+    }
+    // Returns the description of "fold", or NULL for any other name.
+    const char * SCI_METHOD DescribeProperty(const char *name)
+    {
+        if (strcmp(name, "fold") == 0)
+            return "Whether to apply folding to document or not";
+        return NULL;
+    }
+
+    // Stores the "fold" property: any value other than "0" switches folding on.
+    // Returns 0 when the key was "fold" and -1 for any other key.
+    virtual Sci_Position SCI_METHOD PropertySet(const char *key, const char *val)
+    {
+        if (strcmp(key, "fold") != 0)
+            return -1;
+        m_bFold = (strcmp(val, "0") != 0);
+        return 0;
+    }
+    // No word lists are used by this lexer.
+    const char * SCI_METHOD DescribeWordListSets()
+    {
+        return NULL;
+    }
+    // Word lists are not supported, so every request is answered with -1.
+    virtual Sci_Position SCI_METHOD WordListSet(int, const char *)
+    {
+        return -1;
+    }
+    virtual void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess);
+    virtual void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess);
+    // No private calls are implemented.
+    virtual void * SCI_METHOD PrivateCall(int, void *)
+    {
+        return NULL;
+    }
+
+protected:
+    // Reads the separator characters from a leading UNA segment; 0 if found, -1 otherwise.
+    Sci_Position InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength);
+    // Searches backwards from startPos for a segment terminator; 0 if none is found.
+    Sci_Position FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const;
+    // Returns the first position at or after startPos that is not tab, CR, LF or space.
+    Sci_Position ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const;
+    // Maps a 3-character segment tag to one of the SCE_EDI_* styles.
+    int DetectSegmentHeader(char SegmentHeader[3]) const;
+
+    bool m_bFold;       // true when the "fold" property is enabled
+    char m_chComponent; // component separator, ':' by default
+    char m_chData;      // data element separator, '+' by default
+    char m_chDecimal;   // decimal mark, '.' by default (stored, not read by the code in this class)
+    char m_chRelease;   // release (escape) character, '?' by default
+    char m_chSegment;   // segment terminator, '\'' by default
+};
+
+// Module descriptor associating SCLEX_EDIFACT and the name "edifact" with LexerEDIFACT::Factory.
+LexerModule lmEDIFACT(SCLEX_EDIFACT, LexerEDIFACT::Factory, "edifact");
+
+///////////////////////////////////////////////////////////////////////////////
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Starts with folding disabled and the default EDIFACT separator set (: + . ? ').
+// The initializers follow the member declaration order.
+LexerEDIFACT::LexerEDIFACT() :
+    m_bFold(false),
+    m_chComponent(':'),
+    m_chData('+'),
+    m_chDecimal('.'),
+    m_chRelease('?'),
+    m_chSegment('\'')
+{
+}
+
+// Styles the text from the segment containing startPos up to startPos + lengthDoc.
+// The separator characters are refreshed from any UNA header first, then styling
+// restarts just after the previous segment terminator (or at the document start).
+// Each segment is styled as: 3-letter tag, separators, data and terminator.
+// A segment that is still open when the range ends is restyled as SCE_EDI_BADSEGMENT.
+// The initial style argument is ignored.
+void LexerEDIFACT::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess)
+{
+ Sci_PositionU posFinish = startPos + lengthDoc;
+ // Pick up custom separators from a UNA segment (defaults are used if there is none)
+ InitialiseFromUNA(pAccess, posFinish);
+
+ // Look backwards for a ' or a document beginning
+ Sci_PositionU posCurrent = FindPreviousEnd(pAccess, startPos);
+ // And jump past the ' if this was not the beginning of the document
+ if (posCurrent != 0)
+ posCurrent++;
+
+ // Style buffer, so we're not issuing loads of notifications
+ LexAccessor styler (pAccess);
+ pAccess->StartStyling(posCurrent, '\377');
+ styler.StartSegment(posCurrent);
+ // Start of the segment currently being styled, or -1 when between segments
+ Sci_Position posSegmentStart = -1;
+
+ // One segment per iteration; stops at the end of the range or when a segment is left unterminated
+ while ((posCurrent < posFinish) && (posSegmentStart == -1))
+ {
+ posCurrent = ForwardPastWhitespace(pAccess, posCurrent, posFinish);
+ // Mark whitespace as default
+ styler.ColourTo(posCurrent - 1, SCE_EDI_DEFAULT);
+ if (posCurrent >= posFinish)
+ break;
+
+ // Does it start with 3 characters? ie, UNH
+ char SegmentHeader[4] = { 0 };
+ pAccess->GetCharRange(SegmentHeader, posCurrent, 3);
+
+ int SegmentStyle = DetectSegmentHeader(SegmentHeader);
+ // Not three upper case letters: stop here. posSegmentStart is still -1, so the function returns after the flush below.
+ if (SegmentStyle == SCE_EDI_BADSEGMENT)
+ break;
+ // The UNA segment is styled as one fixed block of 9 characters
+ if (SegmentStyle == SCE_EDI_UNA)
+ {
+ posCurrent += 9;
+ styler.ColourTo(posCurrent - 1, SCE_EDI_UNA); // UNA
+ continue;
+ }
+ posSegmentStart = posCurrent;
+ posCurrent += 3;
+
+ styler.ColourTo(posCurrent - 1, SegmentStyle); // UNH etc
+
+ // Colour in the rest of the segment
+ for (char c; posCurrent < posFinish; posCurrent++)
+ {
+ pAccess->GetCharRange(&c, posCurrent, 1);
+
+ if (c == m_chRelease) // ? escape character, check first, in case of ?'
+ posCurrent++;
+ else if (c == m_chSegment) // '
+ {
+ // Make sure the whole segment is on one line. styler won't let us go back in time, so we'll settle for marking the ' as bad.
+ Sci_Position lineSegmentStart = pAccess->LineFromPosition(posSegmentStart);
+ Sci_Position lineSegmentEnd = pAccess->LineFromPosition(posCurrent);
+ if (lineSegmentStart == lineSegmentEnd)
+ styler.ColourTo(posCurrent, SCE_EDI_SEGMENTEND);
+ else
+ styler.ColourTo(posCurrent, SCE_EDI_BADSEGMENT);
+ // Segment finished: clear the marker so the outer loop moves on to the next segment
+ posSegmentStart = -1;
+ posCurrent++;
+ break;
+ }
+ else if (c == m_chComponent) // :
+ styler.ColourTo(posCurrent, SCE_EDI_SEP_COMPOSITE);
+ else if (c == m_chData) // +
+ styler.ColourTo(posCurrent, SCE_EDI_SEP_ELEMENT);
+ else
+ styler.ColourTo(posCurrent, SCE_EDI_DEFAULT);
+ }
+ }
+ styler.Flush();
+
+ // Nothing left open, so the styling done above stands
+ if (posSegmentStart == -1)
+ return;
+
+ // The last segment had no terminator inside the range: mark all of it, from its tag to the end of the range, as bad
+ pAccess->StartStyling(posSegmentStart, -1);
+ pAccess->SetStyleFor(posFinish - posSegmentStart, SCE_EDI_BADSEGMENT);
+}
+
+// Assigns a fold level to every line between startPos and startPos + lengthDoc.
+// Does nothing unless the "fold" property is enabled.
+//   UNH line          -> base level, flagged as a fold header
+//   UNA/UNB/UNZ line  -> base level
+//   any other line    -> base level + 1
+// Lines holding only whitespace are flagged as white and reuse the previous level number.
+// The initial style argument is ignored.
+void LexerEDIFACT::Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess)
+{
+    if (!m_bFold)
+        return;
+
+    // There is no nested folding, so a single pass over the lines is enough.
+    Sci_Position posEnd = startPos + lengthDoc;
+    char tag[4] = { 0 };
+
+    int previousLevelNumber = 0;
+    const Sci_Position lineFinal = pAccess->LineFromPosition(posEnd);
+
+    Sci_Position line = pAccess->LineFromPosition(startPos);
+    while (line <= lineFinal)
+    {
+        // Locate the first non-whitespace character at or after the start of this line
+        Sci_Position posData = pAccess->LineStart(line);
+        posData = ForwardPastWhitespace(pAccess, posData, posEnd);
+        const Sci_Position lineData = pAccess->LineFromPosition(posData);
+
+        // Every line skipped on the way there contained only whitespace
+        while (line < lineData)
+        {
+            pAccess->SetLevel(line, SC_FOLDLEVELBASE | SC_FOLDLEVELWHITEFLAG | previousLevelNumber);
+            line++;
+        }
+
+        // The tag is not validated with DetectSegmentHeader here; only the UNx tags are singled out
+        pAccess->GetCharRange(tag, posData, 3);
+
+        int level;
+        if (!memcmp(tag, "UNH", 3))
+        {
+            // UNH starts a block
+            level = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
+        }
+        else if (memcmp(tag, "UNA", 3) == 0 || memcmp(tag, "UNB", 3) == 0 || memcmp(tag, "UNZ", 3) == 0)
+        {
+            // UNA, UNB and UNZ sit outside messages
+            level = SC_FOLDLEVELBASE;
+        }
+        else
+        {
+            // Everything else is inside a message
+            level = SC_FOLDLEVELBASE | 1;
+        }
+        pAccess->SetLevel(line, level);
+        previousLevelNumber = level & SC_FOLDLEVELNUMBERMASK;
+
+        line++;
+    }
+}
+
+// Sets the separator characters from a UNA segment ("UNA:+.? '") found as the first
+// non-whitespace text of the document, looking no further than MaxLength.
+// Returns 0 when a UNA segment was found and applied. Returns -1 when there is none,
+// in which case the default separators (: + . ? ') are in effect.
+Sci_Position LexerEDIFACT::InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength)
+{
+    // Begin with the defaults; a UNA segment found below replaces them.
+    m_chComponent = ':';
+    m_chData = '+';
+    m_chDecimal = '.';
+    m_chRelease = '?';
+    m_chSegment = '\'';
+
+    // UNA plus its six service characters occupy exactly 9 characters.
+    const Sci_PositionU lengthUNA = 9;
+    // Checked before subtracting: MaxLength is unsigned, so a shorter range would wrap around.
+    if (MaxLength < lengthUNA)
+        return -1;
+
+    // Last position at which a complete UNA segment still fits inside the range
+    const Sci_PositionU lastStart = MaxLength - lengthUNA;
+
+    const Sci_PositionU startPos = ForwardPastWhitespace(pAccess, 0, lastStart);
+    // <= rather than <, so a UNA segment that exactly fills the rest of the range is accepted
+    if (startPos > lastStart)
+        return -1;
+
+    char bufUNA[9] = { 0 };
+    pAccess->GetCharRange(bufUNA, startPos, 9);
+
+    // Check it's UNA segment
+    if (memcmp(bufUNA, "UNA", 3) != 0)
+        return -1;
+
+    m_chComponent = bufUNA[3];
+    m_chData = bufUNA[4];
+    m_chDecimal = bufUNA[5];
+    m_chRelease = bufUNA[6];
+    // bufUNA [7] should be space - reserved.
+    m_chSegment = bufUNA[8];
+
+    return 0; // success!
+}
+
+// Returns the first position in [startPos, MaxLength) whose character is not a
+// tab, carriage return, line feed or space. Returns MaxLength if there is no such position.
+Sci_Position LexerEDIFACT::ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const
+{
+    char ch;
+
+    for (Sci_Position pos = startPos; pos < MaxLength; pos++)
+    {
+        pAccess->GetCharRange(&ch, pos, 1);
+        const bool isBlank = (ch == '\t') || (ch == '\r') || (ch == '\n') || (ch == ' ');
+        if (!isBlank)
+            return pos;
+    }
+
+    return MaxLength;
+}
+
+// Classifies the first three characters of a segment.
+// Returns SCE_EDI_BADSEGMENT unless all three are upper case letters A-Z,
+// SCE_EDI_UNA for "UNA", SCE_EDI_UNH for "UNH", and SCE_EDI_SEGMENTSTART otherwise.
+int LexerEDIFACT::DetectSegmentHeader(char SegmentHeader[3]) const
+{
+    for (int i = 0; i < 3; i++)
+    {
+        const char ch = SegmentHeader[i];
+        if (ch < 'A' || ch > 'Z')
+            return SCE_EDI_BADSEGMENT;
+    }
+
+    if (!memcmp(SegmentHeader, "UNA", 3))
+        return SCE_EDI_UNA;
+    if (!memcmp(SegmentHeader, "UNH", 3))
+        return SCE_EDI_UNH;
+
+    return SCE_EDI_SEGMENTSTART;
+}
+
+// Look backwards from startPos for a segment terminator (') or the document beginning.
+// Returns the position of the nearest terminator at or before startPos that is not
+// escaped by the release character, or 0 when none is found. Position 0 itself is
+// never examined.
+Sci_Position LexerEDIFACT::FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const
+{
+    for (; startPos > 0; startPos--)
+    {
+        char c = '\0';
+        pAccess->GetCharRange(&c, startPos, 1);
+        if (c != m_chSegment)
+            continue;
+
+        // The forward scan in Lex() lets the release character escape whatever follows it,
+        // so ?' is data while ??' is an escaped ? followed by a real terminator.
+        // Count the release characters directly in front: an odd count means this one is escaped.
+        Sci_Position releaseCount = 0;
+        for (Sci_Position posBefore = startPos - 1; posBefore >= 0; posBefore--)
+        {
+            char chBefore = '\0';
+            pAccess->GetCharRange(&chBefore, posBefore, 1);
+            if (chBefore != m_chRelease)
+                break;
+            releaseCount++;
+        }
+        if ((releaseCount % 2) == 0)
+            return startPos;
+    }
+    // We didn't find a ', so just go with the beginning
+    return 0;
+}
+
+
diff --git a/src/Catalogue.cxx b/src/Catalogue.cxx
index e6aa2587e..2eadd9509 100644
--- a/src/Catalogue.cxx
+++ b/src/Catalogue.cxx
@@ -110,6 +110,7 @@ int Scintilla_LinkLexers() {
LINK_LEXER(lmDMAP);
LINK_LEXER(lmDMIS);
LINK_LEXER(lmECL);
+ LINK_LEXER(lmEDIFACT);
LINK_LEXER(lmEiffel);
LINK_LEXER(lmEiffelkw);
LINK_LEXER(lmErlang);
diff --git a/win32/scintilla.mak b/win32/scintilla.mak
index e97308a23..b000092c1 100644
--- a/win32/scintilla.mak
+++ b/win32/scintilla.mak
@@ -136,6 +136,7 @@ LEXOBJS=\
$(DIR_O)\LexDMAP.obj \
$(DIR_O)\LexDMIS.obj \
$(DIR_O)\LexECL.obj \
+ $(DIR_O)\LexEDIFACT.obj \
$(DIR_O)\LexEiffel.obj \
$(DIR_O)\LexErlang.obj \
$(DIR_O)\LexErrorList.obj \
@@ -556,6 +557,8 @@ $(DIR_O)\LexDMIS.obj: ..\lexers\LexDMIS.cxx $(LEX_HEADERS)
$(DIR_O)\LexECL.obj: ..\lexers\LexECL.cxx $(LEX_HEADERS)
+$(DIR_O)\LexEDIFACT.obj: ..\lexers\LexEDIFACT.cxx $(LEX_HEADERS)
+
$(DIR_O)\LexEiffel.obj: ..\lexers\LexEiffel.cxx $(LEX_HEADERS)
$(DIR_O)\LexErlang.obj: ..\lexers\LexErlang.cxx $(LEX_HEADERS)