diff options
-rw-r--r-- | doc/ScintillaHistory.html | 5 | ||||
-rw-r--r-- | include/SciLexer.h | 10 | ||||
-rw-r--r-- | include/Scintilla.iface | 11 | ||||
-rw-r--r-- | lexers/LexEDIFACT.cxx | 315 | ||||
-rw-r--r-- | src/Catalogue.cxx | 1 | ||||
-rw-r--r-- | win32/scintilla.mak | 3 |
6 files changed, 345 insertions, 0 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index 40201af00..876587a62 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -505,6 +505,7 @@ </tr><tr> <td>Roberto Rossi</td> <td>Kenny Liu</td> + <td>Iain Clarke</td> </tr> </table> <p> @@ -558,6 +559,10 @@ Baan folder accomodates sections and lexer fixes definition of SCE_BAAN_FUNCDEF. </li> <li> + EDIFACT lexer and folder added. + <a href="http://sourceforge.net/p/scintilla/feature-requests/1166/">Feature #1166.</a> + </li> + <li> JSON folder fixed where it didn't resume folding with the correct fold level. </li> <li> diff --git a/include/SciLexer.h b/include/SciLexer.h index 0eb0b2956..44c02a84a 100644 --- a/include/SciLexer.h +++ b/include/SciLexer.h @@ -133,6 +133,7 @@ #define SCLEX_IHEX 118 #define SCLEX_TEHEX 119 #define SCLEX_JSON 120 +#define SCLEX_EDIFACT 121 #define SCLEX_AUTOMATIC 1000 #define SCE_P_DEFAULT 0 #define SCE_P_COMMENTLINE 1 @@ -1797,6 +1798,15 @@ #define SCE_JSON_KEYWORD 11 #define SCE_JSON_LDKEYWORD 12 #define SCE_JSON_ERROR 13 +#define SCE_EDI_DEFAULT 0 +#define SCE_EDI_SEGMENTSTART 1 +#define SCE_EDI_SEGMENTEND 2 +#define SCE_EDI_SEP_ELEMENT 3 +#define SCE_EDI_SEP_COMPOSITE 4 +#define SCE_EDI_SEP_RELEASE 5 +#define SCE_EDI_UNA 6 +#define SCE_EDI_UNH 7 +#define SCE_EDI_BADSEGMENT 8 /* --Autogenerated -- end of section automatically generated from Scintilla.iface */ #endif diff --git a/include/Scintilla.iface b/include/Scintilla.iface index bc7cb01ff..76c548e48 100644 --- a/include/Scintilla.iface +++ b/include/Scintilla.iface @@ -2873,6 +2873,7 @@ val SCLEX_SREC=117 val SCLEX_IHEX=118 val SCLEX_TEHEX=119 val SCLEX_JSON=120 +val SCLEX_EDIFACT=121 # When a lexer specifies its language as SCLEX_AUTOMATIC it receives a # value assigned in sequence from SCLEX_AUTOMATIC+1. 
@@ -4762,6 +4763,16 @@ val SCE_JSON_COMPACTIRI=10 val SCE_JSON_KEYWORD=11 val SCE_JSON_LDKEYWORD=12 val SCE_JSON_ERROR=13 +lex EDIFACT=SCLEX_EDIFACT SCE_EDI_ +val SCE_EDI_DEFAULT=0 +val SCE_EDI_SEGMENTSTART=1 +val SCE_EDI_SEGMENTEND=2 +val SCE_EDI_SEP_ELEMENT=3 +val SCE_EDI_SEP_COMPOSITE=4 +val SCE_EDI_SEP_RELEASE=5 +val SCE_EDI_UNA=6 +val SCE_EDI_UNH=7 +val SCE_EDI_BADSEGMENT=8 # Events diff --git a/lexers/LexEDIFACT.cxx b/lexers/LexEDIFACT.cxx new file mode 100644 index 000000000..70fd9f8f6 --- /dev/null +++ b/lexers/LexEDIFACT.cxx @@ -0,0 +1,315 @@
+// Scintilla Lexer for EDIFACT
+// Written by Iain Clarke, IMCSoft & Inobiz AB.
+// EDIFACT documented here: https://www.unece.org/cefact/edifact/welcome.html
+// and more readably here: https://en.wikipedia.org/wiki/EDIFACT
+// This code is subject to the same license terms as the rest of the scintilla project:
+// The License.txt file describes the conditions under which this software may be distributed.
+//
+
+// Header order must match order in scripts/HeaderOrder.txt
+#include <cstdlib>
+#include <cassert>
+#include <cstring>
+#include <cctype>
+
+#include "ILexer.h"
+#include "Scintilla.h"
+#include "SciLexer.h"
+
+#include "LexAccessor.h"
+#include "LexerModule.h"
+
+// Lexer and folder for EDIFACT documents.
+// Lex() styles the three-letter tag at the start of each segment, the separators
+// inside the segment and the segment terminator. Fold() gives every line a fold
+// level that depends on the tag found at the start of the line.
+// The separator characters start as the defaults set by the constructor and are
+// re-read from a leading UNA segment (when there is one) every time Lex() runs.
+class LexerEDIFACT : public ILexer
+{
+public:
+	LexerEDIFACT();
+	virtual ~LexerEDIFACT() {} // virtual destructor, as we inherit from ILexer
+
+	// Creates a new lexer instance. Passed to the lmEDIFACT LexerModule below.
+	static ILexer *Factory() {
+		return new LexerEDIFACT;
+	}
+
+	virtual int SCI_METHOD Version() const
+	{
+		return lvOriginal;
+	}
+	// Destroys this instance; the counterpart of the "new" in Factory().
+	virtual void SCI_METHOD Release()
+	{
+		delete this;
+	}
+
+	// "fold" is the only property this lexer exposes.
+	const char * SCI_METHOD PropertyNames()
+	{
+		return "fold";
+	}
+	// The property name is ignored: the single property is a boolean.
+	int SCI_METHOD PropertyType(const char *)
+	{
+		return SC_TYPE_BOOLEAN; // Only one property!
+	}
+	// Returns the description of "fold", or NULL for any other name.
+	const char * SCI_METHOD DescribeProperty(const char *name)
+	{
+		if (strcmp(name, "fold"))
+			return NULL;
+		return "Whether to apply folding to document or not";
+	}
+
+	// Sets the "fold" property: every value other than "0" switches folding on.
+	// Returns 0 when the key was "fold" and -1 for any other key.
+	virtual Sci_Position SCI_METHOD PropertySet(const char *key, const char *val)
+	{
+		if (strcmp(key, "fold"))
+			return -1;
+		m_bFold = strcmp(val, "0") ? true : false;
+		return 0;
+	}
+	// This lexer has no word lists.
+	const char * SCI_METHOD DescribeWordListSets()
+	{
+		return NULL;
+	}
+	virtual Sci_Position SCI_METHOD WordListSet(int, const char *)
+	{
+		return -1;
+	}
+	virtual void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess);
+	virtual void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess);
+	virtual void * SCI_METHOD PrivateCall(int, void *)
+	{
+		return NULL;
+	}
+
+protected:
+	// Reads the separator characters from a leading UNA segment, or restores the defaults.
+	Sci_Position InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength);
+	// Finds the closest segment terminator at or before startPos (0 when there is none).
+	Sci_Position FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const;
+	// Returns the first position in [startPos, MaxLength) that is not tab, CR, LF or space.
+	Sci_Position ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const;
+	// Maps a three-character segment tag to the style used for it.
+	int DetectSegmentHeader(char SegmentHeader[3]) const;
+
+	bool m_bFold;        // value of the "fold" property
+	char m_chComponent;  // component (composite) separator, default ':'
+	char m_chData;       // data element separator, default '+'
+	char m_chDecimal;    // decimal mark, default '.' (stored but not used for styling)
+	char m_chRelease;    // release (escape) character, default '?'
+	char m_chSegment;    // segment terminator, default '\''
+};
+
+LexerModule lmEDIFACT(SCLEX_EDIFACT, LexerEDIFACT::Factory, "edifact");
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Folding starts switched off and the separators start as the EDIFACT defaults.
+LexerEDIFACT::LexerEDIFACT()
+{
+	m_bFold = false;
+	m_chComponent = ':';
+	m_chData = '+';
+	m_chDecimal = '.';
+	m_chRelease = '?';
+	m_chSegment = '\'';
+}
+
+// Styles the range [startPos, startPos + lengthDoc).
+// Styling does not begin at startPos itself: it begins just after the closest
+// segment terminator found at or before startPos (or at position 0), so that a
+// segment is always styled from its tag onwards.
+// A segment that is still open when the end of the range is reached is restyled
+// as SCE_EDI_BADSEGMENT from its tag to the end of the range.
+void LexerEDIFACT::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess)
+{
+	Sci_PositionU posFinish = startPos + lengthDoc;
+	InitialiseFromUNA(pAccess, posFinish);
+
+	// Look backwards for a ' or a document beginning
+	Sci_PositionU posCurrent = FindPreviousEnd(pAccess, startPos);
+	// And jump past the ' if this was not the beginning of the document
+	if (posCurrent != 0)
+		posCurrent++;
+
+	// Style buffer, so we're not issuing loads of notifications
+	LexAccessor styler (pAccess);
+	pAccess->StartStyling(posCurrent, '\377');
+	styler.StartSegment(posCurrent);
+	// Start of the segment being styled; -1 while we are between segments
+	Sci_Position posSegmentStart = -1;
+
+	while ((posCurrent < posFinish) && (posSegmentStart == -1))
+	{
+		posCurrent = ForwardPastWhitespace(pAccess, posCurrent, posFinish);
+		// Mark whitespace as default
+		styler.ColourTo(posCurrent - 1, SCE_EDI_DEFAULT);
+		if (posCurrent >= posFinish)
+			break;
+
+		// Does it start with 3 characters? ie, UNH
+		char SegmentHeader[4] = { 0 };
+		pAccess->GetCharRange(SegmentHeader, posCurrent, 3);
+
+		int SegmentStyle = DetectSegmentHeader(SegmentHeader);
+		// Not a valid tag: stop styling here.
+		// NOTE(review): the remainder of the range keeps whatever style it had before - confirm this is intended.
+		if (SegmentStyle == SCE_EDI_BADSEGMENT)
+			break;
+		if (SegmentStyle == SCE_EDI_UNA)
+		{
+			// The UNA segment has a fixed length of 9 and is styled as a whole
+			posCurrent += 9;
+			if (posCurrent > posFinish)
+				posCurrent = posFinish; // never style beyond the requested range
+			styler.ColourTo(posCurrent - 1, SCE_EDI_UNA); // UNA
+			continue;
+		}
+		posSegmentStart = posCurrent;
+		posCurrent += 3;
+		if (posCurrent > posFinish)
+			posCurrent = posFinish; // never style beyond the requested range
+
+		styler.ColourTo(posCurrent - 1, SegmentStyle); // UNH etc
+
+		// Colour in the rest of the segment
+		for (char c = '\0'; posCurrent < posFinish; posCurrent++)
+		{
+			pAccess->GetCharRange(&c, posCurrent, 1);
+
+			if (c == m_chRelease) // ? escape character, check first, in case of ?'
+			{
+				styler.ColourTo(posCurrent, SCE_EDI_SEP_RELEASE);
+				// Step over the escaped character so that it is never taken for a
+				// separator or a terminator; it is plain data.
+				posCurrent++;
+				if (posCurrent < posFinish)
+					styler.ColourTo(posCurrent, SCE_EDI_DEFAULT);
+			}
+			else if (c == m_chSegment) // '
+			{
+				// Make sure the whole segment is on one line. styler won't let us go back in time, so we'll settle for marking the ' as bad.
+				Sci_Position lineSegmentStart = pAccess->LineFromPosition(posSegmentStart);
+				Sci_Position lineSegmentEnd = pAccess->LineFromPosition(posCurrent);
+				if (lineSegmentStart == lineSegmentEnd)
+					styler.ColourTo(posCurrent, SCE_EDI_SEGMENTEND);
+				else
+					styler.ColourTo(posCurrent, SCE_EDI_BADSEGMENT);
+				// Segment closed: let the outer loop look for the next one
+				posSegmentStart = -1;
+				posCurrent++;
+				break;
+			}
+			else if (c == m_chComponent) // :
+				styler.ColourTo(posCurrent, SCE_EDI_SEP_COMPOSITE);
+			else if (c == m_chData) // +
+				styler.ColourTo(posCurrent, SCE_EDI_SEP_ELEMENT);
+			else
+				styler.ColourTo(posCurrent, SCE_EDI_DEFAULT);
+		}
+	}
+	styler.Flush();
+
+	if (posSegmentStart == -1)
+		return;
+
+	// The last segment has no terminator inside the range: mark all of it as bad
+	pAccess->StartStyling(posSegmentStart, -1);
+	pAccess->SetStyleFor(posFinish - posSegmentStart, SCE_EDI_BADSEGMENT);
+}
+
+// Sets the fold level of every line from the line of startPos to the line of
+// startPos + lengthDoc. Does nothing unless the "fold" property is switched on.
+void LexerEDIFACT::Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess)
+{
+	if (!m_bFold)
+		return;
+
+	// Fold at UNx lines. ie, UNx segments = 0, other segments = 1.
+	// There's no sub folding, so we can be quite simple.
+	Sci_Position endPos = startPos + lengthDoc;
+
+	int iIndentPrevious = 0;
+	Sci_Position lineLast = pAccess->LineFromPosition(endPos);
+
+	for (Sci_Position lineCurrent = pAccess->LineFromPosition(startPos); lineCurrent <= lineLast; lineCurrent++)
+	{
+		Sci_Position posLineStart = pAccess->LineStart(lineCurrent);
+		posLineStart = ForwardPastWhitespace(pAccess, posLineStart, endPos);
+		Sci_Position lineDataStart = pAccess->LineFromPosition(posLineStart);
+		// Lines skipped as whitespace get the white flag and the level of the previous line
+		for (; lineCurrent < lineDataStart; lineCurrent++)
+			pAccess->SetLevel(lineCurrent, SC_FOLDLEVELBASE | SC_FOLDLEVELWHITEFLAG | iIndentPrevious);
+
+		// Cleared for every line, so that a read which supplies no characters
+		// cannot leave the tag of the previous line behind.
+		char SegmentHeader[4] = { 0 };
+		pAccess->GetCharRange(SegmentHeader, posLineStart, 3);
+
+		int level = 0;
+		if (memcmp(SegmentHeader, "UNH", 3) == 0) // UNH starts blocks
+			level = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
+		// Check for UNA,B and Z. All others are inside messages
+		else if (!memcmp(SegmentHeader, "UNA", 3) || !memcmp(SegmentHeader, "UNB", 3) || !memcmp(SegmentHeader, "UNZ", 3))
+			level = SC_FOLDLEVELBASE;
+		else
+			level = SC_FOLDLEVELBASE | 1;
+		pAccess->SetLevel(lineCurrent, level);
+		iIndentPrevious = level & SC_FOLDLEVELNUMBERMASK;
+	}
+}
+
+// Looks for a UNA segment after any leading whitespace at the start of the
+// document and, when one lies completely inside the first MaxLength characters,
+// takes the separator characters from it.
+// Returns 0 when a UNA segment was used, -1 when the defaults were restored.
+Sci_Position LexerEDIFACT::InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength)
+{
+	// "UNA" followed by six characters, e.g. UNA:+.? '
+	const Sci_PositionU lengthUNA = 9;
+
+	// Checked before subtracting: MaxLength is unsigned and must not wrap around
+	if (MaxLength >= lengthUNA)
+	{
+		// Last position at which a whole UNA segment still fits inside MaxLength
+		const Sci_PositionU posLastFit = MaxLength - lengthUNA;
+		const Sci_PositionU startPos = static_cast<Sci_PositionU>(
+			ForwardPastWhitespace(pAccess, 0, static_cast<Sci_Position>(posLastFit)));
+
+		// startPos is at most posLastFit, so the 9 characters end at or before MaxLength
+		char bufUNA[9] = { 0 };
+		pAccess->GetCharRange(bufUNA, startPos, 9);
+
+		// Check it's UNA segment
+		if (!memcmp(bufUNA, "UNA", 3))
+		{
+			m_chComponent = bufUNA[3];
+			m_chData = bufUNA[4];
+			m_chDecimal = bufUNA[5];
+			m_chRelease = bufUNA[6];
+			// bufUNA [7] should be space - reserved.
+			m_chSegment = bufUNA[8];
+
+			return 0; // success!
+		}
+	}
+
+	// We failed to find a UNA, so drop to defaults
+	m_chComponent = ':';
+	m_chData = '+';
+	m_chDecimal = '.';
+	m_chRelease = '?';
+	m_chSegment = '\'';
+
+	return -1;
+}
+
+// Returns the first position in [startPos, MaxLength) whose character is not a
+// tab, carriage return, line feed or space; returns MaxLength when there is none.
+Sci_Position LexerEDIFACT::ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const
+{
+	char c = '\0';
+
+	while (startPos < MaxLength)
+	{
+		pAccess->GetCharRange(&c, startPos, 1);
+		switch (c)
+		{
+		case '\t':
+		case '\r':
+		case '\n':
+		case ' ':
+			break;
+		default:
+			return startPos;
+		}
+
+		startPos++;
+	}
+
+	return MaxLength;
+}
+
+// Classifies a segment tag. Only the first three characters are examined.
+// Returns SCE_EDI_BADSEGMENT unless all three are in 'A'..'Z', SCE_EDI_UNA or
+// SCE_EDI_UNH for those two tags, and SCE_EDI_SEGMENTSTART for every other tag.
+int LexerEDIFACT::DetectSegmentHeader(char SegmentHeader[3]) const
+{
+	if (
+		SegmentHeader[0] < 'A' || SegmentHeader[0] > 'Z' ||
+		SegmentHeader[1] < 'A' || SegmentHeader[1] > 'Z' ||
+		SegmentHeader[2] < 'A' || SegmentHeader[2] > 'Z')
+		return SCE_EDI_BADSEGMENT;
+
+	if (memcmp(SegmentHeader, "UNA", 3) == 0)
+		return SCE_EDI_UNA;
+	if (memcmp(SegmentHeader, "UNH", 3) == 0)
+		return SCE_EDI_UNH;
+
+	return SCE_EDI_SEGMENTSTART;
+}
+
+// Look backwards for a ' or a document beginning
+// Position 0 itself is never examined: the search stops before it and returns 0.
+// NOTE(review): a terminator that follows the release character is not told
+// apart from a real one here - confirm whether that matters to callers.
+Sci_Position LexerEDIFACT::FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const
+{
+	for (char c = '\0'; startPos > 0; startPos--)
+	{
+		pAccess->GetCharRange(&c, startPos, 1);
+		if (c == m_chSegment)
+			return startPos;
+	}
+	// We didn't find a ', so just go with the beginning
+	return 0;
+}
+
+
diff --git a/src/Catalogue.cxx b/src/Catalogue.cxx index e6aa2587e..2eadd9509 100644 --- a/src/Catalogue.cxx +++ b/src/Catalogue.cxx @@ -110,6 +110,7 @@ int Scintilla_LinkLexers() { LINK_LEXER(lmDMAP); LINK_LEXER(lmDMIS); LINK_LEXER(lmECL); + LINK_LEXER(lmEDIFACT); LINK_LEXER(lmEiffel); LINK_LEXER(lmEiffelkw); LINK_LEXER(lmErlang); diff --git a/win32/scintilla.mak b/win32/scintilla.mak index e97308a23..b000092c1 100644 --- a/win32/scintilla.mak +++ b/win32/scintilla.mak @@ -136,6 +136,7 @@ LEXOBJS=\ $(DIR_O)\LexDMAP.obj \ $(DIR_O)\LexDMIS.obj \ $(DIR_O)\LexECL.obj \ + $(DIR_O)\LexEDIFACT.obj \ $(DIR_O)\LexEiffel.obj \
$(DIR_O)\LexErlang.obj \ $(DIR_O)\LexErrorList.obj \ @@ -556,6 +557,8 @@ $(DIR_O)\LexDMIS.obj: ..\lexers\LexDMIS.cxx $(LEX_HEADERS) $(DIR_O)\LexECL.obj: ..\lexers\LexECL.cxx $(LEX_HEADERS) +$(DIR_O)\LexEDIFACT.obj: ..\lexers\LexEDIFACT.cxx $(LEX_HEADERS) + $(DIR_O)\LexEiffel.obj: ..\lexers\LexEiffel.cxx $(LEX_HEADERS) $(DIR_O)\LexErlang.obj: ..\lexers\LexErlang.cxx $(LEX_HEADERS) |