diff options
Diffstat (limited to 'lexers/LexEDIFACT.cxx')
-rw-r--r-- | lexers/LexEDIFACT.cxx | 315 |
1 files changed, 315 insertions, 0 deletions
diff --git a/lexers/LexEDIFACT.cxx b/lexers/LexEDIFACT.cxx new file mode 100644 index 000000000..70fd9f8f6 --- /dev/null +++ b/lexers/LexEDIFACT.cxx @@ -0,0 +1,315 @@ +// Scintilla Lexer for EDIFACT +// Written by Iain Clarke, IMCSoft & Inobiz AB. +// EDIFACT documented here: https://www.unece.org/cefact/edifact/welcome.html +// and more readably here: https://en.wikipedia.org/wiki/EDIFACT +// This code is subject to the same license terms as the rest of the scintilla project: +// The License.txt file describes the conditions under which this software may be distributed. +// + +// Header order must match order in scripts/HeaderOrder.txt +#include <cstdlib> +#include <cassert> +#include <cstring> +#include <cctype> + +#include "ILexer.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#include "LexAccessor.h" +#include "LexerModule.h" + +class LexerEDIFACT : public ILexer +{ +public: + LexerEDIFACT(); + virtual ~LexerEDIFACT() {} // virtual destructor, as we inherit from ILexer + + static ILexer *Factory() { + return new LexerEDIFACT; + } + + virtual int SCI_METHOD Version() const + { + return lvOriginal; + } + virtual void SCI_METHOD Release() + { + delete this; + } + + const char * SCI_METHOD PropertyNames() + { + return "fold"; + } + int SCI_METHOD PropertyType(const char *) + { + return SC_TYPE_BOOLEAN; // Only one property! + } + const char * SCI_METHOD DescribeProperty(const char *name) + { + if (strcmp(name, "fold")) + return NULL; + return "Whether to apply folding to document or not"; + } + + virtual Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) + { + if (strcmp(key, "fold")) + return -1; + m_bFold = strcmp(val, "0") ? true : false; + return 0; + } + const char * SCI_METHOD DescribeWordListSets() + { + return NULL; + } + virtual Sci_Position SCI_METHOD WordListSet(int, const char *) + { + return -1; + } + virtual void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess); + virtual void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess); + virtual void * SCI_METHOD PrivateCall(int, void *) + { + return NULL; + } + +protected: + Sci_Position InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength); + Sci_Position FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const; + Sci_Position ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const; + int DetectSegmentHeader(char SegmentHeader[3]) const; + + bool m_bFold; + char m_chComponent; + char m_chData; + char m_chDecimal; + char m_chRelease; + char m_chSegment; +}; + +LexerModule lmEDIFACT(SCLEX_EDIFACT, LexerEDIFACT::Factory, "edifact"); + +/////////////////////////////////////////////////////////////////////////////// + + + +/////////////////////////////////////////////////////////////////////////////// + +LexerEDIFACT::LexerEDIFACT() +{ + m_bFold = false; + m_chComponent = ':'; + m_chData = '+'; + m_chDecimal = '.'; + m_chRelease = '?'; + m_chSegment = '\''; +} + +void LexerEDIFACT::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess) +{ + Sci_PositionU posFinish = startPos + lengthDoc; + InitialiseFromUNA(pAccess, posFinish); + + // Look backwards for a ' or a document beginning + Sci_PositionU posCurrent = FindPreviousEnd(pAccess, startPos); + // And jump past the ' if this was not the beginning of the document + if (posCurrent != 0) + posCurrent++; + + // Style buffer, so we're not issuing loads of notifications + LexAccessor styler (pAccess); + pAccess->StartStyling(posCurrent, '\377'); + styler.StartSegment(posCurrent); + Sci_Position posSegmentStart = -1; + + while ((posCurrent < posFinish) && (posSegmentStart == -1)) + { + posCurrent = ForwardPastWhitespace(pAccess, posCurrent, posFinish); + // Mark whitespace as default + styler.ColourTo(posCurrent - 1, SCE_EDI_DEFAULT); + if (posCurrent >= posFinish) + break; + + // Does is start with 3 charaters? ie, UNH + char SegmentHeader[4] = { 0 }; + pAccess->GetCharRange(SegmentHeader, posCurrent, 3); + + int SegmentStyle = DetectSegmentHeader(SegmentHeader); + if (SegmentStyle == SCE_EDI_BADSEGMENT) + break; + if (SegmentStyle == SCE_EDI_UNA) + { + posCurrent += 9; + styler.ColourTo(posCurrent - 1, SCE_EDI_UNA); // UNA + continue; + } + posSegmentStart = posCurrent; + posCurrent += 3; + + styler.ColourTo(posCurrent - 1, SegmentStyle); // UNH etc + + // Colour in the rest of the segment + for (char c; posCurrent < posFinish; posCurrent++) + { + pAccess->GetCharRange(&c, posCurrent, 1); + + if (c == m_chRelease) // ? escape character, check first, in case of ?' + posCurrent++; + else if (c == m_chSegment) // ' + { + // Make sure the whole segment is on one line. styler won't let us go back in time, so we'll settle for marking the ' as bad. + Sci_Position lineSegmentStart = pAccess->LineFromPosition(posSegmentStart); + Sci_Position lineSegmentEnd = pAccess->LineFromPosition(posCurrent); + if (lineSegmentStart == lineSegmentEnd) + styler.ColourTo(posCurrent, SCE_EDI_SEGMENTEND); + else + styler.ColourTo(posCurrent, SCE_EDI_BADSEGMENT); + posSegmentStart = -1; + posCurrent++; + break; + } + else if (c == m_chComponent) // : + styler.ColourTo(posCurrent, SCE_EDI_SEP_COMPOSITE); + else if (c == m_chData) // + + styler.ColourTo(posCurrent, SCE_EDI_SEP_ELEMENT); + else + styler.ColourTo(posCurrent, SCE_EDI_DEFAULT); + } + } + styler.Flush(); + + if (posSegmentStart == -1) + return; + + pAccess->StartStyling(posSegmentStart, -1); + pAccess->SetStyleFor(posFinish - posSegmentStart, SCE_EDI_BADSEGMENT); +} + +void LexerEDIFACT::Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess) +{ + if (!m_bFold) + return; + + // Fold at UNx lines. ie, UNx segments = 0, other segments = 1. + // There's no sub folding, so we can be quite simple. + Sci_Position endPos = startPos + lengthDoc; + char SegmentHeader[4] = { 0 }; + + int iIndentPrevious = 0; + Sci_Position lineLast = pAccess->LineFromPosition(endPos); + + for (Sci_Position lineCurrent = pAccess->LineFromPosition(startPos); lineCurrent <= lineLast; lineCurrent++) + { + Sci_Position posLineStart = pAccess->LineStart(lineCurrent); + posLineStart = ForwardPastWhitespace(pAccess, posLineStart, endPos); + Sci_Position lineDataStart = pAccess->LineFromPosition(posLineStart); + // Fill in whitespace lines? + for (; lineCurrent < lineDataStart; lineCurrent++) + pAccess->SetLevel(lineCurrent, SC_FOLDLEVELBASE | SC_FOLDLEVELWHITEFLAG | iIndentPrevious); + pAccess->GetCharRange(SegmentHeader, posLineStart, 3); + //if (DetectSegmentHeader(SegmentHeader) == SCE_EDI_BADSEGMENT) // Abort if this is not a proper segment header + + int level = 0; + if (memcmp(SegmentHeader, "UNH", 3) == 0) // UNH starts blocks + level = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG; + // Check for UNA,B and Z. All others are inside messages + else if (!memcmp(SegmentHeader, "UNA", 3) || !memcmp(SegmentHeader, "UNB", 3) || !memcmp(SegmentHeader, "UNZ", 3)) + level = SC_FOLDLEVELBASE; + else + level = SC_FOLDLEVELBASE | 1; + pAccess->SetLevel(lineCurrent, level); + iIndentPrevious = level & SC_FOLDLEVELNUMBERMASK; + } +} + +Sci_Position LexerEDIFACT::InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength) +{ + MaxLength -= 9; // drop 9 chars, to give us room for UNA:+.? ' + + Sci_PositionU startPos = 0; + startPos += ForwardPastWhitespace(pAccess, 0, MaxLength); + if (startPos < MaxLength) + { + char bufUNA[9]; + pAccess->GetCharRange(bufUNA, startPos, 9); + + // Check it's UNA segment + if (!memcmp(bufUNA, "UNA", 3)) + { + m_chComponent = bufUNA[3]; + m_chData = bufUNA[4]; + m_chDecimal = bufUNA[5]; + m_chRelease = bufUNA[6]; + // bufUNA [7] should be space - reserved. + m_chSegment = bufUNA[8]; + + return 0; // success! + } + } + + // We failed to find a UNA, so drop to defaults + m_chComponent = ':'; + m_chData = '+'; + m_chDecimal = '.'; + m_chRelease = '?'; + m_chSegment = '\''; + + return -1; +} + +Sci_Position LexerEDIFACT::ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const +{ + char c; + + while (startPos < MaxLength) + { + pAccess->GetCharRange(&c, startPos, 1); + switch (c) + { + case '\t': + case '\r': + case '\n': + case ' ': + break; + default: + return startPos; + } + + startPos++; + } + + return MaxLength; +} + +int LexerEDIFACT::DetectSegmentHeader(char SegmentHeader[3]) const +{ + if ( + SegmentHeader[0] < 'A' || SegmentHeader[0] > 'Z' || + SegmentHeader[1] < 'A' || SegmentHeader[1] > 'Z' || + SegmentHeader[2] < 'A' || SegmentHeader[2] > 'Z') + return SCE_EDI_BADSEGMENT; + + if (memcmp(SegmentHeader, "UNA", 3) == 0) + return SCE_EDI_UNA; + if (memcmp(SegmentHeader, "UNH", 3) == 0) + return SCE_EDI_UNH; + + return SCE_EDI_SEGMENTSTART; +} + +// Look backwards for a ' or a document beginning +Sci_Position LexerEDIFACT::FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const +{ + for (char c; startPos > 0; startPos--) + { + pAccess->GetCharRange(&c, startPos, 1); + if (c == m_chSegment) + return startPos; + } + // We didn't find a ', so just go with the beginning + return 0; +} + + |