diff options
author | mitchell <unknown> | 2019-05-27 18:34:00 -0400 |
---|---|---|
committer | mitchell <unknown> | 2019-05-27 18:34:00 -0400 |
commit | f84f2a3daef704cb309009f51b5da9f325979f68 (patch) | |
tree | a4746a5660d9c015e96e0f3e9fce600e6bff4ad0 | |
parent | 920f6ebb1d99186ea0699c97235667239604f278 (diff) | |
download | scintilla-mirror-f84f2a3daef704cb309009f51b5da9f325979f68.tar.gz |
Backport: Feature [feature-requests:#1280]. Lexer added for X12.
Backport of changeset 7486:a99fa007805f, but with LexerX12::Terminator struct
definition not having default values, which is a C++14 feature.
-rw-r--r-- | cocoa/ScintillaFramework/ScintillaFramework.xcodeproj/project.pbxproj | 4 | ||||
-rw-r--r-- | doc/ScintillaHistory.html | 4 | ||||
-rw-r--r-- | gtk/deps.mak | 8 | ||||
-rw-r--r-- | include/SciLexer.h | 10 | ||||
-rw-r--r-- | include/Scintilla.iface | 12 | ||||
-rw-r--r-- | lexers/LexX12.cxx | 341 | ||||
-rw-r--r-- | src/Catalogue.cxx | 1 | ||||
-rw-r--r-- | win32/deps.mak | 8 | ||||
-rw-r--r-- | win32/nmdeps.mak | 8 | ||||
-rw-r--r-- | win32/scintilla.mak | 1 |
10 files changed, 397 insertions, 0 deletions
diff --git a/cocoa/ScintillaFramework/ScintillaFramework.xcodeproj/project.pbxproj b/cocoa/ScintillaFramework/ScintillaFramework.xcodeproj/project.pbxproj index c49747bcd..916b7d4ea 100644 --- a/cocoa/ScintillaFramework/ScintillaFramework.xcodeproj/project.pbxproj +++ b/cocoa/ScintillaFramework/ScintillaFramework.xcodeproj/project.pbxproj @@ -232,6 +232,7 @@ 8DC2EF570486A6940098B216 /* Cocoa.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1058C7B1FEA5585E11CA2CBB /* Cocoa.framework */; }; F437405F9F32C7DEFCA38C11 /* LexIndent.cxx in Sources */ = {isa = PBXBuildFile; fileRef = 282E41F3B9E2BFEDD6A05BE7 /* LexIndent.cxx */; }; FDC7442CAD70B9A67EF1639D /* LexSAS.cxx in Sources */ = {isa = PBXBuildFile; fileRef = A95147A1AB7CADB00DAFE724 /* LexSAS.cxx */; }; + AE894E1CB7328CAE5B2EF47E /* LexX12.cxx in Sources */ = {isa = PBXBuildFile; fileRef = ADA64364A443F3E3F02D294E /* LexX12.cxx */; }; 4D0C4365AB6DF998CD48B1FC /* LexLPeg.cxx in Sources */ = {isa = PBXBuildFile; fileRef = 5EB3467789767C0ACE40A46A /* LexLPeg.cxx */; }; */; }; /* End PBXBuildFile section */ @@ -468,6 +469,7 @@ 8DC2EF5B0486A6940098B216 /* Scintilla.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Scintilla.framework; sourceTree = BUILT_PRODUCTS_DIR; }; A95147A1AB7CADB00DAFE724 /* LexSAS.cxx */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexSAS.cxx; path = ../../lexers/LexSAS.cxx; sourceTree = SOURCE_ROOT; }; D2F7E79907B2D74100F64583 /* CoreData.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreData.framework; path = /System/Library/Frameworks/CoreData.framework; sourceTree = "<absolute>"; }; + ADA64364A443F3E3F02D294E /* LexX12.cxx */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexX12.cxx; path = ../../lexers/LexX12.cxx; sourceTree = SOURCE_ROOT; }; 5EB3467789767C0ACE40A46A /* LexLPeg.cxx */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexLPeg.cxx; path = ../../lexers/LexLPeg.cxx; sourceTree = SOURCE_ROOT; }; /* End PBXFileReference section */ @@ -676,6 +678,7 @@ 114B6F0A11FA7526004FB6AB /* LexVerilog.cxx */, 114B6F0B11FA7526004FB6AB /* LexVHDL.cxx */, 11594BE8155B91DF0099E1FA /* LexVisualProlog.cxx */, + ADA64364A443F3E3F02D294E /* LexX12.cxx */, 114B6F0C11FA7526004FB6AB /* LexYAML.cxx */, ); name = Lexers; @@ -1137,6 +1140,7 @@ 5F804AA6B60FE695863A39FE /* LexStata.cxx in Sources */, 0ED84236A703D57578EBFD2F /* LexNim.cxx in Sources */, 00724A59981D34F11A3D162F /* LexCIL.cxx in Sources */, + AE894E1CB7328CAE5B2EF47E /* LexX12.cxx in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index c99caa2e4..6eff6c941 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -549,6 +549,10 @@ <li> Released 10 May 2019. </li> + <li> + Lexer added for X12. + <a href="https://sourceforge.net/p/scintilla/feature-requests/1280/">Feature #1280</a>. + </li> <li> Fix bug where changing identifier sets in lexers preserved previous identifiers. </li> diff --git a/gtk/deps.mak b/gtk/deps.mak index 98189e518..104746cf6 100644 --- a/gtk/deps.mak +++ b/gtk/deps.mak @@ -1921,6 +1921,14 @@ LexVisualProlog.o: \ ../lexlib/LexerModule.h \ ../lexlib/OptionSet.h \ ../lexlib/DefaultLexer.h +LexX12.o: \ + ../lexers/LexX12.cxx \ + ../include/ILexer.h \ + ../include/Sci_Position.h \ + ../include/Scintilla.h \ + ../include/SciLexer.h \ + ../lexlib/LexerModule.h \ + ../lexlib/DefaultLexer.h LexYAML.o: \ ../lexers/LexYAML.cxx \ ../include/ILexer.h \ diff --git a/include/SciLexer.h b/include/SciLexer.h index ae20985ce..08b4da6d6 100644 --- a/include/SciLexer.h +++ b/include/SciLexer.h @@ -140,6 +140,7 @@ #define SCLEX_SAS 125 #define SCLEX_NIM 126 #define SCLEX_CIL 127 +#define SCLEX_X12 128 #define SCLEX_LPEG 999 #define SCLEX_AUTOMATIC 1000 #define SCE_P_DEFAULT 0 @@ -1886,6 +1887,15 @@ #define SCE_CIL_OPERATOR 8 #define SCE_CIL_IDENTIFIER 9 #define SCE_CIL_STRINGEOL 10 +#define SCE_X12_DEFAULT 0 +#define SCE_X12_BAD 1 +#define SCE_X12_ENVELOPE 2 +#define SCE_X12_FUNCTIONGROUP 3 +#define SCE_X12_TRANSACTIONSET 4 +#define SCE_X12_SEGMENTHEADER 5 +#define SCE_X12_SEGMENTEND 6 +#define SCE_X12_SEP_ELEMENT 7 +#define SCE_X12_SEP_SUBELEMENT 8 /* --Autogenerated -- end of section automatically generated from Scintilla.iface */ #endif diff --git a/include/Scintilla.iface b/include/Scintilla.iface index c009371c1..fdffa4009 100644 --- a/include/Scintilla.iface +++ b/include/Scintilla.iface @@ -2976,6 +2976,7 @@ val SCLEX_STATA=124 val SCLEX_SAS=125 val SCLEX_NIM=126 val SCLEX_CIL=127 +val SCLEX_X12=128 val SCLEX_LPEG=999 # When a lexer specifies its language as SCLEX_AUTOMATIC it receives a @@ -4962,6 +4963,17 @@ val SCE_CIL_LABEL=7 val SCE_CIL_OPERATOR=8 val SCE_CIL_IDENTIFIER=9 val SCE_CIL_STRINGEOL=10 +# Lexical states for SCLEX_X12 +lex X12=SCLEX_X12 SCE_X12_ +val SCE_X12_DEFAULT=0 +val SCE_X12_BAD=1 +val SCE_X12_ENVELOPE=2 +val SCE_X12_FUNCTIONGROUP=3 +val SCE_X12_TRANSACTIONSET=4 +val SCE_X12_SEGMENTHEADER=5 +val SCE_X12_SEGMENTEND=6 +val SCE_X12_SEP_ELEMENT=7 +val SCE_X12_SEP_SUBELEMENT=8 # Events diff --git a/lexers/LexX12.cxx b/lexers/LexX12.cxx new file mode 100644 index 000000000..171bcd8ac --- /dev/null +++ b/lexers/LexX12.cxx @@ -0,0 +1,341 @@ +// Scintilla Lexer for X12 +// Written by Iain Clarke, IMCSoft & Inobiz AB. +// X12 official documentation is behind a paywall, but there's a description of the syntax here: +// http://www.rawlinsecconsulting.com/x12tutorial/x12syn.html +// This code is subject to the same license terms as the rest of the scintilla project: +// The License.txt file describes the conditions under which this software may be distributed. +// + +// Header order must match order in scripts/HeaderOrder.txt +#include <cstdlib> +#include <cassert> +#include <cstring> +#include <cctype> + +#include <vector> +#include <algorithm> + +#include "ILexer.h" +#include "Scintilla.h" +#include "SciLexer.h" +#include "LexerModule.h" +#include "DefaultLexer.h" + +using namespace Scintilla; + +class LexerX12 : public DefaultLexer +{ +public: + LexerX12(); + virtual ~LexerX12() {} // virtual destructor, as we inherit from ILexer + + static ILexer *Factory() { + return new LexerX12; + } + + int SCI_METHOD Version() const override + { + return lvMetaData; + } + void SCI_METHOD Release() override + { + delete this; + } + + const char * SCI_METHOD PropertyNames() override + { + return "fold"; + } + int SCI_METHOD PropertyType(const char *) override + { + return SC_TYPE_BOOLEAN; // Only one property! + } + const char * SCI_METHOD DescribeProperty(const char *name) override + { + if (!strcmp(name, "fold")) + return "Whether to apply folding to document or not"; + return NULL; + } + + Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override + { + if (!strcmp(key, "fold")) + { + m_bFold = strcmp(val, "0") ? true : false; + return 0; + } + return -1; + } + const char * SCI_METHOD DescribeWordListSets() override + { + return NULL; + } + Sci_Position SCI_METHOD WordListSet(int, const char *) override + { + return -1; + } + void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; + void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; + void * SCI_METHOD PrivateCall(int, void *) override + { + return NULL; + } + +protected: + struct Terminator + { + int Style;// = SCE_X12_BAD; + Sci_PositionU pos;// = 0; + Sci_PositionU length;// = 0; + int FoldChange;// = 0; + }; + Terminator InitialiseFromISA(IDocument *pAccess); + Sci_PositionU FindPreviousSegmentStart(IDocument *pAccess, Sci_Position startPos) const; + Terminator DetectSegmentHeader(IDocument *pAccess, Sci_PositionU pos) const; + Terminator FindNextTerminator(IDocument *pAccess, Sci_PositionU pos, bool bJustSegmentTerminator = false) const; + + bool m_bFold; + char m_chSubElement; + char m_chElement; + char m_chSegment[3]; // might be CRLF +}; + +LexerModule lmX12(SCLEX_X12, LexerX12::Factory, "x12"); + +/////////////////////////////////////////////////////////////////////////////// + + + +/////////////////////////////////////////////////////////////////////////////// + +LexerX12::LexerX12() +{ + m_bFold = false; + m_chSegment[0] = m_chSegment[1] = m_chSegment[2] = m_chElement = m_chSubElement = 0; +} + +void LexerX12::Lex(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess) +{ + Sci_PositionU posFinish = startPos + length; + + Terminator T = InitialiseFromISA(pAccess); + + if (T.Style == SCE_X12_BAD) + { + if (T.pos < startPos) + T.pos = startPos; // we may be colouring in batches. + pAccess->StartStyling(startPos, '\377'); + pAccess->SetStyleFor(T.pos - startPos, SCE_X12_ENVELOPE); + pAccess->SetStyleFor(posFinish - T.pos, SCE_X12_BAD); + return; + } + + // Look backwards for a segment start or a document beginning + Sci_PositionU posCurrent = FindPreviousSegmentStart (pAccess, startPos); + + // Style buffer, so we're not issuing loads of notifications + pAccess->StartStyling(posCurrent, '\377'); + + while (posCurrent < posFinish) + { + // Look for first element marker, so we can denote segment + T = DetectSegmentHeader(pAccess, posCurrent); + if (T.Style == SCE_X12_BAD) + break; + + pAccess->SetStyleFor(T.pos - posCurrent, T.Style); + pAccess->SetStyleFor(T.length, SCE_X12_SEP_ELEMENT); + posCurrent = T.pos + T.length; + + while (T.Style != SCE_X12_BAD && T.Style != SCE_X12_SEGMENTEND) // Break on bad or segment ending + { + T = FindNextTerminator(pAccess, posCurrent); + if (T.Style == SCE_X12_BAD) + break; + + int Style = T.Style; + if (T.Style == SCE_X12_SEGMENTEND && m_chSegment[0] == '\r') // don't style cr/crlf + Style = SCE_X12_DEFAULT; + + pAccess->SetStyleFor(T.pos - posCurrent, SCE_X12_DEFAULT); + pAccess->SetStyleFor(T.length, Style); + posCurrent = T.pos + T.length; + } + if (T.Style == SCE_X12_BAD) + break; + } + + pAccess->SetStyleFor(posFinish - posCurrent, SCE_X12_BAD); +} + +void LexerX12::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess) +{ + if (!m_bFold) + return; + + // Are we even foldable? + if (m_chSegment[0] != '\r' && m_chSegment[0] != '\n') // check for cr,lf,cr+lf. + return; + + Sci_PositionU posFinish = startPos + length; + + // Look backwards for a segment start or a document beginning + startPos = FindPreviousSegmentStart(pAccess, startPos); + Terminator T; + + Sci_PositionU currLine = pAccess->LineFromPosition(startPos); + int levelCurrentStyle = SC_FOLDLEVELBASE; + if (currLine > 0) + levelCurrentStyle = pAccess->GetLevel(currLine - 1); // bottom 12 bits are level + int indentCurrent = levelCurrentStyle & (SC_FOLDLEVELBASE - 1); + + while (startPos < posFinish) + { + T = DetectSegmentHeader(pAccess, startPos); + int indentNext = indentCurrent + T.FoldChange; + if (indentNext < 0) + indentNext = 0; + + levelCurrentStyle = (T.FoldChange > 0) ? (SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG) : SC_FOLDLEVELBASE; + + currLine = pAccess->LineFromPosition(startPos); + pAccess->SetLevel(currLine, levelCurrentStyle | indentCurrent); + + T = FindNextTerminator(pAccess, startPos, true); + startPos = T.pos + T.length; + indentCurrent = indentNext; + } +} + +LexerX12::Terminator LexerX12::InitialiseFromISA(IDocument *pAccess) +{ + Sci_Position length = pAccess->Length(); + char c; + if (length <= 106) + return { SCE_X12_BAD, 0 }; + + pAccess->GetCharRange(&m_chElement, 3, 1); + pAccess->GetCharRange(&m_chSubElement, 104, 1); + pAccess->GetCharRange(m_chSegment, 105, 1); + if (m_chSegment[0] == '\r') // are we CRLF? + { + pAccess->GetCharRange(&c, 106, 1); + if (c == '\n') + m_chSegment[1] = c; + } + + // Validate we have an element separator, and it's not silly! + if (m_chElement == '\0' || m_chElement == '\n' || m_chElement == '\r') + return { SCE_X12_BAD, 3 }; + + // Validate we have an element separator, and it's not silly! + if (m_chSubElement == '\0' || m_chSubElement == '\n' || m_chSubElement == '\r') + return { SCE_X12_BAD, 103 }; + + if (m_chElement == m_chSubElement) + return { SCE_X12_BAD, 104 }; + if (m_chElement == m_chSegment[0]) + return { SCE_X12_BAD, 105 }; + if (m_chSubElement == m_chSegment[0]) + return { SCE_X12_BAD, 104 }; + + // Check we have element markers at all the right places! ISA element has fixed entries. + std::vector<Sci_PositionU> ElementMarkers = { 3, 6, 17, 20, 31, 34, 50, 53, 69, 76, 81, 83, 89, 99, 101, 103 }; + for (auto i : ElementMarkers) + { + pAccess->GetCharRange(&c, i, 1); + if (c != m_chElement) + return { SCE_X12_BAD, i }; + } + // Check we have no element markers anywhere else! + for (Sci_PositionU i = 0; i < 105; i++) + { + if (std::find(ElementMarkers.begin(), ElementMarkers.end(), i) != ElementMarkers.end()) + continue; + + pAccess->GetCharRange(&c, i, 1); + if (c == m_chElement) + return { SCE_X12_BAD, i }; + } + + return { SCE_X12_ENVELOPE }; +} + +Sci_PositionU LexerX12::FindPreviousSegmentStart(IDocument *pAccess, Sci_Position startPos) const +{ + char c; + + for ( ; startPos > 0; startPos--) + { + pAccess->GetCharRange(&c, startPos, 1); + if (c != m_chSegment[0]) + continue; + // we've matched one - if this is not crlf we're done. + if (!m_chSegment[1]) + return startPos + 1; + pAccess->GetCharRange(&c, startPos+1, 1); + if (c == m_chSegment[1]) + return startPos + 2; + } + // We didn't find a ', so just go with the beginning + return 0; +} + +LexerX12::Terminator LexerX12::DetectSegmentHeader(IDocument *pAccess, Sci_PositionU pos) const +{ + Sci_PositionU posStart = pos; + Sci_Position Length = pAccess->Length(); + char Buf[6] = { 0 }; + while (pos - posStart < 5 && pos < (Sci_PositionU)Length) + { + pAccess->GetCharRange(Buf + pos - posStart, pos, 1); + if (Buf [pos - posStart] != m_chElement) // more? + { + pos++; + continue; + } + if (strcmp(Buf, "ISA*") == 0) + return { SCE_X12_ENVELOPE, pos, 1, +1 }; + if (strcmp(Buf, "IEA*") == 0) + return { SCE_X12_ENVELOPE, pos, 1, -1 }; + if (strcmp(Buf, "GS*") == 0) + return { SCE_X12_FUNCTIONGROUP, pos, 1, +1 }; + if (strcmp(Buf, "GE*") == 0) + return { SCE_X12_FUNCTIONGROUP, pos, 1, -1 }; + if (strcmp(Buf, "ST*") == 0) + return { SCE_X12_TRANSACTIONSET, pos, 1, +1 }; + if (strcmp(Buf, "SE*") == 0) + return { SCE_X12_TRANSACTIONSET, pos, 1, -1 }; + return { SCE_X12_SEGMENTHEADER, pos, 1, 0 }; + } + return { SCE_X12_BAD, pos, 0, 0 }; +} + +LexerX12::Terminator LexerX12::FindNextTerminator(IDocument *pAccess, Sci_PositionU pos, bool bJustSegmentTerminator) const +{ + char c; + Sci_Position Length = pAccess->Length(); + + while (pos < (Sci_PositionU)Length) + { + pAccess->GetCharRange(&c, pos, 1); + if (!bJustSegmentTerminator && c == m_chElement) + return { SCE_X12_SEP_ELEMENT, pos, 1 }; + else if (!bJustSegmentTerminator && c == m_chSubElement) + return { SCE_X12_SEP_SUBELEMENT, pos, 1 }; + else if (c == m_chSegment[0]) + { + if (!m_chSegment[1]) + return { SCE_X12_SEGMENTEND, pos, 1 }; + pos++; + if (pos >= (Sci_PositionU)Length) + break; + pAccess->GetCharRange(&c, pos, 1); + if (c == m_chSegment[1]) + return { SCE_X12_SEGMENTEND, pos-1, 2 }; + } + pos++; + } + + return { SCE_X12_BAD, pos }; +} diff --git a/src/Catalogue.cxx b/src/Catalogue.cxx index 6b70a9225..be08651c5 100644 --- a/src/Catalogue.cxx +++ b/src/Catalogue.cxx @@ -195,6 +195,7 @@ int Scintilla_LinkLexers() { LINK_LEXER(lmVerilog); LINK_LEXER(lmVHDL); LINK_LEXER(lmVisualProlog); + LINK_LEXER(lmX12); LINK_LEXER(lmXML); LINK_LEXER(lmYAML); diff --git a/win32/deps.mak b/win32/deps.mak index 4bc99e211..8c53840ad 100644 --- a/win32/deps.mak +++ b/win32/deps.mak @@ -1963,6 +1963,14 @@ LexVisualProlog.o: \ ../lexlib/LexerModule.h \ ../lexlib/OptionSet.h \ ../lexlib/DefaultLexer.h +LexX12.o: \ + ../lexers/LexX12.cxx \ + ../include/ILexer.h \ + ../include/Sci_Position.h \ + ../include/Scintilla.h \ + ../include/SciLexer.h \ + ../lexlib/LexerModule.h \ + ../lexlib/DefaultLexer.h LexYAML.o: \ ../lexers/LexYAML.cxx \ ../include/ILexer.h \ diff --git a/win32/nmdeps.mak b/win32/nmdeps.mak index 9cf46b03b..8ddedfc79 100644 --- a/win32/nmdeps.mak +++ b/win32/nmdeps.mak @@ -1963,6 +1963,14 @@ $(DIR_O)/LexVisualProlog.obj: \ ../lexlib/LexerModule.h \ ../lexlib/OptionSet.h \ ../lexlib/DefaultLexer.h +$(DIR_O)/LexX12.obj: \ + ../lexers/LexX12.cxx \ + ../include/ILexer.h \ + ../include/Sci_Position.h \ + ../include/Scintilla.h \ + ../include/SciLexer.h \ + ../lexlib/LexerModule.h \ + ../lexlib/DefaultLexer.h $(DIR_O)/LexYAML.obj: \ ../lexers/LexYAML.cxx \ ../include/ILexer.h \ diff --git a/win32/scintilla.mak b/win32/scintilla.mak index 31b362225..49ae58a25 100644 --- a/win32/scintilla.mak +++ b/win32/scintilla.mak @@ -217,6 +217,7 @@ LEXOBJS=\ $(DIR_O)\LexVerilog.obj \ $(DIR_O)\LexVHDL.obj \ $(DIR_O)\LexVisualProlog.obj \ + $(DIR_O)\LexX12.obj \ $(DIR_O)\LexYAML.obj \ #--Autogenerated -- end of automatically generated section |