diff options
author | nyamatongwe <unknown> | 2010-07-13 21:24:26 +1000 |
---|---|---|
committer | nyamatongwe <unknown> | 2010-07-13 21:24:26 +1000 |
commit | c0247be1cde4c927b987edff2243524cea28d547 (patch) | |
tree | 482fac70e504ac105d36898d359c5992cea8e4c8 | |
parent | 27a22f2c85e3aa0f540c61a0a245a0d759e706a9 (diff) | |
download | scintilla-mirror-c0247be1cde4c927b987edff2243524cea28d547.tar.gz |
New files for new lexer implementation.
-rw-r--r-- | include/ILexer.h | 67 | ||||
-rw-r--r-- | lexlib/Accessor.cxx | 79 | ||||
-rw-r--r-- | lexlib/CharacterSet.cxx | 61 | ||||
-rw-r--r-- | lexlib/LexAccessor.h | 175 | ||||
-rw-r--r-- | lexlib/LexerBase.cxx | 92 | ||||
-rw-r--r-- | lexlib/LexerBase.h | 41 | ||||
-rw-r--r-- | lexlib/LexerModule.cxx | 121 | ||||
-rw-r--r-- | lexlib/LexerModule.h | 82 | ||||
-rw-r--r-- | lexlib/LexerNoExceptions.cxx | 68 | ||||
-rw-r--r-- | lexlib/LexerNoExceptions.h | 32 | ||||
-rw-r--r-- | lexlib/LexerSimple.cxx | 55 | ||||
-rw-r--r-- | lexlib/LexerSimple.h | 30 | ||||
-rw-r--r-- | lexlib/WordList.cxx | 200 | ||||
-rw-r--r-- | lexlib/WordList.h | 41 | ||||
-rw-r--r-- | src/Catalogue.cxx | 181 | ||||
-rw-r--r-- | src/Catalogue.h | 26 |
16 files changed, 1351 insertions, 0 deletions
diff --git a/include/ILexer.h b/include/ILexer.h new file mode 100644 index 000000000..60f2bdf10 --- /dev/null +++ b/include/ILexer.h @@ -0,0 +1,67 @@ +// Scintilla source code edit control +/** @file ILexer.h + ** Interface between Scintilla and lexers. + **/ +// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. + +#ifndef ILEXER_H +#define ILEXER_H + +#ifdef SCI_NAMESPACE +namespace Scintilla { +#endif + +#ifdef _WIN32 + #define SCI_METHOD __stdcall +#else + #define SCI_METHOD +#endif + +enum { dvOriginal=0 }; + +class IDocument { +public: + virtual int SCI_METHOD Version() const = 0; + virtual void SCI_METHOD SetErrorStatus(int status) = 0; + virtual int SCI_METHOD Length() const = 0; + virtual void SCI_METHOD GetCharRange(char *buffer, int position, int lengthRetrieve) const = 0; + virtual char SCI_METHOD StyleAt(int position) const = 0; + virtual int SCI_METHOD LineFromPosition(int position) const = 0; + virtual int SCI_METHOD LineStart(int line) const = 0; + virtual int SCI_METHOD GetLevel(int line) const = 0; + virtual int SCI_METHOD SetLevel(int line, int level) = 0; + virtual int SCI_METHOD GetLineState(int line) const = 0; + virtual int SCI_METHOD SetLineState(int line, int state) = 0; + virtual void SCI_METHOD StartStyling(int position, char mask) = 0; + virtual bool SCI_METHOD SetStyleFor(int length, char style) = 0; + virtual bool SCI_METHOD SetStyles(int length, const char *styles) = 0; + virtual void SCI_METHOD DecorationSetCurrentIndicator(int indicator) = 0; + virtual void SCI_METHOD DecorationFillRange(int position, int value, int fillLength) = 0; + virtual void SCI_METHOD ChangeLexerState(int start, int end) = 0; + virtual int SCI_METHOD CodePage() const = 0; + virtual bool SCI_METHOD IsDBCSLeadByte(char ch) const = 0; +}; + +enum { lvOriginal=0 }; + +class ILexer { +public: + virtual int SCI_METHOD Version() const = 0; + virtual void 
SCI_METHOD Release() = 0; + virtual const char * SCI_METHOD PropertyNames() = 0; + virtual int SCI_METHOD PropertyType(const char *name) = 0; + virtual const char * SCI_METHOD DescribeProperty(const char *name) = 0; + virtual int SCI_METHOD PropertySet(const char *key, const char *val) = 0; + virtual const char * SCI_METHOD DescribeWordListSets() = 0; + virtual int SCI_METHOD WordListSet(int n, const char *wl) = 0; + virtual void SCI_METHOD Lex(unsigned int startPos, int lengthDoc, int initStyle, IDocument *pAccess) = 0; + virtual void SCI_METHOD Fold(unsigned int startPos, int lengthDoc, int initStyle, IDocument *pAccess) = 0; + virtual void * SCI_METHOD PrivateCall(int operation, void *pointer) = 0; +}; + +#ifdef SCI_NAMESPACE +} +#endif + +#endif diff --git a/lexlib/Accessor.cxx b/lexlib/Accessor.cxx new file mode 100644 index 000000000..5adaaa2f7 --- /dev/null +++ b/lexlib/Accessor.cxx @@ -0,0 +1,79 @@ +// Scintilla source code edit control +/** @file KeyWords.cxx + ** Colourise for particular languages. + **/ +// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. 
+ +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdarg.h> +#include <assert.h> +#include <ctype.h> + +#include "ILexer.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#include "PropSetSimple.h" +#include "WordList.h" +#include "LexAccessor.h" +#include "Accessor.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +Accessor::Accessor(IDocument *pAccess_, PropSetSimple *pprops_) : LexAccessor(pAccess_), pprops(pprops_) { +} + +int Accessor::GetPropertyInt(const char *key, int defaultValue) { + return pprops->GetInt(key, defaultValue); +} + +int Accessor::IndentAmount(int line, int *flags, PFNIsCommentLeader pfnIsCommentLeader) { + int end = Length(); + int spaceFlags = 0; + + // Determines the indentation level of the current line and also checks for consistent + // indentation compared to the previous line. + // Indentation is judged consistent when the indentation whitespace of each line lines + // the same or the indentation of one line is a prefix of the other. + + int pos = LineStart(line); + char ch = (*this)[pos]; + int indent = 0; + bool inPrevPrefix = line > 0; + int posPrev = inPrevPrefix ? LineStart(line-1) : 0; + while ((ch == ' ' || ch == '\t') && (pos < end)) { + if (inPrevPrefix) { + char chPrev = (*this)[posPrev++]; + if (chPrev == ' ' || chPrev == '\t') { + if (chPrev != ch) + spaceFlags |= wsInconsistent; + } else { + inPrevPrefix = false; + } + } + if (ch == ' ') { + spaceFlags |= wsSpace; + indent++; + } else { // Tab + spaceFlags |= wsTab; + if (spaceFlags & wsSpace) + spaceFlags |= wsSpaceTab; + indent = (indent / 8 + 1) * 8; + } + ch = (*this)[++pos]; + } + + *flags = spaceFlags; + indent += SC_FOLDLEVELBASE; + // if completely empty line or the start of a comment... 
+ if ((ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') || + (pfnIsCommentLeader && (*pfnIsCommentLeader)(*this, pos, end-pos))) + return indent | SC_FOLDLEVELWHITEFLAG; + else + return indent; +} diff --git a/lexlib/CharacterSet.cxx b/lexlib/CharacterSet.cxx new file mode 100644 index 000000000..35669dff8 --- /dev/null +++ b/lexlib/CharacterSet.cxx @@ -0,0 +1,61 @@ +// Scintilla source code edit control +/** @file CharacterSet.cxx + ** Simple case functions for ASCII. + ** Lexer infrastructure. + **/ +// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <stdio.h> +#include <assert.h> + +#include "CharacterSet.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +#ifdef SCI_NAMESPACE +namespace Scintilla { +#endif + +int CompareCaseInsensitive(const char *a, const char *b) { + while (*a && *b) { + if (*a != *b) { + char upperA = MakeUpperCase(*a); + char upperB = MakeUpperCase(*b); + if (upperA != upperB) + return upperA - upperB; + } + a++; + b++; + } + // Either *a or *b is nul + return *a - *b; +} + +int CompareNCaseInsensitive(const char *a, const char *b, size_t len) { + while (*a && *b && len) { + if (*a != *b) { + char upperA = MakeUpperCase(*a); + char upperB = MakeUpperCase(*b); + if (upperA != upperB) + return upperA - upperB; + } + a++; + b++; + len--; + } + if (len == 0) + return 0; + else + // Either *a or *b is nul + return *a - *b; +} + +#ifdef SCI_NAMESPACE +} +#endif diff --git a/lexlib/LexAccessor.h b/lexlib/LexAccessor.h new file mode 100644 index 000000000..dccf31e33 --- /dev/null +++ b/lexlib/LexAccessor.h @@ -0,0 +1,175 @@ +// Scintilla source code edit control +/** @file LexAccessor.h + ** Interfaces between Scintilla and lexers. 
+ **/ +// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. + +#ifndef LEXACCESSOR_H +#define LEXACCESSOR_H + +#ifdef SCI_NAMESPACE +namespace Scintilla { +#endif + +class LexAccessor { +private: + IDocument *pAccess; + enum {extremePosition=0x7FFFFFFF}; + /** @a bufferSize is a trade off between time taken to copy the characters + * and retrieval overhead. + * @a slopSize positions the buffer before the desired position + * in case there is some backtracking. */ + enum {bufferSize=4000, slopSize=bufferSize/8}; + char buf[bufferSize+1]; + int startPos; + int endPos; + int codePage; + int lenDoc; + int mask; + char styleBuf[bufferSize]; + int validLen; + char chFlags; + char chWhile; + unsigned int startSeg; + int startPosStyling; + + void Fill(int position) { + startPos = position - slopSize; + if (startPos + bufferSize > lenDoc) + startPos = lenDoc - bufferSize; + if (startPos < 0) + startPos = 0; + endPos = startPos + bufferSize; + if (endPos > lenDoc) + endPos = lenDoc; + + pAccess->GetCharRange(buf, startPos, endPos-startPos); + buf[endPos-startPos] = '\0'; + } + +public: + LexAccessor(IDocument *pAccess_) : + pAccess(pAccess_), startPos(extremePosition), endPos(0), + codePage(pAccess->CodePage()), lenDoc(pAccess->Length()), + mask(127), validLen(0), chFlags(0), chWhile(0), + startSeg(0), startPosStyling(0) { + } + char operator[](int position) { + if (position < startPos || position >= endPos) { + Fill(position); + } + return buf[position - startPos]; + } + /** Safe version of operator[], returning a defined value for invalid position. 
*/ + char SafeGetCharAt(int position, char chDefault=' ') { + if (position < startPos || position >= endPos) { + Fill(position); + if (position < startPos || position >= endPos) { + // Position is outside range of document + return chDefault; + } + } + return buf[position - startPos]; + } + bool IsLeadByte(char ch) { + return pAccess->IsDBCSLeadByte(ch); + } + + bool Match(int pos, const char *s) { + for (int i=0; *s; i++) { + if (*s != SafeGetCharAt(pos+i)) + return false; + s++; + } + return true; + } + char StyleAt(int position) { + return static_cast<char>(pAccess->StyleAt(position) & mask); + } + int GetLine(int position) { + return pAccess->LineFromPosition(position); + } + int LineStart(int line) { + return pAccess->LineStart(line); + } + int LevelAt(int line) { + return pAccess->GetLevel(line); + } + int Length() const { + return lenDoc; + } + void Flush() { + startPos = extremePosition; + if (validLen > 0) { + pAccess->SetStyles(validLen, styleBuf); + startPosStyling += validLen; + validLen = 0; + } + } + int GetLineState(int line) { + return pAccess->GetLineState(line); + } + int SetLineState(int line, int state) { + return pAccess->SetLineState(line, state); + } + // Style setting + void StartAt(unsigned int start, char chMask=31) { + // Store the mask specified for use with StyleAt. 
+ mask = chMask; + pAccess->StartStyling(start, chMask); + startPosStyling = start; + } + void SetFlags(char chFlags_, char chWhile_) { + chFlags = chFlags_; + chWhile = chWhile_; + } + unsigned int GetStartSegment() const { + return startSeg; + } + void StartSegment(unsigned int pos) { + startSeg = pos; + } + void ColourTo(unsigned int pos, int chAttr) { + // Only perform styling if non empty range + if (pos != startSeg - 1) { + assert(pos >= startSeg); + if (pos < startSeg) { + return; + } + + if (validLen + (pos - startSeg + 1) >= bufferSize) + Flush(); + if (validLen + (pos - startSeg + 1) >= bufferSize) { + // Too big for buffer so send directly + pAccess->SetStyleFor(pos - startSeg + 1, static_cast<char>(chAttr)); + } else { + if (chAttr != chWhile) + chFlags = 0; + chAttr |= chFlags; + for (unsigned int i = startSeg; i <= pos; i++) { + assert((startPosStyling + validLen) < Length()); + styleBuf[validLen++] = static_cast<char>(chAttr); + } + } + } + startSeg = pos+1; + } + void SetLevel(int line, int level) { + pAccess->SetLevel(line, level); + } + void IndicatorFill(int start, int end, int indicator, int value) { + pAccess->DecorationSetCurrentIndicator(indicator); + pAccess->DecorationFillRange(start, value, end - start); + } + + void ChangeLexerState(int start, int end) { + pAccess->ChangeLexerState(start, end); + } +}; + +#ifdef SCI_NAMESPACE +} +#endif + +#endif diff --git a/lexlib/LexerBase.cxx b/lexlib/LexerBase.cxx new file mode 100644 index 000000000..ea5734d24 --- /dev/null +++ b/lexlib/LexerBase.cxx @@ -0,0 +1,92 @@ +// Scintilla source code edit control +/** @file LexerSimple.cxx + ** A simple lexer with no state. + **/ +// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. 
+ +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdarg.h> +#include <assert.h> +#include <ctype.h> + +#include "ILexer.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#include "PropSetSimple.h" +#include "WordList.h" +#include "LexAccessor.h" +#include "Accessor.h" +#include "LexerModule.h" +#include "LexerBase.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +LexerBase::LexerBase() { + for (int wl = 0; wl < numWordLists; wl++) + keyWordLists[wl] = new WordList; + keyWordLists[numWordLists] = 0; +} + +LexerBase::~LexerBase() { + for (int wl = 0; wl < numWordLists; wl++) { + delete keyWordLists[wl]; + keyWordLists[wl] = 0; + } + keyWordLists[numWordLists] = 0; +} + +void SCI_METHOD LexerBase::Release() { + delete this; +} + +int SCI_METHOD LexerBase::Version() const { + return lvOriginal; +} + +const char * SCI_METHOD LexerBase::PropertyNames() { + return ""; +} + +int SCI_METHOD LexerBase::PropertyType(const char *) { + return SC_TYPE_BOOLEAN; +} + +const char * SCI_METHOD LexerBase::DescribeProperty(const char *) { + return ""; +} + +int SCI_METHOD LexerBase::PropertySet(const char *key, const char *val) { + const char *valOld = props.Get(key); + if (strcmp(val, valOld) != 0) { + props.Set(key, val); + return 0; + } else { + return -1; + } +} + +const char * SCI_METHOD LexerBase::DescribeWordListSets() { + return ""; +} + +int SCI_METHOD LexerBase::WordListSet(int n, const char *wl) { + if (n < numWordLists) { + WordList wlNew; + wlNew.Set(wl); + if (*keyWordLists[n] != wlNew) { + keyWordLists[n]->Set(wl); + return 0; + } + } + return -1; +} + +void * SCI_METHOD LexerBase::PrivateCall(int, void *) { + return 0; +} diff --git a/lexlib/LexerBase.h b/lexlib/LexerBase.h new file mode 100644 index 000000000..02fc34dc3 --- /dev/null +++ b/lexlib/LexerBase.h @@ -0,0 +1,41 @@ +// Scintilla source code edit control +/** @file LexerBase.h + ** A simple lexer with no state. 
+ **/ +// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. + +#ifndef LEXERBASE_H +#define LEXERBASE_H + +#ifdef SCI_NAMESPACE +namespace Scintilla { +#endif + +// A simple lexer with no state +class LexerBase : public ILexer { +protected: + PropSetSimple props; + enum {numWordLists=KEYWORDSET_MAX+1}; + WordList *keyWordLists[numWordLists+1]; +public: + LexerBase(); + ~LexerBase(); + void SCI_METHOD Release(); + int SCI_METHOD Version() const; + const char * SCI_METHOD PropertyNames(); + int SCI_METHOD PropertyType(const char *name); + const char * SCI_METHOD DescribeProperty(const char *name); + int SCI_METHOD PropertySet(const char *key, const char *val); + const char * SCI_METHOD DescribeWordListSets(); + int SCI_METHOD WordListSet(int n, const char *wl); + void SCI_METHOD Lex(unsigned int startPos, int lengthDoc, int initStyle, IDocument *pAccess) = 0; + void SCI_METHOD Fold(unsigned int startPos, int lengthDoc, int initStyle, IDocument *pAccess) = 0; + void * SCI_METHOD PrivateCall(int operation, void *pointer); +}; + +#ifdef SCI_NAMESPACE +} +#endif + +#endif diff --git a/lexlib/LexerModule.cxx b/lexlib/LexerModule.cxx new file mode 100644 index 000000000..defc86356 --- /dev/null +++ b/lexlib/LexerModule.cxx @@ -0,0 +1,121 @@ +// Scintilla source code edit control +/** @file LexerModule.cxx + ** Colourise for particular languages. + **/ +// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. 
+ +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdarg.h> +#include <assert.h> +#include <ctype.h> + +#include <string> + +#include "ILexer.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#include "PropSetSimple.h" +#include "WordList.h" +#include "LexAccessor.h" +#include "Accessor.h" +#include "LexerModule.h" +#include "LexerBase.h" +#include "LexerSimple.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +LexerModule::LexerModule(int language_, + LexerFunction fnLexer_, + const char *languageName_, + LexerFunction fnFolder_, + const char *const wordListDescriptions_[], + int styleBits_) : + language(language_), + fnLexer(fnLexer_), + fnFolder(fnFolder_), + fnFactory(0), + wordListDescriptions(wordListDescriptions_), + styleBits(styleBits_), + languageName(languageName_) { +} + +LexerModule::LexerModule(int language_, + LexerFactoryFunction fnFactory_, + const char *languageName_, + const char * const wordListDescriptions_[], + int styleBits_) : + language(language_), + fnLexer(0), + fnFolder(0), + fnFactory(fnFactory_), + wordListDescriptions(wordListDescriptions_), + styleBits(styleBits_), + languageName(languageName_) { +} + +int LexerModule::GetNumWordLists() const { + if (wordListDescriptions == NULL) { + return -1; + } else { + int numWordLists = 0; + + while (wordListDescriptions[numWordLists]) { + ++numWordLists; + } + + return numWordLists; + } +} + +const char *LexerModule::GetWordListDescription(int index) const { + static const char *emptyStr = ""; + + assert(index < GetNumWordLists()); + if (index >= GetNumWordLists()) { + return emptyStr; + } else { + return wordListDescriptions[index]; + } +} + +int LexerModule::GetStyleBitsNeeded() const { + return styleBits; +} + +ILexer *LexerModule::Create() const { + if (fnFactory) + return fnFactory(); + else + return new LexerSimple(this); +} + +void LexerModule::Lex(unsigned int startPos, int lengthDoc, int initStyle, + WordList *keywordlists[], Accessor 
&styler) const { + if (fnLexer) + fnLexer(startPos, lengthDoc, initStyle, keywordlists, styler); +} + +void LexerModule::Fold(unsigned int startPos, int lengthDoc, int initStyle, + WordList *keywordlists[], Accessor &styler) const { + if (fnFolder) { + int lineCurrent = styler.GetLine(startPos); + // Move back one line in case deletion wrecked current line fold state + if (lineCurrent > 0) { + lineCurrent--; + int newStartPos = styler.LineStart(lineCurrent); + lengthDoc += startPos - newStartPos; + startPos = newStartPos; + initStyle = 0; + if (startPos > 0) { + initStyle = styler.StyleAt(startPos - 1); + } + } + fnFolder(startPos, lengthDoc, initStyle, keywordlists, styler); + } +} diff --git a/lexlib/LexerModule.h b/lexlib/LexerModule.h new file mode 100644 index 000000000..e502541b2 --- /dev/null +++ b/lexlib/LexerModule.h @@ -0,0 +1,82 @@ +// Scintilla source code edit control +/** @file LexerModule.h + ** Colourise for particular languages. + **/ +// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. + +#ifndef LEXERMODULE_H +#define LEXERMODULE_H + +#ifdef SCI_NAMESPACE +namespace Scintilla { +#endif + +class Accessor; +class WordList; + +typedef void (*LexerFunction)(unsigned int startPos, int lengthDoc, int initStyle, + WordList *keywordlists[], Accessor &styler); +typedef ILexer *(*LexerFactoryFunction)(); + +/** + * A LexerModule is responsible for lexing and folding a particular language. + * The class maintains a list of LexerModules which can be searched to find a + * module appropriate to a particular language. 
+ */ +class LexerModule { +protected: + int language; + LexerFunction fnLexer; + LexerFunction fnFolder; + LexerFactoryFunction fnFactory; + const char * const * wordListDescriptions; + int styleBits; + +public: + const char *languageName; + LexerModule(int language_, + LexerFunction fnLexer_, + const char *languageName_=0, + LexerFunction fnFolder_=0, + const char * const wordListDescriptions_[] = NULL, + int styleBits_=5); + LexerModule(int language_, + LexerFactoryFunction fnFactory_, + const char *languageName_, + const char * const wordListDescriptions_[] = NULL, + int styleBits_=8); + virtual ~LexerModule() { + } + int GetLanguage() const { return language; } + + // -1 is returned if no WordList information is available + int GetNumWordLists() const; + const char *GetWordListDescription(int index) const; + + int GetStyleBitsNeeded() const; + + ILexer *Create() const; + + virtual void Lex(unsigned int startPos, int length, int initStyle, + WordList *keywordlists[], Accessor &styler) const; + virtual void Fold(unsigned int startPos, int length, int initStyle, + WordList *keywordlists[], Accessor &styler) const; + + friend class Catalogue; +}; + +inline int Maximum(int a, int b) { + return (a > b) ? a : b; +} + +// Shut up annoying Visual C++ warnings: +#ifdef _MSC_VER +#pragma warning(disable: 4244 4309 4514 4710) +#endif + +#ifdef SCI_NAMESPACE +} +#endif + +#endif diff --git a/lexlib/LexerNoExceptions.cxx b/lexlib/LexerNoExceptions.cxx new file mode 100644 index 000000000..9ebae2af0 --- /dev/null +++ b/lexlib/LexerNoExceptions.cxx @@ -0,0 +1,68 @@ +// Scintilla source code edit control +/** @file LexerNoExceptions.cxx + ** A simple lexer with no state which does not throw exceptions so can be used in an external lexer. + **/ +// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. 
+ +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdarg.h> +#include <assert.h> +#include <ctype.h> + +#include "ILexer.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#include "PropSetSimple.h" +#include "WordList.h" +#include "LexAccessor.h" +#include "Accessor.h" +#include "LexerModule.h" +#include "LexerBase.h" +#include "LexerNoExceptions.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +int SCI_METHOD LexerNoExceptions::PropertySet(const char *key, const char *val) { + try { + return LexerBase::PropertySet(key, val); + } catch (...) { + // Should not throw into caller as may be compiled with different compiler or options + } + return -1; +} + +int SCI_METHOD LexerNoExceptions::WordListSet(int n, const char *wl) { + try { + return LexerBase::WordListSet(n, wl); + } catch (...) { + // Should not throw into caller as may be compiled with different compiler or options + } + return -1; +} + +void SCI_METHOD LexerNoExceptions::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) { + try { + Accessor astyler(pAccess, &props); + Lexer(startPos, length, initStyle, pAccess, astyler); + astyler.Flush(); + } catch (...) { + // Should not throw into caller as may be compiled with different compiler or options + pAccess->SetErrorStatus(SC_STATUS_FAILURE); + } +} +void SCI_METHOD LexerNoExceptions::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) { + try { + Accessor astyler(pAccess, &props); + Folder(startPos, length, initStyle, pAccess, astyler); + astyler.Flush(); + } catch (...) 
{ + // Should not throw into caller as may be compiled with different compiler or options + pAccess->SetErrorStatus(SC_STATUS_FAILURE); + } +} diff --git a/lexlib/LexerNoExceptions.h b/lexlib/LexerNoExceptions.h new file mode 100644 index 000000000..caac61a83 --- /dev/null +++ b/lexlib/LexerNoExceptions.h @@ -0,0 +1,32 @@ +// Scintilla source code edit control +/** @file LexerNoExceptions.h + ** A simple lexer with no state. + **/ +// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. + +#ifndef LexerNoExceptions_H +#define LexerNoExceptions_H + +#ifdef SCI_NAMESPACE +namespace Scintilla { +#endif + +// A simple lexer with no state +class LexerNoExceptions : public LexerBase { +public: + // TODO Also need to prevent exceptions in constructor and destructor + int SCI_METHOD PropertySet(const char *key, const char *val); + int SCI_METHOD WordListSet(int n, const char *wl); + void SCI_METHOD Lex(unsigned int startPos, int lengthDoc, int initStyle, IDocument *pAccess); + void SCI_METHOD Fold(unsigned int startPos, int lengthDoc, int initStyle, IDocument *); + + virtual void Lexer(unsigned int startPos, int length, int initStyle, IDocument *pAccess, Accessor &styler) = 0; + virtual void Folder(unsigned int startPos, int length, int initStyle, IDocument *pAccess, Accessor &styler) = 0; +}; + +#ifdef SCI_NAMESPACE +} +#endif + +#endif diff --git a/lexlib/LexerSimple.cxx b/lexlib/LexerSimple.cxx new file mode 100644 index 000000000..2e35f19c3 --- /dev/null +++ b/lexlib/LexerSimple.cxx @@ -0,0 +1,55 @@ +// Scintilla source code edit control +/** @file LexerSimple.cxx + ** A simple lexer with no state. + **/ +// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. 
+ +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdarg.h> +#include <assert.h> +#include <ctype.h> + +#include <string> + +#include "ILexer.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#include "PropSetSimple.h" +#include "WordList.h" +#include "LexAccessor.h" +#include "Accessor.h" +#include "LexerModule.h" +#include "LexerBase.h" +#include "LexerSimple.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +LexerSimple::LexerSimple(const LexerModule *module_) : module(module_) { + for (int wl = 0; wl < module->GetNumWordLists(); wl++) { + if (!wordLists.empty()) + wordLists += "\n"; + wordLists += module->GetWordListDescription(wl); + } +} + +const char * SCI_METHOD LexerSimple::DescribeWordListSets() { + return wordLists.c_str(); +} + +void SCI_METHOD LexerSimple::Lex(unsigned int startPos, int lengthDoc, int initStyle, IDocument *pAccess) { + Accessor astyler(pAccess, &props); + module->Lex(startPos, lengthDoc, initStyle, keyWordLists, astyler); + astyler.Flush(); +} + +void SCI_METHOD LexerSimple::Fold(unsigned int startPos, int lengthDoc, int initStyle, IDocument *pAccess) { + Accessor astyler(pAccess, &props); + module->Fold(startPos, lengthDoc, initStyle, keyWordLists, astyler); + astyler.Flush(); +} diff --git a/lexlib/LexerSimple.h b/lexlib/LexerSimple.h new file mode 100644 index 000000000..89631936f --- /dev/null +++ b/lexlib/LexerSimple.h @@ -0,0 +1,30 @@ +// Scintilla source code edit control +/** @file LexerSimple.h + ** A simple lexer with no state. + **/ +// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. 
+
+#ifndef LEXERSIMPLE_H
+#define LEXERSIMPLE_H
+
+#ifdef SCI_NAMESPACE
+namespace Scintilla {
+#endif
+
+// A simple lexer with no state
+class LexerSimple : public LexerBase {
+	const LexerModule *module;
+	std::string wordLists;
+public:
+	LexerSimple(const LexerModule *module_);
+	const char * SCI_METHOD DescribeWordListSets();
+	void SCI_METHOD Lex(unsigned int startPos, int lengthDoc, int initStyle, IDocument *pAccess);
+	void SCI_METHOD Fold(unsigned int startPos, int lengthDoc, int initStyle, IDocument *pAccess);
+};
+
+#ifdef SCI_NAMESPACE
+}
+#endif
+
+#endif
diff --git a/lexlib/WordList.cxx b/lexlib/WordList.cxx
new file mode 100644
index 000000000..b50c69498
--- /dev/null
+++ b/lexlib/WordList.cxx
@@ -0,0 +1,200 @@
+// Scintilla source code edit control
+/** @file WordList.cxx
+ ** Hold a list of words.
+ **/
+// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
+// The License.txt file describes the conditions under which this software may be distributed.
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "WordList.h"
+
+#ifdef SCI_NAMESPACE
+using namespace Scintilla;
+#endif
+
+/**
+ * Creates an array that points into each word in the string and puts \0 terminators
+ * after each word.
+ * @param wordlist NUL terminated buffer holding the words. It is modified in place:
+ *  every separator character is overwritten with \0.
+ * @param len receives the number of words found.
+ * @param onlyLineEnds when true only \r and \n separate words, otherwise space and
+ *  tab are separators as well.
+ * @return array allocated with new[] holding *len word pointers followed by one extra
+ *  entry that points at the terminating \0 of wordlist, so it reads as an empty word.
+ */
+static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) {
+	int prev = '\n';
+	int words = 0;
+	// For rapid determination of whether a character is a separator, build
+	// a look up table.
+	bool wordSeparator[256];
+	for (int i=0; i<256; i++) {
+		wordSeparator[i] = false;
+	}
+	wordSeparator['\r'] = true;
+	wordSeparator['\n'] = true;
+	if (!onlyLineEnds) {
+		wordSeparator[' '] = true;
+		wordSeparator['\t'] = true;
+	}
+	// First pass: count the words so the pointer array can be sized.
+	for (int j = 0; wordlist[j]; j++) {
+		int curr = static_cast<unsigned char>(wordlist[j]);
+		if (!wordSeparator[curr] && wordSeparator[prev])
+			words++;
+		prev = curr;
+	}
+	char **keywords = new char *[words + 1];
+	if (keywords) {
+		// Second pass: terminate each word and record where it starts.
+		// prev is \0 whenever the previous character was a separator.
+		words = 0;
+		prev = '\0';
+		size_t slen = strlen(wordlist);
+		for (size_t k = 0; k < slen; k++) {
+			if (!wordSeparator[static_cast<unsigned char>(wordlist[k])]) {
+				if (!prev) {
+					keywords[words] = &wordlist[k];
+					words++;
+				}
+			} else {
+				wordlist[k] = '\0';
+			}
+			prev = wordlist[k];
+		}
+		// Sentinel: an empty word after the last real one.
+		keywords[words] = &wordlist[slen];
+		*len = words;
+	} else {
+		*len = 0;
+	}
+	return keywords;
+}
+
+/**
+ * Compare two lists word by word.
+ * @return true when the lists differ in length or any word at the same index differs.
+ */
+bool WordList::operator!=(const WordList &other) const {
+	if (len != other.len)
+		return true;
+	for (int i=0; i<len; i++) {
+		if (strcmp(words[i], other.words[i]) != 0)
+			return true;
+	}
+	return false;
+}
+
+/**
+ * Free the word array and the text buffer it points into and return to the empty state.
+ */
+void WordList::Clear() {
+	if (words) {
+		delete []list;
+		delete []words;
+	}
+	words = 0;
+	list = 0;
+	len = 0;
+}
+
+/**
+ * Comparison callback for qsort. Each argument is the address of an element of the
+ * word array, so it is dereferenced once to reach the string handed to strcmp.
+ */
+extern "C" int cmpString(const void *a1, const void *a2) {
+	// The elements are char * objects; reading them through char * const keeps the const of the qsort arguments.
+	return strcmp(*static_cast<char * const *>(a1), *static_cast<char * const *>(a2));
+}
+
+/**
+ * Sort the first len entries of words into strcmp order.
+ */
+static void SortWordList(char **words, unsigned int len) {
+	qsort(words, len, sizeof(*words), cmpString);
+}
+
+/**
+ * Replace the contents with the words found in s.
+ * The text is copied, split into words and sorted, then starts[] is filled so that
+ * starts[c] is the index of the first word whose first byte is c, or -1 when no
+ * word starts with that byte.
+ */
+void WordList::Set(const char *s) {
+	Clear();
+	list = new char[strlen(s) + 1];
+	strcpy(list, s);
+	words = ArrayFromWordList(list, &len, onlyLineEnds);
+	SortWordList(words, len);
+	for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++)
+		starts[k] = -1;
+	// Walk backwards so the lowest index for each first byte is the one that remains.
+	for (int l = len - 1; l >= 0; l--) {
+		unsigned char indexChar = words[l][0];
+		starts[indexChar] = l;
+	}
+}
+
+/**
+ * Check whether s is in the list.
+ * s matches when it is equal to one of the words, or when it begins with the text
+ * that follows the '^' of any word starting with '^'.
+ */
+bool WordList::InList(const char *s) const {
+	if (0 == words)
+		return false;
+	unsigned char firstChar = s[0];
+	int j = starts[firstChar];
+	if (j >= 0) {
+		// The empty sentinel word after the last entry ends this loop.
+		while (static_cast<unsigned char>(words[j][0]) == firstChar) {
+			if (s[1] == words[j][1]) {
+				const char *a = words[j] + 1;
+				const char *b = s + 1;
+				while (*a && *a == *b) {
+					a++;
+					b++;
+				}
+				if (!*a && !*b)
+					return true;
+			}
+			j++;
+		}
+	}
+	j = starts['^'];
+	if (j >= 0) {
+		while (words[j][0] == '^') {
+			const char *a = words[j] + 1;
+			const char *b = s;
+			while (*a && *a == *b) {
+				a++;
+				b++;
+			}
+			if (!*a)
+				return true;
+			j++;
+		}
+	}
+	return false;
+}
+
+/** similar to InList, but word s can be a substring of keyword.
+ * eg. the keyword define is defined as def~ine. This means the word must start
+ * with def to be a keyword, but also defi, defin and define are valid.
+ * The marker is ~ in this case.
+ */
+bool WordList::InListAbbreviated(const char *s, const char marker) const {
+	if (0 == words)
+		return false;
+	unsigned char firstChar = s[0];
+	int j = starts[firstChar];
+	if (j >= 0) {
+		// Compare as unsigned char, as InList does: firstChar is unsigned, so where plain
+		// char is signed a first byte above 0x7F would otherwise never compare equal.
+		while (static_cast<unsigned char>(words[j][0]) == firstChar) {
+			bool isSubword = false;
+			int start = 1;
+			if (words[j][1] == marker) {
+				isSubword = true;
+				start++;
+			}
+			if (s[1] == words[j][start]) {
+				const char *a = words[j] + start;
+				const char *b = s + 1;
+				while (*a && *a == *b) {
+					a++;
+					if (*a == marker) {
+						isSubword = true;
+						a++;
+					}
+					b++;
+				}
+				if ((!*a || isSubword) && !*b)
+					return true;
+			}
+			j++;
+		}
+	}
+	j = starts['^'];
+	if (j >= 0) {
+		while (words[j][0] == '^') {
+			const char *a = words[j] + 1;
+			const char *b = s;
+			while (*a && *a == *b) {
+				a++;
+				b++;
+			}
+			if (!*a)
+				return true;
+			j++;
+		}
+	}
+	return false;
+}
diff --git a/lexlib/WordList.h b/lexlib/WordList.h
new file mode 100644
index 000000000..ea5be1d55
--- /dev/null
+++ b/lexlib/WordList.h
@@ -0,0 +1,41 @@
+// Scintilla source code edit control
+/** @file WordList.h
+ ** Hold a list of words.
+ **/
+// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org>
+// The License.txt file describes the conditions under which this software may be distributed.
+
+#ifndef WORDLIST_H
+#define WORDLIST_H
+
+#ifdef SCI_NAMESPACE
+namespace Scintilla {
+#endif
+
+/**
+ * A set of words split out of a single string and kept sorted, with an index
+ * from first byte to the first word that begins with it.
+ * NOTE(review): the class owns the words and list arrays but declares no copy
+ * constructor or assignment operator, so copying a non-empty WordList leaves two
+ * objects that will both delete the same arrays.
+ */
+class WordList {
+public:
+	// Pointers to the start of each word inside list.
+	// Each word contains at least one character - an empty word acts as sentinel at the end.
+	char **words;
+	// Copy of the text given to Set() with the separators overwritten by \0.
+	char *list;
+	// Number of words, not counting the sentinel.
+	int len;
+	bool onlyLineEnds;	///< Delimited by any white space or only line ends
+	// starts[c] is the index in words of the first word beginning with byte c, or -1.
+	// Only filled in by Set().
+	int starts[256];
+	WordList(bool onlyLineEnds_ = false)
+		: words(0),
+		  list(0),
+		  len(0),
+		  onlyLineEnds(onlyLineEnds_) {
+	}
+	~WordList() { Clear(); }
+	// True when the list holds at least one word.
+	operator bool() const { return len != 0; }
+	bool operator!=(const WordList &other) const;
+	void Clear();
+	void Set(const char *s);
+	bool InList(const char *s) const;
+	bool InListAbbreviated(const char *s, const char marker) const;
+};
+
+#ifdef SCI_NAMESPACE
+}
+#endif
+
+#endif
diff --git a/src/Catalogue.cxx b/src/Catalogue.cxx
new file mode 100644
index 000000000..86e93d323
--- /dev/null
+++ b/src/Catalogue.cxx
@@ -0,0 +1,181 @@
+// Scintilla source code edit control
+/** @file Catalogue.cxx
+ ** Lexer infrastructure.
+ ** Keeps the list of lexer modules and finds one by language number or name.
+ **/
+// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
+// The License.txt file describes the conditions under which this software may be distributed.
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <assert.h>
+
+#include <vector>
+
+#include "ILexer.h"
+#include "Scintilla.h"
+#include "SciLexer.h"
+
+#include "LexerModule.h"
+#include "Catalogue.h"
+
+#ifdef SCI_NAMESPACE
+using namespace Scintilla;
+#endif
+
+// Every module registered through Catalogue::AddLexerModule, in registration order.
+static std::vector<LexerModule *> lexerCatalogue;
+// Next language number to hand to a module registered as SCLEX_AUTOMATIC.
+static int nextLanguage = SCLEX_AUTOMATIC+1;
+
+// Return the first registered module whose GetLanguage() equals language,
+// or 0 when there is none.
+const LexerModule *Catalogue::Find(int language) {
+	const size_t count = lexerCatalogue.size();
+	for (size_t i = 0; i < count; i++) {
+		LexerModule *candidate = lexerCatalogue[i];
+		if (candidate->GetLanguage() == language)
+			return candidate;
+	}
+	return 0;
+}
+
+// Return the first registered module whose languageName is the same string as
+// languageName. Modules without a name are skipped.
+// Returns 0 when languageName is null or nothing matches.
+const LexerModule *Catalogue::Find(const char *languageName) {
+	if (!languageName)
+		return 0;
+	const size_t count = lexerCatalogue.size();
+	for (size_t i = 0; i < count; i++) {
+		LexerModule *candidate = lexerCatalogue[i];
+		if (!candidate->languageName)
+			continue;
+		if (strcmp(candidate->languageName, languageName) == 0)
+			return candidate;
+	}
+	return 0;
+}
+
+// Append plm to the catalogue. A module whose language is SCLEX_AUTOMATIC is
+// first given the next unused number from nextLanguage.
+void Catalogue::AddLexerModule(LexerModule *plm) {
+	if (plm->GetLanguage() == SCLEX_AUTOMATIC)
+		plm->language = nextLanguage++;
+	lexerCatalogue.push_back(plm);
+}
+
+// Alternative historical name for Scintilla_LinkLexers
+int wxForceScintillaLexers(void) {
+	return Scintilla_LinkLexers();
+}
+
+// To add or remove a lexer, add or remove its file and run LexGen.py.
+
+// Force a reference to all of the Scintilla lexers so that the linker will
+// not remove the code of the lexers.
+// Returns 1 on the call that registers the lexers and 0 on every later call,
+// which does nothing.
+int Scintilla_LinkLexers() {
+
+	static bool alreadyLinked = false;
+	if (alreadyLinked)
+		return 0;
+	alreadyLinked = true;
+
+// Shorten the code that declares a lexer and ensures it is linked in by calling a method.
+#define LINK_LEXER(lexer) extern LexerModule lexer; Catalogue::AddLexerModule(&lexer);
+
+//++Autogenerated -- run src/LexGen.py to regenerate
+//**\(\tLINK_LEXER(\*);\n\)
+	LINK_LEXER(lmAbaqus);
+	LINK_LEXER(lmAda);
+	LINK_LEXER(lmAns1);
+	LINK_LEXER(lmAPDL);
+	LINK_LEXER(lmAsm);
+	LINK_LEXER(lmASY);
+	LINK_LEXER(lmAU3);
+	LINK_LEXER(lmAVE);
+	LINK_LEXER(lmBaan);
+	LINK_LEXER(lmBash);
+	LINK_LEXER(lmBatch);
+	LINK_LEXER(lmBlitzBasic);
+	LINK_LEXER(lmBullant);
+	LINK_LEXER(lmCaml);
+	LINK_LEXER(lmClw);
+	LINK_LEXER(lmClwNoCase);
+	LINK_LEXER(lmCmake);
+	LINK_LEXER(lmCOBOL);
+	LINK_LEXER(lmConf);
+	LINK_LEXER(lmCPP);
+	LINK_LEXER(lmCPPNoCase);
+	LINK_LEXER(lmCsound);
+	LINK_LEXER(lmCss);
+	LINK_LEXER(lmD);
+	LINK_LEXER(lmDiff);
+	LINK_LEXER(lmEiffel);
+	LINK_LEXER(lmEiffelkw);
+	LINK_LEXER(lmErlang);
+	LINK_LEXER(lmErrorList);
+	LINK_LEXER(lmESCRIPT);
+	LINK_LEXER(lmF77);
+	LINK_LEXER(lmFlagShip);
+	LINK_LEXER(lmForth);
+	LINK_LEXER(lmFortran);
+	LINK_LEXER(lmFreeBasic);
+	LINK_LEXER(lmGAP);
+	LINK_LEXER(lmGui4Cli);
+	LINK_LEXER(lmHaskell);
+	LINK_LEXER(lmHTML);
+	LINK_LEXER(lmInno);
+	LINK_LEXER(lmKix);
+	LINK_LEXER(lmLatex);
+	LINK_LEXER(lmLISP);
+	LINK_LEXER(lmLot);
+	LINK_LEXER(lmLout);
+	LINK_LEXER(lmLua);
+	LINK_LEXER(lmMagikSF);
+	LINK_LEXER(lmMake);
+	LINK_LEXER(lmMarkdown);
+	LINK_LEXER(lmMatlab);
+	LINK_LEXER(lmMETAPOST);
+	LINK_LEXER(lmMMIXAL);
+	LINK_LEXER(lmMSSQL);
+	LINK_LEXER(lmMySQL);
+	LINK_LEXER(lmNimrod);
+	LINK_LEXER(lmNncrontab);
+	LINK_LEXER(lmNsis);
+	LINK_LEXER(lmNull);
+	LINK_LEXER(lmOctave);
+	LINK_LEXER(lmOpal);
+	LINK_LEXER(lmPascal);
+	LINK_LEXER(lmPB);
+	LINK_LEXER(lmPerl);
+	LINK_LEXER(lmPHPSCRIPT);
+	LINK_LEXER(lmPLM);
+	LINK_LEXER(lmPo);
+	LINK_LEXER(lmPOV);
+	LINK_LEXER(lmPowerPro);
+	LINK_LEXER(lmPowerShell);
+	LINK_LEXER(lmProgress);
+	LINK_LEXER(lmProps);
+	LINK_LEXER(lmPS);
+	LINK_LEXER(lmPureBasic);
+	LINK_LEXER(lmPython);
+	LINK_LEXER(lmR);
+	LINK_LEXER(lmREBOL);
+	LINK_LEXER(lmRuby);
+	LINK_LEXER(lmScriptol);
+	LINK_LEXER(lmSmalltalk);
+	LINK_LEXER(lmSML);
+	LINK_LEXER(lmSorc);
+	LINK_LEXER(lmSpecman);
+	LINK_LEXER(lmSpice);
+	LINK_LEXER(lmSQL);
+	LINK_LEXER(lmTACL);
+	LINK_LEXER(lmTADS3);
+	LINK_LEXER(lmTAL);
+	LINK_LEXER(lmTCL);
+	LINK_LEXER(lmTeX);
+	LINK_LEXER(lmVB);
+	LINK_LEXER(lmVBScript);
+	LINK_LEXER(lmVerilog);
+	LINK_LEXER(lmVHDL);
+	LINK_LEXER(lmXML);
+	LINK_LEXER(lmYAML);
+
+//--Autogenerated -- end of automatically generated section
+
+	return 1;
+}
diff --git a/src/Catalogue.h b/src/Catalogue.h
new file mode 100644
index 000000000..7fea37da8
--- /dev/null
+++ b/src/Catalogue.h
@@ -0,0 +1,26 @@
+// Scintilla source code edit control
+/** @file Catalogue.h
+ ** Lexer infrastructure.
+ **/
+// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org>
+// The License.txt file describes the conditions under which this software may be distributed.
+
+#ifndef CATALOGUE_H
+#define CATALOGUE_H
+
+#ifdef SCI_NAMESPACE
+namespace Scintilla {
+#endif
+
+// Registry of lexer modules, searchable by language number or by name.
+class Catalogue {
+public:
+	// Both Find overloads return 0 when no registered module matches.
+	static const LexerModule *Find(int language);
+	static const LexerModule *Find(const char *languageName);
+	static void AddLexerModule(LexerModule *plm);
+};
+
+#ifdef SCI_NAMESPACE
+}
+#endif
+
+#endif
|