diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/CharClassify.cxx | 43 | ||||
-rw-r--r-- | src/CharClassify.h | 25 | ||||
-rw-r--r-- | src/Document.cxx | 62 | ||||
-rw-r--r-- | src/Document.h | 6 | ||||
-rw-r--r-- | src/DocumentAccessor.cxx | 1 | ||||
-rw-r--r-- | src/Editor.cxx | 5 | ||||
-rw-r--r-- | src/RESearch.cxx | 74 | ||||
-rw-r--r-- | src/RESearch.h | 9 | ||||
-rw-r--r-- | src/ScintillaBase.cxx | 1 |
9 files changed, 121 insertions, 105 deletions
diff --git a/src/CharClassify.cxx b/src/CharClassify.cxx new file mode 100644 index 000000000..acab4b229 --- /dev/null +++ b/src/CharClassify.cxx @@ -0,0 +1,43 @@ +// Scintilla source code edit control +/** @file CharClassify.cxx + ** Character classifications used by Document and RESearch. + **/ +// Copyright 2006 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. + +#include <ctype.h> + +#include "CharClassify.h" + +// Shut up annoying Visual C++ warnings: +#ifdef _MSC_VER +#pragma warning(disable: 4514) +#endif + +CharClassify::CharClassify() { + SetDefaultCharClasses(true); +} + +void CharClassify::SetDefaultCharClasses(bool includeWordClass) { + // Initialize all char classes to default values + for (int ch = 0; ch < 256; ch++) { + if (ch == '\r' || ch == '\n') + charClass[ch] = ccNewLine; + else if (ch < 0x20 || ch == ' ') + charClass[ch] = ccSpace; + else if (includeWordClass && (ch >= 0x80 || isalnum(ch) || ch == '_')) + charClass[ch] = ccWord; + else + charClass[ch] = ccPunctuation; + } +} + +void CharClassify::SetCharClasses(const unsigned char *chars, cc newCharClass) { + // Apply the newCharClass to the specifed chars + if (chars) { + while (*chars) { + charClass[*chars] = static_cast<unsigned char>(newCharClass); + chars++; + } + } +} diff --git a/src/CharClassify.h b/src/CharClassify.h new file mode 100644 index 000000000..881d3a114 --- /dev/null +++ b/src/CharClassify.h @@ -0,0 +1,25 @@ +// Scintilla source code edit control +/** @file CharClassify.h + ** Character classifications used by Document and RESearch. + **/ +// Copyright 2006 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. + +#ifndef CHARCLASSIFY_H +#define CHARCLASSIFY_H + +class CharClassify { +public: + CharClassify(); + + enum cc { ccSpace, ccNewLine, ccWord, ccPunctuation }; + void SetDefaultCharClasses(bool includeWordClass); + void SetCharClasses(const unsigned char *chars, cc newCharClass); + cc GetClass(unsigned char ch) const { return static_cast<cc>(charClass[ch]);} + bool IsWord(unsigned char ch) const { return static_cast<cc>(charClass[ch]) == ccWord;} + +private: + enum { maxChar=256 }; + unsigned char charClass[maxChar]; // not type cc to save space +}; +#endif diff --git a/src/Document.cxx b/src/Document.cxx index a88468cb1..92be92691 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -15,6 +15,7 @@ #include "Scintilla.h" #include "SVector.h" #include "CellBuffer.h" +#include "CharClassify.h" #include "Document.h" #include "RESearch.h" @@ -50,7 +51,6 @@ Document::Document() { stylingBits = 5; stylingBitsMask = 0x1F; stylingMask = 0; - SetDefaultCharClasses(true); endStyled = 0; styleClock = 0; enteredCount = 0; @@ -836,10 +836,10 @@ int Document::ParaDown(int pos) { return LineEnd(line-1); } -Document::charClassification Document::WordCharClass(unsigned char ch) { +CharClassify::cc Document::WordCharClass(unsigned char ch) { if ((SC_CP_UTF8 == dbcsCodePage) && (ch >= 0x80)) - return ccWord; - return charClass[ch]; + return CharClassify::ccWord; + return charClass.GetClass(ch); } /** @@ -847,7 +847,7 @@ Document::charClassification Document::WordCharClass(unsigned char ch) { * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0. */ int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) { - charClassification ccStart = ccWord; + CharClassify::cc ccStart = CharClassify::ccWord; if (delta < 0) { if (!onlyWordCharacters) ccStart = WordCharClass(cb.CharAt(pos-1)); @@ -871,19 +871,19 @@ int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) { */ int Document::NextWordStart(int pos, int delta) { if (delta < 0) { - while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccSpace)) + while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace)) pos--; if (pos > 0) { - charClassification ccStart = WordCharClass(cb.CharAt(pos-1)); + CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1)); while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) { pos--; } } } else { - charClassification ccStart = WordCharClass(cb.CharAt(pos)); + CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos)); while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart)) pos++; - while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccSpace)) + while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace)) pos++; } return pos; @@ -899,22 +899,22 @@ int Document::NextWordStart(int pos, int delta) { int Document::NextWordEnd(int pos, int delta) { if (delta < 0) { if (pos > 0) { - charClassification ccStart = WordCharClass(cb.CharAt(pos-1)); - if (ccStart != ccSpace) { + CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1)); + if (ccStart != CharClassify::ccSpace) { while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) { pos--; } } - while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccSpace) { + while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) { pos--; } } } else { - while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccSpace) { + while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) { pos++; } if (pos < Length()) { - charClassification ccStart = WordCharClass(cb.CharAt(pos)); + CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos)); while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) { pos++; } @@ -929,8 +929,8 @@ int Document::NextWordEnd(int pos, int delta) { */ bool Document::IsWordStartAt(int pos) { if (pos > 0) { - charClassification ccPos = WordCharClass(CharAt(pos)); - return (ccPos == ccWord || ccPos == ccPunctuation) && + CharClassify::cc ccPos = WordCharClass(CharAt(pos)); + return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) && (ccPos != WordCharClass(CharAt(pos - 1))); } return true; @@ -942,8 +942,8 @@ bool Document::IsWordStartAt(int pos) { */ bool Document::IsWordEndAt(int pos) { if (pos < Length()) { - charClassification ccPrev = WordCharClass(CharAt(pos-1)); - return (ccPrev == ccWord || ccPrev == ccPunctuation) && + CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1)); + return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) && (ccPrev != WordCharClass(CharAt(pos))); } return true; @@ -1004,7 +1004,7 @@ long Document::FindText(int minPos, int maxPos, const char *s, int *length) { if (regExp) { if (!pre) - pre = new RESearch(); + pre = new RESearch(&charClass); if (!pre) return -1; @@ -1266,27 +1266,11 @@ void Document::ChangeCase(Range r, bool makeUpperCase) { } void Document::SetDefaultCharClasses(bool includeWordClass) { - // Initialize all char classes to default values - for (int ch = 0; ch < 256; ch++) { - if (ch == '\r' || ch == '\n') - charClass[ch] = ccNewLine; - else if (ch < 0x20 || ch == ' ') - charClass[ch] = ccSpace; - else if (includeWordClass && (ch >= 0x80 || isalnum(ch) || ch == '_')) - charClass[ch] = ccWord; - else - charClass[ch] = ccPunctuation; - } + charClass.SetDefaultCharClasses(includeWordClass); } -void Document::SetCharClasses(const unsigned char *chars, charClassification newCharClass) { - // Apply the newCharClass to the specifed chars - if (chars) { - while (*chars) { - charClass[*chars] = newCharClass; - chars++; - } - } +void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) { + charClass.SetCharClasses(chars, newCharClass); } void Document::SetStylingBits(int bits) { @@ -1430,7 +1414,7 @@ void Document::NotifyModified(DocModification mh) { } bool Document::IsWordPartSeparator(char ch) { - return (WordCharClass(ch) == ccWord) && IsPunctuation(ch); + return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch); } int Document::WordPartLeft(int pos) { diff --git a/src/Document.h b/src/Document.h index 6ff858b92..d774d5664 100644 --- a/src/Document.h +++ b/src/Document.h @@ -93,7 +93,7 @@ public: private: int refCount; CellBuffer cb; - charClassification charClass[256]; + CharClassify charClass; char stylingMask; int endStyled; int styleClock; @@ -207,7 +207,7 @@ public: void ChangeCase(Range r, bool makeUpperCase); void SetDefaultCharClasses(bool includeWordClass); - void SetCharClasses(const unsigned char *chars, charClassification newCharClass); + void SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass); void SetStylingBits(int bits); void StartStyling(int position, char mask); bool SetStyleFor(int length, char style); @@ -239,7 +239,7 @@ public: private: void CheckReadOnly(); - charClassification WordCharClass(unsigned char ch); + CharClassify::cc WordCharClass(unsigned char ch); bool IsWordStartAt(int pos); bool IsWordEndAt(int pos); bool IsWordAt(int start, int end); diff --git a/src/DocumentAccessor.cxx b/src/DocumentAccessor.cxx index f479ce025..c695c5f51 100644 --- a/src/DocumentAccessor.cxx +++ b/src/DocumentAccessor.cxx @@ -18,6 +18,7 @@ #include "DocumentAccessor.h" #include "CellBuffer.h" #include "Scintilla.h" +#include "CharClassify.h" #include "Document.h" DocumentAccessor::~DocumentAccessor() { diff --git a/src/Editor.cxx b/src/Editor.cxx index 8780a128f..08693815f 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -26,6 +26,7 @@ #include "LineMarker.h" #include "Style.h" #include "ViewStyle.h" +#include "CharClassify.h" #include "Document.h" #include "Editor.h" @@ -6043,14 +6044,14 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { pdoc->SetDefaultCharClasses(false); if (lParam == 0) return 0; - pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), Document::ccWord); + pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), CharClassify::ccWord); } break; case SCI_SETWHITESPACECHARS: { if (lParam == 0) return 0; - pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), Document::ccSpace); + pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), CharClassify::ccSpace); } break; diff --git a/src/RESearch.cxx b/src/RESearch.cxx index b7ea71bfb..f1fda7304 100644 --- a/src/RESearch.cxx +++ b/src/RESearch.cxx @@ -174,8 +174,14 @@ * matches: foo-foo fo-fo fob-fob foobar-foobar ... */ +#include "CharClassify.h" #include "RESearch.h" +// Shut up annoying Visual C++ warnings: +#ifdef _MSC_VER +#pragma warning(disable: 4514) +#endif + #define OKP 1 #define NOP 0 @@ -206,7 +212,15 @@ const char bitarr[] = {1,2,4,8,16,32,64,'\200'}; #define badpat(x) (*nfa = END, x) -RESearch::RESearch() { +/* + * character classification table for word boundary operators BOW + * and EOW is passed in by the creator of this object (Scintilla + * Document). The Document default state is that word chars are: + * 0-9,a-z, A-Z and _ + */ + +RESearch::RESearch(CharClassify *charClassTable) { + charClass = charClassTable; Init(); } @@ -287,7 +301,7 @@ const char *RESearch::Compile(const char *pat, int length, bool caseSensitive, b char *mp=nfa; /* nfa pointer */ char *lp; /* saved pointer.. */ char *sp=nfa; /* another one.. */ - char *mpMax = mp + MAXNFA - BITBLK - 10; + char *mpMax = mp + MAXNFA - BITBLK - 10; int tagi = 0; /* tag stack index */ int tagc = 1; /* actual tag count */ @@ -624,33 +638,6 @@ int RESearch::Execute(CharacterIndexer &ci, int lp, int endp) { extern void re_fail(char *,char); -/* - * character classification table for word boundary operators BOW - * and EOW. the reason for not using ctype macros is that we can - * let the user add into our own table. see RESearch::ModifyWord. This table - * is not in the bitset form, since we may wish to extend it in the - * future for other character classifications. - * - * TRUE for 0-9 A-Z a-z _ - */ -static char chrtyp[MAXCHR] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 0, 0, 0, 0 - }; - -#define inascii(x) (0177&(x)) -#define iswordc(x) chrtyp[inascii(x)] #define isinset(x,y) ((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND]) /* @@ -757,35 +744,6 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, int endp, char *ap) { } /* - * RESearch::ModifyWord: - * add new characters into the word table to change RESearch::Execute's - * understanding of what a word should look like. Note that we - * only accept additions into the word definition. - * - * If the string parameter is 0 or null string, the table is - * reset back to the default containing A-Z a-z 0-9 _. [We use - * the compact bitset representation for the default table] - */ - -static char deftab[16] = { - 0, 0, 0, 0, 0, 0, '\377', 003, '\376', '\377', '\377', '\207', - '\376', '\377', '\377', 007 -}; - -void RESearch::ModifyWord(char *s) { - int i; - - if (!s || !*s) { - for (i = 0; i < MAXCHR; i++) - if (!isinset(deftab,i)) - iswordc(i) = 0; - } - else - while(*s) - iswordc(*s++) = 1; -} - -/* * RESearch::Substitute: * substitute the matched portions of the src in dst. * diff --git a/src/RESearch.h b/src/RESearch.h index a558b371d..25205951f 100644 --- a/src/RESearch.h +++ b/src/RESearch.h @@ -18,7 +18,7 @@ #define BITBLK MAXCHR/CHRBIT class CharacterIndexer { -public: +public: virtual char CharAt(int index)=0; virtual ~CharacterIndexer() { } @@ -27,7 +27,7 @@ public: class RESearch { public: - RESearch(); + RESearch(CharClassify *charClassTable); ~RESearch(); void Init(); void Clear(); @@ -36,7 +36,6 @@ public: void ChSetWithCase(char c, bool caseSensitive); const char *Compile(const char *pat, int length, bool caseSensitive, bool posix); int Execute(CharacterIndexer &ci, int lp, int endp); - void ModifyWord(char *s); int Substitute(CharacterIndexer &ci, char *src, char *dst); enum {MAXTAG=10}; @@ -57,6 +56,10 @@ private: char bittab[BITBLK]; /* bit table for CCL */ /* pre-set bits... */ int failure; + CharClassify *charClass; + bool iswordc(unsigned char x) { + return charClass->IsWord(x); + } }; #endif diff --git a/src/ScintillaBase.cxx b/src/ScintillaBase.cxx index 0ca5002a7..2d13efaba 100644 --- a/src/ScintillaBase.cxx +++ b/src/ScintillaBase.cxx @@ -31,6 +31,7 @@ #include "Style.h" #include "ViewStyle.h" #include "AutoComplete.h" +#include "CharClassify.h" #include "Document.h" #include "Editor.h" #include "ScintillaBase.h" |