diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/CharClassify.cxx | 43 | ||||
| -rw-r--r-- | src/CharClassify.h | 25 | ||||
| -rw-r--r-- | src/Document.cxx | 62 | ||||
| -rw-r--r-- | src/Document.h | 6 | ||||
| -rw-r--r-- | src/DocumentAccessor.cxx | 1 | ||||
| -rw-r--r-- | src/Editor.cxx | 5 | ||||
| -rw-r--r-- | src/RESearch.cxx | 74 | ||||
| -rw-r--r-- | src/RESearch.h | 9 | ||||
| -rw-r--r-- | src/ScintillaBase.cxx | 1 | 
9 files changed, 121 insertions, 105 deletions
| diff --git a/src/CharClassify.cxx b/src/CharClassify.cxx new file mode 100644 index 000000000..acab4b229 --- /dev/null +++ b/src/CharClassify.cxx @@ -0,0 +1,43 @@ +// Scintilla source code edit control +/** @file CharClassify.cxx + ** Character classifications used by Document and RESearch. + **/ +// Copyright 2006 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. + +#include <ctype.h> + +#include "CharClassify.h" + +// Shut up annoying Visual C++ warnings: +#ifdef _MSC_VER +#pragma warning(disable: 4514) +#endif + +CharClassify::CharClassify() { +	SetDefaultCharClasses(true); +} + +void CharClassify::SetDefaultCharClasses(bool includeWordClass) { +	// Initialize all char classes to default values +	for (int ch = 0; ch < 256; ch++) { +		if (ch == '\r' || ch == '\n') +			charClass[ch] = ccNewLine; +		else if (ch < 0x20 || ch == ' ') +			charClass[ch] = ccSpace; +		else if (includeWordClass && (ch >= 0x80 || isalnum(ch) || ch == '_')) +			charClass[ch] = ccWord; +		else +			charClass[ch] = ccPunctuation; +	} +} + +void CharClassify::SetCharClasses(const unsigned char *chars, cc newCharClass) { +	// Apply the newCharClass to the specifed chars +	if (chars) { +		while (*chars) { +			charClass[*chars] = static_cast<unsigned char>(newCharClass); +			chars++; +		} +	} +} diff --git a/src/CharClassify.h b/src/CharClassify.h new file mode 100644 index 000000000..881d3a114 --- /dev/null +++ b/src/CharClassify.h @@ -0,0 +1,25 @@ +// Scintilla source code edit control +/** @file CharClassify.h + ** Character classifications used by Document and RESearch. + **/ +// Copyright 2006 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. + +#ifndef CHARCLASSIFY_H +#define CHARCLASSIFY_H + +class CharClassify { +public: +	CharClassify(); + +	enum cc { ccSpace, ccNewLine, ccWord, ccPunctuation }; +	void SetDefaultCharClasses(bool includeWordClass); +	void SetCharClasses(const unsigned char *chars, cc newCharClass); +	cc GetClass(unsigned char ch) const { return static_cast<cc>(charClass[ch]);} +	bool IsWord(unsigned char ch) const { return static_cast<cc>(charClass[ch]) == ccWord;} + +private: +	enum { maxChar=256 }; +	unsigned char charClass[maxChar];    // not type cc to save space +}; +#endif diff --git a/src/Document.cxx b/src/Document.cxx index a88468cb1..92be92691 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -15,6 +15,7 @@  #include "Scintilla.h"  #include "SVector.h"  #include "CellBuffer.h" +#include "CharClassify.h"  #include "Document.h"  #include "RESearch.h" @@ -50,7 +51,6 @@ Document::Document() {  	stylingBits = 5;  	stylingBitsMask = 0x1F;  	stylingMask = 0; -	SetDefaultCharClasses(true);  	endStyled = 0;  	styleClock = 0;  	enteredCount = 0; @@ -836,10 +836,10 @@ int Document::ParaDown(int pos) {  		return LineEnd(line-1);  } -Document::charClassification Document::WordCharClass(unsigned char ch) { +CharClassify::cc Document::WordCharClass(unsigned char ch) {  	if ((SC_CP_UTF8 == dbcsCodePage) && (ch >= 0x80)) -		return ccWord; -	return charClass[ch]; +		return CharClassify::ccWord; +	return charClass.GetClass(ch);  }  /** @@ -847,7 +847,7 @@ Document::charClassification Document::WordCharClass(unsigned char ch) {   * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.   */  int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) { -	charClassification ccStart = ccWord; +	CharClassify::cc ccStart = CharClassify::ccWord;  	if (delta < 0) {  		if (!onlyWordCharacters)  			ccStart = WordCharClass(cb.CharAt(pos-1)); @@ -871,19 +871,19 @@ int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {   */  int Document::NextWordStart(int pos, int delta) {  	if (delta < 0) { -		while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccSpace)) +		while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))  			pos--;  		if (pos > 0) { -			charClassification ccStart = WordCharClass(cb.CharAt(pos-1)); +			CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));  			while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {  				pos--;  			}  		}  	} else { -		charClassification ccStart = WordCharClass(cb.CharAt(pos)); +		CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));  		while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))  			pos++; -		while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccSpace)) +		while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))  			pos++;  	}  	return pos; @@ -899,22 +899,22 @@ int Document::NextWordStart(int pos, int delta) {  int Document::NextWordEnd(int pos, int delta) {  	if (delta < 0) {  		if (pos > 0) { -			charClassification ccStart = WordCharClass(cb.CharAt(pos-1)); -			if (ccStart != ccSpace) { +			CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1)); +			if (ccStart != CharClassify::ccSpace) {  				while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {  					pos--;  				}  			} -			while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccSpace) { +			while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {  				pos--;  			}  		}  	} else { -		while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccSpace) { +		while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {  			pos++;  		}  		if (pos < Length()) { -			charClassification ccStart = WordCharClass(cb.CharAt(pos)); +			CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));  			while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {  				pos++;  			} @@ -929,8 +929,8 @@ int Document::NextWordEnd(int pos, int delta) {   */  bool Document::IsWordStartAt(int pos) {  	if (pos > 0) { -		charClassification ccPos = WordCharClass(CharAt(pos)); -		return (ccPos == ccWord || ccPos == ccPunctuation) && +		CharClassify::cc ccPos = WordCharClass(CharAt(pos)); +		return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&  			(ccPos != WordCharClass(CharAt(pos - 1)));  	}  	return true; @@ -942,8 +942,8 @@ bool Document::IsWordStartAt(int pos) {   */  bool Document::IsWordEndAt(int pos) {  	if (pos < Length()) { -		charClassification ccPrev = WordCharClass(CharAt(pos-1)); -		return (ccPrev == ccWord || ccPrev == ccPunctuation) && +		CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1)); +		return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&  			(ccPrev != WordCharClass(CharAt(pos)));  	}  	return true; @@ -1004,7 +1004,7 @@ long Document::FindText(int minPos, int maxPos, const char *s,                          int *length) {  	if (regExp) {  		if (!pre) -			pre = new RESearch(); +			pre = new RESearch(&charClass);  		if (!pre)  			return -1; @@ -1266,27 +1266,11 @@ void Document::ChangeCase(Range r, bool makeUpperCase) {  }  void Document::SetDefaultCharClasses(bool includeWordClass) { -	// Initialize all char classes to default values -	for (int ch = 0; ch < 256; ch++) { -		if (ch == '\r' || ch == '\n') -			charClass[ch] = ccNewLine; -		else if (ch < 0x20 || ch == ' ') -			charClass[ch] = ccSpace; -		else if (includeWordClass && (ch >= 0x80 || isalnum(ch) || ch == '_')) -			charClass[ch] = ccWord; -		else -			charClass[ch] = ccPunctuation; -	} +    charClass.SetDefaultCharClasses(includeWordClass);  } -void Document::SetCharClasses(const unsigned char *chars, charClassification newCharClass) { -	// Apply the newCharClass to the specifed chars -	if (chars) { -		while (*chars) { -			charClass[*chars] = newCharClass; -			chars++; -		} -	} +void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) { +    charClass.SetCharClasses(chars, newCharClass);  }  void Document::SetStylingBits(int bits) { @@ -1430,7 +1414,7 @@ void Document::NotifyModified(DocModification mh) {  }  bool Document::IsWordPartSeparator(char ch) { -	return (WordCharClass(ch) == ccWord) && IsPunctuation(ch); +	return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);  }  int Document::WordPartLeft(int pos) { diff --git a/src/Document.h b/src/Document.h index 6ff858b92..d774d5664 100644 --- a/src/Document.h +++ b/src/Document.h @@ -93,7 +93,7 @@ public:  private:  	int refCount;  	CellBuffer cb; -	charClassification charClass[256]; +	CharClassify charClass;  	char stylingMask;  	int endStyled;  	int styleClock; @@ -207,7 +207,7 @@ public:  	void ChangeCase(Range r, bool makeUpperCase);  	void SetDefaultCharClasses(bool includeWordClass); -	void SetCharClasses(const unsigned char *chars, charClassification newCharClass); +	void SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass);  	void SetStylingBits(int bits);  	void StartStyling(int position, char mask);  	bool SetStyleFor(int length, char style); @@ -239,7 +239,7 @@ public:  private:  	void CheckReadOnly(); -	charClassification WordCharClass(unsigned char ch); +	CharClassify::cc WordCharClass(unsigned char ch);  	bool IsWordStartAt(int pos);  	bool IsWordEndAt(int pos);  	bool IsWordAt(int start, int end); diff --git a/src/DocumentAccessor.cxx b/src/DocumentAccessor.cxx index f479ce025..c695c5f51 100644 --- a/src/DocumentAccessor.cxx +++ b/src/DocumentAccessor.cxx @@ -18,6 +18,7 @@  #include "DocumentAccessor.h"  #include "CellBuffer.h"  #include "Scintilla.h" +#include "CharClassify.h"  #include "Document.h"  DocumentAccessor::~DocumentAccessor() { diff --git a/src/Editor.cxx b/src/Editor.cxx index 8780a128f..08693815f 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -26,6 +26,7 @@  #include "LineMarker.h"  #include "Style.h"  #include "ViewStyle.h" +#include "CharClassify.h"  #include "Document.h"  #include "Editor.h" @@ -6043,14 +6044,14 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {  			pdoc->SetDefaultCharClasses(false);  			if (lParam == 0)  				return 0; -			pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), Document::ccWord); +			pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), CharClassify::ccWord);  		}  		break;  	case SCI_SETWHITESPACECHARS: {  			if (lParam == 0)  				return 0; -			pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), Document::ccSpace); +			pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), CharClassify::ccSpace);  		}  		break; diff --git a/src/RESearch.cxx b/src/RESearch.cxx index b7ea71bfb..f1fda7304 100644 --- a/src/RESearch.cxx +++ b/src/RESearch.cxx @@ -174,8 +174,14 @@   *	matches:	foo-foo fo-fo fob-fob foobar-foobar ...   */ +#include "CharClassify.h"  #include "RESearch.h" +// Shut up annoying Visual C++ warnings: +#ifdef _MSC_VER +#pragma warning(disable: 4514) +#endif +  #define OKP     1  #define NOP     0 @@ -206,7 +212,15 @@ const char bitarr[] = {1,2,4,8,16,32,64,'\200'};  #define badpat(x)	(*nfa = END, x) -RESearch::RESearch() { +/* + * character classification table for word boundary operators BOW + * and EOW is passed in by the creator of this object (Scintilla + * Document). The Document default state is that word chars are: + * 0-9,a-z, A-Z and _ + */ + +RESearch::RESearch(CharClassify *charClassTable) { +	charClass = charClassTable;  	Init();  } @@ -287,7 +301,7 @@ const char *RESearch::Compile(const char *pat, int length, bool caseSensitive, b  	char *mp=nfa;          /* nfa pointer       */  	char *lp;              /* saved pointer..   */  	char *sp=nfa;          /* another one..     */ -    char *mpMax = mp + MAXNFA - BITBLK - 10; +	char *mpMax = mp + MAXNFA - BITBLK - 10;  	int tagi = 0;          /* tag stack index   */  	int tagc = 1;          /* actual tag count  */ @@ -624,33 +638,6 @@ int RESearch::Execute(CharacterIndexer &ci, int lp, int endp) {  extern void re_fail(char *,char); -/* - * character classification table for word boundary operators BOW - * and EOW. the reason for not using ctype macros is that we can - * let the user add into our own table. see RESearch::ModifyWord. This table - * is not in the bitset form, since we may wish to extend it in the - * future for other character classifications. - * - *	TRUE for 0-9 A-Z a-z _ - */ -static char chrtyp[MAXCHR] = { -	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -	0, 0, 0, 0, 0, 0, 0, 0, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 0, 0, -	0, 0, 0, 0, 0, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 0, 0, 0, 0, 1, 0, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 0, 0, 0, 0, 0 -	}; - -#define inascii(x)	(0177&(x)) -#define iswordc(x) 	chrtyp[inascii(x)]  #define isinset(x,y) 	((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND])  /* @@ -757,35 +744,6 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, int endp, char *ap) {  }  /* - * RESearch::ModifyWord: - *	add new characters into the word table to change RESearch::Execute's - *	understanding of what a word should look like. Note that we - *	only accept additions into the word definition. - * - *	If the string parameter is 0 or null string, the table is - *	reset back to the default containing A-Z a-z 0-9 _. [We use - *	the compact bitset representation for the default table] - */ - -static char deftab[16] = { -	0, 0, 0, 0, 0, 0, '\377', 003, '\376', '\377', '\377', '\207', -	'\376', '\377', '\377', 007 -}; - -void RESearch::ModifyWord(char *s) { -	int i; - -	if (!s || !*s) { -		for (i = 0; i < MAXCHR; i++) -			if (!isinset(deftab,i)) -				iswordc(i) = 0; -	} -	else -		while(*s) -			iswordc(*s++) = 1; -} - -/*   * RESearch::Substitute:   *	substitute the matched portions of the src in dst.   * diff --git a/src/RESearch.h b/src/RESearch.h index a558b371d..25205951f 100644 --- a/src/RESearch.h +++ b/src/RESearch.h @@ -18,7 +18,7 @@  #define BITBLK	MAXCHR/CHRBIT  class CharacterIndexer { -public:  +public:  	virtual char CharAt(int index)=0;  	virtual ~CharacterIndexer() {  	} @@ -27,7 +27,7 @@ public:  class RESearch {  public: -	RESearch(); +	RESearch(CharClassify *charClassTable);  	~RESearch();  	void Init();  	void Clear(); @@ -36,7 +36,6 @@ public:  	void ChSetWithCase(char c, bool caseSensitive);  	const char *Compile(const char *pat, int length, bool caseSensitive, bool posix);  	int Execute(CharacterIndexer &ci, int lp, int endp); -	void ModifyWord(char *s);  	int Substitute(CharacterIndexer &ci, char *src, char *dst);  	enum {MAXTAG=10}; @@ -57,6 +56,10 @@ private:  	char bittab[BITBLK];		/* bit table for CCL */  						/* pre-set bits...   */  	int failure; +	CharClassify *charClass; +	bool iswordc(unsigned char x) { +		return charClass->IsWord(x); +	}  };  #endif diff --git a/src/ScintillaBase.cxx b/src/ScintillaBase.cxx index 0ca5002a7..2d13efaba 100644 --- a/src/ScintillaBase.cxx +++ b/src/ScintillaBase.cxx @@ -31,6 +31,7 @@  #include "Style.h"  #include "ViewStyle.h"  #include "AutoComplete.h" +#include "CharClassify.h"  #include "Document.h"  #include "Editor.h"  #include "ScintillaBase.h" | 
