diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Document.cxx | 154 | ||||
| -rw-r--r-- | src/Document.h | 24 | ||||
| -rw-r--r-- | src/Editor.cxx | 43 | ||||
| -rw-r--r-- | src/Editor.h | 1 | 
4 files changed, 198 insertions, 24 deletions
| diff --git a/src/Document.cxx b/src/Document.cxx index b1130bd09..fe8b43128 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -10,6 +10,17 @@  #include <stdio.h>  #include <ctype.h> +#include <string> +#include <vector> + +// With Borland C++ 5.5, including <string> includes Windows.h leading to defining +// FindText to FindTextA which makes calls here to Document::FindText fail. +#ifdef __BORLANDC__ +#ifdef FindText +#undef FindText +#endif +#endif +  #include "Platform.h"  #include "Scintilla.h" @@ -22,6 +33,7 @@  #include "Decoration.h"  #include "Document.h"  #include "RESearch.h" +#include "UniConversion.h"  #ifdef SCI_NAMESPACE  using namespace Scintilla; @@ -1074,6 +1086,57 @@ static inline char MakeLowerCase(char ch) {  		return static_cast<char>(ch - 'A' + 'a');  } +static bool GoodTrailByte(int v) { +	return (v >= 0x80) && (v < 0xc0); +} + +size_t Document::ExtractChar(int pos, char *bytes) { +	unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos)); +	size_t widthChar = UTF8CharLength(ch); +	bytes[0] = ch; +	for (size_t i=1; i<widthChar; i++) { +		bytes[i] = cb.CharAt(pos+i); +		if (!GoodTrailByte(static_cast<unsigned char>(bytes[i]))) { // Bad byte +			widthChar = 1; +		} +	} +	return widthChar; +} + +CaseFolderTable::CaseFolderTable() { +	for (size_t iChar=0; iChar<sizeof(mapping); iChar++) { +		mapping[iChar] = static_cast<char>(iChar); +	} +} + +CaseFolderTable::~CaseFolderTable() { +} + +size_t CaseFolderTable::Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) { +	if (lenMixed > sizeFolded) { +		return 0; +	} else { +		for (size_t i=0; i<lenMixed; i++) { +			folded[i] = mapping[static_cast<unsigned char>(mixed[i])]; +		} +		return lenMixed; +	} +} + +void CaseFolderTable::SetTranslation(char ch, char chTranslation) { +	mapping[static_cast<unsigned char>(ch)] = chTranslation; +} + +void CaseFolderTable::StandardASCII() { +	for (size_t iChar=0; iChar<sizeof(mapping); iChar++) { +		if (iChar >= 'A' && iChar <= 'Z') { +			mapping[iChar] = static_cast<char>(iChar - 'A' + 'a'); +		} else { +			mapping[iChar] = static_cast<char>(iChar); +		} +	} +} +  /**   * Find text in document, supporting both forward and backward   * searches (just pass minPos > maxPos to do a backward search) @@ -1081,7 +1144,7 @@ static inline char MakeLowerCase(char ch) {   */  long Document::FindText(int minPos, int maxPos, const char *s,                          bool caseSensitive, bool word, bool wordStart, bool regExp, int flags, -                        int *length) { +                        int *length, CaseFolder *pcf) {  	if (regExp) {  		if (!regex)  			regex = CreateRegexSearch(&charClass); @@ -1104,13 +1167,11 @@ long Document::FindText(int minPos, int maxPos, const char *s,  			endSearch = endPos - lengthFind + 1;  		}  		//Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind); -		char firstChar = s[0]; -		if (!caseSensitive) -			firstChar = static_cast<char>(MakeUpperCase(firstChar));  		int pos = forward ? startPos : (startPos - 1); -		while (forward ? (pos < endSearch) : (pos >= endSearch)) { -			char ch = CharAt(pos); -			if (caseSensitive) { +		char firstChar = s[0]; +		if (caseSensitive) { +			while (forward ? (pos < endSearch) : (pos >= endSearch)) { +				char ch = CharAt(pos);  				if (ch == firstChar) {  					bool found = true;  					if (pos + lengthFind > Platform::Maximum(startPos, endPos)) found = false; @@ -1126,27 +1187,88 @@ long Document::FindText(int minPos, int maxPos, const char *s,  							return pos;  					}  				} -			} else { -				if (MakeUpperCase(ch) == firstChar) { +				pos += increment; +				if (dbcsCodePage && (pos >= 0)) { +					// Ensure trying to match from start of character +					pos = MovePositionOutsideChar(pos, increment, false); +				} +			} +		} else if (SC_CP_UTF8 == dbcsCodePage) { +			const size_t maxBytesCharacter = 4; +			const size_t maxFoldingExpansion = 4; +			int endMatch = Platform::Maximum(startPos, endPos); +			std::vector<char> searchThing(*length * maxBytesCharacter * maxFoldingExpansion + 1); +			size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), s, *length); +			while (forward ? (pos < endSearch) : (pos >= endSearch)) { +				bool matchChar = true; +				int matchOff = 0; +				int searchOff = 0; +				int widthFirst = 0; +				while (matchChar && (pos + matchOff < endMatch)) { +					int widthChar; +					char bytes[maxBytesCharacter + 1]; +					widthChar = ExtractChar(pos + matchOff, bytes); +					bytes[maxBytesCharacter] = 0; +					if (!widthFirst) +						widthFirst = widthChar; +					char folded[maxBytesCharacter * maxFoldingExpansion + 1]; +					int lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar); +					folded[lenFlat] = 0; +					// Does folded match the buffer +					matchChar = 0 == strncmp(folded, &searchThing[0] + searchOff, lenFlat); +					matchOff += widthChar; +					searchOff += lenFlat; +					if (searchOff >= static_cast<int>(lenSearch)) +						break; +				} +				if (matchChar && (searchOff == static_cast<int>(lenSearch))) { +					if ((!word && !wordStart) || +					        (word && IsWordAt(pos, pos + lengthFind)) || +							(wordStart && IsWordStartAt(pos))) { +						*length = matchOff; +						return pos; +					} +				} +				if (forward) { +					pos += widthFirst; +				} else { +					pos--; +					if (pos > 0) { +						// Ensure trying to match from start of character +						pos = MovePositionOutsideChar(pos, increment, false); +					} +				} +			} +		} else { +			CaseFolderTable caseFolder; +			std::vector<char> searchThing(*length + 1); +			pcf->Fold(&searchThing[0], searchThing.size(), s, *length); +			while (forward ? (pos < endSearch) : (pos >= endSearch)) { +				char ch = CharAt(pos); +				char folded[2]; +				pcf->Fold(folded, sizeof(folded), &ch, 1); +				if (folded[0] == searchThing[0]) {  					bool found = true;  					if (pos + lengthFind > Platform::Maximum(startPos, endPos)) found = false;  					for (int posMatch = 1; posMatch < lengthFind && found; posMatch++) {  						ch = CharAt(pos + posMatch); -						if (MakeUpperCase(ch) != MakeUpperCase(s[posMatch])) +						pcf->Fold(folded, sizeof(folded), &ch, 1); +						if (folded[0] != searchThing[posMatch])  							found = false;  					}  					if (found) {  						if ((!word && !wordStart) ||  						        (word && IsWordAt(pos, pos + lengthFind)) || -						        (wordStart && IsWordStartAt(pos))) +								(wordStart && IsWordStartAt(pos))) {  							return pos; +						}  					}  				} -			} -			pos += increment; -			if (dbcsCodePage && (pos >= 0)) { -				// Ensure trying to match from start of character -				pos = MovePositionOutsideChar(pos, increment, false); +				pos += increment; +				if (dbcsCodePage && (pos >= 0)) { +					// Ensure trying to match from start of character +					pos = MovePositionOutsideChar(pos, increment, false); +				}  			}  		}  	} diff --git a/src/Document.h b/src/Document.h index c61c56892..73571cbdd 100644 --- a/src/Document.h +++ b/src/Document.h @@ -115,6 +115,24 @@ struct StyledText {  	}  }; +class CaseFolder { +public: +	virtual ~CaseFolder() { +	}; +	virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) = 0; +}; + +class CaseFolderTable : public CaseFolder { +protected: +	char mapping[256]; +public: +	CaseFolderTable(); +	virtual ~CaseFolderTable(); +	virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed); +	void SetTranslation(char ch, char chTranslation); +	void StandardASCII(); +}; +  /**   */  class Document : PerLine { @@ -254,9 +272,9 @@ public:  	int NextWordEnd(int pos, int delta);  	int Length() const { return cb.Length(); }  	void Allocate(int newSize) { cb.Allocate(newSize); } -	long FindText(int minPos, int maxPos, const char *s, -		bool caseSensitive, bool word, bool wordStart, bool regExp, int flags, int *length); -	long FindText(int iMessage, unsigned long wParam, long lParam); +	size_t ExtractChar(int pos, char *bytes); +	long FindText(int minPos, int maxPos, const char *s, bool caseSensitive, bool word, +		bool wordStart, bool regExp, int flags, int *length, CaseFolder *pcf);  	const char *SubstituteByPosition(const char *text, int *length);  	int LinesTotal() const; diff --git a/src/Editor.cxx b/src/Editor.cxx index 4bdbecda8..e5623b542 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -13,6 +13,7 @@  #include <string>  #include <vector>  #include <algorithm> +#include <memory>  // With Borland C++ 5.5, including <string> includes Windows.h leading to defining  // FindText to FindTextA which makes calls here to Document::FindText fail. @@ -5309,6 +5310,31 @@ void Editor::Indent(bool forwards) {  	}  } +class CaseFolderASCII : public CaseFolderTable { +public: +	CaseFolderASCII() { +		StandardASCII(); +	} +	~CaseFolderASCII() { +	} +	virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) { +		if (lenMixed > sizeFolded) { +			return 0; +		} else { +			for (size_t i=0; i<lenMixed; i++) { +				folded[i] = mapping[static_cast<unsigned char>(mixed[i])]; +			} +			return lenMixed; +		} +	} +}; + + +CaseFolder *Editor::CaseFolderForEncoding() { +	// Simple default that only maps ASCII upper case to lower case. +	return new CaseFolderASCII(); +} +  /**   * Search of a text in the document, in the given range.   * @return The position of the found text, -1 if not found. @@ -5320,13 +5346,15 @@ long Editor::FindText(  	Sci_TextToFind *ft = reinterpret_cast<Sci_TextToFind *>(lParam);  	int lengthFound = istrlen(ft->lpstrText); +	std::auto_ptr<CaseFolder> pcf(CaseFolderForEncoding());  	int pos = pdoc->FindText(ft->chrg.cpMin, ft->chrg.cpMax, ft->lpstrText,  	        (wParam & SCFIND_MATCHCASE) != 0,  	        (wParam & SCFIND_WHOLEWORD) != 0,  	        (wParam & SCFIND_WORDSTART) != 0,  	        (wParam & SCFIND_REGEXP) != 0,  	        wParam, -	        &lengthFound); +	        &lengthFound, +			pcf.get());  	if (pos != -1) {  		ft->chrgText.cpMin = pos;  		ft->chrgText.cpMax = pos + lengthFound; @@ -5363,6 +5391,7 @@ long Editor::SearchText(  	const char *txt = reinterpret_cast<char *>(lParam);  	int pos;  	int lengthFound = istrlen(txt); +	std::auto_ptr<CaseFolder> pcf(CaseFolderForEncoding());  	if (iMessage == SCI_SEARCHNEXT) {  		pos = pdoc->FindText(searchAnchor, pdoc->Length(), txt,  		        (wParam & SCFIND_MATCHCASE) != 0, @@ -5370,7 +5399,8 @@ long Editor::SearchText(  		        (wParam & SCFIND_WORDSTART) != 0,  		        (wParam & SCFIND_REGEXP) != 0,  		        wParam, -		        &lengthFound); +		        &lengthFound, +				pcf.get());  	} else {  		pos = pdoc->FindText(searchAnchor, 0, txt,  		        (wParam & SCFIND_MATCHCASE) != 0, @@ -5378,9 +5408,9 @@ long Editor::SearchText(  		        (wParam & SCFIND_WORDSTART) != 0,  		        (wParam & SCFIND_REGEXP) != 0,  		        wParam, -		        &lengthFound); +		        &lengthFound, +				pcf.get());  	} -  	if (pos != -1) {  		SetSelection(pos, pos + lengthFound);  	} @@ -5411,13 +5441,16 @@ std::string Editor::CaseMapString(const std::string &s, int caseMapping) {   */  long Editor::SearchInTarget(const char *text, int length) {  	int lengthFound = length; + +	std::auto_ptr<CaseFolder> pcf(CaseFolderForEncoding());  	int pos = pdoc->FindText(targetStart, targetEnd, text,  	        (searchFlags & SCFIND_MATCHCASE) != 0,  	        (searchFlags & SCFIND_WHOLEWORD) != 0,  	        (searchFlags & SCFIND_WORDSTART) != 0,  	        (searchFlags & SCFIND_REGEXP) != 0,  	        searchFlags, -	        &lengthFound); +	        &lengthFound, +			pcf.get());  	if (pos != -1) {  		targetStart = pos;  		targetEnd = pos + lengthFound; diff --git a/src/Editor.h b/src/Editor.h index 053b10a9e..180db571a 100644 --- a/src/Editor.h +++ b/src/Editor.h @@ -424,6 +424,7 @@ protected:	// ScintillaBase subclass needs access to much of Editor  	void Indent(bool forwards); +	virtual CaseFolder *CaseFolderForEncoding();  	long FindText(uptr_t wParam, sptr_t lParam);  	void SearchAnchor();  	long SearchText(unsigned int iMessage, uptr_t wParam, sptr_t lParam); | 
