diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Document.cxx | 50 | ||||
| -rw-r--r-- | src/Document.h | 1 | ||||
| -rw-r--r-- | src/UniConversion.h | 6 | 
3 files changed, 29 insertions, 28 deletions
| diff --git a/src/Document.cxx b/src/Document.cxx index dca0ccc51..4e9366064 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -1373,19 +1373,6 @@ static inline char MakeLowerCase(char ch) {  		return static_cast<char>(ch - 'A' + 'a');  } -size_t Document::ExtractChar(int pos, char *bytes) { -	unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos)); -	size_t widthChar = UTF8CharLength(ch); -	bytes[0] = ch; -	for (size_t i=1; i<widthChar; i++) { -		bytes[i] = cb.CharAt(static_cast<int>(pos+i)); -		if (!IsTrailByte(static_cast<unsigned char>(bytes[i]))) { // Bad byte -			widthChar = 1; -		} -	} -	return widthChar; -} -  CaseFolderTable::CaseFolderTable() {  	for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {  		mapping[iChar] = static_cast<char>(iChar); @@ -1476,37 +1463,46 @@ long Document::FindText(int minPos, int maxPos, const char *search,  					break;  			}  		} else if (SC_CP_UTF8 == dbcsCodePage) { -			const size_t maxBytesCharacter = 4;  			const size_t maxFoldingExpansion = 4; -			std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1); +			std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);  			const int lenSearch = static_cast<int>(  				pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind)); +			char bytes[UTF8MaxBytes + 1]; +			char folded[UTF8MaxBytes * maxFoldingExpansion + 1];  			while (forward ? (pos < endPos) : (pos >= endPos)) {  				int widthFirstCharacter = 0; -				int indexDocument = 0; +				int posIndexDocument = pos;  				int indexSearch = 0;  				bool characterMatches = true; -				while (characterMatches && -					((pos + indexDocument) < limitPos) && -					(indexSearch < lenSearch)) { -					char bytes[maxBytesCharacter + 1]; -					bytes[maxBytesCharacter] = 0; -					const int widthChar = static_cast<int>(ExtractChar(pos + indexDocument, bytes)); +				for (;;) { +					const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument)); +					bytes[0] = leadByte; +					int widthChar = 1; +					if (!UTF8IsAscii(leadByte)) { +						const int widthCharBytes = UTF8BytesOfLead[leadByte]; +						for (int b=1; b<widthCharBytes; b++) { +							bytes[b] = cb.CharAt(posIndexDocument+b); +						} +						widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth; +					}  					if (!widthFirstCharacter)  						widthFirstCharacter = widthChar; -					if ((pos + indexDocument + widthChar) > limitPos) +					if ((posIndexDocument + widthChar) > limitPos)  						break; -					char folded[maxBytesCharacter * maxFoldingExpansion + 1];  					const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));  					folded[lenFlat] = 0;  					// Does folded match the buffer  					characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat); -					indexDocument += widthChar; +					if (!characterMatches) +						break; +					posIndexDocument += widthChar;  					indexSearch += lenFlat; +					if (indexSearch >= lenSearch) +						break;  				}  				if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) { -					if (MatchesWordOptions(word, wordStart, pos, indexDocument)) { -						*length = indexDocument; +					if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) { +						*length = posIndexDocument - pos;  						return pos;  					}  				} diff --git a/src/Document.h b/src/Document.h index ec41603eb..18bf00a3d 100644 --- a/src/Document.h +++ b/src/Document.h @@ -352,7 +352,6 @@ public:  	int NextWordEnd(int pos, int delta);  	int SCI_METHOD Length() const { return cb.Length(); }  	void Allocate(int newSize) { cb.Allocate(newSize); } -	size_t ExtractChar(int pos, char *bytes);  	bool MatchesWordOptions(bool word, bool wordStart, int pos, int length);  	long FindText(int minPos, int maxPos, const char *search, bool caseSensitive, bool word,  		bool wordStart, bool regExp, int flags, int *length, CaseFolder *pcf); diff --git a/src/UniConversion.h b/src/UniConversion.h index 87cc43f77..704f16239 100644 --- a/src/UniConversion.h +++ b/src/UniConversion.h @@ -5,6 +5,8 @@  // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>  // The License.txt file describes the conditions under which this software may be distributed. +const int UTF8MaxBytes = 4; +  unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen);  void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len);  unsigned int UTF8CharLength(unsigned char ch); @@ -18,5 +20,9 @@ inline bool UTF8IsTrailByte(int ch) {  	return (ch >= 0x80) && (ch < 0xc0);  } +inline bool UTF8IsAscii(int ch) { +	return ch < 0x80; +} +  enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 };  int UTF8Classify(const unsigned char *us, int len); | 
