diff options
| author | nyamatongwe <unknown> | 2010-08-05 13:47:25 +1000 | 
|---|---|---|
| committer | nyamatongwe <unknown> | 2010-08-05 13:47:25 +1000 | 
| commit | 3a51b94f2cb34ea717cb7dc5f49d40eb21bf76bb (patch) | |
| tree | 6d2d953c441f3777d8b3198865fc873d8284c617 | |
| parent | 63a2f56fa532bcbac8eb65119c3ebbb5c942328d (diff) | |
| download | scintilla-mirror-3a51b94f2cb34ea717cb7dc5f49d40eb21bf76bb.tar.gz | |
Case-insensitive search in DBCS and faster processing of DBCS.
DBCS case folder implemented on Windows.
| -rw-r--r-- | src/Document.cxx | 79 | ||||
| -rw-r--r-- | src/Document.h | 1 | ||||
| -rw-r--r-- | win32/ScintillaWin.cxx | 58 | 
3 files changed, 117 insertions, 21 deletions
| diff --git a/src/Document.cxx b/src/Document.cxx index a5907f97f..fa8ec0857 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -485,7 +485,16 @@ int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {  		} else {  			// Anchor DBCS calculations at start of line because start of line can  			// not be a DBCS trail byte. -			int posCheck = LineStart(LineFromPosition(pos)); +			int posStartLine = LineStart(LineFromPosition(pos)); +			if (pos == posStartLine) +				return pos; + +			// Step back until a non-lead-byte is found. +			int posCheck = pos; +			while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1))) +				posCheck--; + +			// Check from known start of character.  			while (posCheck < pos) {  				int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;  				if (posCheck + mbsize == pos) { @@ -575,6 +584,17 @@ int Document::NextPosition(int pos, int moveDir) {  	return pos;  } +bool Document::NextCharacter(int &pos, int moveDir) { +	// Returns true if pos changed +	int posNext = NextPosition(pos, moveDir); +	if (posNext == pos) { +		return false; +	} else { +		pos = posNext; +		return true; +	} +} +  int SCI_METHOD Document::CodePage() const {  	return dbcsCodePage;  } @@ -598,7 +618,7 @@ bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {  			return (uch >= 0x81) && (uch <= 0xFE);  		case 1361:  			// Korean Johab KS C-5601-1992 -			return  +			return  				((uch >= 0x84) && (uch <= 0xD3)) ||  				((uch >= 0xD8) && (uch <= 0xDE)) ||  				((uch >= 0xE0) && (uch <= 0xF9)); @@ -1316,13 +1336,8 @@ long Document::FindText(int minPos, int maxPos, const char *search,  				if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {  					return pos;  				} -				pos += increment; -				if (dbcsCodePage && (pos >= 0)) { -					// Have to use >= 0 as otherwise next statement would change -					// -1 to 0 and make loop infinite. 
-					// Ensure trying to match from start of character -					pos = MovePositionOutsideChar(pos, increment, false); -				} +				if (!NextCharacter(pos, increment)) +					break;  			}  		} else if (SC_CP_UTF8 == dbcsCodePage) {  			const size_t maxBytesCharacter = 4; @@ -1359,12 +1374,43 @@ long Document::FindText(int minPos, int maxPos, const char *search,  				if (forward) {  					pos += widthFirstCharacter;  				} else { -					pos--; -					if (pos > 0) { -						// Ensure trying to match from start of character -						pos = MovePositionOutsideChar(pos, increment, false); +					if (!NextCharacter(pos, increment)) +						break; +				} +			} +		} else if (dbcsCodePage) { +			const size_t maxBytesCharacter = 2; +			const size_t maxFoldingExpansion = 4; +			std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1); +			const int lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind); +			while (forward ? (pos < endSearch) : (pos >= endSearch)) { +				int indexDocument = 0; +				int indexSearch = 0; +				bool characterMatches = true; +				while (characterMatches && +					((pos + indexDocument) < limitPos) && +					(indexSearch < lenSearch)) { +					char bytes[maxBytesCharacter + 1]; +					bytes[0] = cb.CharAt(pos + indexDocument); +					const int widthChar = IsDBCSLeadByte(bytes[0]) ? 
2 : 1; +					if (widthChar == 2)  +						bytes[1] = cb.CharAt(pos + indexDocument + 1); +					char folded[maxBytesCharacter * maxFoldingExpansion + 1]; +					const int lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar); +					folded[lenFlat] = 0; +					// Does folded match the buffer +					characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat); +					indexDocument += widthChar; +					indexSearch += lenFlat; +				} +				if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) { +					if (MatchesWordOptions(word, wordStart, pos, indexDocument)) { +						*length = indexDocument; +						return pos;  					}  				} +				if (!NextCharacter(pos, increment)) +					break;  			}  		} else {  			CaseFolderTable caseFolder; @@ -1381,11 +1427,8 @@ long Document::FindText(int minPos, int maxPos, const char *search,  				if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {  					return pos;  				} -				pos += increment; -				if (dbcsCodePage && (pos >= 0)) { -					// Ensure trying to match from start of character -					pos = MovePositionOutsideChar(pos, increment, false); -				} +				if (!NextCharacter(pos, increment)) +					break;  			}  		}  	} diff --git a/src/Document.h b/src/Document.h index d87840872..6d2c2d0bb 100644 --- a/src/Document.h +++ b/src/Document.h @@ -231,6 +231,7 @@ public:  	bool InGoodUTF8(int pos, int &start, int &end);  	int MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd=true);  	int NextPosition(int pos, int moveDir); +	bool NextCharacter(int &pos, int moveDir);	// Returns true if pos changed  	int SCI_METHOD CodePage() const;  	bool SCI_METHOD IsDBCSLeadByte(char ch) const; diff --git a/win32/ScintillaWin.cxx b/win32/ScintillaWin.cxx index 570a2bc88..9873b82a4 100644 --- a/win32/ScintillaWin.cxx +++ b/win32/ScintillaWin.cxx @@ -1293,7 +1293,7 @@ void ScintillaWin::NotifyDoubleClick(Point pt, bool shift, bool ctrl, bool alt)  			  MAKELPARAM(pt.x, pt.y));  } -class 
CaseFolderUTF8  : public CaseFolderTable { +class CaseFolderUTF8 : public CaseFolderTable {  	// Allocate the expandable storage here so that it does not need to be reallocated  	// for each call to Fold.  	std::vector<wchar_t> utf16Mixed; @@ -1337,13 +1337,63 @@ public:  	}  }; +class CaseFolderDBCS : public CaseFolderTable { +	// Allocate the expandable storage here so that it does not need to be reallocated +	// for each call to Fold. +	std::vector<wchar_t> utf16Mixed; +	std::vector<wchar_t> utf16Folded; +	UINT cp; +public: +	CaseFolderDBCS(UINT cp_) : cp(cp_) { +		StandardASCII(); +	} +	virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) { +		if ((lenMixed == 1) && (sizeFolded > 0)) { +			folded[0] = mapping[static_cast<unsigned char>(mixed[0])]; +			return 1; +		} else { +			if (lenMixed > utf16Mixed.size()) { +				utf16Mixed.resize(lenMixed + 8); +			} +			size_t nUtf16Mixed = ::MultiByteToWideChar(cp, 0, mixed, lenMixed, +				&utf16Mixed[0], utf16Mixed.size()); + +			if (nUtf16Mixed == 0) { +				// Failed to convert -> bad input +				folded[0] = '\0'; +				return 1; +			} + +			if (nUtf16Mixed * 4 > utf16Folded.size()) {	// Maximum folding expansion factor of 4 +				utf16Folded.resize(nUtf16Mixed * 4 + 8); +			} +			int lenFlat = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, +				LCMAP_LINGUISTIC_CASING | LCMAP_LOWERCASE, +				&utf16Mixed[0], nUtf16Mixed, &utf16Folded[0], utf16Folded.size()); + +			size_t lenOut = ::WideCharToMultiByte(cp, 0,  +				&utf16Folded[0], lenFlat, +				NULL, 0, NULL, 0); + +			if (lenOut < sizeFolded) { +				::WideCharToMultiByte(cp, 0,  +					&utf16Folded[0], lenFlat, +					folded, lenOut, NULL, 0); +				return lenOut; +			} else { +				return 0; +			} +		} +	} +}; +  CaseFolder *ScintillaWin::CaseFolderForEncoding() {  	UINT cpDest = CodePageOfDocument();  	if (cpDest == SC_CP_UTF8) {  		return new CaseFolderUTF8();  	} else { -		CaseFolderTable *pcf = new CaseFolderTable();  		if (pdoc->dbcsCodePage 
== 0) { +			CaseFolderTable *pcf = new CaseFolderTable();  			pcf->StandardASCII();  			// Only for single byte encodings  			UINT cpDoc = CodePageOfDocument(); @@ -1367,8 +1417,10 @@ CaseFolder *ScintillaWin::CaseFolderForEncoding() {  					}  				}  			} +			return pcf; +		} else { +			return new CaseFolderDBCS(cpDest);  		} -		return pcf;  	}  } | 
