about summary refs log tree commit diff homepage
path: root/src
diff options
context:
space:
mode:
author nyamatongwe <unknown> 2010-03-25 12:10:59 +0000
committer nyamatongwe <unknown> 2010-03-25 12:10:59 +0000
commit 295013083c4e9454656c0e94ab977057ee55ea11 (patch)
tree 661dd867b477a189c846fada99399ba9a61bc191 /src
parent 9f6eff4d795ec5cef078a432b89744f5542a1ade (diff)
download scintilla-mirror-295013083c4e9454656c0e94ab977057ee55ea11.tar.gz
New case-insensitive searching implementation uses objects implementing
the CaseFolder interface to fold both search text and document text so they can be compared with a simple strcmp. A simple table-based folder, CaseFolderTable, is used for 8-bit encodings and maps input bytes to folded bytes. For multi-byte encodings other than UTF-8, a null (output same as input) CaseFolderTable is used. For UTF-8, more complex subclasses are used which call platform APIs to perform the folding. Folding is approximately to lower case, although this differs between platforms.
Diffstat (limited to 'src')
-rw-r--r-- src/Document.cxx 154
-rw-r--r-- src/Document.h 24
-rw-r--r-- src/Editor.cxx 43
-rw-r--r-- src/Editor.h 1
4 files changed, 198 insertions, 24 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index b1130bd09..fe8b43128 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -10,6 +10,17 @@
#include <stdio.h>
#include <ctype.h>
+#include <string>
+#include <vector>
+
+// With Borland C++ 5.5, including <string> includes Windows.h leading to defining
+// FindText to FindTextA which makes calls here to Document::FindText fail.
+#ifdef __BORLANDC__
+#ifdef FindText
+#undef FindText
+#endif
+#endif
+
#include "Platform.h"
#include "Scintilla.h"
@@ -22,6 +33,7 @@
#include "Decoration.h"
#include "Document.h"
#include "RESearch.h"
+#include "UniConversion.h"
#ifdef SCI_NAMESPACE
using namespace Scintilla;
@@ -1074,6 +1086,57 @@ static inline char MakeLowerCase(char ch) {
return static_cast<char>(ch - 'A' + 'a');
}
+static bool GoodTrailByte(int v) { // True when v is in 0x80..0xBF inclusive, the value range of a UTF-8 trail (continuation) byte.
+ return (v >= 0x80) && (v < 0xc0);
+}
+
+size_t Document::ExtractChar(int pos, char *bytes) { // Copies the bytes of the character starting at pos into bytes and returns how many belong to it; no NUL is appended and the capacity of bytes is not checked here.
+ unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
+ size_t widthChar = UTF8CharLength(ch); // Width implied by the lead byte alone; UTF8CharLength is declared outside this diff (presumably UniConversion.h).
+ bytes[0] = ch;
+ for (size_t i=1; i<widthChar; i++) {
+ bytes[i] = cb.CharAt(pos+i);
+ if (!GoodTrailByte(static_cast<unsigned char>(bytes[i]))) { // Bad byte: not in the trail byte range, so the sequence is malformed
+ widthChar = 1; // Report only the lead byte as the character; this also ends the loop because i is already >= 1.
+ }
+ }
+ return widthChar;
+}
+
+CaseFolderTable::CaseFolderTable() { // Starts with the identity table: every byte value maps to itself.
+ for (size_t iChar=0; iChar<sizeof(mapping); iChar++) { // sizeof(mapping) is the entry count because mapping is an array of char.
+ mapping[iChar] = static_cast<char>(iChar);
+ }
+}
+
+CaseFolderTable::~CaseFolderTable() { // Empty body: no explicit clean-up is performed.
+}
+
+size_t CaseFolderTable::Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) { // Translates lenMixed bytes of mixed through the table into folded; returns the byte count written, or 0 when it does not fit.
+ if (lenMixed > sizeFolded) { // Output buffer too small: nothing is written.
+ return 0;
+ } else {
+ for (size_t i=0; i<lenMixed; i++) {
+ folded[i] = mapping[static_cast<unsigned char>(mixed[i])]; // Cast keeps the index in 0..255 even where char is signed.
+ }
+ return lenMixed; // One output byte per input byte; no terminating NUL is added.
+ }
+}
+
+void CaseFolderTable::SetTranslation(char ch, char chTranslation) { // Makes later Fold calls translate byte ch to chTranslation.
+ mapping[static_cast<unsigned char>(ch)] = chTranslation; // Cast keeps the index in 0..255 even where char is signed.
+}
+
+void CaseFolderTable::StandardASCII() { // Rebuilds the whole table: 'A'..'Z' map to 'a'..'z', every other byte maps to itself.
+ for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
+ if (iChar >= 'A' && iChar <= 'Z') {
+ mapping[iChar] = static_cast<char>(iChar - 'A' + 'a'); // Upper case ASCII letter to its lower case counterpart.
+ } else {
+ mapping[iChar] = static_cast<char>(iChar); // Overwrites any earlier SetTranslation entry with the identity value.
+ }
+ }
+}
+
/**
* Find text in document, supporting both forward and backward
* searches (just pass minPos > maxPos to do a backward search)
@@ -1081,7 +1144,7 @@ static inline char MakeLowerCase(char ch) {
*/
long Document::FindText(int minPos, int maxPos, const char *s,
bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
- int *length) {
+ int *length, CaseFolder *pcf) {
if (regExp) {
if (!regex)
regex = CreateRegexSearch(&charClass);
@@ -1104,13 +1167,11 @@ long Document::FindText(int minPos, int maxPos, const char *s,
endSearch = endPos - lengthFind + 1;
}
//Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
- char firstChar = s[0];
- if (!caseSensitive)
- firstChar = static_cast<char>(MakeUpperCase(firstChar));
int pos = forward ? startPos : (startPos - 1);
- while (forward ? (pos < endSearch) : (pos >= endSearch)) {
- char ch = CharAt(pos);
- if (caseSensitive) {
+ char firstChar = s[0];
+ if (caseSensitive) {
+ while (forward ? (pos < endSearch) : (pos >= endSearch)) {
+ char ch = CharAt(pos);
if (ch == firstChar) {
bool found = true;
if (pos + lengthFind > Platform::Maximum(startPos, endPos)) found = false;
@@ -1126,27 +1187,88 @@ long Document::FindText(int minPos, int maxPos, const char *s,
return pos;
}
}
- } else {
- if (MakeUpperCase(ch) == firstChar) {
+ pos += increment;
+ if (dbcsCodePage && (pos >= 0)) {
+ // Ensure trying to match from start of character
+ pos = MovePositionOutsideChar(pos, increment, false);
+ }
+ }
+ } else if (SC_CP_UTF8 == dbcsCodePage) {
+ const size_t maxBytesCharacter = 4;
+ const size_t maxFoldingExpansion = 4;
+ int endMatch = Platform::Maximum(startPos, endPos);
+ std::vector<char> searchThing(*length * maxBytesCharacter * maxFoldingExpansion + 1);
+ size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), s, *length);
+ while (forward ? (pos < endSearch) : (pos >= endSearch)) {
+ bool matchChar = true;
+ int matchOff = 0;
+ int searchOff = 0;
+ int widthFirst = 0;
+ while (matchChar && (pos + matchOff < endMatch)) {
+ int widthChar;
+ char bytes[maxBytesCharacter + 1];
+ widthChar = ExtractChar(pos + matchOff, bytes);
+ bytes[maxBytesCharacter] = 0;
+ if (!widthFirst)
+ widthFirst = widthChar;
+ char folded[maxBytesCharacter * maxFoldingExpansion + 1];
+ int lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
+ folded[lenFlat] = 0;
+ // Does folded match the buffer
+ matchChar = 0 == strncmp(folded, &searchThing[0] + searchOff, lenFlat);
+ matchOff += widthChar;
+ searchOff += lenFlat;
+ if (searchOff >= static_cast<int>(lenSearch))
+ break;
+ }
+ if (matchChar && (searchOff == static_cast<int>(lenSearch))) {
+ if ((!word && !wordStart) ||
+ (word && IsWordAt(pos, pos + lengthFind)) ||
+ (wordStart && IsWordStartAt(pos))) {
+ *length = matchOff;
+ return pos;
+ }
+ }
+ if (forward) {
+ pos += widthFirst;
+ } else {
+ pos--;
+ if (pos > 0) {
+ // Ensure trying to match from start of character
+ pos = MovePositionOutsideChar(pos, increment, false);
+ }
+ }
+ }
+ } else {
+ CaseFolderTable caseFolder;
+ std::vector<char> searchThing(*length + 1);
+ pcf->Fold(&searchThing[0], searchThing.size(), s, *length);
+ while (forward ? (pos < endSearch) : (pos >= endSearch)) {
+ char ch = CharAt(pos);
+ char folded[2];
+ pcf->Fold(folded, sizeof(folded), &ch, 1);
+ if (folded[0] == searchThing[0]) {
bool found = true;
if (pos + lengthFind > Platform::Maximum(startPos, endPos)) found = false;
for (int posMatch = 1; posMatch < lengthFind && found; posMatch++) {
ch = CharAt(pos + posMatch);
- if (MakeUpperCase(ch) != MakeUpperCase(s[posMatch]))
+ pcf->Fold(folded, sizeof(folded), &ch, 1);
+ if (folded[0] != searchThing[posMatch])
found = false;
}
if (found) {
if ((!word && !wordStart) ||
(word && IsWordAt(pos, pos + lengthFind)) ||
- (wordStart && IsWordStartAt(pos)))
+ (wordStart && IsWordStartAt(pos))) {
return pos;
+ }
}
}
- }
- pos += increment;
- if (dbcsCodePage && (pos >= 0)) {
- // Ensure trying to match from start of character
- pos = MovePositionOutsideChar(pos, increment, false);
+ pos += increment;
+ if (dbcsCodePage && (pos >= 0)) {
+ // Ensure trying to match from start of character
+ pos = MovePositionOutsideChar(pos, increment, false);
+ }
}
}
}
diff --git a/src/Document.h b/src/Document.h
index c61c56892..73571cbdd 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -115,6 +115,24 @@ struct StyledText {
}
};
+class CaseFolder { // Abstract interface for folding text so that search text and document text can be compared byte for byte.
+public:
+ virtual ~CaseFolder() { // Virtual so an implementation can be deleted through a CaseFolder pointer.
+ };
+ virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) = 0; // Folds lenMixed bytes of mixed into folded (capacity sizeFolded); the table implementation returns bytes written, or 0 when the input does not fit.
+};
+
+class CaseFolderTable : public CaseFolder { // CaseFolder that translates each input byte independently through a 256-entry table.
+protected:
+ char mapping[256]; // mapping[b] is the folded value of byte b.
+public:
+ CaseFolderTable(); // Defined in Document.cxx; fills mapping with the identity translation.
+ virtual ~CaseFolderTable();
+ virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed); // Byte-for-byte table lookup; returns lenMixed, or 0 if lenMixed > sizeFolded.
+ void SetTranslation(char ch, char chTranslation); // Sets the folded value for a single byte.
+ void StandardASCII(); // Resets the table to fold only 'A'..'Z' to 'a'..'z'.
+};
+
/**
*/
class Document : PerLine {
@@ -254,9 +272,9 @@ public:
int NextWordEnd(int pos, int delta);
int Length() const { return cb.Length(); }
void Allocate(int newSize) { cb.Allocate(newSize); }
- long FindText(int minPos, int maxPos, const char *s,
- bool caseSensitive, bool word, bool wordStart, bool regExp, int flags, int *length);
- long FindText(int iMessage, unsigned long wParam, long lParam);
+ size_t ExtractChar(int pos, char *bytes);
+ long FindText(int minPos, int maxPos, const char *s, bool caseSensitive, bool word,
+ bool wordStart, bool regExp, int flags, int *length, CaseFolder *pcf);
const char *SubstituteByPosition(const char *text, int *length);
int LinesTotal() const;
diff --git a/src/Editor.cxx b/src/Editor.cxx
index 4bdbecda8..e5623b542 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -13,6 +13,7 @@
#include <string>
#include <vector>
#include <algorithm>
+#include <memory>
// With Borland C++ 5.5, including <string> includes Windows.h leading to defining
// FindText to FindTextA which makes calls here to Document::FindText fail.
@@ -5309,6 +5310,31 @@ void Editor::Indent(bool forwards) {
}
}
+class CaseFolderASCII : public CaseFolderTable { // Table folder whose constructor installs the standard ASCII upper-to-lower mapping.
+public:
+ CaseFolderASCII() {
+ StandardASCII(); // Replace the identity table set by the base constructor with the 'A'..'Z' -> 'a'..'z' mapping.
+ }
+ ~CaseFolderASCII() {
+ }
+ virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) { // NOTE(review): this body matches CaseFolderTable::Fold in Document.cxx line for line, so the override looks redundant - confirm before removing.
+ if (lenMixed > sizeFolded) { // Output buffer too small: nothing is written.
+ return 0;
+ } else {
+ for (size_t i=0; i<lenMixed; i++) {
+ folded[i] = mapping[static_cast<unsigned char>(mixed[i])]; // Cast keeps the index in 0..255 even where char is signed.
+ }
+ return lenMixed;
+ }
+ }
+};
+
+
+CaseFolder *Editor::CaseFolderForEncoding() { // Returns a newly allocated folder that the caller must delete; the callers in this diff hold it in std::auto_ptr. Declared virtual in Editor.h.
+ // Simple default that only maps ASCII upper case to lower case.
+ return new CaseFolderASCII();
+}
+
/**
* Search of a text in the document, in the given range.
* @return The position of the found text, -1 if not found.
@@ -5320,13 +5346,15 @@ long Editor::FindText(
Sci_TextToFind *ft = reinterpret_cast<Sci_TextToFind *>(lParam);
int lengthFound = istrlen(ft->lpstrText);
+ std::auto_ptr<CaseFolder> pcf(CaseFolderForEncoding());
int pos = pdoc->FindText(ft->chrg.cpMin, ft->chrg.cpMax, ft->lpstrText,
(wParam & SCFIND_MATCHCASE) != 0,
(wParam & SCFIND_WHOLEWORD) != 0,
(wParam & SCFIND_WORDSTART) != 0,
(wParam & SCFIND_REGEXP) != 0,
wParam,
- &lengthFound);
+ &lengthFound,
+ pcf.get());
if (pos != -1) {
ft->chrgText.cpMin = pos;
ft->chrgText.cpMax = pos + lengthFound;
@@ -5363,6 +5391,7 @@ long Editor::SearchText(
const char *txt = reinterpret_cast<char *>(lParam);
int pos;
int lengthFound = istrlen(txt);
+ std::auto_ptr<CaseFolder> pcf(CaseFolderForEncoding());
if (iMessage == SCI_SEARCHNEXT) {
pos = pdoc->FindText(searchAnchor, pdoc->Length(), txt,
(wParam & SCFIND_MATCHCASE) != 0,
@@ -5370,7 +5399,8 @@ long Editor::SearchText(
(wParam & SCFIND_WORDSTART) != 0,
(wParam & SCFIND_REGEXP) != 0,
wParam,
- &lengthFound);
+ &lengthFound,
+ pcf.get());
} else {
pos = pdoc->FindText(searchAnchor, 0, txt,
(wParam & SCFIND_MATCHCASE) != 0,
@@ -5378,9 +5408,9 @@ long Editor::SearchText(
(wParam & SCFIND_WORDSTART) != 0,
(wParam & SCFIND_REGEXP) != 0,
wParam,
- &lengthFound);
+ &lengthFound,
+ pcf.get());
}
-
if (pos != -1) {
SetSelection(pos, pos + lengthFound);
}
@@ -5411,13 +5441,16 @@ std::string Editor::CaseMapString(const std::string &s, int caseMapping) {
*/
long Editor::SearchInTarget(const char *text, int length) {
int lengthFound = length;
+
+ std::auto_ptr<CaseFolder> pcf(CaseFolderForEncoding());
int pos = pdoc->FindText(targetStart, targetEnd, text,
(searchFlags & SCFIND_MATCHCASE) != 0,
(searchFlags & SCFIND_WHOLEWORD) != 0,
(searchFlags & SCFIND_WORDSTART) != 0,
(searchFlags & SCFIND_REGEXP) != 0,
searchFlags,
- &lengthFound);
+ &lengthFound,
+ pcf.get());
if (pos != -1) {
targetStart = pos;
targetEnd = pos + lengthFound;
diff --git a/src/Editor.h b/src/Editor.h
index 053b10a9e..180db571a 100644
--- a/src/Editor.h
+++ b/src/Editor.h
@@ -424,6 +424,7 @@ protected: // ScintillaBase subclass needs access to much of Editor
void Indent(bool forwards);
+ virtual CaseFolder *CaseFolderForEncoding();
long FindText(uptr_t wParam, sptr_t lParam);
void SearchAnchor();
long SearchText(unsigned int iMessage, uptr_t wParam, sptr_t lParam);