about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author nyamatongwe <unknown> 2010-08-05 13:47:25 +1000
committer nyamatongwe <unknown> 2010-08-05 13:47:25 +1000
commit 3a51b94f2cb34ea717cb7dc5f49d40eb21bf76bb (patch)
tree 6d2d953c441f3777d8b3198865fc873d8284c617
parent 63a2f56fa532bcbac8eb65119c3ebbb5c942328d (diff)
download scintilla-mirror-3a51b94f2cb34ea717cb7dc5f49d40eb21bf76bb.tar.gz
Case insensitive search in DBCS and faster processing of DBCS.
DBCS case folder implemented on Windows.
-rw-r--r-- src/Document.cxx 79
-rw-r--r-- src/Document.h 1
-rw-r--r-- win32/ScintillaWin.cxx 58
3 files changed, 117 insertions, 21 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index a5907f97f..fa8ec0857 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -485,7 +485,16 @@ int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
} else {
// Anchor DBCS calculations at start of line because start of line can
// not be a DBCS trail byte.
- int posCheck = LineStart(LineFromPosition(pos));
+ int posStartLine = LineStart(LineFromPosition(pos));
+ if (pos == posStartLine)
+ return pos;
+
+ // Step back until a non-lead-byte is found.
+ int posCheck = pos;
+ while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
+ posCheck--;
+
+ // Check from known start of character.
while (posCheck < pos) {
int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
if (posCheck + mbsize == pos) {
@@ -575,6 +584,17 @@ int Document::NextPosition(int pos, int moveDir) {
return pos;
}
+bool Document::NextCharacter(int &pos, int moveDir) {
+ // Replaces pos with NextPosition(pos, moveDir); returns true if pos changed, false if it was left as it was
+ int posNext = NextPosition(pos, moveDir);
+ if (posNext == pos) {
+ return false; // NextPosition did not move, so pos is left untouched
+ } else {
+ pos = posNext;
+ return true;
+ }
+}
+
int SCI_METHOD Document::CodePage() const {
return dbcsCodePage;
}
@@ -598,7 +618,7 @@ bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
return (uch >= 0x81) && (uch <= 0xFE);
case 1361:
// Korean Johab KS C-5601-1992
- return
+ return
((uch >= 0x84) && (uch <= 0xD3)) ||
((uch >= 0xD8) && (uch <= 0xDE)) ||
((uch >= 0xE0) && (uch <= 0xF9));
@@ -1316,13 +1336,8 @@ long Document::FindText(int minPos, int maxPos, const char *search,
if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
return pos;
}
- pos += increment;
- if (dbcsCodePage && (pos >= 0)) {
- // Have to use >= 0 as otherwise next statement would change
- // -1 to 0 and make loop infinite.
- // Ensure trying to match from start of character
- pos = MovePositionOutsideChar(pos, increment, false);
- }
+ if (!NextCharacter(pos, increment))
+ break;
}
} else if (SC_CP_UTF8 == dbcsCodePage) {
const size_t maxBytesCharacter = 4;
@@ -1359,12 +1374,43 @@ long Document::FindText(int minPos, int maxPos, const char *search,
if (forward) {
pos += widthFirstCharacter;
} else {
- pos--;
- if (pos > 0) {
- // Ensure trying to match from start of character
- pos = MovePositionOutsideChar(pos, increment, false);
+ if (!NextCharacter(pos, increment))
+ break;
+ }
+ }
+ } else if (dbcsCodePage) {
+ const size_t maxBytesCharacter = 2;
+ const size_t maxFoldingExpansion = 4;
+ std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
+ const int lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
+ while (forward ? (pos < endSearch) : (pos >= endSearch)) {
+ int indexDocument = 0;
+ int indexSearch = 0;
+ bool characterMatches = true;
+ while (characterMatches &&
+ ((pos + indexDocument) < limitPos) &&
+ (indexSearch < lenSearch)) {
+ char bytes[maxBytesCharacter + 1];
+ bytes[0] = cb.CharAt(pos + indexDocument);
+ const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
+ if (widthChar == 2)
+ bytes[1] = cb.CharAt(pos + indexDocument + 1);
+ char folded[maxBytesCharacter * maxFoldingExpansion + 1];
+ const int lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
+ folded[lenFlat] = 0;
+ // Does folded match the buffer
+ characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
+ indexDocument += widthChar;
+ indexSearch += lenFlat;
+ }
+ if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
+ if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
+ *length = indexDocument;
+ return pos;
}
}
+ if (!NextCharacter(pos, increment))
+ break;
}
} else {
CaseFolderTable caseFolder;
@@ -1381,11 +1427,8 @@ long Document::FindText(int minPos, int maxPos, const char *search,
if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
return pos;
}
- pos += increment;
- if (dbcsCodePage && (pos >= 0)) {
- // Ensure trying to match from start of character
- pos = MovePositionOutsideChar(pos, increment, false);
- }
+ if (!NextCharacter(pos, increment))
+ break;
}
}
}
diff --git a/src/Document.h b/src/Document.h
index d87840872..6d2c2d0bb 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -231,6 +231,7 @@ public:
bool InGoodUTF8(int pos, int &start, int &end);
int MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd=true);
int NextPosition(int pos, int moveDir);
+ bool NextCharacter(int &pos, int moveDir); // Returns true if pos changed
int SCI_METHOD CodePage() const;
bool SCI_METHOD IsDBCSLeadByte(char ch) const;
diff --git a/win32/ScintillaWin.cxx b/win32/ScintillaWin.cxx
index 570a2bc88..9873b82a4 100644
--- a/win32/ScintillaWin.cxx
+++ b/win32/ScintillaWin.cxx
@@ -1293,7 +1293,7 @@ void ScintillaWin::NotifyDoubleClick(Point pt, bool shift, bool ctrl, bool alt)
MAKELPARAM(pt.x, pt.y));
}
-class CaseFolderUTF8 : public CaseFolderTable {
+class CaseFolderUTF8 : public CaseFolderTable {
// Allocate the expandable storage here so that it does not need to be reallocated
// for each call to Fold.
std::vector<wchar_t> utf16Mixed;
@@ -1337,13 +1337,63 @@ public:
}
};
+class CaseFolderDBCS : public CaseFolderTable {
+ // Case folder for code page cp: Fold converts to UTF-16, lower-cases with LCMapStringW, then converts back.
+ // The UTF-16 buffers are members so that they do not need to be reallocated for each call to Fold.
+ std::vector<wchar_t> utf16Mixed;
+ std::vector<wchar_t> utf16Folded;
+ UINT cp; // Code page handed to MultiByteToWideChar and WideCharToMultiByte
+public:
+ CaseFolderDBCS(UINT cp_) : cp(cp_) {
+ StandardASCII();
+ }
+ virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) {
+ if ((lenMixed == 1) && (sizeFolded > 0)) { // Single byte: fold through the mapping table, no conversion
+ folded[0] = mapping[static_cast<unsigned char>(mixed[0])];
+ return 1;
+ } else {
+ if (lenMixed > utf16Mixed.size()) {
+ utf16Mixed.resize(lenMixed + 8);
+ }
+ size_t nUtf16Mixed = ::MultiByteToWideChar(cp, 0, mixed, lenMixed,
+ &utf16Mixed[0], utf16Mixed.size());
+
+ if (nUtf16Mixed == 0) {
+ // MultiByteToWideChar returned 0, so the input could not be converted: produce a single NUL byte
+ folded[0] = '\0'; // NOTE(review): sizeFolded is not checked on this path - confirm callers never pass 0
+ return 1;
+ }
+
+ if (nUtf16Mixed * 4 > utf16Folded.size()) { // Maximum folding expansion factor of 4
+ utf16Folded.resize(nUtf16Mixed * 4 + 8);
+ }
+ int lenFlat = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT,
+ LCMAP_LINGUISTIC_CASING | LCMAP_LOWERCASE,
+ &utf16Mixed[0], nUtf16Mixed, &utf16Folded[0], utf16Folded.size());
+
+ size_t lenOut = ::WideCharToMultiByte(cp, 0,
+ &utf16Folded[0], lenFlat,
+ NULL, 0, NULL, 0); // No output buffer: only asks how many bytes the result needs
+
+ if (lenOut < sizeFolded) {
+ ::WideCharToMultiByte(cp, 0,
+ &utf16Folded[0], lenFlat,
+ folded, lenOut, NULL, 0);
+ return lenOut;
+ } else {
+ return 0; // Folded text needs lenOut bytes, which is not less than sizeFolded
+ }
+ }
+ }
+};
+
CaseFolder *ScintillaWin::CaseFolderForEncoding() {
UINT cpDest = CodePageOfDocument();
if (cpDest == SC_CP_UTF8) {
return new CaseFolderUTF8();
} else {
- CaseFolderTable *pcf = new CaseFolderTable();
if (pdoc->dbcsCodePage == 0) {
+ CaseFolderTable *pcf = new CaseFolderTable();
pcf->StandardASCII();
// Only for single byte encodings
UINT cpDoc = CodePageOfDocument();
@@ -1367,8 +1417,10 @@ CaseFolder *ScintillaWin::CaseFolderForEncoding() {
}
}
}
+ return pcf;
+ } else {
+ return new CaseFolderDBCS(cpDest);
}
- return pcf;
}
}