diff options
-rw-r--r-- | src/Document.cxx | 79 | ||||
-rw-r--r-- | src/Document.h | 1 | ||||
-rw-r--r-- | win32/ScintillaWin.cxx | 58 |
3 files changed, 117 insertions, 21 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index a5907f97f..fa8ec0857 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -485,7 +485,16 @@ int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) { } else { // Anchor DBCS calculations at start of line because start of line can // not be a DBCS trail byte. - int posCheck = LineStart(LineFromPosition(pos)); + int posStartLine = LineStart(LineFromPosition(pos)); + if (pos == posStartLine) + return pos; + + // Step back until a non-lead-byte is found. + int posCheck = pos; + while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1))) + posCheck--; + + // Check from known start of character. while (posCheck < pos) { int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1; if (posCheck + mbsize == pos) { @@ -575,6 +584,17 @@ int Document::NextPosition(int pos, int moveDir) { return pos; } +bool Document::NextCharacter(int &pos, int moveDir) { + // Returns true if pos changed + int posNext = NextPosition(pos, moveDir); + if (posNext == pos) { + return false; + } else { + pos = posNext; + return true; + } +} + int SCI_METHOD Document::CodePage() const { return dbcsCodePage; } @@ -598,7 +618,7 @@ bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const { return (uch >= 0x81) && (uch <= 0xFE); case 1361: // Korean Johab KS C-5601-1992 - return + return ((uch >= 0x84) && (uch <= 0xD3)) || ((uch >= 0xD8) && (uch <= 0xDE)) || ((uch >= 0xE0) && (uch <= 0xF9)); @@ -1316,13 +1336,8 @@ long Document::FindText(int minPos, int maxPos, const char *search, if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) { return pos; } - pos += increment; - if (dbcsCodePage && (pos >= 0)) { - // Have to use >= 0 as otherwise next statement would change - // -1 to 0 and make loop infinite. - // Ensure trying to match from start of character - pos = MovePositionOutsideChar(pos, increment, false); - } + if (!NextCharacter(pos, increment)) + break; } } else if (SC_CP_UTF8 == dbcsCodePage) { const size_t maxBytesCharacter = 4; @@ -1359,12 +1374,43 @@ long Document::FindText(int minPos, int maxPos, const char *search, if (forward) { pos += widthFirstCharacter; } else { - pos--; - if (pos > 0) { - // Ensure trying to match from start of character - pos = MovePositionOutsideChar(pos, increment, false); + if (!NextCharacter(pos, increment)) + break; + } + } + } else if (dbcsCodePage) { + const size_t maxBytesCharacter = 2; + const size_t maxFoldingExpansion = 4; + std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1); + const int lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind); + while (forward ? (pos < endSearch) : (pos >= endSearch)) { + int indexDocument = 0; + int indexSearch = 0; + bool characterMatches = true; + while (characterMatches && + ((pos + indexDocument) < limitPos) && + (indexSearch < lenSearch)) { + char bytes[maxBytesCharacter + 1]; + bytes[0] = cb.CharAt(pos + indexDocument); + const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1; + if (widthChar == 2) + bytes[1] = cb.CharAt(pos + indexDocument + 1); + char folded[maxBytesCharacter * maxFoldingExpansion + 1]; + const int lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar); + folded[lenFlat] = 0; + // Does folded match the buffer + characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat); + indexDocument += widthChar; + indexSearch += lenFlat; + } + if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) { + if (MatchesWordOptions(word, wordStart, pos, indexDocument)) { + *length = indexDocument; + return pos; } } + if (!NextCharacter(pos, increment)) + break; } } else { CaseFolderTable caseFolder; @@ -1381,11 +1427,8 @@ long Document::FindText(int minPos, int maxPos, const char *search, if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) { return pos; } - pos += increment; - if (dbcsCodePage && (pos >= 0)) { - // Ensure trying to match from start of character - pos = MovePositionOutsideChar(pos, increment, false); - } + if (!NextCharacter(pos, increment)) + break; } } } diff --git a/src/Document.h b/src/Document.h index d87840872..6d2c2d0bb 100644 --- a/src/Document.h +++ b/src/Document.h @@ -231,6 +231,7 @@ public: bool InGoodUTF8(int pos, int &start, int &end); int MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd=true); int NextPosition(int pos, int moveDir); + bool NextCharacter(int &pos, int moveDir); // Returns true if pos changed int SCI_METHOD CodePage() const; bool SCI_METHOD IsDBCSLeadByte(char ch) const; diff --git a/win32/ScintillaWin.cxx b/win32/ScintillaWin.cxx index 570a2bc88..9873b82a4 100644 --- a/win32/ScintillaWin.cxx +++ b/win32/ScintillaWin.cxx @@ -1293,7 +1293,7 @@ void ScintillaWin::NotifyDoubleClick(Point pt, bool shift, bool ctrl, bool alt) MAKELPARAM(pt.x, pt.y)); } -class CaseFolderUTF8 : public CaseFolderTable { +class CaseFolderUTF8 : public CaseFolderTable { // Allocate the expandable storage here so that it does not need to be reallocated // for each call to Fold. std::vector<wchar_t> utf16Mixed; @@ -1337,13 +1337,63 @@ public: } }; +class CaseFolderDBCS : public CaseFolderTable { + // Allocate the expandable storage here so that it does not need to be reallocated + // for each call to Fold. + std::vector<wchar_t> utf16Mixed; + std::vector<wchar_t> utf16Folded; + UINT cp; +public: + CaseFolderDBCS(UINT cp_) : cp(cp_) { + StandardASCII(); + } + virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) { + if ((lenMixed == 1) && (sizeFolded > 0)) { + folded[0] = mapping[static_cast<unsigned char>(mixed[0])]; + return 1; + } else { + if (lenMixed > utf16Mixed.size()) { + utf16Mixed.resize(lenMixed + 8); + } + size_t nUtf16Mixed = ::MultiByteToWideChar(cp, 0, mixed, lenMixed, + &utf16Mixed[0], utf16Mixed.size()); + + if (nUtf16Mixed == 0) { + // Failed to convert -> bad input + folded[0] = '\0'; + return 1; + } + + if (nUtf16Mixed * 4 > utf16Folded.size()) { // Maximum folding expansion factor of 4 + utf16Folded.resize(nUtf16Mixed * 4 + 8); + } + int lenFlat = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, + LCMAP_LINGUISTIC_CASING | LCMAP_LOWERCASE, + &utf16Mixed[0], nUtf16Mixed, &utf16Folded[0], utf16Folded.size()); + + size_t lenOut = ::WideCharToMultiByte(cp, 0, + &utf16Folded[0], lenFlat, + NULL, 0, NULL, 0); + + if (lenOut < sizeFolded) { + ::WideCharToMultiByte(cp, 0, + &utf16Folded[0], lenFlat, + folded, lenOut, NULL, 0); + return lenOut; + } else { + return 0; + } + } + } +}; + CaseFolder *ScintillaWin::CaseFolderForEncoding() { UINT cpDest = CodePageOfDocument(); if (cpDest == SC_CP_UTF8) { return new CaseFolderUTF8(); } else { - CaseFolderTable *pcf = new CaseFolderTable(); if (pdoc->dbcsCodePage == 0) { + CaseFolderTable *pcf = new CaseFolderTable(); pcf->StandardASCII(); // Only for single byte encodings UINT cpDoc = CodePageOfDocument(); @@ -1367,8 +1417,10 @@ CaseFolder *ScintillaWin::CaseFolderForEncoding() { } } } + return pcf; + } else { + return new CaseFolderDBCS(cpDest); } - return pcf; } } |