diff options
author | Neil <nyamatongwe@gmail.com> | 2013-07-11 10:43:40 +1000 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2013-07-11 10:43:40 +1000 |
commit | 5262f832018923ecc8ccd25bedecbd5a291a563f (patch) | |
tree | d14831d0e08cd988dd8425674a29d514326c99f4 /win32/ScintillaWin.cxx | |
parent | 8ee42f850d492efbec73969eca52243e1acd1c96 (diff) | |
download | scintilla-mirror-5262f832018923ecc8ccd25bedecbd5a291a563f.tar.gz |
Include case conversion data in Scintilla so that all platforms will perform
case conversion of Unicode text in accordance with Unicode.
Diffstat (limited to 'win32/ScintillaWin.cxx')
-rw-r--r-- | win32/ScintillaWin.cxx | 193 |
1 file changed, 64 insertions, 129 deletions
diff --git a/win32/ScintillaWin.cxx b/win32/ScintillaWin.cxx index 8c08d6ea1..cf5750cde 100644 --- a/win32/ScintillaWin.cxx +++ b/win32/ScintillaWin.cxx @@ -60,12 +60,15 @@ #include "ViewStyle.h" #include "CharClassify.h" #include "Decoration.h" +#include "CaseFolder.h" #include "Document.h" #include "Selection.h" #include "PositionCache.h" #include "Editor.h" #include "ScintillaBase.h" #include "UniConversion.h" +#include "CaseConvert.h" + #include "PlatWin.h" #ifdef SCI_LEXER @@ -1443,55 +1446,6 @@ void ScintillaWin::NotifyDoubleClick(Point pt, bool shift, bool ctrl, bool alt) MAKELPARAM(pt.x, pt.y)); } -class CaseFolderUTF8 : public CaseFolderTable { - // Allocate the expandable storage here so that it does not need to be reallocated - // for each call to Fold. - std::vector<wchar_t> utf16Mixed; - std::vector<wchar_t> utf16Folded; -public: - CaseFolderUTF8() { - StandardASCII(); - } - virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) { - if ((lenMixed == 1) && (sizeFolded > 0)) { - folded[0] = mapping[static_cast<unsigned char>(mixed[0])]; - return 1; - } else { - if (lenMixed > utf16Mixed.size()) { - utf16Mixed.resize(lenMixed + 8); - } - size_t nUtf16Mixed = ::MultiByteToWideChar(65001, 0, mixed, - static_cast<int>(lenMixed), - &utf16Mixed[0], - static_cast<int>(utf16Mixed.size())); - - if (nUtf16Mixed == 0) { - // Failed to convert -> bad UTF-8 - folded[0] = '\0'; - return 1; - } - - if (nUtf16Mixed * 4 > utf16Folded.size()) { // Maximum folding expansion factor of 4 - utf16Folded.resize(nUtf16Mixed * 4 + 8); - } - int lenFlat = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, - LCMAP_LINGUISTIC_CASING | LCMAP_LOWERCASE, - &utf16Mixed[0], - static_cast<int>(nUtf16Mixed), - &utf16Folded[0], - static_cast<int>(utf16Folded.size())); - - size_t lenOut = UTF8Length(&utf16Folded[0], lenFlat); - if (lenOut < sizeFolded) { - UTF8FromUTF16(&utf16Folded[0], lenFlat, folded, static_cast<int>(lenOut)); - return lenOut; - } else { - return 0; - 
} - } - } -}; - class CaseFolderDBCS : public CaseFolderTable { // Allocate the expandable storage here so that it does not need to be reallocated // for each call to Fold. @@ -1521,15 +1475,23 @@ public: return 1; } - if (nUtf16Mixed * 4 > utf16Folded.size()) { // Maximum folding expansion factor of 4 - utf16Folded.resize(nUtf16Mixed * 4 + 8); + unsigned int lenFlat = 0; + for (size_t mixIndex=0; mixIndex < nUtf16Mixed; mixIndex++) { + if ((lenFlat + 20) > utf16Folded.size()) + utf16Folded.resize(lenFlat + 60); + const char *foldedUTF8 = CaseConvert(utf16Mixed[mixIndex], CaseConversionFold); + if (foldedUTF8) { + // Maximum length of a case conversion is 6 bytes, 3 characters + wchar_t wFolded[20]; + unsigned int charsConverted = UTF16FromUTF8(foldedUTF8, + static_cast<unsigned int>(strlen(foldedUTF8)), + wFolded, sizeof(wFolded)/sizeof(wFolded[0])); + for (size_t j=0;j<charsConverted;j++) + utf16Folded[lenFlat++] = wFolded[j]; + } else { + utf16Folded[lenFlat++] = utf16Mixed[mixIndex]; + } } - int lenFlat = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, - LCMAP_LINGUISTIC_CASING | LCMAP_LOWERCASE, - &utf16Mixed[0], - static_cast<int>(nUtf16Mixed), - &utf16Folded[0], - static_cast<int>(utf16Folded.size())); size_t lenOut = ::WideCharToMultiByte(cp, 0, &utf16Folded[0], lenFlat, @@ -1550,7 +1512,7 @@ public: CaseFolder *ScintillaWin::CaseFolderForEncoding() { UINT cpDest = CodePageOfDocument(); if (cpDest == SC_CP_UTF8) { - return new CaseFolderUTF8(); + return new CaseFolderUnicode(); } else { if (pdoc->dbcsCodePage == 0) { CaseFolderTable *pcf = new CaseFolderTable(); @@ -1564,16 +1526,21 @@ CaseFolder *ScintillaWin::CaseFolderForEncoding() { unsigned int lengthUTF16 = ::MultiByteToWideChar(cpDoc, 0, sCharacter, 1, wCharacter, sizeof(wCharacter)/sizeof(wCharacter[0])); if (lengthUTF16 == 1) { - wchar_t wLower[20]; - int charsConverted = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, - LCMAP_LINGUISTIC_CASING | LCMAP_LOWERCASE, - wCharacter, lengthUTF16, wLower, 
sizeof(wLower)/sizeof(wLower[0])); - char sCharacterLowered[20]; - unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0, - wLower, charsConverted, - sCharacterLowered, sizeof(sCharacterLowered), NULL, 0); - if ((lengthConverted == 1) && (sCharacter[0] != sCharacterLowered[0])) { - pcf->SetTranslation(sCharacter[0], sCharacterLowered[0]); + const char *caseFolded = CaseConvert(wCharacter[0], CaseConversionFold); + if (caseFolded) { + wchar_t wLower[20]; + unsigned int charsConverted = UTF16FromUTF8(caseFolded, + static_cast<unsigned int>(strlen(caseFolded)), + wLower, sizeof(wLower)/sizeof(wLower[0])); + if (charsConverted == 1) { + char sCharacterLowered[20]; + unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0, + wLower, charsConverted, + sCharacterLowered, sizeof(sCharacterLowered), NULL, 0); + if ((lengthConverted == 1) && (sCharacter[0] != sCharacterLowered[0])) { + pcf->SetTranslation(sCharacter[0], sCharacterLowered[0]); + } + } } } } @@ -1585,13 +1552,17 @@ CaseFolder *ScintillaWin::CaseFolderForEncoding() { } std::string ScintillaWin::CaseMapString(const std::string &s, int caseMapping) { - if (s.size() == 0) - return std::string(); - - if (caseMapping == cmSame) + if ((s.size() == 0) || (caseMapping == cmSame)) return s; UINT cpDoc = CodePageOfDocument(); + if (cpDoc == SC_CP_UTF8) { + std::string retMapped(s.length() * maxExpansionCaseConversion, 0); + size_t lenMapped = CaseConvertString(&retMapped[0], retMapped.length(), s.c_str(), s.length(), + (caseMapping == cmUpper) ? CaseConversionUpper : CaseConversionLower); + retMapped.resize(lenMapped); + return retMapped; + } unsigned int lengthUTF16 = ::MultiByteToWideChar(cpDoc, 0, s.c_str(), static_cast<int>(s.size()), NULL, 0); @@ -1601,63 +1572,27 @@ std::string ScintillaWin::CaseMapString(const std::string &s, int caseMapping) { DWORD mapFlags = LCMAP_LINGUISTIC_CASING | ((caseMapping == cmUpper) ? 
LCMAP_UPPERCASE : LCMAP_LOWERCASE); - // Many conversions performed by search function are short so optimize this case. - enum { shortSize=20 }; - - if (s.size() > shortSize) { - // Use dynamic allocations for long strings - - // Change text to UTF-16 - std::vector<wchar_t> vwcText(lengthUTF16); - ::MultiByteToWideChar(cpDoc, 0, s.c_str(), static_cast<int>(s.size()), &vwcText[0], lengthUTF16); - - // Change case - int charsConverted = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, - &vwcText[0], lengthUTF16, NULL, 0); - std::vector<wchar_t> vwcConverted(charsConverted); - ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, - &vwcText[0], lengthUTF16, &vwcConverted[0], charsConverted); - - // Change back to document encoding - unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0, - &vwcConverted[0], static_cast<int>(vwcConverted.size()), - NULL, 0, NULL, 0); - std::vector<char> vcConverted(lengthConverted); - ::WideCharToMultiByte(cpDoc, 0, - &vwcConverted[0], static_cast<int>(vwcConverted.size()), - &vcConverted[0], static_cast<int>(vcConverted.size()), NULL, 0); - - return std::string(&vcConverted[0], vcConverted.size()); - - } else { - // Use static allocations for short strings as much faster - // A factor of 15 for single character strings - - // Change text to UTF-16 - wchar_t vwcText[shortSize]; - ::MultiByteToWideChar(cpDoc, 0, s.c_str(), static_cast<int>(s.size()), - vwcText, lengthUTF16); - - // Change case - int charsConverted = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, - vwcText, lengthUTF16, NULL, 0); - // Full mapping may produce up to 3 characters per input character - wchar_t vwcConverted[shortSize*3]; - ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, vwcText, lengthUTF16, - vwcConverted, charsConverted); - - // Change back to document encoding - unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0, - vwcConverted, charsConverted, - NULL, 0, NULL, 0); - // Each UTF-16 code unit may need up to 3 bytes in UTF-8 - char 
vcConverted[shortSize * 3 * 3]; - ::WideCharToMultiByte(cpDoc, 0, - vwcConverted, charsConverted, - vcConverted, lengthConverted, NULL, 0); - - return std::string(vcConverted, lengthConverted); - } + // Change text to UTF-16 + std::vector<wchar_t> vwcText(lengthUTF16); + ::MultiByteToWideChar(cpDoc, 0, s.c_str(), static_cast<int>(s.size()), &vwcText[0], lengthUTF16); + + // Change case + int charsConverted = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, + &vwcText[0], lengthUTF16, NULL, 0); + std::vector<wchar_t> vwcConverted(charsConverted); + ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, + &vwcText[0], lengthUTF16, &vwcConverted[0], charsConverted); + + // Change back to document encoding + unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0, + &vwcConverted[0], static_cast<int>(vwcConverted.size()), + NULL, 0, NULL, 0); + std::vector<char> vcConverted(lengthConverted); + ::WideCharToMultiByte(cpDoc, 0, + &vwcConverted[0], static_cast<int>(vwcConverted.size()), + &vcConverted[0], static_cast<int>(vcConverted.size()), NULL, 0); + + return std::string(&vcConverted[0], vcConverted.size()); } void ScintillaWin::Copy() { |