diff options
author | Neil <nyamatongwe@gmail.com> | 2013-07-11 10:43:40 +1000 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2013-07-11 10:43:40 +1000 |
commit | 5262f832018923ecc8ccd25bedecbd5a291a563f (patch) | |
tree | d14831d0e08cd988dd8425674a29d514326c99f4 /win32 | |
parent | 8ee42f850d492efbec73969eca52243e1acd1c96 (diff) | |
download | scintilla-mirror-5262f832018923ecc8ccd25bedecbd5a291a563f.tar.gz |
Include case conversion data in Scintilla so that all platforms will perform
case conversion of Unicode text in accordance with Unicode.
Diffstat (limited to 'win32')
-rw-r--r-- | win32/ScintillaWin.cxx | 193 | ||||
-rw-r--r-- | win32/makefile | 10 | ||||
-rw-r--r-- | win32/scintilla.mak | 25 |
3 files changed, 90 insertions, 138 deletions
diff --git a/win32/ScintillaWin.cxx b/win32/ScintillaWin.cxx index 8c08d6ea1..cf5750cde 100644 --- a/win32/ScintillaWin.cxx +++ b/win32/ScintillaWin.cxx @@ -60,12 +60,15 @@ #include "ViewStyle.h" #include "CharClassify.h" #include "Decoration.h" +#include "CaseFolder.h" #include "Document.h" #include "Selection.h" #include "PositionCache.h" #include "Editor.h" #include "ScintillaBase.h" #include "UniConversion.h" +#include "CaseConvert.h" + #include "PlatWin.h" #ifdef SCI_LEXER @@ -1443,55 +1446,6 @@ void ScintillaWin::NotifyDoubleClick(Point pt, bool shift, bool ctrl, bool alt) MAKELPARAM(pt.x, pt.y)); } -class CaseFolderUTF8 : public CaseFolderTable { - // Allocate the expandable storage here so that it does not need to be reallocated - // for each call to Fold. - std::vector<wchar_t> utf16Mixed; - std::vector<wchar_t> utf16Folded; -public: - CaseFolderUTF8() { - StandardASCII(); - } - virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) { - if ((lenMixed == 1) && (sizeFolded > 0)) { - folded[0] = mapping[static_cast<unsigned char>(mixed[0])]; - return 1; - } else { - if (lenMixed > utf16Mixed.size()) { - utf16Mixed.resize(lenMixed + 8); - } - size_t nUtf16Mixed = ::MultiByteToWideChar(65001, 0, mixed, - static_cast<int>(lenMixed), - &utf16Mixed[0], - static_cast<int>(utf16Mixed.size())); - - if (nUtf16Mixed == 0) { - // Failed to convert -> bad UTF-8 - folded[0] = '\0'; - return 1; - } - - if (nUtf16Mixed * 4 > utf16Folded.size()) { // Maximum folding expansion factor of 4 - utf16Folded.resize(nUtf16Mixed * 4 + 8); - } - int lenFlat = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, - LCMAP_LINGUISTIC_CASING | LCMAP_LOWERCASE, - &utf16Mixed[0], - static_cast<int>(nUtf16Mixed), - &utf16Folded[0], - static_cast<int>(utf16Folded.size())); - - size_t lenOut = UTF8Length(&utf16Folded[0], lenFlat); - if (lenOut < sizeFolded) { - UTF8FromUTF16(&utf16Folded[0], lenFlat, folded, static_cast<int>(lenOut)); - return lenOut; - } else { - return 0; - } - } - } -}; - class CaseFolderDBCS : public CaseFolderTable { // Allocate the expandable storage here so that it does not need to be reallocated // for each call to Fold. @@ -1521,15 +1475,23 @@ public: return 1; } - if (nUtf16Mixed * 4 > utf16Folded.size()) { // Maximum folding expansion factor of 4 - utf16Folded.resize(nUtf16Mixed * 4 + 8); + unsigned int lenFlat = 0; + for (size_t mixIndex=0; mixIndex < nUtf16Mixed; mixIndex++) { + if ((lenFlat + 20) > utf16Folded.size()) + utf16Folded.resize(lenFlat + 60); + const char *foldedUTF8 = CaseConvert(utf16Mixed[mixIndex], CaseConversionFold); + if (foldedUTF8) { + // Maximum length of a case conversion is 6 bytes, 3 characters + wchar_t wFolded[20]; + unsigned int charsConverted = UTF16FromUTF8(foldedUTF8, + static_cast<unsigned int>(strlen(foldedUTF8)), + wFolded, sizeof(wFolded)/sizeof(wFolded[0])); + for (size_t j=0;j<charsConverted;j++) + utf16Folded[lenFlat++] = wFolded[j]; + } else { + utf16Folded[lenFlat++] = utf16Mixed[mixIndex]; + } } - int lenFlat = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, - LCMAP_LINGUISTIC_CASING | LCMAP_LOWERCASE, - &utf16Mixed[0], - static_cast<int>(nUtf16Mixed), - &utf16Folded[0], - static_cast<int>(utf16Folded.size())); size_t lenOut = ::WideCharToMultiByte(cp, 0, &utf16Folded[0], lenFlat, @@ -1550,7 +1512,7 @@ public: CaseFolder *ScintillaWin::CaseFolderForEncoding() { UINT cpDest = CodePageOfDocument(); if (cpDest == SC_CP_UTF8) { - return new CaseFolderUTF8(); + return new CaseFolderUnicode(); } else { if (pdoc->dbcsCodePage == 0) { CaseFolderTable *pcf = new CaseFolderTable(); @@ -1564,16 +1526,21 @@ CaseFolder *ScintillaWin::CaseFolderForEncoding() { unsigned int lengthUTF16 = ::MultiByteToWideChar(cpDoc, 0, sCharacter, 1, wCharacter, sizeof(wCharacter)/sizeof(wCharacter[0])); if (lengthUTF16 == 1) { - wchar_t wLower[20]; - int charsConverted = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, - LCMAP_LINGUISTIC_CASING | LCMAP_LOWERCASE, - wCharacter, lengthUTF16, wLower, sizeof(wLower)/sizeof(wLower[0])); - char sCharacterLowered[20]; - unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0, - wLower, charsConverted, - sCharacterLowered, sizeof(sCharacterLowered), NULL, 0); - if ((lengthConverted == 1) && (sCharacter[0] != sCharacterLowered[0])) { - pcf->SetTranslation(sCharacter[0], sCharacterLowered[0]); + const char *caseFolded = CaseConvert(wCharacter[0], CaseConversionFold); + if (caseFolded) { + wchar_t wLower[20]; + unsigned int charsConverted = UTF16FromUTF8(caseFolded, + static_cast<unsigned int>(strlen(caseFolded)), + wLower, sizeof(wLower)/sizeof(wLower[0])); + if (charsConverted == 1) { + char sCharacterLowered[20]; + unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0, + wLower, charsConverted, + sCharacterLowered, sizeof(sCharacterLowered), NULL, 0); + if ((lengthConverted == 1) && (sCharacter[0] != sCharacterLowered[0])) { + pcf->SetTranslation(sCharacter[0], sCharacterLowered[0]); + } + } } } } @@ -1585,13 +1552,17 @@ CaseFolder *ScintillaWin::CaseFolderForEncoding() { } std::string ScintillaWin::CaseMapString(const std::string &s, int caseMapping) { - if (s.size() == 0) - return std::string(); - - if (caseMapping == cmSame) + if ((s.size() == 0) || (caseMapping == cmSame)) return s; UINT cpDoc = CodePageOfDocument(); + if (cpDoc == SC_CP_UTF8) { + std::string retMapped(s.length() * maxExpansionCaseConversion, 0); + size_t lenMapped = CaseConvertString(&retMapped[0], retMapped.length(), s.c_str(), s.length(), + (caseMapping == cmUpper) ? CaseConversionUpper : CaseConversionLower); + retMapped.resize(lenMapped); + return retMapped; + } unsigned int lengthUTF16 = ::MultiByteToWideChar(cpDoc, 0, s.c_str(), static_cast<int>(s.size()), NULL, 0); @@ -1601,63 +1572,27 @@ std::string ScintillaWin::CaseMapString(const std::string &s, int caseMapping) { DWORD mapFlags = LCMAP_LINGUISTIC_CASING | ((caseMapping == cmUpper) ? LCMAP_UPPERCASE : LCMAP_LOWERCASE); - // Many conversions performed by search function are short so optimize this case. - enum { shortSize=20 }; - - if (s.size() > shortSize) { - // Use dynamic allocations for long strings - - // Change text to UTF-16 - std::vector<wchar_t> vwcText(lengthUTF16); - ::MultiByteToWideChar(cpDoc, 0, s.c_str(), static_cast<int>(s.size()), &vwcText[0], lengthUTF16); - - // Change case - int charsConverted = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, - &vwcText[0], lengthUTF16, NULL, 0); - std::vector<wchar_t> vwcConverted(charsConverted); - ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, - &vwcText[0], lengthUTF16, &vwcConverted[0], charsConverted); - - // Change back to document encoding - unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0, - &vwcConverted[0], static_cast<int>(vwcConverted.size()), - NULL, 0, NULL, 0); - std::vector<char> vcConverted(lengthConverted); - ::WideCharToMultiByte(cpDoc, 0, - &vwcConverted[0], static_cast<int>(vwcConverted.size()), - &vcConverted[0], static_cast<int>(vcConverted.size()), NULL, 0); - - return std::string(&vcConverted[0], vcConverted.size()); - - } else { - // Use static allocations for short strings as much faster - // A factor of 15 for single character strings - - // Change text to UTF-16 - wchar_t vwcText[shortSize]; - ::MultiByteToWideChar(cpDoc, 0, s.c_str(), static_cast<int>(s.size()), - vwcText, lengthUTF16); - - // Change case - int charsConverted = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, - vwcText, lengthUTF16, NULL, 0); - // Full mapping may produce up to 3 characters per input character - wchar_t vwcConverted[shortSize*3]; - ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, vwcText, lengthUTF16, - vwcConverted, charsConverted); - - // Change back to document encoding - unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0, - vwcConverted, charsConverted, - NULL, 0, NULL, 0); - // Each UTF-16 code unit may need up to 3 bytes in UTF-8 - char vcConverted[shortSize * 3 * 3]; - ::WideCharToMultiByte(cpDoc, 0, - vwcConverted, charsConverted, - vcConverted, lengthConverted, NULL, 0); - - return std::string(vcConverted, lengthConverted); - } + // Change text to UTF-16 + std::vector<wchar_t> vwcText(lengthUTF16); + ::MultiByteToWideChar(cpDoc, 0, s.c_str(), static_cast<int>(s.size()), &vwcText[0], lengthUTF16); + + // Change case + int charsConverted = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, + &vwcText[0], lengthUTF16, NULL, 0); + std::vector<wchar_t> vwcConverted(charsConverted); + ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, + &vwcText[0], lengthUTF16, &vwcConverted[0], charsConverted); + + // Change back to document encoding + unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0, + &vwcConverted[0], static_cast<int>(vwcConverted.size()), + NULL, 0, NULL, 0); + std::vector<char> vcConverted(lengthConverted); + ::WideCharToMultiByte(cpDoc, 0, + &vwcConverted[0], static_cast<int>(vwcConverted.size()), + &vcConverted[0], static_cast<int>(vcConverted.size()), NULL, 0); + + return std::string(&vcConverted[0], vcConverted.size()); } void ScintillaWin::Copy() { diff --git a/win32/makefile b/win32/makefile index efcb518cb..c9086969a 100644 --- a/win32/makefile +++ b/win32/makefile @@ -48,6 +48,8 @@ LEXOBJS:=$(addsuffix .o,$(basename $(notdir $(wildcard ../lexers/Lex*.cxx)))) BASEOBJS = \ AutoComplete.o \ CallTip.o \ + CaseConvert.o \ + CaseFolder.o \ CellBuffer.o \ CharacterCategory.o \ CharacterSet.o \ @@ -111,7 +113,7 @@ ScintillaBaseL.o: ScintillaBase.cxx Platform.h \ CharClassify.h Decoration.h Document.h \ Selection.h PositionCache.h Editor.h \ ScintillaBase.h LexAccessor.h Accessor.h \ - LexerModule.h Catalogue.h + LexerModule.h Catalogue.h CaseFolder.h ScintillaWinL.o: ScintillaWin.cxx Platform.h \ ILexer.h Scintilla.h SplitVector.h \ @@ -122,7 +124,8 @@ ScintillaWinL.o: ScintillaWin.cxx Platform.h \ Document.h Selection.h PositionCache.h \ Editor.h ScintillaBase.h UniConversion.h \ LexAccessor.h Accessor.h \ - LexerModule.h Catalogue.h + LexerModule.h Catalogue.h CaseConvert.h \ + CaseFolder.h ScintillaWinS.o: ScintillaWin.cxx Platform.h \ ILexer.h Scintilla.h SplitVector.h \ @@ -131,7 +134,8 @@ ScintillaWinS.o: ScintillaWin.cxx Platform.h \ XPM.h LineMarker.h Style.h AutoComplete.h \ ViewStyle.h CharClassify.h Decoration.h \ Document.h Selection.h PositionCache.h \ - Editor.h ScintillaBase.h UniConversion.h + Editor.h ScintillaBase.h UniConversion.h \ + CaseConvert.h CaseFolder.h ScintillaBaseL.o: $(CC) $(CXXFLAGS) -D SCI_LEXER -c $< -o $@ diff --git a/win32/scintilla.mak b/win32/scintilla.mak index 742c2d3c0..13ecd43cc 100644 --- a/win32/scintilla.mak +++ b/win32/scintilla.mak @@ -60,6 +60,8 @@ clean: SOBJS=\ $(DIR_O)\AutoComplete.obj \ $(DIR_O)\CallTip.obj \ + $(DIR_O)\CaseConvert.obj \ + $(DIR_O)\CaseFolder.obj \ $(DIR_O)\CellBuffer.obj \ $(DIR_O)\CharacterCategory.obj \ $(DIR_O)\CharacterSet.obj \ @@ -185,6 +187,8 @@ LOBJS=\ $(DIR_O)\Accessor.obj \ $(DIR_O)\AutoComplete.obj \ $(DIR_O)\CallTip.obj \ + $(DIR_O)\CaseConvert.obj \ + $(DIR_O)\CaseFolder.obj \ $(DIR_O)\Catalogue.obj \ $(DIR_O)\CellBuffer.obj \ $(DIR_O)\CharacterCategory.obj \ @@ -263,6 +267,10 @@ $(DIR_O)\AutoComplete.obj: ../src/AutoComplete.cxx ../include/Platform.h \ $(DIR_O)\Accessor.obj: ../lexlib/Accessor.cxx ../lexlib/Accessor.h $(DIR_O)\CallTip.obj: ../src/CallTip.cxx ../include/Platform.h \ ../include/Scintilla.h ../src/CallTip.h +$(DIR_O)\CaseConvert.obj: ../src/CaseConvert.cxx ../src/CaseConvert.h \ + ../src/UnicodeFromUTF8.h ../src/UniConversion.h +$(DIR_O)\CaseFolder.obj: ../src/CaseFolder.cxx ../src/CaseFolder.h \ + ../src/CaseConvert.h ../src/UniConversion.h $(DIR_O)\CellBuffer.obj: ../src/CellBuffer.cxx ../include/Platform.h \ ../include/Scintilla.h ../src/SplitVector.h \ ../src/Partitioning.h ../src/CellBuffer.h @@ -279,13 +287,14 @@ $(DIR_O)\Document.obj: ../src/Document.cxx ../include/Platform.h \ ../include/Scintilla.h ../src/SplitVector.h \ ../src/Partitioning.h ../src/RunStyles.h ../src/CellBuffer.h \ ../src/CharClassify.h ../src/Decoration.h ../src/Document.h \ - ../src/RESearch.h ../src/PerLine.h + ../src/RESearch.h ../src/PerLine.h ../src/CaseFolder.h $(DIR_O)\Editor.obj: ../src/Editor.cxx ../include/Platform.h ../include/Scintilla.h \ ../src/ContractionState.h ../src/SplitVector.h \ ../src/Partitioning.h ../src/CellBuffer.h ../src/KeyMap.h \ ../src/RunStyles.h ../src/Indicator.h ../src/XPM.h ../src/LineMarker.h \ ../src/Style.h ../src/ViewStyle.h ../src/CharClassify.h \ - ../src/Decoration.h ../src/Document.h ../src/Editor.h ../src/Selection.h ../src/PositionCache.h + ../src/Decoration.h ../src/Document.h ../src/Editor.h ../src/Selection.h ../src/PositionCache.h \ + ../src/CaseFolder.h $(DIR_O)\ExternalLexer.obj: ../src/ExternalLexer.cxx ../include/Platform.h \ ../include/Scintilla.h ../include/SciLexer.h \ ../lexlib/Accessor.h ../src/ExternalLexer.h @@ -495,7 +504,8 @@ $(DIR_O)\PositionCache.obj: ../src/PositionCache.cxx ../include/Platform.h ../in ../src/Partitioning.h ../src/CellBuffer.h ../src/KeyMap.h \ ../src/RunStyles.h ../src/Indicator.h ../src/XPM.h ../src/LineMarker.h \ ../src/Style.h ../src/ViewStyle.h ../src/CharClassify.h \ - ../src/Decoration.h ../src/Document.h ../src/Editor.h ../src/Selection.h ../src/PositionCache.h + ../src/Decoration.h ../src/Document.h ../src/Editor.h ../src/Selection.h ../src/PositionCache.h \ + ../src/CaseFolder.h $(DIR_O)\PropSetSimple.obj: ../lexlib/PropSetSimple.cxx ../include/Platform.h $(DIR_O)\RESearch.obj: ../src/RESearch.cxx ../src/CharClassify.h ../src/RESearch.h $(DIR_O)\RunStyles.obj: ../src/RunStyles.cxx ../include/Platform.h \ @@ -508,7 +518,8 @@ $(DIR_O)\ScintillaBase.obj: ../src/ScintillaBase.cxx ../include/Platform.h \ ../src/CallTip.h ../src/KeyMap.h ../src/Indicator.h ../src/XPM.h \ ../src/LineMarker.h ../src/Style.h ../src/ViewStyle.h \ ../src/AutoComplete.h ../src/CharClassify.h ../src/Decoration.h \ - ../src/Document.h ../src/Editor.h ../src/Selection.h ../src/ScintillaBase.h + ../src/Document.h ../src/Editor.h ../src/Selection.h ../src/ScintillaBase.h \ + ../src/CaseFolder.h $(DIR_O)\ScintillaBaseL.obj: ../src/ScintillaBase.cxx ../include/Platform.h \ ../include/Scintilla.h \ ../src/ContractionState.h ../src/SplitVector.h \ @@ -516,7 +527,8 @@ $(DIR_O)\ScintillaBaseL.obj: ../src/ScintillaBase.cxx ../include/Platform.h \ ../src/CallTip.h ../src/KeyMap.h ../src/Indicator.h ../src/XPM.h \ ../src/LineMarker.h ../src/Style.h ../src/ViewStyle.h \ ../src/AutoComplete.h ../src/CharClassify.h ../src/Decoration.h \ - ../src/Document.h ../src/Editor.h ../src/Selection.h ../src/ScintillaBase.h + ../src/Document.h ../src/Editor.h ../src/Selection.h ../src/ScintillaBase.h \ + ../src/CaseFolder.h $(DIR_O)\ScintillaWin.obj: ScintillaWin.cxx ../include/Platform.h \ ../include/Scintilla.h ../src/ContractionState.h \ ../src/SplitVector.h ../src/Partitioning.h \ @@ -524,7 +536,8 @@ $(DIR_O)\ScintillaWin.obj: ScintillaWin.cxx ../include/Platform.h \ ../src/Indicator.h ../src/XPM.h ../src/LineMarker.h ../src/Style.h \ ../src/AutoComplete.h ../src/ViewStyle.h ../src/CharClassify.h \ ../src/Decoration.h ../src/Document.h ../src/Editor.h \ - ../src/ScintillaBase.h ../src/Selection.h ../src/UniConversion.h + ../src/ScintillaBase.h ../src/Selection.h ../src/UniConversion.h \ + ../src/CaseConvert.h ../src/CaseFolder.h $(DIR_O)\ScintillaWinS.obj: ScintillaWin.cxx ../include/Platform.h \ ../include/Scintilla.h ../src/ContractionState.h \ ../src/SplitVector.h ../src/Partitioning.h \ |