aboutsummaryrefslogtreecommitdiffhomepage
path: root/win32/ScintillaWin.cxx
diff options
context:
space:
mode:
authorNeil <nyamatongwe@gmail.com>2013-07-11 10:43:40 +1000
committerNeil <nyamatongwe@gmail.com>2013-07-11 10:43:40 +1000
commit5262f832018923ecc8ccd25bedecbd5a291a563f (patch)
treed14831d0e08cd988dd8425674a29d514326c99f4 /win32/ScintillaWin.cxx
parent8ee42f850d492efbec73969eca52243e1acd1c96 (diff)
downloadscintilla-mirror-5262f832018923ecc8ccd25bedecbd5a291a563f.tar.gz
Include case conversion data in Scintilla so that all platforms will perform
case conversion of Unicode text in accordance with Unicode.
Diffstat (limited to 'win32/ScintillaWin.cxx')
-rw-r--r--win32/ScintillaWin.cxx193
1 files changed, 64 insertions, 129 deletions
diff --git a/win32/ScintillaWin.cxx b/win32/ScintillaWin.cxx
index 8c08d6ea1..cf5750cde 100644
--- a/win32/ScintillaWin.cxx
+++ b/win32/ScintillaWin.cxx
@@ -60,12 +60,15 @@
#include "ViewStyle.h"
#include "CharClassify.h"
#include "Decoration.h"
+#include "CaseFolder.h"
#include "Document.h"
#include "Selection.h"
#include "PositionCache.h"
#include "Editor.h"
#include "ScintillaBase.h"
#include "UniConversion.h"
+#include "CaseConvert.h"
+
#include "PlatWin.h"
#ifdef SCI_LEXER
@@ -1443,55 +1446,6 @@ void ScintillaWin::NotifyDoubleClick(Point pt, bool shift, bool ctrl, bool alt)
MAKELPARAM(pt.x, pt.y));
}
-class CaseFolderUTF8 : public CaseFolderTable {
- // Allocate the expandable storage here so that it does not need to be reallocated
- // for each call to Fold.
- std::vector<wchar_t> utf16Mixed;
- std::vector<wchar_t> utf16Folded;
-public:
- CaseFolderUTF8() {
- StandardASCII();
- }
- virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) {
- if ((lenMixed == 1) && (sizeFolded > 0)) {
- folded[0] = mapping[static_cast<unsigned char>(mixed[0])];
- return 1;
- } else {
- if (lenMixed > utf16Mixed.size()) {
- utf16Mixed.resize(lenMixed + 8);
- }
- size_t nUtf16Mixed = ::MultiByteToWideChar(65001, 0, mixed,
- static_cast<int>(lenMixed),
- &utf16Mixed[0],
- static_cast<int>(utf16Mixed.size()));
-
- if (nUtf16Mixed == 0) {
- // Failed to convert -> bad UTF-8
- folded[0] = '\0';
- return 1;
- }
-
- if (nUtf16Mixed * 4 > utf16Folded.size()) { // Maximum folding expansion factor of 4
- utf16Folded.resize(nUtf16Mixed * 4 + 8);
- }
- int lenFlat = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT,
- LCMAP_LINGUISTIC_CASING | LCMAP_LOWERCASE,
- &utf16Mixed[0],
- static_cast<int>(nUtf16Mixed),
- &utf16Folded[0],
- static_cast<int>(utf16Folded.size()));
-
- size_t lenOut = UTF8Length(&utf16Folded[0], lenFlat);
- if (lenOut < sizeFolded) {
- UTF8FromUTF16(&utf16Folded[0], lenFlat, folded, static_cast<int>(lenOut));
- return lenOut;
- } else {
- return 0;
- }
- }
- }
-};
-
class CaseFolderDBCS : public CaseFolderTable {
// Allocate the expandable storage here so that it does not need to be reallocated
// for each call to Fold.
@@ -1521,15 +1475,23 @@ public:
return 1;
}
- if (nUtf16Mixed * 4 > utf16Folded.size()) { // Maximum folding expansion factor of 4
- utf16Folded.resize(nUtf16Mixed * 4 + 8);
+ unsigned int lenFlat = 0;
+ for (size_t mixIndex=0; mixIndex < nUtf16Mixed; mixIndex++) {
+ if ((lenFlat + 20) > utf16Folded.size())
+ utf16Folded.resize(lenFlat + 60);
+ const char *foldedUTF8 = CaseConvert(utf16Mixed[mixIndex], CaseConversionFold);
+ if (foldedUTF8) {
+ // Maximum length of a case conversion is 6 bytes, 3 characters
+ wchar_t wFolded[20];
+ unsigned int charsConverted = UTF16FromUTF8(foldedUTF8,
+ static_cast<unsigned int>(strlen(foldedUTF8)),
+ wFolded, sizeof(wFolded)/sizeof(wFolded[0]));
+ for (size_t j=0;j<charsConverted;j++)
+ utf16Folded[lenFlat++] = wFolded[j];
+ } else {
+ utf16Folded[lenFlat++] = utf16Mixed[mixIndex];
+ }
}
- int lenFlat = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT,
- LCMAP_LINGUISTIC_CASING | LCMAP_LOWERCASE,
- &utf16Mixed[0],
- static_cast<int>(nUtf16Mixed),
- &utf16Folded[0],
- static_cast<int>(utf16Folded.size()));
size_t lenOut = ::WideCharToMultiByte(cp, 0,
&utf16Folded[0], lenFlat,
@@ -1550,7 +1512,7 @@ public:
CaseFolder *ScintillaWin::CaseFolderForEncoding() {
UINT cpDest = CodePageOfDocument();
if (cpDest == SC_CP_UTF8) {
- return new CaseFolderUTF8();
+ return new CaseFolderUnicode();
} else {
if (pdoc->dbcsCodePage == 0) {
CaseFolderTable *pcf = new CaseFolderTable();
@@ -1564,16 +1526,21 @@ CaseFolder *ScintillaWin::CaseFolderForEncoding() {
unsigned int lengthUTF16 = ::MultiByteToWideChar(cpDoc, 0, sCharacter, 1,
wCharacter, sizeof(wCharacter)/sizeof(wCharacter[0]));
if (lengthUTF16 == 1) {
- wchar_t wLower[20];
- int charsConverted = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT,
- LCMAP_LINGUISTIC_CASING | LCMAP_LOWERCASE,
- wCharacter, lengthUTF16, wLower, sizeof(wLower)/sizeof(wLower[0]));
- char sCharacterLowered[20];
- unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0,
- wLower, charsConverted,
- sCharacterLowered, sizeof(sCharacterLowered), NULL, 0);
- if ((lengthConverted == 1) && (sCharacter[0] != sCharacterLowered[0])) {
- pcf->SetTranslation(sCharacter[0], sCharacterLowered[0]);
+ const char *caseFolded = CaseConvert(wCharacter[0], CaseConversionFold);
+ if (caseFolded) {
+ wchar_t wLower[20];
+ unsigned int charsConverted = UTF16FromUTF8(caseFolded,
+ static_cast<unsigned int>(strlen(caseFolded)),
+ wLower, sizeof(wLower)/sizeof(wLower[0]));
+ if (charsConverted == 1) {
+ char sCharacterLowered[20];
+ unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0,
+ wLower, charsConverted,
+ sCharacterLowered, sizeof(sCharacterLowered), NULL, 0);
+ if ((lengthConverted == 1) && (sCharacter[0] != sCharacterLowered[0])) {
+ pcf->SetTranslation(sCharacter[0], sCharacterLowered[0]);
+ }
+ }
}
}
}
@@ -1585,13 +1552,17 @@ CaseFolder *ScintillaWin::CaseFolderForEncoding() {
}
std::string ScintillaWin::CaseMapString(const std::string &s, int caseMapping) {
- if (s.size() == 0)
- return std::string();
-
- if (caseMapping == cmSame)
+ if ((s.size() == 0) || (caseMapping == cmSame))
return s;
UINT cpDoc = CodePageOfDocument();
+ if (cpDoc == SC_CP_UTF8) {
+ std::string retMapped(s.length() * maxExpansionCaseConversion, 0);
+ size_t lenMapped = CaseConvertString(&retMapped[0], retMapped.length(), s.c_str(), s.length(),
+ (caseMapping == cmUpper) ? CaseConversionUpper : CaseConversionLower);
+ retMapped.resize(lenMapped);
+ return retMapped;
+ }
unsigned int lengthUTF16 = ::MultiByteToWideChar(cpDoc, 0, s.c_str(),
static_cast<int>(s.size()), NULL, 0);
@@ -1601,63 +1572,27 @@ std::string ScintillaWin::CaseMapString(const std::string &s, int caseMapping) {
DWORD mapFlags = LCMAP_LINGUISTIC_CASING |
((caseMapping == cmUpper) ? LCMAP_UPPERCASE : LCMAP_LOWERCASE);
- // Many conversions performed by search function are short so optimize this case.
- enum { shortSize=20 };
-
- if (s.size() > shortSize) {
- // Use dynamic allocations for long strings
-
- // Change text to UTF-16
- std::vector<wchar_t> vwcText(lengthUTF16);
- ::MultiByteToWideChar(cpDoc, 0, s.c_str(), static_cast<int>(s.size()), &vwcText[0], lengthUTF16);
-
- // Change case
- int charsConverted = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags,
- &vwcText[0], lengthUTF16, NULL, 0);
- std::vector<wchar_t> vwcConverted(charsConverted);
- ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags,
- &vwcText[0], lengthUTF16, &vwcConverted[0], charsConverted);
-
- // Change back to document encoding
- unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0,
- &vwcConverted[0], static_cast<int>(vwcConverted.size()),
- NULL, 0, NULL, 0);
- std::vector<char> vcConverted(lengthConverted);
- ::WideCharToMultiByte(cpDoc, 0,
- &vwcConverted[0], static_cast<int>(vwcConverted.size()),
- &vcConverted[0], static_cast<int>(vcConverted.size()), NULL, 0);
-
- return std::string(&vcConverted[0], vcConverted.size());
-
- } else {
- // Use static allocations for short strings as much faster
- // A factor of 15 for single character strings
-
- // Change text to UTF-16
- wchar_t vwcText[shortSize];
- ::MultiByteToWideChar(cpDoc, 0, s.c_str(), static_cast<int>(s.size()),
- vwcText, lengthUTF16);
-
- // Change case
- int charsConverted = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags,
- vwcText, lengthUTF16, NULL, 0);
- // Full mapping may produce up to 3 characters per input character
- wchar_t vwcConverted[shortSize*3];
- ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags, vwcText, lengthUTF16,
- vwcConverted, charsConverted);
-
- // Change back to document encoding
- unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0,
- vwcConverted, charsConverted,
- NULL, 0, NULL, 0);
- // Each UTF-16 code unit may need up to 3 bytes in UTF-8
- char vcConverted[shortSize * 3 * 3];
- ::WideCharToMultiByte(cpDoc, 0,
- vwcConverted, charsConverted,
- vcConverted, lengthConverted, NULL, 0);
-
- return std::string(vcConverted, lengthConverted);
- }
+ // Change text to UTF-16
+ std::vector<wchar_t> vwcText(lengthUTF16);
+ ::MultiByteToWideChar(cpDoc, 0, s.c_str(), static_cast<int>(s.size()), &vwcText[0], lengthUTF16);
+
+ // Change case
+ int charsConverted = ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags,
+ &vwcText[0], lengthUTF16, NULL, 0);
+ std::vector<wchar_t> vwcConverted(charsConverted);
+ ::LCMapStringW(LOCALE_SYSTEM_DEFAULT, mapFlags,
+ &vwcText[0], lengthUTF16, &vwcConverted[0], charsConverted);
+
+ // Change back to document encoding
+ unsigned int lengthConverted = ::WideCharToMultiByte(cpDoc, 0,
+ &vwcConverted[0], static_cast<int>(vwcConverted.size()),
+ NULL, 0, NULL, 0);
+ std::vector<char> vcConverted(lengthConverted);
+ ::WideCharToMultiByte(cpDoc, 0,
+ &vwcConverted[0], static_cast<int>(vwcConverted.size()),
+ &vcConverted[0], static_cast<int>(vcConverted.size()), NULL, 0);
+
+ return std::string(&vcConverted[0], vcConverted.size());
}
void ScintillaWin::Copy() {