diff options
author | Zufu Liu <unknown> | 2022-12-02 09:02:19 +1100 |
---|---|---|
committer | Zufu Liu <unknown> | 2022-12-02 09:02:19 +1100 |
commit | 312b3bc50a7d8de832b1a8681644ba4681a9a598 (patch) | |
tree | 56960dd14cc57cc09e33dd74d7361572a85ce0f1 /src/CaseConvert.cxx | |
parent | 9a400bd33f0cb4f322de4208bb7159673a399521 (diff) | |
download | scintilla-mirror-312b3bc50a7d8de832b1a8681644ba4681a9a598.tar.gz |
Feature [feature-requests:#1458] Move code into member functions, use
string_view and constexpr.
Diffstat (limited to 'src/CaseConvert.cxx')
-rw-r--r-- | src/CaseConvert.cxx | 182 |
1 files changed, 76 insertions, 106 deletions
diff --git a/src/CaseConvert.cxx b/src/CaseConvert.cxx index 5beffbcc2..601432f14 100644 --- a/src/CaseConvert.cxx +++ b/src/CaseConvert.cxx @@ -31,7 +31,7 @@ namespace { // Another pattern (pitch==2) is where each lower case letter is preceded by // the upper case form. These are also grouped into ranges. -int symmetricCaseConversionRanges[] = { +constexpr int symmetricCaseConversionRanges[] = { //lower, upper, range length, range pitch //++Autogenerated -- start of section automatically generated //**\(\*\n\) @@ -94,7 +94,7 @@ int symmetricCaseConversionRanges[] = { // Code points that are symmetric but don't fit into a range of similar characters // are listed here. -int symmetricCaseConversions[] = { +constexpr int symmetricCaseConversions[] = { //lower, upper //++Autogenerated -- start of section automatically generated //**1 \(\*\n\) @@ -262,7 +262,7 @@ int symmetricCaseConversions[] = { // folding is different to lowering, or (as appropriate) upper(lower(x)) != x or // lower(upper(x)) != x. -const char *complexCaseConversions = +constexpr std::string_view complexCaseConversions = // Original | Folded | Upper | Lower | //++Autogenerated -- start of section automatically generated //**2 \(\*\n\) @@ -577,22 +577,20 @@ const char *complexCaseConversions = //--Autogenerated -- end of section automatically generated ; +// Maximum length of a case conversion result is 6 bytes in UTF-8 +constexpr size_t maxConversionLength = 6; + class CaseConverter : public ICaseConverter { - // Maximum length of a case conversion result is 6 bytes in UTF-8 - enum { maxConversionLength=6 }; struct ConversionString { - char conversion[maxConversionLength+1]; - ConversionString() noexcept : conversion{} { - } + char conversion[maxConversionLength+1]{}; }; // Conversions are initially store in a vector of structs but then decomposed into // parallel arrays as that is about 10% faster to search. struct CharacterConversion { - int character; + int character = 0; ConversionString conversion; - CharacterConversion() noexcept : character(0) { - // Empty case: NUL -> "". - } + // Empty case: NUL -> "". + CharacterConversion() noexcept = default; CharacterConversion(int character_, std::string_view conversion_) noexcept : character(character_) { assert(conversion_.length() <= maxConversionLength); try { @@ -607,6 +605,7 @@ class CaseConverter : public ICaseConverter { return character < other.character; } }; + CaseConversion conversion; typedef std::vector<CharacterConversion> CharacterToConversion; CharacterToConversion characterToConversion; // The parallel arrays @@ -614,7 +613,8 @@ class CaseConverter : public ICaseConverter { std::vector<ConversionString> conversions; public: - CaseConverter() = default; + explicit CaseConverter(CaseConversion conversion_) : conversion(conversion_) { + }; // Deleted so CaseConverter objects can not be copied. CaseConverter(const CaseConverter &) = delete; CaseConverter(CaseConverter &&) = delete; @@ -624,8 +624,8 @@ public: bool Initialised() const noexcept { return !characters.empty(); } - void Add(int character, const char *conversion) { - characterToConversion.emplace_back(character, conversion); + void Add(int character, std::string_view conversion_) { + characterToConversion.emplace_back(character, conversion_); } const char *Find(int character) { const std::vector<int>::iterator it = std::lower_bound(characters.begin(), characters.end(), character); @@ -690,32 +690,37 @@ public: // Empty the original calculated data completely CharacterToConversion().swap(characterToConversion); } + void AddSymmetric(int lower, int upper); + void SetupConversions(); }; -CaseConverter caseConvFold; -CaseConverter caseConvUp; -CaseConverter caseConvLow; +CaseConverter caseConvFold(CaseConversion::fold); +CaseConverter caseConvUp(CaseConversion::upper); +CaseConverter caseConvLow(CaseConversion::lower); -void AddSymmetric(CaseConversion conversion, int lower,int upper) { - char lowerUTF8[UTF8MaxBytes+1]; - UTF8FromUTF32Character(lower, lowerUTF8); - char upperUTF8[UTF8MaxBytes+1]; - UTF8FromUTF32Character(upper, upperUTF8); +void CaseConverter::AddSymmetric(int lower, int upper) { + const int character = (conversion == CaseConversion::upper) ? lower : upper; + const int source = (conversion == CaseConversion::upper) ? upper : lower; + char converted[maxConversionLength+1]{}; + UTF8FromUTF32Character(source, converted); + Add(character, converted); +} - switch (conversion) { - case CaseConversion::fold: - caseConvFold.Add(upper, lowerUTF8); - break; - case CaseConversion::upper: - caseConvUp.Add(lower, upperUTF8); - break; - case CaseConversion::lower: - caseConvLow.Add(upper, lowerUTF8); - break; +// Return the next '|' separated field and remove from view. +std::string_view NextField(std::string_view &view) { + const size_t separatorPosition = view.find_first_of('|'); + const std::string_view field = view.substr(0, separatorPosition); + if (separatorPosition == std::string_view::npos) { + // Reached the end so empty the view + view.remove_prefix(view.length()); + } else { + // Remove the '|' from the view as well as the field + view.remove_prefix(separatorPosition + 1); } + return field; } -void SetupConversions(CaseConversion conversion) { +void CaseConverter::SetupConversions() { // First initialize for the symmetric ranges for (size_t i=0; i<std::size(symmetricCaseConversionRanges);) { const int lower = symmetricCaseConversionRanges[i++]; @@ -723,91 +728,63 @@ void SetupConversions(CaseConversion conversion) { const int length = symmetricCaseConversionRanges[i++]; const int pitch = symmetricCaseConversionRanges[i++]; for (int j=0; j<length*pitch; j+=pitch) { - AddSymmetric(conversion, lower+j, upper+j); + AddSymmetric(lower+j, upper+j); } } // Add the symmetric singletons for (size_t i=0; i<std::size(symmetricCaseConversions);) { const int lower = symmetricCaseConversions[i++]; const int upper = symmetricCaseConversions[i++]; - AddSymmetric(conversion, lower, upper); + AddSymmetric(lower, upper); } // Add the complex cases - const char *sComplex = complexCaseConversions; - while (*sComplex) { - // Longest ligature is 3 character so 5 for safety - constexpr size_t lenUTF8 = 5*UTF8MaxBytes+1; - unsigned char originUTF8[lenUTF8]{}; - char foldedUTF8[lenUTF8]{}; - char lowerUTF8[lenUTF8]{}; - char upperUTF8[lenUTF8]{}; - size_t i = 0; - while (*sComplex && *sComplex != '|') { - originUTF8[i++] = *sComplex; - sComplex++; - } - sComplex++; - originUTF8[i] = 0; - i = 0; - while (*sComplex && *sComplex != '|') { - foldedUTF8[i++] = *sComplex; - sComplex++; - } - sComplex++; - foldedUTF8[i] = 0; - i = 0; - while (*sComplex && *sComplex != '|') { - upperUTF8[i++] = *sComplex; - sComplex++; - } - sComplex++; - upperUTF8[i] = 0; - i = 0; - while (*sComplex && *sComplex != '|') { - lowerUTF8[i++] = *sComplex; - sComplex++; - } - sComplex++; - lowerUTF8[i] = 0; - - const int character = UnicodeFromUTF8(originUTF8); - - if (conversion == CaseConversion::fold && foldedUTF8[0]) { - caseConvFold.Add(character, foldedUTF8); - } - - if (conversion == CaseConversion::upper && upperUTF8[0]) { - caseConvUp.Add(character, upperUTF8); + std::string_view sComplex = complexCaseConversions; + while (!sComplex.empty()) { + const std::string_view originUTF8 = NextField(sComplex); + const std::string_view foldedUTF8 = NextField(sComplex); + const std::string_view upperUTF8 = NextField(sComplex); + const std::string_view lowerUTF8 = NextField(sComplex); + + std::string_view converted; + switch (conversion) { + case CaseConversion::fold: + converted = foldedUTF8; + break; + case CaseConversion::upper: + converted = upperUTF8; + break; + case CaseConversion::lower: + default: + converted = lowerUTF8; + break; } - - if (conversion == CaseConversion::lower && lowerUTF8[0]) { - caseConvLow.Add(character, lowerUTF8); + if (!converted.empty()) { + const int character = UnicodeFromUTF8(reinterpret_cast<const unsigned char *>(originUTF8.data())); + Add(character, converted); } } + FinishedAdding(); +} + +CaseConverter *ConverterForConversion(CaseConversion conversion) { + CaseConverter *pCaseConv = &caseConvFold; switch (conversion) { case CaseConversion::fold: - caseConvFold.FinishedAdding(); + pCaseConv = &caseConvFold; break; case CaseConversion::upper: - caseConvUp.FinishedAdding(); + pCaseConv = &caseConvUp; break; case CaseConversion::lower: - caseConvLow.FinishedAdding(); + default: + pCaseConv = &caseConvLow; break; } -} - -CaseConverter *ConverterForConversion(CaseConversion conversion) noexcept { - switch (conversion) { - case CaseConversion::fold: - return &caseConvFold; - case CaseConversion::upper: - return &caseConvUp; - case CaseConversion::lower: - return &caseConvLow; + if (!pCaseConv->Initialised()) { + pCaseConv->SetupConversions(); } - return nullptr; + return pCaseConv; } } @@ -815,23 +792,16 @@ CaseConverter *ConverterForConversion(CaseConversion conversion) noexcept { namespace Scintilla::Internal { ICaseConverter *ConverterFor(CaseConversion conversion) { - CaseConverter *pCaseConv = ConverterForConversion(conversion); - if (!pCaseConv->Initialised()) - SetupConversions(conversion); - return pCaseConv; + return ConverterForConversion(conversion); } const char *CaseConvert(int character, CaseConversion conversion) { CaseConverter *pCaseConv = ConverterForConversion(conversion); - if (!pCaseConv->Initialised()) - SetupConversions(conversion); return pCaseConv->Find(character); } size_t CaseConvertString(char *converted, size_t sizeConverted, const char *mixed, size_t lenMixed, CaseConversion conversion) { CaseConverter *pCaseConv = ConverterForConversion(conversion); - if (!pCaseConv->Initialised()) - SetupConversions(conversion); return pCaseConv->CaseConvertString(converted, sizeConverted, mixed, lenMixed); } |