about summary refs log tree commit diff homepage
path: root/src/CaseConvert.cxx
diff options
context:
space:
mode:
author Zufu Liu <unknown> 2022-12-02 09:02:19 +1100
committer Zufu Liu <unknown> 2022-12-02 09:02:19 +1100
commit 312b3bc50a7d8de832b1a8681644ba4681a9a598 (patch)
tree 56960dd14cc57cc09e33dd74d7361572a85ce0f1 /src/CaseConvert.cxx
parent 9a400bd33f0cb4f322de4208bb7159673a399521 (diff)
download scintilla-mirror-312b3bc50a7d8de832b1a8681644ba4681a9a598.tar.gz
Feature [feature-requests:#1458] Move code into member functions, use
string_view and constexpr.
Diffstat (limited to 'src/CaseConvert.cxx')
-rw-r--r-- src/CaseConvert.cxx 182
1 files changed, 76 insertions, 106 deletions
diff --git a/src/CaseConvert.cxx b/src/CaseConvert.cxx
index 5beffbcc2..601432f14 100644
--- a/src/CaseConvert.cxx
+++ b/src/CaseConvert.cxx
@@ -31,7 +31,7 @@ namespace {
// Another pattern (pitch==2) is where each lower case letter is preceded by
// the upper case form. These are also grouped into ranges.
-int symmetricCaseConversionRanges[] = {
+constexpr int symmetricCaseConversionRanges[] = {
//lower, upper, range length, range pitch
//++Autogenerated -- start of section automatically generated
//**\(\*\n\)
@@ -94,7 +94,7 @@ int symmetricCaseConversionRanges[] = {
// Code points that are symmetric but don't fit into a range of similar characters
// are listed here.
-int symmetricCaseConversions[] = {
+constexpr int symmetricCaseConversions[] = {
//lower, upper
//++Autogenerated -- start of section automatically generated
//**1 \(\*\n\)
@@ -262,7 +262,7 @@ int symmetricCaseConversions[] = {
// folding is different to lowering, or (as appropriate) upper(lower(x)) != x or
// lower(upper(x)) != x.
-const char *complexCaseConversions =
+constexpr std::string_view complexCaseConversions =
// Original | Folded | Upper | Lower |
//++Autogenerated -- start of section automatically generated
//**2 \(\*\n\)
@@ -577,22 +577,20 @@ const char *complexCaseConversions =
//--Autogenerated -- end of section automatically generated
;
+// Maximum length of a case conversion result is 6 bytes in UTF-8
+constexpr size_t maxConversionLength = 6;
+
class CaseConverter : public ICaseConverter {
- // Maximum length of a case conversion result is 6 bytes in UTF-8
- enum { maxConversionLength=6 };
struct ConversionString {
- char conversion[maxConversionLength+1];
- ConversionString() noexcept : conversion{} {
- }
+ char conversion[maxConversionLength+1]{};
};
// Conversions are initially stored in a vector of structs but then decomposed into
// parallel arrays as that is about 10% faster to search.
struct CharacterConversion {
- int character;
+ int character = 0;
ConversionString conversion;
- CharacterConversion() noexcept : character(0) {
- // Empty case: NUL -> "".
- }
+ // Empty case: NUL -> "".
+ CharacterConversion() noexcept = default;
CharacterConversion(int character_, std::string_view conversion_) noexcept : character(character_) {
assert(conversion_.length() <= maxConversionLength);
try {
@@ -607,6 +605,7 @@ class CaseConverter : public ICaseConverter {
return character < other.character;
}
};
+ CaseConversion conversion;
typedef std::vector<CharacterConversion> CharacterToConversion;
CharacterToConversion characterToConversion;
// The parallel arrays
@@ -614,7 +613,8 @@ class CaseConverter : public ICaseConverter {
std::vector<ConversionString> conversions;
public:
- CaseConverter() = default;
+ explicit CaseConverter(CaseConversion conversion_) : conversion(conversion_) {
+ };
// Deleted so CaseConverter objects can not be copied.
CaseConverter(const CaseConverter &) = delete;
CaseConverter(CaseConverter &&) = delete;
@@ -624,8 +624,8 @@ public:
bool Initialised() const noexcept {
return !characters.empty();
}
- void Add(int character, const char *conversion) {
- characterToConversion.emplace_back(character, conversion);
+ void Add(int character, std::string_view conversion_) {
+ characterToConversion.emplace_back(character, conversion_);
}
const char *Find(int character) {
const std::vector<int>::iterator it = std::lower_bound(characters.begin(), characters.end(), character);
@@ -690,32 +690,37 @@ public:
// Empty the original calculated data completely
CharacterToConversion().swap(characterToConversion);
}
+ void AddSymmetric(int lower, int upper);
+ void SetupConversions();
};
-CaseConverter caseConvFold;
-CaseConverter caseConvUp;
-CaseConverter caseConvLow;
+CaseConverter caseConvFold(CaseConversion::fold);
+CaseConverter caseConvUp(CaseConversion::upper);
+CaseConverter caseConvLow(CaseConversion::lower);
-void AddSymmetric(CaseConversion conversion, int lower,int upper) {
- char lowerUTF8[UTF8MaxBytes+1];
- UTF8FromUTF32Character(lower, lowerUTF8);
- char upperUTF8[UTF8MaxBytes+1];
- UTF8FromUTF32Character(upper, upperUTF8);
+void CaseConverter::AddSymmetric(int lower, int upper) {
+ const int character = (conversion == CaseConversion::upper) ? lower : upper;
+ const int source = (conversion == CaseConversion::upper) ? upper : lower;
+ char converted[maxConversionLength+1]{};
+ UTF8FromUTF32Character(source, converted);
+ Add(character, converted);
+}
- switch (conversion) {
- case CaseConversion::fold:
- caseConvFold.Add(upper, lowerUTF8);
- break;
- case CaseConversion::upper:
- caseConvUp.Add(lower, upperUTF8);
- break;
- case CaseConversion::lower:
- caseConvLow.Add(upper, lowerUTF8);
- break;
+// Return the next '|' separated field and remove from view.
+std::string_view NextField(std::string_view &view) {
+ const size_t separatorPosition = view.find_first_of('|');
+ const std::string_view field = view.substr(0, separatorPosition);
+ if (separatorPosition == std::string_view::npos) {
+ // Reached the end so empty the view
+ view.remove_prefix(view.length());
+ } else {
+ // Remove the '|' from the view as well as the field
+ view.remove_prefix(separatorPosition + 1);
}
+ return field;
}
-void SetupConversions(CaseConversion conversion) {
+void CaseConverter::SetupConversions() {
// First initialize for the symmetric ranges
for (size_t i=0; i<std::size(symmetricCaseConversionRanges);) {
const int lower = symmetricCaseConversionRanges[i++];
@@ -723,91 +728,63 @@ void SetupConversions(CaseConversion conversion) {
const int length = symmetricCaseConversionRanges[i++];
const int pitch = symmetricCaseConversionRanges[i++];
for (int j=0; j<length*pitch; j+=pitch) {
- AddSymmetric(conversion, lower+j, upper+j);
+ AddSymmetric(lower+j, upper+j);
}
}
// Add the symmetric singletons
for (size_t i=0; i<std::size(symmetricCaseConversions);) {
const int lower = symmetricCaseConversions[i++];
const int upper = symmetricCaseConversions[i++];
- AddSymmetric(conversion, lower, upper);
+ AddSymmetric(lower, upper);
}
// Add the complex cases
- const char *sComplex = complexCaseConversions;
- while (*sComplex) {
- // Longest ligature is 3 character so 5 for safety
- constexpr size_t lenUTF8 = 5*UTF8MaxBytes+1;
- unsigned char originUTF8[lenUTF8]{};
- char foldedUTF8[lenUTF8]{};
- char lowerUTF8[lenUTF8]{};
- char upperUTF8[lenUTF8]{};
- size_t i = 0;
- while (*sComplex && *sComplex != '|') {
- originUTF8[i++] = *sComplex;
- sComplex++;
- }
- sComplex++;
- originUTF8[i] = 0;
- i = 0;
- while (*sComplex && *sComplex != '|') {
- foldedUTF8[i++] = *sComplex;
- sComplex++;
- }
- sComplex++;
- foldedUTF8[i] = 0;
- i = 0;
- while (*sComplex && *sComplex != '|') {
- upperUTF8[i++] = *sComplex;
- sComplex++;
- }
- sComplex++;
- upperUTF8[i] = 0;
- i = 0;
- while (*sComplex && *sComplex != '|') {
- lowerUTF8[i++] = *sComplex;
- sComplex++;
- }
- sComplex++;
- lowerUTF8[i] = 0;
-
- const int character = UnicodeFromUTF8(originUTF8);
-
- if (conversion == CaseConversion::fold && foldedUTF8[0]) {
- caseConvFold.Add(character, foldedUTF8);
- }
-
- if (conversion == CaseConversion::upper && upperUTF8[0]) {
- caseConvUp.Add(character, upperUTF8);
+ std::string_view sComplex = complexCaseConversions;
+ while (!sComplex.empty()) {
+ const std::string_view originUTF8 = NextField(sComplex);
+ const std::string_view foldedUTF8 = NextField(sComplex);
+ const std::string_view upperUTF8 = NextField(sComplex);
+ const std::string_view lowerUTF8 = NextField(sComplex);
+
+ std::string_view converted;
+ switch (conversion) {
+ case CaseConversion::fold:
+ converted = foldedUTF8;
+ break;
+ case CaseConversion::upper:
+ converted = upperUTF8;
+ break;
+ case CaseConversion::lower:
+ default:
+ converted = lowerUTF8;
+ break;
}
-
- if (conversion == CaseConversion::lower && lowerUTF8[0]) {
- caseConvLow.Add(character, lowerUTF8);
+ if (!converted.empty()) {
+ const int character = UnicodeFromUTF8(reinterpret_cast<const unsigned char *>(originUTF8.data()));
+ Add(character, converted);
}
}
+ FinishedAdding();
+}
+
+CaseConverter *ConverterForConversion(CaseConversion conversion) {
+ CaseConverter *pCaseConv = &caseConvFold;
switch (conversion) {
case CaseConversion::fold:
- caseConvFold.FinishedAdding();
+ pCaseConv = &caseConvFold;
break;
case CaseConversion::upper:
- caseConvUp.FinishedAdding();
+ pCaseConv = &caseConvUp;
break;
case CaseConversion::lower:
- caseConvLow.FinishedAdding();
+ default:
+ pCaseConv = &caseConvLow;
break;
}
-}
-
-CaseConverter *ConverterForConversion(CaseConversion conversion) noexcept {
- switch (conversion) {
- case CaseConversion::fold:
- return &caseConvFold;
- case CaseConversion::upper:
- return &caseConvUp;
- case CaseConversion::lower:
- return &caseConvLow;
+ if (!pCaseConv->Initialised()) {
+ pCaseConv->SetupConversions();
}
- return nullptr;
+ return pCaseConv;
}
}
@@ -815,23 +792,16 @@ CaseConverter *ConverterForConversion(CaseConversion conversion) noexcept {
namespace Scintilla::Internal {
ICaseConverter *ConverterFor(CaseConversion conversion) {
- CaseConverter *pCaseConv = ConverterForConversion(conversion);
- if (!pCaseConv->Initialised())
- SetupConversions(conversion);
- return pCaseConv;
+ return ConverterForConversion(conversion);
}
const char *CaseConvert(int character, CaseConversion conversion) {
CaseConverter *pCaseConv = ConverterForConversion(conversion);
- if (!pCaseConv->Initialised())
- SetupConversions(conversion);
return pCaseConv->Find(character);
}
size_t CaseConvertString(char *converted, size_t sizeConverted, const char *mixed, size_t lenMixed, CaseConversion conversion) {
CaseConverter *pCaseConv = ConverterForConversion(conversion);
- if (!pCaseConv->Initialised())
- SetupConversions(conversion);
return pCaseConv->CaseConvertString(converted, sizeConverted, mixed, lenMixed);
}