diff options
Diffstat (limited to 'lexlib')
-rw-r--r-- | lexlib/CharacterCategory.cxx | 41 | ||||
-rw-r--r-- | lexlib/CharacterCategory.h | 20 |
2 files changed, 61 insertions, 0 deletions
diff --git a/lexlib/CharacterCategory.cxx b/lexlib/CharacterCategory.cxx
index bc2fa2336..ca76325df 100644
--- a/lexlib/CharacterCategory.cxx
+++ b/lexlib/CharacterCategory.cxx
@@ -7,6 +7,7 @@
 // Copyright 2013 by Neil Hodgson <neilh@scintilla.org>
 // The License.txt file describes the conditions under which this software may be distributed.
 
+#include <vector>
 #include <algorithm>
 #include <iterator>
 
@@ -3790,6 +3791,7 @@ const int catRanges[] = {
 33554397,
 33554460,
 35651549,
+35651613,
 
 //--Autogenerated -- end of section automatically generated
 };
@@ -3963,4 +3965,43 @@ bool IsXidContinue(int character) {
 	}
 }
 
+// Construct a map whose dense table covers the first 256 characters.
+// Not noexcept: Optimize resizes a std::vector, which can throw std::bad_alloc.
+CharacterCategoryMap::CharacterCategoryMap() {
+	Optimize(256);
+}
+
+// Number of characters covered by the dense table.
+int CharacterCategoryMap::Size() const noexcept {
+	return static_cast<int>(dense.size());
+}
+
+// Rebuild the dense table so that it covers the first countCharacters
+// characters, with the count clamped to the range 256 .. maxUnicode + 1.
+void CharacterCategoryMap::Optimize(int countCharacters) {
+	const int characters = std::clamp(countCharacters, 256, maxUnicode + 1);
+	dense.resize(characters);
+
+	// Each catRanges entry holds a range start in the bits above the low 5 and
+	// a category in the bits selected by maskCategory. A range extends to the
+	// start of the following entry.
+	int end = 0;
+	int index = 0;
+	int current = catRanges[index];
+	++index;
+	do {
+		const int next = catRanges[index];
+		const unsigned char category = current & maskCategory;
+		current >>= 5;
+		end = std::min(characters, next >> 5);
+		while (current < end) {
+			dense[current++] = category;
+		}
+		current = next;
+		++index;
+		// Terminates once a range reaches the end of the table, so catRanges
+		// must finish with an entry starting at or beyond the largest table size.
+	} while (characters > end);
+}
+
 }
diff --git a/lexlib/CharacterCategory.h b/lexlib/CharacterCategory.h
index 767d79670..d1ac39152 100644
--- a/lexlib/CharacterCategory.h
+++ b/lexlib/CharacterCategory.h
@@ -28,6 +28,26 @@ bool IsIdContinue(int character);
 bool IsXidStart(int character);
 bool IsXidContinue(int character);
 
+// Map from character to category that keeps a dense table for low characters
+// and defers to CategoriseCharacter for characters beyond the table.
+class CharacterCategoryMap {
+private:
+	std::vector<unsigned char> dense;
+public:
+	CharacterCategoryMap();
+	CharacterCategory CategoryFor(int character) const {
+		// The unsigned comparison also routes negative values to the fallback.
+		if (static_cast<size_t>(character) < dense.size()) {
+			return static_cast<CharacterCategory>(dense[character]);
+		} else {
+			// binary search through ranges
+			return CategoriseCharacter(character);
+		}
+	}
+	int Size() const noexcept;
+	void Optimize(int countCharacters);
+};
+
 }
 
 #endif