From 86266d4700632860705fc2d4e88d4be4f5228be1 Mon Sep 17 00:00:00 2001 From: mitchell Date: Tue, 16 Apr 2019 22:50:17 -0400 Subject: Backport: Feature [feature-requests:#1259]. Add SCI_SETCHARACTERCATEGORYOPTIMIZATION API to optimize speed of character category features. Backport of changeset 7392:2832adedd0f4, but with added includes for Sci::clamp(). --- lexlib/CharacterCategory.cxx | 33 +++++++++++++++++++++++++++++++++ lexlib/CharacterCategory.h | 17 +++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'lexlib') diff --git a/lexlib/CharacterCategory.cxx b/lexlib/CharacterCategory.cxx index bc2fa2336..19c44cabe 100644 --- a/lexlib/CharacterCategory.cxx +++ b/lexlib/CharacterCategory.cxx @@ -7,10 +7,13 @@ // Copyright 2013 by Neil Hodgson // The License.txt file describes the conditions under which this software may be distributed. +#include #include #include +#include "Scintilla.h" // for ptrdiff_t in Position.h #include "CharacterCategory.h" +#include "Position.h" // for Sci::clamp namespace Scintilla { @@ -3790,6 +3793,7 @@ const int catRanges[] = { 33554397, 33554460, 35651549, +35651613, //--Autogenerated -- end of section automatically generated }; @@ -3963,4 +3967,33 @@ bool IsXidContinue(int character) { } } +CharacterCategoryMap::CharacterCategoryMap() noexcept { + Optimize(256); +} + +int CharacterCategoryMap::Size() const noexcept { + return static_cast(dense.size()); +} + +void CharacterCategoryMap::Optimize(int countCharacters) { + const int characters = Sci::clamp(countCharacters, 256, maxUnicode + 1); + dense.resize(characters); + + int end = 0; + int index = 0; + int current = catRanges[index]; + ++index; + do { + const int next = catRanges[index]; + const unsigned char category = current & maskCategory; + current >>= 5; + end = std::min(characters, next >> 5); + while (current < end) { + dense[current++] = category; + } + current = next; + ++index; + } while (characters > end); +} + } diff --git a/lexlib/CharacterCategory.h b/lexlib/CharacterCategory.h index 767d79670..d1ac39152 100644 --- a/lexlib/CharacterCategory.h +++ b/lexlib/CharacterCategory.h @@ -28,6 +28,23 @@ bool IsIdContinue(int character); bool IsXidStart(int character); bool IsXidContinue(int character); +class CharacterCategoryMap { +private: + std::vector dense; +public: + CharacterCategoryMap() noexcept; + CharacterCategory CategoryFor(int character) const { + if (static_cast(character) < dense.size()) { + return static_cast(dense[character]); + } else { + // binary search through ranges + return CategoriseCharacter(character); + } + } + int Size() const noexcept; + void Optimize(int countCharacters); +}; + } #endif -- cgit v1.2.3