// Scintilla source code edit control
/** @file CharacterCategory.h
 ** Returns the Unicode general category of a character.
 **/
// Copyright 2013 by Neil Hodgson
// The License.txt file describes the conditions under which this software may be distributed.
//
// Provenance (kept from the patch this header was extracted from):
//   commit ba8b1a91525dd90f8fdcc75480f37815fecce2d2
//   From: Neil, Date: Fri, 29 Jan 2021 20:51:34 +1100
//   Subject: Move CharacterSet and CharacterCategory from lexlib to src
//            as in both Lexilla and Scintilla
//   New file src/CharacterCategory.h (index 000000000..cd3320dd9), rendered by cgit v1.2.3.

#ifndef CHARACTERCATEGORY_H
#define CHARACTERCATEGORY_H

// Included here so the header can be used on its own: the class below
// needs std::vector and std::size_t.
#include <cstddef>
#include <vector>

namespace Scintilla {

/**
 * Unicode general category of a character.
 * Each enumerator is "cc" followed by a two-letter category abbreviation
 * (for example ccLu, ccNd, ccZs). The enumerators are numbered consecutively
 * from ccLu == 0 to ccCn == 29; CharacterCategoryMap stores these numeric
 * values, so the order must not be changed.
 */
enum CharacterCategory {
	ccLu, ccLl, ccLt, ccLm, ccLo,
	ccMn, ccMc, ccMe,
	ccNd, ccNl, ccNo,
	ccPc, ccPd, ccPs, ccPe, ccPi, ccPf, ccPo,
	ccSm, ccSc, ccSk, ccSo,
	ccZs, ccZl, ccZp,
	ccCc, ccCf, ccCs, ccCo, ccCn
};

/// Return the general category of @a character. Defined outside this header.
CharacterCategory CategoriseCharacter(int character);

// Common definitions of allowable characters in identifiers from UAX #31.
bool IsIdStart(int character);
bool IsIdContinue(int character);
bool IsXidStart(int character);
bool IsXidContinue(int character);

/**
 * Category lookup that consults a dense per-character table first and
 * falls back to CategoriseCharacter() for characters outside that table.
 */
class CharacterCategoryMap {
private:
	// One entry per character value, holding a CharacterCategory stored as a byte.
	// NOTE(review): the element type was lost in extraction; unsigned char is
	// assumed here - confirm against the implementation file.
	std::vector<unsigned char> dense;
public:
	CharacterCategoryMap();

	/**
	 * Return the category of @a character.
	 * Values that index into the dense table are answered from it; all other
	 * values are forwarded to CategoriseCharacter(). A negative @a character
	 * converts to a very large unsigned index, so it always takes the
	 * fallback path rather than indexing the table.
	 */
	CharacterCategory CategoryFor(int character) const {
		const std::size_t index = static_cast<std::size_t>(character);
		if (index < dense.size()) {
			return static_cast<CharacterCategory>(dense[index]);
		} else {
			// binary search through ranges
			return CategoriseCharacter(character);
		}
	}

	// Both members below are defined outside this header.
	// NOTE(review): presumably Size() reports the number of characters covered
	// by the dense table and Optimize() rebuilds the table to cover
	// countCharacters characters - confirm against the implementation.
	int Size() const noexcept;
	void Optimize(int countCharacters);
};

}

#endif