diff options
| author | Neil <nyamatongwe@gmail.com> | 2021-01-29 20:51:34 +1100 | 
|---|---|---|
| committer | Neil <nyamatongwe@gmail.com> | 2021-01-29 20:51:34 +1100 | 
| commit | ba8b1a91525dd90f8fdcc75480f37815fecce2d2 (patch) | |
| tree | 5ebce1a7d7c25aaee80e640b8cb56cc9aa5d6d34 /src/CharacterSet.h | |
| parent | f8817063200055b8b9f7b9a7a83c30100f64c903 (diff) | |
| download | scintilla-mirror-ba8b1a91525dd90f8fdcc75480f37815fecce2d2.tar.gz | |
Move CharacterSet and CharacterCategory from lexlib to src as in both Lexilla and Scintilla
Diffstat (limited to 'src/CharacterSet.h')
| -rw-r--r-- | src/CharacterSet.h | 208 | 
1 files changed, 208 insertions, 0 deletions
| diff --git a/src/CharacterSet.h b/src/CharacterSet.h new file mode 100644 index 000000000..a518c27fc --- /dev/null +++ b/src/CharacterSet.h @@ -0,0 +1,208 @@ +// Scintilla source code edit control +/** @file CharacterSet.h + ** Encapsulates a set of characters. Used to test if a character is within a set. + **/ +// Copyright 2007 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. + +#ifndef CHARACTERSET_H +#define CHARACTERSET_H + +namespace Scintilla { + +class CharacterSet { +	int size; +	bool valueAfter; +	bool *bset; +public: +	enum setBase { +		setNone=0, +		setLower=1, +		setUpper=2, +		setDigits=4, +		setAlpha=setLower|setUpper, +		setAlphaNum=setAlpha|setDigits +	}; +	CharacterSet(setBase base=setNone, const char *initialSet="", int size_=0x80, bool valueAfter_=false) { +		size = size_; +		valueAfter = valueAfter_; +		bset = new bool[size]; +		for (int i=0; i < size; i++) { +			bset[i] = false; +		} +		AddString(initialSet); +		if (base & setLower) +			AddString("abcdefghijklmnopqrstuvwxyz"); +		if (base & setUpper) +			AddString("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); +		if (base & setDigits) +			AddString("0123456789"); +	} +	CharacterSet(const CharacterSet &other) { +		size = other.size; +		valueAfter = other.valueAfter; +		bset = new bool[size]; +		for (int i=0; i < size; i++) { +			bset[i] = other.bset[i]; +		} +	} +	CharacterSet(CharacterSet &&other) noexcept { +		size = other.size; +		valueAfter = other.valueAfter; +		bset = other.bset; +		other.size = 0; +		other.bset = nullptr; +	} +	CharacterSet &operator=(const CharacterSet &other) { +		if (this != &other) { +			bool *bsetNew = new bool[other.size]; +			for (int i = 0; i < other.size; i++) { +				bsetNew[i] = other.bset[i]; +			} +			delete[]bset; +			size = other.size; +			valueAfter = other.valueAfter; +			bset = bsetNew; +		} +		return *this; +	} +	CharacterSet &operator=(CharacterSet &&other) noexcept { +		if (this != &other) { +			delete []bset; +			size = other.size; +			valueAfter = other.valueAfter; +			bset = other.bset; +			other.size = 0; +			other.bset = nullptr; +		} +		return *this; +	} +	~CharacterSet() { +		delete []bset; +		bset = nullptr; +		size = 0; +	} +	void Add(int val) { +		assert(val >= 0); +		assert(val < size); +		bset[val] = true; +	} +	void AddString(const char *setToAdd) { +		for (const char *cp=setToAdd; *cp; cp++) { +			const unsigned char uch = *cp; +			assert(uch < size); +			bset[uch] = true; +		} +	} +	bool Contains(int val) const noexcept { +		assert(val >= 0); +		if (val < 0) return false; +		return (val < size) ? bset[val] : valueAfter; +	} +	bool Contains(char ch) const noexcept { +		// Overload char as char may be signed +		const unsigned char uch = ch; +		return Contains(uch); +	} +}; + +// Functions for classifying characters + +constexpr bool IsASpace(int ch) noexcept { +    return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d)); +} + +constexpr bool IsASpaceOrTab(int ch) noexcept { +	return (ch == ' ') || (ch == '\t'); +} + +constexpr bool IsADigit(int ch) noexcept { +	return (ch >= '0') && (ch <= '9'); +} + +constexpr bool IsADigit(int ch, int base) noexcept { +	if (base <= 10) { +		return (ch >= '0') && (ch < '0' + base); +	} else { +		return ((ch >= '0') && (ch <= '9')) || +		       ((ch >= 'A') && (ch < 'A' + base - 10)) || +		       ((ch >= 'a') && (ch < 'a' + base - 10)); +	} +} + +constexpr bool IsASCII(int ch) noexcept { +	return (ch >= 0) && (ch < 0x80); +} + +constexpr bool IsLowerCase(int ch) noexcept { +	return (ch >= 'a') && (ch <= 'z'); +} + +constexpr bool IsUpperCase(int ch) noexcept { +	return (ch >= 'A') && (ch <= 'Z'); +} + +constexpr bool IsUpperOrLowerCase(int ch) noexcept { +	return IsUpperCase(ch) || IsLowerCase(ch); +} + +constexpr bool IsAlphaNumeric(int ch) noexcept { +	return +		((ch >= '0') && (ch <= '9')) || +		((ch >= 'a') && (ch <= 'z')) || +		((ch >= 'A') && (ch <= 'Z')); +} + +/** + * Check if a character is a space. + * This is ASCII specific but is safe with chars >= 0x80. + */ +constexpr bool isspacechar(int ch) noexcept { +    return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d)); +} + +constexpr bool iswordchar(int ch) noexcept { +	return IsAlphaNumeric(ch) || ch == '.' || ch == '_'; +} + +constexpr bool iswordstart(int ch) noexcept { +	return IsAlphaNumeric(ch) || ch == '_'; +} + +constexpr bool isoperator(int ch) noexcept { +	if (IsAlphaNumeric(ch)) +		return false; +	if (ch == '%' || ch == '^' || ch == '&' || ch == '*' || +	        ch == '(' || ch == ')' || ch == '-' || ch == '+' || +	        ch == '=' || ch == '|' || ch == '{' || ch == '}' || +	        ch == '[' || ch == ']' || ch == ':' || ch == ';' || +	        ch == '<' || ch == '>' || ch == ',' || ch == '/' || +	        ch == '?' || ch == '!' || ch == '.' || ch == '~') +		return true; +	return false; +} + +// Simple case functions for ASCII supersets. + +template <typename T> +constexpr T MakeUpperCase(T ch) noexcept { +	if (ch < 'a' || ch > 'z') +		return ch; +	else +		return ch - 'a' + 'A'; +} + +template <typename T> +constexpr T MakeLowerCase(T ch) noexcept { +	if (ch < 'A' || ch > 'Z') +		return ch; +	else +		return ch - 'A' + 'a'; +} + +int CompareCaseInsensitive(const char *a, const char *b) noexcept; +int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept; + +} + +#endif | 
