Feature [feature-requests:#1417] Consolidate character classification functions

in CharacterType.h, merging duplicate functions, removing unused functions and standardizing names.
author: Neil <nyamatongwe@gmail.com> 2021-10-05 14:38:47 +1100
committer: Neil <nyamatongwe@gmail.com> 2021-10-05 14:38:47 +1100
commit: 9e06aef78c343476ee39698c0f17aa49b7e49999 (patch)
tree: fe5bb15362ffed7ff10ced26339c80df7a1e2de6 /src
parent: 180ea34843782451f6d0684af51b584b83a4dd62 (diff)
download: scintilla-mirror-9e06aef78c343476ee39698c0f17aa49b7e49999.tar.gz
5 files changed, 62 insertions, 99 deletions
diff --git a/src/CharacterType.h b/src/CharacterType.h
index 70f4cbd05..b014f1050 100644
--- a/src/CharacterType.h
+++ b/src/CharacterType.h
@@ -12,11 +12,15 @@ namespace Scintilla::Internal {
 
 // Functions for classifying characters
 
+/**
+ * Check if a character is a space.
+ * This is ASCII specific but is safe with chars >= 0x80.
+ */
 constexpr bool IsASpace(int ch) noexcept {
     return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d));
 }
 
-constexpr bool IsASpaceOrTab(int ch) noexcept {
+constexpr bool IsSpaceOrTab(int ch) noexcept {
 	return (ch == ' ') || (ch == '\t');
 }
 
@@ -24,6 +28,10 @@ constexpr bool IsControl(int ch) noexcept {
 	return ((ch >= 0) && (ch <= 0x1F)) || (ch == 0x7F);
 }
 
+constexpr bool IsEOLCharacter(int ch) noexcept {
+	return ch == '\r' || ch == '\n';
+}
+
 constexpr bool IsADigit(int ch) noexcept {
 	return (ch >= '0') && (ch <= '9');
 }
@@ -61,33 +69,44 @@ constexpr bool IsAlphaNumeric(int ch) noexcept {
 		((ch >= 'A') && (ch <= 'Z'));
 }
 
-/**
- * Check if a character is a space.
- * This is ASCII specific but is safe with chars >= 0x80.
- */
-constexpr bool isspacechar(int ch) noexcept {
-    return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d));
-}
-
-constexpr bool iswordchar(int ch) noexcept {
-	return IsAlphaNumeric(ch) || ch == '.' || ch == '_';
-}
-
-constexpr bool iswordstart(int ch) noexcept {
-	return IsAlphaNumeric(ch) || ch == '_';
-}
-
-constexpr bool isoperator(int ch) noexcept {
-	if (IsAlphaNumeric(ch))
-		return false;
-	if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
-	        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
-	        ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
-	        ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
-	        ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
-	        ch == '?' || ch == '!' || ch == '.' || ch == '~')
+constexpr bool IsPunctuation(int ch) noexcept {
+	switch (ch) {
+	case '!':
+	case '"':
+	case '#':
+	case '$':
+	case '%':
+	case '&':
+	case '\'':
+	case '(':
+	case ')':
+	case '*':
+	case '+':
+	case ',':
+	case '-':
+	case '.':
+	case '/':
+	case ':':
+	case ';':
+	case '<':
+	case '=':
+	case '>':
+	case '?':
+	case '@':
+	case '[':
+	case '\\':
+	case ']':
+	case '^':
+	case '_':
+	case '`':
+	case '{':
+	case '|':
+	case '}':
+	case '~':
 		return true;
-	return false;
+	default:
+		return false;
+	}
 }
 
 // Simple case functions for ASCII supersets.
diff --git a/src/Document.cxx b/src/Document.cxx
index eae055dc2..fe58728ee 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -1127,10 +1127,6 @@ bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept {
 		&& IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1));
 }
 
-static constexpr bool IsSpaceOrTab(int ch) noexcept {
-	return ch == ' ' || ch == '\t';
-}
-
 // Need to break text into segments near lengthSegment but taking into
 // account the encoding to not break inside a UTF-8 or DBCS character
 // and also trying to avoid breaking inside a pair of combining characters.
@@ -2598,49 +2594,8 @@ void Document::NotifyModified(DocModification mh) {
 	}
 }
 
-// Used for word part navigation.
-static bool IsASCIIPunctuationCharacter(unsigned int ch) noexcept {
-	switch (ch) {
-	case '!':
-	case '"':
-	case '#':
-	case '$':
-	case '%':
-	case '&':
-	case '\'':
-	case '(':
-	case ')':
-	case '*':
-	case '+':
-	case ',':
-	case '-':
-	case '.':
-	case '/':
-	case ':':
-	case ';':
-	case '<':
-	case '=':
-	case '>':
-	case '?':
-	case '@':
-	case '[':
-	case '\\':
-	case ']':
-	case '^':
-	case '_':
-	case '`':
-	case '{':
-	case '|':
-	case '}':
-	case '~':
-		return true;
-	default:
-		return false;
-	}
-}
-
 bool Document::IsWordPartSeparator(unsigned int ch) const {
-	return (WordCharacterClass(ch) == CharacterClass::word) && IsASCIIPunctuationCharacter(ch);
+	return (WordCharacterClass(ch) == CharacterClass::word) && IsPunctuation(ch);
 }
 
 Sci::Position Document::WordPartLeft(Sci::Position pos) const {
@@ -2670,15 +2625,15 @@ Sci::Position Document::WordPartLeft(Sci::Position pos) const {
 					pos -= CharacterBefore(pos).widthBytes;
 				if (!IsADigit(CharacterAfter(pos).character))
 					pos += CharacterAfter(pos).widthBytes;
-			} else if (IsASCIIPunctuationCharacter(ceStart.character)) {
-				while (pos > 0 && IsASCIIPunctuationCharacter(CharacterAfter(pos).character))
+			} else if (IsPunctuation(ceStart.character)) {
+				while (pos > 0 && IsPunctuation(CharacterAfter(pos).character))
 					pos -= CharacterBefore(pos).widthBytes;
-				if (!IsASCIIPunctuationCharacter(CharacterAfter(pos).character))
+				if (!IsPunctuation(CharacterAfter(pos).character))
 					pos += CharacterAfter(pos).widthBytes;
-			} else if (isspacechar(ceStart.character)) {
-				while (pos > 0 && isspacechar(CharacterAfter(pos).character))
+			} else if (IsASpace(ceStart.character)) {
+				while (pos > 0 && IsASpace(CharacterAfter(pos).character))
 					pos -= CharacterBefore(pos).widthBytes;
-				if (!isspacechar(CharacterAfter(pos).character))
+				if (!IsASpace(CharacterAfter(pos).character))
 					pos += CharacterAfter(pos).widthBytes;
 			} else if (!IsASCII(ceStart.character)) {
 				while (pos > 0 && !IsASCII(CharacterAfter(pos).character))
@@ -2721,11 +2676,11 @@ Sci::Position Document::WordPartRight(Sci::Position pos) const {
 	} else if (IsADigit(ceStart.character)) {
 		while (pos < length && IsADigit(CharacterAfter(pos).character))
 			pos += CharacterAfter(pos).widthBytes;
-	} else if (IsASCIIPunctuationCharacter(ceStart.character)) {
-		while (pos < length && IsASCIIPunctuationCharacter(CharacterAfter(pos).character))
+	} else if (IsPunctuation(ceStart.character)) {
+		while (pos < length && IsPunctuation(CharacterAfter(pos).character))
 			pos += CharacterAfter(pos).widthBytes;
-	} else if (isspacechar(ceStart.character)) {
-		while (pos < length && isspacechar(CharacterAfter(pos).character))
+	} else if (IsASpace(ceStart.character)) {
+		while (pos < length && IsASpace(CharacterAfter(pos).character))
 			pos += CharacterAfter(pos).widthBytes;
 	} else {
 		pos += CharacterAfter(pos).widthBytes;
@@ -2733,18 +2688,14 @@ Sci::Position Document::WordPartRight(Sci::Position pos) const {
 	return pos;
 }
 
-static constexpr bool IsLineEndChar(char c) noexcept {
-	return (c == '\n' || c == '\r');
-}
-
 Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) noexcept {
 	const int sStart = cb.StyleAt(pos);
 	if (delta < 0) {
-		while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
+		while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos))))
 			pos--;
 		pos++;
 	} else {
-		while (pos < (LengthNoExcept()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
+		while (pos < (LengthNoExcept()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos))))
 			pos++;
 	}
 	return pos;
diff --git a/src/Editor.cxx b/src/Editor.cxx
index 06d96134f..493146455 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -2193,10 +2193,10 @@ void Editor::PasteRectangular(SelectionPosition pos, const char *ptr, Sci::Posit
 	sel.RangeMain().caret = RealizeVirtualSpace(sel.RangeMain().caret);
 	const int xInsert = XFromPosition(sel.RangeMain().caret);
 	bool prevCr = false;
-	while ((len > 0) && IsEOLChar(ptr[len-1]))
+	while ((len > 0) && IsEOLCharacter(ptr[len-1]))
 		len--;
 	for (Sci::Position i = 0; i < len; i++) {
-		if (IsEOLChar(ptr[i])) {
+		if (IsEOLCharacter(ptr[i])) {
 			if ((ptr[i] == '\r') || (!prevCr))
 				line++;
 			if (line >= pdoc->LinesTotal()) {
diff --git a/src/PositionCache.cxx b/src/PositionCache.cxx
index e94721ff5..e57cb572a 100644
--- a/src/PositionCache.cxx
+++ b/src/PositionCache.cxx
@@ -31,6 +31,7 @@
 #include "Geometry.h"
 #include "Platform.h"
 
+#include "CharacterType.h"
 #include "CharacterCategoryMap.h"
 #include "Position.h"
 #include "UniqueString.h"
diff --git a/src/PositionCache.h b/src/PositionCache.h
index dece77040..3c4ca88c3 100644
--- a/src/PositionCache.h
+++ b/src/PositionCache.h
@@ -10,14 +10,6 @@
 
 namespace Scintilla::Internal {
 
-inline constexpr bool IsEOLChar(int ch) noexcept {
-	return (ch == '\r') || (ch == '\n');
-}
-
-inline constexpr bool IsSpaceOrTab(int ch) noexcept {
-	return ch == ' ' || ch == '\t';
-}
-
 /**
 * A point in document space.
 * Uses double for sufficient resolution in large (>20,000,000 line) documents.
author	Neil <nyamatongwe@gmail.com>	2021-10-05 14:38:47 +1100
committer	Neil <nyamatongwe@gmail.com>	2021-10-05 14:38:47 +1100
commit	9e06aef78c343476ee39698c0f17aa49b7e49999 (patch)
tree	fe5bb15362ffed7ff10ced26339c80df7a1e2de6 /src
parent	180ea34843782451f6d0684af51b584b83a4dd62 (diff)
download	scintilla-mirror-9e06aef78c343476ee39698c0f17aa49b7e49999.tar.gz