Use SplitFindChar to rapidly search for a byte value in case-sensitive searches.

Averaged 2x faster for a forward search for "needle" in Editor.cxx as UTF-8 or Latin1.
author: Neil <nyamatongwe@gmail.com> 2021-07-12 15:17:40 +1000
committer: Neil <nyamatongwe@gmail.com> 2021-07-12 15:17:40 +1000
commit: d07fabd9addd7e03f59bd5031e0b4a635f5caf33 (patch)
tree: 5a054e4606e44daae859f95d9fa4319222be74ba
parent: fa423977ad23a5bc842562282a9776f15d486106 (diff)
download: scintilla-mirror-d07fabd9addd7e03f59bd5031e0b4a635f5caf33.tar.gz
1 files changed, 62 insertions, 14 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index 05d8dccdb..e5022ad64 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -2003,6 +2003,36 @@ struct SplitView {
 	}
 };
 
+// Equivalent of memchr over the split view: position of the first ch in [start, start+length), else PTRDIFF_MAX
+ptrdiff_t SplitFindChar(const SplitView &view, size_t start, size_t length, int ch) noexcept {
+	size_t range1Length = 0;
+	if (start < view.length1) {
+		range1Length = std::min(length, view.length1 - start);	// portion of the range held in segment1
+		const char *match = static_cast<const char *>(memchr(view.segment1 + start, ch, range1Length));
+		if (match) {
+			return match - view.segment1;
+		}
+		start += range1Length;	// continue with whatever remains of the range in segment2
+	}
+	const char *match2 = static_cast<const char *>(memchr(view.segment2 + start, ch, length - range1Length));
+	if (match2) {
+		return match2 - view.segment2;	// segment2 is indexed by absolute position (see memchr above): no length1 offset
+	}
+	return PTRDIFF_MAX;
+}
+
+// Report whether the bytes of the split view beginning at start equal text.
+// Loops byte by byte as search texts are commonly too short to repay a memcmp call.
+bool SplitMatch(const SplitView &view, size_t start, std::string_view text) noexcept {
+	size_t position = start;
+	for (const char expected : text) {
+		if (view.CharAt(position++) != expected) {
+			return false;
+		}
+	}
+	return true;
+}
+
 }
 
 /**
@@ -2045,26 +2075,44 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con
 		if (caseSensitive) {
 			const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
 			const unsigned char charStartSearch =  search[0];
-			while (forward ? (pos < endSearch) : (pos >= endSearch)) {
-				const unsigned char leadByte = cbView.CharAt(pos);
-				if (leadByte == charStartSearch) {
-					bool found = (pos + lengthFind) <= limitPos;
-					for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
-						found = cbView.CharAt(pos + indexSearch) == search[indexSearch];
+			if (forward && ((0 == dbcsCodePage) || (CpUtf8 == dbcsCodePage && !UTF8IsTrailByte(charStartSearch)))) {
+				// This is a fast case where there is no need to test byte values to iterate
+				// so becomes the equivalent of a memchr+memcmp loop. 
+				// UTF-8 search will not be self-synchronizing when starts with trail byte
+				const std::string_view suffix = search + 1;
+				while (pos < endSearch) {
+					pos = SplitFindChar(cbView, pos, limitPos - pos, charStartSearch);
+					if (pos == PTRDIFF_MAX) {
+						break;
 					}
-					if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
+					if (SplitMatch(cbView, pos + 1, suffix) && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
 						return pos;
 					}
-				}
-				if (forward && UTF8IsAscii(leadByte)) {
 					pos++;
-				} else {
-					if (dbcsCodePage) {
-						if (!NextCharacter(pos, increment)) {
-							break;
+				}
+			} else {
+				while (forward ? (pos < endSearch) : (pos >= endSearch)) {
+					const unsigned char leadByte = cbView.CharAt(pos);
+					if (leadByte == charStartSearch) {
+						bool found = (pos + lengthFind) <= limitPos;
+						// SplitMatch could be called here but it is slower with g++ -O2
+						for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
+							found = cbView.CharAt(pos + indexSearch) == search[indexSearch];
+						}
+						if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
+							return pos;
 						}
+					}
+					if (forward && UTF8IsAscii(leadByte)) {
+						pos++;
 					} else {
-						pos += increment;
+						if (dbcsCodePage) {
+							if (!NextCharacter(pos, increment)) {
+								break;
+							}
+						} else {
+							pos += increment;
+						}
 					}
 				}
 			}
author	Neil <nyamatongwe@gmail.com>	2021-07-12 15:17:40 +1000
committer	Neil <nyamatongwe@gmail.com>	2021-07-12 15:17:40 +1000
commit	d07fabd9addd7e03f59bd5031e0b4a635f5caf33 (patch)
tree	5a054e4606e44daae859f95d9fa4319222be74ba
parent	fa423977ad23a5bc842562282a9776f15d486106 (diff)
download	scintilla-mirror-d07fabd9addd7e03f59bd5031e0b4a635f5caf33.tar.gz