diff options
author | Neil <nyamatongwe@gmail.com> | 2021-07-12 15:17:40 +1000 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2021-07-12 15:17:40 +1000 |
commit | d07fabd9addd7e03f59bd5031e0b4a635f5caf33 (patch) | |
tree | 5a054e4606e44daae859f95d9fa4319222be74ba | |
parent | fa423977ad23a5bc842562282a9776f15d486106 (diff) | |
download | scintilla-mirror-d07fabd9addd7e03f59bd5031e0b4a635f5caf33.tar.gz |
Use SplitFindChar to rapidly seek for a byte value for case-sensitive searches.
Averaged 2x faster for forward search "needle" in Editor.cxx as UTF-8 or Latin1.
-rw-r--r-- | src/Document.cxx | 76 |
1 files changed, 62 insertions, 14 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index 05d8dccdb..e5022ad64 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -2003,6 +2003,36 @@ struct SplitView { } }; +// Equivalent of memchr over the split view +ptrdiff_t SplitFindChar(const SplitView &view, size_t start, size_t length, int ch) noexcept { + size_t range1Length = 0; + if (start < view.length1) { + range1Length = std::min(length, view.length1 - start); + const char *match = static_cast<const char *>(memchr(view.segment1 + start, ch, range1Length)); + if (match) { + return match - view.segment1; + } + start += range1Length; + } + const char *match2 = static_cast<const char *>(memchr(view.segment2 + start, ch, length - range1Length)); + if (match2) { + return match2 - view.segment2 + view.length1; + } + return PTRDIFF_MAX; +} + +// Equivalent of memcmp over the split view +// This does not call memcmp as search texts are commonly too short to overcome the +// call overhead. +bool SplitMatch(const SplitView &view, size_t start, std::string_view text) noexcept { + for (size_t i = 0; i < text.length(); i++) { + if (view.CharAt(i + start) != text[i]) { + return false; + } + } + return true; +} + } /** @@ -2045,26 +2075,44 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con if (caseSensitive) { const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos; const unsigned char charStartSearch = search[0]; - while (forward ? (pos < endSearch) : (pos >= endSearch)) { - const unsigned char leadByte = cbView.CharAt(pos); - if (leadByte == charStartSearch) { - bool found = (pos + lengthFind) <= limitPos; - for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) { - found = cbView.CharAt(pos + indexSearch) == search[indexSearch]; + if (forward && ((0 == dbcsCodePage) || (CpUtf8 == dbcsCodePage && !UTF8IsTrailByte(charStartSearch)))) { + // This is a fast case where there is no need to test byte values to iterate + // so becomes the equivalent of a memchr+memcmp loop. + // UTF-8 search will not be self-synchronizing when starts with trail byte + const std::string_view suffix = search + 1; + while (pos < endSearch) { + pos = SplitFindChar(cbView, pos, limitPos - pos, charStartSearch); + if (pos == PTRDIFF_MAX) { + break; } - if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) { + if (SplitMatch(cbView, pos + 1, suffix) && MatchesWordOptions(word, wordStart, pos, lengthFind)) { return pos; } - } - if (forward && UTF8IsAscii(leadByte)) { pos++; - } else { - if (dbcsCodePage) { - if (!NextCharacter(pos, increment)) { - break; + } + } else { + while (forward ? (pos < endSearch) : (pos >= endSearch)) { + const unsigned char leadByte = cbView.CharAt(pos); + if (leadByte == charStartSearch) { + bool found = (pos + lengthFind) <= limitPos; + // SplitMatch could be called here but it is slower with g++ -O2 + for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) { + found = cbView.CharAt(pos + indexSearch) == search[indexSearch]; + } + if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) { + return pos; } + } + if (forward && UTF8IsAscii(leadByte)) { + pos++; } else { - pos += increment; + if (dbcsCodePage) { + if (!NextCharacter(pos, increment)) { + break; + } + } else { + pos += increment; + } } } } |