diff options
-rw-r--r-- | cppcheck.suppress | 4 | ||||
-rw-r--r-- | src/Document.cxx | 39 | ||||
-rw-r--r-- | test/performanceTests.py | 29 |
3 files changed, 46 insertions, 26 deletions
diff --git a/cppcheck.suppress b/cppcheck.suppress index 2515e8978..2d767ee2f 100644 --- a/cppcheck.suppress +++ b/cppcheck.suppress @@ -13,6 +13,10 @@ useStlAlgorithm // Written with variable for consistency
knownArgument:scintilla/src/SparseVector.h
+// The cast converts from 'unsigned char' to 'char', so it isn't unused.
+// Redundant code: Found unused cast of expression 'leadByte'
+constStatement:scintilla/src/Document.cxx
+
// Some non-explicit constructors are used for conversions or are private to lexers
noExplicitConstructor
diff --git a/src/Document.cxx b/src/Document.cxx index 3fd9ce1a9..00ac8e618 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -2185,32 +2185,32 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind); while (forward ? (pos < endPos) : (pos >= endPos)) { - int widthFirstCharacter = 0; + int widthFirstCharacter = 1; Sci::Position posIndexDocument = pos; size_t indexSearch = 0; bool characterMatches = true; - for (;;) { + while (indexSearch < lenSearch) { const unsigned char leadByte = cbView.CharAt(posIndexDocument); - char bytes[UTF8MaxBytes + 1]; int widthChar = 1; - if (!UTF8IsAscii(leadByte)) { - const int widthCharBytes = UTF8BytesOfLead[leadByte]; - bytes[0] = leadByte; - for (int b=1; b<widthCharBytes; b++) { - bytes[b] = cbView.CharAt(posIndexDocument+b); - } - widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth; - } - if (!widthFirstCharacter) { - widthFirstCharacter = widthChar; - } - if ((posIndexDocument + widthChar) > limitPos) { - break; - } size_t lenFlat = 1; - if (widthChar == 1) { + if (UTF8IsAscii(leadByte)) { + if ((posIndexDocument + 1) > limitPos) { + break; + } characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte); } else { + char bytes[UTF8MaxBytes]{ static_cast<char>(leadByte) }; + const int widthCharBytes = UTF8BytesOfLead[leadByte]; + for (int b = 1; b < widthCharBytes; b++) { + bytes[b] = cbView.CharAt(posIndexDocument + b); + } + widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth; + if (!indexSearch) { // First character + widthFirstCharacter = widthChar; + } + if ((posIndexDocument + widthChar) > limitPos) { + break; + } char folded[UTF8MaxBytes * maxFoldingExpansion + 1]; lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar); // memcmp may examine lenFlat bytes in both arguments so 
assert it doesn't read past end of searchThing @@ -2223,9 +2223,6 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con } posIndexDocument += widthChar; indexSearch += lenFlat; - if (indexSearch >= lenSearch) { - break; - } } if (characterMatches && (indexSearch == lenSearch)) { if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) { diff --git a/test/performanceTests.py b/test/performanceTests.py index eb7c0c096..a3df73465 100644 --- a/test/performanceTests.py +++ b/test/performanceTests.py @@ -26,7 +26,7 @@ class TestPerformance(unittest.TestCase): def testAddLine(self): data = (string.ascii_letters + string.digits + "\n").encode('utf-8') start = timer() - for i in range(1000): + for i in range(2000): self.ed.AddText(len(data), data) self.assertEquals(self.ed.LineCount, i + 2) end = timer() @@ -38,7 +38,7 @@ class TestPerformance(unittest.TestCase): def testAddLineMiddle(self): data = (string.ascii_letters + string.digits + "\n").encode('utf-8') start = timer() - for i in range(1000): + for i in range(2000): self.ed.AddText(len(data), data) self.assertEquals(self.ed.LineCount, i + 2) end = timer() @@ -64,7 +64,7 @@ class TestPerformance(unittest.TestCase): insert = (string.digits + "\n").encode('utf-8') self.ed.AddText(len(data), data) start = timer() - for i in range(1000): + for i in range(2000): self.ed.InsertText(0, insert) end = timer() duration = end - start @@ -96,7 +96,7 @@ class TestPerformance(unittest.TestCase): self.ed.AddText(len(manyLines), manyLines) searchString = "φ".encode('utf-8') start = timer() - for i in range(10): + for i in range(1000): self.ed.TargetStart = 0 self.ed.TargetEnd = self.ed.Length-1 self.ed.SearchFlags = self.ed.SCFIND_MATCHCASE @@ -115,7 +115,7 @@ class TestPerformance(unittest.TestCase): self.ed.AddText(len(manyLines), manyLines) searchString = "φ".encode('utf-8') start = timer() - for i in range(10): + for i in range(20): self.ed.TargetStart = 0 self.ed.TargetEnd = 
self.ed.Length-1 self.ed.SearchFlags = 0 @@ -126,5 +126,24 @@ class TestPerformance(unittest.TestCase): print("%6.3f testUTF8Searches" % duration) self.xite.DoEvents() + def testUTF8AsciiSearches(self): + self.ed.SetCodePage(65001) + oneLine = "Fold Margin=NagasakiOsakaHiroshimaHanedaKyoto(&F)\n".encode('utf-8') + manyLines = oneLine * 100000 + manyLines = manyLines + "φ\n".encode('utf-8') + self.ed.AddText(len(manyLines), manyLines) + searchString = "φ".encode('utf-8') + start = timer() + for i in range(20): + self.ed.TargetStart = 0 + self.ed.TargetEnd = self.ed.Length-1 + self.ed.SearchFlags = 0 + pos = self.ed.SearchInTarget(len(searchString), searchString) + self.assert_(pos > 0) + end = timer() + duration = end - start + print("%6.3f testUTF8AsciiSearches" % duration) + self.xite.DoEvents() + if __name__ == '__main__': Xite.main("performanceTests") |