3 files changed, 46 insertions, 26 deletions
diff --git a/cppcheck.suppress b/cppcheck.suppress
index 2515e8978..2d767ee2f 100644
--- a/cppcheck.suppress
+++ b/cppcheck.suppress
@@ -13,6 +13,10 @@ useStlAlgorithm
 // Written with variable for consistency
 knownArgument:scintilla/src/SparseVector.h
 
+// The cast converts from 'unsigned char' to 'char' so it isn't unused.
+// Redundant code: Found unused cast of expression 'leadByte'
+constStatement:scintilla/src/Document.cxx
+
 // Some non-explicit constructors are used for conversions or are private to lexers
 noExplicitConstructor
 
diff --git a/src/Document.cxx b/src/Document.cxx
index 3fd9ce1a9..00ac8e618 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -2185,32 +2185,32 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con
 			const size_t lenSearch =
 				pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
 			while (forward ? (pos < endPos) : (pos >= endPos)) {
-				int widthFirstCharacter = 0;
+				int widthFirstCharacter = 1;
 				Sci::Position posIndexDocument = pos;
 				size_t indexSearch = 0;
 				bool characterMatches = true;
-				for (;;) {
+				while (indexSearch < lenSearch) {
 					const unsigned char leadByte = cbView.CharAt(posIndexDocument);
-					char bytes[UTF8MaxBytes + 1];
 					int widthChar = 1;
-					if (!UTF8IsAscii(leadByte)) {
-						const int widthCharBytes = UTF8BytesOfLead[leadByte];
-						bytes[0] = leadByte;
-						for (int b=1; b<widthCharBytes; b++) {
-							bytes[b] = cbView.CharAt(posIndexDocument+b);
-						}
-						widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
-					}
-					if (!widthFirstCharacter) {
-						widthFirstCharacter = widthChar;
-					}
-					if ((posIndexDocument + widthChar) > limitPos) {
-						break;
-					}
 					size_t lenFlat = 1;
-					if (widthChar == 1) {
+					if (UTF8IsAscii(leadByte)) {
+						if ((posIndexDocument + 1) > limitPos) {
+							break;
+						}
 						characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte);
 					} else {
+						char bytes[UTF8MaxBytes]{ static_cast<char>(leadByte) };
+						const int widthCharBytes = UTF8BytesOfLead[leadByte];
+						for (int b = 1; b < widthCharBytes; b++) {
+							bytes[b] = cbView.CharAt(posIndexDocument + b);
+						}
+						widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
+						if (!indexSearch) {	// First character
+							widthFirstCharacter = widthChar;
+						}
+						if ((posIndexDocument + widthChar) > limitPos) {
+							break;
+						}
 						char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
 						lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
 						// memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
@@ -2223,9 +2223,6 @@ Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, con
 					}
 					posIndexDocument += widthChar;
 					indexSearch += lenFlat;
-					if (indexSearch >= lenSearch) {
-						break;
-					}
 				}
 				if (characterMatches && (indexSearch == lenSearch)) {
 					if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
diff --git a/test/performanceTests.py b/test/performanceTests.py
index eb7c0c096..a3df73465 100644
--- a/test/performanceTests.py
+++ b/test/performanceTests.py
@@ -26,7 +26,7 @@ class TestPerformance(unittest.TestCase):
 	def testAddLine(self):
 		data = (string.ascii_letters + string.digits + "\n").encode('utf-8')
 		start = timer()
-		for i in range(1000):
+		for i in range(2000):
 			self.ed.AddText(len(data), data)
 			self.assertEquals(self.ed.LineCount, i + 2)
 		end = timer()
@@ -38,7 +38,7 @@ class TestPerformance(unittest.TestCase):
 	def testAddLineMiddle(self):
 		data = (string.ascii_letters + string.digits + "\n").encode('utf-8')
 		start = timer()
-		for i in range(1000):
+		for i in range(2000):
 			self.ed.AddText(len(data), data)
 			self.assertEquals(self.ed.LineCount, i + 2)
 		end = timer()
@@ -64,7 +64,7 @@ class TestPerformance(unittest.TestCase):
 		insert = (string.digits + "\n").encode('utf-8')
 		self.ed.AddText(len(data), data)
 		start = timer()
-		for i in range(1000):
+		for i in range(2000):
 			self.ed.InsertText(0, insert)
 		end = timer()
 		duration = end - start
@@ -96,7 +96,7 @@ class TestPerformance(unittest.TestCase):
 		self.ed.AddText(len(manyLines), manyLines)
 		searchString = "φ".encode('utf-8')
 		start = timer()
-		for i in range(10):
+		for i in range(1000):
 			self.ed.TargetStart = 0
 			self.ed.TargetEnd = self.ed.Length-1
 			self.ed.SearchFlags = self.ed.SCFIND_MATCHCASE
@@ -115,7 +115,7 @@ class TestPerformance(unittest.TestCase):
 		self.ed.AddText(len(manyLines), manyLines)
 		searchString = "φ".encode('utf-8')
 		start = timer()
-		for i in range(10):
+		for i in range(20):
 			self.ed.TargetStart = 0
 			self.ed.TargetEnd = self.ed.Length-1
 			self.ed.SearchFlags = 0
@@ -126,5 +126,24 @@ class TestPerformance(unittest.TestCase):
 		print("%6.3f testUTF8Searches" % duration)
 		self.xite.DoEvents()
 
+	def testUTF8AsciiSearches(self):
+		"""Time searches, without SCFIND_MATCHCASE, of mostly-ASCII UTF-8 text for a non-ASCII string that occurs only on the last line."""
+		self.ed.SetCodePage(65001)	# 65001 is the UTF-8 code page
+		oneLine = "Fold Margin=NagasakiOsakaHiroshimaHanedaKyoto(&F)\n".encode('utf-8')
+		manyLines = oneLine * 100000 + "φ\n".encode('utf-8')
+		self.ed.AddText(len(manyLines), manyLines)
+		searchString = "φ".encode('utf-8')
+		start = timer()
+		for _ in range(20):
+			self.ed.TargetStart = 0
+			self.ed.TargetEnd = self.ed.Length-1
+			self.ed.SearchFlags = 0
+			pos = self.ed.SearchInTarget(len(searchString), searchString)
+			self.assertGreater(pos, 0)
+		end = timer()
+		duration = end - start
+		print("%6.3f testUTF8AsciiSearches" % duration)
+		self.xite.DoEvents()
+
 if __name__ == '__main__':
 	Xite.main("performanceTests")