diff options
Diffstat (limited to 'test/unit/testDocument.cxx')
| -rw-r--r-- | test/unit/testDocument.cxx | 193 | 
1 files changed, 189 insertions, 4 deletions
| diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx index c5275d25f..44bb68b7a 100644 --- a/test/unit/testDocument.cxx +++ b/test/unit/testDocument.cxx @@ -36,10 +36,31 @@ using namespace Scintilla::Internal;  // Test Document. +struct Folding { +	int from; +	int to; +	int length; +}; + +// Table of case folding for non-ASCII bytes in Windows Latin code page 1252 +Folding foldings1252[] = { +	{0x8a, 0x9a, 0x01}, +	{0x8c, 0x9c, 0x01}, +	{0x8e, 0x9e, 0x01}, +	{0x9f, 0xff, 0x01}, +	{0xc0, 0xe0, 0x17}, +	{0xd8, 0xf8, 0x07}, +}; +  struct DocPlus {  	Document document;  	DocPlus(std::string_view svInitial, int codePage) : document(DocumentOption::Default) { +		SetCodePage(codePage); +		document.InsertString(0, svInitial.data(), svInitial.length()); +	} + +	void SetCodePage(int codePage) {  		document.SetDBCSCodePage(codePage);  		if (codePage == CpUtf8) {  			document.SetCaseFolder(std::make_unique<CaseFolderUnicode>()); @@ -50,9 +71,27 @@ struct DocPlus {  			pcft->StandardASCII();  			document.SetCaseFolder(std::move(pcft));  		} -		document.InsertString(0, svInitial.data(), svInitial.length());  	} +	void SetSBCSFoldings(const Folding *foldings, size_t length) { +		std::unique_ptr<CaseFolderTable> pcft = std::make_unique<CaseFolderTable>(); +		pcft->StandardASCII(); +		for (size_t block = 0; block < length; block++) { +			for (int fold = 0; fold < foldings[block].length; fold++) { +				pcft->SetTranslation(foldings[block].from + fold, foldings[block].to + fold); +			} +		} +		document.SetCaseFolder(std::move(pcft)); +	} + +	Sci::Position FindNeedle(const std::string &needle, FindOption options, Sci::Position *length) { +		assert(*length == static_cast<Sci::Position>(needle.length())); +		return document.FindText(0, document.Length(), needle.c_str(), options, length); +	} +	Sci::Position FindNeedleReverse(const std::string &needle, FindOption options, Sci::Position *length) { +		assert(*length == static_cast<Sci::Position>(needle.length())); +		return document.FindText(document.Length(), 0, needle.c_str(), options, length); +	}  };  TEST_CASE("Document") { @@ -74,14 +113,130 @@ TEST_CASE("Document") {  		REQUIRE(!doc.document.CanRedo());  	} +	// Search ranges are from first argument to just before second argument +	// Arguments are expected to be at character boundaries and will be tweaked if +	// part way through a character. +	SECTION("SearchInLatin") { +		DocPlus doc("abcde", 0);	// a b c d e +		std::string finding = "b"; +		Sci::Position lengthFinding = finding.length(); +		Sci::Position location = doc.FindNeedle(finding, FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == 1); +		location = doc.FindNeedleReverse(finding, FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == 1); +		location = doc.document.FindText(0, 2, finding.c_str(), FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == 1); +		location = doc.document.FindText(0, 1, finding.c_str(), FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == -1); +	} + +	SECTION("InsensitiveSearchInLatin") { +		DocPlus doc("abcde", 0);	// a b c d e +		std::string finding = "B"; +		Sci::Position lengthFinding = finding.length(); +		Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding); +		REQUIRE(location == 1); +		location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding); +		REQUIRE(location == 1); +		location = doc.document.FindText(0, 2, finding.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == 1); +		location = doc.document.FindText(0, 1, finding.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == -1); +	} + +	SECTION("InsensitiveSearchIn1252") { +		// In Windows Latin, code page 1252, C6 is AE and E6 is ae +		DocPlus doc("tru\xc6s\xe6t", 0);	// t r u AE s ae t +		doc.SetSBCSFoldings(foldings1252, std::size(foldings1252)); + +		// Search for upper-case AE +		std::string finding = "\xc6"; +		Sci::Position lengthFinding = finding.length(); +		Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding); +		REQUIRE(location == 3); +		location = doc.document.FindText(4, doc.document.Length(), finding.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == 5); +		location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding); +		REQUIRE(location == 5); + +		// Search for lower-case ae +		finding = "\xe6"; +		location = doc.FindNeedle(finding, FindOption::None, &lengthFinding); +		REQUIRE(location == 3); +		location = doc.document.FindText(4, doc.document.Length(), finding.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == 5); +		location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding); +		REQUIRE(location == 5); +	} + +	SECTION("Search2InLatin") { +		// Checks that the initial '_' and final 'f' are ignored since they are outside the search bounds +		DocPlus doc("_abcdef", 0);	// _ a b c d e f +		std::string finding = "cd"; +		Sci::Position lengthFinding = finding.length(); +		size_t docLength = doc.document.Length() - 1; +		Sci::Position location = doc.document.FindText(1, docLength, finding.c_str(), FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == 3); +		location = doc.document.FindText(docLength, 1, finding.c_str(), FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == 3); +		location = doc.document.FindText(docLength, 1, "bc", FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == 2); +		location = doc.document.FindText(docLength, 1, "ab", FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == 1); +		location = doc.document.FindText(docLength, 1, "de", FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == 4); +		location = doc.document.FindText(docLength, 1, "_a", FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == -1); +		location = doc.document.FindText(docLength, 1, "ef", FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == -1); +		lengthFinding = 3; +		location = doc.document.FindText(docLength, 1, "cde", FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == 3); +	} +  	SECTION("SearchInUTF8") {  		DocPlus doc("ab\xCE\x93" "d", CpUtf8);	// a b gamma d -		std::string finding = "b"; +		const std::string finding = "b";  		Sci::Position lengthFinding = finding.length(); -		Sci::Position location = doc.document.FindText(0, doc.document.Length(), finding.c_str(), FindOption::MatchCase, &lengthFinding); +		Sci::Position location = doc.FindNeedle(finding, FindOption::MatchCase, &lengthFinding);  		REQUIRE(location == 1);  		location = doc.document.FindText(doc.document.Length(), 0, finding.c_str(), FindOption::MatchCase, &lengthFinding);  		REQUIRE(location == 1); +		location = doc.document.FindText(0, 1, finding.c_str(), FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == -1); +		// Check doesn't try to follow a lead-byte past the search end +		const std::string findingUTF = "\xCE\x93"; +		lengthFinding = findingUTF.length(); +		location = doc.document.FindText(0, 4, findingUTF.c_str(), FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == 2); +		// Only succeeds as 3 is partway through character so adjusted to 4 +		location = doc.document.FindText(0, 3, findingUTF.c_str(), FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == 2); +		location = doc.document.FindText(0, 2, findingUTF.c_str(), FindOption::MatchCase, &lengthFinding); +		REQUIRE(location == -1); +	} + +	SECTION("InsensitiveSearchInUTF8") { +		DocPlus doc("ab\xCE\x93" "d", CpUtf8);	// a b gamma d +		const std::string finding = "b"; +		Sci::Position lengthFinding = finding.length(); +		Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding); +		REQUIRE(location == 1); +		location = doc.document.FindText(doc.document.Length(), 0, finding.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == 1); +		const std::string findingUTF = "\xCE\x93"; +		lengthFinding = findingUTF.length(); +		location = doc.FindNeedle(findingUTF, FindOption::None, &lengthFinding); +		REQUIRE(location == 2); +		location = doc.document.FindText(doc.document.Length(), 0, findingUTF.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == 2); +		location = doc.document.FindText(0, 4, findingUTF.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == 2); +		// Only succeeds as 3 is partway through character so adjusted to 4 +		location = doc.document.FindText(0, 3, findingUTF.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == 2); +		location = doc.document.FindText(0, 2, findingUTF.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == -1);  	}  	SECTION("SearchInShiftJIS") { @@ -92,7 +247,7 @@ TEST_CASE("Document") {  		std::string finding = "b";  		// Search forwards  		Sci::Position lengthFinding = finding.length(); -		Sci::Position location = doc.document.FindText(0, doc.document.Length(), finding.c_str(), FindOption::MatchCase, &lengthFinding); +		Sci::Position location = doc.FindNeedle(finding, FindOption::MatchCase, &lengthFinding);  		REQUIRE(location == 1);  		// Search backwards  		lengthFinding = finding.length(); @@ -100,6 +255,36 @@ TEST_CASE("Document") {  		REQUIRE(location == 1);  	} +	SECTION("InsensitiveSearchInShiftJIS") { +		// {CJK UNIFIED IDEOGRAPH-9955} is two bytes: {0xE9, 'b'} in Shift-JIS +		// The 'b' can be incorrectly matched by the search string 'b' when the search +		// does not iterate the text correctly. +		DocPlus doc("ab\xe9" "b ", 932);	// a b {CJK UNIFIED IDEOGRAPH-9955} {space} +		std::string finding = "b"; +		// Search forwards +		Sci::Position lengthFinding = finding.length(); +		Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding); +		REQUIRE(location == 1); +		// Search backwards +		lengthFinding = finding.length(); +		location = doc.document.FindText(doc.document.Length(), 0, finding.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == 1); +		std::string finding932 = "\xe9" "b"; +		// Search forwards +		lengthFinding = finding932.length(); +		location = doc.FindNeedle(finding932, FindOption::None, &lengthFinding); +		REQUIRE(location == 2); +		// Search backwards +		lengthFinding = finding932.length(); +		location = doc.document.FindText(doc.document.Length(), 0, finding932.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == 2); +		location = doc.document.FindText(0, 3, finding932.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == 2); +		location = doc.document.FindText(0, 2, finding932.c_str(), FindOption::None, &lengthFinding); +		REQUIRE(location == -1); +		// Can not test case mapping of double byte text as folder available here does not implement this  +	} +  	SECTION("GetCharacterAndWidth") {  		Document doc(DocumentOption::Default);  		doc.SetDBCSCodePage(932); | 
