diff options
| -rw-r--r-- | src/Document.cxx | 18 |
| -rw-r--r-- | test/unit/testUnicodeFromUTF8.cxx | 44 |
| -rw-r--r-- | test/unit/unitTest.cxx | 1 |
3 files changed, 48 insertions, 15 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index d0909b808..c88f8ba42 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -37,6 +37,7 @@
 #include "Document.h"
 #include "RESearch.h"
 #include "UniConversion.h"
+#include "UnicodeFromUTF8.h"
 
 #ifdef SCI_NAMESPACE
 using namespace Scintilla;
@@ -766,19 +767,6 @@ bool Document::NextCharacter(int &pos, int moveDir) const {
 	}
 }
 
-static inline int UnicodeFromBytes(const unsigned char *us) {
-	if (us[0] < 0xC2) {
-		return us[0];
-	} else if (us[0] < 0xE0) {
-		return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
-	} else if (us[0] < 0xF0) {
-		return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
-	} else if (us[0] < 0xF5) {
-		return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
-	}
-	return us[0];
-}
-
 // Return -1  on out-of-bounds
 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
 	int pos = positionStart;
@@ -819,7 +807,7 @@ int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
 					character =  0xDC80 + leadByte;
 				} else {
 					bytesInCharacter = utf8status & UTF8MaskWidth;
-					character = UnicodeFromBytes(charBytes);
+					character = UnicodeFromUTF8(charBytes);
 				}
 			}
 		} else {
@@ -1610,7 +1598,7 @@ Document::CharacterExtracted Document::ExtractCharacter(int position) const {
 		// Treat as invalid and use up just one byte
 		return CharacterExtracted(unicodeReplacementChar, 1);
 	} else {
-		return CharacterExtracted(UnicodeFromBytes(charBytes), utf8status & UTF8MaskWidth);
+		return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
 	}
 }
 
diff --git a/test/unit/testUnicodeFromUTF8.cxx b/test/unit/testUnicodeFromUTF8.cxx
new file mode 100644
index 000000000..841a9c68c
--- /dev/null
+++ b/test/unit/testUnicodeFromUTF8.cxx
@@ -0,0 +1,44 @@
+// Unit Tests for Scintilla internal data structures
+
+#include <string.h>
+
+#include <algorithm>
+
+#include "Platform.h"
+
+#include "UnicodeFromUTF8.h"
+
+#include "catch.hpp"
+
+// Test UnicodeFromUTF8.
+// Use examples from Wikipedia:
+// http://en.wikipedia.org/wiki/UTF-8
+
+TEST_CASE("UnicodeFromUTF8") {
+
+	SECTION("ASCII") {
+		const unsigned char s[]={'a', 0};
+		REQUIRE(UnicodeFromUTF8(s) == 'a');
+	}
+
+	SECTION("Example1") {
+		const unsigned char s[]={0x24, 0};
+		REQUIRE(UnicodeFromUTF8(s) == 0x24);
+	}
+
+	SECTION("Example2") {
+		const unsigned char s[]={0xC2, 0xA2, 0};
+		REQUIRE(UnicodeFromUTF8(s) == 0xA2);
+	}
+
+	SECTION("Example3") {
+		const unsigned char s[]={0xE2, 0x82, 0xAC, 0};
+		REQUIRE(UnicodeFromUTF8(s) == 0x20AC);
+	}
+
+	SECTION("Example4") {
+		const unsigned char s[]={0xF0, 0x90, 0x8D, 0x88, 0};
+		REQUIRE(UnicodeFromUTF8(s) == 0x10348);
+	}
+
+}
diff --git a/test/unit/unitTest.cxx b/test/unit/unitTest.cxx
index 3aa78a54d..a6feed204 100644
--- a/test/unit/unitTest.cxx
+++ b/test/unit/unitTest.cxx
@@ -10,6 +10,7 @@
         Decoration
         DecorationList
         CellBuffer
+        UnicodeFromUTF8
 
     To do:
         PerLine *
