diff options
author | Neil <nyamatongwe@gmail.com> | 2014-12-22 11:52:44 +1100 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2014-12-22 11:52:44 +1100 |
commit | 743dc19a40f45f312b3851e6f7fa010102c4391c (patch) | |
tree | 0e49035cd4a15bed695ece7d3d2dd604294d940e | |
parent | 65c581df8051692502612bb45aad5add08c38cf8 (diff) | |
download | scintilla-mirror-743dc19a40f45f312b3851e6f7fa010102c4391c.tar.gz |
Replace calls to the function UnicodeFromBytes with UnicodeFromUTF8 and remove
UnicodeFromBytes from src/Document.cxx, as the two functions are exactly the same.
Add unit tests for UnicodeFromUTF8.
-rw-r--r-- | src/Document.cxx | 18 | ||||
-rw-r--r-- | test/unit/testUnicodeFromUTF8.cxx | 44 | ||||
-rw-r--r-- | test/unit/unitTest.cxx | 1 |
3 files changed, 48 insertions, 15 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index d0909b808..c88f8ba42 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -37,6 +37,7 @@ #include "Document.h" #include "RESearch.h" #include "UniConversion.h" +#include "UnicodeFromUTF8.h" #ifdef SCI_NAMESPACE using namespace Scintilla; @@ -766,19 +767,6 @@ bool Document::NextCharacter(int &pos, int moveDir) const { } } -static inline int UnicodeFromBytes(const unsigned char *us) { - if (us[0] < 0xC2) { - return us[0]; - } else if (us[0] < 0xE0) { - return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F); - } else if (us[0] < 0xF0) { - return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F); - } else if (us[0] < 0xF5) { - return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F); - } - return us[0]; -} - // Return -1 on out-of-bounds int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const { int pos = positionStart; @@ -819,7 +807,7 @@ int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const { character = 0xDC80 + leadByte; } else { bytesInCharacter = utf8status & UTF8MaskWidth; - character = UnicodeFromBytes(charBytes); + character = UnicodeFromUTF8(charBytes); } } } else { @@ -1610,7 +1598,7 @@ Document::CharacterExtracted Document::ExtractCharacter(int position) const { // Treat as invalid and use up just one byte return CharacterExtracted(unicodeReplacementChar, 1); } else { - return CharacterExtracted(UnicodeFromBytes(charBytes), utf8status & UTF8MaskWidth); + return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); } } diff --git a/test/unit/testUnicodeFromUTF8.cxx b/test/unit/testUnicodeFromUTF8.cxx new file mode 100644 index 000000000..841a9c68c --- /dev/null +++ b/test/unit/testUnicodeFromUTF8.cxx @@ -0,0 +1,44 @@ +// Unit Tests for Scintilla internal data structures + +#include <string.h> + +#include <algorithm> + +#include "Platform.h" + +#include "UnicodeFromUTF8.h" + 
+#include "catch.hpp" + +// Test UnicodeFromUTF8. +// Use examples from Wikipedia: +// http://en.wikipedia.org/wiki/UTF-8 + +TEST_CASE("UnicodeFromUTF8") { + + SECTION("ASCII") { + const unsigned char s[]={'a', 0}; + REQUIRE(UnicodeFromUTF8(s) == 'a'); + } + + SECTION("Example1") { + const unsigned char s[]={0x24, 0}; + REQUIRE(UnicodeFromUTF8(s) == 0x24); + } + + SECTION("Example2") { + const unsigned char s[]={0xC2, 0xA2, 0}; + REQUIRE(UnicodeFromUTF8(s) == 0xA2); + } + + SECTION("Example3") { + const unsigned char s[]={0xE2, 0x82, 0xAC, 0}; + REQUIRE(UnicodeFromUTF8(s) == 0x20AC); + } + + SECTION("Example4") { + const unsigned char s[]={0xF0, 0x90, 0x8D, 0x88, 0}; + REQUIRE(UnicodeFromUTF8(s) == 0x10348); + } + +} diff --git a/test/unit/unitTest.cxx b/test/unit/unitTest.cxx index 3aa78a54d..a6feed204 100644 --- a/test/unit/unitTest.cxx +++ b/test/unit/unitTest.cxx @@ -10,6 +10,7 @@ Decoration DecorationList CellBuffer + UnicodeFromUTF8 To do: PerLine * |