Replace function UnicodeFromBytes with UnicodeFromUTF8 as they are exactly the same.
Add unit tests for UnicodeFromUTF8.
author: Neil <nyamatongwe@gmail.com> 2014-12-22 11:52:44 +1100
committer: Neil <nyamatongwe@gmail.com> 2014-12-22 11:52:44 +1100
commit: 743dc19a40f45f312b3851e6f7fa010102c4391c (patch)
tree: 0e49035cd4a15bed695ece7d3d2dd604294d940e
parent: 65c581df8051692502612bb45aad5add08c38cf8 (diff)
download: scintilla-mirror-743dc19a40f45f312b3851e6f7fa010102c4391c.tar.gz
3 files changed, 48 insertions, 15 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index d0909b808..c88f8ba42 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -37,6 +37,7 @@
 #include "Document.h"
 #include "RESearch.h"
 #include "UniConversion.h"
+#include "UnicodeFromUTF8.h"
 
 #ifdef SCI_NAMESPACE
 using namespace Scintilla;
@@ -766,19 +767,6 @@ bool Document::NextCharacter(int &pos, int moveDir) const {
 	}
 }
 
-static inline int UnicodeFromBytes(const unsigned char *us) {
-	if (us[0] < 0xC2) {
-		return us[0];
-	} else if (us[0] < 0xE0) {
-		return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
-	} else if (us[0] < 0xF0) {
-		return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
-	} else if (us[0] < 0xF5) {
-		return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
-	}
-	return us[0];
-}
-
 // Return -1  on out-of-bounds
 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
 	int pos = positionStart;
@@ -819,7 +807,7 @@ int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
 					character =  0xDC80 + leadByte;
 				} else {
 					bytesInCharacter = utf8status & UTF8MaskWidth;
-					character = UnicodeFromBytes(charBytes);
+					character = UnicodeFromUTF8(charBytes);
 				}
 			}
 		} else {
@@ -1610,7 +1598,7 @@ Document::CharacterExtracted Document::ExtractCharacter(int position) const {
 		// Treat as invalid and use up just one byte
 		return CharacterExtracted(unicodeReplacementChar, 1);
 	} else {
-		return CharacterExtracted(UnicodeFromBytes(charBytes), utf8status & UTF8MaskWidth);
+		return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
 	}
 }
 
diff --git a/test/unit/testUnicodeFromUTF8.cxx b/test/unit/testUnicodeFromUTF8.cxx
new file mode 100644
index 000000000..841a9c68c
--- /dev/null
+++ b/test/unit/testUnicodeFromUTF8.cxx
@@ -0,0 +1,44 @@
+// Unit Tests for Scintilla internal data structures
+
+#include <string.h>
+
+#include <algorithm>
+
+#include "Platform.h"
+
+#include "UnicodeFromUTF8.h"
+
+#include "catch.hpp"
+
+// Test UnicodeFromUTF8.
+// Use examples from Wikipedia:
+// http://en.wikipedia.org/wiki/UTF-8
+
+TEST_CASE("UnicodeFromUTF8") {
+
+	SECTION("ASCII") {
+		const unsigned char s[]={'a', 0};
+		REQUIRE(UnicodeFromUTF8(s) == 'a');
+	}
+
+	SECTION("Example1") {
+		const unsigned char s[]={0x24, 0};
+		REQUIRE(UnicodeFromUTF8(s) == 0x24);
+	}
+
+	SECTION("Example2") {
+		const unsigned char s[]={0xC2, 0xA2, 0};
+		REQUIRE(UnicodeFromUTF8(s) == 0xA2);
+	}
+
+	SECTION("Example3") {
+		const unsigned char s[]={0xE2, 0x82, 0xAC, 0};
+		REQUIRE(UnicodeFromUTF8(s) == 0x20AC);
+	}
+
+	SECTION("Example4") {
+		const unsigned char s[]={0xF0, 0x90, 0x8D, 0x88, 0};
+		REQUIRE(UnicodeFromUTF8(s) == 0x10348);
+	}
+
+}
diff --git a/test/unit/unitTest.cxx b/test/unit/unitTest.cxx
index 3aa78a54d..a6feed204 100644
--- a/test/unit/unitTest.cxx
+++ b/test/unit/unitTest.cxx
@@ -10,6 +10,7 @@
         Decoration
         DecorationList
         CellBuffer
+        UnicodeFromUTF8
 
     To do:
         PerLine *
author	Neil <nyamatongwe@gmail.com>	2014-12-22 11:52:44 +1100
committer	Neil <nyamatongwe@gmail.com>	2014-12-22 11:52:44 +1100
commit	743dc19a40f45f312b3851e6f7fa010102c4391c (patch)
tree	0e49035cd4a15bed695ece7d3d2dd604294d940e
parent	65c581df8051692502612bb45aad5add08c38cf8 (diff)
download	scintilla-mirror-743dc19a40f45f312b3851e6f7fa010102c4391c.tar.gz