about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Neil <nyamatongwe@gmail.com> 2014-12-22 11:52:44 +1100
committer Neil <nyamatongwe@gmail.com> 2014-12-22 11:52:44 +1100
commit 56efd859ce5996c60eea2a0099dcb3dde13af072 (patch)
tree 653a09e26797d71d74fd32224c035fb1255d6ad6
parent 1a11c0356117fd4e7c5f230b974a9dfd5c8a4dc9 (diff)
download scintilla-mirror-56efd859ce5996c60eea2a0099dcb3dde13af072.tar.gz
Replace function UnicodeFromBytes with UnicodeFromUTF8 as they are exactly the
same. Add unit tests for UnicodeFromUTF8.
-rw-r--r-- src/Document.cxx 18
-rw-r--r-- test/unit/testUnicodeFromUTF8.cxx 44
-rw-r--r-- test/unit/unitTest.cxx 1
3 files changed, 48 insertions, 15 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index d0909b808..c88f8ba42 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -37,6 +37,7 @@
#include "Document.h"
#include "RESearch.h"
#include "UniConversion.h"
+#include "UnicodeFromUTF8.h"
#ifdef SCI_NAMESPACE
using namespace Scintilla;
@@ -766,19 +767,6 @@ bool Document::NextCharacter(int &pos, int moveDir) const {
}
}
-static inline int UnicodeFromBytes(const unsigned char *us) {
- if (us[0] < 0xC2) {
- return us[0];
- } else if (us[0] < 0xE0) {
- return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
- } else if (us[0] < 0xF0) {
- return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
- } else if (us[0] < 0xF5) {
- return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
- }
- return us[0];
-}
-
// Return -1 on out-of-bounds
int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
int pos = positionStart;
@@ -819,7 +807,7 @@ int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
character = 0xDC80 + leadByte;
} else {
bytesInCharacter = utf8status & UTF8MaskWidth;
- character = UnicodeFromBytes(charBytes);
+ character = UnicodeFromUTF8(charBytes);
}
}
} else {
@@ -1610,7 +1598,7 @@ Document::CharacterExtracted Document::ExtractCharacter(int position) const {
// Treat as invalid and use up just one byte
return CharacterExtracted(unicodeReplacementChar, 1);
} else {
- return CharacterExtracted(UnicodeFromBytes(charBytes), utf8status & UTF8MaskWidth);
+ return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
}
}
diff --git a/test/unit/testUnicodeFromUTF8.cxx b/test/unit/testUnicodeFromUTF8.cxx
new file mode 100644
index 000000000..841a9c68c
--- /dev/null
+++ b/test/unit/testUnicodeFromUTF8.cxx
@@ -0,0 +1,44 @@
+// Unit Tests for Scintilla internal data structures
+
+#include <string.h>
+
+#include <algorithm>
+
+#include "Platform.h"
+
+#include "UnicodeFromUTF8.h"
+
+#include "catch.hpp"
+
+// Test UnicodeFromUTF8.
+// Use examples from Wikipedia:
+// http://en.wikipedia.org/wiki/UTF-8
+
+TEST_CASE("UnicodeFromUTF8") {
+
+ SECTION("ASCII") {
+ const unsigned char s[]={'a', 0};
+ REQUIRE(UnicodeFromUTF8(s) == 'a');
+ }
+
+ SECTION("Example1") {
+ const unsigned char s[]={0x24, 0};
+ REQUIRE(UnicodeFromUTF8(s) == 0x24);
+ }
+
+ SECTION("Example2") {
+ const unsigned char s[]={0xC2, 0xA2, 0};
+ REQUIRE(UnicodeFromUTF8(s) == 0xA2);
+ }
+
+ SECTION("Example3") {
+ const unsigned char s[]={0xE2, 0x82, 0xAC, 0};
+ REQUIRE(UnicodeFromUTF8(s) == 0x20AC);
+ }
+
+ SECTION("Example4") {
+ const unsigned char s[]={0xF0, 0x90, 0x8D, 0x88, 0};
+ REQUIRE(UnicodeFromUTF8(s) == 0x10348);
+ }
+
+}
diff --git a/test/unit/unitTest.cxx b/test/unit/unitTest.cxx
index 3aa78a54d..a6feed204 100644
--- a/test/unit/unitTest.cxx
+++ b/test/unit/unitTest.cxx
@@ -10,6 +10,7 @@
Decoration
DecorationList
CellBuffer
+ UnicodeFromUTF8
To do:
PerLine *