about summary refs log tree commit diff homepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/CaseConvert.cxx 21
-rw-r--r-- src/Document.cxx 1
-rw-r--r-- src/UniConversion.cxx 22
-rw-r--r-- src/UniConversion.h 16
-rw-r--r-- src/UnicodeFromUTF8.h 28
5 files changed, 36 insertions, 52 deletions
diff --git a/src/CaseConvert.cxx b/src/CaseConvert.cxx
index 76bc0c652..49205cb3e 100644
--- a/src/CaseConvert.cxx
+++ b/src/CaseConvert.cxx
@@ -18,7 +18,6 @@
#include "StringCopy.h"
#include "CaseConvert.h"
#include "UniConversion.h"
-#include "UnicodeFromUTF8.h"
using namespace Scintilla;
@@ -665,26 +664,6 @@ CaseConverter caseConvFold;
CaseConverter caseConvUp;
CaseConverter caseConvLow;
-void UTF8FromUTF32Character(int uch, char *putf) {
- size_t k = 0;
- if (uch < 0x80) {
- putf[k++] = static_cast<char>(uch);
- } else if (uch < 0x800) {
- putf[k++] = static_cast<char>(0xC0 | (uch >> 6));
- putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
- } else if (uch < 0x10000) {
- putf[k++] = static_cast<char>(0xE0 | (uch >> 12));
- putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
- putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
- } else {
- putf[k++] = static_cast<char>(0xF0 | (uch >> 18));
- putf[k++] = static_cast<char>(0x80 | ((uch >> 12) & 0x3f));
- putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
- putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
- }
- putf[k] = 0;
-}
-
void AddSymmetric(enum CaseConversion conversion, int lower,int upper) {
char lowerUTF8[UTF8MaxBytes+1];
UTF8FromUTF32Character(lower, lowerUTF8);
diff --git a/src/Document.cxx b/src/Document.cxx
index 48913a16c..cb2892c96 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -51,7 +51,6 @@
#include "Document.h"
#include "RESearch.h"
#include "UniConversion.h"
-#include "UnicodeFromUTF8.h"
using namespace Scintilla;
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 19b968932..de86b0b76 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -65,6 +65,26 @@ void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len) {
putf[k] = '\0';
}
+void UTF8FromUTF32Character(int uch, char *putf) { // Encode the single code point uch as UTF-8 into putf and NUL-terminate it; writes up to 5 bytes with no bounds check on putf.
+ size_t k = 0; // index of the next byte to write
+ if (uch < 0x80) { // 1 byte; the compare is signed, so a negative uch also takes this branch
+ putf[k++] = static_cast<char>(uch);
+ } else if (uch < 0x800) { // 2 bytes: 110xxxxx 10xxxxxx
+ putf[k++] = static_cast<char>(0xC0 | (uch >> 6));
+ putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
+ } else if (uch < 0x10000) { // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx (surrogate values are not rejected here)
+ putf[k++] = static_cast<char>(0xE0 | (uch >> 12));
+ putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
+ putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
+ } else { // 4 bytes: 11110xxx followed by three 10xxxxxx trail bytes; no upper limit is checked on uch
+ putf[k++] = static_cast<char>(0xF0 | (uch >> 18));
+ putf[k++] = static_cast<char>(0x80 | ((uch >> 12) & 0x3f));
+ putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
+ putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
+ }
+ putf[k] = '\0'; // terminator goes directly after the last encoded byte
+}
+
size_t UTF16Length(const char *s, size_t len) {
size_t ulen = 0;
const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
@@ -101,7 +121,7 @@ size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen) {
break;
}
- const size_t outLen = (byteCount < 4) ? 1 : 2;
+ const size_t outLen = UTF16LengthFromUTF8ByteCount(byteCount);
if (ui + outLen > tlen) {
throw std::runtime_error("UTF16FromUTF8: attempted write beyond end");
}
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 0f22c06e6..98bcd0329 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -16,6 +16,7 @@ const int unicodeReplacementChar = 0xFFFD;
size_t UTF8Length(const wchar_t *uptr, size_t tlen);
void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len);
+void UTF8FromUTF32Character(int uch, char *putf); // Writes uch as NUL-terminated UTF-8 into putf; the caller provides the buffer (AddSymmetric in CaseConvert.cxx passes char[UTF8MaxBytes+1]).
size_t UTF16Length(const char *s, size_t len);
size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen);
@@ -24,6 +25,19 @@ std::string FixInvalidUTF8(const std::string &text);
extern const unsigned char UTF8BytesOfLead[256];
+inline int UnicodeFromUTF8(const unsigned char *us) { // Decode and return the code point of the UTF-8 sequence that starts at us.
+ switch (UTF8BytesOfLead[us[0]]) { // sequence length is looked up from the lead byte; the table is only declared here, its contents live elsewhere
+ case 1:
+ return us[0]; // length 1: the byte itself is the result
+ case 2:
+ return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F); // 5 payload bits from the lead byte, 6 from the trail byte
+ case 3:
+ return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F); // 4 + 6 + 6 payload bits
+ default: // every other table value is decoded as a 4-byte sequence
+ return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F); // 3 + 6 + 6 + 6 payload bits; us[1..3] are masked but not validated, so the caller must ensure they are readable
+ }
+}
+
inline bool UTF8IsTrailByte(unsigned char ch) { // True when ch lies in 0x80..0xBF, i.e. has the bit pattern 10xxxxxx.
return (ch >= 0x80) && (ch < 0xc0);
}
@@ -63,7 +77,7 @@ inline unsigned int UTF16CharLength(wchar_t uch) {
}
inline unsigned int UTF16LengthFromUTF8ByteCount(unsigned int byteCount) { // Number of UTF-16 code units for a UTF-8 sequence of byteCount bytes.
- return (byteCount < 4) ? 1 : 2;
+ return (byteCount < 4) ? 1 : 2; // fewer than 4 bytes gives one unit, otherwise two; the removed line above has the same tokens, so this hunk changes whitespace only
}
}
diff --git a/src/UnicodeFromUTF8.h b/src/UnicodeFromUTF8.h
deleted file mode 100644
index 17999a786..000000000
--- a/src/UnicodeFromUTF8.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Scintilla source code edit control
-/** @file UnicodeFromUTF8.h
- ** Lexer infrastructure.
- **/
-// Copyright 2013 by Neil Hodgson <neilh@scintilla.org>
-// This file is in the public domain.
-
-#ifndef UNICODEFROMUTF8_H
-#define UNICODEFROMUTF8_H
-
-namespace Scintilla {
-
-inline int UnicodeFromUTF8(const unsigned char *us) {
- if (us[0] < 0xC2) {
- return us[0];
- } else if (us[0] < 0xE0) {
- return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
- } else if (us[0] < 0xF0) {
- return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
- } else if (us[0] < 0xF5) {
- return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
- }
- return us[0];
-}
-
-}
-
-#endif