From 0bb4d5456748c8794a943b4716ee089d0590519c Mon Sep 17 00:00:00 2001
From: Zufu Liu <unknown>
Date: Sat, 24 Mar 2018 13:53:22 +1100
Subject: Feature [feature-requests:#1212]. Move Unicode conversions into
 UniConversion. Move Unicode conversion functions UnicodeFromUTF8 and
 UTF8FromUTF32Character into UniConversion.

---
 src/CaseConvert.cxx   | 21 ---------------------
 src/Document.cxx      |  1 -
 src/UniConversion.cxx | 22 +++++++++++++++++++++-
 src/UniConversion.h   | 16 +++++++++++++++-
 src/UnicodeFromUTF8.h | 28 ----------------------------
 5 files changed, 36 insertions(+), 52 deletions(-)
 delete mode 100644 src/UnicodeFromUTF8.h

(limited to 'src')
diff --git a/src/CaseConvert.cxx b/src/CaseConvert.cxx
index 76bc0c652..49205cb3e 100644
--- a/src/CaseConvert.cxx
+++ b/src/CaseConvert.cxx
@@ -18,7 +18,6 @@
 #include "StringCopy.h"
 #include "CaseConvert.h"
 #include "UniConversion.h"
-#include "UnicodeFromUTF8.h"
 
 using namespace Scintilla;
 
@@ -665,26 +664,6 @@ CaseConverter caseConvFold;
 CaseConverter caseConvUp;
 CaseConverter caseConvLow;
 
-void UTF8FromUTF32Character(int uch, char *putf) {
-	size_t k = 0;
-	if (uch < 0x80) {
-		putf[k++] = static_cast<char>(uch);
-	} else if (uch < 0x800) {
-		putf[k++] = static_cast<char>(0xC0 | (uch >> 6));
-		putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
-	} else if (uch < 0x10000) {
-		putf[k++] = static_cast<char>(0xE0 | (uch >> 12));
-		putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
-		putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
-	} else {
-		putf[k++] = static_cast<char>(0xF0 | (uch >> 18));
-		putf[k++] = static_cast<char>(0x80 | ((uch >> 12) & 0x3f));
-		putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
-		putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
-	}
-	putf[k] = 0;
-}
-
 void AddSymmetric(enum CaseConversion conversion, int lower,int upper) {
 	char lowerUTF8[UTF8MaxBytes+1];
 	UTF8FromUTF32Character(lower, lowerUTF8);
diff --git a/src/Document.cxx b/src/Document.cxx
index 48913a16c..cb2892c96 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -51,7 +51,6 @@
 #include "Document.h"
 #include "RESearch.h"
 #include "UniConversion.h"
-#include "UnicodeFromUTF8.h"
 
 using namespace Scintilla;
 
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 19b968932..de86b0b76 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -65,6 +65,26 @@ void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len) {
 		putf[k] = '\0';
 }
 
+void UTF8FromUTF32Character(int uch, char *putf) {	// Encode uch as UTF-8 into putf, then NUL-terminate; writes at most 4 bytes plus the terminator.
+	size_t k = 0;	// Index of the next byte to write in putf.
+	if (uch < 0x80) {	// 1-byte form; uch is a signed int, so negative values also take this branch.
+		putf[k++] = static_cast<char>(uch);
+	} else if (uch < 0x800) {	// 2-byte form: 110xxxxx 10xxxxxx.
+		putf[k++] = static_cast<char>(0xC0 | (uch >> 6));	// Lead byte carries the bits above the low 6.
+		putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));	// Trail byte carries the low 6 bits.
+	} else if (uch < 0x10000) {	// 3-byte form: 1110xxxx 10xxxxxx 10xxxxxx; no surrogate range check is made here.
+		putf[k++] = static_cast<char>(0xE0 | (uch >> 12));
+		putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
+		putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
+	} else {	// 4-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx; no upper bound is checked on uch.
+		putf[k++] = static_cast<char>(0xF0 | (uch >> 18));
+		putf[k++] = static_cast<char>(0x80 | ((uch >> 12) & 0x3f));
+		putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
+		putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
+	}
+	putf[k] = '\0';	// Terminator goes right after the last encoded byte.
+}
+
 size_t UTF16Length(const char *s, size_t len) {
 	size_t ulen = 0;
 	const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
@@ -101,7 +121,7 @@ size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen) {
 			break;
 		}
 
-		const size_t outLen = (byteCount < 4) ? 1 : 2;
+		const size_t outLen = UTF16LengthFromUTF8ByteCount(byteCount);
 		if (ui + outLen > tlen) {
 			throw std::runtime_error("UTF16FromUTF8: attempted write beyond end");
 		}
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 0f22c06e6..98bcd0329 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -16,6 +16,7 @@ const int unicodeReplacementChar = 0xFFFD;
 
 size_t UTF8Length(const wchar_t *uptr, size_t tlen);
 void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len);
+void UTF8FromUTF32Character(int uch, char *putf);
 size_t UTF16Length(const char *s, size_t len);
 size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
 size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen);
@@ -24,6 +25,19 @@ std::string FixInvalidUTF8(const std::string &text);
 
 extern const unsigned char UTF8BytesOfLead[256];
 
+inline int UnicodeFromUTF8(const unsigned char *us) {	// Decode the UTF-8 sequence starting at us into a single value; trail bytes are masked, not validated.
+	switch (UTF8BytesOfLead[us[0]]) {	// Table is defined elsewhere; presumably the sequence length implied by the lead byte - confirm.
+	case 1:
+		return us[0];	// Single byte is returned unchanged.
+	case 2:
+		return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);	// 5 payload bits from the lead byte + 6 from the trail byte.
+	case 3:
+		return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);	// 4 + 6 + 6 payload bits.
+	default:	// Every other table value is decoded as the 4-byte form.
+		return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);	// 3 + 6 + 6 + 6 bits; reads us[1..3] with no bounds check.
+	}
+}
+
 inline bool UTF8IsTrailByte(unsigned char ch) {
 	return (ch >= 0x80) && (ch < 0xc0);
 }
@@ -63,7 +77,7 @@ inline unsigned int UTF16CharLength(wchar_t uch) {
 }
 
 inline unsigned int UTF16LengthFromUTF8ByteCount(unsigned int byteCount) {
-    return (byteCount < 4) ? 1 : 2;
+	return (byteCount < 4) ? 1 : 2;
 }
 
 }
diff --git a/src/UnicodeFromUTF8.h b/src/UnicodeFromUTF8.h
deleted file mode 100644
index 17999a786..000000000
--- a/src/UnicodeFromUTF8.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Scintilla source code edit control
-/** @file UnicodeFromUTF8.h
- ** Lexer infrastructure.
- **/
-// Copyright 2013 by Neil Hodgson <neilh@scintilla.org>
-// This file is in the public domain.
-
-#ifndef UNICODEFROMUTF8_H
-#define UNICODEFROMUTF8_H
-
-namespace Scintilla {
-
-inline int UnicodeFromUTF8(const unsigned char *us) {
-	if (us[0] < 0xC2) {
-		return us[0];
-	} else if (us[0] < 0xE0) {
-		return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
-	} else if (us[0] < 0xF0) {
-		return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
-	} else if (us[0] < 0xF5) {
-		return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
-	}
-	return us[0];
-}
-
-}
-
-#endif
-- 
cgit v1.2.3