about | summary | refs | log | tree | commit | diff | homepage
path: root/src/UniConversion.h
diff options
context:
space:
mode:
author Neil <nyamatongwe@gmail.com> 2018-07-10 15:06:50 +1000
committer Neil <nyamatongwe@gmail.com> 2018-07-10 15:06:50 +1000
commit 56e20ea0283d8018dee48d736ba9dfef3c84dc3f (patch)
tree 21bdb500dfc092fadecb123b87e9799a2c46f6a9 /src/UniConversion.h
parent d27cbe587930d13d3f1802b271d0d13e7e3c6e38 (diff)
download scintilla-mirror-56e20ea0283d8018dee48d736ba9dfef3c84dc3f.tar.gz
Optional indexing of line starts in UTF-8 documents by UTF-32 code points and
UTF-16 code units added.
Diffstat (limited to 'src/UniConversion.h')
-rw-r--r-- src/UniConversion.h 4
1 file changed, 4 insertions, 0 deletions
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 6d257cd8e..c676230da 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -22,6 +22,7 @@ size_t UTF16Length(std::string_view sv);
size_t UTF16FromUTF8(std::string_view sv, wchar_t *tbuf, size_t tlen);
size_t UTF32FromUTF8(std::string_view sv, unsigned int *tbuf, size_t tlen);
unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noexcept;
+bool UTF8IsValid(std::string_view sv) noexcept;
std::string FixInvalidUTF8(const std::string &text);
extern const unsigned char UTF8BytesOfLead[256];
@@ -49,6 +50,9 @@ inline constexpr bool UTF8IsAscii(int ch) noexcept {
enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 };
int UTF8Classify(const unsigned char *us, size_t len) noexcept;
+inline int UTF8Classify(std::string_view sv) noexcept {
+ return UTF8Classify(reinterpret_cast<const unsigned char *>(sv.data()), sv.length());
+}
// Similar to UTF8Classify but returns a length of 1 for invalid bytes
// instead of setting the invalid flag