From 24b9ab288ebb48fb4cd7efc0fd6a409f50964664 Mon Sep 17 00:00:00 2001
From: Neil
Date: Tue, 19 May 2020 13:23:04 +1000
Subject: Encapsulate common check for PS, LS, and NEL as UTF8IsMultibyteLineEnd. Avoids construction of temporary array.

---
 src/UniConversion.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src/UniConversion.h')

diff --git a/src/UniConversion.h b/src/UniConversion.h
index 4aea824b6..bfa247c06 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -75,6 +75,13 @@ inline bool UTF8IsNEL(const unsigned char *us) noexcept {
 	return (us[0] == 0xc2) && (us[1] == 0x85);
 }
 
+// Is the sequence of 3 char a UTF-8 line end? Only the last two char are tested for a NEL.
+constexpr bool UTF8IsMultibyteLineEnd(unsigned char ch0, unsigned char ch1, unsigned char ch2) noexcept {
+	return
+		((ch0 == 0xe2) && (ch1 == 0x80) && ((ch2 == 0xa8) || (ch2 == 0xa9))) ||
+		((ch1 == 0xc2) && (ch2 == 0x85));
+}
+
 enum { SURROGATE_LEAD_FIRST = 0xD800 };
 enum { SURROGATE_LEAD_LAST = 0xDBFF };
 enum { SURROGATE_TRAIL_FIRST = 0xDC00 };
-- 
cgit v1.2.3