From 92e04b39ccd38939d59b17fbf9d7764cba068412 Mon Sep 17 00:00:00 2001
From: Neil
Date: Tue, 13 Jul 2021 16:13:14 +1000
Subject: Restrict UTF8IsAscii to char and unsigned char to avoid failures when (signed) char passed.

---
 src/Document.cxx    | 2 +-
 src/UniConversion.h | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Document.cxx b/src/Document.cxx
index 4fdfe4e34..9b2e3848c 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -1707,7 +1707,7 @@ Sci::Position Document::ParaDown(Sci::Position pos) const {
 }
 
 CharacterClass Document::WordCharacterClass(unsigned int ch) const {
-	if (dbcsCodePage && (!UTF8IsAscii(ch))) {
+	if (dbcsCodePage && (ch >= 0x80)) {
 		if (CpUtf8 == dbcsCodePage) {
 			// Use hard coded Unicode class
 			const CharacterCategory cc = charMap.CategoryFor(ch);
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 73426beed..ead7aae57 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -48,10 +48,15 @@ inline constexpr bool UTF8IsTrailByte(unsigned char ch) noexcept {
 	return (ch >= 0x80) && (ch < 0xc0);
 }
 
-inline constexpr bool UTF8IsAscii(int ch) noexcept {
+inline constexpr bool UTF8IsAscii(unsigned char ch) noexcept {
 	return ch < 0x80;
 }
 
+inline constexpr bool UTF8IsAscii(char ch) noexcept {
+	const unsigned char uch = ch;
+	return uch < 0x80;
+}
+
 enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 };
 int UTF8Classify(const unsigned char *us, size_t len) noexcept;
 inline int UTF8Classify(std::string_view sv) noexcept {
-- 
cgit v1.2.3