From 4c9ddc3121d0488914858ee511028520b96fd0e9 Mon Sep 17 00:00:00 2001
From: Neil <nyamatongwe@gmail.com>
Date: Tue, 4 Feb 2025 11:47:48 +1100
Subject: Fix segmentation of long lexemes to avoid breaking before modifiers
 like accents that must be drawn with their base letters. This is only a
 subset of implementing grapheme cluster boundaries but it improves behaviour
 with some Asian scripts like Thai and Javanese. Javanese is mostly written
 with (ASCII) Roman characters so issues will be rare but Thai uses Thai
 script. Also slightly improves placement of combining accents in European
 texts. https://github.com/notepad-plus-plus/notepad-plus-plus/issues/14822
 https://github.com/notepad-plus-plus/notepad-plus-plus/issues/16115

---
 src/UniConversion.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/UniConversion.h')

diff --git a/src/UniConversion.h b/src/UniConversion.h
index 7a51b2d08..5990cca8c 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -49,6 +49,10 @@ constexpr bool UTF8IsTrailByte(unsigned char ch) noexcept {
 	return (ch >= 0x80) && (ch < 0xc0);
 }
 
+constexpr bool UTF8IsFirstByte(unsigned char ch) noexcept {	// True when ch can begin a multi-byte UTF-8 sequence; ASCII (< 0x80) is not included.
+	return (ch >= 0xc2) && (ch <= 0xf4);	// Inclusive range: 0xc0/0xc1 could only start overlong forms and bytes above 0xf4 would encode past U+10FFFF.
+}
+
 constexpr bool UTF8IsAscii(unsigned char ch) noexcept {
 	return ch < 0x80;
 }
-- 
cgit v1.2.3