diff options
Diffstat (limited to 'src/UniConversion.h')
-rw-r--r-- | src/UniConversion.h | 13 |
1 files changed, 13 insertions, 0 deletions
diff --git a/src/UniConversion.h b/src/UniConversion.h index 704f16239..70e8a9517 100644 --- a/src/UniConversion.h +++ b/src/UniConversion.h @@ -26,3 +26,16 @@ inline bool UTF8IsAscii(int ch) { enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 }; int UTF8Classify(const unsigned char *us, int len); + +// Line separator is U+2028 \xe2\x80\xa8 +// Paragraph separator is U+2029 \xe2\x80\xa9 +const int UTF8SeparatorLength = 3; +inline bool UTF8IsSeparator(const unsigned char *us) { + return (us[0] == 0xe2) && (us[1] == 0x80) && ((us[2] == 0xa8) || (us[2] == 0xa9)); +} + +// NEL is U+0085 \xc2\x85 +const int UTF8NELLength = 2; +inline bool UTF8IsNEL(const unsigned char *us) { + return (us[0] == 0xc2) && (us[1] == 0x85); +} |