aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/UniConversion.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/UniConversion.h')
-rw-r--r--src/UniConversion.h13
1 files changed, 13 insertions, 0 deletions
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 704f16239..70e8a9517 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -26,3 +26,16 @@ inline bool UTF8IsAscii(int ch) {
enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 };
int UTF8Classify(const unsigned char *us, int len);
+
+// Line separator is U+2028 \xe2\x80\xa8 (the escaped bytes are the character's UTF-8 encoding)
+// Paragraph separator is U+2029 \xe2\x80\xa9 (same first two bytes as U+2028; only the third byte differs)
+const int UTF8SeparatorLength = 3; // Byte length of each of the two encoded separators listed above.
+inline bool UTF8IsSeparator(const unsigned char *us) { // Returns true when us[0], us[1], us[2] are exactly \xe2 \x80 followed by \xa8 or \xa9.
+ return (us[0] == 0xe2) && (us[1] == 0x80) && ((us[2] == 0xa8) || (us[2] == 0xa9)); // && short-circuits, so us[1] and us[2] are read only after the preceding bytes matched. NOTE(review): no length is passed - presumably the caller guarantees the bytes are readable (e.g. a NUL-terminated buffer); confirm.
+}
+
+// NEL is U+0085 \xc2\x85 (NEXT LINE control character; the escaped bytes are its UTF-8 encoding)
+const int UTF8NELLength = 2; // Byte length of the encoded NEL listed above.
+inline bool UTF8IsNEL(const unsigned char *us) { // Returns true when us[0], us[1] are exactly \xc2 \x85.
+ return (us[0] == 0xc2) && (us[1] == 0x85); // && short-circuits, so us[1] is read only when us[0] is 0xc2. NOTE(review): no length is passed - presumably the caller guarantees both bytes are readable; confirm.
+}