From 9624ba95f4a739da16592f26ced2885da74f79eb Mon Sep 17 00:00:00 2001
From: nyamatongwe
Date: Thu, 9 Oct 2008 22:05:43 +0000
Subject: Fix for #2153429 by Jason Oster. UTF-8 text could be chopped up.

---
 src/LexCSS.cxx | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/LexCSS.cxx b/src/LexCSS.cxx
index f6757c46f..3b139cdcd 100644
--- a/src/LexCSS.cxx
+++ b/src/LexCSS.cxx
@@ -28,7 +28,12 @@ using namespace Scintilla;
 
 
 static inline bool IsAWordChar(const unsigned int ch) {
-	return (isalnum(ch) || ch == '-' || ch == '_' || ch >= 161); // _ is not in fact correct CSS word-character
+	/* FIXME:
+	 * The CSS spec allows "ISO 10646 characters U+00A1 and higher" to be treated as word chars.
+	 * Unfortunately, we are only getting string bytes here, and not full unicode characters. We cannot guarantee
+	 * that our byte is between U+0080 - U+00A0 (to return false), so we have to allow all characters U+0080 and higher
+	 */
+	return ch >= 0x80 || isalnum(ch) || ch == '-' || ch == '_';
 }
 
 inline bool IsCssOperator(const int ch) {
-- 
cgit v1.2.3