From 4dadac8a15b5fa17679db9ef64b437919399f226 Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Mon, 2 Sep 2024 23:09:53 +0200 Subject: glob patterns fully support Unicode now (refs #5) * The ASCII compiler would try to escape ("\") all bytes of a multibyte UTF-8 glyph. * The new implementation escapes only metacharacters and passes down all non-ASCII glyphs unchanged. On the downside, this will work only with PCREs. --- src/glob.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/glob.c b/src/glob.c index a4d12d3..050ec4d 100644 --- a/src/glob.c +++ b/src/glob.c @@ -225,17 +225,19 @@ teco_globber_compile_pattern(const gchar *pattern) *pout++ = '['; break; } + /* fall through: escape PCRE metacharacters */ + case '\\': + case '^': + case '$': + case '.': + case '|': + case '(': + case ')': + case '+': + case '{': + *pout++ = '\\'; /* fall through */ default: - /* - * For simplicity, all non-alphanumeric - * characters are escaped since they could - * be PCRE magic characters. - * g_regex_escape_string() is inefficient. - * character anyway. - */ - if (!g_ascii_isalnum(*pattern)) - *pout++ = '\\'; *pout++ = *pattern; break; } @@ -271,12 +273,13 @@ teco_globber_compile_pattern(const gchar *pattern) *pout++ = ']'; break; } - /* fall through */ - default: - if (!g_ascii_isalnum(*pattern)) - *pout++ = '\\'; + /* fall through: escape PCRE metacharacters */ + case '\\': + case '[': + *pout++ = '\\'; /* fall through */ case '-': + default: state = STATE_CLASS; *pout++ = *pattern; break; -- cgit v1.2.3