diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-09-02 23:09:53 +0200 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-09-09 18:22:21 +0200 |
commit | 4dadac8a15b5fa17679db9ef64b437919399f226 (patch) | |
tree | 5e16b1de86b612802872fc45bfa0adbf0dc19b29 /src/glob.c | |
parent | 33124e3d469d028f367b5fcd1f1a7197754f8f09 (diff) | |
download | sciteco-4dadac8a15b5fa17679db9ef64b437919399f226.tar.gz |
glob patterns fully support Unicode now (refs #5)
* The ASCII compiler would try to escape ("\") all bytes of a multibyte
UTF-8 glyph.
* The new implementation escapes only metacharacters and passes down
all non-ASCII glyphs unchanged.
On the downside, this will work only with PCREs.
Diffstat (limited to 'src/glob.c')
-rw-r--r-- | src/glob.c | 29 |
1 files changed, 16 insertions, 13 deletions
@@ -225,17 +225,19 @@ teco_globber_compile_pattern(const gchar *pattern) *pout++ = '['; break; } + /* fall through: escape PCRE metacharacters */ + case '\\': + case '^': + case '$': + case '.': + case '|': + case '(': + case ')': + case '+': + case '{': + *pout++ = '\\'; /* fall through */ default: - /* - * For simplicity, all non-alphanumeric - * characters are escaped since they could - * be PCRE magic characters. - * g_regex_escape_string() is inefficient. - * character anyway. - */ - if (!g_ascii_isalnum(*pattern)) - *pout++ = '\\'; *pout++ = *pattern; break; } @@ -271,12 +273,13 @@ teco_globber_compile_pattern(const gchar *pattern) *pout++ = ']'; break; } - /* fall through */ - default: - if (!g_ascii_isalnum(*pattern)) - *pout++ = '\\'; + /* fall through: escape PCRE metacharacters */ + case '\\': + case '[': + *pout++ = '\\'; /* fall through */ case '-': + default: state = STATE_CLASS; *pout++ = *pattern; break; |