aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2024-09-02 23:09:53 +0200
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2024-09-09 18:22:21 +0200
commit4dadac8a15b5fa17679db9ef64b437919399f226 (patch)
tree5e16b1de86b612802872fc45bfa0adbf0dc19b29 /src
parent33124e3d469d028f367b5fcd1f1a7197754f8f09 (diff)
downloadsciteco-4dadac8a15b5fa17679db9ef64b437919399f226.tar.gz
glob patterns fully support Unicode now (refs #5)
* The previous ASCII-only compiler escaped ("\") every byte of a multibyte UTF-8 glyph. * The new implementation escapes only PCRE metacharacters and passes all non-ASCII glyphs through unchanged. On the downside, this will work only with PCREs.
Diffstat (limited to 'src')
-rw-r--r-- src/glob.c 29
1 files changed, 16 insertions, 13 deletions
diff --git a/src/glob.c b/src/glob.c
index a4d12d3..050ec4d 100644
--- a/src/glob.c
+++ b/src/glob.c
@@ -225,17 +225,19 @@ teco_globber_compile_pattern(const gchar *pattern)
*pout++ = '[';
break;
}
+ /* fall through: escape PCRE metacharacters */
+ case '\\':
+ case '^':
+ case '$':
+ case '.':
+ case '|':
+ case '(':
+ case ')':
+ case '+':
+ case '{':
+ *pout++ = '\\';
/* fall through */
default:
- /*
- * For simplicity, all non-alphanumeric
- * characters are escaped since they could
- * be PCRE magic characters.
- * g_regex_escape_string() is inefficient.
- * character anyway.
- */
- if (!g_ascii_isalnum(*pattern))
- *pout++ = '\\';
*pout++ = *pattern;
break;
}
@@ -271,12 +273,13 @@ teco_globber_compile_pattern(const gchar *pattern)
*pout++ = ']';
break;
}
- /* fall through */
- default:
- if (!g_ascii_isalnum(*pattern))
- *pout++ = '\\';
+ /* fall through: escape PCRE metacharacters */
+ case '\\':
+ case '[':
+ *pout++ = '\\';
/* fall through */
case '-':
+ default:
state = STATE_CLASS;
*pout++ = *pattern;
break;