1 files changed, 195 insertions, 8 deletions
diff --git a/src/glob.cpp b/src/glob.cpp
index 2806fbf..cb9633f 100644
--- a/src/glob.cpp
+++ b/src/glob.cpp
@@ -61,7 +61,7 @@ Globber::Globber(const gchar *pattern, GFileTest _test)
 	dir = g_dir_open(*dirname ? dirname : ".", 0, NULL);
 	/* if dirname does not exist, dir may be NULL */
 
-	Globber::pattern = g_pattern_spec_new(pattern + dirname_len);
+	Globber::pattern = compile_pattern(pattern + dirname_len);
 }
 
 gchar *
@@ -75,7 +75,7 @@ Globber::next(void)
 	while ((basename = g_dir_read_name(dir))) {
 		gchar *filename;
 
-		if (!g_pattern_match_string(pattern, basename))
+		if (!g_regex_match(pattern, basename, (GRegexMatchFlags)0, NULL))
 			continue;
 
 		/*
@@ -100,12 +100,196 @@ Globber::next(void)
 Globber::~Globber()
 {
 	if (pattern)
-		g_pattern_spec_free(pattern);
+		g_regex_unref(pattern);
 	if (dir)
 		g_dir_close(dir);
 	g_free(dirname);
 }
 
+/**
+ * Escape the wildcard characters of a glob pattern.
+ *
+ * Each "*", "?" and "[" is wrapped into a character class
+ * of its own ("[*]", "[?]" and "[[]" respectively).
+ * All other characters are copied verbatim.
+ *
+ * @param pattern The null-terminated string to escape.
+ * @return A newly allocated null-terminated string.
+ *         Release it with g_free().
+ */
+gchar *
+Globber::escape_pattern(const gchar *pattern)
+{
+	gsize size = 1; /* terminating null byte */
+	gchar *result, *dst;
+
+	/*
+	 * First pass: compute the exact size of the result,
+	 * so that it can be allocated at once.
+	 */
+	for (const gchar *src = pattern; *src != '\0'; src++) {
+		const bool magic = *src == '*' || *src == '?' || *src == '[';
+
+		size += magic ? 3 : 1;
+	}
+
+	result = dst = (gchar *)g_malloc(size);
+
+	/*
+	 * Second pass: copy the characters, bracketing the magic ones.
+	 */
+	for (const gchar *src = pattern; *src != '\0'; src++) {
+		const bool magic = *src == '*' || *src == '?' || *src == '[';
+
+		if (magic)
+			*dst++ = '[';
+		*dst++ = *src;
+		if (magic)
+			*dst++ = ']';
+	}
+	*dst = '\0';
+
+	return result;
+}
+
+/**
+ * Compile a fnmatch(3)-compatible glob pattern to
+ * a PCRE regular expression.
+ *
+ * There is GPattern, but it only supports the
+ * "*" and "?" wildcards which most importantly
+ * do not allow escaping.
+ *
+ * @param pattern The pattern to compile.
+ * @return A new compiled regular expression object.
+ *         Always non-NULL. Unref after use.
+ *         Never matches if no valid regex could be generated.
+ */
+GRegex *
+Globber::compile_pattern(const gchar *pattern)
+{
+	gchar *pattern_regex, *pout;
+	const gchar *members;
+	GRegex *pattern_compiled;
+
+	enum {
+		STATE_WILDCARD,
+		STATE_CLASS_START,
+		STATE_CLASS_NEGATE,
+		STATE_CLASS
+	} state = STATE_WILDCARD;
+
+	/*
+	 * NOTE: At most two characters are generated per input
+	 * character (plus "$" and the null byte). The buffer is not
+	 * on the stack since pattern might be arbitrary user input.
+	 */
+	pout = pattern_regex = (gchar *)g_malloc(strlen(pattern)*2 + 1 + 1);
+
+	while (*pattern) {
+		if (state == STATE_WILDCARD) {
+			/* Outside a character class/set. */
+			switch (*pattern) {
+			case '*':
+				*pout++ = '.';
+				*pout++ = '*';
+				break;
+			case '?':
+				*pout++ = '.';
+				break;
+			case '[':
+				/*
+				 * An unclosed character class is allowed in
+				 * fnmatch(3) but invalid in PCRE. A "]" directly
+				 * after the opening bracket or the negation
+				 * character is a set member and cannot close it.
+				 * FIXME: This is sort of inefficient...
+				 */
+				members = pattern + 1;
+				if (*members == '!' || *members == '^')
+					members++;
+				if (*members == ']')
+					members++;
+				if (strchr(members, ']')) {
+					state = STATE_CLASS_START;
+					*pout++ = '[';
+					break;
+				}
+				/* fall through */
+			default:
+				/*
+				 * For simplicity, all non-alphanumeric characters
+				 * are escaped since they could be PCRE magic
+				 * characters. g_regex_escape_string() is inefficient.
+				 */
+				if (!g_ascii_isalnum(*pattern))
+					*pout++ = '\\';
+				*pout++ = *pattern;
+				break;
+			}
+		} else {
+			/* Within a character class/set. */
+			switch (*pattern) {
+			case '!':
+				/*
+				 * fnmatch(3) allows ! instead of ^ immediately
+				 * after the opening bracket.
+				 */
+				if (state > STATE_CLASS_START) {
+					state = STATE_CLASS;
+					*pout++ = '!';
+					break;
+				}
+				/* fall through */
+			case '^':
+				state = state == STATE_CLASS_START
+					? STATE_CLASS_NEGATE : STATE_CLASS;
+				*pout++ = '^';
+				break;
+			case ']':
+				/*
+				 * fnmatch(3) allows the closing bracket as the
+				 * first character to include it in the set, while
+				 * PCRE requires it to be escaped.
+				 */
+				if (state == STATE_CLASS) {
+					state = STATE_WILDCARD;
+					*pout++ = ']';
+					break;
+				}
+				/* fall through */
+			default:
+				if (!g_ascii_isalnum(*pattern))
+					*pout++ = '\\';
+				/* fall through */
+			case '-':
+				state = STATE_CLASS;
+				*pout++ = *pattern;
+				break;
+			}
+		}
+
+		pattern++;
+	}
+	*pout++ = '$';
+	*pout = '\0';
+
+	pattern_compiled = g_regex_new(pattern_regex,
+	                               (GRegexCompileFlags)(G_REGEX_DOTALL | G_REGEX_ANCHORED | G_REGEX_DOLLAR_ENDONLY),
+	                               (GRegexMatchFlags)0, NULL);
+	/*
+	 * The regex can still be invalid (e.g. the reversed range "[z-a]").
+	 * Never match in this case instead of aborting on user input.
+	 */
+	if (!pattern_compiled)
+		pattern_compiled = g_regex_new("(?!)", (GRegexCompileFlags)0,
+		                               (GRegexMatchFlags)0, NULL);
+	g_assert(pattern_compiled != NULL);
+
+	g_free(pattern_regex);
+	return pattern_compiled;
+}
+
 /*
  * Command States
  */
@@ -116,10 +300,9 @@ Globber::~Globber()
  *
  * EN is a powerful command for performing various tasks
  * given a glob \fIpattern\fP.
- * A \fIpattern\fP is a file name with \(lq*\(rq and
- * \(lq?\(rq wildcards:
- * \(lq*\(rq matches an arbitrary, possibly empty, string.
- * \(lq?\(rq matches an arbitrary character.
+ * Refer to the section
+ * .B Glob Patterns
+ * for details on the glob pattern syntax.
  *
  * \fIpattern\fP may be omitted, in which case it defaults
  * to the pattern saved in the search and glob register \(lq_\(rq.
@@ -291,7 +474,9 @@ StateGlob_filename::got_file(const gchar *filename)
 		/*
 		 * Match pattern against provided file name
 		 */
-		if (g_pattern_match_simple(pattern_str, filename) &&
+		GRegex *pattern = Globber::compile_pattern(pattern_str);
+
+		if (g_regex_match(pattern, filename, (GRegexMatchFlags)0, NULL) &&
 		    (!teco_test_mode || g_file_test(filename, file_flags))) {
 			if (!colon_modified) {
 				interface.ssm(SCI_BEGINUNDOACTION);
@@ -304,6 +489,8 @@ StateGlob_filename::got_file(const gchar *filename)
 
 			matching = true;
 		}
+
+		g_regex_unref(pattern);
 	} else if (colon_modified) {
 		/*
 		 * Match pattern against directory contents (globbing),