/* * Copyright (C) 2012-2021 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #include "sciteco.h" #include "string-utils.h" #include "file-utils.h" #include "interface.h" #include "parser.h" #include "core-commands.h" #include "expressions.h" #include "qreg.h" #include "ring.h" #include "error.h" #include "glob.h" /* * FIXME: This state could be static. */ TECO_DECLARE_STATE(teco_state_glob_filename); /** @memberof teco_globber_t */ void teco_globber_init(teco_globber_t *ctx, const gchar *pattern, GFileTest test) { memset(ctx, 0, sizeof(*ctx)); ctx->test = test; /* * This finds the directory component including * any trailing directory separator * without making up a directory if it is missing * (as g_path_get_dirname() does). * Important since it allows us to construct * file names with the exact same directory * prefix as the input pattern. */ gsize dirname_len = teco_file_get_dirname_len(pattern); ctx->dirname = g_strndup(pattern, dirname_len); ctx->dir = g_dir_open(*ctx->dirname ? 
ctx->dirname : ".", 0, NULL); /* if dirname does not exist, the result may be NULL */ ctx->pattern = teco_globber_compile_pattern(pattern + dirname_len); } /** @memberof teco_globber_t */ gchar * teco_globber_next(teco_globber_t *ctx) { const gchar *basename; if (!ctx->dir) return NULL; while ((basename = g_dir_read_name(ctx->dir))) { if (!g_regex_match(ctx->pattern, basename, 0, NULL)) continue; /* * As dirname includes the directory separator, * we can simply concatenate dirname with basename. */ gchar *filename = g_strconcat(ctx->dirname, basename, NULL); /* * No need to perform file test for EXISTS since * g_dir_read_name() will only return existing entries */ if (ctx->test == G_FILE_TEST_EXISTS || g_file_test(filename, ctx->test)) return filename; g_free(filename); } return NULL; } /** @memberof teco_globber_t */ void teco_globber_clear(teco_globber_t *ctx) { if (ctx->pattern) g_regex_unref(ctx->pattern); if (ctx->dir) g_dir_close(ctx->dir); g_free(ctx->dirname); } /** @static @memberof teco_globber_t */ gchar * teco_globber_escape_pattern(const gchar *pattern) { gsize escaped_len = 1; gchar *escaped, *pout; /* * NOTE: The exact size of the escaped string is easy to calculate * in O(n) just like strlen(pattern), so we can just as well * do that. */ for (const gchar *pin = pattern; *pin; pin++) { switch (*pin) { case '*': case '?': case '[': escaped_len += 3; break; default: escaped_len++; break; } } pout = escaped = g_malloc(escaped_len); while (*pattern) { switch (*pattern) { case '*': case '?': case '[': *pout++ = '['; *pout++ = *pattern; *pout++ = ']'; break; default: *pout++ = *pattern; break; } pattern++; } *pout = '\0'; return escaped; } /** * Compile a fnmatch(3)-compatible glob pattern to * a PCRE regular expression. * * There is GPattern, but it only supports the * "*" and "?" wildcards which most importantly * do not allow escaping. * * @param pattern The pattern to compile. * @return A new compiled regular expression object. * Always non-NULL. 
Unref after use. * * @static @memberof teco_globber_t */ GRegex * teco_globber_compile_pattern(const gchar *pattern) { enum { STATE_WILDCARD, STATE_CLASS_START, STATE_CLASS_NEGATE, STATE_CLASS } state = STATE_WILDCARD; /* * NOTE: The conversion to regex needs at most two * characters per input character and the regex pattern * is required only temporarily, so we use a fixed size * buffer avoiding reallocations but wasting a few bytes * (determining the exact required space would be tricky). * It is not allocated on the stack though since pattern * might be arbitrary user input and we must avoid * stack overflows at all costs. */ g_autofree gchar *pattern_regex = g_malloc(strlen(pattern)*2 + 1 + 1); gchar *pout = pattern_regex; while (*pattern) { if (state == STATE_WILDCARD) { /* * Outside a character class/set. */ switch (*pattern) { case '*': *pout++ = '.'; *pout++ = '*'; break; case '?': *pout++ = '.'; break; case '[': /* * The special case of an unclosed character * class is allowed in fnmatch(3) but invalid * in PCRE, so we must check for it explicitly. * FIXME: This is sort of inefficient... */ if (strchr(pattern, ']')) { state = STATE_CLASS_START; *pout++ = '['; break; } /* fall through */ default: /* * For simplicity, all non-alphanumeric * characters are escaped since they could * be PCRE magic characters. * g_regex_escape_string() is inefficient. * character anyway. */ if (!g_ascii_isalnum(*pattern)) *pout++ = '\\'; *pout++ = *pattern; break; } } else { /* * Within a character class/set. */ switch (*pattern) { case '!': /* * fnmatch(3) allows ! instead of ^ immediately * after the opening bracket. */ if (state > STATE_CLASS_START) { state = STATE_CLASS; *pout++ = '!'; break; } /* fall through */ case '^': state = state == STATE_CLASS_START ? STATE_CLASS_NEGATE : STATE_CLASS; *pout++ = '^'; break; case ']': /* * fnmatch(3) allows the closing bracket as the * first character to include it in the set, while * PCRE requires it to be escaped. 
*/ if (state == STATE_CLASS) { state = STATE_WILDCARD; *pout++ = ']'; break; } /* fall through */ default: if (!g_ascii_isalnum(*pattern)) *pout++ = '\\'; /* fall through */ case '-': state = STATE_CLASS; *pout++ = *pattern; break; } } pattern++; } *pout++ = '$'; *pout = '\0'; GRegex *pattern_compiled = g_regex_new(pattern_regex, G_REGEX_DOTALL | G_REGEX_ANCHORED, 0, NULL); /* * Since the regex is generated from patterns that are * always valid, there must be no syntactic error. */ g_assert(pattern_compiled != NULL); return pattern_compiled; } /* * Command States */ static teco_state_t * teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error) { if (ctx->mode > TECO_MODE_NORMAL) return &teco_state_glob_filename; if (str->len > 0) { g_autofree gchar *filename = teco_file_expand_path(str->data); teco_qreg_t *glob_reg = teco_qreg_table_find(&teco_qreg_table_globals, "_", 1); g_assert(glob_reg != NULL); if (!glob_reg->vtable->undo_set_string(glob_reg, error) || !glob_reg->vtable->set_string(glob_reg, filename, strlen(filename), error)) return NULL; } return &teco_state_glob_filename; } /*$ EN glob * [type]EN[pattern]$[filename]$ -- Glob files or match filename and check file type * [type]:EN[pattern]$[filename]$ -> Success|Failure * * EN is a powerful command for performing various tasks * given a glob \fIpattern\fP. * For a description of the glob pattern syntax, refer to the section * .B Glob Patterns * for details. * * \fIpattern\fP may be omitted, in which case it defaults * to the pattern saved in the search and glob register \(lq_\(rq. * If it is specified, it overwrites the contents of the register * \(lq_\(rq with \fIpattern\fP. 
* This behaviour is similar to the search and replace commands
 * and allows for repeated glo
 */
/*
 * FIXME: The documentation comment above breaks off in the middle of
 * a word. At this point the file contained a raw HTTP response header
 * followed by a second copy of the beginning of this file.
 * The header has been removed; the rest of the original comment
 * still has to be restored and the duplicated code below removed.
 */
/* * Copyright (C) 2012-2021 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #include "sciteco.h" #include "string-utils.h" #include "file-utils.h" #include "interface.h" #include "parser.h" #include "core-commands.h" #include "expressions.h" #include "qreg.h" #include "ring.h" #include "error.h" #include "glob.h" /* * FIXME: This state could be static.
*/ TECO_DECLARE_STATE(teco_state_glob_filename); /** @memberof teco_globber_t */ void teco_globber_init(teco_globber_t *ctx, const gchar *pattern, GFileTest test) { memset(ctx, 0, sizeof(*ctx)); ctx->test = test; /* * This finds the directory component including * any trailing directory separator * without making up a directory if it is missing * (as g_path_get_dirname() does). * Important since it allows us to construct * file names with the exact same directory * prefix as the input pattern. */ gsize dirname_len = teco_file_get_dirname_len(pattern); ctx->dirname = g_strndup(pattern, dirname_len); ctx->dir = g_dir_open(*ctx->dirname ? ctx->dirname : ".", 0, NULL); /* if dirname does not exist, the result may be NULL */ ctx->pattern = teco_globber_compile_pattern(pattern + dirname_len); } /** @memberof teco_globber_t */ gchar * teco_globber_next(teco_globber_t *ctx) { const gchar *basename; if (!ctx->dir) return NULL; while ((basename = g_dir_read_name(ctx->dir))) { if (!g_regex_match(ctx->pattern, basename, 0, NULL)) continue; /* * As dirname includes the directory separator, * we can simply concatenate dirname with basename. */ gchar *filename = g_strconcat(ctx->dirname, basename, NULL); /* * No need to perform file test for EXISTS since * g_dir_read_name() will only return existing entries */ if (ctx->test == G_FILE_TEST_EXISTS || g_file_test(filename, ctx->test)) return filename; g_free(filename); } return NULL; } /** @memberof teco_globber_t */ void teco_globber_clear(teco_globber_t *ctx) { if (ctx->pattern) g_regex_unref(ctx->pattern); if (ctx->dir) g_dir_close(ctx->dir); g_free(ctx->dirname); } /** @static @memberof teco_globber_t */ gchar * teco_globber_escape_pattern(const gchar *pattern) { gsize escaped_len = 1; gchar *escaped, *pout; /* * NOTE: The exact size of the escaped string is easy to calculate * in O(n) just like strlen(pattern), so we can just as well * do that. 
*/ for (const gchar *pin = pattern; *pin; pin++) { switch (*pin) { case '*': case '?': case '[': escaped_len += 3; break; default: escaped_len++; break; } } pout = escaped = g_malloc(escaped_len); while (*pattern) { switch (*pattern) { case '*': case '?': case '[': *pout++ = '['; *pout++ = *pattern; *pout++ = ']'; break; default: *pout++ = *pattern; break; } pattern++; } *pout = '\0'; return escaped; } /** * Compile a fnmatch(3)-compatible glob pattern to * a PCRE regular expression. * * There is GPattern, but it only supports the * "*" and "?" wildcards which most importantly * do not allow escaping. * * @param pattern The pattern to compile. * @return A new compiled regular expression object. * Always non-NULL. Unref after use. * * @static @memberof teco_globber_t */ GRegex * teco_globber_compile_pattern(const gchar *pattern) { enum { STATE_WILDCARD, STATE_CLASS_START, STATE_CLASS_NEGATE, STATE_CLASS } state = STATE_WILDCARD; /* * NOTE: The conversion to regex needs at most two * characters per input character and the regex pattern * is required only temporarily, so we use a fixed size * buffer avoiding reallocations but wasting a few bytes * (determining the exact required space would be tricky). * It is not allocated on the stack though since pattern * might be arbitrary user input and we must avoid * stack overflows at all costs. */ g_autofree gchar *pattern_regex = g_malloc(strlen(pattern)*2 + 1 + 1); gchar *pout = pattern_regex; while (*pattern) { if (state == STATE_WILDCARD) { /* * Outside a character class/set. */ switch (*pattern) { case '*': *pout++ = '.'; *pout++ = '*'; break; case '?': *pout++ = '.'; break; case '[': /* * The special case of an unclosed character * class is allowed in fnmatch(3) but invalid * in PCRE, so we must check for it explicitly. * FIXME: This is sort of inefficient... 
*/ if (strchr(pattern, ']')) { state = STATE_CLASS_START; *pout++ = '['; break; } /* fall through */ default: /* * For simplicity, all non-alphanumeric * characters are escaped since they could * be PCRE magic characters. * g_regex_escape_string() is inefficient. * character anyway. */ if (!g_ascii_isalnum(*pattern)) *pout++ = '\\'; *pout++ = *pattern; break; } } else { /* * Within a character class/set. */ switch (*pattern) { case '!': /* * fnmatch(3) allows ! instead of ^ immediately * after the opening bracket. */ if (state > STATE_CLASS_START) { state = STATE_CLASS; *pout++ = '!'; break; } /* fall through */ case '^': state = state == STATE_CLASS_START ? STATE_CLASS_NEGATE : STATE_CLASS; *pout++ = '^'; break; case ']': /* * fnmatch(3) allows the closing bracket as the * first character to include it in the set, while * PCRE requires it to be escaped. */ if (state == STATE_CLASS) { state = STATE_WILDCARD; *pout++ = ']'; break; } /* fall through */ default: if (!g_ascii_isalnum(*pattern)) *pout++ = '\\'; /* fall through */ case '-': state = STATE_CLASS; *pout++ = *pattern; break; } } pattern++; } *pout++ = '$'; *pout = '\0'; GRegex *pattern_compiled = g_regex_new(pattern_regex, G_REGEX_DOTALL | G_REGEX_ANCHORED, 0, NULL); /* * Since the regex is generated from patterns that are * always valid, there must be no syntactic error. 
*/ g_assert(pattern_compiled != NULL); return pattern_compiled; } /* * Command States */ static teco_state_t * teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error) { if (ctx->mode > TECO_MODE_NORMAL) return &teco_state_glob_filename; if (str->len > 0) { g_autofree gchar *filename = teco_file_expand_path(str->data); teco_qreg_t *glob_reg = teco_qreg_table_find(&teco_qreg_table_globals, "_", 1); g_assert(glob_reg != NULL); if (!glob_reg->vtable->undo_set_string(glob_reg, error) || !glob_reg->vtable->set_string(glob_reg, filename, strlen(filename), error)) return NULL; } return &teco_state_glob_filename; } /*$ EN glob * [type]EN[pattern]$[filename]$ -- Glob files or match filename and check file type * [type]:EN[pattern]$[filename]$ -> Success|Failure * * EN is a powerful command for performing various tasks * given a glob \fIpattern\fP. * For a description of the