/*
* Copyright (C) 2012-2021 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include
#include
#include
#include
#include "sciteco.h"
#include "string-utils.h"
#include "file-utils.h"
#include "interface.h"
#include "parser.h"
#include "core-commands.h"
#include "expressions.h"
#include "qreg.h"
#include "ring.h"
#include "error.h"
#include "glob.h"
/*
 * Declares the state that teco_state_glob_pattern_done()
 * returns a pointer to once the pattern argument has been
 * processed.
 * NOTE(review): presumably TECO_DECLARE_STATE() expands to a
 * forward declaration of the state object - confirm against
 * its definition.
 *
 * FIXME: This state could be static.
 */
TECO_DECLARE_STATE(teco_state_glob_filename);
/**
 * Set up a globber for the given glob pattern.
 *
 * The structure is zeroed, the directory prefix of the pattern
 * is duplicated and opened and the remaining file name part
 * is compiled into a regular expression.
 *
 * @param ctx Globber to initialize. Any previous contents are overwritten.
 * @param pattern Glob pattern, optionally including a directory prefix.
 * @param test File test stored in the globber for filtering results.
 *
 * @memberof teco_globber_t
 */
void
teco_globber_init(teco_globber_t *ctx, const gchar *pattern, GFileTest test)
{
	memset(ctx, 0, sizeof(*ctx));
	ctx->test = test;

	/*
	 * Length of the directory component including any
	 * trailing directory separator.
	 * In contrast to g_path_get_dirname(), no directory is
	 * made up if the pattern does not contain one.
	 * This way, the reported file names carry exactly
	 * the same directory prefix as the input pattern.
	 */
	const gsize prefix_len = teco_file_get_dirname_len(pattern);
	const gchar *file_pattern = pattern + prefix_len;

	ctx->dirname = g_strndup(pattern, prefix_len);

	/*
	 * An empty prefix refers to the current directory.
	 * If the directory does not exist, ctx->dir may be NULL.
	 */
	const gchar *dir_path = ctx->dirname[0] != '\0' ? ctx->dirname : ".";
	ctx->dir = g_dir_open(dir_path, 0, NULL);

	ctx->pattern = teco_globber_compile_pattern(file_pattern);
}
/**
 * Get the next file name matching the globber's pattern
 * and file test.
 *
 * @param ctx Globber initialized with teco_globber_init().
 * @return A newly allocated file name (directory prefix plus
 *   directory entry) to be freed with g_free() or NULL if the
 *   directory could not be opened or has no more matching entries.
 *
 * @memberof teco_globber_t
 */
gchar *
teco_globber_next(teco_globber_t *ctx)
{
	if (ctx->dir == NULL)
		return NULL;

	for (;;) {
		const gchar *entry = g_dir_read_name(ctx->dir);
		if (entry == NULL)
			break;

		if (!g_regex_match(ctx->pattern, entry, 0, NULL))
			continue;

		/*
		 * The directory prefix already ends with the directory
		 * separator (or is empty), so plain concatenation
		 * yields the complete file name.
		 */
		gchar *candidate = g_strconcat(ctx->dirname, entry, NULL);

		/*
		 * Everything returned by g_dir_read_name() exists,
		 * so the EXISTS test does not have to be performed.
		 */
		gboolean accepted = ctx->test == G_FILE_TEST_EXISTS ||
		                    g_file_test(candidate, ctx->test);
		if (accepted)
			return candidate;

		g_free(candidate);
	}

	return NULL;
}
/**
 * Release everything owned by a globber:
 * the compiled pattern, the open directory (if any)
 * and the directory prefix string.
 *
 * @param ctx Globber to clear. The structure itself is not freed.
 *
 * @memberof teco_globber_t
 */
void
teco_globber_clear(teco_globber_t *ctx)
{
	GRegex *compiled = ctx->pattern;
	if (compiled != NULL)
		g_regex_unref(compiled);

	/* the directory is NULL if it could not be opened */
	GDir *handle = ctx->dir;
	if (handle != NULL)
		g_dir_close(handle);

	g_free(ctx->dirname);
}
/** @static @memberof teco_globber_t */
gchar *
teco_globber_escape_pattern(const gchar *pattern)
{
gsize escaped_len = 1;
gchar *escaped, *pout;
/*
* NOTE: The exact size of the escaped string is easy to calculate
* in O(n) just like strlen(pattern), so we can just as well
* do that.
*/
for (const gchar *pin = pattern; *pin; pin++) {
switch (*pin) {
case '*':
case '?':
case '[':
escaped_len += 3;
break;
default:
escaped_len++;
break;
}
}
pout = escaped = g_malloc(escaped_len);
while (*pattern) {
switch (*pattern) {
case '*':
case '?':
case '[':
*pout++ = '[';
*pout++ = *pattern;
*pout++ = ']';
break;
default:
*pout++ = *pattern;
break;
}
pattern++;
}
*pout = '\0';
return escaped;
}
/**
* Compile a fnmatch(3)-compatible glob pattern to
* a PCRE regular expression.
*
* There is GPattern, but it only supports the
* "*" and "?" wildcards which most importantly
* do not allow escaping.
*
* @param pattern The pattern to compile.
* @return A new compiled regular expression object.
* Always non-NULL. Unref after use.
*
* @static @memberof teco_globber_t
*/
GRegex *
teco_globber_compile_pattern(const gchar *pattern)
{
enum {
STATE_WILDCARD,
STATE_CLASS_START,
STATE_CLASS_NEGATE,
STATE_CLASS
} state = STATE_WILDCARD;
/*
* NOTE: The conversion to regex needs at most two
* characters per input character and the regex pattern
* is required only temporarily, so we use a fixed size
* buffer avoiding reallocations but wasting a few bytes
* (determining the exact required space would be tricky).
* It is not allocated on the stack though since pattern
* might be arbitrary user input and we must avoid
* stack overflows at all costs.
*/
g_autofree gchar *pattern_regex = g_malloc(strlen(pattern)*2 + 1 + 1);
gchar *pout = pattern_regex;
while (*pattern) {
if (state == STATE_WILDCARD) {
/*
* Outside a character class/set.
*/
switch (*pattern) {
case '*':
*pout++ = '.';
*pout++ = '*';
break;
case '?':
*pout++ = '.';
break;
case '[':
/*
* The special case of an unclosed character
* class is allowed in fnmatch(3) but invalid
* in PCRE, so we must check for it explicitly.
* FIXME: This is sort of inefficient...
*/
if (strchr(pattern, ']')) {
state = STATE_CLASS_START;
*pout++ = '[';
break;
}
/* fall through */
default:
/*
* For simplicity, all non-alphanumeric
* characters are escaped since they could
* be PCRE magic characters.
* g_regex_escape_string() is inefficient.
* character anyway.
*/
if (!g_ascii_isalnum(*pattern))
*pout++ = '\\';
*pout++ = *pattern;
break;
}
} else {
/*
* Within a character class/set.
*/
switch (*pattern) {
case '!':
/*
* fnmatch(3) allows ! instead of ^ immediately
* after the opening bracket.
*/
if (state > STATE_CLASS_START) {
state = STATE_CLASS;
*pout++ = '!';
break;
}
/* fall through */
case '^':
state = state == STATE_CLASS_START
? STATE_CLASS_NEGATE : STATE_CLASS;
*pout++ = '^';
break;
case ']':
/*
* fnmatch(3) allows the closing bracket as the
* first character to include it in the set, while
* PCRE requires it to be escaped.
*/
if (state == STATE_CLASS) {
state = STATE_WILDCARD;
*pout++ = ']';
break;
}
/* fall through */
default:
if (!g_ascii_isalnum(*pattern))
*pout++ = '\\';
/* fall through */
case '-':
state = STATE_CLASS;
*pout++ = *pattern;
break;
}
}
pattern++;
}
*pout++ = '$';
*pout = '\0';
GRegex *pattern_compiled = g_regex_new(pattern_regex,
G_REGEX_DOTALL | G_REGEX_ANCHORED, 0, NULL);
/*
* Since the regex is generated from patterns that are
* always valid, there must be no syntactic error.
*/
g_assert(pattern_compiled != NULL);
return pattern_compiled;
}
/*
* Command States
*/
/*
 * Called with the pattern string argument.
 * Unless the machine's mode is beyond TECO_MODE_NORMAL or the
 * pattern is empty, the expanded pattern is stored
 * in the global Q-Register "_".
 *
 * Returns the state to continue with (teco_state_glob_filename)
 * or NULL if updating the register failed, in which case
 * the callees are expected to have set the error.
 */
static teco_state_t *
teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error)
{
	teco_state_t *const next_state = &teco_state_glob_filename;

	/* nothing to store: keep the register's current contents */
	if (ctx->mode > TECO_MODE_NORMAL || !(str->len > 0))
		return next_state;

	g_autofree gchar *expanded = teco_file_expand_path(str->data);

	teco_qreg_t *reg = teco_qreg_table_find(&teco_qreg_table_globals, "_", 1);
	g_assert(reg != NULL);

	/* the undo token must be registered before the register is changed */
	if (!reg->vtable->undo_set_string(reg, error))
		return NULL;
	if (!reg->vtable->set_string(reg, expanded, strlen(expanded), error))
		return NULL;

	return next_state;
}
/*$ EN glob
* [type]EN[pattern]$[filename]$ -- Glob files or match filename and check file type
* [type]:EN[pattern]$[filename]$ -> Success|Failure
*
* EN is a powerful command for performing various tasks
* given a glob \fIpattern\fP.
* For a description of the glob pattern syntax, refer to the section
* .B Glob Patterns
* for details.
*
* \fIpattern\fP may be omitted, in which case it defaults
* to the pattern saved in the search and glob register \(lq_\(rq.
* If it is specified, it overwrites the contents of the register
* \(lq_\(rq with \fIpattern\fP.
* This behaviour is similar to the search and replace commands
* and allows for repeated gloHTTP/1.1 200 OK
Connection: keep-alive
Connection: keep-alive
Content-Disposition: inline; filename="glob.c"
Content-Disposition: inline; filename="glob.c"
Content-Length: 16786
Content-Length: 16786
Content-Security-Policy: default-src 'none'
Content-Security-Policy: default-src 'none'
Content-Type: text/plain; charset=UTF-8
Content-Type: text/plain; charset=UTF-8
Date: Wed, 22 Oct 2025 01:40:38 UTC
ETag: "f6810c2731f8ff54cc11b8378c5cd6f010c7051b"
ETag: "f6810c2731f8ff54cc11b8378c5cd6f010c7051b"
Expires: Sat, 20 Oct 2035 01:40:38 GMT
Expires: Sat, 20 Oct 2035 01:40:38 GMT
Last-Modified: Wed, 22 Oct 2025 01:40:38 GMT
Last-Modified: Wed, 22 Oct 2025 01:40:38 GMT
Server: OpenBSD httpd
Server: OpenBSD httpd
X-Content-Type-Options: nosniff
X-Content-Type-Options: nosniff
/*
* Copyright (C) 2012-2021 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include
#include
#include
#include
#include "sciteco.h"
#include "string-utils.h"
#include "file-utils.h"
#include "interface.h"
#include "parser.h"
#include "core-commands.h"
#include "expressions.h"
#include "qreg.h"
#include "ring.h"
#include "error.h"
#include "glob.h"
/*
 * Declares the state that teco_state_glob_pattern_done()
 * returns a pointer to once the pattern argument has been
 * processed.
 * NOTE(review): presumably TECO_DECLARE_STATE() expands to a
 * forward declaration of the state object - confirm against
 * its definition.
 *
 * FIXME: This state could be static.
 */
TECO_DECLARE_STATE(teco_state_glob_filename);
/**
 * Set up a globber for the given glob pattern.
 *
 * The structure is zeroed, the directory prefix of the pattern
 * is duplicated and opened and the remaining file name part
 * is compiled into a regular expression.
 *
 * @param ctx Globber to initialize. Any previous contents are overwritten.
 * @param pattern Glob pattern, optionally including a directory prefix.
 * @param test File test stored in the globber for filtering results.
 *
 * @memberof teco_globber_t
 */
void
teco_globber_init(teco_globber_t *ctx, const gchar *pattern, GFileTest test)
{
	memset(ctx, 0, sizeof(*ctx));
	ctx->test = test;

	/*
	 * Length of the directory component including any
	 * trailing directory separator.
	 * In contrast to g_path_get_dirname(), no directory is
	 * made up if the pattern does not contain one.
	 * This way, the reported file names carry exactly
	 * the same directory prefix as the input pattern.
	 */
	const gsize prefix_len = teco_file_get_dirname_len(pattern);
	const gchar *file_pattern = pattern + prefix_len;

	ctx->dirname = g_strndup(pattern, prefix_len);

	/*
	 * An empty prefix refers to the current directory.
	 * If the directory does not exist, ctx->dir may be NULL.
	 */
	const gchar *dir_path = ctx->dirname[0] != '\0' ? ctx->dirname : ".";
	ctx->dir = g_dir_open(dir_path, 0, NULL);

	ctx->pattern = teco_globber_compile_pattern(file_pattern);
}
/**
 * Get the next file name matching the globber's pattern
 * and file test.
 *
 * @param ctx Globber initialized with teco_globber_init().
 * @return A newly allocated file name (directory prefix plus
 *   directory entry) to be freed with g_free() or NULL if the
 *   directory could not be opened or has no more matching entries.
 *
 * @memberof teco_globber_t
 */
gchar *
teco_globber_next(teco_globber_t *ctx)
{
	if (ctx->dir == NULL)
		return NULL;

	for (;;) {
		const gchar *entry = g_dir_read_name(ctx->dir);
		if (entry == NULL)
			break;

		if (!g_regex_match(ctx->pattern, entry, 0, NULL))
			continue;

		/*
		 * The directory prefix already ends with the directory
		 * separator (or is empty), so plain concatenation
		 * yields the complete file name.
		 */
		gchar *candidate = g_strconcat(ctx->dirname, entry, NULL);

		/*
		 * Everything returned by g_dir_read_name() exists,
		 * so the EXISTS test does not have to be performed.
		 */
		gboolean accepted = ctx->test == G_FILE_TEST_EXISTS ||
		                    g_file_test(candidate, ctx->test);
		if (accepted)
			return candidate;

		g_free(candidate);
	}

	return NULL;
}
/**
 * Release everything owned by a globber:
 * the compiled pattern, the open directory (if any)
 * and the directory prefix string.
 *
 * @param ctx Globber to clear. The structure itself is not freed.
 *
 * @memberof teco_globber_t
 */
void
teco_globber_clear(teco_globber_t *ctx)
{
	GRegex *compiled = ctx->pattern;
	if (compiled != NULL)
		g_regex_unref(compiled);

	/* the directory is NULL if it could not be opened */
	GDir *handle = ctx->dir;
	if (handle != NULL)
		g_dir_close(handle);

	g_free(ctx->dirname);
}
/** @static @memberof teco_globber_t */
gchar *
teco_globber_escape_pattern(const gchar *pattern)
{
gsize escaped_len = 1;
gchar *escaped, *pout;
/*
* NOTE: The exact size of the escaped string is easy to calculate
* in O(n) just like strlen(pattern), so we can just as well
* do that.
*/
for (const gchar *pin = pattern; *pin; pin++) {
switch (*pin) {
case '*':
case '?':
case '[':
escaped_len += 3;
break;
default:
escaped_len++;
break;
}
}
pout = escaped = g_malloc(escaped_len);
while (*pattern) {
switch (*pattern) {
case '*':
case '?':
case '[':
*pout++ = '[';
*pout++ = *pattern;
*pout++ = ']';
break;
default:
*pout++ = *pattern;
break;
}
pattern++;
}
*pout = '\0';
return escaped;
}
/**
* Compile a fnmatch(3)-compatible glob pattern to
* a PCRE regular expression.
*
* There is GPattern, but it only supports the
* "*" and "?" wildcards which most importantly
* do not allow escaping.
*
* @param pattern The pattern to compile.
* @return A new compiled regular expression object.
* Always non-NULL. Unref after use.
*
* @static @memberof teco_globber_t
*/
GRegex *
teco_globber_compile_pattern(const gchar *pattern)
{
enum {
STATE_WILDCARD,
STATE_CLASS_START,
STATE_CLASS_NEGATE,
STATE_CLASS
} state = STATE_WILDCARD;
/*
* NOTE: The conversion to regex needs at most two
* characters per input character and the regex pattern
* is required only temporarily, so we use a fixed size
* buffer avoiding reallocations but wasting a few bytes
* (determining the exact required space would be tricky).
* It is not allocated on the stack though since pattern
* might be arbitrary user input and we must avoid
* stack overflows at all costs.
*/
g_autofree gchar *pattern_regex = g_malloc(strlen(pattern)*2 + 1 + 1);
gchar *pout = pattern_regex;
while (*pattern) {
if (state == STATE_WILDCARD) {
/*
* Outside a character class/set.
*/
switch (*pattern) {
case '*':
*pout++ = '.';
*pout++ = '*';
break;
case '?':
*pout++ = '.';
break;
case '[':
/*
* The special case of an unclosed character
* class is allowed in fnmatch(3) but invalid
* in PCRE, so we must check for it explicitly.
* FIXME: This is sort of inefficient...
*/
if (strchr(pattern, ']')) {
state = STATE_CLASS_START;
*pout++ = '[';
break;
}
/* fall through */
default:
/*
* For simplicity, all non-alphanumeric
* characters are escaped since they could
* be PCRE magic characters.
* g_regex_escape_string() is inefficient.
* character anyway.
*/
if (!g_ascii_isalnum(*pattern))
*pout++ = '\\';
*pout++ = *pattern;
break;
}
} else {
/*
* Within a character class/set.
*/
switch (*pattern) {
case '!':
/*
* fnmatch(3) allows ! instead of ^ immediately
* after the opening bracket.
*/
if (state > STATE_CLASS_START) {
state = STATE_CLASS;
*pout++ = '!';
break;
}
/* fall through */
case '^':
state = state == STATE_CLASS_START
? STATE_CLASS_NEGATE : STATE_CLASS;
*pout++ = '^';
break;
case ']':
/*
* fnmatch(3) allows the closing bracket as the
* first character to include it in the set, while
* PCRE requires it to be escaped.
*/
if (state == STATE_CLASS) {
state = STATE_WILDCARD;
*pout++ = ']';
break;
}
/* fall through */
default:
if (!g_ascii_isalnum(*pattern))
*pout++ = '\\';
/* fall through */
case '-':
state = STATE_CLASS;
*pout++ = *pattern;
break;
}
}
pattern++;
}
*pout++ = '$';
*pout = '\0';
GRegex *pattern_compiled = g_regex_new(pattern_regex,
G_REGEX_DOTALL | G_REGEX_ANCHORED, 0, NULL);
/*
* Since the regex is generated from patterns that are
* always valid, there must be no syntactic error.
*/
g_assert(pattern_compiled != NULL);
return pattern_compiled;
}
/*
* Command States
*/
/*
 * Called with the pattern string argument.
 * Unless the machine's mode is beyond TECO_MODE_NORMAL or the
 * pattern is empty, the expanded pattern is stored
 * in the global Q-Register "_".
 *
 * Returns the state to continue with (teco_state_glob_filename)
 * or NULL if updating the register failed, in which case
 * the callees are expected to have set the error.
 */
static teco_state_t *
teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error)
{
	teco_state_t *const next_state = &teco_state_glob_filename;

	/* nothing to store: keep the register's current contents */
	if (ctx->mode > TECO_MODE_NORMAL || !(str->len > 0))
		return next_state;

	g_autofree gchar *expanded = teco_file_expand_path(str->data);

	teco_qreg_t *reg = teco_qreg_table_find(&teco_qreg_table_globals, "_", 1);
	g_assert(reg != NULL);

	/* the undo token must be registered before the register is changed */
	if (!reg->vtable->undo_set_string(reg, error))
		return NULL;
	if (!reg->vtable->set_string(reg, expanded, strlen(expanded), error))
		return NULL;

	return next_state;
}
/*$ EN glob
* [type]EN[pattern]$[filename]$ -- Glob files or match filename and check file type
* [type]:EN[pattern]$[filename]$ -> Success|Failure
*
* EN is a powerful command for performing various tasks
* given a glob \fIpattern\fP.
* For a description of the