/*
* Copyright (C) 2012-2016 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include
#include
#include
#include
#include "sciteco.h"
#include "interface.h"
#include "parser.h"
#include "expressions.h"
#include "qregisters.h"
#include "ring.h"
#include "ioview.h"
#include "glob.h"
namespace SciTECO {
/*
 * Parser state objects for the two string arguments
 * of the EN (glob) command.
 */
namespace States {
/*
 * State for the first string argument (the glob pattern).
 * Its got_file() hands over to glob_filename.
 */
StateGlob_pattern glob_pattern;
/*
 * State for the second string argument (the optional file name).
 * Its got_file() performs the actual globbing/matching and
 * returns to States::start.
 */
StateGlob_filename glob_filename;
}
/**
 * Prepare iteration over all directory entries matching
 * a glob pattern.
 *
 * @param pattern Glob pattern, optionally prefixed with a directory.
 *                Only the part after the directory prefix is
 *                compiled and matched against entry names.
 * @param _test   File test that next() applies to every match.
 */
Globber::Globber(const gchar *pattern, GFileTest _test)
                : test(_test)
{
	/*
	 * Length of the directory prefix *including* a trailing
	 * directory separator.
	 * Unlike g_path_get_dirname(), this does not invent a
	 * directory when the pattern has none, so file names
	 * can later be rebuilt with exactly the same prefix
	 * the user typed.
	 */
	const gsize prefix_len = file_get_dirname_len(pattern);
	const gchar *basename_glob = pattern + prefix_len;

	dirname = g_strndup(pattern, prefix_len);

	/*
	 * An empty prefix refers to the current directory.
	 * The result may be NULL if the directory cannot be
	 * opened - next() copes with that.
	 */
	const gchar *open_path = *dirname ? dirname : ".";
	dir = g_dir_open(open_path, 0, NULL);

	/* `pattern` is shadowed by the parameter, hence this-> */
	this->pattern = compile_pattern(basename_glob);
}
/**
 * Get the next file name matching the pattern and file test.
 *
 * @return A newly allocated file name (directory prefix as in the
 *         original pattern plus entry name), to be freed with
 *         g_free(). NULL if there are no more matches or the
 *         directory could not be opened.
 */
gchar *
Globber::next(void)
{
	if (!dir)
		return NULL;

	for (;;) {
		const gchar *entry = g_dir_read_name(dir);
		if (!entry)
			break;

		if (!g_regex_match(pattern, entry, (GRegexMatchFlags)0, NULL))
			continue;

		/*
		 * dirname already ends with the directory separator
		 * (or is empty), so plain concatenation yields the
		 * complete file name.
		 */
		gchar *path = g_strconcat(dirname, entry, NIL);

		/*
		 * Entries returned by g_dir_read_name() exist by
		 * definition, so the EXISTS test can be skipped.
		 */
		if (test == G_FILE_TEST_EXISTS || g_file_test(path, test))
			return path;

		g_free(path);
	}

	return NULL;
}
/**
 * Release the directory prefix, the directory handle and
 * the compiled pattern.
 */
Globber::~Globber()
{
	g_free(dirname);

	/* dir is NULL if the directory could not be opened */
	if (dir != NULL)
		g_dir_close(dir);

	if (pattern != NULL)
		g_regex_unref(pattern);
}
/**
 * Escape all glob wildcards in a string, so that the result
 * matches the input literally when used as a glob pattern.
 *
 * Each of the characters "*", "?" and "[" is wrapped into a
 * single-character class (e.g. "*" becomes "[*]").
 * All other characters are copied verbatim.
 *
 * @param pattern The null-terminated string to escape.
 * @return A newly allocated, null-terminated escaped string.
 *         Free with g_free().
 */
gchar *
Globber::escape_pattern(const gchar *pattern)
{
	static const gchar magic[] = "*?[";

	const gchar *src;
	gchar *result, *dst;
	/* starts at 1 to account for the terminating null byte */
	gsize size = 1;

	/*
	 * First pass: the exact size is as cheap to determine
	 * as a strlen(), so no memory is wasted.
	 * NOTE: *src is never the null byte here, so strchr()
	 * cannot match the terminator of `magic`.
	 */
	for (src = pattern; *src; src++)
		size += strchr(magic, *src) ? 3 : 1;

	/* Second pass: write the escaped string. */
	result = dst = (gchar *)g_malloc(size);
	for (src = pattern; *src; src++) {
		if (strchr(magic, *src)) {
			*dst++ = '[';
			*dst++ = *src;
			*dst++ = ']';
		} else {
			*dst++ = *src;
		}
	}
	*dst = '\0';

	return result;
}
/**
 * Compile a fnmatch(3)-compatible glob pattern to
 * a PCRE regular expression.
 *
 * There is GPattern, but it only supports the
 * "*" and "?" wildcards which most importantly
 * do not allow escaping.
 *
 * Translation rules:
 *  - "*" becomes ".*", "?" becomes ".".
 *  - "[...]" becomes a PCRE character class. "!" or "^" directly
 *    after the opening bracket negates the class and a "]"
 *    directly after the (optionally negated) opening bracket is
 *    a literal member of the class.
 *  - A "[" that is never closed is matched literally.
 *  - All other ASCII non-alphanumeric characters are
 *    backslash-escaped. Bytes >= 0x80 are passed through
 *    unchanged so that UTF-8 sequences stay intact.
 *  - The expression is anchored at both ends, i.e. the pattern
 *    must match the entire subject string.
 *
 * Patterns that still cannot be expressed as a valid regular
 * expression (e.g. a reversed range like "[z-a]") compile to an
 * expression that never matches instead of aborting the program.
 *
 * @param pattern The pattern to compile.
 * @return A new compiled regular expression object.
 *         Always non-NULL. Unref after use.
 */
GRegex *
Globber::compile_pattern(const gchar *pattern)
{
	const GRegexCompileFlags flags =
		(GRegexCompileFlags)(G_REGEX_DOTALL | G_REGEX_ANCHORED |
		                     G_REGEX_DOLLAR_ENDONLY);

	gchar *pattern_regex, *pout;
	const gchar *class_body;
	GRegex *pattern_compiled;

	enum {
		STATE_WILDCARD,		/* outside of any character class */
		STATE_CLASS_START,	/* directly after the opening "[" */
		STATE_CLASS_NEGATE,	/* directly after "[!" or "[^" */
		STATE_CLASS		/* anywhere else within a class */
	} state = STATE_WILDCARD;

	/*
	 * NOTE: The conversion to regex needs at most two
	 * characters per input character and the regex pattern
	 * is required only temporarily, so we use a fixed size
	 * buffer avoiding reallocations but wasting a few bytes
	 * (determining the exact required space would be tricky).
	 * The two additional bytes are for the trailing "$" and
	 * the null byte.
	 * It is not allocated on the stack though since pattern
	 * might be arbitrary user input and we must avoid
	 * stack overflows at all costs.
	 */
	pout = pattern_regex = (gchar *)g_malloc(strlen(pattern)*2 + 1 + 1);

	while (*pattern) {
		const gchar c = *pattern;
		/*
		 * For simplicity, all ASCII non-alphanumeric
		 * characters are escaped since they could be PCRE
		 * magic characters (this is cheaper than calling
		 * g_regex_escape_string() on every character).
		 * Bytes >= 0x80 are never magic and must not be
		 * escaped: the regex is compiled in UTF-8 mode and
		 * a backslash between the bytes of a multi-byte
		 * sequence would corrupt it.
		 */
		const bool must_escape = !g_ascii_isalnum(c) &&
		                         (guchar)c < 0x80;

		if (state == STATE_WILDCARD) {
			/*
			 * Outside a character class/set.
			 */
			switch (c) {
			case '*':
				*pout++ = '.';
				*pout++ = '*';
				break;

			case '?':
				*pout++ = '.';
				break;

			case '[':
				/*
				 * The special case of an unclosed character
				 * class is allowed in fnmatch(3) but invalid
				 * in PCRE, so we must check for it explicitly.
				 * An optional negation character and a "]"
				 * directly following it are part of the
				 * class (see below), so they cannot close it
				 * and have to be skipped when looking for
				 * the closing bracket.
				 * FIXME: This is sort of inefficient...
				 */
				class_body = pattern + 1;
				if (*class_body == '!' || *class_body == '^')
					class_body++;
				if (*class_body == ']')
					class_body++;
				if (strchr(class_body, ']')) {
					state = STATE_CLASS_START;
					*pout++ = '[';
					break;
				}
				/* fall through */
			default:
				if (must_escape)
					*pout++ = '\\';
				*pout++ = c;
				break;
			}
		} else {
			/*
			 * Within a character class/set.
			 */
			switch (c) {
			case '!':
				/*
				 * fnmatch(3) allows ! instead of ^ immediately
				 * after the opening bracket.
				 * Anywhere else it is an ordinary member.
				 */
				if (state > STATE_CLASS_START) {
					state = STATE_CLASS;
					*pout++ = '!';
					break;
				}
				/* fall through */
			case '^':
				/*
				 * Only negates directly after the opening
				 * bracket. PCRE treats a "^" anywhere else
				 * in the class as a literal, so it can
				 * always be passed through unescaped.
				 */
				state = state == STATE_CLASS_START
						? STATE_CLASS_NEGATE : STATE_CLASS;
				*pout++ = '^';
				break;

			case ']':
				/*
				 * fnmatch(3) allows the closing bracket as the
				 * first character to include it in the set, while
				 * PCRE requires it to be escaped.
				 */
				if (state == STATE_CLASS) {
					state = STATE_WILDCARD;
					*pout++ = ']';
					break;
				}
				/* fall through */
			default:
				if (must_escape)
					*pout++ = '\\';
				/* fall through */
			case '-':
				/* "-" is passed through to support ranges */
				state = STATE_CLASS;
				*pout++ = c;
				break;
			}
		}

		pattern++;
	}
	/*
	 * Together with G_REGEX_ANCHORED this forces a match of the
	 * entire subject. G_REGEX_DOLLAR_ENDONLY prevents "$" from
	 * also matching before a trailing newline, which may
	 * legitimately be part of a file name.
	 */
	*pout++ = '$';
	*pout = '\0';

	pattern_compiled = g_regex_new(pattern_regex, flags,
	                               (GRegexMatchFlags)0, NULL);
	g_free(pattern_regex);

	if (!pattern_compiled) {
		/*
		 * The pattern is user input and may still translate
		 * to an invalid regular expression, e.g. because of
		 * a reversed range in a character class.
		 * Such patterns should match nothing instead of
		 * crashing, so fall back to an empty negative
		 * lookahead, which always fails.
		 */
		pattern_compiled = g_regex_new("(?!)", flags,
		                               (GRegexMatchFlags)0, NULL);
	}
	/* the constant fallback expression is always valid */
	g_assert(pattern_compiled != NULL);

	return pattern_compiled;
}
/*
* Command States
*/
/*$
* [type]EN[pattern]$[filename]$ -- Glob files or match filename and check file type
* [type]:EN[pattern]$[filename]$ -> Success|Failure
*
* EN is a powerful command for performing various tasks
* given a glob \fIpattern\fP.
* For a description of the glob pattern syntax, refer to the section
* .B Glob Patterns
* for details.
*
* \fIpattern\fP may be omitted, in which case it defaults
* to the pattern saved in the search and glob register \(lq_\(rq.
* If it is specified, it overwrites the contents of the register
* \(lq_\(rq with \fIpattern\fP.
* This behaviour is similar to the search and replace commands
* and allows for repeated globbing/matching with the same
* pattern.
* Therefore you should also save the \(lq_\(rq register on the
* Q-Register stack when calling EN from portable macros.
*
* If \fIfilename\fP is omitted (empty), EN may be used to expand
* a glob \fIpattern\fP to a list of matching file names.
* This is similar to globbing
* on UNIX but not as powerful and may be used e.g. for
* iterating over directory contents.
* E.g. \(lqEN*.c\fB$$\fP\(rq expands to all \(lq.c\(rq files
* in the current directory.
* The resulting file names have the exact same directory
* component as \fIpattern\fP (if any).
* Without \fIfilename\fP, EN will currently only match files
* in the file name component
* of \fIpattern\fP, not on each component of the path name
* separately.
* In other words, EN only looks through the directory
* of \fIpattern\fP \(em you cannot effectively match
* multiple directories.
*
* If \fIfilename\fP is specified, \fIpattern\fP will only
* be matched against that single file name.
* If it matches, \fIfilename\fP is used verbatim.
* In this form, \fIpattern\fP is matched against the entire
* file name, so it is possible to match directory components
* as well.
* \fIfilename\fP does not necessarily have to exist in the
* file system for the match to succeed (unless a file type check
* is also specified).
* For instance, \(lqENf??/\[**].c\fB$\fPfoo/bar.c\fB$\fP\(rq will
* always match and the string \(lqfoo/bar.c\(rq will be inserted
* (see below).
*
* By default, if EN is not colon-modified, the result of
* globbing or file name matching is inserted into the current
* document, at the current position.
* A linefeed is inserted after every file name, i.e.
* every matching file will be on its own line.
*
* EN may be colon-modified to avoid any text insertion.
* Instead, a boolean is returned that signals whether
* any file matched \fIpattern\fP.
* E.g. \(lq:EN*.c\fB$$\fP\(rq returns success (-1) if
* there is at least one \(lq.c\(rq file in the current directory.
*
* The results of EN may be filtered by specifying a numeric file
* \fItype\fP check argument.
* This argument may be omitted (as in the examples above) and defaults
* to 0, i.e. no additional checking.
* The following file type check values are currently defined:
* .IP 0 4
* No file type checking is performed.
* Note however, that when globbing only directory contents
* (of any type) are used, so without the \fIfilename\fP
* argument, the value 0 is equivalent to 5.
* .IP 1
* Only match \fIregular files\fP (no directories).
* Will also match symlinks to regular files (on platforms
* supporting symlinks).
* .IP 2
* Only match \fIsymlinks\fP.
* On platforms without symlinks (non-UNIX), this will never
* match anything.
* .IP 3
* Only match \fIdirectories\fP.
* .IP 4
* Only match \fIexecutables\fP.
* On UNIX, the executable flag is evaluated, while on
* Windows only the file name is checked.
* .IP 5
* Only match existing files or directories.
* When globbing, this check makes no sense and is
* equivalent to no check at all.
* It may however be used to test that a filename refers
* to an existing file.
*
* For instance, \(lq3EN*\fB$$\fP\(rq will expand to
* all subdirectories in the current directory.
* The following idiom may be used to check whether
* a given filename refers to a regular file:
* 1:EN*\fB$\fIfilename\fB$\fR
*
* Note that both without colon and colon modified
* forms of EN save the success or failure of the
* operation in the numeric part of the glob register
* \(lq_\(rq (i.e. the same value that the colon modified
* form would return).
* The command itself never fails because of failure
* in matching any files.
* E.g. if \(lqEN*.c\fB$$\fP\(rq does not match any
* files, the EN command is still successful but does
* not insert anything. A failure boolean would be saved
* in \(lq_\(rq, though.
*
* String-building characters are enabled for EN and
* both string arguments are considered file names
* with regard to auto-completions.
*/
/*
* NOTE: This does not work like classic TECO's
* EN command (iterative globbing), since the
* position in the directory cannot be reasonably
* reset on rubout with glib's API.
* If we have to perform all the globbing on initialization
* we can just as well return all the results at once.
* And we can add them to the current document since
* when they should be in a register, the user will
* have to edit that register anyway.
*/
/*
 * Called with the first string argument of EN (the glob pattern).
 *
 * A non-empty pattern is stored in the string part of the
 * global register "_" (with an undo token, so the change can
 * be rubbed out). An empty pattern leaves the register
 * untouched.
 * In both cases parsing continues with the file name argument.
 */
State *
StateGlob_pattern::got_file(const gchar *filename)
{
	/*
	 * NOTE(review): BEGIN_EXEC is defined elsewhere; presumably it
	 * returns the given state early when not executing - confirm.
	 */
	BEGIN_EXEC(&States::glob_filename);

	if (*filename == '\0')
		return &States::glob_filename;

	QRegister *search_reg = QRegisters::globals["_"];

	search_reg->undo_set_string();
	search_reg->set_string(filename);

	return &States::glob_filename;
}
/*
 * Called with the second string argument of EN (the file name).
 * This performs the actual work of the command:
 *
 *  - The optional numeric argument selects the file type test.
 *  - The pattern is read from the string part of the global
 *    register "_".
 *  - If a file name is given, it is matched against the pattern
 *    (and the file test, if any). Otherwise the directory of the
 *    pattern is globbed.
 *  - Without colon modifier, matching file names are inserted
 *    into the current document (one per line). With colon
 *    modifier, a boolean is pushed onto the expression stack
 *    instead.
 *  - The success/failure boolean is always stored in the integer
 *    part of register "_".
 *
 * Throws Error if the file test argument is out of range.
 */
State *
StateGlob_filename::got_file(const gchar *filename)
{
	/*
	 * NOTE(review): BEGIN_EXEC is defined elsewhere; presumably it
	 * returns the given state early when not executing - confirm.
	 */
	BEGIN_EXEC(&States::start);

	tecoInt teco_test_mode;
	GFileTest file_flags = G_FILE_TEST_EXISTS;

	bool matching = false;
	bool colon_modified = eval_colon();

	QRegister *glob_reg = QRegisters::globals["_"];
	gchar *pattern_str;

	expressions.eval();
	teco_test_mode = expressions.pop_num_calc(0, 0);
	switch (teco_test_mode) {
	/*
	 * 0 means, no file testing.
	 * file_flags will still be G_FILE_TEST_EXISTS which
	 * is equivalent to no testing when using the Globber class.
	 */
	case 0: break;
	case 1: file_flags = G_FILE_TEST_IS_REGULAR; break;
	case 2: file_flags = G_FILE_TEST_IS_SYMLINK; break;
	case 3: file_flags = G_FILE_TEST_IS_DIR; break;
	case 4: file_flags = G_FILE_TEST_IS_EXECUTABLE; break;
	case 5: file_flags = G_FILE_TEST_EXISTS; break;
	default:
		/*
		 * Thrown before pattern_str is allocated,
		 * so nothing has to be cleaned up here.
		 */
		throw Error("Invalid file test %" TECO_INTEGER_FORMAT
		            " for <EN>", teco_test_mode);
	}

	pattern_str = glob_reg->get_string();

	if (*filename) {
		/*
		 * Match pattern against provided file name.
		 * The file is only tested if a test mode was given
		 * explicitly, so with mode 0 it does not have to exist.
		 */
		GRegex *pattern = Globber::compile_pattern(pattern_str);

		if (g_regex_match(pattern, filename, (GRegexMatchFlags)0, NULL) &&
		    (!teco_test_mode || g_file_test(filename, file_flags))) {
			if (!colon_modified) {
				interface.ssm(SCI_BEGINUNDOACTION);
				interface.ssm(SCI_ADDTEXT, strlen(filename),
				              (sptr_t)filename);
				interface.ssm(SCI_ADDTEXT, 1, (sptr_t)"\n");
				interface.ssm(SCI_SCROLLCARET);
				interface.ssm(SCI_ENDUNDOACTION);
			}

			matching = true;
		}

		g_regex_unref(pattern);
	} else if (colon_modified) {
		/*
		 * Match pattern against directory contents (globbing),
		 * returning SUCCESS if at least one file matches
		 */
		Globber globber(pattern_str, file_flags);
		gchar *globbed_filename = globber.next();

		matching = globbed_filename != NULL;

		g_free(globbed_filename);
	} else {
		/*
		 * Match pattern against directory contents (globbing),
		 * inserting all matching file names (linefeed-terminated)
		 */
		Globber globber(pattern_str, file_flags);

		gchar *globbed_filename;

		interface.ssm(SCI_BEGINUNDOACTION);

		while ((globbed_filename = globber.next())) {
			size_t len = strlen(globbed_filename);
			/*
			 * Overwrite the trailing null with the linefeed.
			 * This is safe since the string is passed on
			 * with an explicit length and freed right after.
			 */
			globbed_filename[len] = '\n';

			/*
			 * FIXME: Once we're 8-bit clean, we should
			 * add the filenames null-terminated
			 * (there may be linebreaks in filename).
			 */
			interface.ssm(SCI_ADDTEXT, len+1,
			              (sptr_t)globbed_filename);

			g_free(globbed_filename);
			matching = true;
		}

		interface.ssm(SCI_SCROLLCARET);
		interface.ssm(SCI_ENDUNDOACTION);
	}

	g_free(pattern_str);

	if (colon_modified) {
		expressions.push(TECO_BOOL(matching));
	} else if (matching) {
		/* text has been inserted */
		ring.dirtify();
		if (current_doc_must_undo())
			interface.undo_ssm(SCI_UNDO);
	}

	/*
	 * The result is saved in the integer part of "_" for both
	 * the plain and the colon-modified form of the command.
	 */
	glob_reg->undo_set_integer();
	glob_reg->set_integer(TECO_BOOL(matching));

	return &States::start;
}
} /* namespace SciTECO */