diff options
Diffstat (limited to 'src/glob.cpp')
| -rw-r--r-- | src/glob.cpp | 554 |
1 files changed, 0 insertions, 554 deletions
diff --git a/src/glob.cpp b/src/glob.cpp deleted file mode 100644 index e6b5bd4..0000000 --- a/src/glob.cpp +++ /dev/null @@ -1,554 +0,0 @@ -/* - * Copyright (C) 2012-2017 Robin Haberkorn - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <string.h> - -#include <glib.h> -#include <glib/gprintf.h> -#include <glib/gstdio.h> - -#include "sciteco.h" -#include "interface.h" -#include "parser.h" -#include "expressions.h" -#include "qregisters.h" -#include "ring.h" -#include "ioview.h" -#include "glob.h" - -namespace SciTECO { - -namespace States { - StateGlob_pattern glob_pattern; - StateGlob_filename glob_filename; -} - -Globber::Globber(const gchar *pattern, GFileTest _test) - : test(_test) -{ - gsize dirname_len; - - /* - * This finds the directory component including - * any trailing directory separator - * without making up a directory if it is missing - * (as g_path_get_dirname() does). - * Important since it allows us to construct - * file names with the exact same directory - * prefix as the input pattern. - */ - dirname_len = file_get_dirname_len(pattern); - dirname = g_strndup(pattern, dirname_len); - - dir = g_dir_open(*dirname ? dirname : ".", 0, NULL); - /* if dirname does not exist, dir may be NULL */ - - Globber::pattern = compile_pattern(pattern + dirname_len); -} - -gchar * -Globber::next(void) -{ - const gchar *basename; - - if (!dir) - return NULL; - - while ((basename = g_dir_read_name(dir))) { - gchar *filename; - - if (!g_regex_match(pattern, basename, (GRegexMatchFlags)0, NULL)) - continue; - - /* - * As dirname includes the directory separator, - * we can simply concatenate dirname with basename. - */ - filename = g_strconcat(dirname, basename, NIL); - - /* - * No need to perform file test for EXISTS since - * g_dir_read_name() will only return existing entries - */ - if (test == G_FILE_TEST_EXISTS || g_file_test(filename, test)) - return filename; - - g_free(filename); - } - - return NULL; -} - -Globber::~Globber() -{ - if (pattern) - g_regex_unref(pattern); - if (dir) - g_dir_close(dir); - g_free(dirname); -} - -gchar * -Globber::escape_pattern(const gchar *pattern) -{ - gsize escaped_len = 1; - gchar *escaped, *pout; - - /* - * NOTE: The exact size of the escaped string is easy to calculate - * in O(n) just like strlen(pattern), so we can just as well - * do that. - */ - for (const gchar *pin = pattern; *pin; pin++) { - switch (*pin) { - case '*': - case '?': - case '[': - escaped_len += 3; - break; - default: - escaped_len++; - break; - } - } - pout = escaped = (gchar *)g_malloc(escaped_len); - - while (*pattern) { - switch (*pattern) { - case '*': - case '?': - case '[': - *pout++ = '['; - *pout++ = *pattern; - *pout++ = ']'; - break; - default: - *pout++ = *pattern; - break; - } - - pattern++; - } - *pout = '\0'; - - return escaped; -} - -/** - * Compile a fnmatch(3)-compatible glob pattern to - * a PCRE regular expression. - * - * There is GPattern, but it only supports the - * "*" and "?" wildcards which most importantly - * do not allow escaping. - * - * @param pattern The pattern to compile. - * @return A new compiled regular expression object. - * Always non-NULL. Unref after use. - */ -GRegex * -Globber::compile_pattern(const gchar *pattern) -{ - gchar *pattern_regex, *pout; - GRegex *pattern_compiled; - - enum { - STATE_WILDCARD, - STATE_CLASS_START, - STATE_CLASS_NEGATE, - STATE_CLASS - } state = STATE_WILDCARD; - - /* - * NOTE: The conversion to regex needs at most two - * characters per input character and the regex pattern - * is required only temporarily, so we use a fixed size - * buffer avoiding reallocations but wasting a few bytes - * (determining the exact required space would be tricky). - * It is not allocated on the stack though since pattern - * might be arbitrary user input and we must avoid - * stack overflows at all costs. - */ - pout = pattern_regex = (gchar *)g_malloc(strlen(pattern)*2 + 1 + 1); - - while (*pattern) { - if (state == STATE_WILDCARD) { - /* - * Outside a character class/set. - */ - switch (*pattern) { - case '*': - *pout++ = '.'; - *pout++ = '*'; - break; - case '?': - *pout++ = '.'; - break; - case '[': - /* - * The special case of an unclosed character - * class is allowed in fnmatch(3) but invalid - * in PCRE, so we must check for it explicitly. - * FIXME: This is sort of inefficient... - */ - if (strchr(pattern, ']')) { - state = STATE_CLASS_START; - *pout++ = '['; - break; - } - /* fall through */ - default: - /* - * For simplicity, all non-alphanumeric - * characters are escaped since they could - * be PCRE magic characters. - * g_regex_escape_string() is inefficient. - * character anyway. - */ - if (!g_ascii_isalnum(*pattern)) - *pout++ = '\\'; - *pout++ = *pattern; - break; - } - } else { - /* - * Within a character class/set. - */ - switch (*pattern) { - case '!': - /* - * fnmatch(3) allows ! instead of ^ immediately - * after the opening bracket. - */ - if (state > STATE_CLASS_START) { - state = STATE_CLASS; - *pout++ = '!'; - break; - } - /* fall through */ - case '^': - state = state == STATE_CLASS_START - ? STATE_CLASS_NEGATE : STATE_CLASS; - *pout++ = '^'; - break; - case ']': - /* - * fnmatch(3) allows the closing bracket as the - * first character to include it in the set, while - * PCRE requires it to be escaped. - */ - if (state == STATE_CLASS) { - state = STATE_WILDCARD; - *pout++ = ']'; - break; - } - /* fall through */ - default: - if (!g_ascii_isalnum(*pattern)) - *pout++ = '\\'; - /* fall through */ - case '-': - state = STATE_CLASS; - *pout++ = *pattern; - break; - } - } - - pattern++; - } - *pout++ = '$'; - *pout = '\0'; - - pattern_compiled = g_regex_new(pattern_regex, - (GRegexCompileFlags)(G_REGEX_DOTALL | G_REGEX_ANCHORED), - (GRegexMatchFlags)0, NULL); - /* - * Since the regex is generated from patterns that are - * always valid, there must be no syntactic error. - */ - g_assert(pattern_compiled != NULL); - - g_free(pattern_regex); - return pattern_compiled; -} - -/* - * Command States - */ - -/*$ EN glob - * [type]EN[pattern]$[filename]$ -- Glob files or match filename and check file type - * [type]:EN[pattern]$[filename]$ -> Success|Failure - * - * EN is a powerful command for performing various tasks - * given a glob \fIpattern\fP. - * For a description of the glob pattern syntax, refer to the section - * .B Glob Patterns - * for details. - * - * \fIpattern\fP may be omitted, in which case it defaults - * to the pattern saved in the search and glob register \(lq_\(rq. - * If it is specified, it overwrites the contents of the register - * \(lq_\(rq with \fIpattern\fP. - * This behaviour is similar to the search and replace commands - * and allows for repeated globbing/matching with the same - * pattern. - * Therefoe you should also save the \(lq_\(rq register on the - * Q-Register stack when calling EN from portable macros. - * - * If \fIfilename\fP is omitted (empty), EN may be used to expand - * a glob \fIpattern\fP to a list of matching file names. - * This is similar to globbing - * on UNIX but not as powerful and may be used e.g. for - * iterating over directory contents. - * E.g. \(lqEN*.c\fB$$\fP\(rq expands to all \(lq.c\(rq files - * in the current directory. - * The resulting file names have the exact same directory - * component as \fIpattern\fP (if any). - * Without \fIfilename\fP, EN will currently only match files - * in the file name component - * of \fIpattern\fP, not on each component of the path name - * separately. - * In other words, EN only looks through the directory - * of \fIpattern\fP \(em you cannot effectively match - * multiple directories. - * - * If \fIfilename\fP is specified, \fIpattern\fP will only - * be matched against that single file name. - * If it matches, \fIfilename\fP is used verbatim. - * In this form, \fIpattern\fP is matched against the entire - * file name, so it is possible to match directory components - * as well. - * \fIfilename\fP does not necessarily have to exist in the - * file system for the match to succeed (unless a file type check - * is also specified). - * For instance, \(lqENf??/\[**].c\fB$\fPfoo/bar.c\fB$\fP\(rq will - * always match and the string \(lqfoo/bar.c\(rq will be inserted - * (see below). - * - * By default, if EN is not colon-modified, the result of - * globbing or file name matching is inserted into the current - * document, at the current position. - * A linefeed is inserted after every file name, i.e. - * every matching file will be on its own line. - * - * EN may be colon-modified to avoid any text insertion. - * Instead, a boolean is returned that signals whether - * any file matched \fIpattern\fP. - * E.g. \(lq:EN*.c\fB$$\fP\(rq returns success (-1) if - * there is at least one \(lq.c\(rq file in the current directory. - * - * The results of EN may be filtered by specifying a numeric file - * \fItype\fP check argument. - * This argument may be omitted (as in the examples above) and defaults - * to 0, i.e. no additional checking. - * The following file type check values are currently defined: - * .IP 0 4 - * No file type checking is performed. - * Note however, that when globbing only directory contents - * (of any type) are used, so without the \fIfilename\fP - * argument, the value 0 is equivalent to 5. - * .IP 1 - * Only match \fIregular files\fP (no directories). - * Will also match symlinks to regular files (on platforms - * supporting symlinks). - * .IP 2 - * Only match \fIsymlinks\fP. - * On platforms without symlinks (non-UNIX), this will never - * match anything. - * .IP 3 - * Only match \fIdirectories\fP. - * .IP 4 - * Only match \fIexecutables\fP. - * On UNIX, the executable flag is evaluated, while on - * Windows only the file name is checked. - * .IP 5 - * Only match existing files or directories. - * When globbing, this check makes no sense and is - * equivalent to no check at all. - * It may however be used to test that a filename refers - * to an existing file. - * - * For instance, \(lq3EN*\fB$$\fP\(rq will expand to - * all subdirectories in the current directory. - * The following idiom may be used to check whether - * a given filename refers to a regular file: - * 1:EN*\fB$\fIfilename\fB$\fR - * - * Note that both without colon and colon modified - * forms of EN save the success or failure of the - * operation in the numeric part of the glob register - * \(lq_\(rq (i.e. the same value that the colon modified - * form would return). - * The command itself never fails because of failure - * in matching any files. - * E.g. if \(lqEN*.c\fB$$\fP\(rq does not match any - * files, the EN command is still successful but does - * not insert anything. A failure boolean would be saved - * in \(lq_\(rq, though. - * - * String-building characters are enabled for EN and - * both string arguments are considered file names - * with regard to auto-completions. - */ -/* - * NOTE: This does not work like classic TECO's - * EN command (iterative globbing), since the - * position in the directory cannot be reasonably - * reset on rubout with glib's API. - * If we have to perform all the globbing on initialization - * we can just as well return all the results at once. - * And we can add them to the current document since - * when they should be in a register, the user will - * have to edit that register anyway. - */ -State * -StateGlob_pattern::got_file(const gchar *filename) -{ - BEGIN_EXEC(&States::glob_filename); - - if (*filename) { - QRegister *glob_reg = QRegisters::globals["_"]; - - glob_reg->undo_set_string(); - glob_reg->set_string(filename); - } - - return &States::glob_filename; -} - -State * -StateGlob_filename::got_file(const gchar *filename) -{ - BEGIN_EXEC(&States::start); - - tecoInt teco_test_mode; - GFileTest file_flags = G_FILE_TEST_EXISTS; - - bool matching = false; - bool colon_modified = eval_colon(); - - QRegister *glob_reg = QRegisters::globals["_"]; - gchar *pattern_str; - - expressions.eval(); - teco_test_mode = expressions.pop_num_calc(0, 0); - switch (teco_test_mode) { - /* - * 0 means, no file testing. - * file_flags will still be G_FILE_TEST_EXISTS which - * is equivalent to no testing when using the Globber class. - */ - case 0: break; - case 1: file_flags = G_FILE_TEST_IS_REGULAR; break; - case 2: file_flags = G_FILE_TEST_IS_SYMLINK; break; - case 3: file_flags = G_FILE_TEST_IS_DIR; break; - case 4: file_flags = G_FILE_TEST_IS_EXECUTABLE; break; - case 5: file_flags = G_FILE_TEST_EXISTS; break; - default: - throw Error("Invalid file test %" TECO_INTEGER_FORMAT - " for <EN>", teco_test_mode); - } - - pattern_str = glob_reg->get_string(); - - if (*filename) { - /* - * Match pattern against provided file name - */ - GRegex *pattern = Globber::compile_pattern(pattern_str); - - if (g_regex_match(pattern, filename, (GRegexMatchFlags)0, NULL) && - (!teco_test_mode || g_file_test(filename, file_flags))) { - if (!colon_modified) { - interface.ssm(SCI_BEGINUNDOACTION); - interface.ssm(SCI_ADDTEXT, strlen(filename), - (sptr_t)filename); - interface.ssm(SCI_ADDTEXT, 1, (sptr_t)"\n"); - interface.ssm(SCI_SCROLLCARET); - interface.ssm(SCI_ENDUNDOACTION); - } - - matching = true; - } - - g_regex_unref(pattern); - } else if (colon_modified) { - /* - * Match pattern against directory contents (globbing), - * returning SUCCESS if at least one file matches - */ - Globber globber(pattern_str, file_flags); - gchar *globbed_filename = globber.next(); - - matching = globbed_filename != NULL; - - g_free(globbed_filename); - } else { - /* - * Match pattern against directory contents (globbing), - * inserting all matching file names (linefeed-terminated) - */ - Globber globber(pattern_str, file_flags); - - gchar *globbed_filename; - - interface.ssm(SCI_BEGINUNDOACTION); - - while ((globbed_filename = globber.next())) { - size_t len = strlen(globbed_filename); - /* overwrite trailing null */ - globbed_filename[len] = '\n'; - - /* - * FIXME: Once we're 8-bit clean, we should - * add the filenames null-terminated - * (there may be linebreaks in filename). - */ - interface.ssm(SCI_ADDTEXT, len+1, - (sptr_t)globbed_filename); - - g_free(globbed_filename); - matching = true; - } - - interface.ssm(SCI_SCROLLCARET); - interface.ssm(SCI_ENDUNDOACTION); - } - - g_free(pattern_str); - - if (colon_modified) { - expressions.push(TECO_BOOL(matching)); - } else if (matching) { - /* text has been inserted */ - ring.dirtify(); - if (current_doc_must_undo()) - interface.undo_ssm(SCI_UNDO); - } - - glob_reg->undo_set_integer(); - glob_reg->set_integer(TECO_BOOL(matching)); - - return &States::start; -} - -} /* namespace SciTECO */ |
