aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2024-12-04 02:22:36 +0300
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2024-12-04 11:43:18 +0300
commit3a823fb43ba0abe52f3152d337675e9ed9a3f175 (patch)
treef63143368fe15b4fbf88f9646a0a913eb46717fd
parent11054d94a99e8c11d6010b117c84ee88b4fa1a73 (diff)
downloadsciteco-3a823fb43ba0abe52f3152d337675e9ed9a3f175.tar.gz
implemented ^Y/^S commands for receiving pattern match/insertion ranges and lengths (refs #27)
* Allows storing pattern matches into Q-Registers (^YXq).
* You can also refer to subpatterns marked by ^E[...] by passing a number > 0. This is equivalent to \0-9 references in many programming languages.
* It's especially useful for supporting TECO's equivalent of structural regular expressions. This will be done with additional macros.
* You can also simply back up to the beginning of an insertion or search. So I...$^SC leaves dot at the beginning of the insertion. S...$^SC leaves dot before the found pattern. This has been previously requested by users.
* Perhaps there should be ^Y string building characters as well to backreference in search-replacement commands (TODO). This means that the search commands would have to store the matched text itself in teco_range_t structures since FR deletes the matched text before processing the replacement string. It could also be made into a FR/FS-specific construct, so we don't fetch the substrings unnecessarily.
* This differs from DEC TECO in always returning the same range even after dot movements, since we are storing start/end byte positions instead of only the length. Also DEC TECO does not support fetching subpattern ranges.
-rw-r--r--src/core-commands.c120
-rw-r--r--src/core-commands.h11
-rw-r--r--src/error.h8
-rw-r--r--src/glob.c24
-rw-r--r--src/qreg-commands.c5
-rw-r--r--src/search.c69
-rw-r--r--src/spawn.c7
-rw-r--r--tests/testsuite.at5
8 files changed, 224 insertions, 25 deletions
diff --git a/src/core-commands.c b/src/core-commands.c
index 60e7bcc..752a8e8 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -201,9 +201,14 @@ teco_state_start_backslash(teco_machine_main_t *ctx, GError **error)
gchar *str = teco_expressions_format(buffer, value,
ctx->qreg_table_locals->radix);
g_assert(*str != '\0');
+ gsize len = strlen(str);
+
+ teco_undo_gsize(teco_ranges[0].from) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
+ teco_undo_gsize(teco_ranges[0].to) = teco_ranges[0].from + len;
+ teco_undo_guint(teco_ranges_count) = 1;
teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0);
- teco_interface_ssm(SCI_ADDTEXT, strlen(str), (sptr_t)str);
+ teco_interface_ssm(SCI_ADDTEXT, len, (sptr_t)str);
teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0);
teco_ring_dirtify();
@@ -1836,6 +1841,103 @@ teco_state_control_glyphs2bytes(teco_machine_main_t *ctx, GError **error)
teco_expressions_push(res);
}
+/**
+ * Number of valid buffer ranges in teco_ranges.
+ * It starts out as 0, so ^Y and ^S report an invalid subpattern
+ * until a search or text insertion has set it.
+ * @fixme Should this be 1 from the very beginning, so 0^Y/^S never fail?
+ */
+guint teco_ranges_count = 0;
+/**
+ * Array of buffer ranges of the last matched substrings or the last text insertion.
+ * Index 0 is the entire match or insertion, higher indexes are subpatterns.
+ */
+teco_range_t *teco_ranges = NULL;
+
+/*
+ * Make sure we always have space for at least one result,
+ * so we don't have to check for NULL everywhere.
+ *
+ * This is a constructor function, so it runs before main().
+ * It allocates one zero-initialized range, but leaves
+ * teco_ranges_count untouched.
+ */
+static void __attribute__((constructor))
+teco_ranges_init(void)
+{
+ teco_ranges = g_new0(teco_range_t, 1);
+}
+
+/*$ ^Y subexpression subpattern
+ * [n]^Y -> start, end -- Return range of last pattern match, subexpression or text insertion
+ *
+ * This command returns the buffer ranges of the subpatterns of the
+ * last pattern match (search command) or of the last text insertion.
+ * <n> specifies the number of the subpattern from left to right.
+ * The default value 0 specifies the entire matched pattern,
+ * while higher numbers refer to \fB^E[\fI...\fB]\fR subpatterns.
+ * \fB^Y\fP can also be used to return the buffer range of the
+ * last text insertion by any \*(ST command (\fBI\fP, \fBEI\fP, \fB^I\fP, \fBG\fIq\fR,
+ * \fB\\\fP, \fBEC\fP, \fBEN\fP, etc).
+ * In this case <n> is only allowed to be 0 or missing.
+ *
+ * For instance, \(lq^YXq\(rq copies the entire matched pattern or text
+ * insertion into register \fIq\fP.
+ */
+/*
+ * In DEC TECO, this is actually defined as ".+^S,.".
+ * The SciTECO version is more robust to moving dot afterwards, though,
+ * as it will always return the same buffer range.
+ *
+ * Implementation: <n> is popped from the expression stack (0 if missing)
+ * and must be a valid index into teco_ranges, otherwise the subpattern
+ * error is set for "^Y" and nothing is pushed.
+ * The start of the selected range is pushed first, followed by its end,
+ * each converted with teco_interface_bytes2glyphs().
+ */
+static void
+teco_state_control_last_range(teco_machine_main_t *ctx, GError **error)
+{
+ teco_int_t n;
+
+ if (!teco_expressions_pop_num_calc(&n, 0, error))
+ return;
+ if (n < 0 || n >= teco_ranges_count) {
+ teco_error_subpattern_set(error, "^Y");
+ return;
+ }
+
+ teco_expressions_push(teco_interface_bytes2glyphs(teco_ranges[n].from));
+ teco_expressions_push(teco_interface_bytes2glyphs(teco_ranges[n].to));
+}
+
+/*$ ^S
+ * [n]^S -> -length -- Return negative length of last pattern match, subexpression or text insertion
+ * -^S -> length
+ *
+ * Returns the negative length of the subpatterns of the last pattern match
+ * (search command) or of the last text insertion.
+ * <n> specifies the number of the subpattern from left to right
+ * and defaults to 0 (the entire pattern match or text insertion).
+ * \(lq^S\(rq is equivalent to \(lq^YU1U0 Q0-Q1\(rq.
+ * Without arguments, the sign prefix negates the result, i.e. returns the
+ * length of the entire matched pattern or text insertion.
+ *
+ * A common idiom \(lq^SC\(rq can be used for jumping to the
+ * beginning of the matched pattern or inserted string.
+ */
+static void
+teco_state_control_last_length(teco_machine_main_t *ctx, GError **error)
+{
+	teco_int_t n = 0;
+
+	/*
+	 * There is little use in supporting n^S for n != 0.
+	 * This is just for consistency with ^Y.
+	 *
+	 * The argument is only popped if there is one on the stack.
+	 * Otherwise n keeps its default of 0, i.e. the entire pattern
+	 * match or text insertion.
+	 */
+	if (teco_expressions_args() > 0 &&
+	    !teco_expressions_pop_num_calc(&n, 0, error))
+		return;
+	if (n < 0 || n >= teco_ranges_count) {
+		/* name the command that actually failed: ^S, not ^Y */
+		teco_error_subpattern_set(error, "^S");
+		return;
+	}
+
+	/*
+	 * Start minus end gives the negated length.
+	 * Both byte positions are converted with
+	 * teco_interface_bytes2glyphs() before subtracting.
+	 */
+	teco_expressions_push(teco_interface_bytes2glyphs(teco_ranges[n].from) -
+	                      teco_interface_bytes2glyphs(teco_ranges[n].to));
+}
+
+/*
+ * Releases the array that teco_ranges currently points to.
+ * NOTE(review): TECO_DEBUG_CLEANUP is defined elsewhere - presumably it
+ * turns this into a destructor for debug builds only; confirm.
+ */
+static void TECO_DEBUG_CLEANUP
+teco_ranges_cleanup(void)
+{
+ g_free(teco_ranges);
+}
+
static teco_state_t *
teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
{
@@ -1864,7 +1966,9 @@ teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
['D'] = {&teco_state_start, teco_state_control_decimal},
['R'] = {&teco_state_start, teco_state_control_radix},
['E'] = {&teco_state_start, teco_state_control_glyphs2bytes},
- ['X'] = {&teco_state_start, teco_state_control_search_mode}
+ ['X'] = {&teco_state_start, teco_state_control_search_mode},
+ ['Y'] = {&teco_state_start, teco_state_control_last_range},
+ ['S'] = {&teco_state_start, teco_state_control_last_length}
};
/*
@@ -2785,6 +2889,9 @@ teco_state_insert_initial(teco_machine_main_t *ctx, GError **error)
if (ctx->mode > TECO_MODE_NORMAL)
return TRUE;
+ teco_undo_gsize(teco_ranges[0].from) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
+ teco_undo_guint(teco_ranges_count) = 1;
+
/*
* Current document's encoding determines the behaviour of
* string building constructs.
@@ -2861,6 +2968,15 @@ teco_state_insert_process(teco_machine_main_t *ctx, const teco_string_t *str,
return TRUE;
}
+/**
+ * Shared "done" callback of the text insertion states.
+ *
+ * In normal execution mode, the current buffer position is stored as
+ * the end of the last insertion (teco_ranges[0].to).
+ * The matching start position is stored by teco_state_insert_initial().
+ * The assignment goes through teco_undo_gsize(), so it is presumably
+ * reverted when the command is rubbed out.
+ *
+ * @param ctx Main state machine. Only its mode is consulted.
+ * @param str String argument of the command (not used here).
+ * @param error Error return location (not used here).
+ * @return Always &teco_state_start.
+ */
+teco_state_t *
+teco_state_insert_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error)
+{
+ if (ctx->mode == TECO_MODE_NORMAL)
+ teco_undo_gsize(teco_ranges[0].to) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
+
+ return &teco_state_start;
+}
+
/*
* NOTE: cannot support VideoTECO's <n>I because
* beginning and end of strings must be determined
diff --git a/src/core-commands.h b/src/core-commands.h
index e30770d..fbb67fa 100644
--- a/src/core-commands.h
+++ b/src/core-commands.h
@@ -38,9 +38,18 @@ TECO_DECLARE_STATE(teco_state_ascii);
TECO_DECLARE_STATE(teco_state_escape);
TECO_DECLARE_STATE(teco_state_ecommand);
+/** Range within a buffer, given as a pair of byte positions */
+typedef struct {
+ gsize from; /**< start position in bytes */
+ gsize to; /**< end position in bytes (start position plus length) */
+} teco_range_t;
+
+extern guint teco_ranges_count;
+extern teco_range_t *teco_ranges;
+
gboolean teco_state_insert_initial(teco_machine_main_t *ctx, GError **error);
gboolean teco_state_insert_process(teco_machine_main_t *ctx, const teco_string_t *str,
gsize new_chars, GError **error);
+teco_state_t *teco_state_insert_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error);
/* in cmdline.c */
gboolean teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error);
@@ -57,7 +66,7 @@ gboolean teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machi
static teco_state_t * \
NAME##_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error) \
{ \
- return &teco_state_start; /* nothing to be done when done */ \
+ return teco_state_insert_done(ctx, str, error); \
} \
TECO_DEFINE_STATE_EXPECTSTRING(NAME, \
.initial_cb = (teco_state_initial_cb_t)teco_state_insert_initial, \
diff --git a/src/error.h b/src/error.h
index 5ffd434..2df7b89 100644
--- a/src/error.h
+++ b/src/error.h
@@ -44,6 +44,7 @@ typedef enum {
TECO_ERROR_MOVE,
TECO_ERROR_WORDS,
TECO_ERROR_RANGE,
+ TECO_ERROR_SUBPATTERN,
TECO_ERROR_INVALIDQREG,
TECO_ERROR_QREGOPUNSUPPORTED,
TECO_ERROR_QREGCONTAINSNULL,
@@ -108,6 +109,13 @@ teco_error_range_set(GError **error, const gchar *cmd)
}
+/**
+ * Sets a TECO_ERROR_SUBPATTERN error.
+ *
+ * @param error Error return location.
+ * @param cmd Name of the command that was given the invalid subpattern.
+ *            It is inserted into the error message.
+ */
static inline void
+teco_error_subpattern_set(GError **error, const gchar *cmd)
+{
+ g_set_error(error, TECO_ERROR, TECO_ERROR_SUBPATTERN,
+ "Invalid subpattern specified for <%s>", cmd);
+}
+
+static inline void
teco_error_invalidqreg_set(GError **error, const gchar *name, gsize len, gboolean local)
{
g_autofree gchar *name_printable = teco_string_echo(name, len);
diff --git a/src/glob.c b/src/glob.c
index 0374d7c..a69aa81 100644
--- a/src/glob.c
+++ b/src/glob.c
@@ -35,6 +35,7 @@
#include "qreg.h"
#include "ring.h"
#include "error.h"
+#include "undo.h"
#include "glob.h"
/*
@@ -512,14 +513,19 @@ teco_state_glob_filename_done(teco_machine_main_t *ctx, const teco_string_t *str
if (g_regex_match(pattern, filename, 0, NULL) &&
(teco_test_mode == 0 || g_file_test(filename, file_flags))) {
if (!colon_modified) {
+ gsize len = strlen(filename);
+
+ teco_undo_gsize(teco_ranges[0].from) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
+ teco_undo_gsize(teco_ranges[0].to) = teco_ranges[0].from + len + 1;
+ teco_undo_guint(teco_ranges_count) = 1;
+
/*
* FIXME: Filenames may contain linefeeds.
* But if we add them null-terminated, they will be relatively hard to parse.
*/
+ filename[len] = '\n';
teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0);
- teco_interface_ssm(SCI_ADDTEXT, strlen(filename),
- (sptr_t)filename);
- teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)"\n");
+ teco_interface_ssm(SCI_ADDTEXT, len+1, (sptr_t)filename);
teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0);
}
@@ -544,17 +550,23 @@ teco_state_glob_filename_done(teco_machine_main_t *ctx, const teco_string_t *str
g_auto(teco_globber_t) globber;
teco_globber_init(&globber, pattern_str.data, file_flags);
+ teco_undo_gsize(teco_ranges[0].from) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
+ teco_undo_gsize(teco_ranges[0].to) = teco_ranges[0].from;
+ teco_undo_guint(teco_ranges_count) = 1;
+
teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0);
gchar *globbed_filename;
while ((globbed_filename = teco_globber_next(&globber))) {
+ gsize len = strlen(globbed_filename);
+ teco_ranges[0].to += len+1;
+
/*
* FIXME: Filenames may contain linefeeds.
* But if we add them null-terminated, they will be relatively hard to parse.
*/
- teco_interface_ssm(SCI_ADDTEXT, strlen(globbed_filename),
- (sptr_t)globbed_filename);
- teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)"\n");
+ globbed_filename[len] = '\n';
+ teco_interface_ssm(SCI_ADDTEXT, len+1, (sptr_t)globbed_filename);
g_free(globbed_filename);
matching = TRUE;
diff --git a/src/qreg-commands.c b/src/qreg-commands.c
index 89618da..9f22de9 100644
--- a/src/qreg-commands.c
+++ b/src/qreg-commands.c
@@ -299,6 +299,7 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
/*$ Q Qq query
* Qq -> n -- Query Q-Register existence, its integer or string characters
+ * -Qq -> -n
* <position>Qq -> character
* :Qq -> -1 | size
*
@@ -533,6 +534,10 @@ teco_state_getqregstring_got_register(teco_machine_main_t *ctx, teco_qreg_t *qre
if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error))
return NULL;
+ teco_undo_gsize(teco_ranges[0].from) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
+ teco_undo_gsize(teco_ranges[0].to) = teco_ranges[0].from + str.len;
+ teco_undo_guint(teco_ranges_count) = 1;
+
if (str.len > 0) {
teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0);
teco_interface_ssm(SCI_ADDTEXT, str.len, (sptr_t)str.data);
diff --git a/src/search.c b/src/search.c
index 0908df0..c0398dc 100644
--- a/src/search.c
+++ b/src/search.c
@@ -480,7 +480,8 @@ teco_pattern2regexp(teco_string_t *pattern, teco_machine_qregspec_t *qreg_machin
/* a complete expression is strictly required */
return g_strdup("");
- teco_string_append(&re, "(", 1);
+ /* don't capture this group - it's not included in ^Y */
+ teco_string_append(&re, "(?:", 3);
teco_string_append(&re, temp, strlen(temp));
teco_string_append(&re, ")+", 2);
state = TECO_SEARCH_STATE_START;
@@ -537,6 +538,27 @@ teco_pattern2regexp(teco_string_t *pattern, teco_machine_qregspec_t *qreg_machin
return g_steal_pointer(&re.data) ? : g_strdup("");
}
+/*
+ * Undo support for replacing an array of teco_range_t.
+ * NOTE(review): TECO_DEFINE_UNDO_OBJECT_OWN() is defined elsewhere.
+ * Presumably the undo token takes ownership of the previous array and
+ * releases it with g_free() - confirm against undo.h.
+ */
+TECO_DEFINE_UNDO_OBJECT_OWN(ranges, teco_range_t *, g_free);
+
+/*
+ * Yields VAR as an lvalue, so it can be used on the left side of an
+ * assignment. Presumably the old value is saved on the undo stack first.
+ */
+#define teco_undo_ranges_own(VAR) \
+ (*teco_undo_object_ranges_push(&(VAR)))
+
+/**
+ * Get the byte ranges of a match and all of its subpatterns.
+ *
+ * @param match_info Match to query. It is expected to hold a
+ *                   successful match.
+ * @param offset     Number of bytes added to every position reported
+ *                   by @a match_info.
+ * @param count      Where to store the number of ranges returned.
+ * @return Newly allocated array of @a count ranges, to be released with
+ *         g_free(). Entry 0 is the entire match.
+ *         NULL if there are no ranges to report.
+ */
+static teco_range_t *
+teco_get_ranges(const GMatchInfo *match_info, gsize offset, guint *count)
+{
+	/*
+	 * g_match_info_get_match_count() returns a gint, which is -1 on error.
+	 * Storing that in a guint unchecked would request a huge array.
+	 */
+	gint match_count = g_match_info_get_match_count(match_info);
+	*count = MAX(match_count, 0);
+
+	/* g_new() returns NULL for 0 elements */
+	teco_range_t *ranges = g_new(teco_range_t, *count);
+
+	for (guint i = 0; i < *count; i++) {
+		/*
+		 * GLib reports -1 for subpatterns that did not take part in
+		 * the match and leaves the positions untouched on failure.
+		 * Both cases are clamped to an empty range at offset.
+		 */
+		gint from = -1, to = -1;
+		g_match_info_fetch_pos(match_info, i, &from, &to);
+		ranges[i].from = offset+MAX(from, 0);
+		ranges[i].to = offset+MAX(to, 0);
+	}
+
+	return ranges;
+}
+
static gboolean
teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
{
@@ -554,7 +576,8 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
return FALSE;
}
- gint matched_from = -1, matched_to = -1;
+ guint num_ranges = 0;
+ teco_range_t *matched_ranges = NULL;
if (*count >= 0) {
while (g_match_info_matches(info) && --(*count)) {
@@ -570,22 +593,23 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
if (!*count)
/* successful */
- g_match_info_fetch_pos(info, 0,
- &matched_from, &matched_to);
+ matched_ranges = teco_get_ranges(info, from, &num_ranges);
} else {
/* only keep the last `count' matches, in a circular stack */
typedef struct {
- gint from, to;
- } teco_range_t;
+ guint num_ranges;
+ teco_range_t *ranges;
+ } teco_match_t;
- gsize matched_size = sizeof(teco_range_t) * -*count;
+ guint matched_num = -*count;
+ gsize matched_size = sizeof(teco_match_t[matched_num]);
/*
* matched_size could overflow.
* NOTE: Glib 2.48 has g_size_checked_mul() which uses
* compiler intrinsics.
*/
- if (matched_size / sizeof(teco_range_t) != -*count)
+ if (matched_size / sizeof(teco_match_t) != matched_num)
/* guaranteed to fail either teco_memory_check() or g_malloc() */
matched_size = G_MAXSIZE;
@@ -598,13 +622,17 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
if (!teco_memory_check(matched_size, error))
return FALSE;
- g_autofree teco_range_t *matched = g_malloc(matched_size);
+ /*
+ * NOTE: This needs to be deep-freed, which does not currently
+ * happen automatically.
+ */
+ g_autofree teco_match_t *matched = g_malloc0(matched_size);
gint matched_total = 0, i = 0;
while (g_match_info_matches(info)) {
- g_match_info_fetch_pos(info, 0,
- &matched[i].from, &matched[i].to);
+ g_free(matched[i].ranges);
+ matched[i].ranges = teco_get_ranges(info, from, &matched[i].num_ranges);
/*
* NOTE: The return boolean does NOT signal whether an error was generated.
@@ -612,6 +640,8 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
g_match_info_next(info, &tmp_error);
if (tmp_error) {
g_propagate_error(error, tmp_error);
+ for (int i = 0; i < matched_num; i++)
+ g_free(matched[i].ranges);
return FALSE;
}
@@ -621,14 +651,23 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
*count = MIN(*count + matched_total, 0);
if (!*count) {
/* successful -> i points to stack bottom */
- matched_from = matched[i].from;
- matched_to = matched[i].to;
+ num_ranges = matched[i].num_ranges;
+ matched_ranges = matched[i].ranges;
+ matched[i].ranges = NULL;
}
+
+ for (int i = 0; i < matched_num; i++)
+ g_free(matched[i].ranges);
}
- if (matched_from >= 0 && matched_to >= 0)
+ if (matched_ranges) {
/* match success */
- teco_interface_ssm(SCI_SETSEL, from+matched_from, from+matched_to);
+ teco_undo_ranges_own(teco_ranges) = matched_ranges;
+ teco_undo_guint(teco_ranges_count) = num_ranges;
+ g_assert(teco_ranges_count > 0);
+
+ teco_interface_ssm(SCI_SETSEL, matched_ranges[0].from, matched_ranges[0].to);
+ }
return TRUE;
}
diff --git a/src/spawn.c b/src/spawn.c
index 16ab082..a0bc7a9 100644
--- a/src/spawn.c
+++ b/src/spawn.c
@@ -412,9 +412,14 @@ teco_state_execute_done(teco_machine_main_t *ctx, const teco_string_t *str, GErr
teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0);
teco_spawn_ctx.start = teco_spawn_ctx.from;
g_main_loop_run(teco_spawn_ctx.mainloop);
- if (!teco_spawn_ctx.register_argument)
+ if (!teco_spawn_ctx.register_argument) {
teco_interface_ssm(SCI_DELETERANGE, teco_spawn_ctx.from,
teco_spawn_ctx.to - teco_spawn_ctx.from);
+
+ teco_undo_gsize(teco_ranges[0].from) = teco_spawn_ctx.from;
+ teco_undo_gsize(teco_ranges[0].to) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0);
+ teco_undo_guint(teco_ranges_count) = 1;
+ }
teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0);
if (teco_spawn_ctx.register_argument) {
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 44d1711..bef270c 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -116,6 +116,11 @@ AT_CHECK([$SCITECO -e "@I/XXX/J -^X @:S/xxx/\"S(0/0)'"], 0, ignore, ignore)
AT_CHECK([$SCITECO -e "-^X @^Um{^X} Mm-0\"N(0/0)'"], 0, ignore, ignore)
AT_CLEANUP
+AT_SETUP([Search and insertion ranges])
+AT_CHECK([$SCITECO -e "@I/XXYYZZ/^SC .\"N(0/0)' C @S/YY/^YU1U0 Q0-2\"N(0/0)' Q1-4\"N(0/0)'"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "@I/XXYYZZ/J @S/XX^E[[^EMY]]/ 1^YXa :Qa-2\"N(0/0)'"], 0, ignore, ignore)
+AT_CLEANUP
+
AT_SETUP([Editing local registers in macro calls])
AT_CHECK([$SCITECO -e '@^Ua{@EQ.x//} :Ma @^U.x/FOO/'], 0, ignore, ignore)
AT_CHECK([$SCITECO -e '@^Ua{@EQ.x//} Ma @^U.x/FOO/'], 1, ignore, ignore)