aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/search.c
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2024-12-04 02:22:36 +0300
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2024-12-04 11:43:18 +0300
commit3a823fb43ba0abe52f3152d337675e9ed9a3f175 (patch)
treef63143368fe15b4fbf88f9646a0a913eb46717fd /src/search.c
parent11054d94a99e8c11d6010b117c84ee88b4fa1a73 (diff)
downloadsciteco-3a823fb43ba0abe52f3152d337675e9ed9a3f175.tar.gz
implemented ^Y/^S commands for receiving pattern match/insertion ranges and lengths (refs #27)
* Allows storing pattern matches into Q-Registers (^YXq). * You can also refer to subpatterns marked by ^E[...] by passing a number > 0. This is equivalent to \0-9 references in many programming languages. * It's especially useful for supporting TECO's equivalent of structural regular expressions. This will be done with additional macros. * You can also simply back up to the beginning of an insertion or search. So I...$^SC leaves dot at the beginning of the insertion. S...$^SC leaves dot before the found pattern. This has been previously requested by users. * Perhaps there should be ^Y string building characters as well to backreference in search-replacement commands (TODO). This means that the search commands would have to store the matched text itself in teco_range_t structures since FR deletes the matched text before processing the replacement string. It could also be made into a FR/FS-specific construct, so we don't fetch the substrings unnecessarily. * This differs from DEC TECO in always returning the same range even after dot movements, since we are storing start/end byte positions instead of only the length. Also DEC TECO does not support fetching subpattern ranges.
Diffstat (limited to 'src/search.c')
-rw-r--r--src/search.c69
1 files changed, 54 insertions, 15 deletions
diff --git a/src/search.c b/src/search.c
index 0908df0..c0398dc 100644
--- a/src/search.c
+++ b/src/search.c
@@ -480,7 +480,8 @@ teco_pattern2regexp(teco_string_t *pattern, teco_machine_qregspec_t *qreg_machin
/* a complete expression is strictly required */
return g_strdup("");
- teco_string_append(&re, "(", 1);
+ /* don't capture this group - it's not included in ^Y */
+ teco_string_append(&re, "(?:", 3);
teco_string_append(&re, temp, strlen(temp));
teco_string_append(&re, ")+", 2);
state = TECO_SEARCH_STATE_START;
@@ -537,6 +538,27 @@ teco_pattern2regexp(teco_string_t *pattern, teco_machine_qregspec_t *qreg_machin
return g_steal_pointer(&re.data) ? : g_strdup("");
}
+TECO_DEFINE_UNDO_OBJECT_OWN(ranges, teco_range_t *, g_free);
+
+#define teco_undo_ranges_own(VAR) \
+ (*teco_undo_object_ranges_push(&(VAR)))
+
+static teco_range_t *
+teco_get_ranges(const GMatchInfo *match_info, gsize offset, guint *count)
+{
+ *count = g_match_info_get_match_count(match_info);
+ teco_range_t *ranges = g_new(teco_range_t, *count);
+
+ for (gint i = 0; i < *count; i++) {
+ gint from, to;
+ g_match_info_fetch_pos(match_info, i, &from, &to);
+ ranges[i].from = offset+MAX(from, 0);
+ ranges[i].to = offset+MAX(to, 0);
+ }
+
+ return ranges;
+}
+
static gboolean
teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
{
@@ -554,7 +576,8 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
return FALSE;
}
- gint matched_from = -1, matched_to = -1;
+ guint num_ranges = 0;
+ teco_range_t *matched_ranges = NULL;
if (*count >= 0) {
while (g_match_info_matches(info) && --(*count)) {
@@ -570,22 +593,23 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
if (!*count)
/* successful */
- g_match_info_fetch_pos(info, 0,
- &matched_from, &matched_to);
+ matched_ranges = teco_get_ranges(info, from, &num_ranges);
} else {
/* only keep the last `count' matches, in a circular stack */
typedef struct {
- gint from, to;
- } teco_range_t;
+ guint num_ranges;
+ teco_range_t *ranges;
+ } teco_match_t;
- gsize matched_size = sizeof(teco_range_t) * -*count;
+ guint matched_num = -*count;
+ gsize matched_size = sizeof(teco_match_t[matched_num]);
/*
* matched_size could overflow.
* NOTE: Glib 2.48 has g_size_checked_mul() which uses
* compiler intrinsics.
*/
- if (matched_size / sizeof(teco_range_t) != -*count)
+ if (matched_size / sizeof(teco_match_t) != matched_num)
/* guaranteed to fail either teco_memory_check() or g_malloc() */
matched_size = G_MAXSIZE;
@@ -598,13 +622,17 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
if (!teco_memory_check(matched_size, error))
return FALSE;
- g_autofree teco_range_t *matched = g_malloc(matched_size);
+ /*
+ * NOTE: This needs to be deep-freed, which does not currently
+ * happen automatically.
+ */
+ g_autofree teco_match_t *matched = g_malloc0(matched_size);
gint matched_total = 0, i = 0;
while (g_match_info_matches(info)) {
- g_match_info_fetch_pos(info, 0,
- &matched[i].from, &matched[i].to);
+ g_free(matched[i].ranges);
+ matched[i].ranges = teco_get_ranges(info, from, &matched[i].num_ranges);
/*
* NOTE: The return boolean does NOT signal whether an error was generated.
@@ -612,6 +640,8 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
g_match_info_next(info, &tmp_error);
if (tmp_error) {
g_propagate_error(error, tmp_error);
+ for (int i = 0; i < matched_num; i++)
+ g_free(matched[i].ranges);
return FALSE;
}
@@ -621,14 +651,23 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error)
*count = MIN(*count + matched_total, 0);
if (!*count) {
/* successful -> i points to stack bottom */
- matched_from = matched[i].from;
- matched_to = matched[i].to;
+ num_ranges = matched[i].num_ranges;
+ matched_ranges = matched[i].ranges;
+ matched[i].ranges = NULL;
}
+
+ for (int i = 0; i < matched_num; i++)
+ g_free(matched[i].ranges);
}
- if (matched_from >= 0 && matched_to >= 0)
+ if (matched_ranges) {
/* match success */
- teco_interface_ssm(SCI_SETSEL, from+matched_from, from+matched_to);
+ teco_undo_ranges_own(teco_ranges) = matched_ranges;
+ teco_undo_guint(teco_ranges_count) = num_ranges;
+ g_assert(teco_ranges_count > 0);
+
+ teco_interface_ssm(SCI_SETSEL, matched_ranges[0].from, matched_ranges[0].to);
+ }
return TRUE;
}