diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-12-04 02:22:36 +0300 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-12-04 11:43:18 +0300 |
commit | 3a823fb43ba0abe52f3152d337675e9ed9a3f175 (patch) | |
tree | f63143368fe15b4fbf88f9646a0a913eb46717fd /src | |
parent | 11054d94a99e8c11d6010b117c84ee88b4fa1a73 (diff) | |
download | sciteco-3a823fb43ba0abe52f3152d337675e9ed9a3f175.tar.gz |
implemented ^Y/^S commands for retrieving pattern match/insertion ranges and lengths (refs #27)
* Allows storing pattern matches into Q-Registers (^YXq).
* You can also refer to subpatterns marked by ^E[...] by passing a number > 0.
This is equivalent to \0-9 references in many programming languages.
* It's especially useful for supporting TECO's equivalent of structural regular expressions.
This will be done with additional macros.
* You can also simply back up to the beginning of an insertion or search.
So I...$^SC leaves dot at the beginning of the insertion.
S...$^SC leaves dot before the found pattern.
This has been previously requested by users.
* Perhaps there should be ^Y string building characters as well for backreferences
in search-replacement commands (TODO).
This would require the search commands to store the matched text itself
in teco_range_t structures since FR deletes the matched text before
processing the replacement string.
It could also be made into an FR/FS-specific construct,
so we don't fetch the substrings unnecessarily.
* This differs from DEC TECO in always returning the same range even after dot movements,
since we are storing start/end byte positions instead of only the length.
Also DEC TECO does not support fetching subpattern ranges.
Diffstat (limited to 'src')
-rw-r--r-- | src/core-commands.c | 120 | ||||
-rw-r--r-- | src/core-commands.h | 11 | ||||
-rw-r--r-- | src/error.h | 8 | ||||
-rw-r--r-- | src/glob.c | 24 | ||||
-rw-r--r-- | src/qreg-commands.c | 5 | ||||
-rw-r--r-- | src/search.c | 69 | ||||
-rw-r--r-- | src/spawn.c | 7 |
7 files changed, 219 insertions, 25 deletions
diff --git a/src/core-commands.c b/src/core-commands.c index 60e7bcc..752a8e8 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -201,9 +201,14 @@ teco_state_start_backslash(teco_machine_main_t *ctx, GError **error) gchar *str = teco_expressions_format(buffer, value, ctx->qreg_table_locals->radix); g_assert(*str != '\0'); + gsize len = strlen(str); + + teco_undo_gsize(teco_ranges[0].from) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + teco_undo_gsize(teco_ranges[0].to) = teco_ranges[0].from + len; + teco_undo_guint(teco_ranges_count) = 1; teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); - teco_interface_ssm(SCI_ADDTEXT, strlen(str), (sptr_t)str); + teco_interface_ssm(SCI_ADDTEXT, len, (sptr_t)str); teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0); teco_ring_dirtify(); @@ -1836,6 +1841,103 @@ teco_state_control_glyphs2bytes(teco_machine_main_t *ctx, GError **error) teco_expressions_push(res); } +/** + * Number of buffer ranges in teco_ranges + * @fixme Should this be 1 from the very beginning, so 0^Y/^S never fail? + */ +guint teco_ranges_count = 0; +/** Array of buffer ranges of the last matched substrings or the last text insertion */ +teco_range_t *teco_ranges = NULL; + +/* + * Make sure we always have space for at least one result, + * so we don't have to check for NULL everywhere. + */ +static void __attribute__((constructor)) +teco_ranges_init(void) +{ + teco_ranges = g_new0(teco_range_t, 1); +} + +/*$ ^Y subexpression subpattern + * [n]^Y -> start, end -- Return range of last pattern match, subexpression or text insertion + * + * This command returns the buffer ranges of the subpatterns of the + * last pattern match (search command) or of the last text insertion. + * <n> specifies the number of the subpattern from left to right. + * The default value 0 specifies the entire matched pattern, + * while higher numbers refer to \fB^E[\fI...\fB]\fR subpatterns. 
+ * \fB^Y\fP can also be used to return the buffer range of the + * last text insertion by any \*(ST command (\fBI\fP, \fBEI\fP, \fB^I\fP, \fBG\fIq\fR, + * \fB\\\fP, \fBEC\fP, \fBEN\fP, etc). + * In this case <n> is only allowed to be 0 or missing. + * + * For instance, \(lq^YXq\(rq copies the entire matched pattern or text + * insertion into register \fIq\fP. + */ +/* + * In DEC TECO, this is actually defined as ".+^S,.". + * The SciTECO version is more robust to moving dot afterwards, though, + * as it will always return the same buffer range. + */ +static void +teco_state_control_last_range(teco_machine_main_t *ctx, GError **error) +{ + teco_int_t n; + + if (!teco_expressions_pop_num_calc(&n, 0, error)) + return; + if (n < 0 || n >= teco_ranges_count) { + teco_error_subpattern_set(error, "^Y"); + return; + } + + teco_expressions_push(teco_interface_bytes2glyphs(teco_ranges[n].from)); + teco_expressions_push(teco_interface_bytes2glyphs(teco_ranges[n].to)); +} + +/*$ ^S + * [n]^S -> -length -- Return negative length of last pattern match, subexpression or text insertion + * -^S -> length + * + * Returns the negative length of the subpatterns of the last pattern match + * (search command) or of the last text insertion. + * <n> specifies the number of the subpattern from left to right + * and defaults to 0 (the entire pattern match or text insertion). + * \(lq^S\(rq is equivalent to \(lq^YU1U0 Q0-Q1\(rq. + * Without arguments, the sign prefix negates the result, i.e. returns the + * length of the entire matched pattern or text insertion. + * + * A common idiom \(lq^SC\(rq can be used for jumping to the + * beginning of the matched pattern or inserted string. + */ +static void +teco_state_control_last_length(teco_machine_main_t *ctx, GError **error) +{ + teco_int_t n = 0; + + /* + * There is little use in supporting n^S for n != 0. + * This is just for consistency with ^Y. 
+ */ + if (teco_expressions_args() > 0 && + !teco_expressions_pop_num_calc(&n, 0, error)) + return; + if (n < 0 || n >= teco_ranges_count) { + teco_error_subpattern_set(error, "^Y"); + return; + } + + teco_expressions_push(teco_interface_bytes2glyphs(teco_ranges[n].from) - + teco_interface_bytes2glyphs(teco_ranges[n].to)); +} + +static void TECO_DEBUG_CLEANUP +teco_ranges_cleanup(void) +{ + g_free(teco_ranges); +} + static teco_state_t * teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { @@ -1864,7 +1966,9 @@ teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error) ['D'] = {&teco_state_start, teco_state_control_decimal}, ['R'] = {&teco_state_start, teco_state_control_radix}, ['E'] = {&teco_state_start, teco_state_control_glyphs2bytes}, - ['X'] = {&teco_state_start, teco_state_control_search_mode} + ['X'] = {&teco_state_start, teco_state_control_search_mode}, + ['Y'] = {&teco_state_start, teco_state_control_last_range}, + ['S'] = {&teco_state_start, teco_state_control_last_length} }; /* @@ -2785,6 +2889,9 @@ teco_state_insert_initial(teco_machine_main_t *ctx, GError **error) if (ctx->mode > TECO_MODE_NORMAL) return TRUE; + teco_undo_gsize(teco_ranges[0].from) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + teco_undo_guint(teco_ranges_count) = 1; + /* * Current document's encoding determines the behaviour of * string building constructs. 
@@ -2861,6 +2968,15 @@ teco_state_insert_process(teco_machine_main_t *ctx, const teco_string_t *str, return TRUE; } +teco_state_t * +teco_state_insert_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error) +{ + if (ctx->mode == TECO_MODE_NORMAL) + teco_undo_gsize(teco_ranges[0].to) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + + return &teco_state_start; +} + /* * NOTE: cannot support VideoTECO's <n>I because * beginning and end of strings must be determined diff --git a/src/core-commands.h b/src/core-commands.h index e30770d..fbb67fa 100644 --- a/src/core-commands.h +++ b/src/core-commands.h @@ -38,9 +38,18 @@ TECO_DECLARE_STATE(teco_state_ascii); TECO_DECLARE_STATE(teco_state_escape); TECO_DECLARE_STATE(teco_state_ecommand); +typedef struct { + gsize from; /*< start position in bytes */ + gsize to; /*< end position in bytes */ +} teco_range_t; + +extern guint teco_ranges_count; +extern teco_range_t *teco_ranges; + gboolean teco_state_insert_initial(teco_machine_main_t *ctx, GError **error); gboolean teco_state_insert_process(teco_machine_main_t *ctx, const teco_string_t *str, gsize new_chars, GError **error); +teco_state_t *teco_state_insert_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error); /* in cmdline.c */ gboolean teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error); @@ -57,7 +66,7 @@ gboolean teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machi static teco_state_t * \ NAME##_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error) \ { \ - return &teco_state_start; /* nothing to be done when done */ \ + return teco_state_insert_done(ctx, str, error); \ } \ TECO_DEFINE_STATE_EXPECTSTRING(NAME, \ .initial_cb = (teco_state_initial_cb_t)teco_state_insert_initial, \ diff --git a/src/error.h b/src/error.h index 5ffd434..2df7b89 100644 --- a/src/error.h +++ b/src/error.h @@ -44,6 +44,7 @@ typedef enum { TECO_ERROR_MOVE, 
TECO_ERROR_WORDS, TECO_ERROR_RANGE, + TECO_ERROR_SUBPATTERN, TECO_ERROR_INVALIDQREG, TECO_ERROR_QREGOPUNSUPPORTED, TECO_ERROR_QREGCONTAINSNULL, @@ -108,6 +109,13 @@ teco_error_range_set(GError **error, const gchar *cmd) } static inline void +teco_error_subpattern_set(GError **error, const gchar *cmd) +{ + g_set_error(error, TECO_ERROR, TECO_ERROR_SUBPATTERN, + "Invalid subpattern specified for <%s>", cmd); +} + +static inline void teco_error_invalidqreg_set(GError **error, const gchar *name, gsize len, gboolean local) { g_autofree gchar *name_printable = teco_string_echo(name, len); @@ -35,6 +35,7 @@ #include "qreg.h" #include "ring.h" #include "error.h" +#include "undo.h" #include "glob.h" /* @@ -512,14 +513,19 @@ teco_state_glob_filename_done(teco_machine_main_t *ctx, const teco_string_t *str if (g_regex_match(pattern, filename, 0, NULL) && (teco_test_mode == 0 || g_file_test(filename, file_flags))) { if (!colon_modified) { + gsize len = strlen(filename); + + teco_undo_gsize(teco_ranges[0].from) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + teco_undo_gsize(teco_ranges[0].to) = teco_ranges[0].from + len + 1; + teco_undo_guint(teco_ranges_count) = 1; + /* * FIXME: Filenames may contain linefeeds. * But if we add them null-terminated, they will be relatively hard to parse. 
*/ + filename[len] = '\n'; teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); - teco_interface_ssm(SCI_ADDTEXT, strlen(filename), - (sptr_t)filename); - teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)"\n"); + teco_interface_ssm(SCI_ADDTEXT, len+1, (sptr_t)filename); teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0); } @@ -544,17 +550,23 @@ teco_state_glob_filename_done(teco_machine_main_t *ctx, const teco_string_t *str g_auto(teco_globber_t) globber; teco_globber_init(&globber, pattern_str.data, file_flags); + teco_undo_gsize(teco_ranges[0].from) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + teco_undo_gsize(teco_ranges[0].to) = teco_ranges[0].from; + teco_undo_guint(teco_ranges_count) = 1; + teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); gchar *globbed_filename; while ((globbed_filename = teco_globber_next(&globber))) { + gsize len = strlen(globbed_filename); + teco_ranges[0].to += len+1; + /* * FIXME: Filenames may contain linefeeds. * But if we add them null-terminated, they will be relatively hard to parse. 
*/ - teco_interface_ssm(SCI_ADDTEXT, strlen(globbed_filename), - (sptr_t)globbed_filename); - teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)"\n"); + globbed_filename[len] = '\n'; + teco_interface_ssm(SCI_ADDTEXT, len+1, (sptr_t)globbed_filename); g_free(globbed_filename); matching = TRUE; diff --git a/src/qreg-commands.c b/src/qreg-commands.c index 89618da..9f22de9 100644 --- a/src/qreg-commands.c +++ b/src/qreg-commands.c @@ -299,6 +299,7 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, /*$ Q Qq query * Qq -> n -- Query Q-Register existence, its integer or string characters + * -Qq -> -n * <position>Qq -> character * :Qq -> -1 | size * @@ -533,6 +534,10 @@ teco_state_getqregstring_got_register(teco_machine_main_t *ctx, teco_qreg_t *qre if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; + teco_undo_gsize(teco_ranges[0].from) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + teco_undo_gsize(teco_ranges[0].to) = teco_ranges[0].from + str.len; + teco_undo_guint(teco_ranges_count) = 1; + if (str.len > 0) { teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); teco_interface_ssm(SCI_ADDTEXT, str.len, (sptr_t)str.data); diff --git a/src/search.c b/src/search.c index 0908df0..c0398dc 100644 --- a/src/search.c +++ b/src/search.c @@ -480,7 +480,8 @@ teco_pattern2regexp(teco_string_t *pattern, teco_machine_qregspec_t *qreg_machin /* a complete expression is strictly required */ return g_strdup(""); - teco_string_append(&re, "(", 1); + /* don't capture this group - it's not included in ^Y */ + teco_string_append(&re, "(?:", 3); teco_string_append(&re, temp, strlen(temp)); teco_string_append(&re, ")+", 2); state = TECO_SEARCH_STATE_START; @@ -537,6 +538,27 @@ teco_pattern2regexp(teco_string_t *pattern, teco_machine_qregspec_t *qreg_machin return g_steal_pointer(&re.data) ? 
: g_strdup(""); } +TECO_DEFINE_UNDO_OBJECT_OWN(ranges, teco_range_t *, g_free); + +#define teco_undo_ranges_own(VAR) \ + (*teco_undo_object_ranges_push(&(VAR))) + +static teco_range_t * +teco_get_ranges(const GMatchInfo *match_info, gsize offset, guint *count) +{ + *count = g_match_info_get_match_count(match_info); + teco_range_t *ranges = g_new(teco_range_t, *count); + + for (gint i = 0; i < *count; i++) { + gint from, to; + g_match_info_fetch_pos(match_info, i, &from, &to); + ranges[i].from = offset+MAX(from, 0); + ranges[i].to = offset+MAX(to, 0); + } + + return ranges; +} + static gboolean teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error) { @@ -554,7 +576,8 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error) return FALSE; } - gint matched_from = -1, matched_to = -1; + guint num_ranges = 0; + teco_range_t *matched_ranges = NULL; if (*count >= 0) { while (g_match_info_matches(info) && --(*count)) { @@ -570,22 +593,23 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error) if (!*count) /* successful */ - g_match_info_fetch_pos(info, 0, - &matched_from, &matched_to); + matched_ranges = teco_get_ranges(info, from, &num_ranges); } else { /* only keep the last `count' matches, in a circular stack */ typedef struct { - gint from, to; - } teco_range_t; + guint num_ranges; + teco_range_t *ranges; + } teco_match_t; - gsize matched_size = sizeof(teco_range_t) * -*count; + guint matched_num = -*count; + gsize matched_size = sizeof(teco_match_t[matched_num]); /* * matched_size could overflow. * NOTE: Glib 2.48 has g_size_checked_mul() which uses * compiler intrinsics. 
*/ - if (matched_size / sizeof(teco_range_t) != -*count) + if (matched_size / sizeof(teco_match_t) != matched_num) /* guaranteed to fail either teco_memory_check() or g_malloc() */ matched_size = G_MAXSIZE; @@ -598,13 +622,17 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error) if (!teco_memory_check(matched_size, error)) return FALSE; - g_autofree teco_range_t *matched = g_malloc(matched_size); + /* + * NOTE: This needs to be deep-freed, which does not currently + * happen automatically. + */ + g_autofree teco_match_t *matched = g_malloc0(matched_size); gint matched_total = 0, i = 0; while (g_match_info_matches(info)) { - g_match_info_fetch_pos(info, 0, - &matched[i].from, &matched[i].to); + g_free(matched[i].ranges); + matched[i].ranges = teco_get_ranges(info, from, &matched[i].num_ranges); /* * NOTE: The return boolean does NOT signal whether an error was generated. @@ -612,6 +640,8 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error) g_match_info_next(info, &tmp_error); if (tmp_error) { g_propagate_error(error, tmp_error); + for (int i = 0; i < matched_num; i++) + g_free(matched[i].ranges); return FALSE; } @@ -621,14 +651,23 @@ teco_do_search(GRegex *re, gsize from, gsize to, gint *count, GError **error) *count = MIN(*count + matched_total, 0); if (!*count) { /* successful -> i points to stack bottom */ - matched_from = matched[i].from; - matched_to = matched[i].to; + num_ranges = matched[i].num_ranges; + matched_ranges = matched[i].ranges; + matched[i].ranges = NULL; } + + for (int i = 0; i < matched_num; i++) + g_free(matched[i].ranges); } - if (matched_from >= 0 && matched_to >= 0) + if (matched_ranges) { /* match success */ - teco_interface_ssm(SCI_SETSEL, from+matched_from, from+matched_to); + teco_undo_ranges_own(teco_ranges) = matched_ranges; + teco_undo_guint(teco_ranges_count) = num_ranges; + g_assert(teco_ranges_count > 0); + + teco_interface_ssm(SCI_SETSEL, matched_ranges[0].from, 
matched_ranges[0].to); + } return TRUE; } diff --git a/src/spawn.c b/src/spawn.c index 16ab082..a0bc7a9 100644 --- a/src/spawn.c +++ b/src/spawn.c @@ -412,9 +412,14 @@ teco_state_execute_done(teco_machine_main_t *ctx, const teco_string_t *str, GErr teco_interface_ssm(SCI_BEGINUNDOACTION, 0, 0); teco_spawn_ctx.start = teco_spawn_ctx.from; g_main_loop_run(teco_spawn_ctx.mainloop); - if (!teco_spawn_ctx.register_argument) + if (!teco_spawn_ctx.register_argument) { teco_interface_ssm(SCI_DELETERANGE, teco_spawn_ctx.from, teco_spawn_ctx.to - teco_spawn_ctx.from); + + teco_undo_gsize(teco_ranges[0].from) = teco_spawn_ctx.from; + teco_undo_gsize(teco_ranges[0].to) = teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0); + teco_undo_guint(teco_ranges_count) = 1; + } teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0); if (teco_spawn_ctx.register_argument) { |