From 49587788101dce76111a8b16dfed0889c26b0e61 Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Mon, 29 Jun 2026 19:20:55 +0200 Subject: regular expression matching can now be reliably interrupted using CTRL+C * The previous checks for interruptions only helped in a few corner cases like for very high search-repeat counts or during backwards searches across the entire buffer. * But even with terex' more predictable runtime properties a single regex execution can hang quite a long time. E.g. `S^EM^X$` on a huge buffer or even more so with backreferences as in `S^~(.*)\1$`. * We now use the new tere_set_is_interrupted_cb() to register teco_interface_is_interrupted(). Types should be compatible as long as gboolean resolves to int. * It's no longer necessary to manually check for teco_interface_is_interrupted() since tere_exec() now returns REG_EINTR in case the callback returned TRUE in which case it's handled by teco_error_regex_set(). --- TODO | 6 ------ contrib/terex | 2 +- src/search.c | 23 +++++++---------------- 3 files changed, 8 insertions(+), 23 deletions(-) diff --git a/TODO b/TODO index b2de61c..ec65898 100644 --- a/TODO +++ b/TODO @@ -74,12 +74,6 @@ Known Bugs: and b) the file mode and ownership of re-created files can be preserved. We should fall back silently to an (inefficient) memory copy or temporary file strategy if this is detected. - * It is still possible to hang searches on huge files since a single match - could still scan too much memory - e.g. try searching for a word that - occurs only at the end of the huge file. - Can probably be avoided by including `(*MATCH_LIMIT=d)` in the pattern. - A new regexp engine should also allow interruptions within a single match, - so we don't have to invent limits like that. * It is still possible to crash SciTECO using recursive functions, since they map to the C program's call stack. It is perhaps best to use another stack of diff --git a/contrib/terex b/contrib/terex index fa3d463..d71d752 160000 --- a/contrib/terex +++ b/contrib/terex @@ -1 +1 @@ -Subproject commit fa3d463a4cd563f3c5f29331f48a0161bf586863 +Subproject commit d71d7527a8be8654560867d761036598408bfe14 diff --git a/src/search.c b/src/search.c index 491ddbb..1ab925c 100644 --- a/src/search.c +++ b/src/search.c @@ -708,14 +708,6 @@ teco_do_search_forward(regex_t *re, gsize from, gsize to, gint *count, GError ** 1+re->re_nsub, info, eflags)) == REG_OKAY && --(*count)) { buffer += info[0].rm_eo; from += info[0].rm_eo; - - /* - * FIXME: A single pathological match could already be excessively slow. - */ - if (G_UNLIKELY(teco_interface_is_interrupted())) { - teco_error_interrupted_set(error); - return FALSE; - } } if (rc == REG_OKAY) { @@ -810,14 +802,6 @@ teco_do_search_backwards(regex_t *re, gsize from, gsize to, gint *count, GError gint rc; for (;;) { - /* - * FIXME: A single pathological match could already be excessively slow. - */ - if (G_UNLIKELY(teco_interface_is_interrupted())) { - teco_error_interrupted_set(error); - return FALSE; - } - rc = tere_exec(re, (const chr *)buffer+from_block+offset, to_block-from_block-offset, &details, 1+re->re_nsub, info, eflags); if (rc != REG_OKAY) @@ -1000,6 +984,13 @@ teco_state_search_process(teco_machine_main_t *ctx, teco_string_t str, gsize new if (rc != REG_OKAY) goto failure; + /* + * Check for interruptions during matching since they can be quite slow + * especially when matching with regular expressions and backreferences + * (e.g. @S/^~(.*)\1/). + */ + tere_set_is_interrupted_cb(&re, teco_interface_is_interrupted); + if (!teco_qreg_current && teco_ring_current != teco_search_parameters.from_buffer) { teco_ring_undo_edit(); -- cgit v1.2.3