aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Robin Haberkorn <rhaberkorn@fmsbw.de> 2026-06-29 19:20:55 +0200
committer: Robin Haberkorn <rhaberkorn@fmsbw.de> 2026-06-29 19:20:55 +0200
commit: 49587788101dce76111a8b16dfed0889c26b0e61 (patch)
tree: 0d80d8a78efb514a47a25828880ada78a1143438
parent: 8601ccfb905a5cc6fe10f7d81a491155f9236f35 (diff)
regular expression matching can now be reliably interrupted using CTRL+C [HEAD, master-fmsbw-ci, master]
* The previous checks for interruptions only helped in a few corner cases, like very high search-repeat counts or backwards searches across the entire buffer.
* But even with terex's more predictable runtime properties, a single regex execution can hang for quite a long time, e.g. `S^EM^X$` on a huge buffer, or even more so with backreferences as in `S^~(.*)\1$`.
* We now use the new tere_set_is_interrupted_cb() to register teco_interface_is_interrupted(). The types should be compatible as long as gboolean resolves to int.
* It's no longer necessary to manually check teco_interface_is_interrupted(), since tere_exec() now returns REG_EINTR if the callback returned TRUE; that error is then handled by teco_error_regex_set().
-rw-r--r-- TODO 6
m--------- contrib/terex 0
-rw-r--r-- src/search.c 23
3 files changed, 7 insertions, 22 deletions
diff --git a/TODO b/TODO
index b2de61c..ec65898 100644
--- a/TODO
+++ b/TODO
@@ -74,12 +74,6 @@ Known Bugs:
and b) the file mode and ownership of re-created files can be preserved.
We should fall back silently to an (inefficient) memory copy or temporary
file strategy if this is detected.
- * It is still possible to hang searches on huge files since a single match
- could still scan too much memory - e.g. try searching for a word that
- occurs only at the end of the huge file.
- Can probably be avoided by including `(*MATCH_LIMIT=d)` in the pattern.
- A new regexp engine should also allow interruptions within a single match,
- so we don't have to invent limits like that.
* It is still possible to crash SciTECO using recursive functions,
since they map to the C program's call stack.
It is perhaps best to use another stack of
diff --git a/contrib/terex b/contrib/terex
-Subproject fa3d463a4cd563f3c5f29331f48a0161bf58686
+Subproject d71d7527a8be8654560867d761036598408bfe1
diff --git a/src/search.c b/src/search.c
index 491ddbb..1ab925c 100644
--- a/src/search.c
+++ b/src/search.c
@@ -708,14 +708,6 @@ teco_do_search_forward(regex_t *re, gsize from, gsize to, gint *count, GError **
1+re->re_nsub, info, eflags)) == REG_OKAY && --(*count)) {
buffer += info[0].rm_eo;
from += info[0].rm_eo;
-
- /*
- * FIXME: A single pathological match could already be excessively slow.
- */
- if (G_UNLIKELY(teco_interface_is_interrupted())) {
- teco_error_interrupted_set(error);
- return FALSE;
- }
}
if (rc == REG_OKAY) {
@@ -810,14 +802,6 @@ teco_do_search_backwards(regex_t *re, gsize from, gsize to, gint *count, GError
gint rc;
for (;;) {
- /*
- * FIXME: A single pathological match could already be excessively slow.
- */
- if (G_UNLIKELY(teco_interface_is_interrupted())) {
- teco_error_interrupted_set(error);
- return FALSE;
- }
-
rc = tere_exec(re, (const chr *)buffer+from_block+offset, to_block-from_block-offset,
&details, 1+re->re_nsub, info, eflags);
if (rc != REG_OKAY)
@@ -1000,6 +984,13 @@ teco_state_search_process(teco_machine_main_t *ctx, teco_string_t str, gsize new
if (rc != REG_OKAY)
goto failure;
+ /*
+ * Check for interruptions during matching since they can be quite slow
+ * especially when matching with regular expressions and backreferences
+ * (e.g. @S/^~(.*)\1/).
+ */
+ tere_set_is_interrupted_cb(&re, teco_interface_is_interrupted);
+
if (!teco_qreg_current &&
teco_ring_current != teco_search_parameters.from_buffer) {
teco_ring_undo_edit();