From 4fe5bc6f3867096965270c90f2e1e5df77b8825f Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Sun, 28 Jun 2026 00:39:51 +0200 Subject: terex is the new regular expression engine now and replaces PCRE (GRegex) * terex is based on Henry Spencer's regular expression engine for Tcl. It is a hybrid NFA/DFA design which has better worst-case runtimes than the backtracking PCRE. Memory usage is also limited and can no longer increase catastrophically. * It should no longer be possible to crash SciTECO with pathological searches. * Since it reliably supports partial matches (REG_EXPECT) we can now enable the new backwards-search algorithm by default. This used to be broken because of a glib bug, which I already fixed. It would however take a long time until this ends up on the majority of glib installations. * Regexp executions can still be quite slow if you are looking for a pattern at the end of a huge file, which can hang the editor, but this can now at least theoretically be solved by adding hooks into terex to poll for interruptions. * We can now also get rid of a TECO-pattern to regexp translation step by directly generating terex tokens (TODO). * Performance-wise terex appears to be slower than PCRE for simple forward searches even when linking everything with optimizations (FIXME). * Having a stand-alone regular expression engine is also a huge step in getting rid of glib. 
See also: https://git.fmsbw.de/terex/about/ --- tests/testsuite.at | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'tests/testsuite.at') diff --git a/tests/testsuite.at b/tests/testsuite.at index fc8ab37..a97e0f8 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -519,6 +519,24 @@ AT_SETUP([Search accesses wrong Q-Register table]) TE_CHECK([[@^U.#xx/123/ @^Um{:@S/^EG.#xx/$} :Mm Mm]], 1, ignore, ignore) AT_CLEANUP +# NOTE: This used to be a bug in the old GRegex-based implementation, +# which surfaced only with specific build options of Glib's +# PCRE which was not predictable. +# It segfaulted at least on Ubuntu 20.04 (libpcre3 v2:8.39). +# It could fail because the memory limit is exceeed, +# but not in this case since the match string isn't too large. +AT_SETUP([Pattern matching overflow]) +# NOTE: Creating very long lines would currently be ineffective +# at least in UTF-8 mode. +TE_CHECK([[100000<@I"^J">J @S"^EM^X"]], 0, ignore, ignore) +AT_CLEANUP + +AT_SETUP([Block-wise backwards search]) +# Failed when using GRegex (PCRE), which had broken support for partial matches. +# This is not an issue with terex. +TE_CHECK([[2,8EJ @I/ABCD/ -:@S/BC/"F(0/0)' .-3"N(0/0)' ^S+2"N(0/0)']], 0, ignore, ignore) +AT_CLEANUP + AT_SETUP([Invalid buffer ids]) TE_CHECK([[42@EB//]], 1, ignore, ignore) TE_CHECK([[23@EW//]], 1, ignore, ignore) @@ -659,24 +677,6 @@ TE_CHECK([[| (0/0) ']], 1, ignore, ignore) AT_XFAIL_IF(true) AT_CLEANUP -# NOTE: This bug depends on specific build options of Glib's -# PCRE which is not predictable. -# It segfaults at least on Ubuntu 20.04 (libpcre3 v2:8.39). -#AT_SETUP([Pattern matching overflow]) -## Should no longer dump core. -## It could fail because the memory limit is exceeed, -## but not in this case since the match string isn't too large. 
-#TE_CHECK([[100000<@I"X">J @S"^EM^X"]], 0, ignore, ignore) -#AT_XFAIL_IF(true) -#AT_CLEANUP - -AT_SETUP([Block-wise backwards search]) -# Crashes are caused by a glib bug when a match falls on block boundaries. -# See teco_do_search_backwards() -TE_CHECK([[2,8EJ @I/ABCD/ -:@S/BC/"F(0/0)' .-3"N(0/0)' ^S+2"N(0/0)']], 0, ignore, ignore) -AT_XFAIL_IF(true) -AT_CLEANUP - AT_SETUP([Backtracking in patterns]) # ^ES should be greedy and posessive TE_CHECK([[@I/ /J :@S/^ES^X/"S(0/0)']], 0, ignore, ignore) -- cgit v1.2.3