aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Robin Haberkorn <rhaberkorn@fmsbw.de> 2026-06-29 00:15:46 +0200
committer: Robin Haberkorn <rhaberkorn@fmsbw.de> 2026-06-29 00:32:13 +0200
commit: 3522966d9584ec16e2f469acd0fe8727857a9d25 (patch)
tree: f361cd32d54cc874bd3714c1cf9248254638df1d
parent: f08dea5fead2f9ef9e0fa114b2e09aa94908d629 (diff)
implemented the ^~ pattern match construct: the rest of the pattern will be an Advanced Regular Expression (HEAD, master-fmsbw-ci, master)
* Allows searching by regular expressions. We will never support all ARE constructs in TECO patterns, so this is useful to have available.
* Can only be typed with an upcaret. This leaves ^E~q available as an escape-regexp string building construct.
* Once we replace the pattern2regexp converter with a custom terex lexer, we might want to restrict ^~ to the beginning of the pattern. Currently, however, it can be anywhere, so you can mix TECO patterns with regular expressions.
-rw-r--r-- doc/sciteco.7.template 11
-rw-r--r-- src/search.c 29
-rw-r--r-- tests/testsuite.at 4
3 files changed, 35 insertions, 9 deletions
diff --git a/doc/sciteco.7.template b/doc/sciteco.7.template
index 87e8574..41f3abf 100644
--- a/doc/sciteco.7.template
+++ b/doc/sciteco.7.template
@@ -2050,10 +2050,17 @@ the space and horizontal tab characters.
Matches one in a list of patterns.
Any pattern match construct may be used.
The pattern alternatives must be separated by commas.
+.TP
+.SCITECO_TOPIC ^~
+.BI ^~
+The rest of the pattern is an Advanced Regular Expression (ARE)
+as in Tcl - see
+.BR re_syntax (3).
+This can only be typed with an upcaret.
.LP
All non-pattern-match-characters match themselves.
-Note however that currently, all pattern matching is performed
-.BR case-insensitive .
+Pattern matching is performed case-insensitive by default,
+but this can be controlled with the \fB^X\fP command.
.
.
.SH FILE NAMES AND DIRECTORIES
diff --git a/src/search.c b/src/search.c
index 0df483f..ce4a338 100644
--- a/src/search.c
+++ b/src/search.c
@@ -425,14 +425,29 @@ teco_pattern2regexp(teco_string_t *pattern, teco_machine_qregspec_t *qreg_machin
g_auto(teco_string_t) re = {NULL, 0};
do {
- /*
- * Previous character was caret.
- * Make sure it is handled like a control character.
- * This is necessary even though we have string building activated,
- * to support constructs like ^Q^Q (typed with carets) in order to
- * quote pattern matching characters.
- */
if (state == TECO_SEARCH_STATE_CTL) {
+ if (*pattern->data == '~') {
+ /* rest of pattern is a regular expression */
+ teco_string_append(&re, pattern->data+1, pattern->len-1);
+ /*
+ * FIXME: In terex, it actually could contain null bytes.
+ */
+ if (teco_string_contains(re, '\0')) {
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
+ "Regular expression must not contain null-byte"
+ " - use \\0 instead");
+ return NULL;
+ }
+ return g_steal_pointer(&re.data) ? : g_strdup("");
+ }
+
+ /*
+ * Previous character was caret.
+ * Make sure it is handled like a control character.
+ * This is necessary even though we have string building activated,
+ * to support constructs like ^Q^Q (typed with carets) in order to
+ * quote pattern matching characters.
+ */
*pattern->data = TECO_CTL_KEY(g_ascii_toupper(*pattern->data));
state = TECO_SEARCH_STATE_START;
}
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 0f7e32b..95c1336 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -293,6 +293,10 @@ TE_CHECK([[@I/XYZ/J @EB/foo/ @I/XZY/J @:FN/Z/0/"F(0/0)' Q*-2"N(0/0)'
@:FN///"F(0/0)' Q*-1"N(0/0)']], 0, ignore, ignore)
AT_CLEANUP
+AT_SETUP([Searches by regexp])
+TE_CHECK([[@I/ABC/J :@S/^~[A-Z]+/"F(0/0)']], 0, ignore, ignore)
+AT_CLEANUP
+
AT_SETUP([Search and insertion ranges])
# When deleting characters, the result of ^S/^Y must not change.
TE_CHECK([[@I/XXYYZZ/^SC ."N(0/0)' C @S/YY/ HK ^YU1U0 Q0-2"N(0/0)' Q1-4"N(0/0)']], 0, ignore, ignore)