diff options
| author | Robin Haberkorn <rhaberkorn@fmsbw.de> | 2026-06-29 00:15:46 +0200 |
|---|---|---|
| committer | Robin Haberkorn <rhaberkorn@fmsbw.de> | 2026-06-29 00:32:13 +0200 |
| commit | 3522966d9584ec16e2f469acd0fe8727857a9d25 (patch) | |
| tree | f361cd32d54cc874bd3714c1cf9248254638df1d | |
| parent | f08dea5fead2f9ef9e0fa114b2e09aa94908d629 (diff) | |
implemented the ^~ pattern match construct: the rest of the pattern will be an Advanced Regular Expression (HEAD, master-fmsbw-ci, master)
* Allows searching by regular expressions.
We will never support all ARE constructs in TECO patterns, so this is useful to have available.
* Can only be typed with an upcaret.
This leaves ^E~q available as an escape-regexp string building construct.
* Once we replace the pattern2regexp converter with a custom terex lexer,
we might want to restrict ^~ to the beginning of the pattern.
Currently, however, it can be anywhere, so you can mix TECO patterns with regular expressions.
| -rw-r--r-- | doc/sciteco.7.template | 11 | ||||
| -rw-r--r-- | src/search.c | 29 | ||||
| -rw-r--r-- | tests/testsuite.at | 4 |
3 files changed, 35 insertions, 9 deletions
diff --git a/doc/sciteco.7.template b/doc/sciteco.7.template index 87e8574..41f3abf 100644 --- a/doc/sciteco.7.template +++ b/doc/sciteco.7.template @@ -2050,10 +2050,17 @@ the space and horizontal tab characters. Matches one in a list of patterns. Any pattern match construct may be used. The pattern alternatives must be separated by commas. +.TP +.SCITECO_TOPIC ^~ +.BI ^~ +The rest of the pattern is an Advanced Regular Expression (ARE) +as in Tcl - see +.BR re_syntax (3). +This can only be typed with an upcaret. .LP All non-pattern-match-characters match themselves. -Note however that currently, all pattern matching is performed -.BR case-insensitive . +Pattern matching is performed case-insensitive by default, +but this can be controlled with the \fB^X\fP command. . . .SH FILE NAMES AND DIRECTORIES diff --git a/src/search.c b/src/search.c index 0df483f..ce4a338 100644 --- a/src/search.c +++ b/src/search.c @@ -425,14 +425,29 @@ teco_pattern2regexp(teco_string_t *pattern, teco_machine_qregspec_t *qreg_machin g_auto(teco_string_t) re = {NULL, 0}; do { - /* - * Previous character was caret. - * Make sure it is handled like a control character. - * This is necessary even though we have string building activated, - * to support constructs like ^Q^Q (typed with carets) in order to - * quote pattern matching characters. - */ if (state == TECO_SEARCH_STATE_CTL) { + if (*pattern->data == '~') { + /* rest of pattern is a regular expression */ + teco_string_append(&re, pattern->data+1, pattern->len-1); + /* + * FIXME: In terex, it actually could contain null bytes. + */ + if (teco_string_contains(re, '\0')) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, + "Regular expression must not contain null-byte" + " - use \\0 instead"); + return NULL; + } + return g_steal_pointer(&re.data) ? : g_strdup(""); + } + + /* + * Previous character was caret. + * Make sure it is handled like a control character. 
+ * This is necessary even though we have string building activated, + * to support constructs like ^Q^Q (typed with carets) in order to + * quote pattern matching characters. + */ *pattern->data = TECO_CTL_KEY(g_ascii_toupper(*pattern->data)); state = TECO_SEARCH_STATE_START; } diff --git a/tests/testsuite.at b/tests/testsuite.at index 0f7e32b..95c1336 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -293,6 +293,10 @@ TE_CHECK([[@I/XYZ/J @EB/foo/ @I/XZY/J @:FN/Z/0/"F(0/0)' Q*-2"N(0/0)' @:FN///"F(0/0)' Q*-1"N(0/0)']], 0, ignore, ignore) AT_CLEANUP +AT_SETUP([Searches by regexp]) +TE_CHECK([[@I/ABC/J :@S/^~[A-Z]+/"F(0/0)']], 0, ignore, ignore) +AT_CLEANUP + AT_SETUP([Search and insertion ranges]) # When deleting characters, the result of ^S/^Y must not change. TE_CHECK([[@I/XXYYZZ/^SC ."N(0/0)' C @S/YY/ HK ^YU1U0 Q0-2"N(0/0)' Q1-4"N(0/0)']], 0, ignore, ignore) |
