diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-10-04 23:41:16 +0400 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-10-04 23:41:16 +0400 |
commit | b36ff2502ae3b0e18fa862a01fba9cc2c9067e31 (patch) | |
tree | e4ffef55d060b77706b65ca48c6c0ae62a57e89e /src/search.c | |
parent | 024d26ac0cd869826801889f1299df34676fdf57 (diff) | |
download | sciteco-b36ff2502ae3b0e18fa862a01fba9cc2c9067e31.tar.gz |
pattern match characters support ^Q/^R now as well
* makes it possible, albeit cumbersome, to escape pattern match characters
* For instance, to search for ^Q, you now have to type
S^Q^Q^Q^Q$.
To search for ^E you have to type
S^Q^Q^Q^E$.
But the last character cannot be typed with carets currently (FIXME?).
For pattern-only characters, two ^Q should be sufficient as in
S^Q^Q^X$.
* Perhaps it would be more elegant to abolish the difference between string building
and pattern matching characters to avoid double quoting.
But then all string building constructs like ^EQq should operate at the pattern level
as well (i.e. match the contents of register q verbatim instead of interpreting them as a pattern).
TECOC and TECO-64 don't do that either.
If we leave everything as it is, at least a new string building construct should be added for
auto-quoting patterns (analogous to ^EN and ^E@).
Diffstat (limited to 'src/search.c')
-rw-r--r-- | src/search.c | 51 |
1 files changed, 40 insertions, 11 deletions
diff --git a/src/search.c b/src/search.c index ed3a00c..c9a2ba0 100644 --- a/src/search.c +++ b/src/search.c @@ -115,6 +115,8 @@ teco_state_search_initial(teco_machine_main_t *ctx, GError **error) typedef enum { TECO_SEARCH_STATE_START, + TECO_SEARCH_STATE_CTL, + TECO_SEARCH_STATE_ESCAPE, TECO_SEARCH_STATE_NOT, TECO_SEARCH_STATE_CTL_E, TECO_SEARCH_STATE_ANYQ, @@ -320,6 +322,18 @@ teco_pattern2regexp(teco_string_t *pattern, guint codepage, gboolean single_expr do { /* + * Previous character was caret. + * Make sure it is handled like a control character. + * This is necessary even though we have string building activated, + * to support constructs like ^Q^Q (typed with carets) in order to + * quote pattern matching characters. + */ + if (state == TECO_SEARCH_STATE_CTL) { + *pattern->data = TECO_CTL_KEY(g_ascii_toupper(*pattern->data)); + state = TECO_SEARCH_STATE_START; + } + + /* * First check whether it is a class. * This will not treat individual characters * as classes, so we do not convert them to regexp @@ -347,21 +361,36 @@ teco_pattern2regexp(teco_string_t *pattern, guint codepage, gboolean single_expr switch (state) { case TECO_SEARCH_STATE_START: switch (*pattern->data) { - case TECO_CTL_KEY('X'): teco_string_append_c(&re, '.'); break; - case TECO_CTL_KEY('N'): state = TECO_SEARCH_STATE_NOT; break; - default: { - gsize len = codepage == SC_CP_UTF8 - ? 
g_utf8_next_char(pattern->data) - pattern->data : 1; - /* the allocation could theoretically be avoided by escaping char-wise */ - g_autofree gchar *escaped = g_regex_escape_string(pattern->data, len); - teco_string_append(&re, escaped, strlen(escaped)); - pattern->data += len; - pattern->len -= len; + case '^': + state = TECO_SEARCH_STATE_CTL; + break; + case TECO_CTL_KEY('Q'): + case TECO_CTL_KEY('R'): + state = TECO_SEARCH_STATE_ESCAPE; + break; + case TECO_CTL_KEY('X'): + teco_string_append_c(&re, '.'); + break; + case TECO_CTL_KEY('N'): + state = TECO_SEARCH_STATE_NOT; + break; + default: + state = TECO_SEARCH_STATE_ESCAPE; continue; } - } break; + case TECO_SEARCH_STATE_ESCAPE: { + gsize len = codepage == SC_CP_UTF8 + ? g_utf8_next_char(pattern->data) - pattern->data : 1; + /* the allocation could theoretically be avoided by escaping char-wise */ + g_autofree gchar *escaped = g_regex_escape_string(pattern->data, len); + teco_string_append(&re, escaped, strlen(escaped)); + pattern->data += len; + pattern->len -= len; + continue; + } + case TECO_SEARCH_STATE_NOT: { state = TECO_SEARCH_STATE_START; g_autofree gchar *temp = teco_class2regexp(&state, pattern, codepage, TRUE, error); |