From 7c592561af3bbbad2eaf865247811ba2bd590c2e Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Thu, 29 Aug 2024 01:56:50 +0200 Subject: Glyph to byte offset mapping is now using the line character index (refs #5) * This works reasonably well unless lines are exceedingly long (since within a line we always count characters). The following test case is still slow (on Unicode buffers): 10000<@I/XX/> <%a-1:J;> While the following is now also fast: 10000<@I/X^J/> <%a-1:J;> * Commands with relative character offsets (C, R, A, D) have a special optimization where they always count characters beginning at dot, as long as the argument is not exceedingly large. This means they are fast even on exceedingly long lines. * The remaining commands (search, EC/EG, Xq) now accept glyph indexes. --- src/search.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'src/search.c') diff --git a/src/search.c b/src/search.c index 46407d0..88b0e16 100644 --- a/src/search.c +++ b/src/search.c @@ -38,11 +38,8 @@ #include "search.h" typedef struct { - /* - * FIXME: Should perhaps all be teco_int_t? 
- */ - gint dot; - gint from, to; + gssize dot; + gssize from, to; gint count; teco_buffer_t *from_buffer, *to_buffer; @@ -79,16 +76,16 @@ teco_state_search_initial(teco_machine_main_t *ctx, GError **error) return FALSE; if (v1 <= v2) { teco_search_parameters.count = 1; - teco_search_parameters.from = (gint)v1; - teco_search_parameters.to = (gint)v2; + teco_search_parameters.from = teco_glyphs2bytes(v1); + teco_search_parameters.to = teco_glyphs2bytes(v2); } else { teco_search_parameters.count = -1; - teco_search_parameters.from = (gint)v2; - teco_search_parameters.to = (gint)v1; + teco_search_parameters.from = teco_glyphs2bytes(v2); + teco_search_parameters.to = teco_glyphs2bytes(v1); } - if (!teco_validate_pos(teco_search_parameters.from) || - !teco_validate_pos(teco_search_parameters.to)) { + if (teco_search_parameters.from < 0 || + teco_search_parameters.to < 0) { /* * FIXME: In derived classes, the command name will * no longer be correct. -- cgit v1.2.3