about | summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/Makefile.am | 5
-rw-r--r-- src/search.c | 126
2 files changed, 63 insertions, 68 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 5b2572e..b850905 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -13,7 +13,7 @@ include $(top_srcdir)/contrib/scintilla.am
# FIXME: Common flags should be in configure.ac
AM_CFLAGS = -std=gnu11 -Wall -Wno-initializer-overrides -Wno-unused-value
-AM_CPPFLAGS += -I$(top_srcdir)/contrib/rb3ptr
+AM_CPPFLAGS += -I$(top_srcdir)/contrib/rb3ptr -I$(top_srcdir)/contrib/hsrex
AM_LDFLAGS =
if STATIC_EXECUTABLES
@@ -57,7 +57,8 @@ libsciteco_base_la_SOURCES = main.c sciteco.h list.h \
# NOTE: We cannot link in Scintilla (static library) into
# a libtool convenience library
libsciteco_base_la_LIBADD = $(LIBSCITECO_INTERFACE) \
- $(top_builddir)/contrib/rb3ptr/librb3ptr.la
+ $(top_builddir)/contrib/rb3ptr/librb3ptr.la \
+ $(top_builddir)/contrib/hsrex/libhswrex.la
if REPLACE_MALLOC
libsciteco_base_la_LIBADD += $(top_builddir)/contrib/dlmalloc/libdlmalloc.la
endif
diff --git a/src/search.c b/src/search.c
index 01c598e..81d2074 100644
--- a/src/search.c
+++ b/src/search.c
@@ -24,6 +24,13 @@
#include <glib.h>
#include <glib/gprintf.h>
+/* should always be Henry Spencer's version from contrib/hsrex */
+#define REGEX_STANDALONE
+//#define REGEX_WCHAR
+#include <regalone.h>
+#include <regex.h>
+G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(regex_t, regfree);
+
#include "sciteco.h"
#include "string-utils.h"
#include "expressions.h"
@@ -463,53 +470,38 @@ teco_pattern2regexp(teco_string_t *pattern, guint codepage, gboolean single_expr
}
static gboolean
-teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error)
+teco_do_search(regex_t *re, gsize from, gsize to, gint *count, GError **error)
{
- g_autoptr(GMatchInfo) info = NULL;
- const gchar *buffer = (const gchar *)teco_interface_ssm(SCI_GETCHARACTERPOINTER, 0, 0);
- GError *tmp_error = NULL;
-
- /*
- * NOTE: The return boolean does NOT signal whether an error was generated.
- */
- g_regex_match_full(re, buffer, (gssize)to, from, 0, &info, &tmp_error);
- if (tmp_error) {
- g_propagate_error(error, tmp_error);
- return FALSE;
- }
+ regmatch_t info = {.rm_so = from, .rm_eo = to};
+ /* FIXME: avoid moving the gap here */
+ const guchar *buffer = (const guchar *)teco_interface_ssm(SCI_GETCHARACTERPOINTER, 0, 0);
gint matched_from = -1, matched_to = -1;
if (*count >= 0) {
- while (g_match_info_matches(info) && --(*count)) {
- /*
- * NOTE: The return boolean does NOT signal whether an error was generated.
- */
- g_match_info_next(info, &tmp_error);
- if (tmp_error) {
- g_propagate_error(error, tmp_error);
- return FALSE;
- }
- }
-
- if (!*count)
+ gint rc;
+ while ((rc = re_exec(re, buffer+from, to-from, NULL, 1, &info, REG_NOTEOL | REG_NOTBOL)) == REG_OKAY && --(*count))
+ from += info.rm_eo;
+ if (rc == REG_OKAY) {
/* successful */
- g_match_info_fetch_pos(info, 0,
- &matched_from, &matched_to);
+ matched_from = from+info.rm_so;
+ matched_to = from+info.rm_eo;
+ } else if (rc != REG_NOMATCH) {
+ // FIXME: Use regerror()
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
+ "Error executing regular expression");
+ return FALSE;
+ }
} else {
/* only keep the last `count' matches, in a circular stack */
- typedef struct {
- gint from, to;
- } teco_range_t;
-
- gsize matched_size = sizeof(teco_range_t) * -*count;
+ gsize matched_size = sizeof(regmatch_t) * -*count;
/*
* matched_size could overflow.
* NOTE: Glib 2.48 has g_size_checked_mul() which uses
* compiler intrinsics.
*/
- if (matched_size / sizeof(teco_range_t) != -*count)
+ if (matched_size / sizeof(regmatch_t) != -*count)
/* guaranteed to fail either teco_memory_check() or g_malloc() */
matched_size = G_MAXSIZE;
@@ -522,32 +514,29 @@ teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error)
if (!teco_memory_check(matched_size, error))
return FALSE;
- g_autofree teco_range_t *matched = g_malloc(matched_size);
+ g_autofree regmatch_t *matched = g_malloc(matched_size);
gint matched_total = 0, i = 0;
- while (g_match_info_matches(info)) {
- g_match_info_fetch_pos(info, 0,
- &matched[i].from, &matched[i].to);
-
- /*
- * NOTE: The return boolean does NOT signal whether an error was generated.
- */
- g_match_info_next(info, &tmp_error);
- if (tmp_error) {
- g_propagate_error(error, tmp_error);
- return FALSE;
- }
-
+ gint rc;
+ while ((rc = re_exec(re, buffer+from, to-from, NULL, 1, &matched[i], REG_NOTEOL | REG_NOTBOL | REG_STARTEND)) == REG_OKAY) {
+ matched[i].rm_so += from;
+ matched[i].rm_eo += from;
+ from = matched[i].rm_eo;
i = ++matched_total % -(*count);
}
*count = MIN(*count + matched_total, 0);
- if (!*count) {
- /* successful -> i points to stack bottom */
- matched_from = matched[i].from;
- matched_to = matched[i].to;
+ if (rc != REG_NOMATCH) {
+ // FIXME: Use regerror()
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
+ "Error executing regular expression");
+ return FALSE;
}
+
+ /* successful -> i points to stack bottom */
+ matched_from = matched[i].rm_so;
+ matched_to = matched[i].rm_eo;
}
if (matched_from >= 0 && matched_to >= 0)
@@ -560,14 +549,11 @@ teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error)
static gboolean
teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gsize new_chars, GError **error)
{
- /* FIXME: Should G_REGEX_OPTIMIZE be added under certain circumstances? */
- GRegexCompileFlags flags = G_REGEX_CASELESS | G_REGEX_MULTILINE | G_REGEX_DOTALL;
+ gint flags = REG_EXTENDED | REG_ICASE;
/* this is set in teco_state_search_initial() */
- if (ctx->expectstring.machine.codepage != SC_CP_UTF8) {
- /* single byte encoding */
- flags |= G_REGEX_RAW;
- } else if (!teco_string_validate_utf8(str)) {
+ if (ctx->expectstring.machine.codepage == SC_CP_UTF8 &&
+ !teco_string_validate_utf8(str)) {
/*
* While SciTECO code is always guaranteed to be in valid UTF-8,
* the result of string building may not (eg. if ^EQq inserts garbage).
@@ -588,7 +574,7 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs
!search_reg->vtable->set_integer(search_reg, TECO_FAILURE, error))
return FALSE;
- g_autoptr(GRegex) re = NULL;
+ g_auto(regex_t) re = {0};
teco_string_t pattern = *str;
g_autofree gchar *re_pattern;
/* NOTE: teco_pattern2regexp() modifies str pointer */
@@ -602,10 +588,18 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs
if (!*re_pattern)
goto failure;
/*
- * FIXME: Should we propagate at least some of the errors?
+ * FIXME: We don't have to escape null characters in re_pattern.
*/
- re = g_regex_new(re_pattern, flags, 0, NULL);
- if (!re)
+#if 0
+ gint rc = ctx->expectstring.machine.codepage == SC_CP_UTF8
+ ? re_wcomp(&re, re_pattern, strlen(re_pattern), flags)
+ : re_comp(&re, re_pattern, strlen(re_pattern), flags);
+#endif
+ // FIXME: Apparently this is the ASCII-only version, while re_wcomp() is the widechar version
+ // which expects UTF-32.
+ // This means that teco_pattern2regexp() would have to return an UTF-32 version.
+ gint rc = re_comp(&re, re_pattern, strlen(re_pattern), flags);
+ if (rc)
goto failure;
if (!teco_qreg_current &&
@@ -616,7 +610,7 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs
gint count = teco_search_parameters.count;
- if (!teco_do_search(re, teco_search_parameters.from, teco_search_parameters.to, &count, error))
+ if (!teco_do_search(&re, teco_search_parameters.from, teco_search_parameters.to, &count, error))
return FALSE;
if (teco_search_parameters.to_buffer && count) {
@@ -631,12 +625,12 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs
teco_buffer_edit(buffer);
if (buffer == teco_search_parameters.to_buffer) {
- if (!teco_do_search(re, 0, teco_search_parameters.dot, &count, error))
+ if (!teco_do_search(&re, 0, teco_search_parameters.dot, &count, error))
return FALSE;
break;
}
- if (!teco_do_search(re, 0, teco_interface_ssm(SCI_GETLENGTH, 0, 0),
+ if (!teco_do_search(&re, 0, teco_interface_ssm(SCI_GETLENGTH, 0, 0),
&count, error))
return FALSE;
} while (count);
@@ -646,14 +640,14 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs
teco_buffer_edit(buffer);
if (buffer == teco_search_parameters.to_buffer) {
- if (!teco_do_search(re, teco_search_parameters.dot,
+ if (!teco_do_search(&re, teco_search_parameters.dot,
teco_interface_ssm(SCI_GETLENGTH, 0, 0),
&count, error))
return FALSE;
break;
}
- if (!teco_do_search(re, 0, teco_interface_ssm(SCI_GETLENGTH, 0, 0),
+ if (!teco_do_search(&re, 0, teco_interface_ssm(SCI_GETLENGTH, 0, 0),
&count, error))
return FALSE;
} while (count);