about | summary | refs | log | tree | commit | diff | homepage
path: root/src/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser.c')
-rw-r--r-- src/parser.c 411
1 files changed, 268 insertions, 143 deletions
diff --git a/src/parser.c b/src/parser.c
index c1d22b2..747249d 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2025 Robin Haberkorn
+ * Copyright (C) 2012-2026 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,6 +19,7 @@
#include "config.h"
#endif
+#include <errno.h>
#include <string.h>
#include <glib.h>
@@ -80,7 +81,7 @@ teco_machine_input(teco_machine_t *ctx, gunichar chr, GError **error)
gboolean
teco_state_end_of_macro(teco_machine_t *ctx, GError **error)
{
- g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_SYNTAX,
"Unterminated command");
return FALSE;
}
@@ -161,9 +162,7 @@ gboolean
teco_execute_macro(const gchar *macro, gsize macro_len,
teco_qreg_table_t *qreg_table_locals, GError **error)
{
- const teco_string_t str = {(gchar *)macro, macro_len};
-
- if (!teco_string_validate_utf8(&str)) {
+ if (!teco_string_validate_utf8((teco_string_t){(gchar *)macro, macro_len})) {
g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
"Invalid UTF-8 byte sequence in macro");
return FALSE;
@@ -185,41 +184,60 @@ teco_execute_macro(const gchar *macro, gsize macro_len,
GError *tmp_error = NULL;
- if (!teco_machine_main_step(&macro_machine, macro, macro_len, &tmp_error)) {
- if (!g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN)) {
- /* passes ownership of tmp_error */
- g_propagate_error(error, tmp_error);
- goto error_cleanup;
+ for (;;) {
+ if (!teco_machine_main_step(&macro_machine, macro, macro_len, &tmp_error)) {
+ if (!g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN)) {
+ /* passes ownership of tmp_error */
+ g_propagate_error(error, tmp_error);
+ goto error_cleanup;
+ }
+ g_error_free(tmp_error);
+
+ /*
+ * Macro returned - handle like regular
+ * end of macro, even though some checks
+ * are unnecessary here.
+ * macro_pc will still point to the return PC.
+ */
+ g_assert(macro_machine.parent.current == &teco_state_start);
+
+ /*
+ * Discard all braces, except the current one.
+ */
+ if (!teco_expressions_brace_return(parent_brace_level, teco_error_return_args, error))
+ goto error_cleanup;
+
+ /*
+ * Clean up the loop stack.
+ * We are allowed to return in loops.
+ * NOTE: This does not have to be undone.
+ */
+ g_array_remove_range(teco_loop_stack, macro_machine.loop_stack_fp,
+ teco_loop_stack->len - macro_machine.loop_stack_fp);
}
- g_error_free(tmp_error);
- /*
- * Macro returned - handle like regular
- * end of macro, even though some checks
- * are unnecessary here.
- * macro_pc will still point to the return PC.
- */
- g_assert(macro_machine.parent.current == &teco_state_start);
+ if (G_LIKELY(teco_goto_backup_pc < 0))
+ break;
- /*
- * Discard all braces, except the current one.
- */
- if (!teco_expressions_brace_return(parent_brace_level, teco_error_return_args, error))
- goto error_cleanup;
+ /* continue after :Olabel$ */
+ macro_machine.macro_pc = teco_goto_backup_pc;
+ /* macro could have ended in a "lookahead" state */
+ macro_machine.parent.current = &teco_state_start;
- /*
- * Clean up the loop stack.
- * We are allowed to return in loops.
- * NOTE: This does not have to be undone.
- */
- g_array_remove_range(teco_loop_stack, macro_machine.loop_stack_fp,
- teco_loop_stack->len - macro_machine.loop_stack_fp);
+ teco_undo_string_own(teco_goto_skip_label);
+ memset(&teco_goto_skip_label, 0, sizeof(teco_goto_skip_label));
+ teco_undo_gssize(teco_goto_backup_pc) = -1;
+
+ if (macro_machine.parent.must_undo)
+ teco_undo_flags(macro_machine.flags);
+ macro_machine.flags.mode = TECO_MODE_NORMAL;
+
+ /* no need to reparse everything in the future */
+ macro_machine.goto_table.complete = TRUE;
}
if (G_UNLIKELY(teco_goto_skip_label.len > 0)) {
- g_autofree gchar *label_printable = teco_string_echo(teco_goto_skip_label.data, teco_goto_skip_label.len);
- g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED,
- "Label \"%s\" not found", label_printable);
+ teco_error_label_set(error, teco_goto_skip_label.data, teco_goto_skip_label.len);
goto error_attach;
}
@@ -385,6 +403,8 @@ teco_machine_main_clear(teco_machine_main_t *ctx)
teco_goto_table_clear(&ctx->goto_table);
teco_string_clear(&ctx->expectstring.string);
teco_machine_stringbuilding_clear(&ctx->expectstring.machine);
+ teco_string_clear(&ctx->goto_label);
+ teco_machine_qregspec_free(ctx->expectqreg);
}
/** Append string to result with case folding. */
@@ -394,9 +414,6 @@ teco_machine_stringbuilding_append(teco_machine_stringbuilding_t *ctx, const gch
g_assert(ctx->result != NULL);
switch (ctx->mode) {
- case TECO_STRINGBUILDING_MODE_NORMAL:
- teco_string_append(ctx->result, str, len);
- break;
case TECO_STRINGBUILDING_MODE_UPPER: {
g_autofree gchar *folded = ctx->codepage == SC_CP_UTF8
? g_utf8_strup(str, len) : g_ascii_strup(str, len);
@@ -409,46 +426,91 @@ teco_machine_stringbuilding_append(teco_machine_stringbuilding_t *ctx, const gch
teco_string_append(ctx->result, folded, strlen(folded));
break;
}
+ default:
+ teco_string_append(ctx->result, str, len);
+ break;
}
}
-/*
- * FIXME: All teco_state_stringbuilding_* states could be static?
+/**
+ * Append codepoint to result string with case folding.
+ *
+ * This also takes the target encoding into account and checks the value
+ * range accordingly.
+ *
+ * @return FALSE if the codepoint is not valid in the target encoding.
*/
+static gboolean
+teco_machine_stringbuilding_append_c(teco_machine_stringbuilding_t *ctx, teco_int_t value)
+{
+	g_assert(ctx->result != NULL);
+
+	if (ctx->codepage == SC_CP_UTF8) {
+		/*
+		 * The explicit upper bound must be checked before handing
+		 * the value to g_unichar_validate(): teco_int_t may be wider
+		 * than gunichar, in which case an out-of-range value would be
+		 * silently truncated to a valid codepoint.
+		 */
+		if (value < 0 || value > 0x10FFFF || !g_unichar_validate(value))
+			return FALSE;
+		switch (ctx->mode) {
+		case TECO_STRINGBUILDING_MODE_UPPER:
+			value = g_unichar_toupper(value);
+			break;
+		case TECO_STRINGBUILDING_MODE_LOWER:
+			value = g_unichar_tolower(value);
+			break;
+		default:
+			/* all other modes: append without case folding */
+			break;
+		}
+		teco_string_append_wc(ctx->result, value);
+	} else {
+		/* single-byte encodings: only one byte can be appended */
+		if (value < 0 || value > 0xFF)
+			return FALSE;
+		switch (ctx->mode) {
+		case TECO_STRINGBUILDING_MODE_UPPER:
+			value = g_ascii_toupper(value);
+			break;
+		case TECO_STRINGBUILDING_MODE_LOWER:
+			value = g_ascii_tolower(value);
+			break;
+		default:
+			/* all other modes: append without case folding */
+			break;
+		}
+		teco_string_append_c(ctx->result, value);
+	}
+
+	return TRUE;
+}
+
static teco_state_t *teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx,
gunichar chr, GError **error);
-TECO_DECLARE_STATE(teco_state_stringbuilding_ctl);
+static teco_state_t teco_state_stringbuilding_ctl;
static teco_state_t *teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx,
gunichar chr, GError **error);
-TECO_DECLARE_STATE(teco_state_stringbuilding_escaped);
+static teco_state_t teco_state_stringbuilding_escaped;
-TECO_DECLARE_STATE(teco_state_stringbuilding_lower);
-TECO_DECLARE_STATE(teco_state_stringbuilding_upper);
+static teco_state_t teco_state_stringbuilding_lower;
+static teco_state_t teco_state_stringbuilding_upper;
-TECO_DECLARE_STATE(teco_state_stringbuilding_ctle);
-TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_num);
-TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_u);
-TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_q);
-TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_quote);
-TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_n);
+static teco_state_t teco_state_stringbuilding_ctle;
+static teco_state_t teco_state_stringbuilding_ctle_num;
+static teco_state_t teco_state_stringbuilding_ctle_u;
+static teco_state_t teco_state_stringbuilding_ctle_code;
+static teco_state_t teco_state_stringbuilding_ctle_q;
+static teco_state_t teco_state_stringbuilding_ctle_quote;
+static teco_state_t teco_state_stringbuilding_ctle_n;
static teco_state_t *
teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
{
- switch (chr) {
- case '^':
- return &teco_state_stringbuilding_ctl;
- case TECO_CTL_KEY('^'):
- /*
- * Ctrl+^ is inserted verbatim as code 30.
- * Otherwise it would expand to a single caret
- * just like caret+caret (^^).
- */
- break;
- default:
- if (TECO_IS_CTL(chr))
- return teco_state_stringbuilding_ctl_input(ctx, TECO_CTL_ECHO(chr), error);
+ if (ctx->mode != TECO_STRINGBUILDING_MODE_DISABLED) {
+ switch (chr) {
+ case '^':
+ return &teco_state_stringbuilding_ctl;
+ case TECO_CTL_KEY('^'):
+ /*
+ * Ctrl+^ is inserted verbatim as code 30.
+ * Otherwise it would expand to a single caret
+ * just like caret+caret (^^).
+ */
+ break;
+ default:
+ if (TECO_IS_CTL(chr))
+ return teco_state_stringbuilding_ctl_input(ctx, TECO_CTL_ECHO(chr), error);
+ }
}
return teco_state_stringbuilding_escaped_input(ctx, chr, error);
@@ -457,14 +519,15 @@ teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gunich
/* in cmdline.c */
gboolean teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
gunichar key, GError **error);
-gboolean teco_state_stringbuilding_insert_completion(teco_machine_stringbuilding_t *ctx, const teco_string_t *str, GError **error);
-
-TECO_DEFINE_STATE(teco_state_stringbuilding_start,
- .is_start = TRUE,
- .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t)
- teco_state_stringbuilding_start_process_edit_cmd,
- .insert_completion_cb = (teco_state_insert_completion_cb_t)
- teco_state_stringbuilding_insert_completion
+gboolean teco_state_stringbuilding_insert_completion(teco_machine_stringbuilding_t *ctx, teco_string_t str, GError **error);
+
+static TECO_DEFINE_STATE(teco_state_stringbuilding_start,
+ .is_start = TRUE,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_start_input,
+ .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t)
+ teco_state_stringbuilding_start_process_edit_cmd,
+ .insert_completion_cb = (teco_state_insert_completion_cb_t)
+ teco_state_stringbuilding_insert_completion
);
static teco_state_t *
@@ -481,6 +544,11 @@ teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar
* be abolished altogether.
*/
break;
+ case 'P':
+ if (ctx->parent.must_undo)
+ teco_undo_guint(ctx->mode);
+ ctx->mode = TECO_STRINGBUILDING_MODE_DISABLED;
+ return &teco_state_stringbuilding_start;
case 'Q':
case 'R': return &teco_state_stringbuilding_escaped;
case 'V': return &teco_state_stringbuilding_lower;
@@ -509,7 +577,9 @@ teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar
return &teco_state_stringbuilding_start;
}
-TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctl);
+static TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctl,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctl_input,
+);
static teco_state_t *
teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
@@ -523,8 +593,6 @@ teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, guni
* is that we don't try to casefold non-ANSI characters in single-byte mode.
*/
switch (ctx->mode) {
- case TECO_STRINGBUILDING_MODE_NORMAL:
- break;
case TECO_STRINGBUILDING_MODE_UPPER:
chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80
? g_unichar_toupper(chr) : chr;
@@ -543,7 +611,8 @@ teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, guni
gboolean teco_state_stringbuilding_escaped_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
gunichar key, GError **error);
-TECO_DEFINE_STATE(teco_state_stringbuilding_escaped,
+static TECO_DEFINE_STATE(teco_state_stringbuilding_escaped,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_escaped_input,
.process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t)
teco_state_stringbuilding_escaped_process_edit_cmd
);
@@ -569,7 +638,9 @@ teco_state_stringbuilding_lower_ctl_input(teco_machine_stringbuilding_t *ctx, gu
return &teco_state_stringbuilding_start;
}
-TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_lower_ctl);
+static TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_lower_ctl,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_lower_ctl_input
+);
static teco_state_t *
teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
@@ -587,7 +658,9 @@ teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gunich
return &teco_state_stringbuilding_start;
}
-TECO_DEFINE_STATE(teco_state_stringbuilding_lower);
+static TECO_DEFINE_STATE(teco_state_stringbuilding_lower,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_lower_input
+);
static teco_state_t *
teco_state_stringbuilding_upper_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
@@ -610,7 +683,9 @@ teco_state_stringbuilding_upper_ctl_input(teco_machine_stringbuilding_t *ctx, gu
return &teco_state_stringbuilding_start;
}
-TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_upper_ctl);
+static TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_upper_ctl,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_upper_ctl_input
+);
static teco_state_t *
teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
@@ -628,7 +703,9 @@ teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gunich
return &teco_state_stringbuilding_start;
}
-TECO_DEFINE_STATE(teco_state_stringbuilding_upper);
+static TECO_DEFINE_STATE(teco_state_stringbuilding_upper,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_upper_input
+);
static teco_state_t *
teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
@@ -638,6 +715,7 @@ teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gunicha
switch (teco_ascii_toupper(chr)) {
case '\\': next = &teco_state_stringbuilding_ctle_num; break;
case 'U': next = &teco_state_stringbuilding_ctle_u; break;
+ case '<': next = &teco_state_stringbuilding_ctle_code; break;
case 'Q': next = &teco_state_stringbuilding_ctle_q; break;
case '@': next = &teco_state_stringbuilding_ctle_quote; break;
case 'N': next = &teco_state_stringbuilding_ctle_n; break;
@@ -660,7 +738,9 @@ teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gunicha
return next;
}
-TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctle);
+static TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctle,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_input
+);
/* in cmdline.c */
gboolean teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
@@ -711,7 +791,9 @@ teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gun
return &teco_state_stringbuilding_start;
}
-TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_num);
+static TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_num,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_num_input
+);
static teco_state_t *
teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
@@ -736,47 +818,73 @@ teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gunic
if (!qreg->vtable->get_integer(qreg, &value, error))
return NULL;
- if (ctx->codepage == SC_CP_UTF8) {
- if (value < 0 || !g_unichar_validate(value))
- goto error_codepoint;
- switch (ctx->mode) {
- case TECO_STRINGBUILDING_MODE_NORMAL:
- break;
- case TECO_STRINGBUILDING_MODE_UPPER:
- value = g_unichar_toupper(value);
- break;
- case TECO_STRINGBUILDING_MODE_LOWER:
- value = g_unichar_tolower(value);
- break;
+ if (!teco_machine_stringbuilding_append_c(ctx, value)) {
+ g_autofree gchar *name_printable = teco_string_echo(qreg->head.name.data, qreg->head.name.len);
+ g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
+ "Q-Register \"%s\" does not contain a valid codepoint", name_printable);
+ return NULL;
+ }
+
+ return &teco_state_stringbuilding_start;
+}
+
+static TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_u,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_u_input
+);
+
+static teco_state_t *
+teco_state_stringbuilding_ctle_code_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
+{
+ if (chr == '>') {
+ if (!ctx->result)
+ /* parse-only mode */
+ return &teco_state_stringbuilding_start;
+
+ if (!ctx->code.data) {
+ g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
+ "Invalid empty ^E<> specified");
+ return NULL;
}
- teco_string_append_wc(ctx->result, value);
- } else {
- if (value < 0 || value > 0xFF)
- goto error_codepoint;
- switch (ctx->mode) {
- case TECO_STRINGBUILDING_MODE_NORMAL:
- break;
- case TECO_STRINGBUILDING_MODE_UPPER:
- value = g_ascii_toupper(value);
- break;
- case TECO_STRINGBUILDING_MODE_LOWER:
- value = g_ascii_tolower(value);
- break;
+
+ /*
+ * FIXME: Once we support hexadecimal constants in the SciTECO
+ * language itself, we might support this syntax as well.
+ * Or should we perhaps always consider the current radix?
+ */
+ gchar *endp = ctx->code.data;
+ errno = 0;
+ gint64 code = g_ascii_strtoll(ctx->code.data, &endp, 0);
+ if (errno || endp - ctx->code.data != ctx->code.len ||
+ !teco_machine_stringbuilding_append_c(ctx, code)) {
+ /* will also catch embedded nulls */
+ g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
+ "Invalid code ^E<%s> specified", ctx->code.data);
+ return NULL;
}
- teco_string_append_c(ctx->result, value);
+
+ if (ctx->parent.must_undo)
+ teco_undo_string_own(ctx->code);
+ else
+ teco_string_clear(&ctx->code);
+ memset(&ctx->code, 0, sizeof(ctx->code));
+
+ return &teco_state_stringbuilding_start;
}
- return &teco_state_stringbuilding_start;
+ if (!ctx->result)
+ /* parse-only mode */
+ return &teco_state_stringbuilding_ctle_code;
-error_codepoint: {
- g_autofree gchar *name_printable = teco_string_echo(qreg->head.name.data, qreg->head.name.len);
- g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
- "Q-Register \"%s\" does not contain a valid codepoint", name_printable);
- return NULL;
-}
+ if (ctx->parent.must_undo)
+ undo__teco_string_truncate(&ctx->code, ctx->code.len);
+ teco_string_append_wc(&ctx->code, chr);
+
+ return &teco_state_stringbuilding_ctle_code;
}
-TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_u);
+static TECO_DEFINE_STATE(teco_state_stringbuilding_ctle_code,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_code_input
+);
static teco_state_t *
teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
@@ -804,7 +912,9 @@ teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gunic
return &teco_state_stringbuilding_start;
}
-TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_q);
+static TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_q,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_q_input
+);
static teco_state_t *
teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
@@ -836,7 +946,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g
* in command line arguments anyway.
* Otherwise, we'd have to implement our own POSIX shell escape function.
*/
- if (teco_string_contains(&str, '\0')) {
+ if (teco_string_contains(str, '\0')) {
teco_error_qregcontainsnull_set(error, qreg->head.name.data, qreg->head.name.len,
table != &teco_qreg_table_globals);
return NULL;
@@ -847,7 +957,9 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g
return &teco_state_stringbuilding_start;
}
-TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_quote);
+static TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_quote,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_quote_input
+);
static teco_state_t *
teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
@@ -872,7 +984,7 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gunic
g_auto(teco_string_t) str = {NULL, 0};
if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error))
return NULL;
- if (teco_string_contains(&str, '\0')) {
+ if (teco_string_contains(str, '\0')) {
teco_error_qregcontainsnull_set(error, qreg->head.name.data, qreg->head.name.len,
table != &teco_qreg_table_globals);
return NULL;
@@ -884,7 +996,9 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gunic
return &teco_state_stringbuilding_start;
}
-TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_n);
+static TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_n,
+ .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_n_input
+);
void
teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gunichar escape_char,
@@ -922,6 +1036,11 @@ teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gch
for (guint i = 0; i < len; ) {
gunichar chr = g_utf8_get_char(str+i);
+ /*
+ * NOTE: We support both `[` and `{`, so this works for autocompleting
+ * long Q-register specifications as well.
+ * This may therefore insert unnecessary ^Q, but they won't hurt.
+ */
if (g_unichar_toupper(chr) == ctx->escape_char ||
(ctx->escape_char == '[' && chr == ']') ||
(ctx->escape_char == '{' && chr == '}'))
@@ -939,8 +1058,8 @@ teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gch
void
teco_machine_stringbuilding_clear(teco_machine_stringbuilding_t *ctx)
{
- if (ctx->machine_qregspec)
- teco_machine_qregspec_free(ctx->machine_qregspec);
+ teco_machine_qregspec_free(ctx->machine_qregspec);
+ teco_string_clear(&ctx->code);
}
gboolean
@@ -958,30 +1077,28 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e
teco_state_t *current = ctx->parent.current;
/*
- * String termination handling
+ * Ignore whitespace immediately after @-modified commands.
+ * This is inspired by TECO-64.
+ * The alternative would have been to throw an error,
+ * as allowing whitespace escape_chars is harmful.
*/
- if (ctx->flags.modifier_at) {
- if (current->expectstring.last)
- /* also clears the "@" modifier flag */
- teco_machine_main_eval_at(ctx);
+ if (ctx->flags.modifier_at && teco_is_noop(chr))
+ return current;
+ /*
+ * String termination handling
+ */
+ if (teco_machine_main_eval_at(ctx)) {
/*
- * FIXME: Exclude setting at least whitespace characters as the
- * new string escape character to avoid accidental errors?
- *
* FIXME: Should we perhaps restrict case folding escape characters
* to the ANSI range (teco_ascii_toupper())?
- * This would be faster than case folding each and every character
+ * This would be faster than case folding almost all characters
* of a string argument to check against the escape char.
*/
- switch (ctx->expectstring.machine.escape_char) {
- case '\e':
- case '{':
- if (ctx->parent.must_undo)
- teco_undo_gunichar(ctx->expectstring.machine.escape_char);
- ctx->expectstring.machine.escape_char = g_unichar_toupper(chr);
- return current;
- }
+ if (ctx->parent.must_undo)
+ teco_undo_gunichar(ctx->expectstring.machine.escape_char);
+ ctx->expectstring.machine.escape_char = g_unichar_toupper(chr);
+ return current;
}
/*
@@ -1019,11 +1136,11 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e
* so they may do their main activity in process_cb().
*/
if (ctx->expectstring.insert_len && current->expectstring.process_cb &&
- !current->expectstring.process_cb(ctx, &ctx->expectstring.string,
+ !current->expectstring.process_cb(ctx, ctx->expectstring.string,
ctx->expectstring.insert_len, error))
return NULL;
- teco_state_t *next = current->expectstring.done_cb(ctx, &ctx->expectstring.string, error);
+ teco_state_t *next = current->expectstring.done_cb(ctx, ctx->expectstring.string, error);
if (ctx->parent.must_undo)
teco_undo_string_own(ctx->expectstring.string);
@@ -1035,6 +1152,14 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e
if (ctx->parent.must_undo)
teco_undo_gunichar(ctx->expectstring.machine.escape_char);
ctx->expectstring.machine.escape_char = '\e';
+ } else if (ctx->expectstring.machine.escape_char == '{') {
+ /*
+ * Makes sure that after all but the last string argument,
+ * the escape character is reset, as in @FR{foo}{bar}.
+ */
+ if (ctx->parent.must_undo)
+ teco_undo_flags(ctx->flags);
+ ctx->flags.modifier_at = TRUE;
}
ctx->expectstring.nesting = 1;
@@ -1090,7 +1215,7 @@ teco_state_expectstring_refresh(teco_machine_main_t *ctx, GError **error)
/* never calls process_cb() in parse-only mode */
if (ctx->expectstring.insert_len && current->expectstring.process_cb &&
- !current->expectstring.process_cb(ctx, &ctx->expectstring.string,
+ !current->expectstring.process_cb(ctx, ctx->expectstring.string,
ctx->expectstring.insert_len, error))
return FALSE;
@@ -1102,10 +1227,10 @@ teco_state_expectstring_refresh(teco_machine_main_t *ctx, GError **error)
}
gboolean
-teco_state_expectfile_process(teco_machine_main_t *ctx, const teco_string_t *str,
+teco_state_expectfile_process(teco_machine_main_t *ctx, teco_string_t str,
gsize new_chars, GError **error)
{
- g_assert(str->data != NULL);
+ g_assert(str.data != NULL);
/*
* Null-chars must not occur in filename/path strings and at some point
@@ -1114,7 +1239,7 @@ teco_state_expectfile_process(teco_machine_main_t *ctx, const teco_string_t *str
* Doing it here ensures that teco_file_expand_path() can be safely called
* from the done_cb().
*/
- if (memchr(str->data + str->len - new_chars, '\0', new_chars)) {
+ if (memchr(str.data + str.len - new_chars, '\0', new_chars)) {
g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
"Null-character not allowed in filenames");
return FALSE;