diff options
Diffstat (limited to 'src/parser.c')
| -rw-r--r-- | src/parser.c | 411 |
1 files changed, 268 insertions, 143 deletions
diff --git a/src/parser.c b/src/parser.c index c1d22b2..747249d 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2025 Robin Haberkorn + * Copyright (C) 2012-2026 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,6 +19,7 @@ #include "config.h" #endif +#include <errno.h> #include <string.h> #include <glib.h> @@ -80,7 +81,7 @@ teco_machine_input(teco_machine_t *ctx, gunichar chr, GError **error) gboolean teco_state_end_of_macro(teco_machine_t *ctx, GError **error) { - g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_SYNTAX, "Unterminated command"); return FALSE; } @@ -161,9 +162,7 @@ gboolean teco_execute_macro(const gchar *macro, gsize macro_len, teco_qreg_table_t *qreg_table_locals, GError **error) { - const teco_string_t str = {(gchar *)macro, macro_len}; - - if (!teco_string_validate_utf8(&str)) { + if (!teco_string_validate_utf8((teco_string_t){(gchar *)macro, macro_len})) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT, "Invalid UTF-8 byte sequence in macro"); return FALSE; @@ -185,41 +184,60 @@ teco_execute_macro(const gchar *macro, gsize macro_len, GError *tmp_error = NULL; - if (!teco_machine_main_step(&macro_machine, macro, macro_len, &tmp_error)) { - if (!g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN)) { - /* passes ownership of tmp_error */ - g_propagate_error(error, tmp_error); - goto error_cleanup; + for (;;) { + if (!teco_machine_main_step(&macro_machine, macro, macro_len, &tmp_error)) { + if (!g_error_matches(tmp_error, TECO_ERROR, TECO_ERROR_RETURN)) { + /* passes ownership of tmp_error */ + g_propagate_error(error, tmp_error); + goto error_cleanup; + } + g_error_free(tmp_error); + + /* + * Macro returned - handle like regular + * end of macro, even though some checks + * are unnecessary here. 
+ * macro_pc will still point to the return PC. + */ + g_assert(macro_machine.parent.current == &teco_state_start); + + /* + * Discard all braces, except the current one. + */ + if (!teco_expressions_brace_return(parent_brace_level, teco_error_return_args, error)) + goto error_cleanup; + + /* + * Clean up the loop stack. + * We are allowed to return in loops. + * NOTE: This does not have to be undone. + */ + g_array_remove_range(teco_loop_stack, macro_machine.loop_stack_fp, + teco_loop_stack->len - macro_machine.loop_stack_fp); } - g_error_free(tmp_error); - /* - * Macro returned - handle like regular - * end of macro, even though some checks - * are unnecessary here. - * macro_pc will still point to the return PC. - */ - g_assert(macro_machine.parent.current == &teco_state_start); + if (G_LIKELY(teco_goto_backup_pc < 0)) + break; - /* - * Discard all braces, except the current one. - */ - if (!teco_expressions_brace_return(parent_brace_level, teco_error_return_args, error)) - goto error_cleanup; + /* continue after :Olabel$ */ + macro_machine.macro_pc = teco_goto_backup_pc; + /* macro could have ended in a "lookahead" state */ + macro_machine.parent.current = &teco_state_start; - /* - * Clean up the loop stack. - * We are allowed to return in loops. - * NOTE: This does not have to be undone. 
- */ - g_array_remove_range(teco_loop_stack, macro_machine.loop_stack_fp, - teco_loop_stack->len - macro_machine.loop_stack_fp); + teco_undo_string_own(teco_goto_skip_label); + memset(&teco_goto_skip_label, 0, sizeof(teco_goto_skip_label)); + teco_undo_gssize(teco_goto_backup_pc) = -1; + + if (macro_machine.parent.must_undo) + teco_undo_flags(macro_machine.flags); + macro_machine.flags.mode = TECO_MODE_NORMAL; + + /* no need to reparse everything in the future */ + macro_machine.goto_table.complete = TRUE; } if (G_UNLIKELY(teco_goto_skip_label.len > 0)) { - g_autofree gchar *label_printable = teco_string_echo(teco_goto_skip_label.data, teco_goto_skip_label.len); - g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, - "Label \"%s\" not found", label_printable); + teco_error_label_set(error, teco_goto_skip_label.data, teco_goto_skip_label.len); goto error_attach; } @@ -385,6 +403,8 @@ teco_machine_main_clear(teco_machine_main_t *ctx) teco_goto_table_clear(&ctx->goto_table); teco_string_clear(&ctx->expectstring.string); teco_machine_stringbuilding_clear(&ctx->expectstring.machine); + teco_string_clear(&ctx->goto_label); + teco_machine_qregspec_free(ctx->expectqreg); } /** Append string to result with case folding. */ @@ -394,9 +414,6 @@ teco_machine_stringbuilding_append(teco_machine_stringbuilding_t *ctx, const gch g_assert(ctx->result != NULL); switch (ctx->mode) { - case TECO_STRINGBUILDING_MODE_NORMAL: - teco_string_append(ctx->result, str, len); - break; case TECO_STRINGBUILDING_MODE_UPPER: { g_autofree gchar *folded = ctx->codepage == SC_CP_UTF8 ? g_utf8_strup(str, len) : g_ascii_strup(str, len); @@ -409,46 +426,91 @@ teco_machine_stringbuilding_append(teco_machine_stringbuilding_t *ctx, const gch teco_string_append(ctx->result, folded, strlen(folded)); break; } + default: + teco_string_append(ctx->result, str, len); + break; } } -/* - * FIXME: All teco_state_stringbuilding_* states could be static? +/** + * Append codepoint to result string with case folding. 
+ * + * This also takes the target encoding into account and checks the value + * range accordingly. + * + * @return FALSE if the codepoint is not valid in the target encoding. */ +static gboolean +teco_machine_stringbuilding_append_c(teco_machine_stringbuilding_t *ctx, teco_int_t value) +{ + g_assert(ctx->result != NULL); + + if (ctx->codepage == SC_CP_UTF8) { + if (value < 0 || !g_unichar_validate(value)) + return FALSE; + switch (ctx->mode) { + case TECO_STRINGBUILDING_MODE_UPPER: + value = g_unichar_toupper(value); + break; + case TECO_STRINGBUILDING_MODE_LOWER: + value = g_unichar_tolower(value); + break; + } + teco_string_append_wc(ctx->result, value); + } else { + if (value < 0 || value > 0xFF) + return FALSE; + switch (ctx->mode) { + case TECO_STRINGBUILDING_MODE_UPPER: + value = g_ascii_toupper(value); + break; + case TECO_STRINGBUILDING_MODE_LOWER: + value = g_ascii_tolower(value); + break; + } + teco_string_append_c(ctx->result, value); + } + + return TRUE; +} + static teco_state_t *teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error); -TECO_DECLARE_STATE(teco_state_stringbuilding_ctl); +static teco_state_t teco_state_stringbuilding_ctl; static teco_state_t *teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error); -TECO_DECLARE_STATE(teco_state_stringbuilding_escaped); +static teco_state_t teco_state_stringbuilding_escaped; -TECO_DECLARE_STATE(teco_state_stringbuilding_lower); -TECO_DECLARE_STATE(teco_state_stringbuilding_upper); +static teco_state_t teco_state_stringbuilding_lower; +static teco_state_t teco_state_stringbuilding_upper; -TECO_DECLARE_STATE(teco_state_stringbuilding_ctle); -TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_num); -TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_u); -TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_q); -TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_quote); 
-TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_n); +static teco_state_t teco_state_stringbuilding_ctle; +static teco_state_t teco_state_stringbuilding_ctle_num; +static teco_state_t teco_state_stringbuilding_ctle_u; +static teco_state_t teco_state_stringbuilding_ctle_code; +static teco_state_t teco_state_stringbuilding_ctle_q; +static teco_state_t teco_state_stringbuilding_ctle_quote; +static teco_state_t teco_state_stringbuilding_ctle_n; static teco_state_t * teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) { - switch (chr) { - case '^': - return &teco_state_stringbuilding_ctl; - case TECO_CTL_KEY('^'): - /* - * Ctrl+^ is inserted verbatim as code 30. - * Otherwise it would expand to a single caret - * just like caret+caret (^^). - */ - break; - default: - if (TECO_IS_CTL(chr)) - return teco_state_stringbuilding_ctl_input(ctx, TECO_CTL_ECHO(chr), error); + if (ctx->mode != TECO_STRINGBUILDING_MODE_DISABLED) { + switch (chr) { + case '^': + return &teco_state_stringbuilding_ctl; + case TECO_CTL_KEY('^'): + /* + * Ctrl+^ is inserted verbatim as code 30. + * Otherwise it would expand to a single caret + * just like caret+caret (^^). 
+ */ + break; + default: + if (TECO_IS_CTL(chr)) + return teco_state_stringbuilding_ctl_input(ctx, TECO_CTL_ECHO(chr), error); + } } return teco_state_stringbuilding_escaped_input(ctx, chr, error); @@ -457,14 +519,15 @@ teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gunich /* in cmdline.c */ gboolean teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); -gboolean teco_state_stringbuilding_insert_completion(teco_machine_stringbuilding_t *ctx, const teco_string_t *str, GError **error); - -TECO_DEFINE_STATE(teco_state_stringbuilding_start, - .is_start = TRUE, - .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) - teco_state_stringbuilding_start_process_edit_cmd, - .insert_completion_cb = (teco_state_insert_completion_cb_t) - teco_state_stringbuilding_insert_completion +gboolean teco_state_stringbuilding_insert_completion(teco_machine_stringbuilding_t *ctx, teco_string_t str, GError **error); + +static TECO_DEFINE_STATE(teco_state_stringbuilding_start, + .is_start = TRUE, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_start_input, + .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) + teco_state_stringbuilding_start_process_edit_cmd, + .insert_completion_cb = (teco_state_insert_completion_cb_t) + teco_state_stringbuilding_insert_completion ); static teco_state_t * @@ -481,6 +544,11 @@ teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar * be abolished altogether. 
*/ break; + case 'P': + if (ctx->parent.must_undo) + teco_undo_guint(ctx->mode); + ctx->mode = TECO_STRINGBUILDING_MODE_DISABLED; + return &teco_state_stringbuilding_start; case 'Q': case 'R': return &teco_state_stringbuilding_escaped; case 'V': return &teco_state_stringbuilding_lower; @@ -509,7 +577,9 @@ teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar return &teco_state_stringbuilding_start; } -TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctl); +static TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctl, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctl_input, +); static teco_state_t * teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) @@ -523,8 +593,6 @@ teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, guni * is that we don't try to casefold non-ANSI characters in single-byte mode. */ switch (ctx->mode) { - case TECO_STRINGBUILDING_MODE_NORMAL: - break; case TECO_STRINGBUILDING_MODE_UPPER: chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80 ? 
g_unichar_toupper(chr) : chr; @@ -543,7 +611,8 @@ teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, guni gboolean teco_state_stringbuilding_escaped_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error); -TECO_DEFINE_STATE(teco_state_stringbuilding_escaped, +static TECO_DEFINE_STATE(teco_state_stringbuilding_escaped, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_escaped_input, .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) teco_state_stringbuilding_escaped_process_edit_cmd ); @@ -569,7 +638,9 @@ teco_state_stringbuilding_lower_ctl_input(teco_machine_stringbuilding_t *ctx, gu return &teco_state_stringbuilding_start; } -TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_lower_ctl); +static TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_lower_ctl, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_lower_ctl_input +); static teco_state_t * teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) @@ -587,7 +658,9 @@ teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gunich return &teco_state_stringbuilding_start; } -TECO_DEFINE_STATE(teco_state_stringbuilding_lower); +static TECO_DEFINE_STATE(teco_state_stringbuilding_lower, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_lower_input +); static teco_state_t * teco_state_stringbuilding_upper_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) @@ -610,7 +683,9 @@ teco_state_stringbuilding_upper_ctl_input(teco_machine_stringbuilding_t *ctx, gu return &teco_state_stringbuilding_start; } -TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_upper_ctl); +static TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_upper_ctl, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_upper_ctl_input +); static teco_state_t * 
teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) @@ -628,7 +703,9 @@ teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gunich return &teco_state_stringbuilding_start; } -TECO_DEFINE_STATE(teco_state_stringbuilding_upper); +static TECO_DEFINE_STATE(teco_state_stringbuilding_upper, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_upper_input +); static teco_state_t * teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) @@ -638,6 +715,7 @@ teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gunicha switch (teco_ascii_toupper(chr)) { case '\\': next = &teco_state_stringbuilding_ctle_num; break; case 'U': next = &teco_state_stringbuilding_ctle_u; break; + case '<': next = &teco_state_stringbuilding_ctle_code; break; case 'Q': next = &teco_state_stringbuilding_ctle_q; break; case '@': next = &teco_state_stringbuilding_ctle_quote; break; case 'N': next = &teco_state_stringbuilding_ctle_n; break; @@ -660,7 +738,9 @@ teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gunicha return next; } -TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctle); +static TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctle, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_input +); /* in cmdline.c */ gboolean teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx, @@ -711,7 +791,9 @@ teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gun return &teco_state_stringbuilding_start; } -TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_num); +static TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_num, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_num_input +); static teco_state_t * 
teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) @@ -736,47 +818,73 @@ teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gunic if (!qreg->vtable->get_integer(qreg, &value, error)) return NULL; - if (ctx->codepage == SC_CP_UTF8) { - if (value < 0 || !g_unichar_validate(value)) - goto error_codepoint; - switch (ctx->mode) { - case TECO_STRINGBUILDING_MODE_NORMAL: - break; - case TECO_STRINGBUILDING_MODE_UPPER: - value = g_unichar_toupper(value); - break; - case TECO_STRINGBUILDING_MODE_LOWER: - value = g_unichar_tolower(value); - break; + if (!teco_machine_stringbuilding_append_c(ctx, value)) { + g_autofree gchar *name_printable = teco_string_echo(qreg->head.name.data, qreg->head.name.len); + g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Q-Register \"%s\" does not contain a valid codepoint", name_printable); + return NULL; + } + + return &teco_state_stringbuilding_start; +} + +static TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_u, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_u_input +); + +static teco_state_t * +teco_state_stringbuilding_ctle_code_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) +{ + if (chr == '>') { + if (!ctx->result) + /* parse-only mode */ + return &teco_state_stringbuilding_start; + + if (!ctx->code.data) { + g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Invalid empty ^E<> specified"); + return NULL; } - teco_string_append_wc(ctx->result, value); - } else { - if (value < 0 || value > 0xFF) - goto error_codepoint; - switch (ctx->mode) { - case TECO_STRINGBUILDING_MODE_NORMAL: - break; - case TECO_STRINGBUILDING_MODE_UPPER: - value = g_ascii_toupper(value); - break; - case TECO_STRINGBUILDING_MODE_LOWER: - value = g_ascii_tolower(value); - break; + + /* + * FIXME: Once we support hexadecimal constants in the SciTECO + * language itself, we might support this syntax as 
well. + * Or should we perhaps always consider the current radix? + */ + gchar *endp = ctx->code.data; + errno = 0; + gint64 code = g_ascii_strtoll(ctx->code.data, &endp, 0); + if (errno || endp - ctx->code.data != ctx->code.len || + !teco_machine_stringbuilding_append_c(ctx, code)) { + /* will also catch embedded nulls */ + g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT, + "Invalid code ^E<%s> specified", ctx->code.data); + return NULL; } - teco_string_append_c(ctx->result, value); + + if (ctx->parent.must_undo) + teco_undo_string_own(ctx->code); + else + teco_string_clear(&ctx->code); + memset(&ctx->code, 0, sizeof(ctx->code)); + + return &teco_state_stringbuilding_start; } - return &teco_state_stringbuilding_start; + if (!ctx->result) + /* parse-only mode */ + return &teco_state_stringbuilding_ctle_code; -error_codepoint: { - g_autofree gchar *name_printable = teco_string_echo(qreg->head.name.data, qreg->head.name.len); - g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT, - "Q-Register \"%s\" does not contain a valid codepoint", name_printable); - return NULL; -} + if (ctx->parent.must_undo) + undo__teco_string_truncate(&ctx->code, ctx->code.len); + teco_string_append_wc(&ctx->code, chr); + + return &teco_state_stringbuilding_ctle_code; } -TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_u); +static TECO_DEFINE_STATE(teco_state_stringbuilding_ctle_code, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_code_input +); static teco_state_t * teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) @@ -804,7 +912,9 @@ teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gunic return &teco_state_stringbuilding_start; } -TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_q); +static TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_q, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_q_input +); 
static teco_state_t * teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) @@ -836,7 +946,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g * in command line arguments anyway. * Otherwise, we'd have to implement our own POSIX shell escape function. */ - if (teco_string_contains(&str, '\0')) { + if (teco_string_contains(str, '\0')) { teco_error_qregcontainsnull_set(error, qreg->head.name.data, qreg->head.name.len, table != &teco_qreg_table_globals); return NULL; @@ -847,7 +957,9 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g return &teco_state_stringbuilding_start; } -TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_quote); +static TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_quote, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_quote_input +); static teco_state_t * teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error) @@ -872,7 +984,7 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gunic g_auto(teco_string_t) str = {NULL, 0}; if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; - if (teco_string_contains(&str, '\0')) { + if (teco_string_contains(str, '\0')) { teco_error_qregcontainsnull_set(error, qreg->head.name.data, qreg->head.name.len, table != &teco_qreg_table_globals); return NULL; @@ -884,7 +996,9 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gunic return &teco_state_stringbuilding_start; } -TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_n); +static TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_n, + .input_cb = (teco_state_input_cb_t)teco_state_stringbuilding_ctle_n_input +); void teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gunichar escape_char, @@ -922,6 
+1036,11 @@ teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gch for (guint i = 0; i < len; ) { gunichar chr = g_utf8_get_char(str+i); + /* + * NOTE: We support both `[` and `{`, so this works for autocompleting + * long Q-register specifications as well. + * This may therefore insert unnecessary ^Q, but they won't hurt. + */ if (g_unichar_toupper(chr) == ctx->escape_char || (ctx->escape_char == '[' && chr == ']') || (ctx->escape_char == '{' && chr == '}')) @@ -939,8 +1058,8 @@ teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gch void teco_machine_stringbuilding_clear(teco_machine_stringbuilding_t *ctx) { - if (ctx->machine_qregspec) - teco_machine_qregspec_free(ctx->machine_qregspec); + teco_machine_qregspec_free(ctx->machine_qregspec); + teco_string_clear(&ctx->code); } gboolean @@ -958,30 +1077,28 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e teco_state_t *current = ctx->parent.current; /* - * String termination handling + * Ignore whitespace immediately after @-modified commands. + * This is inspired by TECO-64. + * The alternative would have been to throw an error, + * as allowing whitespace escape_chars is harmful. */ - if (ctx->flags.modifier_at) { - if (current->expectstring.last) - /* also clears the "@" modifier flag */ - teco_machine_main_eval_at(ctx); + if (ctx->flags.modifier_at && teco_is_noop(chr)) + return current; + /* + * String termination handling + */ + if (teco_machine_main_eval_at(ctx)) { /* - * FIXME: Exclude setting at least whitespace characters as the - * new string escape character to avoid accidental errors? - * * FIXME: Should we perhaps restrict case folding escape characters * to the ANSI range (teco_ascii_toupper())? - * This would be faster than case folding each and every character + * This would be faster than case folding almost all characters * of a string argument to check against the escape char. 
*/ - switch (ctx->expectstring.machine.escape_char) { - case '\e': - case '{': - if (ctx->parent.must_undo) - teco_undo_gunichar(ctx->expectstring.machine.escape_char); - ctx->expectstring.machine.escape_char = g_unichar_toupper(chr); - return current; - } + if (ctx->parent.must_undo) + teco_undo_gunichar(ctx->expectstring.machine.escape_char); + ctx->expectstring.machine.escape_char = g_unichar_toupper(chr); + return current; } /* @@ -1019,11 +1136,11 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e * so they may do their main activity in process_cb(). */ if (ctx->expectstring.insert_len && current->expectstring.process_cb && - !current->expectstring.process_cb(ctx, &ctx->expectstring.string, + !current->expectstring.process_cb(ctx, ctx->expectstring.string, ctx->expectstring.insert_len, error)) return NULL; - teco_state_t *next = current->expectstring.done_cb(ctx, &ctx->expectstring.string, error); + teco_state_t *next = current->expectstring.done_cb(ctx, ctx->expectstring.string, error); if (ctx->parent.must_undo) teco_undo_string_own(ctx->expectstring.string); @@ -1035,6 +1152,14 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **e if (ctx->parent.must_undo) teco_undo_gunichar(ctx->expectstring.machine.escape_char); ctx->expectstring.machine.escape_char = '\e'; + } else if (ctx->expectstring.machine.escape_char == '{') { + /* + * Makes sure that after all but the last string argument, + * the escape character is reset, as in @FR{foo}{bar}. 
+ */ + if (ctx->parent.must_undo) + teco_undo_flags(ctx->flags); + ctx->flags.modifier_at = TRUE; } ctx->expectstring.nesting = 1; @@ -1090,7 +1215,7 @@ teco_state_expectstring_refresh(teco_machine_main_t *ctx, GError **error) /* never calls process_cb() in parse-only mode */ if (ctx->expectstring.insert_len && current->expectstring.process_cb && - !current->expectstring.process_cb(ctx, &ctx->expectstring.string, + !current->expectstring.process_cb(ctx, ctx->expectstring.string, ctx->expectstring.insert_len, error)) return FALSE; @@ -1102,10 +1227,10 @@ teco_state_expectstring_refresh(teco_machine_main_t *ctx, GError **error) } gboolean -teco_state_expectfile_process(teco_machine_main_t *ctx, const teco_string_t *str, +teco_state_expectfile_process(teco_machine_main_t *ctx, teco_string_t str, gsize new_chars, GError **error) { - g_assert(str->data != NULL); + g_assert(str.data != NULL); /* * Null-chars must not occur in filename/path strings and at some point @@ -1114,7 +1239,7 @@ teco_state_expectfile_process(teco_machine_main_t *ctx, const teco_string_t *str * Doing it here ensures that teco_file_expand_path() can be safely called * from the done_cb(). */ - if (memchr(str->data + str->len - new_chars, '\0', new_chars)) { + if (memchr(str.data + str.len - new_chars, '\0', new_chars)) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, "Null-character not allowed in filenames"); return FALSE; |
