From b31b88717172e22b49c0493185f603b8f84989ec Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Wed, 4 Sep 2024 12:49:29 +0200 Subject: the ^EUq string building escape now respects the encoding (can insert bytes or codepoints) (refs #5) * This is trickier than it sounds because there isn't one single place to consult. It depends on the context. If the string argument relates to buffer contents - as in <I>, <S>, etc. - the buffer's encoding is consulted. If it goes into a register (EU), the register's encoding is consulted. Everything else (O, EN, EC, ES...) expects only Unicode codepoints. * This is communicated through a new field teco_machine_stringbuilding_t::codepage which must be set in the states' initial callback. * Seems overkill just for ^EUq, but it can be used for context-sensitive processing of all the other string building constructs as well. * ^V and ^W cannot be supported for Unicode characters for the time being without a Unicode-aware parser --- src/parser.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/parser.h') diff --git a/src/parser.h b/src/parser.h index 4b4a3a0..ba6054f 100644 --- a/src/parser.h +++ b/src/parser.h @@ -309,9 +309,6 @@ typedef enum { /** * A stringbuilding state machine. * - * @fixme Should contain the escape char (currently in teco_machine_expectstring_t), - * so that we can escape it via ^Q. - * * @extends teco_machine_t */ typedef struct teco_machine_stringbuilding_t { @@ -350,6 +347,13 @@ typedef struct teco_machine_stringbuilding_t { * (see teco_state_stringbuilding_start_process_edit_cmd()). */ teco_string_t *result; + + /** + * Encoding of string in `result`. + * This is inherited from the embedding command and may depend on + * the buffer's or Q-Register's encoding.
+ */ + guint codepage; } teco_machine_stringbuilding_t; void teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escape_char, @@ -508,6 +512,7 @@ void teco_machine_main_clear(teco_machine_main_t *ctx); G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(teco_machine_main_t, teco_machine_main_clear); +gboolean teco_state_expectstring_initial(teco_machine_main_t *ctx, GError **error); teco_state_t *teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **error); gboolean teco_state_expectstring_refresh(teco_machine_main_t *ctx, GError **error); @@ -533,6 +538,7 @@ gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco return teco_state_expectstring_input(ctx, chr, error); \ } \ TECO_DEFINE_STATE(NAME, \ + .initial_cb = (teco_state_initial_cb_t)teco_state_expectstring_initial, \ .refresh_cb = (teco_state_refresh_cb_t)teco_state_expectstring_refresh, \ .process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \ teco_state_expectstring_process_edit_cmd, \ -- cgit v1.2.3