diff options
Diffstat (limited to 'src/qreg-commands.c')
-rw-r--r-- | src/qreg-commands.c | 128 |
1 files changed, 99 insertions, 29 deletions
diff --git a/src/qreg-commands.c b/src/qreg-commands.c index be0aada..cff4c84 100644 --- a/src/qreg-commands.c +++ b/src/qreg-commands.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2023 Robin Haberkorn + * Copyright (C) 2012-2024 Robin Haberkorn * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -50,7 +50,7 @@ teco_state_expectqreg_initial(teco_machine_main_t *ctx, GError **error) } teco_state_t * -teco_state_expectqreg_input(teco_machine_main_t *ctx, gchar chr, GError **error) +teco_state_expectqreg_input(teco_machine_main_t *ctx, gunichar chr, GError **error) { teco_state_t *current = ctx->parent.current; @@ -149,7 +149,7 @@ teco_state_loadqreg_done(teco_machine_main_t *ctx, const teco_string_t *str, GEr if (str->len > 0) { /* Load file into Q-Register */ g_autofree gchar *filename = teco_file_expand_path(str->data); - if (!teco_qreg_load(qreg, filename, error)) + if (!qreg->vtable->load(qreg, filename, error)) return NULL; } else { /* Edit Q-Register */ @@ -202,7 +202,7 @@ teco_state_saveqreg_done(teco_machine_main_t *ctx, const teco_string_t *str, GEr return &teco_state_start; g_autofree gchar *filename = teco_file_expand_path(str->data); - return teco_qreg_save(qreg, filename, error) ? &teco_state_start : NULL; + return qreg->vtable->save(qreg, filename, error) ? &teco_state_start : NULL; } /*$ E% E%q @@ -259,9 +259,9 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, if (teco_machine_main_eval_colon(ctx)) { /* Query Q-Register's existence or string size */ if (qreg) { - gsize len; - - if (!qreg->vtable->get_string(qreg, NULL, &len, error)) + /* get_string() would return the size in bytes */ + teco_int_t len = qreg->vtable->get_length(qreg, error); + if (len < 0) return NULL; teco_expressions_push(len); } else { @@ -281,10 +281,9 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, return NULL; } - gint c = qreg->vtable->get_character(qreg, pos, error); - if (c < 0) + teco_int_t c; + if (!qreg->vtable->get_character(qreg, pos, &c, error)) return NULL; - teco_expressions_push(c); } else { /* Query integer */ @@ -311,6 +310,10 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, * Positions are handled like buffer positions \(em they * begin at 0 up to the length of the string minus 1. * An error is thrown for invalid positions. + * If <q> is encoded as UTF-8 and there is + * an incomplete sequence at the requested position, + * -1 is returned. + * All other invalid Unicode sequences are returned as -2. * Both non-colon-modified forms of Q require register <q> * to be defined and fail otherwise. * @@ -369,24 +372,50 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, gint args = teco_expressions_args(); if (args > 0) { - g_autofree gchar *buffer = g_malloc(args); + guint codepage = teco_default_codepage(); + if (colon_modified && !qreg->vtable->get_string(qreg, NULL, NULL, &codepage, error)) + return NULL; - for (gint i = args; i > 0; i--) { - teco_int_t v; - if (!teco_expressions_pop_num_calc(&v, 0, error)) - return NULL; - buffer[i-1] = (gchar)v; + g_autofree gchar *buffer = NULL; + gsize len = 0; + + if (codepage == SC_CP_UTF8) { + /* the glib docs wrongly claim that one character can take 6 bytes */ + buffer = g_malloc(4*args); + for (gint i = args; i > 0; i--) { + teco_int_t v; + if (!teco_expressions_pop_num_calc(&v, 0, error)) + return NULL; + if (v < 0 || !g_unichar_validate(v)) { + teco_error_codepoint_set(error, "^U"); + return NULL; + } + len += g_unichar_to_utf8(v, buffer+len); + } + } else { + buffer = g_malloc(args); + for (gint i = args; i > 0; i--) { + teco_int_t v; + if (!teco_expressions_pop_num_calc(&v, 0, error)) + return NULL; + if (v < 0 || v > 0xFF) { + teco_error_codepoint_set(error, "^U"); + return NULL; + } + buffer[len++] = v; + } } if (colon_modified) { /* append to register */ if (!qreg->vtable->undo_append_string(qreg, error) || - !qreg->vtable->append_string(qreg, buffer, args, error)) + !qreg->vtable->append_string(qreg, buffer, len, error)) return NULL; } else { /* set register */ if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, buffer, args, error)) + !qreg->vtable->set_string(qreg, buffer, len, + codepage, error)) return NULL; } } @@ -399,7 +428,8 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, } else { /* set register */ if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, str->data, str->len, error)) + !qreg->vtable->set_string(qreg, str->data, str->len, + teco_default_codepage(), error)) return NULL; } @@ -450,6 +480,26 @@ TECO_DEFINE_STATE_EXPECTQREG(teco_state_eucommand, .expectqreg.type = TECO_QREG_OPTIONAL_INIT ); +static gboolean +teco_state_setqregstring_building_initial(teco_machine_main_t *ctx, GError **error) +{ + if (ctx->mode > TECO_MODE_NORMAL) + return TRUE; + + teco_qreg_t *qreg; + teco_machine_qregspec_get_results(ctx->expectqreg, &qreg, NULL); + + /* + * The expected codepage of string building constructs is determined + * by the Q-Register. + */ + guint codepage; + if (!qreg->vtable->get_string(qreg, NULL, NULL, &codepage, error)) + return FALSE; + teco_machine_stringbuilding_set_codepage(&ctx->expectstring.machine, codepage); + return TRUE; +} + static teco_state_t * teco_state_setqregstring_building_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error) { @@ -467,6 +517,7 @@ teco_state_setqregstring_building_done(teco_machine_main_t *ctx, const teco_stri * characters \fBenabled\fP. */ TECO_DEFINE_STATE_EXPECTSTRING(teco_state_setqregstring_building, + .initial_cb = (teco_state_initial_cb_t)teco_state_setqregstring_building_initial, .expectstring.string_building = TRUE ); @@ -481,7 +532,7 @@ teco_state_getqregstring_got_register(teco_machine_main_t *ctx, teco_qreg_t *qre g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; if (str.len > 0) { @@ -604,8 +655,15 @@ teco_state_macro_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, } else { g_auto(teco_qreg_table_t) table; teco_qreg_table_init(&table, FALSE); + if (!teco_qreg_execute(qreg, &table, error)) return NULL; + if (teco_qreg_current && !teco_qreg_current->must_undo) { + /* currently editing local Q-Register */ + teco_error_editinglocalqreg_set(error, teco_qreg_current->head.name.data, + teco_qreg_current->head.name.len); + return NULL; + } } return &teco_state_start; @@ -632,6 +690,10 @@ teco_state_macro_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, * Note that the string of <q> will be copied upon macro execution, * so subsequent changes to Q-Register <q> from inside the macro do * not modify the executed code. + * + * While \fBM\fP does not check the register's configured encoding + * (as reported by \fBEE\fP), its contents must be and are checked to be in + * valid UTF-8. */ TECO_DEFINE_STATE_EXPECTQREG(teco_state_macro); @@ -666,6 +728,9 @@ teco_state_macrofile_done(teco_machine_main_t *ctx, const teco_string_t *str, GE * It is otherwise similar to the \(lqM\(rq command. * * If <file> could not be read, the command yields an error. + * + * As all \*(ST code, the contents of <file> must be in valid UTF-8 + * even if operating in the \(lqdefault ANSI\(rq mode as configured by \fBED\fP. */ TECO_DEFINE_STATE_EXPECTFILE(teco_state_macrofile); @@ -678,7 +743,7 @@ teco_state_copytoqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, if (ctx->mode > TECO_MODE_NORMAL) return &teco_state_start; - teco_int_t from, len; + gssize from, len; /* in bytes */ if (!teco_expressions_eval(FALSE, error)) return NULL; @@ -702,32 +767,37 @@ teco_state_copytoqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, len *= -1; } } else { - teco_int_t to = teco_expressions_pop_num(0); - from = teco_expressions_pop_num(0); - + gssize to = teco_interface_glyphs2bytes(teco_expressions_pop_num(0)); + from = teco_interface_glyphs2bytes(teco_expressions_pop_num(0)); len = to - from; - if (len < 0 || !teco_validate_pos(from) || !teco_validate_pos(to)) { + if (len < 0 || from < 0 || to < 0) { teco_error_range_set(error, "X"); return NULL; } } + /* + * NOTE: This does not use SCI_GETRANGEPOINTER+SCI_GETGAPPOSITION + * since it may not be safe when copying from register to register. + */ g_autofree gchar *str = g_malloc(len + 1); - struct Sci_TextRange text_range = { - .chrg = {.cpMin = from, .cpMax = from + len}, + struct Sci_TextRangeFull range = { + .chrg = {from, from + len}, .lpstrText = str }; - teco_interface_ssm(SCI_GETTEXTRANGE, 0, (sptr_t)&text_range); + teco_interface_ssm(SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range); if (teco_machine_main_eval_colon(ctx)) { if (!qreg->vtable->undo_append_string(qreg, error) || !qreg->vtable->append_string(qreg, str, len, error)) return NULL; } else { + guint cp = teco_interface_get_codepage(); + if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, str, len, error)) + !qreg->vtable->set_string(qreg, str, len, cp, error)) return NULL; } |