From 41ab5cf0289dab60ac1ddc97cf9680ee2468ea6c Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Mon, 9 Sep 2024 00:03:33 +0200 Subject: Xq and ]q inherit the document encoding from the source document (refs #5) * ^Uq however always sets an UTF8 register as the source is supposed to be a SciTECO macro which is always UTF-8. * :^Uq preserves the register's encoding * teco_doc_set_string() now also sets the encoding * instead of trying to restore the encoding in teco_doc_undo_set_string(), we now swap out the document in a teco_doc_t and pass it to an undo token. * The get_codepage() Q-Reg method has been removed as the same can now be done with teco_doc_get_string() and the get_string() method. --- lib/lexer.tes | Bin 720 -> 714 bytes src/cmdline.c | 7 +-- src/core-commands.c | 4 +- src/doc.c | 96 ++++++++++++++++++++++++++++++++++-------- src/doc.h | 4 +- src/file-utils.c | 2 +- src/glob.c | 6 ++- src/help.c | 2 +- src/interface-gtk/interface.c | 2 +- src/parser.c | 6 +-- src/qreg-commands.c | 22 +++++++--- src/qreg.c | 88 +++++++++++++++++--------------------- src/qreg.h | 9 ++-- src/search.c | 17 +++++--- src/spawn.c | 24 +++++------ 15 files changed, 177 insertions(+), 112 deletions(-) diff --git a/lib/lexer.tes b/lib/lexer.tes index 8d7ea06..7381e62 100644 Binary files a/lib/lexer.tes and b/lib/lexer.tes differ diff --git a/src/cmdline.c b/src/cmdline.c index 255ffac..d6fcd37 100644 --- a/src/cmdline.c +++ b/src/cmdline.c @@ -125,7 +125,8 @@ teco_cmdline_insert(const gchar *data, gsize len, GError **error) teco_qreg_t *cmdline_reg = teco_qreg_table_find(&teco_qreg_table_globals, "\e", 1); teco_string_t new_cmdline; - if (!cmdline_reg->vtable->get_string(cmdline_reg, &new_cmdline.data, &new_cmdline.len, error)) + if (!cmdline_reg->vtable->get_string(cmdline_reg, &new_cmdline.data, &new_cmdline.len, + NULL, error)) return FALSE; /* @@ -307,7 +308,7 @@ teco_cmdline_fnmacro(const gchar *name, GError **error) return TRUE; g_auto(teco_string_t) macro_str = {NULL, 
0}; - return macro_reg->vtable->get_string(macro_reg, &macro_str.data, &macro_str.len, error) && + return macro_reg->vtable->get_string(macro_reg, &macro_str.data, &macro_str.len, NULL, error) && teco_cmdline_keypress(macro_str.data, macro_str.len, error); } @@ -1051,7 +1052,7 @@ teco_state_save_cmdline_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg return &teco_state_start; if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len, error)) + !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len, SC_CP_UTF8, error)) return NULL; return &teco_state_start; diff --git a/src/core-commands.c b/src/core-commands.c index a84d0ef..638279d 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -1451,7 +1451,7 @@ teco_state_changedir_done(teco_machine_main_t *ctx, const teco_string_t *str, GE teco_qreg_t *qreg = teco_qreg_table_find(&teco_qreg_table_globals, "$HOME", 5); g_assert(qreg != NULL); teco_string_t home; - if (!qreg->vtable->get_string(qreg, &home.data, &home.len, error)) + if (!qreg->vtable->get_string(qreg, &home.data, &home.len, NULL, error)) return NULL; /* @@ -2634,8 +2634,6 @@ teco_state_ecommand_encoding(teco_machine_main_t *ctx, GError **error) * Only now, it will be safe to recalculate dot in the new encoding. * If the new codepage is UTF-8, the line character index will be * ready only now. - * FIXME: Apparently the line character index is still not ready - * after switching to UTF-8! 
*/ teco_interface_ssm(SCI_GOTOPOS, teco_glyphs2bytes(dot_glyphs), 0); } diff --git a/src/doc.c b/src/doc.c index 12413af..516dadb 100644 --- a/src/doc.c +++ b/src/doc.c @@ -29,9 +29,32 @@ #include "qreg.h" #include "doc.h" +static inline teco_doc_scintilla_t * +teco_doc_scintilla_ref(teco_doc_scintilla_t *doc) +{ + if (doc) + teco_view_ssm(teco_qreg_view, SCI_ADDREFDOCUMENT, 0, (sptr_t)doc); + return doc; +} + +static inline void +teco_doc_scintilla_release(teco_doc_scintilla_t *doc) +{ + if (doc) + teco_view_ssm(teco_qreg_view, SCI_RELEASEDOCUMENT, 0, (sptr_t)doc); +} + +TECO_DEFINE_UNDO_OBJECT(doc_scintilla, teco_doc_scintilla_t *, + teco_doc_scintilla_ref, teco_doc_scintilla_release); + static inline teco_doc_scintilla_t * teco_doc_get_scintilla(teco_doc_t *ctx) { + /* + * FIXME: Perhaps we should always specify SC_DOCUMENTOPTION_TEXT_LARGE? + * SC_DOCUMENTOPTION_STYLES_NONE is unfortunately also not safe to set + * always as the Q-Reg might well be used for styling even in batch mode. + */ if (G_UNLIKELY(!ctx->doc)) ctx->doc = (teco_doc_scintilla_t *)teco_view_ssm(teco_qreg_view, SCI_CREATEDOCUMENT, 0, 0); return ctx->doc; @@ -66,8 +89,8 @@ teco_doc_edit(teco_doc_t *ctx) * The index is reference-counted. Perhaps we could just allocate * one more time, so it doesn't get freed when changing documents. 
*/ - if (!(teco_view_ssm(teco_qreg_view, - SCI_GETLINECHARACTERINDEX, 0, 0) & SC_LINECHARACTERINDEX_UTF32)) + if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0) + & SC_LINECHARACTERINDEX_UTF32)) teco_view_ssm(teco_qreg_view, SCI_ALLOCATELINECHARACTERINDEX, SC_LINECHARACTERINDEX_UTF32, 0); } @@ -85,23 +108,52 @@ teco_doc_undo_edit(teco_doc_t *ctx) undo__teco_view_ssm(teco_qreg_view, SCI_SETXOFFSET, ctx->xoffset, 0); undo__teco_view_ssm(teco_qreg_view, SCI_SETFIRSTVISIBLELINE, ctx->first_line, 0); undo__teco_view_ssm(teco_qreg_view, SCI_SETDOCPOINTER, 0, - (sptr_t)teco_doc_get_scintilla(ctx)); + (sptr_t)teco_doc_get_scintilla(ctx)); } /** @memberof teco_doc_t */ void -teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len) +teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage) { if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); + teco_doc_scintilla_release(ctx->doc); + ctx->doc = NULL; + teco_doc_reset(ctx); teco_doc_edit(ctx); - teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); - teco_view_ssm(teco_qreg_view, SCI_CLEARALL, 0, 0); teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)(str ? : "")); - teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); + + if (codepage != SC_CP_UTF8) { + /* + * We have a new UTF-8 document and + * teco_doc_edit() currently always initializes an index. + */ + teco_view_ssm(teco_qreg_view, SCI_RELEASELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + g_assert(!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0) + & SC_LINECHARACTERINDEX_UTF32)); + + /* + * Configure a single-byte codepage/charset. + * This requires setting it on all of the possible styles. + * Unfortunately there can theoretically even be 255 (STYLE_MAX) styles. + * This is important only for display purposes - other than that + * all single-byte encodings are handled the same. + * + * FIXME: Should we avoid this if codepage == 0? 
+ * It will be used for raw byte handling mostly. + * Perhaps we should even set char representations appropriately + * for all non-ANSI codepoints in the 0 codepage. + * But this would also be costly... + */ + for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++) + teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET, style, codepage); + /* 0 is used for ALL single-byte encodings */ + teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0); + } if (teco_qreg_current) teco_doc_edit(&teco_qreg_current->string); @@ -117,13 +169,13 @@ teco_doc_undo_set_string(teco_doc_t *ctx) */ teco_doc_update(ctx, teco_qreg_view); - if (teco_qreg_current && teco_qreg_current->must_undo) // FIXME + if (teco_qreg_current && teco_qreg_current->must_undo && // FIXME + ctx == &teco_qreg_current->string) + /* load old document into view */ teco_doc_undo_edit(&teco_qreg_current->string); teco_doc_undo_reset(ctx); - undo__teco_view_ssm(teco_qreg_view, SCI_UNDO, 0, 0); - - teco_doc_undo_edit(ctx); + teco_undo_object_doc_scintilla_push(&ctx->doc); } /** @@ -134,17 +186,22 @@ teco_doc_undo_set_string(teco_doc_t *ctx) * It can be NULL if you are interested only in the string's length. * Strings must be freed via g_free(). * @param len Where to store the string's length (mandatory). + * @param codepage Where to store the document's codepage or NULL + * if that information is not necessary. 
* * @see teco_qreg_vtable_t::get_string() * @memberof teco_doc_t */ void -teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len) +teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage) { if (!ctx->doc) { if (str) *str = NULL; - *len = 0; + if (outlen) + *outlen = 0; + if (codepage) + *codepage = SC_CP_UTF8; return; } @@ -153,11 +210,15 @@ teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len) teco_doc_edit(ctx); - *len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); + gsize len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); if (str) { - *str = g_malloc(*len + 1); - teco_view_ssm(teco_qreg_view, SCI_GETTEXT, *len + 1, (sptr_t)*str); + *str = g_malloc(len + 1); + teco_view_ssm(teco_qreg_view, SCI_GETTEXT, len + 1, (sptr_t)*str); } + if (outlen) + *outlen = len; + if (codepage) + *codepage = teco_view_get_codepage(teco_qreg_view); if (teco_qreg_current) teco_doc_edit(&teco_qreg_current->string); @@ -202,6 +263,5 @@ teco_doc_exchange(teco_doc_t *ctx, teco_doc_t *other) void teco_doc_clear(teco_doc_t *ctx) { - if (ctx->doc) - teco_view_ssm(teco_qreg_view, SCI_RELEASEDOCUMENT, 0, (sptr_t)ctx->doc); + teco_doc_scintilla_release(ctx->doc); } diff --git a/src/doc.h b/src/doc.h index 9dc1665..b7a4f99 100644 --- a/src/doc.h +++ b/src/doc.h @@ -65,10 +65,10 @@ teco_doc_init(teco_doc_t *ctx) void teco_doc_edit(teco_doc_t *ctx); void teco_doc_undo_edit(teco_doc_t *ctx); -void teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len); +void teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage); void teco_doc_undo_set_string(teco_doc_t *ctx); -void teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len); +void teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *len, guint *codepage); void teco_doc_update_from_view(teco_doc_t *ctx, teco_view_t *from); void teco_doc_update_from_doc(teco_doc_t *ctx, const teco_doc_t *from); diff --git a/src/file-utils.c b/src/file-utils.c index 
cd0eaf2..a49974c 100644 --- a/src/file-utils.c +++ b/src/file-utils.c @@ -204,7 +204,7 @@ teco_file_expand_path(const gchar *path) * but it may have been changed later on. */ g_auto(teco_string_t) home = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &home.data, &home.len, NULL) || + if (!qreg->vtable->get_string(qreg, &home.data, &home.len, NULL, NULL) || teco_string_contains(&home, '\0')) return g_strdup(path); g_assert(home.data != NULL); diff --git a/src/glob.c b/src/glob.c index 050ec4d..2c955ee 100644 --- a/src/glob.c +++ b/src/glob.c @@ -318,7 +318,8 @@ teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str, teco_qreg_t *glob_reg = teco_qreg_table_find(&teco_qreg_table_globals, "_", 1); g_assert(glob_reg != NULL); if (!glob_reg->vtable->undo_set_string(glob_reg, error) || - !glob_reg->vtable->set_string(glob_reg, filename, strlen(filename), error)) + !glob_reg->vtable->set_string(glob_reg, filename, strlen(filename), + SC_CP_UTF8, error)) return NULL; } @@ -493,7 +494,8 @@ teco_state_glob_filename_done(teco_machine_main_t *ctx, const teco_string_t *str teco_qreg_t *glob_reg = teco_qreg_table_find(&teco_qreg_table_globals, "_", 1); g_assert(glob_reg != NULL); g_auto(teco_string_t) pattern_str = {NULL, 0}; - if (!glob_reg->vtable->get_string(glob_reg, &pattern_str.data, &pattern_str.len, error)) + if (!glob_reg->vtable->get_string(glob_reg, &pattern_str.data, &pattern_str.len, + NULL, error)) return NULL; if (teco_string_contains(&pattern_str, '\0')) { teco_error_qregcontainsnull_set(error, "_", 1, FALSE); diff --git a/src/help.c b/src/help.c index ef94336..8364496 100644 --- a/src/help.c +++ b/src/help.c @@ -94,7 +94,7 @@ teco_help_init(GError **error) teco_qreg_t *lib_reg = teco_qreg_table_find(&teco_qreg_table_globals, "$SCITECOPATH", 12); g_assert(lib_reg != NULL); g_auto(teco_string_t) lib_path = {NULL, 0}; - if (!lib_reg->vtable->get_string(lib_reg, &lib_path.data, &lib_path.len, error)) + if 
(!lib_reg->vtable->get_string(lib_reg, &lib_path.data, &lib_path.len, NULL, error)) return FALSE; /* * FIXME: lib_path may contain null-bytes. diff --git a/src/interface-gtk/interface.c b/src/interface-gtk/interface.c index 9b2560d..2ad8335 100644 --- a/src/interface-gtk/interface.c +++ b/src/interface-gtk/interface.c @@ -1040,7 +1040,7 @@ teco_interface_event_loop(GError **error) g_assert(scitecoconfig_reg != NULL); g_auto(teco_string_t) scitecoconfig = {NULL, 0}; if (!scitecoconfig_reg->vtable->get_string(scitecoconfig_reg, - &scitecoconfig.data, &scitecoconfig.len, error)) + &scitecoconfig.data, &scitecoconfig.len, NULL, error)) return FALSE; if (teco_string_contains(&scitecoconfig, '\0')) { g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED, diff --git a/src/parser.c b/src/parser.c index 29519b0..aef6223 100644 --- a/src/parser.c +++ b/src/parser.c @@ -628,7 +628,7 @@ teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gchar * FIXME: Should we have a special teco_qreg_get_string_append() function? 
*/ g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; teco_string_append(ctx->result, str.data, str.len); return &teco_state_stringbuilding_start; @@ -657,7 +657,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g return &teco_state_stringbuilding_start; g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; /* * NOTE: g_shell_quote() expects a null-terminated string, so it is @@ -700,7 +700,7 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar return &teco_state_stringbuilding_start; g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; if (teco_string_contains(&str, '\0')) { teco_error_qregcontainsnull_set(error, qreg->head.name.data, qreg->head.name.len, diff --git a/src/qreg-commands.c b/src/qreg-commands.c index 3e3cd0a..e8be384 100644 --- a/src/qreg-commands.c +++ b/src/qreg-commands.c @@ -372,10 +372,14 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, gint args = teco_expressions_args(); if (args > 0) { + guint codepage = SC_CP_UTF8; + if (colon_modified && !qreg->vtable->get_string(qreg, NULL, NULL, &codepage, error)) + return NULL; + g_autofree gchar *buffer = NULL; gsize len = 0; - if (qreg->vtable->get_codepage(qreg) == SC_CP_UTF8) { + if (codepage == SC_CP_UTF8) { /* the glib docs wrongly claim that one character can take 6 bytes */ buffer = g_malloc(4*args); for (gint i = args; i > 0; i--) { @@ -410,7 +414,8 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, } else { /* set register */ if (!qreg->vtable->undo_set_string(qreg, error) || - 
!qreg->vtable->set_string(qreg, buffer, len, error)) + !qreg->vtable->set_string(qreg, buffer, len, + SC_CP_UTF8, error)) return NULL; } } @@ -423,7 +428,8 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, } else { /* set register */ if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, str->data, str->len, error)) + !qreg->vtable->set_string(qreg, str->data, str->len, + SC_CP_UTF8, error)) return NULL; } @@ -487,8 +493,8 @@ teco_state_setqregstring_building_initial(teco_machine_main_t *ctx, GError **err * The expected codepage of string building constructs is determined * by the Q-Register. */ - teco_undo_guint(ctx->expectstring.machine.codepage) = qreg->vtable->get_codepage(qreg); - return TRUE; + teco_undo_guint(ctx->expectstring.machine.codepage); + return qreg->vtable->get_string(qreg, NULL, NULL, &ctx->expectstring.machine.codepage, error); } static teco_state_t * @@ -523,7 +529,7 @@ teco_state_getqregstring_got_register(teco_machine_main_t *ctx, teco_qreg_t *qre g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return NULL; if (str.len > 0) { @@ -767,8 +773,10 @@ teco_state_copytoqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, !qreg->vtable->append_string(qreg, str, len, error)) return NULL; } else { + guint cp = teco_interface_get_codepage(); + if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, str, len, error)) + !qreg->vtable->set_string(qreg, str, len, cp, error)) return NULL; } diff --git a/src/qreg.c b/src/qreg.c index 10aaa86..08bc8fc 100644 --- a/src/qreg.c +++ b/src/qreg.c @@ -89,7 +89,7 @@ teco_qreg_execute(teco_qreg_t *qreg, teco_qreg_table_t *qreg_table_locals, GErro * On the other hand, we will have to validate the * UTF-8 codepoints before execution anyway. 
*/ - if (!qreg->vtable->get_string(qreg, &macro.data, &macro.len, error) || + if (!qreg->vtable->get_string(qreg, &macro.data, &macro.len, NULL, error) || !teco_execute_macro(macro.data, macro.len, qreg_table_locals, error)) { teco_error_add_frame_qreg(qreg->head.name.data, qreg->head.name.len); return FALSE; @@ -210,25 +210,11 @@ teco_qreg_plain_get_integer(teco_qreg_t *qreg, teco_int_t *ret, GError **error) return TRUE; } -static guint -teco_qreg_plain_get_codepage(teco_qreg_t *qreg) -{ - if (teco_qreg_current) - teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - - teco_doc_edit(&qreg->string); - guint ret = teco_view_get_codepage(teco_qreg_view); - - if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); - - return ret; -} - static gboolean -teco_qreg_plain_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error) +teco_qreg_plain_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error) { - teco_doc_set_string(&qreg->string, str, len); + teco_doc_set_string(&qreg->string, str, len, codepage); return TRUE; } @@ -265,9 +251,10 @@ teco_qreg_plain_append_string(teco_qreg_t *qreg, const gchar *str, gsize len, GE } static gboolean -teco_qreg_plain_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error) +teco_qreg_plain_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error) { - teco_doc_get_string(&qreg->string, str, len); + teco_doc_get_string(&qreg->string, str, len, codepage); return TRUE; } @@ -372,7 +359,6 @@ teco_qreg_plain_undo_edit(teco_qreg_t *qreg, GError **error) .set_integer = teco_qreg_plain_set_integer, \ .undo_set_integer = teco_qreg_plain_undo_set_integer, \ .get_integer = teco_qreg_plain_get_integer, \ - .get_codepage = teco_qreg_plain_get_codepage, \ .set_string = teco_qreg_plain_set_string, \ .undo_set_string = teco_qreg_plain_undo_set_string, \ .append_string = teco_qreg_plain_append_string, \ @@ -402,7 +388,7 @@ 
teco_qreg_external_edit(teco_qreg_t *qreg, GError **error) g_auto(teco_string_t) str = {NULL, 0}; if (!teco_qreg_plain_edit(qreg, error) || - !qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + !qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return FALSE; teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); @@ -414,27 +400,19 @@ teco_qreg_external_edit(teco_qreg_t *qreg, GError **error) return TRUE; } -static guint -teco_qreg_external_get_codepage(teco_qreg_t *qreg) -{ - /* - * External registers are always assumed to be UTF-8-encoded. - */ - return SC_CP_UTF8; -} - static gboolean teco_qreg_external_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error) { g_auto(teco_string_t) other_str, own_str = {NULL, 0}; + guint other_cp, own_cp; - teco_doc_get_string(src, &other_str.data, &other_str.len); + teco_doc_get_string(src, &other_str.data, &other_str.len, &other_cp); - if (!qreg->vtable->get_string(qreg, &own_str.data, &own_str.len, error) || - !qreg->vtable->set_string(qreg, other_str.data, other_str.len, error)) + if (!qreg->vtable->get_string(qreg, &own_str.data, &own_str.len, &own_cp, error) || + !qreg->vtable->set_string(qreg, other_str.data, other_str.len, other_cp, error)) return FALSE; - teco_doc_set_string(src, own_str.data, own_str.len); + teco_doc_set_string(src, own_str.data, own_str.len, own_cp); return TRUE; } @@ -454,7 +432,7 @@ teco_qreg_external_get_character(teco_qreg_t *qreg, teco_int_t position, { g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, NULL, error)) return FALSE; if (position < 0 || position >= g_utf8_strlen(str.data, str.len)) { @@ -478,7 +456,7 @@ teco_qreg_external_get_length(teco_qreg_t *qreg, GError **error) { g_auto(teco_string_t) str = {NULL, 0}; - if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error)) + if (!qreg->vtable->get_string(qreg, &str.data, &str.len, 
NULL, error)) return -1; return g_utf8_strlen(str.data, str.len); @@ -489,7 +467,6 @@ teco_qreg_external_get_length(teco_qreg_t *qreg, GError **error) * These rely on custom implementations of get_string() and set_string(). */ #define TECO_INIT_QREG_EXTERNAL(...) TECO_INIT_QREG( \ - .get_codepage = teco_qreg_external_get_codepage, \ .exchange_string = teco_qreg_external_exchange_string, \ .undo_exchange_string = teco_qreg_external_undo_exchange_string, \ .edit = teco_qreg_external_edit, \ @@ -525,7 +502,8 @@ teco_qreg_bufferinfo_get_integer(teco_qreg_t *qreg, teco_int_t *ret, GError **er * Either it renames the current buffer, or opens a file (alternative to EB). */ static gboolean -teco_qreg_bufferinfo_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error) +teco_qreg_bufferinfo_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error) { teco_error_qregopunsupported_set(error, qreg->head.name.data, qreg->head.name.len, FALSE); return FALSE; @@ -554,7 +532,8 @@ teco_qreg_bufferinfo_undo_append_string(teco_qreg_t *qreg, GError **error) * NOTE: The `string` component is currently unused on the "*" register. */ static gboolean -teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error) +teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error) { /* * On platforms with a default non-forward-slash directory @@ -569,6 +548,8 @@ teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErr * NOTE: teco_file_normalize_path() does not change the size of the string. */ *len = teco_ring_current->filename ? 
strlen(teco_ring_current->filename) : 0; + if (codepage) + *codepage = SC_CP_UTF8; return TRUE; } @@ -586,7 +567,6 @@ teco_qreg_bufferinfo_new(void) .undo_append_string = teco_qreg_bufferinfo_undo_append_string, .get_string = teco_qreg_bufferinfo_get_string, /* we don't want to inherit all the other stuff from TECO_INIT_QREG_EXTERNAL(). */ - .get_codepage = teco_qreg_external_get_codepage, .edit = teco_qreg_external_edit, .get_character = teco_qreg_external_get_character, .get_length = teco_qreg_external_get_length @@ -596,7 +576,8 @@ teco_qreg_bufferinfo_new(void) } static gboolean -teco_qreg_workingdir_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error) +teco_qreg_workingdir_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error) { /* * NOTE: Makes sure that `dir` will be null-terminated as str[len] may not be '\0'. @@ -647,7 +628,8 @@ teco_qreg_workingdir_undo_append_string(teco_qreg_t *qreg, GError **error) } static gboolean -teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error) +teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error) { /* * On platforms with a default non-forward-slash directory @@ -664,6 +646,8 @@ teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErr *str = teco_file_normalize_path(dir); else g_free(dir); + if (codepage) + *codepage = SC_CP_UTF8; return TRUE; } @@ -694,7 +678,8 @@ teco_qreg_workingdir_new(void) } static gboolean -teco_qreg_clipboard_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error) +teco_qreg_clipboard_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error) { g_assert(!teco_string_contains(&qreg->head.name, '\0')); const gchar *clipboard_name = qreg->head.name.data + 1; @@ -779,7 +764,8 @@ teco_qreg_clipboard_undo_set_string(teco_qreg_t *qreg, GError **error) } static gboolean 
-teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error) +teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error) { g_assert(!teco_string_contains(&qreg->head.name, '\0')); const gchar *clipboard_name = qreg->head.name.data + 1; @@ -811,6 +797,8 @@ teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErro else teco_string_clear(&str_converted); *len = str_converted.len; + if (codepage) + *codepage = SC_CP_UTF8; return TRUE; } @@ -921,7 +909,8 @@ teco_qreg_table_set_environ(teco_qreg_table_t *table, GError **error) qreg = found; } - if (!qreg->vtable->set_string(qreg, value, strlen(value), error)) + if (!qreg->vtable->set_string(qreg, value, strlen(value), + SC_CP_UTF8, error)) return FALSE; } @@ -976,7 +965,7 @@ teco_qreg_table_get_environ(teco_qreg_table_t *table, GError **error) continue; g_auto(teco_string_t) value = {NULL, 0}; - if (!cur->vtable->get_string(cur, &value.data, &value.len, error)) { + if (!cur->vtable->get_string(cur, &value.data, &value.len, NULL, error)) { g_strfreev(envp); return NULL; } @@ -1070,12 +1059,13 @@ teco_qreg_stack_push(teco_qreg_t *qreg, GError **error) { teco_qreg_stack_entry_t entry; g_auto(teco_string_t) string = {NULL, 0}; + guint codepage; if (!qreg->vtable->get_integer(qreg, &entry.integer, error) || - !qreg->vtable->get_string(qreg, &string.data, &string.len, error)) + !qreg->vtable->get_string(qreg, &string.data, &string.len, &codepage, error)) return FALSE; teco_doc_init(&entry.string); - teco_doc_set_string(&entry.string, string.data, string.len); + teco_doc_set_string(&entry.string, string.data, string.len, codepage); teco_doc_update(&entry.string, &qreg->string); /* pass ownership of entry to teco_qreg_stack */ diff --git a/src/qreg.h b/src/qreg.h index f87b877..8c8764e 100644 --- a/src/qreg.h +++ b/src/qreg.h @@ -41,19 +41,22 @@ extern teco_view_t *teco_qreg_view; * FIXME: Use 
TECO_DECLARE_VTABLE_METHOD(gboolean, teco_qreg, set_integer, teco_qreg_t *, teco_int_t, GError **); * ... * teco_qreg_set_integer_t set_integer; + * ... + * teco_qreg_set_integer(qreg, 23, error); */ typedef const struct { gboolean (*set_integer)(teco_qreg_t *qreg, teco_int_t value, GError **error); gboolean (*undo_set_integer)(teco_qreg_t *qreg, GError **error); gboolean (*get_integer)(teco_qreg_t *qreg, teco_int_t *ret, GError **error); - guint (*get_codepage)(teco_qreg_t *qreg); - gboolean (*set_string)(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error); + gboolean (*set_string)(teco_qreg_t *qreg, const gchar *str, gsize len, + guint codepage, GError **error); gboolean (*undo_set_string)(teco_qreg_t *qreg, GError **error); gboolean (*append_string)(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error); gboolean (*undo_append_string)(teco_qreg_t *qreg, GError **error); - gboolean (*get_string)(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error); + gboolean (*get_string)(teco_qreg_t *qreg, gchar **str, gsize *len, + guint *codepage, GError **error); gboolean (*get_character)(teco_qreg_t *qreg, teco_int_t position, teco_int_t *chr, GError **error); /* always returns length in glyphs in contrast to get_string() */ diff --git a/src/search.c b/src/search.c index 2dff965..cf26c7f 100644 --- a/src/search.c +++ b/src/search.c @@ -250,7 +250,7 @@ teco_class2regexp(teco_search_state_t *state, teco_string_t *pattern, teco_machine_qregspec_reset(teco_search_qreg_machine); g_auto(teco_string_t) str = {NULL, 0}; - if (!reg->vtable->get_string(reg, &str.data, &str.len, error)) + if (!reg->vtable->get_string(reg, &str.data, &str.len, NULL, error)) return NULL; pattern->data++; @@ -677,13 +677,15 @@ teco_state_search_done(teco_machine_main_t *ctx, const teco_string_t *str, GErro undo__teco_interface_ssm(SCI_SETANCHOR, anchor, 0); if (!search_reg->vtable->undo_set_string(search_reg, error) || - !search_reg->vtable->set_string(search_reg, str->data, 
str->len, error)) + !search_reg->vtable->set_string(search_reg, str->data, str->len, + SC_CP_UTF8, error)) return NULL; teco_interface_ssm(SCI_SETANCHOR, anchor, 0); } else { g_auto(teco_string_t) search_str = {NULL, 0}; - if (!search_reg->vtable->get_string(search_reg, &search_str.data, &search_str.len, error) || + if (!search_reg->vtable->get_string(search_reg, &search_str.data, &search_str.len, + NULL, error) || !teco_state_search_process(ctx, &search_str, search_str.len, error)) return NULL; } @@ -1075,11 +1077,13 @@ teco_state_replace_default_insert_done_overwrite(teco_machine_main_t *ctx, const if (str->len > 0) { if (!replace_reg->vtable->undo_set_string(replace_reg, error) || - !replace_reg->vtable->set_string(replace_reg, str->data, str->len, error)) + !replace_reg->vtable->set_string(replace_reg, str->data, str->len, + SC_CP_UTF8, error)) return NULL; } else { g_auto(teco_string_t) replace_str = {NULL, 0}; - if (!replace_reg->vtable->get_string(replace_reg, &replace_str.data, &replace_str.len, error) || + if (!replace_reg->vtable->get_string(replace_reg, &replace_str.data, &replace_str.len, + NULL, error) || (replace_str.len > 0 && !teco_state_insert_process(ctx, &replace_str, replace_str.len, error))) return NULL; } @@ -1106,7 +1110,8 @@ teco_state_replace_default_ignore_done(teco_machine_main_t *ctx, const teco_stri g_assert(replace_reg != NULL); if (!replace_reg->vtable->undo_set_string(replace_reg, error) || - !replace_reg->vtable->set_string(replace_reg, str->data, str->len, error)) + !replace_reg->vtable->set_string(replace_reg, str->data, str->len, + SC_CP_UTF8, error)) return NULL; return &teco_state_start; diff --git a/src/spawn.c b/src/spawn.c index 4317288..c6dd779 100644 --- a/src/spawn.c +++ b/src/spawn.c @@ -121,7 +121,7 @@ teco_parse_shell_command_line(const gchar *cmdline, GError **error) teco_qreg_t *reg = teco_qreg_table_find(&teco_qreg_table_globals, "$COMSPEC", 8); g_assert(reg != NULL); teco_string_t comspec; - if 
(!reg->vtable->get_string(reg, &comspec.data, &comspec.len, error)) + if (!reg->vtable->get_string(reg, &comspec.data, &comspec.len, NULL, error)) return NULL; argv = g_new(gchar *, 5); @@ -140,7 +140,7 @@ teco_parse_shell_command_line(const gchar *cmdline, GError **error) teco_qreg_t *reg = teco_qreg_table_find(&teco_qreg_table_globals, "$SHELL", 6); g_assert(reg != NULL); teco_string_t shell; - if (!reg->vtable->get_string(reg, &shell.data, &shell.len, error)) + if (!reg->vtable->get_string(reg, &shell.data, &shell.len, NULL, error)) return NULL; argv = g_new(gchar *, 4); @@ -673,6 +673,8 @@ teco_spawn_stdout_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer da /* source has already been dispatched */ return G_SOURCE_REMOVE; + teco_qreg_t *qreg = teco_spawn_ctx.register_argument; + for (;;) { teco_string_t buffer; @@ -691,20 +693,16 @@ teco_spawn_stdout_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer da if (!buffer.len) return G_SOURCE_CONTINUE; - if (teco_spawn_ctx.register_argument) { + if (qreg) { if (teco_spawn_ctx.text_added) { - if (!teco_spawn_ctx.register_argument->vtable->undo_append_string(teco_spawn_ctx.register_argument, - &teco_spawn_ctx.error) || - !teco_spawn_ctx.register_argument->vtable->append_string(teco_spawn_ctx.register_argument, - buffer.data, buffer.len, - &teco_spawn_ctx.error)) + if (!qreg->vtable->undo_append_string(qreg, &teco_spawn_ctx.error) || + !qreg->vtable->append_string(qreg, buffer.data, buffer.len, + &teco_spawn_ctx.error)) goto error; } else { - if (!teco_spawn_ctx.register_argument->vtable->undo_set_string(teco_spawn_ctx.register_argument, - &teco_spawn_ctx.error) || - !teco_spawn_ctx.register_argument->vtable->set_string(teco_spawn_ctx.register_argument, - buffer.data, buffer.len, - &teco_spawn_ctx.error)) + if (!qreg->vtable->undo_set_string(qreg, &teco_spawn_ctx.error) || + !qreg->vtable->set_string(qreg, buffer.data, buffer.len, + SC_CP_UTF8, &teco_spawn_ctx.error)) goto error; } } else { -- cgit 
v1.2.3