From b31b88717172e22b49c0493185f603b8f84989ec Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Wed, 4 Sep 2024 12:49:29 +0200 Subject: the ^EUq string building escape now respects the encoding (can insert bytes or codepoints) (refs #5) * This is trickier than it sounds because there isn't one single place to consult. It depends on the context. If the string argument relates to buffer contents - as in I, S, etc. - the buffer's encoding is consulted. If it goes into a register (EU), the register's encoding is consulted. Everything else (O, EN, EC, ES...) expects only Unicode codepoints. * This is communicated through a new field teco_machine_stringbuilding_t::codepage which must be set in the states' initial callback. * Seems overkill just for ^EUq, but it can be used for context-sensitive processing of all the other string building constructs as well. * ^V and ^W cannot be supported for Unicode characters for the time being without a Unicode-aware parser --- src/view.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/view.h') diff --git a/src/view.h b/src/view.h index 882a33c..8f54fdd 100644 --- a/src/view.h +++ b/src/view.h @@ -71,6 +71,13 @@ gboolean teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError /** @pure @memberof teco_view_t */ void teco_view_free(teco_view_t *ctx); +static inline guint +teco_view_get_codepage(teco_view_t *ctx) +{ + return teco_view_ssm(ctx, SCI_GETCODEPAGE, 0, 0) + ? : teco_view_ssm(ctx, SCI_STYLEGETCHARACTERSET, STYLE_DEFAULT, 0); +} + gssize teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos); teco_int_t teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos); gssize teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n); -- cgit v1.2.3