From b31b88717172e22b49c0493185f603b8f84989ec Mon Sep 17 00:00:00 2001
From: Robin Haberkorn <robin.haberkorn@googlemail.com>
Date: Wed, 4 Sep 2024 12:49:29 +0200
Subject: the ^EUq string building escape now respects the encoding (can insert
 bytes or codepoints) (refs #5)

* This is trickier than it sounds because there isn't one single place to consult.
  It depends on the context.
  If the string argument relates to buffer contents - as in <I>, <S>, <FR> etc. -
  the buffer's encoding is consulted.
  If it goes into a register (EU), the register's encoding is consulted.
  Everything else (O, EN, EC, ES...) expects only Unicode codepoints.
* This is communicated through a new field teco_machine_stringbuilding_t::codepage
  which must be set in the states' initial callback.
* Seems overkill just for ^EUq, but it can be used for context-sensitive
  processing of all the other string building constructs as well.
* ^V and ^W cannot be supported for Unicode characters for the time being without a Unicode-aware parser.
---
 src/view.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src/view.h')
diff --git a/src/view.h b/src/view.h
index 882a33c..8f54fdd 100644
--- a/src/view.h
+++ b/src/view.h
@@ -71,6 +71,13 @@ gboolean teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError
 /** @pure @memberof teco_view_t */
 void teco_view_free(teco_view_t *ctx);
 
+/** Code page of the view; if that is 0, the character set of STYLE_DEFAULT. @memberof teco_view_t */
+static inline guint teco_view_get_codepage(teco_view_t *ctx)
+{
+	guint codepage = teco_view_ssm(ctx, SCI_GETCODEPAGE, 0, 0);
+	return codepage ? codepage : teco_view_ssm(ctx, SCI_STYLEGETCHARACTERSET, STYLE_DEFAULT, 0);
+}
+
 gssize teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos);
 teco_int_t teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos);
 gssize teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n);
-- 
cgit v1.2.3