From 34683e89478962874f64b06e353bb4d6b9f472ae Mon Sep 17 00:00:00 2001
From: Robin Haberkorn <robin.haberkorn@googlemail.com>
Date: Sat, 31 Aug 2024 02:33:42 +0200
Subject: reserve at most 4 bytes for UTF-8 encoded characters (refs #5)

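There is a widespread myth that UTF-8 encoded characters could take up to
6 bytes. That limit stems from the obsolete RFC 2279 definition; RFC 3629
restricts UTF-8 to code points up to U+10FFFF, which need at most 4 bytes.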
---
 src/core-commands.c | 2 +-
 src/qreg-commands.c | 3 ++-
 src/qreg.c          | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/core-commands.c b/src/core-commands.c
index c6a9d5f..951e001 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -1048,7 +1048,7 @@ teco_state_start_get(teco_machine_main_t *ctx, GError **error)
 	teco_int_t ret;
 
 	if (teco_interface_ssm(SCI_GETCODEPAGE, 0, 0) == SC_CP_UTF8) {
-		gchar buf[6+1];
+		gchar buf[4+1];
 		struct Sci_TextRangeFull range = {
 			.chrg = {get_pos, MIN(len, get_pos+sizeof(buf)-1)},
 			.lpstrText = buf
diff --git a/src/qreg-commands.c b/src/qreg-commands.c
index 089f2a5..d7bfafe 100644
--- a/src/qreg-commands.c
+++ b/src/qreg-commands.c
@@ -374,7 +374,8 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx,
 		gsize len = 0;
 
 		if (qreg->vtable->get_codepage(qreg) == SC_CP_UTF8) {
-			buffer = g_malloc(6*args);
+			/* the glib docs wrongly claim that one character can take 6 bytes */
+			buffer = g_malloc(4*args);
 			for (gint i = args; i > 0; i--) {
 				teco_int_t v;
 				if (!teco_expressions_pop_num_calc(&v, 0, error))
diff --git a/src/qreg.c b/src/qreg.c
index e17bf4d..4432cbf 100644
--- a/src/qreg.c
+++ b/src/qreg.c
@@ -285,7 +285,7 @@ teco_qreg_plain_get_character(teco_qreg_t *qreg, teco_int_t position,
 		ret = FALSE;
 		/* make sure we still restore the current Q-Register */
 	} else if (teco_view_ssm(teco_qreg_view, SCI_GETCODEPAGE, 0, 0) == SC_CP_UTF8) {
-		gchar buf[6+1];
+		gchar buf[4+1];
 		struct Sci_TextRangeFull range = {
 			.chrg = {off, MIN(len, off+sizeof(buf)-1)},
 			.lpstrText = buf
-- 
cgit v1.2.3