From 34683e89478962874f64b06e353bb4d6b9f472ae Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Sat, 31 Aug 2024 02:33:42 +0200 Subject: reserve at most 4 bytes for UTF-8 encoded characters (refs #5) There is a widespread myth that they could take up to 6 bytes. Since RFC 3629, UTF-8 is limited to code points up to U+10FFFF, i.e. at most 4 bytes per character; the 5- and 6-byte forms belong to the obsolete RFC 2279. --- src/core-commands.c | 2 +- src/qreg-commands.c | 3 ++- src/qreg.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core-commands.c b/src/core-commands.c index c6a9d5f..951e001 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -1048,7 +1048,7 @@ teco_state_start_get(teco_machine_main_t *ctx, GError **error) teco_int_t ret; if (teco_interface_ssm(SCI_GETCODEPAGE, 0, 0) == SC_CP_UTF8) { - gchar buf[6+1]; + gchar buf[4+1]; struct Sci_TextRangeFull range = { .chrg = {get_pos, MIN(len, get_pos+sizeof(buf)-1)}, .lpstrText = buf diff --git a/src/qreg-commands.c b/src/qreg-commands.c index 089f2a5..d7bfafe 100644 --- a/src/qreg-commands.c +++ b/src/qreg-commands.c @@ -374,7 +374,8 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, gsize len = 0; if (qreg->vtable->get_codepage(qreg) == SC_CP_UTF8) { - buffer = g_malloc(6*args); + /* the glib docs wrongly claim that one character can take 6 bytes */ + buffer = g_malloc(4*args); for (gint i = args; i > 0; i--) { teco_int_t v; if (!teco_expressions_pop_num_calc(&v, 0, error)) diff --git a/src/qreg.c b/src/qreg.c index e17bf4d..4432cbf 100644 --- a/src/qreg.c +++ b/src/qreg.c @@ -285,7 +285,7 @@ teco_qreg_plain_get_character(teco_qreg_t *qreg, teco_int_t position, ret = FALSE; /* make sure we still restore the current Q-Register */ } else if (teco_view_ssm(teco_qreg_view, SCI_GETCODEPAGE, 0, 0) == SC_CP_UTF8) { - gchar buf[6+1]; + gchar buf[4+1]; struct Sci_TextRangeFull range = { .chrg = {off, MIN(len, off+sizeof(buf)-1)}, .lpstrText = buf -- cgit v1.2.3