From 893a0a6ad85411a57c1225af03260b34561377c7 Mon Sep 17 00:00:00 2001
From: Robin Haberkorn <robin.haberkorn@googlemail.com>
Date: Wed, 4 Sep 2024 18:26:00 +0200
Subject: leave some comments on what to do when converting the parser to
 Unicode (refs #5)

---
 src/qreg.c         |  9 +++++++++
 src/string-utils.c | 13 ++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/qreg.c b/src/qreg.c
index 2c2b6ad..10aaa86 100644
--- a/src/qreg.c
+++ b/src/qreg.c
@@ -83,6 +83,12 @@ teco_qreg_execute(teco_qreg_t *qreg, teco_qreg_table_t *qreg_table_locals, GErro
 {
 	g_auto(teco_string_t) macro = {NULL, 0};
 
+	/*
+	 * FIXME: Once we have a Unicode-aware parser,
+	 * we should probably check the encoding of the register.
+	 * On the other hand, we will have to validate the
+	 * UTF-8 codepoints before execution anyway.
+	 */
 	if (!qreg->vtable->get_string(qreg, &macro.data, &macro.len, error) ||
 	    !teco_execute_macro(macro.data, macro.len, qreg_table_locals, error)) {
 		teco_error_add_frame_qreg(qreg->head.name.data, qreg->head.name.len);
@@ -1310,6 +1316,7 @@ teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gchar chr,
 	if (!ctx->parse_only) {
 		if (ctx->parent.must_undo)
 			undo__teco_string_truncate(&ctx->name, ctx->name.len);
+		/* FIXME: g_unichar_toupper() once we have a Unicode-conforming parser */
 		teco_string_append_c(&ctx->name, g_ascii_toupper(chr));
 	}
 	return teco_state_qregspec_done(ctx, error);
@@ -1334,6 +1341,7 @@ teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gchar chr, GEr
 	if (!ctx->parse_only) {
 		if (ctx->parent.must_undo)
 			undo__teco_string_truncate(&ctx->name, ctx->name.len);
+		/* FIXME: g_unichar_toupper() once we have a Unicode-conforming parser */
 		teco_string_append_c(&ctx->name, g_ascii_toupper(chr));
 	}
 	return &teco_state_qregspec_secondchar;
@@ -1352,6 +1360,7 @@ teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gchar chr, GE
 	if (!ctx->parse_only) {
 		if (ctx->parent.must_undo)
 			undo__teco_string_truncate(&ctx->name, ctx->name.len);
+		/* FIXME: g_unichar_toupper() once we have a Unicode-conforming parser */
 		teco_string_append_c(&ctx->name, g_ascii_toupper(chr));
 	}
 	return teco_state_qregspec_done(ctx, error);
diff --git a/src/string-utils.c b/src/string-utils.c
index e1013f5..ac5835b 100644
--- a/src/string-utils.c
+++ b/src/string-utils.c
@@ -91,7 +91,18 @@ teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len)
 	return len;
 }
 
-/** @memberof teco_string_t */
+/**
+ * Get the length of the prefix common to two strings
+ * without considering case.
+ *
+ * @fixme This is currently only used for symbols and one/two letter
+ * Q-Register names, which cannot be UTF-8.
+ * If we rewrote this to perform Unicode case folding, we would
+ * also have to check for character validity.
+ * Once our parser is Unicode-aware, this check will no longer be necessary.
+ *
+ * @memberof teco_string_t
+ */
 gsize
 teco_string_casediff(const teco_string_t *a, const gchar *b, gsize b_len)
 {
-- 
cgit v1.2.3