29 files changed, 325 insertions, 202 deletions
diff --git a/doc/sciteco.7.template b/doc/sciteco.7.template
index a5b7f4a..d0574d7 100644
--- a/doc/sciteco.7.template
+++ b/doc/sciteco.7.template
@@ -87,10 +87,6 @@ regular commands for command-line editing.
 .
 When the user presses a key or key-combination it is first translated
 to an UTF-8 string.
-All immediate editing commands and regular \*(ST commands however operate on
-a language based solely on
-.B ASCII
-codes, which is a subset of Unicode.
 The rules for translating keys are as follows:
 .RS
 .IP 1. 4
@@ -138,6 +134,18 @@ This feature is called function key macros and explained in the
 next subsection.
 .RE
 .
+.LP
+All immediate editing commands and regular \*(ST commands however operate on
+a language based solely on
+.B ASCII
+codes, which is a subset of Unicode.
+\# This is because we cannot assume the presence of any particular non-ANSI
+\# symbol on a user's keyboard.
+Since the \*(ST parser is Unicode-aware, this does not exclude
+using Unicode glyphs wherever a single character is expected,
+i.e. \fB^^\fIx\fR and \fBU\fIq\fR work with arbitrary Unicode glyphs.
+All \*(ST macros must be in valid UTF-8.
+.
 .SS Function Key Macros
 .
 .SCITECO_TOPIC "function key"
@@ -1082,8 +1090,8 @@ Consequently when querying the code at a character position
 or inserting characters by code, the code may be an Unicode
 codepoint instead of byte-sized integer.
 .LP
-Currently, \*(ST supports UTF-8 and single-byte ANSI encodings,
-that can also be used for editing raw binary files.
+Currently, \*(ST supports buffers in UTF-8 and single-byte
+ANSI encodings, which can also be used for editing raw binary files.
 \# You can configure other single-byte code pages with EE,
 \# but there isn't yet any way to insert characters.
 UTF-8 is the default codepage for new buffers and Q-Registers
diff --git a/src/cmdline.c b/src/cmdline.c
index 47ef86f..be7a5b1 100644
--- a/src/cmdline.c
+++ b/src/cmdline.c
@@ -194,7 +194,7 @@ teco_cmdline_rubin(GError **error)
 }
 
 gboolean
-teco_cmdline_keypress_c(gchar key, GError **error)
+teco_cmdline_keypress_wc(gunichar key, GError **error)
 {
 	teco_machine_t *machine = &teco_cmdline.machine.parent;
 	g_autoptr(GError) tmp_error = NULL;
@@ -283,6 +283,30 @@ teco_cmdline_keypress_c(gchar key, GError **error)
 	return TRUE;
 }
 
+/*
+ * Feed a UTF-8 string to the command line, one codepoint at a time.
+ * Usually called with single keys, but teco_cmdline_fnmacro() can emulate
+ * many key presses at once. Null bytes are passed through as U+0000 since
+ * g_utf8_get_char_validated() rejects them when given an explicit length.
+ * FIXME: If one character causes an error, we should rub out the entire string.
+ */
+gboolean
+teco_cmdline_keypress(const gchar *str, gsize len, GError **error)
+{
+	for (gsize i = 0; i < len; i += g_utf8_next_char(str+i) - (str+i)) {
+		gunichar chr = str[i] ? g_utf8_get_char_validated(str+i, len-i) : 0;
+		if ((gint32)chr < 0) {
+			g_set_error_literal(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
+			                    "Invalid UTF-8 sequence");
+			return FALSE;
+		}
+		if (!teco_cmdline_keypress_wc(chr, error))
+			return FALSE;
+	}
+
+	return TRUE;
+}
+
 gboolean
 teco_cmdline_fnmacro(const gchar *name, GError **error)
 {
@@ -361,7 +385,7 @@ teco_cmdline_cleanup(void)
  */
 
 gboolean
-teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
 	switch (key) {
 	case '\n': /* insert EOL sequence */
@@ -431,23 +455,30 @@ teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gch
 	}
 
 	teco_interface_popup_clear();
-	return teco_cmdline_insert(&key, sizeof(key), error);
+
+	gchar buf[6];
+	gsize len = g_unichar_to_utf8(key, buf);
+	return teco_cmdline_insert(buf, len, error);
 }
 
 gboolean
-teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
+	/*
+	 * Auto case folding is for syntactic characters,
+	 * so this could be done by working only with a-z and A-Z.
+	 * However, it's also not speed critical.
+	 */
 	if (teco_ed & TECO_ED_AUTOCASEFOLD)
-		/* will not modify non-letter keys */
-		key = g_ascii_islower(key) ? g_ascii_toupper(key)
-		                           : g_ascii_tolower(key);
+		key = g_unichar_islower(key) ? g_unichar_toupper(key)
+		                             : g_unichar_tolower(key);
 
 	return teco_state_process_edit_cmd(ctx, parent_ctx, key, error);
 }
 
 gboolean
 teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
-                                                 gchar key, GError **error)
+                                                 gunichar key, GError **error)
 {
 	teco_state_t *current = ctx->parent.current;
 
@@ -597,7 +628,7 @@ teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *
 
 gboolean
 teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
-                                                gchar chr, GError **error)
+                                                gunichar chr, GError **error)
 {
 	g_assert(ctx->machine_qregspec != NULL);
 	/* We downcast since teco_machine_qregspec_t is private in qreg.c */
@@ -606,7 +637,7 @@ teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *c
 }
 
 gboolean
-teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
 	teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
 	teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -614,7 +645,7 @@ teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_
 }
 
 gboolean
-teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
 	teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
 	teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -650,7 +681,7 @@ teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *par
 }
 
 gboolean
-teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
 	teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
 	teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -720,8 +751,8 @@ teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t
 		gboolean unambiguous = teco_file_auto_complete(ctx->expectstring.string.data, G_FILE_TEST_EXISTS, &new_chars);
 		teco_machine_stringbuilding_escape(stringbuilding_ctx, new_chars.data, new_chars.len, &new_chars_escaped);
 		if (unambiguous && ctx->expectstring.nesting == 1)
-			teco_string_append_c(&new_chars_escaped,
-			                     ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char);
+			teco_string_append_wc(&new_chars_escaped,
+			                      ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char);
 
 		return teco_cmdline_insert(new_chars_escaped.data, new_chars_escaped.len, error);
 	}
@@ -731,7 +762,7 @@ teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t
 }
 
 gboolean
-teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
 	teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
 	teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -773,7 +804,7 @@ teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *
 }
 
 gboolean
-teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
 	g_assert(ctx->expectqreg != NULL);
 	/*
@@ -785,7 +816,7 @@ teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t
 }
 
 gboolean
-teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
 	switch (key) {
 	case '\t': { /* autocomplete Q-Register name */
@@ -820,7 +851,7 @@ teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_
 }
 
 gboolean
-teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
 	teco_machine_stringbuilding_t *stringbuilding_ctx = teco_machine_qregspec_get_stringbuilding(ctx);
 	teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -860,7 +891,7 @@ teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_m
 }
 
 gboolean
-teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
 	teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
 	teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -905,7 +936,7 @@ teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *pa
 }
 
 gboolean
-teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
 	teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
 	teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -950,7 +981,7 @@ teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_mac
 }
 
 gboolean
-teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
 	teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
 	teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -997,7 +1028,7 @@ teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *paren
 }
 
 gboolean
-teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error)
+teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error)
 {
 	teco_machine_stringbuilding_t *stringbuilding_ctx = &ctx->expectstring.machine;
 	teco_state_t *stringbuilding_current = stringbuilding_ctx->parent.current;
@@ -1028,8 +1059,8 @@ teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *paren
 		gboolean unambiguous = teco_help_auto_complete(ctx->expectstring.string.data, &new_chars);
 		teco_machine_stringbuilding_escape(stringbuilding_ctx, new_chars.data, new_chars.len, &new_chars_escaped);
 		if (unambiguous && ctx->expectstring.nesting == 1)
-			teco_string_append_c(&new_chars_escaped,
-			                     ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char);
+			teco_string_append_wc(&new_chars_escaped,
+			                      ctx->expectstring.machine.escape_char == '{' ? '}' : ctx->expectstring.machine.escape_char);
 
 		return new_chars_escaped.len ? teco_cmdline_insert(new_chars_escaped.data, new_chars_escaped.len, error) : TRUE;
 	}
diff --git a/src/cmdline.h b/src/cmdline.h
index 7f40b5f..78d101c 100644
--- a/src/cmdline.h
+++ b/src/cmdline.h
@@ -64,16 +64,8 @@ gboolean teco_cmdline_insert(const gchar *data, gsize len, GError **error);
 
 gboolean teco_cmdline_rubin(GError **error);
 
-gboolean teco_cmdline_keypress_c(gchar key, GError **error);
-
-static inline gboolean
-teco_cmdline_keypress(const gchar *str, gsize len, GError **error)
-{
-	for (guint i = 0; i < len; i++)
-		if (!teco_cmdline_keypress_c(str[i], error))
-			return FALSE;
-	return TRUE;
-}
+gboolean teco_cmdline_keypress_wc(gunichar key, GError **error);
+gboolean teco_cmdline_keypress(const gchar *str, gsize len, GError **error);
 
 gboolean teco_cmdline_fnmacro(const gchar *name, GError **error);
 
diff --git a/src/core-commands.c b/src/core-commands.c
index 3686624..ef763d5 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -45,7 +45,7 @@
 #include "goto-commands.h"
 #include "core-commands.h"
 
-static teco_state_t *teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error);
+static teco_state_t *teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error);
 
 /*
  * NOTE: This needs some extra code in teco_state_start_input().
@@ -1049,7 +1049,7 @@ teco_state_start_get(teco_machine_main_t *ctx, GError **error)
 }
 
 static teco_state_t *
-teco_state_start_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_start_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
 {
 	static teco_machine_main_transition_t transitions[] = {
 		/*
@@ -1388,7 +1388,7 @@ teco_state_fcommand_cond_else(teco_machine_main_t *ctx, GError **error)
 }
 
 static teco_state_t *
-teco_state_fcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_fcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
 {
 	static teco_machine_main_transition_t transitions[] = {
 		/*
@@ -1512,7 +1512,7 @@ teco_state_changedir_done(teco_machine_main_t *ctx, const teco_string_t *str, GE
 TECO_DEFINE_STATE_EXPECTDIR(teco_state_changedir);
 
 static teco_state_t *
-teco_state_condcommand_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_condcommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
 {
 	teco_int_t value = 0;
 	gboolean result = TRUE;
@@ -1800,7 +1800,7 @@ teco_state_control_glyphs2bytes(teco_machine_main_t *ctx, GError **error)
 }
 
 static teco_state_t *
-teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_control_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
 {
 	static teco_machine_main_transition_t transitions[] = {
 		/*
@@ -1841,10 +1841,10 @@ teco_state_control_input(teco_machine_main_t *ctx, gchar chr, GError **error)
 TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_control);
 
 static teco_state_t *
-teco_state_ascii_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_ascii_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
 {
 	if (ctx->mode == TECO_MODE_NORMAL)
-		teco_expressions_push((guchar)chr);
+		teco_expressions_push(chr);
 
 	return &teco_state_start;
 }
@@ -1877,7 +1877,7 @@ TECO_DEFINE_STATE(teco_state_ascii);
  * only be seen when executing the following command.
  */
 static teco_state_t *
-teco_state_escape_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_escape_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
 {
 	/*$ ^[^[ ^[$ $$ terminate return
 	 * [a1,a2,...]$$ -- Terminate command line or return from macro
@@ -2700,7 +2700,7 @@ teco_state_ecommand_exit(teco_machine_main_t *ctx, GError **error)
 }
 
 static teco_state_t *
-teco_state_ecommand_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_ecommand_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
 {
 	static teco_machine_main_transition_t transitions[] = {
 		/*
@@ -2874,10 +2874,9 @@ teco_state_insert_indent_initial(teco_machine_main_t *ctx, GError **error)
 		len -= teco_interface_ssm(SCI_GETCOLUMN,
 		                          teco_interface_ssm(SCI_GETCURRENTPOS, 0, 0), 0) % len;
 
-		gchar spaces[len];
-
-		memset(spaces, ' ', sizeof(spaces));
-		teco_interface_ssm(SCI_ADDTEXT, sizeof(spaces), (sptr_t)spaces);
+		gchar space = ' ';
+		while (len-- > 0)
+			teco_interface_ssm(SCI_ADDTEXT, 1, (sptr_t)&space);
 	}
 	teco_interface_ssm(SCI_ENDUNDOACTION, 0, 0);
 	teco_ring_dirtify();
diff --git a/src/core-commands.h b/src/core-commands.h
index 370c7ba..e30770d 100644
--- a/src/core-commands.h
+++ b/src/core-commands.h
@@ -43,7 +43,7 @@ gboolean teco_state_insert_process(teco_machine_main_t *ctx, const teco_string_t
                                    gsize new_chars, GError **error);
 
 /* in cmdline.c */
-gboolean teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error);
+gboolean teco_state_insert_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error);
 
 /**
  * @class TECO_DEFINE_STATE_INSERT
diff --git a/src/error.h b/src/error.h
index f60be1a..7543d02 100644
--- a/src/error.h
+++ b/src/error.h
@@ -61,10 +61,13 @@ typedef enum {
 } teco_error_t;
 
 static inline void
-teco_error_syntax_set(GError **error, gchar chr)
+teco_error_syntax_set(GError **error, gunichar chr)
 {
+	/* Encode to UTF-8 ourselves: "%C" depends on wchar_t width and LC_CTYPE. */
+	gchar buf[6];
+	gint len = g_unichar_to_utf8(chr, buf);
 	g_set_error(error, TECO_ERROR, TECO_ERROR_SYNTAX,
-	            "Syntax error \"%c\" (%d)", chr, chr);
+	            "Syntax error \"%.*s\" (U+%04" G_GINT32_MODIFIER "X)", len, buf, chr);
 }
 
 static inline void
diff --git a/src/expressions.c b/src/expressions.c
index ef785e0..1ba8706 100644
--- a/src/expressions.c
+++ b/src/expressions.c
@@ -114,10 +114,15 @@ teco_expressions_pop_num_calc(teco_int_t *ret, teco_int_t imply, GError **error)
 }
 
 void
-teco_expressions_add_digit(gchar digit)
+teco_expressions_add_digit(gunichar digit)
 {
 	teco_int_t n = teco_expressions_args() > 0 ? teco_expressions_pop_num(0) : 0;
 
-	teco_expressions_push(n*teco_radix + (n < 0 ? -1 : 1)*(digit - '0'));
+	/*
+	 * gunichar is unsigned: without the cast, -1*(digit - '0') would be
+	 * computed in unsigned arithmetic and wrap around for negative n.
+	 * use g_unichar_digit_value()?
+	 */
+	teco_expressions_push(n*teco_radix + (n < 0 ? -1 : 1)*((gint)digit - '0'));
 }
 
diff --git a/src/expressions.h b/src/expressions.h
index 24c5eff..68d8ddb 100644
--- a/src/expressions.h
+++ b/src/expressions.h
@@ -123,7 +123,7 @@ teco_int_t teco_expressions_peek_num(guint index);
 teco_int_t teco_expressions_pop_num(guint index);
 gboolean teco_expressions_pop_num_calc(teco_int_t *ret, teco_int_t imply, GError **error);
 
-void teco_expressions_add_digit(gchar digit);
+void teco_expressions_add_digit(gunichar digit);
 
 void teco_expressions_push_op(teco_operator_t op);
 gboolean teco_expressions_push_calc(teco_operator_t op, GError **error);
diff --git a/src/goto-commands.c b/src/goto-commands.c
index 2326f64..bf80c0b 100644
--- a/src/goto-commands.c
+++ b/src/goto-commands.c
@@ -53,7 +53,7 @@ teco_state_label_initial(teco_machine_main_t *ctx, GError **error)
  * I'm unsure whether !-signs should be allowed within comments.
  */
 static teco_state_t *
-teco_state_label_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_label_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
 {
 	if (chr == '!') {
 		/*
@@ -85,7 +85,7 @@ teco_state_label_input(teco_machine_main_t *ctx, gchar chr, GError **error)
 
 	if (ctx->parent.must_undo)
 		undo__teco_string_truncate(&ctx->goto_label, ctx->goto_label.len);
-	teco_string_append_c(&ctx->goto_label, chr);
+	teco_string_append_wc(&ctx->goto_label, chr);
 	return &teco_state_label;
 }
 
@@ -138,7 +138,7 @@ teco_state_goto_done(teco_machine_main_t *ctx, const teco_string_t *str, GError
 }
 
 /* in cmdline.c */
-gboolean teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error);
+gboolean teco_state_goto_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error);
 
 /*$ O
  * Olabel$ -- Go to label
diff --git a/src/help.c b/src/help.c
index 8364496..9ee7239 100644
--- a/src/help.c
+++ b/src/help.c
@@ -314,7 +314,7 @@ teco_state_help_done(teco_machine_main_t *ctx, const teco_string_t *str, GError
 }
 
 /* in cmdline.c */
-gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error);
+gboolean teco_state_help_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error);
 
 /*$ "?" help
  * ?[topic]$ -- Get help for topic
diff --git a/src/interface-curses/interface.c b/src/interface-curses/interface.c
index 443a903..96254a9 100644
--- a/src/interface-curses/interface.c
+++ b/src/interface-curses/interface.c
@@ -1582,6 +1582,9 @@ teco_interface_blocking_getch(void)
 void
 teco_interface_event_loop_iter(void)
 {
+	static gchar keybuf[4];
+	static gint keybuf_i = 0;
+
 	gint key = g_queue_is_empty(teco_interface.input_queue)
 			? teco_interface_blocking_getch()
 			: GPOINTER_TO_INT(g_queue_pop_head(teco_interface.input_queue));
@@ -1610,14 +1613,14 @@ teco_interface_event_loop_iter(void)
 		 * backspace.
 		 * In SciTECO backspace is normalized to ^H.
 		 */
-		if (!teco_cmdline_keypress_c(TECO_CTL_KEY('H'),
-		                             &teco_interface.event_loop_error))
+		if (!teco_cmdline_keypress_wc(TECO_CTL_KEY('H'),
+		                              &teco_interface.event_loop_error))
 			return;
 		break;
 	case KEY_ENTER:
 	case '\r':
 	case '\n':
-		if (!teco_cmdline_keypress_c('\n', &teco_interface.event_loop_error))
+		if (!teco_cmdline_keypress_wc('\n', &teco_interface.event_loop_error))
 			return;
 		break;
 
@@ -1658,8 +1661,19 @@ teco_interface_event_loop_iter(void)
 	 * Control keys and keys with printable representation
 	 */
 	default:
-		if (key <= 0xFF &&
-		    !teco_cmdline_keypress_c(key, &teco_interface.event_loop_error))
+		if (key > 0xFF)
+			return;
+
+		/*
+		 * NOTE: wget_wch() would need widechar Curses, so assemble UTF-8 here.
+		 * A 0 key (^@) is U+0000; g_utf8_get_char_validated() rejects nul bytes.
+		 */
+		keybuf[keybuf_i++] = key;
+		gunichar cp = *keybuf ? g_utf8_get_char_validated(keybuf, keybuf_i) : 0;
+		if (keybuf_i >= sizeof(keybuf) || cp != (gunichar)-2)
+			keybuf_i = 0;
+		if ((gint32)cp < 0 ||
+		    !teco_cmdline_keypress_wc(cp, &teco_interface.event_loop_error))
+			return;
 	}
 
diff --git a/src/interface-gtk/interface.c b/src/interface-gtk/interface.c
index 2ad8335..9c1ce6a 100644
--- a/src/interface-gtk/interface.c
+++ b/src/interface-gtk/interface.c
@@ -927,19 +927,19 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error)
 
 	switch (event->keyval) {
 	case GDK_KEY_Escape:
-		if (!teco_cmdline_keypress_c('\e', error))
+		if (!teco_cmdline_keypress_wc('\e', error))
 			return FALSE;
 		break;
 	case GDK_KEY_BackSpace:
-		if (!teco_cmdline_keypress_c(TECO_CTL_KEY('H'), error))
+		if (!teco_cmdline_keypress_wc(TECO_CTL_KEY('H'), error))
 			return FALSE;
 		break;
 	case GDK_KEY_Tab:
-		if (!teco_cmdline_keypress_c('\t', error))
+		if (!teco_cmdline_keypress_wc('\t', error))
 			return FALSE;
 		break;
 	case GDK_KEY_Return:
-		if (!teco_cmdline_keypress_c('\n', error))
+		if (!teco_cmdline_keypress_wc('\n', error))
 			return FALSE;
 		break;
 
@@ -994,7 +994,7 @@ teco_interface_handle_key_press(GdkEventKey *event, GError **error)
 		if ((event->state & (GDK_CONTROL_MASK | GDK_MOD1_MASK)) == GDK_CONTROL_MASK) {
 			gchar c = teco_interface_get_ansi_key(event);
 			if (c) {
-				if (!teco_cmdline_keypress_c(TECO_CTL_KEY(g_ascii_toupper(c)), error))
+				if (!teco_cmdline_keypress_wc(TECO_CTL_KEY(g_ascii_toupper(c)), error))
 					return FALSE;
 				break;
 			}
diff --git a/src/parser.c b/src/parser.c
index ed21740..321803a 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -59,7 +59,7 @@ teco_loop_stack_cleanup(void)
 }
 
 gboolean
-teco_machine_input(teco_machine_t *ctx, gchar chr, GError **error)
+teco_machine_input(teco_machine_t *ctx, gunichar chr, GError **error)
 {
 	teco_state_t *next = ctx->current->input_cb(ctx, chr, error);
 	if (!next)
@@ -86,10 +86,20 @@ teco_state_end_of_macro(teco_machine_t *ctx, GError **error)
 }
 
 /**
+ * Execute macro from current PC to stop position.
+ *
  * Handles all expected exceptions and preparing them for stack frame insertion.
+ *
+ * @param ctx State machine.
+ * @param macro The macro to execute.
+ *   It does not have to be complete.
+ *   It must consist only of validated UTF-8 sequences, though.
+ * @param stop_pos Where to stop execution in bytes.
+ * @param error Location to store error.
+ * @return FALSE if an error occurred.
  */
 gboolean
-teco_machine_main_step(teco_machine_main_t *ctx, const gchar *macro, gint stop_pos, GError **error)
+teco_machine_main_step(teco_machine_main_t *ctx, const gchar *macro, gsize stop_pos, GError **error)
 {
 	while (ctx->macro_pc < stop_pos) {
 #ifdef DEBUG
@@ -110,9 +120,13 @@ teco_machine_main_step(teco_machine_main_t *ctx, const gchar *macro, gint stop_p
 		if (!teco_memory_check(0, error))
 			goto error_attach;
 
-		if (!teco_machine_input(&ctx->parent, macro[ctx->macro_pc], error))
+		/* UTF-8 sequences are already validated */
+		gunichar chr = g_utf8_get_char(macro+ctx->macro_pc);
+
+		if (!teco_machine_input(&ctx->parent, chr, error))
 			goto error_attach;
-		ctx->macro_pc++;
+
+		ctx->macro_pc = g_utf8_next_char(macro+ctx->macro_pc) - macro;
 	}
 
 	/*
@@ -145,6 +159,20 @@ teco_execute_macro(const gchar *macro, gsize macro_len,
                    teco_qreg_table_t *qreg_table_locals, GError **error)
 {
 	/*
+	 * Validate UTF-8, but accept null bytes.
+	 * NOTE: there is g_utf8_validate_len() in Glib 2.60
+	 */
+	const gchar *p = macro;
+	while (!g_utf8_validate(p, macro_len - (p - macro), &p) && !*p)
+		p++;
+	if (p - macro < macro_len) {
+		g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
+		            "Invalid UTF-8 byte sequence at %" G_GSIZE_FORMAT,
+		            p - macro);
+		return FALSE;
+	}
+
+	/*
 	 * This is not auto-cleaned up, so it can be initialized
 	 * on demand.
 	 */
@@ -309,26 +337,26 @@ teco_machine_main_eval_colon(teco_machine_main_t *ctx)
 teco_state_t *
 teco_machine_main_transition_input(teco_machine_main_t *ctx,
                                    teco_machine_main_transition_t *transitions,
-                                   guint len, gchar chr, GError **error)
+                                   guint len, gunichar chr, GError **error)
 {
-	if (chr < 0 || chr >= len || !transitions[(guint)chr].next) {
+	if (chr >= len || !transitions[chr].next) {
 		teco_error_syntax_set(error, chr);
 		return NULL;
 	}
 
-	if (ctx->mode == TECO_MODE_NORMAL && transitions[(guint)chr].transition_cb) {
+	if (ctx->mode == TECO_MODE_NORMAL && transitions[chr].transition_cb) {
 		/*
 		 * NOTE: We could also just let transition_cb return a boolean...
 		 */
 		GError *tmp_error = NULL;
-		transitions[(guint)chr].transition_cb(ctx, &tmp_error);
+		transitions[chr].transition_cb(ctx, &tmp_error);
 		if (tmp_error) {
 			g_propagate_error(error, tmp_error);
 			return NULL;
 		}
 	}
 
-	return transitions[(guint)chr].next;
+	return transitions[chr].next;
 }
 
 void
@@ -342,11 +370,11 @@ teco_machine_main_clear(teco_machine_main_t *ctx)
  * FIXME: All teco_state_stringbuilding_* states could be static?
  */
 static teco_state_t *teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx,
-                                                         gchar chr, GError **error);
+                                                         gunichar chr, GError **error);
 TECO_DECLARE_STATE(teco_state_stringbuilding_ctl);
 
 static teco_state_t *teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx,
-                                                             gchar chr, GError **error);
+                                                             gunichar chr, GError **error);
 TECO_DECLARE_STATE(teco_state_stringbuilding_escaped);
 
 TECO_DECLARE_STATE(teco_state_stringbuilding_lower);
@@ -360,7 +388,7 @@ TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_quote);
 TECO_DECLARE_STATE(teco_state_stringbuilding_ctle_n);
 
 static teco_state_t *
-teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
 {
 	if (chr == '^')
 		return &teco_state_stringbuilding_ctl;
@@ -372,7 +400,7 @@ teco_state_stringbuilding_start_input(teco_machine_stringbuilding_t *ctx, gchar
 
 /* in cmdline.c */
 gboolean teco_state_stringbuilding_start_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
-                                                          gchar key, GError **error);
+                                                          gunichar key, GError **error);
 
 TECO_DEFINE_STATE(teco_state_stringbuilding_start,
 		.is_start = TRUE,
@@ -381,7 +409,7 @@ TECO_DEFINE_STATE(teco_state_stringbuilding_start,
 );
 
 static teco_state_t *
-teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
 {
 	chr = teco_ascii_toupper(chr);
 
@@ -396,40 +424,50 @@ teco_state_stringbuilding_ctl_input(teco_machine_stringbuilding_t *ctx, gchar ch
 		chr = TECO_CTL_KEY(chr);
 	}
 
+	/*
+	 * Source code is always in UTF-8, so it does not
+	 * make sense to handle ctx->codepage != SC_CP_UTF8
+	 * separately.
+	 */
 	if (ctx->result)
-		teco_string_append_c(ctx->result, chr);
+		teco_string_append_wc(ctx->result, chr);
 	return &teco_state_stringbuilding_start;
 }
 
 TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctl);
 
 static teco_state_t *
-teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
 {
 	if (!ctx->result)
 		/* parse-only mode */
 		return &teco_state_stringbuilding_start;
 
-	/* FIXME: Consult ctx->codepage once we have an Unicode-conforming parser */
+	/*
+	 * The subtle difference between UTF-8 and single-byte targets
+	 * is that we don't try to casefold non-ANSI characters in single-byte mode.
+	 */
 	switch (ctx->mode) {
 	case TECO_STRINGBUILDING_MODE_UPPER:
-		chr = g_ascii_toupper(chr);
+		chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80
+					? g_unichar_toupper(chr) : chr;
 		break;
 	case TECO_STRINGBUILDING_MODE_LOWER:
-		chr = g_ascii_tolower(chr);
+		chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80
+					? g_unichar_tolower(chr) : chr;
 		break;
 	default:
 		break;
 	}
 
-	teco_string_append_c(ctx->result, chr);
+	teco_string_append_wc(ctx->result, chr);
 	return &teco_state_stringbuilding_start;
 }
 
 TECO_DEFINE_STATE(teco_state_stringbuilding_escaped);
 
 static teco_state_t *
-teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
 {
 	if (!ctx->result)
 		/* parse-only mode */
@@ -443,8 +481,9 @@ teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gchar
 			teco_undo_guint(ctx->mode);
 		ctx->mode = TECO_STRINGBUILDING_MODE_LOWER;
 	} else {
-		/* FIXME: Consult ctx->codepage once we have an Unicode-conforming parser */
-		teco_string_append_c(ctx->result, g_ascii_tolower(chr));
+		chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80
+					? g_unichar_tolower(chr) : chr;
+		teco_string_append_wc(ctx->result, chr);
 	}
 
 	return &teco_state_stringbuilding_start;
@@ -453,7 +492,7 @@ teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gchar
 TECO_DEFINE_STATE(teco_state_stringbuilding_lower);
 
 static teco_state_t *
-teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
 {
 	if (!ctx->result)
 		/* parse-only mode */
@@ -467,8 +506,9 @@ teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gchar
 			teco_undo_guint(ctx->mode);
 		ctx->mode = TECO_STRINGBUILDING_MODE_UPPER;
 	} else {
-		/* FIXME: Consult ctx->codepage once we have an Unicode-conforming parser */
-		teco_string_append_c(ctx->result, g_ascii_toupper(chr));
+		chr = ctx->codepage == SC_CP_UTF8 || chr < 0x80
+					? g_unichar_toupper(chr) : chr;
+		teco_string_append_wc(ctx->result, chr);
 	}
 
 	return &teco_state_stringbuilding_start;
@@ -477,7 +517,7 @@ teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gchar
 TECO_DEFINE_STATE(teco_state_stringbuilding_upper);
 
 static teco_state_t *
-teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
 {
 	teco_state_t *next;
 
@@ -489,8 +529,9 @@ teco_state_stringbuilding_ctle_input(teco_machine_stringbuilding_t *ctx, gchar c
 	case 'N':  next = &teco_state_stringbuilding_ctle_n; break;
 	default:
 		if (ctx->result) {
-			gchar buf[] = {TECO_CTL_KEY('E'), chr};
-			teco_string_append(ctx->result, buf, sizeof(buf));
+			gchar buf[1+6] = {TECO_CTL_KEY('E')};
+			gsize len = g_unichar_to_utf8(chr, buf+1);
+			teco_string_append(ctx->result, buf, 1+len);
 		}
 		return &teco_state_stringbuilding_start;
 	}
@@ -508,7 +549,7 @@ TECO_DEFINE_STATE_CASEINSENSITIVE(teco_state_stringbuilding_ctle);
 
 /* in cmdline.c */
 gboolean teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuilding_t *ctx, teco_machine_t *parent_ctx,
-                                                         gchar chr, GError **error);
+                                                         gunichar chr, GError **error);
 
 /**
  * @interface TECO_DEFINE_STATE_STRINGBUILDING_QREG
@@ -523,7 +564,7 @@ gboolean teco_state_stringbuilding_qreg_process_edit_cmd(teco_machine_stringbuil
 	)
 
 static teco_state_t *
-teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
 {
 	teco_qreg_t *qreg;
 
@@ -558,7 +599,7 @@ teco_state_stringbuilding_ctle_num_input(teco_machine_stringbuilding_t *ctx, gch
 TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_num);
 
 static teco_state_t *
-teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
 {
 	teco_qreg_t *qreg;
 
@@ -583,10 +624,7 @@ teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gchar
 	if (ctx->codepage == SC_CP_UTF8) {
 		if (value < 0 || !g_unichar_validate(value))
 			goto error_codepoint;
-		/* 4 bytes should be enough, but we better follow the documentation */
-		gchar buf[6];
-		gsize len = g_unichar_to_utf8(value, buf);
-		teco_string_append(ctx->result, buf, len);
+		teco_string_append_wc(ctx->result, value);
 	} else {
 		if (value < 0 || value > 0xFF)
 			goto error_codepoint;
@@ -606,7 +644,7 @@ error_codepoint: {
 TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_u);
 
 static teco_state_t *
-teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
 {
 	teco_qreg_t *qreg;
 
@@ -637,7 +675,7 @@ teco_state_stringbuilding_ctle_q_input(teco_machine_stringbuilding_t *ctx, gchar
 TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_q);
 
 static teco_state_t *
-teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
 {
 	teco_qreg_t *qreg;
 	teco_qreg_table_t *table;
@@ -680,7 +718,7 @@ teco_state_stringbuilding_ctle_quote_input(teco_machine_stringbuilding_t *ctx, g
 TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_quote);
 
 static teco_state_t *
-teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar chr, GError **error)
+teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gunichar chr, GError **error)
 {
 	teco_qreg_t *qreg;
 	teco_qreg_table_t *table;
@@ -717,7 +755,7 @@ teco_state_stringbuilding_ctle_n_input(teco_machine_stringbuilding_t *ctx, gchar
 TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_n);
 
 void
-teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escape_char,
+teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gunichar escape_char,
                                  teco_qreg_table_t *locals, gboolean must_undo)
 {
 	memset(ctx, 0, sizeof(*ctx));
@@ -738,6 +776,10 @@ teco_machine_stringbuilding_reset(teco_machine_stringbuilding_t *ctx)
 	ctx->mode = TECO_STRINGBUILDING_MODE_NORMAL;
 }
 
+/*
+ * If we case folded only ANSI characters as in teco_ascii_toupper(),
+ * this could be simplified.
+ */
 void
 teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gchar *str, gsize len,
                                    teco_string_t *target)
@@ -745,12 +787,18 @@ teco_machine_stringbuilding_escape(teco_machine_stringbuilding_t *ctx, const gch
 	target->data = g_malloc(len*2+1);
 	target->len = 0;
 
-	for (guint i = 0; i < len; i++) {
-		if (teco_ascii_toupper(str[i]) == ctx->escape_char ||
-		    (ctx->escape_char == '[' && str[i] == ']') ||
-		    (ctx->escape_char == '{' && str[i] == '}'))
+	for (guint i = 0; i < len; ) {
+		gunichar chr = g_utf8_get_char(str+i);
+
+		if (g_unichar_toupper(chr) == ctx->escape_char ||
+		    (ctx->escape_char == '[' && chr == ']') ||
+		    (ctx->escape_char == '{' && chr == '}'))
 			target->data[target->len++] = TECO_CTL_KEY('Q');
-		target->data[target->len++] = str[i];
+
+		gsize lenc = g_utf8_next_char(str+i) - (str+i);
+		memcpy(target->data+target->len, str+i, lenc);
+		target->len += lenc;
+		i += lenc;
 	}
 
 	target->data[target->len] = '\0';
@@ -772,7 +820,7 @@ teco_state_expectstring_initial(teco_machine_main_t *ctx, GError **error)
 }
 
 teco_state_t *
-teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
 {
 	teco_state_t *current = ctx->parent.current;
 
@@ -789,13 +837,18 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro
 		/*
 		 * FIXME: Exclude setting at least whitespace characters as the
 		 * new string escape character to avoid accidental errors?
+		 *
+		 * FIXME: Should we perhaps restrict case folding escape characters
+		 * to the ANSI range (teco_ascii_toupper())?
+		 * This would be faster than case folding each and every character
+		 * of a string argument to check against the escape char.
 		 */
 		switch (ctx->expectstring.machine.escape_char) {
 		case '\e':
 		case '{':
 			if (ctx->parent.must_undo)
-				teco_undo_gchar(ctx->expectstring.machine.escape_char);
-			ctx->expectstring.machine.escape_char = teco_ascii_toupper(chr);
+				teco_undo_gunichar(ctx->expectstring.machine.escape_char);
+			ctx->expectstring.machine.escape_char = g_unichar_toupper(chr);
 			return current;
 		}
 	}
@@ -819,7 +872,7 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro
 				ctx->expectstring.nesting--;
 				break;
 			}
-		} else if (teco_ascii_toupper(chr) == ctx->expectstring.machine.escape_char) {
+		} else if (g_unichar_toupper(chr) == ctx->expectstring.machine.escape_char) {
 			if (ctx->parent.must_undo)
 				teco_undo_gint(ctx->expectstring.nesting);
 			ctx->expectstring.nesting--;
@@ -849,7 +902,7 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro
 
 		if (current->expectstring.last) {
 			if (ctx->parent.must_undo)
-				teco_undo_gchar(ctx->expectstring.machine.escape_char);
+				teco_undo_gunichar(ctx->expectstring.machine.escape_char);
 			ctx->expectstring.machine.escape_char = '\e';
 		}
 		ctx->expectstring.nesting = 1;
@@ -880,7 +933,7 @@ teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **erro
 		if (!teco_machine_stringbuilding_input(&ctx->expectstring.machine, chr, str, error))
 			return NULL;
 	} else if (ctx->mode == TECO_MODE_NORMAL) {
-		teco_string_append_c(&ctx->expectstring.string, chr);
+		teco_string_append_wc(&ctx->expectstring.string, chr);
 	}
 
 	/*
@@ -924,7 +977,7 @@ teco_state_expectfile_process(teco_machine_main_t *ctx, const teco_string_t *str
 	g_assert(str->data != NULL);
 
 	/*
-	 * Null-chars must not ocur in filename/path strings and at some point
+	 * Null-chars must not occur in filename/path strings and at some point
 	 * teco_string_t has to be converted to a null-terminated C string
 	 * as all the glib filename functions rely on null-terminated strings.
 	 * Doing it here ensures that teco_file_expand_path() can be safely called
diff --git a/src/parser.h b/src/parser.h
index 09ec483..ae2cb9b 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -101,11 +101,11 @@ typedef const struct {
 } teco_state_expectqreg_t;
 
 typedef gboolean (*teco_state_initial_cb_t)(teco_machine_t *ctx, GError **error);
-typedef teco_state_t *(*teco_state_input_cb_t)(teco_machine_t *ctx, gchar chr, GError **error);
+typedef teco_state_t *(*teco_state_input_cb_t)(teco_machine_t *ctx, gunichar chr, GError **error);
 typedef gboolean (*teco_state_refresh_cb_t)(teco_machine_t *ctx, GError **error);
 typedef gboolean (*teco_state_end_of_macro_cb_t)(teco_machine_t *ctx, GError **error);
 typedef gboolean (*teco_state_process_edit_cmd_cb_t)(teco_machine_t *ctx, teco_machine_t *parent_ctx,
-                                                     gchar key, GError **error);
+                                                     gunichar key, GError **error);
 
 typedef enum {
 	TECO_FNMACRO_MASK_START		= (1 << 0),
@@ -225,7 +225,7 @@ struct teco_state_t {
 gboolean teco_state_end_of_macro(teco_machine_t *ctx, GError **error);
 
 /* in cmdline.c */
-gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error);
+gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error);
 
 /**
  * @interface TECO_DEFINE_STATE
@@ -254,7 +254,7 @@ gboolean teco_state_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent
 	extern teco_state_t NAME
 
 /* in cmdline.c */
-gboolean teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gchar chr, GError **error);
+gboolean teco_state_caseinsensitive_process_edit_cmd(teco_machine_t *ctx, teco_machine_t *parent_ctx, gunichar chr, GError **error);
 
 /**
  * @interface TECO_DEFINE_STATE_CASEINSENSITIVE
@@ -308,7 +308,7 @@ teco_machine_reset(teco_machine_t *ctx, teco_state_t *initial)
 		teco_undo_ptr(ctx->current) = initial;
 }
 
-gboolean teco_machine_input(teco_machine_t *ctx, gchar chr, GError **error);
+gboolean teco_machine_input(teco_machine_t *ctx, gunichar chr, GError **error);
 
 typedef enum {
 	TECO_STRINGBUILDING_MODE_NORMAL = 0,
@@ -336,7 +336,7 @@ typedef struct teco_machine_stringbuilding_t {
 	 * If this is `[` or `{`, it is assumed that `]` and `}` must
 	 * be escaped as well by teco_machine_stringbuilding_escape().
 	 */
-	gchar escape_char;
+	gunichar escape_char;
 
 	/**
 	 * Q-Register table for local registers.
@@ -366,7 +366,7 @@ typedef struct teco_machine_stringbuilding_t {
 	guint codepage;
 } teco_machine_stringbuilding_t;
 
-void teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escape_char,
+void teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gunichar escape_char,
                                       teco_qreg_table_t *locals, gboolean must_undo);
 
 void teco_machine_stringbuilding_reset(teco_machine_stringbuilding_t *ctx);
@@ -381,7 +381,7 @@ void teco_machine_stringbuilding_reset(teco_machine_stringbuilding_t *ctx);
  * @return FALSE in case of error.
  */
 static inline gboolean
-teco_machine_stringbuilding_input(teco_machine_stringbuilding_t *ctx, gchar chr,
+teco_machine_stringbuilding_input(teco_machine_stringbuilding_t *ctx, gunichar chr,
                                   teco_string_t *result, GError **error)
 {
 	ctx->result = result;
@@ -497,7 +497,7 @@ void teco_machine_main_init(teco_machine_main_t *ctx,
 gboolean teco_machine_main_eval_colon(teco_machine_main_t *ctx);
 
 gboolean teco_machine_main_step(teco_machine_main_t *ctx,
-                                const gchar *macro, gint stop_pos, GError **error);
+                                const gchar *macro, gsize stop_pos, GError **error);
 
 gboolean teco_execute_macro(const gchar *macro, gsize macro_len,
                             teco_qreg_table_t *qreg_table_locals, GError **error);
@@ -516,18 +516,18 @@ typedef const struct {
  */
 teco_state_t *teco_machine_main_transition_input(teco_machine_main_t *ctx,
                                                  teco_machine_main_transition_t *transitions,
-                                                 guint len, gchar chr, GError **error);
+                                                 guint len, gunichar chr, GError **error);
 
 void teco_machine_main_clear(teco_machine_main_t *ctx);
 
 G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(teco_machine_main_t, teco_machine_main_clear);
 
 gboolean teco_state_expectstring_initial(teco_machine_main_t *ctx, GError **error);
-teco_state_t *teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **error);
+teco_state_t *teco_state_expectstring_input(teco_machine_main_t *ctx, gunichar chr, GError **error);
 gboolean teco_state_expectstring_refresh(teco_machine_main_t *ctx, GError **error);
 
 /* in cmdline.c */
-gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
 
 /**
  * @interface TECO_DEFINE_STATE_EXPECTSTRING
@@ -543,7 +543,7 @@ gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco
  */
 #define TECO_DEFINE_STATE_EXPECTSTRING(NAME, ...) \
 	static teco_state_t * \
-	NAME##_input(teco_machine_main_t *ctx, gchar chr, GError **error) \
+	NAME##_input(teco_machine_main_t *ctx, gunichar chr, GError **error) \
 	{ \
 		return teco_state_expectstring_input(ctx, chr, error); \
 	} \
@@ -564,7 +564,7 @@ gboolean teco_state_expectfile_process(teco_machine_main_t *ctx, const teco_stri
                                        gsize new_chars, GError **error);
 
 /* in cmdline.c */
-gboolean teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
 
 /**
  * @interface TECO_DEFINE_STATE_EXPECTFILE
@@ -580,7 +580,7 @@ gboolean teco_state_expectfile_process_edit_cmd(teco_machine_main_t *ctx, teco_m
 	)
 
 /* in cmdline.c */
-gboolean teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_expectdir_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
 
 /**
  * @interface TECO_DEFINE_STATE_EXPECTDIR
diff --git a/src/qreg-commands.c b/src/qreg-commands.c
index f248ced..8d28e7d 100644
--- a/src/qreg-commands.c
+++ b/src/qreg-commands.c
@@ -50,7 +50,7 @@ teco_state_expectqreg_initial(teco_machine_main_t *ctx, GError **error)
 }
 
 teco_state_t *
-teco_state_expectqreg_input(teco_machine_main_t *ctx, gchar chr, GError **error)
+teco_state_expectqreg_input(teco_machine_main_t *ctx, gunichar chr, GError **error)
 {
 	teco_state_t *current = ctx->parent.current;
 
@@ -680,6 +680,10 @@ teco_state_macro_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
  * Note that the string of <q> will be copied upon macro execution,
  * so subsequent changes to Q-Register <q> from inside the macro do
  * not modify the executed code.
+ *
+ * While \fBM\fP does not check the register's configured encoding
+ * (as reported by \fBEE\fP), its contents must be valid UTF-8
+ * and are checked for validity.
  */
 TECO_DEFINE_STATE_EXPECTQREG(teco_state_macro);
 
@@ -714,6 +718,9 @@ teco_state_macrofile_done(teco_machine_main_t *ctx, const teco_string_t *str, GE
  * It is otherwise similar to the \(lqM\(rq command.
  *
  * If <file> could not be read, the command yields an error.
+ *
+ * Like all \*(ST code, the contents of <file> must be in valid UTF-8
+ * even if operating in the \(lqdefault ANSI\(rq mode as configured by \fBED\fP.
  */
 TECO_DEFINE_STATE_EXPECTFILE(teco_state_macrofile);
 
diff --git a/src/qreg-commands.h b/src/qreg-commands.h
index b190e9f..27a6a5c 100644
--- a/src/qreg-commands.h
+++ b/src/qreg-commands.h
@@ -33,10 +33,10 @@ teco_state_expectqreg_reset(teco_machine_main_t *ctx)
 
 gboolean teco_state_expectqreg_initial(teco_machine_main_t *ctx, GError **error);
 
-teco_state_t *teco_state_expectqreg_input(teco_machine_main_t *ctx, gchar chr, GError **error);
+teco_state_t *teco_state_expectqreg_input(teco_machine_main_t *ctx, gunichar chr, GError **error);
 
 /* in cmdline.c */
-gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
 
 /**
  * @interface TECO_DEFINE_STATE_EXPECTQREG
@@ -47,7 +47,7 @@ gboolean teco_state_expectqreg_process_edit_cmd(teco_machine_main_t *ctx, teco_m
  */
 #define TECO_DEFINE_STATE_EXPECTQREG(NAME, ...) \
 	static teco_state_t * \
-	NAME##_input(teco_machine_main_t *ctx, gchar chr, GError **error) \
+	NAME##_input(teco_machine_main_t *ctx, gunichar chr, GError **error) \
 	{ \
 		return teco_state_expectqreg_input(ctx, chr, error); \
 	} \
diff --git a/src/qreg.c b/src/qreg.c
index fb559af..cac2d12 100644
--- a/src/qreg.c
+++ b/src/qreg.c
@@ -84,10 +84,9 @@ teco_qreg_execute(teco_qreg_t *qreg, teco_qreg_table_t *qreg_table_locals, GErro
 	g_auto(teco_string_t) macro = {NULL, 0};
 
 	/*
-	 * FIXME: Once we have an Unicode-aware parser,
-	 * we should probably check the encoding of the register.
-	 * On the other hand, we will have to validate the
-	 * UTF-8 codepoints before execution anyway.
+	 * SciTECO macros must be in UTF-8, but we don't check the register's encoding,
+	 * so as not to complicate TECO_ED_DEFAULT_ANSI mode.
+	 * The UTF-8 byte sequences are checked anyway.
 	 */
 	if (!qreg->vtable->get_string(qreg, &macro.data, &macro.len, NULL, error) ||
 	    !teco_execute_macro(macro.data, macro.len, qreg_table_locals, error)) {
@@ -1220,7 +1219,7 @@ TECO_DECLARE_STATE(teco_state_qregspec_secondchar);
 TECO_DECLARE_STATE(teco_state_qregspec_string);
 
 static teco_state_t *teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx,
-                                                            gchar chr, GError **error);
+                                                            gunichar chr, GError **error);
 
 static teco_state_t *
 teco_state_qregspec_done(teco_machine_qregspec_t *ctx, GError **error)
@@ -1255,7 +1254,7 @@ teco_state_qregspec_done(teco_machine_qregspec_t *ctx, GError **error)
 }
 
 static teco_state_t *
-teco_state_qregspec_start_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error)
+teco_state_qregspec_start_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error)
 {
 	/*
 	 * FIXME: We're using teco_state_qregspec_start as a success condition,
@@ -1272,7 +1271,7 @@ teco_state_qregspec_start_input(teco_machine_qregspec_t *ctx, gchar chr, GError
 }
 
 /* in cmdline.c */
-gboolean teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_qregspec_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
 
 TECO_DEFINE_STATE(teco_state_qregspec_start,
 	.is_start = TRUE,
@@ -1280,7 +1279,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_start,
 );
 
 static teco_state_t *
-teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error)
+teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error)
 {
 	/*
 	 * FIXME: Disallow space characters?
@@ -1299,8 +1298,7 @@ teco_state_qregspec_start_global_input(teco_machine_qregspec_t *ctx, gchar chr,
 	if (!ctx->parse_only) {
 		if (ctx->parent.must_undo)
 			undo__teco_string_truncate(&ctx->name, ctx->name.len);
-		/* FIXME: g_unicode_toupper() once we have an Unicode-conforming parser */
-		teco_string_append_c(&ctx->name, g_ascii_toupper(chr));
+		teco_string_append_wc(&ctx->name, g_unichar_toupper(chr));
 	}
 	return teco_state_qregspec_done(ctx, error);
 }
@@ -1316,7 +1314,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_start_global,
 );
 
 static teco_state_t *
-teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error)
+teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error)
 {
 	/*
 	 * FIXME: Disallow space characters?
@@ -1324,8 +1322,7 @@ teco_state_qregspec_firstchar_input(teco_machine_qregspec_t *ctx, gchar chr, GEr
 	if (!ctx->parse_only) {
 		if (ctx->parent.must_undo)
 			undo__teco_string_truncate(&ctx->name, ctx->name.len);
-		/* FIXME: g_unicode_toupper() once we have an Unicode-conforming parser */
-		teco_string_append_c(&ctx->name, g_ascii_toupper(chr));
+		teco_string_append_wc(&ctx->name, g_unichar_toupper(chr));
 	}
 	return &teco_state_qregspec_secondchar;
 }
@@ -1335,7 +1332,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_firstchar,
 );
 
 static teco_state_t *
-teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error)
+teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error)
 {
 	/*
 	 * FIXME: Disallow space characters?
@@ -1343,8 +1340,7 @@ teco_state_qregspec_secondchar_input(teco_machine_qregspec_t *ctx, gchar chr, GE
 	if (!ctx->parse_only) {
 		if (ctx->parent.must_undo)
 			undo__teco_string_truncate(&ctx->name, ctx->name.len);
-		/* FIXME: g_unicode_toupper() once we have an Unicode-conforming parser */
-		teco_string_append_c(&ctx->name, g_ascii_toupper(chr));
+		teco_string_append_wc(&ctx->name, g_unichar_toupper(chr));
 	}
 	return teco_state_qregspec_done(ctx, error);
 }
@@ -1354,7 +1350,7 @@ TECO_DEFINE_STATE(teco_state_qregspec_secondchar,
 );
 
 static teco_state_t *
-teco_state_qregspec_string_input(teco_machine_qregspec_t *ctx, gchar chr, GError **error)
+teco_state_qregspec_string_input(teco_machine_qregspec_t *ctx, gunichar chr, GError **error)
 {
 	/*
 	 * Makes sure that braces within string building constructs do not have to be
@@ -1395,7 +1391,7 @@ teco_state_qregspec_string_input(teco_machine_qregspec_t *ctx, gchar chr, GError
 
 /* in cmdline.c */
 gboolean teco_state_qregspec_string_process_edit_cmd(teco_machine_qregspec_t *ctx, teco_machine_t *parent_ctx,
-                                                     gchar key, GError **error);
+                                                     gunichar key, GError **error);
 
 TECO_DEFINE_STATE(teco_state_qregspec_string,
 	.process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t)teco_state_qregspec_string_process_edit_cmd
@@ -1456,7 +1452,7 @@ teco_machine_qregspec_get_stringbuilding(teco_machine_qregspec_t *ctx)
  * @memberof teco_machine_qregspec_t
  */
 teco_machine_qregspec_status_t
-teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gchar chr,
+teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gunichar chr,
                             teco_qreg_t **result, teco_qreg_table_t **result_table, GError **error)
 {
 	ctx->parse_only = result == NULL;
@@ -1484,7 +1480,7 @@ teco_machine_qregspec_get_results(teco_machine_qregspec_t *ctx,
 gboolean
 teco_machine_qregspec_auto_complete(teco_machine_qregspec_t *ctx, teco_string_t *insert)
 {
-	gsize restrict_len = 0;
+	guint restrict_len = 0;
 
 	/*
 	 * NOTE: We could have separate process_edit_cmd_cb() for
@@ -1499,6 +1495,10 @@ teco_machine_qregspec_auto_complete(teco_machine_qregspec_t *ctx, teco_string_t
 		/* two-letter Q-Reg */
 		restrict_len = 2;
 
+	/*
+	 * FIXME: This is not quite right as it will propose even
+	 * lower case single or two-letter Q-Register names.
+	 */
 	return teco_rb3str_auto_complete(&ctx->result_table->tree, !restrict_len,
 	                                 ctx->name.data, ctx->name.len, restrict_len, insert) &&
 	       ctx->nesting == 1;
diff --git a/src/qreg.h b/src/qreg.h
index 8c8764e..df4bdb4 100644
--- a/src/qreg.h
+++ b/src/qreg.h
@@ -227,7 +227,7 @@ void teco_machine_qregspec_reset(teco_machine_qregspec_t *ctx);
  */
 struct teco_machine_stringbuilding_t *teco_machine_qregspec_get_stringbuilding(teco_machine_qregspec_t *ctx);
 
-teco_machine_qregspec_status_t teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gchar chr,
+teco_machine_qregspec_status_t teco_machine_qregspec_input(teco_machine_qregspec_t *ctx, gunichar chr,
                                                            teco_qreg_t **result,
                                                            teco_qreg_table_t **result_table, GError **error);
 
diff --git a/src/rb3str.c b/src/rb3str.c
index 72cf444..d51ac5d 100644
--- a/src/rb3str.c
+++ b/src/rb3str.c
@@ -95,7 +95,7 @@ teco_rb3str_nfind(teco_rb3str_tree_t *tree, gboolean case_sensitive, const gchar
  * @param case_sensitive Whether to match case-sensitive.
  * @param str String to complete (not necessarily null-terminated).
  * @param str_len Length of characters in `str`.
- * @param restrict_len Limit completions to this size.
+ * @param restrict_len Limit completions to this size (in characters).
  * @param insert String to set with characters that can be autocompleted.
  * @return TRUE if the completion was unambiguous, else FALSE.
  *
@@ -103,7 +103,7 @@ teco_rb3str_nfind(teco_rb3str_tree_t *tree, gboolean case_sensitive, const gchar
  */
 gboolean
 teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive,
-                          const gchar *str, gsize str_len, gsize restrict_len, teco_string_t *insert)
+                          const gchar *str, gsize str_len, guint restrict_len, teco_string_t *insert)
 {
 	memset(insert, 0, sizeof(*insert));
 
@@ -115,7 +115,7 @@ teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive,
 	for (teco_rb3str_head_t *cur = teco_rb3str_nfind(tree, case_sensitive, str, str_len);
 	     cur && cur->key.len >= str_len && diff(&cur->key, str, str_len) == str_len;
 	     cur = teco_rb3str_get_next(cur)) {
-		if (restrict_len && cur->key.len != restrict_len)
+		if (restrict_len && g_utf8_strlen(cur->key.data, cur->key.len) != restrict_len)
 			continue;
 
 		if (G_UNLIKELY(!first)) {
@@ -136,7 +136,7 @@ teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive,
 		for (teco_rb3str_head_t *cur = first;
 		     cur && cur->key.len >= str_len && diff(&cur->key, str, str_len) == str_len;
 		     cur = teco_rb3str_get_next(cur)) {
-			if (restrict_len && cur->key.len != restrict_len)
+			if (restrict_len && g_utf8_strlen(cur->key.data, cur->key.len) != restrict_len)
 				continue;
 
 			teco_interface_popup_add(TECO_POPUP_PLAIN,
diff --git a/src/rb3str.h b/src/rb3str.h
index 74b3a37..adf5f89 100644
--- a/src/rb3str.h
+++ b/src/rb3str.h
@@ -65,5 +65,5 @@ teco_rb3str_head_t *teco_rb3str_nfind(teco_rb3str_tree_t *tree, gboolean case_se
                                       const gchar *str, gsize len);
 
 gboolean teco_rb3str_auto_complete(teco_rb3str_tree_t *tree, gboolean case_sensitive,
-                                   const gchar *str, gsize str_len, gsize restrict_len,
+                                   const gchar *str, gsize str_len, guint restrict_len,
                                    teco_string_t *insert);
diff --git a/src/sciteco.h b/src/sciteco.h
index 09dea3b..02eed97 100644
--- a/src/sciteco.h
+++ b/src/sciteco.h
@@ -71,7 +71,7 @@ teco_is_failure(teco_bool_t x)
 #endif
 
 /** TRUE if C is a control character */
-#define TECO_IS_CTL(C)		((guchar)(C) < ' ')
+#define TECO_IS_CTL(C)		((gunichar)(C) < ' ')
 /** ASCII character to echo control character C */
 #define TECO_CTL_ECHO(C)	((C) | 0x40)
 /**
diff --git a/src/search.c b/src/search.c
index e146def..43a2936 100644
--- a/src/search.c
+++ b/src/search.c
@@ -308,14 +308,6 @@ teco_pattern2regexp(teco_string_t *pattern, gboolean single_expr, GError **error
 
 	do {
 		/*
-		 * FIXME: Currently we are fed single bytes, so there
-		 * could be an incomplete UTF-8 sequence at the end of the pattern.
-		 * This should not be necessary once we have an Unicode-aware parser.
-		 */
-		if (pattern->len > 0 && (gint32)g_utf8_get_char_validated(pattern->data, -1) < 0)
-			break;
-
-		/*
 		 * First check whether it is a class.
 		 * This will not treat individual characters
 		 * as classes, so we do not convert them to regexp
diff --git a/src/spawn.c b/src/spawn.c
index 044b8de..445acc5 100644
--- a/src/spawn.c
+++ b/src/spawn.c
@@ -417,7 +417,7 @@ cleanup:
 }
 
 /* in cmdline.c */
-gboolean teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_execute_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
 
 /*$ EC pipe filter
  * ECcommand$ -- Execute operating system command and filter buffer contents
@@ -642,7 +642,7 @@ teco_spawn_stdin_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer dat
 	gssize bytes_written = teco_eol_writer_convert(&teco_spawn_ctx.stdin_writer, buffer,
 	                                               convert_len, &teco_spawn_ctx.error);
 	if (bytes_written < 0) {
-		/* GError ocurred */
+		/* GError occurred */
 		g_main_loop_quit(teco_spawn_ctx.mainloop);
 		return G_SOURCE_REMOVE;
 	}
diff --git a/src/string-utils.c b/src/string-utils.c
index ac5835b..d9b12e0 100644
--- a/src/string-utils.c
+++ b/src/string-utils.c
@@ -78,7 +78,17 @@ teco_string_get_coord(const gchar *str, guint pos, guint *line, guint *column)
 	}
 }
 
-/** @memberof teco_string_t */
+/**
+ * Get the length of the prefix common to two strings.
+ * Works with UTF-8 and single-byte encodings.
+ *
+ * @param a Left string.
+ * @param b Right string.
+ * @param b_len Length of right string.
+ * @return Length of the common prefix in bytes.
+ *
+ * @memberof teco_string_t
+ */
 gsize
 teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len)
 {
@@ -92,14 +102,16 @@ teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len)
 }
 
 /**
- * Get the length of the prefix common to two strings
+ * Get the length of the prefix common to two UTF-8 strings
  * without considering case.
  *
- * @fixme This is currently only used for symbols and one/two letter
- * Q-Register names, which cannot be UTF-8.
- * If we rewrote this to perform Unicode case folding, we would
- * also have to check for character validity.
- * Once our parser is Unicode-aware, this is not necessary.
+ * The UTF-8 strings must be validated, which should be the case
+ * for help labels and short Q-Register names.
+ *
+ * @param a Left UTF-8 string.
+ * @param b Right UTF-8 string.
+ * @param b_len Length of right UTF-8 string.
+ * @return Length of the common prefix in bytes.
  *
  * @memberof teco_string_t
  */
@@ -108,9 +120,13 @@ teco_string_casediff(const teco_string_t *a, const gchar *b, gsize b_len)
 {
 	gsize len = 0;
 
-	while (len < a->len && len < b_len &&
-	       g_ascii_tolower(a->data[len]) == g_ascii_tolower(b[len]))
-		len++;
+	while (len < a->len && len < b_len) {
+		gunichar a_chr = g_utf8_get_char(a->data+len);
+		gunichar b_chr = g_utf8_get_char(b+len);
+		if (g_unichar_tolower(a_chr) != g_unichar_tolower(b_chr))
+			break;
+		len = g_utf8_next_char(b+len) - b;
+	}
 
 	return len;
 }
diff --git a/src/string-utils.h b/src/string-utils.h
index bb9ed37..1b4957f 100644
--- a/src/string-utils.h
+++ b/src/string-utils.h
@@ -26,11 +26,11 @@
 /**
  * Upper-case SciTECO command character.
  *
- * There are implementations in glib (g_ascii_toupper) and libc,
+ * There are implementations in glib (g_ascii_toupper() and g_unichar_toupper()) and libc,
  * but this implementation is sufficient for all letters used by SciTECO commands.
  */
-static inline gchar
-teco_ascii_toupper(gchar chr)
+static inline gunichar
+teco_ascii_toupper(gunichar chr)
 {
 	return chr >= 'a' && chr <= 'z' ? chr & ~0x20 : chr;
 }
@@ -52,6 +52,7 @@ teco_strv_remove(gchar **strv, guint i)
  * and the allocation length is not stored.
  * Just like GString, teco_string_t are always null-terminated but at the
  * same time 8-bit clean (can contain null-characters).
+ * It may or may not contain UTF-8 byte sequences.
  *
  * The API is designed such that teco_string_t operations operate on plain
  * (null-terminated) C strings, a single character or character array as well as
@@ -74,7 +75,7 @@ typedef struct {
 	 * The pointer is guaranteed to be non-NULL after initialization.
 	 */
 	gchar *data;
-	/** Length of `data` without the trailing null-byte. */
+	/** Length of `data` in bytes, without the trailing null-byte. */
 	gsize len;
 } teco_string_t;
 
@@ -128,6 +129,16 @@ teco_string_append_c(teco_string_t *str, gchar chr)
 	teco_string_append(str, &chr, sizeof(chr));
 }
 
+/** @memberof teco_string_t */
+static inline void
+teco_string_append_wc(teco_string_t *target, gunichar chr)
+{
+	/* 4 bytes should be enough, but g_unichar_to_utf8() is documented to need 6 */
+	target->data = g_realloc(target->data, target->len + 6 + 1);
+	target->len += g_unichar_to_utf8(chr, target->data+target->len);
+	target->data[target->len] = '\0';
+}
+
 /**
  * @fixme Should this also realloc str->data?
  *
diff --git a/src/symbols.c b/src/symbols.c
index ba407cc..feead76 100644
--- a/src/symbols.c
+++ b/src/symbols.c
@@ -251,7 +251,7 @@ teco_state_scintilla_symbols_done(teco_machine_main_t *ctx, const teco_string_t
 }
 
 /* in cmdline.c */
-gboolean teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gchar key, GError **error);
+gboolean teco_state_scintilla_symbols_process_edit_cmd(teco_machine_main_t *ctx, teco_machine_t *parent_ctx, gunichar key, GError **error);
 
 /*$ ES scintilla message
  * -- Send Scintilla message
diff --git a/src/undo.c b/src/undo.c
index dfae63b..dc54c7a 100644
--- a/src/undo.c
+++ b/src/undo.c
@@ -30,7 +30,7 @@
 
 //#define DEBUG
 
-TECO_DEFINE_UNDO_SCALAR(gchar);
+TECO_DEFINE_UNDO_SCALAR(gunichar);
 TECO_DEFINE_UNDO_SCALAR(gint);
 TECO_DEFINE_UNDO_SCALAR(guint);
 TECO_DEFINE_UNDO_SCALAR(gsize);
diff --git a/src/undo.h b/src/undo.h
index ea1414f..9715c7a 100644
--- a/src/undo.h
+++ b/src/undo.h
@@ -164,8 +164,8 @@ gpointer teco_undo_push_size(teco_undo_action_t action_cb, gsize size)
  * significantly improves batch-mode performance.
  */
 
-TECO_DECLARE_UNDO_SCALAR(gchar);
-#define teco_undo_gchar(VAR) (*teco_undo_object_gchar_push(&(VAR)))
+TECO_DECLARE_UNDO_SCALAR(gunichar);
+#define teco_undo_gunichar(VAR) (*teco_undo_object_gunichar_push(&(VAR)))
 
 TECO_DECLARE_UNDO_SCALAR(gint);
 #define teco_undo_gint(VAR) (*teco_undo_object_gint_push(&(VAR)))
diff --git a/tests/testsuite.at b/tests/testsuite.at
index 4749b13..0733d2a 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -84,8 +84,6 @@ AT_CHECK([$SCITECO -e "0@I//J 0A\"N(0/0)' :@S/^@/\"F(0/0)'"], 0, ignore, ignore)
 AT_CHECK([$SCITECO -e "@EQa//0EE 1U*0EE 0:@EUa/f^@^@/ :Qa-4\"N(0/0)' Ga Z-4\"N(0/0)'"], 0, ignore, ignore)
 AT_CHECK([$SCITECO -e "0EE 129@I// -A-129\"N(0/0)' HXa @EQa// EE\"N(0/0)'"], 0, ignore, ignore)
 AT_CHECK([$SCITECO -8e "129@:^Ua// 0Qa-129\"N(0/0)'"], 0, ignore, ignore)
-# FIXME: This will fail once we have an UTF-8-only parser.
-AT_CHECK([$SCITECO -8e "@:^Ua/^^/ 129:@^Ua// Ma-129\"N(0/0)'"], 0, ignore, ignore)
 AT_CHECK([$SCITECO -e "1EE 167Ua @I/^EUa/ .-1\"N(0/0)'"], 0, ignore, ignore)
 AT_CLEANUP
 
@@ -95,6 +93,8 @@ AT_CHECK([$SCITECO -e "8594@^Ua/Здравствуй, мир!/ :Qa-17\"N(0/0)' 0
 AT_CHECK([$SCITECO -e "@I/Здравствуй, мир!/ JW .-10\"N(0/0)' ^E-20\"N(0/0)' 204:EE .-10\"N(0/0)'"], 0, ignore, ignore)
 AT_CHECK([$SCITECO -e "@I/TEST/ @EW/юникод.txt/"], 0, ignore, ignore)
 AT_CHECK([test -f юникод.txt], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "^^ß-223\"N(0/0)' 23Uъ Q[Ъ]-23\"N(0/0)'"], 0, ignore, ignore)
+AT_CHECK([$SCITECO -e "@O/метка/ !метка!"], 0, ignore, ignore)
 AT_CLEANUP
 
 AT_SETUP([Automatic EOL normalization])
@@ -207,8 +207,7 @@ AT_CLEANUP
 AT_SETUP([Unicode glitches])
 # While TECO code must always be UTF-8, strings after string building
 # can be in single-byte encodings as well.
-# This might already work after introducing the Unicode-aware parser.
-# If not, it should be fixed.
+# It must be possible to search for single bytes in single-byte encodings.
 AT_CHECK([$SCITECO -8e "164Ua Ga@I//J :@S/^EUa/\"F(0/0)'"], 0, ignore, ignore)
 AT_XFAIL_IF(true)
 AT_CLEANUP