17 files changed, 158 insertions, 107 deletions
diff --git a/README b/README
index ba26222..651d8be 100644
--- a/README
+++ b/README
@@ -77,7 +77,7 @@ Features
 * Full Unicode (UTF-8) support: The document is still represented as a random-accessible
   codepoint sequence.
 * 8-bit clean: SciTECO can be used to edit binary files if the encoding is changed to
-  ANSI (`0EE`) and automatic EOL conversion is turned off (`16,0ED`).
+  ANSI and automatic EOL conversion is turned off (easiest with `--8bit`).
 * Self-documenting: An integrated indexed help system allows browsing formatted documentation
   about commands, macros and concepts within SciTECO (`?` command).
   Macro packages can be documented with the `tedoc` tool, generating man pages.
diff --git a/doc/sciteco.1.in b/doc/sciteco.1.in
index 73303a1..b03f62d 100644
--- a/doc/sciteco.1.in
+++ b/doc/sciteco.1.in
@@ -19,6 +19,7 @@ Scintilla-based \fBT\fPext \fBE\fPditor and \fBCO\fPrrector
 .OP "-e|--eval" macro
 .OP "-m|--mung"
 .OP "--no-profile"
+.OP "-8|--8bit"
 .RI [ "UI option .\|.\|." ]
 .OP "--"
 .RI [ script ]
@@ -191,6 +192,14 @@ munging an empty file.
 This is useful to fix up a broken profile script.
 This option has no effect when a file is explicitly munged with
 .BR \-\-mung .
+.IP "\fB-8\fR, \fB--8bit\fR"
+.SCITECO_TOPIC "-8" "--8bit"
+Use raw single-byte ANSI encoding by default and disable automatic EOL conversion,
+which optimizes \*(ST for 8-bit cleanliness.
+It is equivalent to executing \(lq16,4ED\(rq, but since it is executed
+very early at startup, all Q-Registers and the unnamed buffer will
+already be in ANSI encoding.
+This option is also useful when munging the profile macro.
 .IP "\fIUI options .\|.\|.\fP"
 Some graphical user interfaces, notably GTK+, provide
 additional command line options.
diff --git a/doc/sciteco.7.template b/doc/sciteco.7.template
index f344820..a5b7f4a 100644
--- a/doc/sciteco.7.template
+++ b/doc/sciteco.7.template
@@ -1086,7 +1086,10 @@ Currently, \*(ST supports UTF-8 and single-byte ANSI encodings,
 that can also be used for editing raw binary files.
 \# You can configure other single-byte code pages with EE,
 \# but there isn't yet any way to insert characters.
-UTF-8 is the default codepage for new buffers and Q-Registers.
+UTF-8 is the default codepage for new buffers and Q-Registers
+unless \fBED\fP flag 4 (bit 2) is set.
+You can also specify \fB--8bit\fP to optimize \*(ST for
+8-bit cleanliness.
 While navigation in documents with single-byte encodings
 takes place in constant time, \*(ST uses heuristics in
 UTF-8 documents for translating between byte and character
diff --git a/sample.teco_ini b/sample.teco_ini
index a352f3e..d7060dc 100644
--- a/sample.teco_ini
+++ b/sample.teco_ini
@@ -17,7 +17,7 @@ EMQ[$SCITECOPATH]/session.tes
     32,0ED
 
     !* non-UTF-8 documents are assumed to be in latin1 (8859-1) *!
-    1024<:C; -A"T 1EE 1;'> J
+    EE"N 1024<:C; -A"T 1EE 1;'> J '
 
     M[lexer.auto]
 
diff --git a/src/cmdline.c b/src/cmdline.c
index d6fcd37..47ef86f 100644
--- a/src/cmdline.c
+++ b/src/cmdline.c
@@ -1052,7 +1052,8 @@ teco_state_save_cmdline_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg
 		return &teco_state_start;
 
 	if (!qreg->vtable->undo_set_string(qreg, error) ||
-	    !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len, SC_CP_UTF8, error))
+	    !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len,
+	                              teco_default_codepage(), error))
 		return NULL;
 
 	return &teco_state_start;
diff --git a/src/core-commands.c b/src/core-commands.c
index 638279d..176bb17 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -2038,6 +2038,11 @@ teco_state_ecommand_close(teco_machine_main_t *ctx, GError **error)
  * Without any argument ED returns the current flags.
  *
  * Currently, the following flags are used by \*(ST:
+ *   - 4: If enabled, prefer raw single-byte ANSI encoding
+ *     for all new buffers and registers.
+ *     This does not change the encoding of any existing
+ *     buffers and any initialized default register when set via
+ *     \fBED\fP, so you might want to launch \*(ST with \fB--8bit\fP.
  *   - 8: Enable/disable automatic folding of case-insensitive
  *     command characters during interactive key translation.
  *     The case of letter keys is inverted, so one or two
@@ -2610,9 +2615,6 @@ teco_state_ecommand_encoding(teco_machine_main_t *ctx, GError **error)
 		 *
 		 * FIXME: Should we avoid this if new_cp == 0?
 		 * It will be used for raw byte handling mostly.
-		 * Perhaps we should even set char representations appropriately
-		 * for all non-ANSI codepoints in the 0 codepage.
-		 * But this would also be costly...
 		 */
 		if (teco_current_doc_must_undo()) {
 			/*
diff --git a/src/doc.c b/src/doc.c
index 516dadb..a1ebe2c 100644
--- a/src/doc.c
+++ b/src/doc.c
@@ -60,10 +60,19 @@ teco_doc_get_scintilla(teco_doc_t *ctx)
 	return ctx->doc;
 }
 
-/** @memberof teco_doc_t */
+/**
+ * Edit the given document in the Q-Register view.
+ *
+ * @param ctx The document to edit.
+ * @param default_cp The codepage to configure if the document is new.
+ *
+ * @memberof teco_doc_t
+ */
 void
-teco_doc_edit(teco_doc_t *ctx)
+teco_doc_edit(teco_doc_t *ctx, guint default_cp)
 {
+	gboolean new_doc = ctx->doc == NULL;
+
 	teco_view_ssm(teco_qreg_view, SCI_SETDOCPOINTER, 0,
 	              (sptr_t)teco_doc_get_scintilla(ctx));
 	teco_view_ssm(teco_qreg_view, SCI_SETFIRSTVISIBLELINE, ctx->first_line, 0);
@@ -77,22 +86,33 @@ teco_doc_edit(teco_doc_t *ctx)
 	 */
 	//teco_view_set_representations(teco_qreg_view);
 
-	/*
-	 * All UTF-8 documents are expected to have a character index.
-	 * This allocates nothing if the document is not UTF-8.
-	 * But it is reference counted, so it must not be allocated
-	 * more than once.
-	 *
-	 * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER
-	 * (although I don't know why and where).
-	 * Recalculating it could be inefficient.
-	 * The index is reference-counted. Perhaps we could just allocate
-	 * one more time, so it doesn't get freed when changing documents.
-	 */
-	if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0)
-						& SC_LINECHARACTERINDEX_UTF32))
+	if (new_doc && default_cp != SC_CP_UTF8) {
+		/*
+		 * There is a chance the user will see this buffer even if we
+		 * are currently in batch mode.
+		 */
+		for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++)
+			teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET,
+			              style, default_cp);
+		/* 0 is used for ALL single-byte encodings */
+		teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0);
+	} else if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0)
+							& SC_LINECHARACTERINDEX_UTF32)) {
+		/*
+		 * All UTF-8 documents are expected to have a character index.
+		 * This allocates nothing if the document is not UTF-8.
+		 * But it is reference counted, so it must not be allocated
+		 * more than once.
+		 *
+		 * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER
+		 * (although I don't know why and where).
+		 * Recalculating it could be inefficient.
+		 * The index is reference-counted. Perhaps we could just allocate
+		 * one more time, so it doesn't get freed when changing documents.
+		 */
 		teco_view_ssm(teco_qreg_view, SCI_ALLOCATELINECHARACTERINDEX,
 		              SC_LINECHARACTERINDEX_UTF32, 0);
+	}
 }
 
 /** @memberof teco_doc_t */
@@ -122,41 +142,12 @@ teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage
 	ctx->doc = NULL;
 
 	teco_doc_reset(ctx);
-	teco_doc_edit(ctx);
+	teco_doc_edit(ctx, codepage);
 
 	teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)(str ? : ""));
 
-	if (codepage != SC_CP_UTF8) {
-		/*
-		 * We have a new UTF-8 document and
-		 * teco_doc_edit() currently always initializes an index.
-		 */
-		teco_view_ssm(teco_qreg_view, SCI_RELEASELINECHARACTERINDEX,
-		              SC_LINECHARACTERINDEX_UTF32, 0);
-		g_assert(!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0)
-							& SC_LINECHARACTERINDEX_UTF32));
-
-		/*
-		 * Configure a single-byte codepage/charset.
-		 * This requires setting it on all of the possible styles.
-		 * Unfortunately there can theoretically even be 255 (STYLE_MAX) styles.
-		 * This is important only for display purposes - other than that
-		 * all single-byte encodings are handled the same.
-		 *
-		 * FIXME: Should we avoid this if codepage == 0?
-		 * It will be used for raw byte handling mostly.
-		 * Perhaps we should even set char representations appropriately
-		 * for all non-ANSI codepoints in the 0 codepage.
-		 * But this would also be costly...
-		 */
-		for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++)
-			teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET, style, codepage);
-		/* 0 is used for ALL single-byte encodings */
-		teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0);
-	}
-
 	if (teco_qreg_current)
-		teco_doc_edit(&teco_qreg_current->string);
+		teco_doc_edit(&teco_qreg_current->string, 0);
 }
 
 /** @memberof teco_doc_t */
@@ -201,14 +192,14 @@ teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage
 		if (outlen)
 			*outlen = 0;
 		if (codepage)
-			*codepage = SC_CP_UTF8;
+			*codepage = teco_default_codepage();
 		return;
 	}
 
 	if (teco_qreg_current)
 		teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
 
-	teco_doc_edit(ctx);
+	teco_doc_edit(ctx, teco_default_codepage());
 
 	gsize len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
 	if (str) {
@@ -221,7 +212,7 @@ teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage
 		*codepage = teco_view_get_codepage(teco_qreg_view);
 
 	if (teco_qreg_current)
-		teco_doc_edit(&teco_qreg_current->string);
+		teco_doc_edit(&teco_qreg_current->string, 0);
 }
 
 /** @memberof teco_doc_t */
diff --git a/src/doc.h b/src/doc.h
index b7a4f99..1218c35 100644
--- a/src/doc.h
+++ b/src/doc.h
@@ -62,7 +62,7 @@ teco_doc_init(teco_doc_t *ctx)
 	memset(ctx, 0, sizeof(*ctx));
 }
 
-void teco_doc_edit(teco_doc_t *ctx);
+void teco_doc_edit(teco_doc_t *ctx, guint default_cp);
 void teco_doc_undo_edit(teco_doc_t *ctx);
 
 void teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage);
diff --git a/src/glob.c b/src/glob.c
index 2c955ee..9aa499d 100644
--- a/src/glob.c
+++ b/src/glob.c
@@ -319,7 +319,7 @@ teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str,
 		g_assert(glob_reg != NULL);
 		if (!glob_reg->vtable->undo_set_string(glob_reg, error) ||
 		    !glob_reg->vtable->set_string(glob_reg, filename, strlen(filename),
-		                                  SC_CP_UTF8, error))
+		                                  teco_default_codepage(), error))
 			return NULL;
 	}
 
diff --git a/src/main.c b/src/main.c
index c38b1a3..abf8d2f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -105,6 +105,7 @@ teco_get_default_config_path(const gchar *program)
 static gchar *teco_eval_macro = NULL;
 static gboolean teco_mung_file = FALSE;
 static gboolean teco_mung_profile = TRUE;
+static gboolean teco_8bit_clean = FALSE;
 
 static gchar *
 teco_process_options(gint *argc, gchar ***argv)
@@ -120,6 +121,8 @@ teco_process_options(gint *argc, gchar ***argv)
 		 "Do not mung "
 		 "$SCITECOCONFIG" G_DIR_SEPARATOR_S INI_FILE " "
 		 "even if it exists"},
+		{"8bit", '8', 0, G_OPTION_ARG_NONE, &teco_8bit_clean,
+		 "Use ANSI encoding by default and disable automatic EOL conversion"},
 		{NULL}
 	};
 
@@ -320,6 +323,10 @@ main(int argc, char **argv)
 	 * to the macro or munged file.
 	 */
 
+	if (teco_8bit_clean)
+		/* equivalent to 16,4ED but executed earlier */
+		teco_ed = (teco_ed & ~TECO_ED_AUTOEOL) | TECO_ED_DEFAULT_ANSI;
+
 	/*
 	 * Theoretically, QReg tables should only be initialized
 	 * after the interface, since they contain Scintilla documents.
diff --git a/src/parser.c b/src/parser.c
index aef6223..ed21740 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -724,7 +724,7 @@ teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escap
 	teco_machine_init(&ctx->parent, &teco_state_stringbuilding_start, must_undo);
 	ctx->escape_char = escape_char;
 	ctx->qreg_table_locals = locals;
-	ctx->codepage = SC_CP_UTF8;
+	ctx->codepage = teco_default_codepage();
 }
 
 void
@@ -767,7 +767,7 @@ gboolean
 teco_state_expectstring_initial(teco_machine_main_t *ctx, GError **error)
 {
 	if (ctx->mode == TECO_MODE_NORMAL)
-		teco_undo_guint(ctx->expectstring.machine.codepage) = SC_CP_UTF8;
+		teco_undo_guint(ctx->expectstring.machine.codepage) = teco_default_codepage();
 	return TRUE;
 }
 
diff --git a/src/qreg-commands.c b/src/qreg-commands.c
index e8be384..0e07944 100644
--- a/src/qreg-commands.c
+++ b/src/qreg-commands.c
@@ -372,7 +372,7 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx,
 	gint args = teco_expressions_args();
 
 	if (args > 0) {
-		guint codepage = SC_CP_UTF8;
+		guint codepage = teco_default_codepage();
 		if (colon_modified && !qreg->vtable->get_string(qreg, NULL, NULL, &codepage, error))
 			return NULL;
 
@@ -415,7 +415,7 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx,
 			/* set register */
 			if (!qreg->vtable->undo_set_string(qreg, error) ||
 			    !qreg->vtable->set_string(qreg, buffer, len,
-			                              SC_CP_UTF8, error))
+			                              codepage, error))
 				return NULL;
 		}
 	}
@@ -429,7 +429,7 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx,
 		/* set register */
 		if (!qreg->vtable->undo_set_string(qreg, error) ||
 		    !qreg->vtable->set_string(qreg, str->data, str->len,
-		                              SC_CP_UTF8, error))
+		                              teco_default_codepage(), error))
 			return NULL;
 	}
 
diff --git a/src/qreg.c b/src/qreg.c
index 08bc8fc..fb559af 100644
--- a/src/qreg.c
+++ b/src/qreg.c
@@ -127,11 +127,11 @@ teco_qreg_set_eol_mode(teco_qreg_t *qreg, gint mode)
 	if (teco_qreg_current)
 		teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
 
-	teco_doc_edit(&qreg->string);
+	teco_doc_edit(&qreg->string, teco_default_codepage());
 	teco_view_ssm(teco_qreg_view, SCI_SETEOLMODE, mode, 0);
 
 	if (teco_qreg_current)
-		teco_doc_edit(&teco_qreg_current->string);
+		teco_doc_edit(&teco_qreg_current->string, 0);
 }
 
 /** @memberof teco_qreg_t */
@@ -144,7 +144,7 @@ teco_qreg_load(teco_qreg_t *qreg, const gchar *filename, GError **error)
 	if (teco_qreg_current)
 		teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
 
-	teco_doc_edit(&qreg->string);
+	teco_doc_edit(&qreg->string, teco_default_codepage());
 	teco_doc_reset(&qreg->string);
 
 	/*
@@ -162,7 +162,7 @@ teco_qreg_load(teco_qreg_t *qreg, const gchar *filename, GError **error)
 		return FALSE;
 
 	if (teco_qreg_current)
-		teco_doc_edit(&teco_qreg_current->string);
+		teco_doc_edit(&teco_qreg_current->string, 0);
 
 	return TRUE;
 }
@@ -174,18 +174,14 @@ teco_qreg_save(teco_qreg_t *qreg, const gchar *filename, GError **error)
 	if (teco_qreg_current)
 		teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
 
-	teco_doc_edit(&qreg->string);
+	teco_doc_edit(&qreg->string, teco_default_codepage());
 
-	if (!teco_view_save(teco_qreg_view, filename, error)) {
-		if (teco_qreg_current)
-			teco_doc_edit(&teco_qreg_current->string);
-		return FALSE;
-	}
+	gboolean ret = teco_view_save(teco_qreg_view, filename, error);
 
 	if (teco_qreg_current)
-		teco_doc_edit(&teco_qreg_current->string);
+		teco_doc_edit(&teco_qreg_current->string, 0);
 
-	return TRUE;
+	return ret;
 }
 
 static gboolean
@@ -239,14 +235,14 @@ teco_qreg_plain_append_string(teco_qreg_t *qreg, const gchar *str, gsize len, GE
 	if (teco_qreg_current)
 		teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
 
-	teco_doc_edit(&qreg->string);
+	teco_doc_edit(&qreg->string, teco_default_codepage());
 
 	teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0);
 	teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)str);
 	teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0);
 
 	if (teco_qreg_current)
-		teco_doc_edit(&teco_qreg_current->string);
+		teco_doc_edit(&teco_qreg_current->string, 0);
 	return TRUE;
 }
 
@@ -262,27 +258,24 @@ static gboolean
 teco_qreg_plain_get_character(teco_qreg_t *qreg, teco_int_t position,
                               teco_int_t *chr, GError **error)
 {
-	gboolean ret = TRUE;
-
 	if (teco_qreg_current)
 		teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
 
-	teco_doc_edit(&qreg->string);
+	teco_doc_edit(&qreg->string, teco_default_codepage());
 
 	sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
 	gssize off = teco_view_glyphs2bytes(teco_qreg_view, position);
 
-	if (off < 0 || off == len) {
+	gboolean ret = off >= 0 && off != len;
+	if (!ret)
 		g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE,
 		            "Position %" TECO_INT_FORMAT " out of range", position);
-		ret = FALSE;
 		/* make sure we still restore the current Q-Register */
-	} else {
+	else
 		*chr = teco_view_get_character(teco_qreg_view, off, len);
-	}
 
 	if (teco_qreg_current)
-		teco_doc_edit(&teco_qreg_current->string);
+		teco_doc_edit(&teco_qreg_current->string, 0);
 
 	return ret;
 }
@@ -293,13 +286,13 @@ teco_qreg_plain_get_length(teco_qreg_t *qreg, GError **error)
 	if (teco_qreg_current)
 		teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
 
-	teco_doc_edit(&qreg->string);
+	teco_doc_edit(&qreg->string, teco_default_codepage());
 
 	sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
 	teco_int_t ret = teco_view_bytes2glyphs(teco_qreg_view, len);
 
 	if (teco_qreg_current)
-		teco_doc_edit(&teco_qreg_current->string);
+		teco_doc_edit(&teco_qreg_current->string, 0);
 
 	return ret;
 }
@@ -326,7 +319,7 @@ teco_qreg_plain_edit(teco_qreg_t *qreg, GError **error)
 	if (teco_qreg_current)
 		teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
 
-	teco_doc_edit(&qreg->string);
+	teco_doc_edit(&qreg->string, teco_default_codepage());
 	teco_interface_show_view(teco_qreg_view);
 	teco_interface_info_update(qreg);
 
@@ -549,7 +542,7 @@ teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
 	 */
 	*len = teco_ring_current->filename ? strlen(teco_ring_current->filename) : 0;
 	if (codepage)
-		 *codepage = SC_CP_UTF8;
+		 *codepage = teco_default_codepage();
 	return TRUE;
 }
 
@@ -647,7 +640,7 @@ teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
 	else
 		g_free(dir);
 	if (codepage)
-		*codepage = SC_CP_UTF8;
+		*codepage = teco_default_codepage();
 
 	return TRUE;
 }
@@ -798,7 +791,7 @@ teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
 		teco_string_clear(&str_converted);
 	*len = str_converted.len;
 	if (codepage)
-		*codepage = SC_CP_UTF8;
+		*codepage = teco_default_codepage();
 
 	return TRUE;
 }
@@ -910,7 +903,7 @@ teco_qreg_table_set_environ(teco_qreg_table_t *table, GError **error)
 		}
 
 		if (!qreg->vtable->set_string(qreg, value, strlen(value),
-		                              SC_CP_UTF8, error))
+		                              teco_default_codepage(), error))
 			return FALSE;
 	}
 
diff --git a/src/sciteco.h b/src/sciteco.h
index 7f420e8..09dea3b 100644
--- a/src/sciteco.h
+++ b/src/sciteco.h
@@ -21,6 +21,8 @@
 
 #include <glib.h>
 
+#include <Scintilla.h>
+
 #if TECO_INTEGER == 32
 typedef gint32 teco_int_t;
 #define TECO_INT_FORMAT G_GINT32_FORMAT
@@ -83,6 +85,7 @@ teco_is_failure(teco_bool_t x)
  * This is not a bitfield, since it is set from SciTECO.
  */
 enum {
+	TECO_ED_DEFAULT_ANSI	= (1 << 2),
 	TECO_ED_AUTOCASEFOLD	= (1 << 3),
 	TECO_ED_AUTOEOL		= (1 << 4),
 	TECO_ED_HOOKS		= (1 << 5),
@@ -94,6 +97,12 @@ enum {
 /* in main.c */
 extern teco_int_t teco_ed;
 
+static inline guint
+teco_default_codepage(void)
+{
+	return teco_ed & TECO_ED_DEFAULT_ANSI ? SC_CHARSET_ANSI : SC_CP_UTF8;
+}
+
 /* in main.c */
 extern volatile sig_atomic_t teco_interrupted;
 
diff --git a/src/search.c b/src/search.c
index cf26c7f..c1dd542 100644
--- a/src/search.c
+++ b/src/search.c
@@ -678,7 +678,7 @@ teco_state_search_done(teco_machine_main_t *ctx, const teco_string_t *str, GErro
 
 		if (!search_reg->vtable->undo_set_string(search_reg, error) ||
 		    !search_reg->vtable->set_string(search_reg, str->data, str->len,
-		                                    SC_CP_UTF8, error))
+		                                    teco_default_codepage(), error))
 			return NULL;
 
 		teco_interface_ssm(SCI_SETANCHOR, anchor, 0);
@@ -1078,7 +1078,7 @@ teco_state_replace_default_insert_done_overwrite(teco_machine_main_t *ctx, const
 	if (str->len > 0) {
 		if (!replace_reg->vtable->undo_set_string(replace_reg, error) ||
 		    !replace_reg->vtable->set_string(replace_reg, str->data, str->len,
-		                                     SC_CP_UTF8, error))
+		                                     teco_default_codepage(), error))
 			return NULL;
 	} else {
 		g_auto(teco_string_t) replace_str = {NULL, 0};
@@ -1111,7 +1111,7 @@ teco_state_replace_default_ignore_done(teco_machine_main_t *ctx, const teco_stri
 
 	if (!replace_reg->vtable->undo_set_string(replace_reg, error) ||
 	    !replace_reg->vtable->set_string(replace_reg, str->data, str->len,
-	                                     SC_CP_UTF8, error))
+	                                     teco_default_codepage(), error))
 		return NULL;
 
 	return &teco_state_start;
diff --git a/src/spawn.c b/src/spawn.c
index c6dd779..6d3a441 100644
--- a/src/spawn.c
+++ b/src/spawn.c
@@ -165,9 +165,10 @@ teco_state_execute_initial(teco_machine_main_t *ctx, GError **error)
 		return TRUE;
 
 	/*
-	 * Command-lines and file names are always assumed to be UTF-8.
+	 * Command-lines and file names are always assumed to be UTF-8,
+	 * unless we set TECO_ED_DEFAULT_ANSI.
 	 */
-	teco_undo_guint(ctx->expectstring.machine.codepage) = SC_CP_UTF8;
+	teco_undo_guint(ctx->expectstring.machine.codepage) = teco_default_codepage();
 
 	if (!teco_expressions_eval(FALSE, error))
 		return FALSE;
@@ -702,7 +703,7 @@ teco_spawn_stdout_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer da
 			} else {
 				if (!qreg->vtable->undo_set_string(qreg, &teco_spawn_ctx.error) ||
 				    !qreg->vtable->set_string(qreg, buffer.data, buffer.len,
-				                              SC_CP_UTF8, &teco_spawn_ctx.error))
+				                              teco_default_codepage(), &teco_spawn_ctx.error))
 					goto error;
 			}
 		} else {
diff --git a/src/view.c b/src/view.c
index 291c06b..0d1d168 100644
--- a/src/view.c
+++ b/src/view.c
@@ -72,6 +72,27 @@ teco_view_setup(teco_view_t *ctx)
 	 */
 	teco_view_ssm(ctx, SCI_SETMARGINWIDTHN, 1, 0);
 
+	if (teco_ed & TECO_ED_DEFAULT_ANSI) {
+		/*
+		 * Configure a single-byte codepage/charset.
+		 * This requires setting it on all of the possible styles.
+		 * Fortunately, we can do it before SCI_STYLECLEARALL.
+		 * This is important only for display purposes - other than that
+		 * all single-byte encodings are handled the same.
+		 */
+		teco_view_ssm(ctx, SCI_STYLESETCHARACTERSET, STYLE_DEFAULT, SC_CHARSET_ANSI);
+		/* 0 is used for ALL single-byte encodings */
+		teco_view_ssm(ctx, SCI_SETCODEPAGE, 0, 0);
+	} else {
+		/*
+		 * Documents are UTF-8 by default and all UTF-8 documents
+		 * are expected to have a character index.
+		 * This is a property of the document, instead of the view.
+		 */
+		teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX,
+		              SC_LINECHARACTERINDEX_UTF32, 0);
+	}
+
 	/*
 	 * Set some basic styles in order to provide
 	 * a consistent look across UIs if no profile
@@ -118,14 +139,6 @@ teco_view_setup(teco_view_t *ctx)
 	 * the representations only once.
 	 */
 	teco_view_set_representations(ctx);
-
-	/*
-	 * Documents are UTF-8 by default and all UTF-8 documents
-	 * are expected to have a character index.
-	 * This is a property of the document, instead of the view.
-	 */
-	teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX,
-	              SC_LINECHARACTERINDEX_UTF32, 0);
 }
 
 TECO_DEFINE_UNDO_CALL(teco_view_ssm, teco_view_t *, unsigned int, uptr_t, sptr_t);
@@ -145,6 +158,28 @@ teco_view_set_representations(teco_view_t *ctx)
 		gchar buf[] = {(gchar)cc, '\0'};
 		teco_view_ssm(ctx, SCI_SETREPRESENTATION, (uptr_t)buf, (sptr_t)reps[cc]);
 	}
+
+	if (teco_ed & TECO_ED_DEFAULT_ANSI) {
+		/*
+		 * Non-ANSI chars should be visible somehow.
+		 * This would best be done always when changing the
+		 * encoding to 0, but it would be kind of expensive.
+		 *
+		 * FIXME: On the other hand, this could cause problems
+		 * when setting SC_CP_UTF8 later on.
+		 */
+		for (guint cc = 0x80; cc <= 0xFF; cc++) {
+			gchar buf[] = {(gchar)cc, '\0'};
+			gchar rep[2+1];
+			/*
+			 * Hexadecimal is poorly supported in SciTECO, but
+			 * multiple decimal numbers one after another look
+			 * confusing, esp. in Curses.
+			 */
+			g_snprintf(rep, sizeof(rep), "%02X", cc);
+			teco_view_ssm(ctx, SCI_SETREPRESENTATION, (uptr_t)buf, (sptr_t)rep);
+		}
+	}
 }
 
 /**