aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r-- README | 2
-rw-r--r-- doc/sciteco.1.in | 9
-rw-r--r-- doc/sciteco.7.template | 5
-rw-r--r-- sample.teco_ini | 2
-rw-r--r-- src/cmdline.c | 3
-rw-r--r-- src/core-commands.c | 8
-rw-r--r-- src/doc.c | 91
-rw-r--r-- src/doc.h | 2
-rw-r--r-- src/glob.c | 2
-rw-r--r-- src/main.c | 7
-rw-r--r-- src/parser.c | 4
-rw-r--r-- src/qreg-commands.c | 6
-rw-r--r-- src/qreg.c | 51
-rw-r--r-- src/sciteco.h | 9
-rw-r--r-- src/search.c | 6
-rw-r--r-- src/spawn.c | 7
-rw-r--r-- src/view.c | 51
17 files changed, 158 insertions, 107 deletions
diff --git a/README b/README
index ba26222..651d8be 100644
--- a/README
+++ b/README
@@ -77,7 +77,7 @@ Features
* Full Unicode (UTF-8) support: The document is still represented as a random-accessible
codepoint sequence.
* 8-bit clean: SciTECO can be used to edit binary files if the encoding is changed to
- ANSI (`0EE`) and automatic EOL conversion is turned off (`16,0ED`).
+ ANSI and automatic EOL conversion is turned off (easiest with `--8bit`).
* Self-documenting: An integrated indexed help system allows browsing formatted documentation
about commands, macros and concepts within SciTECO (`?` command).
Macro packages can be documented with the `tedoc` tool, generating man pages.
diff --git a/doc/sciteco.1.in b/doc/sciteco.1.in
index 73303a1..b03f62d 100644
--- a/doc/sciteco.1.in
+++ b/doc/sciteco.1.in
@@ -19,6 +19,7 @@ Scintilla-based \fBT\fPext \fBE\fPditor and \fBCO\fPrrector
.OP "-e|--eval" macro
.OP "-m|--mung"
.OP "--no-profile"
+.OP "-8|--8bit"
.RI [ "UI option .\|.\|." ]
.OP "--"
.RI [ script ]
@@ -191,6 +192,14 @@ munging an empty file.
This is useful to fix up a broken profile script.
This option has no effect when a file is explicitly munged with
.BR \-\-mung .
+.IP "\fB-8\fR, \fB--8bit\fR"
+.SCITECO_TOPIC "-8" "--8bit"
+Use raw single-byte ANSI encoding by default and disable automatic EOL conversion,
+which optimizes \*(ST for 8-bit cleanliness.
+It is equivalent to executing \(lq16,4ED\(rq, but since it is executed
+very early at startup, all Q-Registers and the unnamed buffer will
+already be in ANSI encoding.
+This option is also useful when munging the profile macro.
.IP "\fIUI options .\|.\|.\fP"
Some graphical user interfaces, notably GTK+, provide
additional command line options.
diff --git a/doc/sciteco.7.template b/doc/sciteco.7.template
index f344820..a5b7f4a 100644
--- a/doc/sciteco.7.template
+++ b/doc/sciteco.7.template
@@ -1086,7 +1086,10 @@ Currently, \*(ST supports UTF-8 and single-byte ANSI encodings,
that can also be used for editing raw binary files.
\# You can configure other single-byte code pages with EE,
\# but there isn't yet any way to insert characters.
-UTF-8 is the default codepage for new buffers and Q-Registers.
+UTF-8 is the default codepage for new buffers and Q-Registers
+unless bit 2 (value 4) of the \fBED\fP flags is set.
+You can also specify \fB--8bit\fP to optimize \*(ST for
+8-bit cleanliness.
While navigation in documents with single-byte encodings
takes place in constant time, \*(ST uses heuristics in
UTF-8 documents for translating between byte and character
diff --git a/sample.teco_ini b/sample.teco_ini
index a352f3e..d7060dc 100644
--- a/sample.teco_ini
+++ b/sample.teco_ini
@@ -17,7 +17,7 @@ EMQ[$SCITECOPATH]/session.tes
32,0ED
!* non-UTF-8 documents are assumed to be in latin1 (8859-1) *!
- 1024<:C; -A"T 1EE 1;'> J
+ EE"N 1024<:C; -A"T 1EE 1;'> J '
M[lexer.auto]
diff --git a/src/cmdline.c b/src/cmdline.c
index d6fcd37..47ef86f 100644
--- a/src/cmdline.c
+++ b/src/cmdline.c
@@ -1052,7 +1052,8 @@ teco_state_save_cmdline_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg
return &teco_state_start;
if (!qreg->vtable->undo_set_string(qreg, error) ||
- !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len, SC_CP_UTF8, error))
+ !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len,
+ teco_default_codepage(), error))
return NULL;
return &teco_state_start;
diff --git a/src/core-commands.c b/src/core-commands.c
index 638279d..176bb17 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -2038,6 +2038,11 @@ teco_state_ecommand_close(teco_machine_main_t *ctx, GError **error)
* Without any argument ED returns the current flags.
*
* Currently, the following flags are used by \*(ST:
+ * - 4: If enabled, prefer raw single-byte ANSI encoding
+ * for all new buffers and registers.
+ * Setting this flag via \fBED\fP does not change the encoding
+ * of existing buffers or of already initialized default registers,
+ * so you might want to launch \*(ST with \fB--8bit\fP instead.
* - 8: Enable/disable automatic folding of case-insensitive
* command characters during interactive key translation.
* The case of letter keys is inverted, so one or two
@@ -2610,9 +2615,6 @@ teco_state_ecommand_encoding(teco_machine_main_t *ctx, GError **error)
*
* FIXME: Should we avoid this if new_cp == 0?
* It will be used for raw byte handling mostly.
- * Perhaps we should even set char representations appropriately
- * for all non-ANSI codepoints in the 0 codepage.
- * But this would also be costly...
*/
if (teco_current_doc_must_undo()) {
/*
diff --git a/src/doc.c b/src/doc.c
index 516dadb..a1ebe2c 100644
--- a/src/doc.c
+++ b/src/doc.c
@@ -60,10 +60,19 @@ teco_doc_get_scintilla(teco_doc_t *ctx)
return ctx->doc;
}
-/** @memberof teco_doc_t */
+/**
+ * Edit the given document in the Q-Register view.
+ *
+ * @param ctx The document to edit.
+ * @param default_cp The codepage to configure if the document is new.
+ *
+ * @memberof teco_doc_t
+ */
void
-teco_doc_edit(teco_doc_t *ctx)
+teco_doc_edit(teco_doc_t *ctx, guint default_cp)
{
+ gboolean new_doc = ctx->doc == NULL;
+
teco_view_ssm(teco_qreg_view, SCI_SETDOCPOINTER, 0,
(sptr_t)teco_doc_get_scintilla(ctx));
teco_view_ssm(teco_qreg_view, SCI_SETFIRSTVISIBLELINE, ctx->first_line, 0);
@@ -77,22 +86,33 @@ teco_doc_edit(teco_doc_t *ctx)
*/
//teco_view_set_representations(teco_qreg_view);
- /*
- * All UTF-8 documents are expected to have a character index.
- * This allocates nothing if the document is not UTF-8.
- * But it is reference counted, so it must not be allocated
- * more than once.
- *
- * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER
- * (although I don't know why and where).
- * Recalculating it could be inefficient.
- * The index is reference-counted. Perhaps we could just allocate
- * one more time, so it doesn't get freed when changing documents.
- */
- if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0)
- & SC_LINECHARACTERINDEX_UTF32))
+ if (new_doc && default_cp != SC_CP_UTF8) {
+ /*
+ * There is a chance the user will see this buffer even if we
+ * are currently in batch mode.
+ */
+ for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++)
+ teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET,
+ style, default_cp);
+ /* 0 is used for ALL single-byte encodings */
+ teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0);
+ } else if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0)
+ & SC_LINECHARACTERINDEX_UTF32)) {
+ /*
+ * All UTF-8 documents are expected to have a character index.
+ * This allocates nothing if the document is not UTF-8.
+ * But it is reference counted, so it must not be allocated
+ * more than once.
+ *
+ * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER
+ * (although I don't know why and where).
+ * Recalculating it could be inefficient.
+ * The index is reference-counted. Perhaps we could just allocate
+ * one more time, so it doesn't get freed when changing documents.
+ */
teco_view_ssm(teco_qreg_view, SCI_ALLOCATELINECHARACTERINDEX,
SC_LINECHARACTERINDEX_UTF32, 0);
+ }
}
/** @memberof teco_doc_t */
@@ -122,41 +142,12 @@ teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage
ctx->doc = NULL;
teco_doc_reset(ctx);
- teco_doc_edit(ctx);
+ teco_doc_edit(ctx, codepage);
teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)(str ? : ""));
- if (codepage != SC_CP_UTF8) {
- /*
- * We have a new UTF-8 document and
- * teco_doc_edit() currently always initializes an index.
- */
- teco_view_ssm(teco_qreg_view, SCI_RELEASELINECHARACTERINDEX,
- SC_LINECHARACTERINDEX_UTF32, 0);
- g_assert(!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0)
- & SC_LINECHARACTERINDEX_UTF32));
-
- /*
- * Configure a single-byte codepage/charset.
- * This requires setting it on all of the possible styles.
- * Unfortunately there can theoretically even be 255 (STYLE_MAX) styles.
- * This is important only for display purposes - other than that
- * all single-byte encodings are handled the same.
- *
- * FIXME: Should we avoid this if codepage == 0?
- * It will be used for raw byte handling mostly.
- * Perhaps we should even set char representations appropriately
- * for all non-ANSI codepoints in the 0 codepage.
- * But this would also be costly...
- */
- for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++)
- teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET, style, codepage);
- /* 0 is used for ALL single-byte encodings */
- teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0);
- }
-
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
+ teco_doc_edit(&teco_qreg_current->string, 0);
}
/** @memberof teco_doc_t */
@@ -201,14 +192,14 @@ teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage
if (outlen)
*outlen = 0;
if (codepage)
- *codepage = SC_CP_UTF8;
+ *codepage = teco_default_codepage();
return;
}
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(ctx);
+ teco_doc_edit(ctx, teco_default_codepage());
gsize len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
if (str) {
@@ -221,7 +212,7 @@ teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage
*codepage = teco_view_get_codepage(teco_qreg_view);
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
+ teco_doc_edit(&teco_qreg_current->string, 0);
}
/** @memberof teco_doc_t */
diff --git a/src/doc.h b/src/doc.h
index b7a4f99..1218c35 100644
--- a/src/doc.h
+++ b/src/doc.h
@@ -62,7 +62,7 @@ teco_doc_init(teco_doc_t *ctx)
memset(ctx, 0, sizeof(*ctx));
}
-void teco_doc_edit(teco_doc_t *ctx);
+void teco_doc_edit(teco_doc_t *ctx, guint default_cp);
void teco_doc_undo_edit(teco_doc_t *ctx);
void teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage);
diff --git a/src/glob.c b/src/glob.c
index 2c955ee..9aa499d 100644
--- a/src/glob.c
+++ b/src/glob.c
@@ -319,7 +319,7 @@ teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str,
g_assert(glob_reg != NULL);
if (!glob_reg->vtable->undo_set_string(glob_reg, error) ||
!glob_reg->vtable->set_string(glob_reg, filename, strlen(filename),
- SC_CP_UTF8, error))
+ teco_default_codepage(), error))
return NULL;
}
diff --git a/src/main.c b/src/main.c
index c38b1a3..abf8d2f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -105,6 +105,7 @@ teco_get_default_config_path(const gchar *program)
static gchar *teco_eval_macro = NULL;
static gboolean teco_mung_file = FALSE;
static gboolean teco_mung_profile = TRUE;
+static gboolean teco_8bit_clean = FALSE;
static gchar *
teco_process_options(gint *argc, gchar ***argv)
@@ -120,6 +121,8 @@ teco_process_options(gint *argc, gchar ***argv)
"Do not mung "
"$SCITECOCONFIG" G_DIR_SEPARATOR_S INI_FILE " "
"even if it exists"},
+ {"8bit", '8', 0, G_OPTION_ARG_NONE, &teco_8bit_clean,
+ "Use ANSI encoding by default and disable automatic EOL conversion"},
{NULL}
};
@@ -320,6 +323,10 @@ main(int argc, char **argv)
* to the macro or munged file.
*/
+ if (teco_8bit_clean)
+ /* equivalent to 16,4ED but executed earlier */
+ teco_ed = (teco_ed & ~TECO_ED_AUTOEOL) | TECO_ED_DEFAULT_ANSI;
+
/*
* Theoretically, QReg tables should only be initialized
* after the interface, since they contain Scintilla documents.
diff --git a/src/parser.c b/src/parser.c
index aef6223..ed21740 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -724,7 +724,7 @@ teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escap
teco_machine_init(&ctx->parent, &teco_state_stringbuilding_start, must_undo);
ctx->escape_char = escape_char;
ctx->qreg_table_locals = locals;
- ctx->codepage = SC_CP_UTF8;
+ ctx->codepage = teco_default_codepage();
}
void
@@ -767,7 +767,7 @@ gboolean
teco_state_expectstring_initial(teco_machine_main_t *ctx, GError **error)
{
if (ctx->mode == TECO_MODE_NORMAL)
- teco_undo_guint(ctx->expectstring.machine.codepage) = SC_CP_UTF8;
+ teco_undo_guint(ctx->expectstring.machine.codepage) = teco_default_codepage();
return TRUE;
}
diff --git a/src/qreg-commands.c b/src/qreg-commands.c
index e8be384..0e07944 100644
--- a/src/qreg-commands.c
+++ b/src/qreg-commands.c
@@ -372,7 +372,7 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx,
gint args = teco_expressions_args();
if (args > 0) {
- guint codepage = SC_CP_UTF8;
+ guint codepage = teco_default_codepage();
if (colon_modified && !qreg->vtable->get_string(qreg, NULL, NULL, &codepage, error))
return NULL;
@@ -415,7 +415,7 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx,
/* set register */
if (!qreg->vtable->undo_set_string(qreg, error) ||
!qreg->vtable->set_string(qreg, buffer, len,
- SC_CP_UTF8, error))
+ codepage, error))
return NULL;
}
}
@@ -429,7 +429,7 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx,
/* set register */
if (!qreg->vtable->undo_set_string(qreg, error) ||
!qreg->vtable->set_string(qreg, str->data, str->len,
- SC_CP_UTF8, error))
+ teco_default_codepage(), error))
return NULL;
}
diff --git a/src/qreg.c b/src/qreg.c
index 08bc8fc..fb559af 100644
--- a/src/qreg.c
+++ b/src/qreg.c
@@ -127,11 +127,11 @@ teco_qreg_set_eol_mode(teco_qreg_t *qreg, gint mode)
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(&qreg->string);
+ teco_doc_edit(&qreg->string, teco_default_codepage());
teco_view_ssm(teco_qreg_view, SCI_SETEOLMODE, mode, 0);
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
+ teco_doc_edit(&teco_qreg_current->string, 0);
}
/** @memberof teco_qreg_t */
@@ -144,7 +144,7 @@ teco_qreg_load(teco_qreg_t *qreg, const gchar *filename, GError **error)
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(&qreg->string);
+ teco_doc_edit(&qreg->string, teco_default_codepage());
teco_doc_reset(&qreg->string);
/*
@@ -162,7 +162,7 @@ teco_qreg_load(teco_qreg_t *qreg, const gchar *filename, GError **error)
return FALSE;
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
+ teco_doc_edit(&teco_qreg_current->string, 0);
return TRUE;
}
@@ -174,18 +174,14 @@ teco_qreg_save(teco_qreg_t *qreg, const gchar *filename, GError **error)
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(&qreg->string);
+ teco_doc_edit(&qreg->string, teco_default_codepage());
- if (!teco_view_save(teco_qreg_view, filename, error)) {
- if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
- return FALSE;
- }
+ gboolean ret = teco_view_save(teco_qreg_view, filename, error);
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
+ teco_doc_edit(&teco_qreg_current->string, 0);
- return TRUE;
+ return ret;
}
static gboolean
@@ -239,14 +235,14 @@ teco_qreg_plain_append_string(teco_qreg_t *qreg, const gchar *str, gsize len, GE
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(&qreg->string);
+ teco_doc_edit(&qreg->string, teco_default_codepage());
teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0);
teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)str);
teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0);
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
+ teco_doc_edit(&teco_qreg_current->string, 0);
return TRUE;
}
@@ -262,27 +258,24 @@ static gboolean
teco_qreg_plain_get_character(teco_qreg_t *qreg, teco_int_t position,
teco_int_t *chr, GError **error)
{
- gboolean ret = TRUE;
-
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(&qreg->string);
+ teco_doc_edit(&qreg->string, teco_default_codepage());
sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
gssize off = teco_view_glyphs2bytes(teco_qreg_view, position);
- if (off < 0 || off == len) {
+ gboolean ret = off >= 0 && off != len;
+ if (!ret)
g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE,
"Position %" TECO_INT_FORMAT " out of range", position);
- ret = FALSE;
/* make sure we still restore the current Q-Register */
- } else {
+ else
*chr = teco_view_get_character(teco_qreg_view, off, len);
- }
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
+ teco_doc_edit(&teco_qreg_current->string, 0);
return ret;
}
@@ -293,13 +286,13 @@ teco_qreg_plain_get_length(teco_qreg_t *qreg, GError **error)
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(&qreg->string);
+ teco_doc_edit(&qreg->string, teco_default_codepage());
sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
teco_int_t ret = teco_view_bytes2glyphs(teco_qreg_view, len);
if (teco_qreg_current)
- teco_doc_edit(&teco_qreg_current->string);
+ teco_doc_edit(&teco_qreg_current->string, 0);
return ret;
}
@@ -326,7 +319,7 @@ teco_qreg_plain_edit(teco_qreg_t *qreg, GError **error)
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
- teco_doc_edit(&qreg->string);
+ teco_doc_edit(&qreg->string, teco_default_codepage());
teco_interface_show_view(teco_qreg_view);
teco_interface_info_update(qreg);
@@ -549,7 +542,7 @@ teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
*/
*len = teco_ring_current->filename ? strlen(teco_ring_current->filename) : 0;
if (codepage)
- *codepage = SC_CP_UTF8;
+ *codepage = teco_default_codepage();
return TRUE;
}
@@ -647,7 +640,7 @@ teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
else
g_free(dir);
if (codepage)
- *codepage = SC_CP_UTF8;
+ *codepage = teco_default_codepage();
return TRUE;
}
@@ -798,7 +791,7 @@ teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len,
teco_string_clear(&str_converted);
*len = str_converted.len;
if (codepage)
- *codepage = SC_CP_UTF8;
+ *codepage = teco_default_codepage();
return TRUE;
}
@@ -910,7 +903,7 @@ teco_qreg_table_set_environ(teco_qreg_table_t *table, GError **error)
}
if (!qreg->vtable->set_string(qreg, value, strlen(value),
- SC_CP_UTF8, error))
+ teco_default_codepage(), error))
return FALSE;
}
diff --git a/src/sciteco.h b/src/sciteco.h
index 7f420e8..09dea3b 100644
--- a/src/sciteco.h
+++ b/src/sciteco.h
@@ -21,6 +21,8 @@
#include <glib.h>
+#include <Scintilla.h>
+
#if TECO_INTEGER == 32
typedef gint32 teco_int_t;
#define TECO_INT_FORMAT G_GINT32_FORMAT
@@ -83,6 +85,7 @@ teco_is_failure(teco_bool_t x)
* This is not a bitfield, since it is set from SciTECO.
*/
enum {
+ TECO_ED_DEFAULT_ANSI = (1 << 2),
TECO_ED_AUTOCASEFOLD = (1 << 3),
TECO_ED_AUTOEOL = (1 << 4),
TECO_ED_HOOKS = (1 << 5),
@@ -94,6 +97,12 @@ enum {
/* in main.c */
extern teco_int_t teco_ed;
+static inline guint
+teco_default_codepage(void)
+{
+ return teco_ed & TECO_ED_DEFAULT_ANSI ? SC_CHARSET_ANSI : SC_CP_UTF8;
+}
+
/* in main.c */
extern volatile sig_atomic_t teco_interrupted;
diff --git a/src/search.c b/src/search.c
index cf26c7f..c1dd542 100644
--- a/src/search.c
+++ b/src/search.c
@@ -678,7 +678,7 @@ teco_state_search_done(teco_machine_main_t *ctx, const teco_string_t *str, GErro
if (!search_reg->vtable->undo_set_string(search_reg, error) ||
!search_reg->vtable->set_string(search_reg, str->data, str->len,
- SC_CP_UTF8, error))
+ teco_default_codepage(), error))
return NULL;
teco_interface_ssm(SCI_SETANCHOR, anchor, 0);
@@ -1078,7 +1078,7 @@ teco_state_replace_default_insert_done_overwrite(teco_machine_main_t *ctx, const
if (str->len > 0) {
if (!replace_reg->vtable->undo_set_string(replace_reg, error) ||
!replace_reg->vtable->set_string(replace_reg, str->data, str->len,
- SC_CP_UTF8, error))
+ teco_default_codepage(), error))
return NULL;
} else {
g_auto(teco_string_t) replace_str = {NULL, 0};
@@ -1111,7 +1111,7 @@ teco_state_replace_default_ignore_done(teco_machine_main_t *ctx, const teco_stri
if (!replace_reg->vtable->undo_set_string(replace_reg, error) ||
!replace_reg->vtable->set_string(replace_reg, str->data, str->len,
- SC_CP_UTF8, error))
+ teco_default_codepage(), error))
return NULL;
return &teco_state_start;
diff --git a/src/spawn.c b/src/spawn.c
index c6dd779..6d3a441 100644
--- a/src/spawn.c
+++ b/src/spawn.c
@@ -165,9 +165,10 @@ teco_state_execute_initial(teco_machine_main_t *ctx, GError **error)
return TRUE;
/*
- * Command-lines and file names are always assumed to be UTF-8.
+ * Command-lines and file names are assumed to be UTF-8,
+ * unless TECO_ED_DEFAULT_ANSI is set.
*/
- teco_undo_guint(ctx->expectstring.machine.codepage) = SC_CP_UTF8;
+ teco_undo_guint(ctx->expectstring.machine.codepage) = teco_default_codepage();
if (!teco_expressions_eval(FALSE, error))
return FALSE;
@@ -702,7 +703,7 @@ teco_spawn_stdout_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer da
} else {
if (!qreg->vtable->undo_set_string(qreg, &teco_spawn_ctx.error) ||
!qreg->vtable->set_string(qreg, buffer.data, buffer.len,
- SC_CP_UTF8, &teco_spawn_ctx.error))
+ teco_default_codepage(), &teco_spawn_ctx.error))
goto error;
}
} else {
diff --git a/src/view.c b/src/view.c
index 291c06b..0d1d168 100644
--- a/src/view.c
+++ b/src/view.c
@@ -72,6 +72,27 @@ teco_view_setup(teco_view_t *ctx)
*/
teco_view_ssm(ctx, SCI_SETMARGINWIDTHN, 1, 0);
+ if (teco_ed & TECO_ED_DEFAULT_ANSI) {
+ /*
+ * Configure a single-byte codepage/charset.
+ * This requires setting it on all of the possible styles.
+ * Fortunately, we can do it before SCI_STYLECLEARALL.
+ * This is important only for display purposes - other than that
+ * all single-byte encodings are handled the same.
+ */
+ teco_view_ssm(ctx, SCI_STYLESETCHARACTERSET, STYLE_DEFAULT, SC_CHARSET_ANSI);
+ /* 0 is used for ALL single-byte encodings */
+ teco_view_ssm(ctx, SCI_SETCODEPAGE, 0, 0);
+ } else {
+ /*
+ * Documents are UTF-8 by default and all UTF-8 documents
+ * are expected to have a character index.
+ * This is a property of the document, instead of the view.
+ */
+ teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
+ }
+
/*
* Set some basic styles in order to provide
* a consistent look across UIs if no profile
@@ -118,14 +139,6 @@ teco_view_setup(teco_view_t *ctx)
* the representations only once.
*/
teco_view_set_representations(ctx);
-
- /*
- * Documents are UTF-8 by default and all UTF-8 documents
- * are expected to have a character index.
- * This is a property of the document, instead of the view.
- */
- teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX,
- SC_LINECHARACTERINDEX_UTF32, 0);
}
TECO_DEFINE_UNDO_CALL(teco_view_ssm, teco_view_t *, unsigned int, uptr_t, sptr_t);
@@ -145,6 +158,28 @@ teco_view_set_representations(teco_view_t *ctx)
gchar buf[] = {(gchar)cc, '\0'};
teco_view_ssm(ctx, SCI_SETREPRESENTATION, (uptr_t)buf, (sptr_t)reps[cc]);
}
+
+ if (teco_ed & TECO_ED_DEFAULT_ANSI) {
+ /*
+ * Non-ASCII bytes (0x80-0xFF) should be visible somehow.
+ * This would best be done always when changing the
+ * encoding to 0, but it would be kind of expensive.
+ *
+ * FIXME: On the other hand, this could cause problems
+ * when setting SC_CP_UTF8 later on.
+ */
+ for (guint cc = 0x80; cc <= 0xFF; cc++) {
+ gchar buf[] = {(gchar)cc, '\0'};
+ gchar rep[2+1];
+ /*
+ * Hexadecimal is poorly supported in SciTECO, but
+ * multiple decimal numbers one after another look
+ * confusing, esp. in Curses.
+ */
+ g_snprintf(rep, sizeof(rep), "%02X", cc);
+ teco_view_ssm(ctx, SCI_SETREPRESENTATION, (uptr_t)buf, (sptr_t)rep);
+ }
+ }
}
/**