diff options
-rw-r--r-- | README | 2 | ||||
-rw-r--r-- | doc/sciteco.1.in | 9 | ||||
-rw-r--r-- | doc/sciteco.7.template | 5 | ||||
-rw-r--r-- | sample.teco_ini | 2 | ||||
-rw-r--r-- | src/cmdline.c | 3 | ||||
-rw-r--r-- | src/core-commands.c | 8 | ||||
-rw-r--r-- | src/doc.c | 91 | ||||
-rw-r--r-- | src/doc.h | 2 | ||||
-rw-r--r-- | src/glob.c | 2 | ||||
-rw-r--r-- | src/main.c | 7 | ||||
-rw-r--r-- | src/parser.c | 4 | ||||
-rw-r--r-- | src/qreg-commands.c | 6 | ||||
-rw-r--r-- | src/qreg.c | 51 | ||||
-rw-r--r-- | src/sciteco.h | 9 | ||||
-rw-r--r-- | src/search.c | 6 | ||||
-rw-r--r-- | src/spawn.c | 7 | ||||
-rw-r--r-- | src/view.c | 51 |
17 files changed, 158 insertions, 107 deletions
@@ -77,7 +77,7 @@ Features * Full Unicode (UTF-8) support: The document is still represented as a random-accessible codepoint sequence. * 8-bit clean: SciTECO can be used to edit binary files if the encoding is changed to - ANSI (`0EE`) and automatic EOL conversion is turned off (`16,0ED`). + ANSI and automatic EOL conversion is turned off (easiest with `--8bit`). * Self-documenting: An integrated indexed help system allows browsing formatted documentation about commands, macros and concepts within SciTECO (`?` command). Macro packages can be documented with the `tedoc` tool, generating man pages. diff --git a/doc/sciteco.1.in b/doc/sciteco.1.in index 73303a1..b03f62d 100644 --- a/doc/sciteco.1.in +++ b/doc/sciteco.1.in @@ -19,6 +19,7 @@ Scintilla-based \fBT\fPext \fBE\fPditor and \fBCO\fPrrector .OP "-e|--eval" macro .OP "-m|--mung" .OP "--no-profile" +.OP "-8|--8bit" .RI [ "UI option .\|.\|." ] .OP "--" .RI [ script ] @@ -191,6 +192,14 @@ munging an empty file. This is useful to fix up a broken profile script. This option has no effect when a file is explicitly munged with .BR \-\-mung . +.IP "\fB-8\fR, \fB--8bit\fR" +.SCITECO_TOPIC "-8" "--8bit" +Use raw single-byte ANSI encoding by default and disable automatic EOL conversion, +which optimizes \*(ST for 8-bit cleanliness. +It is equivalent to executing \(lq16,4ED\(rq, but since it is executed +very early at startup, all Q-Registers and the unnamed buffer will +already be in ANSI encoding. +This option is also useful when munging the profile macro. .IP "\fIUI options .\|.\|.\fP" Some graphical user interfaces, notably GTK+, provide additional command line options. diff --git a/doc/sciteco.7.template b/doc/sciteco.7.template index f344820..a5b7f4a 100644 --- a/doc/sciteco.7.template +++ b/doc/sciteco.7.template @@ -1086,7 +1086,10 @@ Currently, \*(ST supports UTF-8 and single-byte ANSI encodings, that can also be used for editing raw binary files. 
\# You can configure other single-byte code pages with EE, \# but there isn't yet any way to insert characters. -UTF-8 is the default codepage for new buffers and Q-Registers. +UTF-8 is the default codepage for new buffers and Q-Registers +unless the 2nd \fBED\fP flag bit is set. +You can also specify \fB--8bit\fP to optimize \*(ST for +8-bit cleanliness. While navigation in documents with single-byte encodings takes place in constant time, \*(ST uses heuristics in UTF-8 documents for translating between byte and character diff --git a/sample.teco_ini b/sample.teco_ini index a352f3e..d7060dc 100644 --- a/sample.teco_ini +++ b/sample.teco_ini @@ -17,7 +17,7 @@ EMQ[$SCITECOPATH]/session.tes 32,0ED !* non-UTF-8 documents are assumed to be in latin1 (8859-1) *! - 1024<:C; -A"T 1EE 1;'> J + EE"N 1024<:C; -A"T 1EE 1;'> J ' M[lexer.auto] diff --git a/src/cmdline.c b/src/cmdline.c index d6fcd37..47ef86f 100644 --- a/src/cmdline.c +++ b/src/cmdline.c @@ -1052,7 +1052,8 @@ teco_state_save_cmdline_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg return &teco_state_start; if (!qreg->vtable->undo_set_string(qreg, error) || - !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len, SC_CP_UTF8, error)) + !qreg->vtable->set_string(qreg, teco_last_cmdline.data, teco_last_cmdline.len, + teco_default_codepage(), error)) return NULL; return &teco_state_start; diff --git a/src/core-commands.c b/src/core-commands.c index 638279d..176bb17 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -2038,6 +2038,11 @@ teco_state_ecommand_close(teco_machine_main_t *ctx, GError **error) * Without any argument ED returns the current flags. * * Currently, the following flags are used by \*(ST: + * - 4: If enabled, prefer raw single-byte ANSI encoding + * for all new buffers and registers. 
+ * This does not change the encoding of any existing + * buffers and any initialized default register when set via + * \fBED\fP, so you might want to launch \*(ST with \fB--8bit\fP. * - 8: Enable/disable automatic folding of case-insensitive * command characters during interactive key translation. * The case of letter keys is inverted, so one or two @@ -2610,9 +2615,6 @@ teco_state_ecommand_encoding(teco_machine_main_t *ctx, GError **error) * * FIXME: Should we avoid this if new_cp == 0? * It will be used for raw byte handling mostly. - * Perhaps we should even set char representations appropriately - * for all non-ANSI codepoints in the 0 codepage. - * But this would also be costly... */ if (teco_current_doc_must_undo()) { /* @@ -60,10 +60,19 @@ teco_doc_get_scintilla(teco_doc_t *ctx) return ctx->doc; } -/** @memberof teco_doc_t */ +/** + * Edit the given document in the Q-Register view. + * + * @param ctx The document to edit. + * @param default_cp The codepage to configure if the document is new. + * + * @memberof teco_doc_t + */ void -teco_doc_edit(teco_doc_t *ctx) +teco_doc_edit(teco_doc_t *ctx, guint default_cp) { + gboolean new_doc = ctx->doc == NULL; + teco_view_ssm(teco_qreg_view, SCI_SETDOCPOINTER, 0, (sptr_t)teco_doc_get_scintilla(ctx)); teco_view_ssm(teco_qreg_view, SCI_SETFIRSTVISIBLELINE, ctx->first_line, 0); @@ -77,22 +86,33 @@ teco_doc_edit(teco_doc_t *ctx) */ //teco_view_set_representations(teco_qreg_view); - /* - * All UTF-8 documents are expected to have a character index. - * This allocates nothing if the document is not UTF-8. - * But it is reference counted, so it must not be allocated - * more than once. - * - * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER - * (although I don't know why and where). - * Recalculating it could be inefficient. - * The index is reference-counted. Perhaps we could just allocate - * one more time, so it doesn't get freed when changing documents. 
- */ - if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0) - & SC_LINECHARACTERINDEX_UTF32)) + if (new_doc && default_cp != SC_CP_UTF8) { + /* + * There is a chance the user will see this buffer even if we + * are currently in batch mode. + */ + for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++) + teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET, + style, default_cp); + /* 0 is used for ALL single-byte encodings */ + teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0); + } else if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0) + & SC_LINECHARACTERINDEX_UTF32)) { + /* + * All UTF-8 documents are expected to have a character index. + * This allocates nothing if the document is not UTF-8. + * But it is reference counted, so it must not be allocated + * more than once. + * + * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER + * (although I don't know why and where). + * Recalculating it could be inefficient. + * The index is reference-counted. Perhaps we could just allocate + * one more time, so it doesn't get freed when changing documents. + */ teco_view_ssm(teco_qreg_view, SCI_ALLOCATELINECHARACTERINDEX, SC_LINECHARACTERINDEX_UTF32, 0); + } } /** @memberof teco_doc_t */ @@ -122,41 +142,12 @@ teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage ctx->doc = NULL; teco_doc_reset(ctx); - teco_doc_edit(ctx); + teco_doc_edit(ctx, codepage); teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)(str ? : "")); - if (codepage != SC_CP_UTF8) { - /* - * We have a new UTF-8 document and - * teco_doc_edit() currently always initializes an index. - */ - teco_view_ssm(teco_qreg_view, SCI_RELEASELINECHARACTERINDEX, - SC_LINECHARACTERINDEX_UTF32, 0); - g_assert(!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0) - & SC_LINECHARACTERINDEX_UTF32)); - - /* - * Configure a single-byte codepage/charset. - * This requires setting it on all of the possible styles. 
- * Unfortunately there can theoretically even be 255 (STYLE_MAX) styles. - * This is important only for display purposes - other than that - * all single-byte encodings are handled the same. - * - * FIXME: Should we avoid this if codepage == 0? - * It will be used for raw byte handling mostly. - * Perhaps we should even set char representations appropriately - * for all non-ANSI codepoints in the 0 codepage. - * But this would also be costly... - */ - for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++) - teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET, style, codepage); - /* 0 is used for ALL single-byte encodings */ - teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0); - } - if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); } /** @memberof teco_doc_t */ @@ -201,14 +192,14 @@ teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage if (outlen) *outlen = 0; if (codepage) - *codepage = SC_CP_UTF8; + *codepage = teco_default_codepage(); return; } if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(ctx); + teco_doc_edit(ctx, teco_default_codepage()); gsize len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); if (str) { @@ -221,7 +212,7 @@ teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage *codepage = teco_view_get_codepage(teco_qreg_view); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); } /** @memberof teco_doc_t */ @@ -62,7 +62,7 @@ teco_doc_init(teco_doc_t *ctx) memset(ctx, 0, sizeof(*ctx)); } -void teco_doc_edit(teco_doc_t *ctx); +void teco_doc_edit(teco_doc_t *ctx, guint default_cp); void teco_doc_undo_edit(teco_doc_t *ctx); void teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage); @@ -319,7 +319,7 @@ teco_state_glob_pattern_done(teco_machine_main_t *ctx, const teco_string_t *str, 
g_assert(glob_reg != NULL); if (!glob_reg->vtable->undo_set_string(glob_reg, error) || !glob_reg->vtable->set_string(glob_reg, filename, strlen(filename), - SC_CP_UTF8, error)) + teco_default_codepage(), error)) return NULL; } @@ -105,6 +105,7 @@ teco_get_default_config_path(const gchar *program) static gchar *teco_eval_macro = NULL; static gboolean teco_mung_file = FALSE; static gboolean teco_mung_profile = TRUE; +static gboolean teco_8bit_clean = FALSE; static gchar * teco_process_options(gint *argc, gchar ***argv) @@ -120,6 +121,8 @@ teco_process_options(gint *argc, gchar ***argv) "Do not mung " "$SCITECOCONFIG" G_DIR_SEPARATOR_S INI_FILE " " "even if it exists"}, + {"8bit", '8', 0, G_OPTION_ARG_NONE, &teco_8bit_clean, + "Use ANSI encoding by default and disable automatic EOL conversion"}, {NULL} }; @@ -320,6 +323,10 @@ main(int argc, char **argv) * to the macro or munged file. */ + if (teco_8bit_clean) + /* equivalent to 16,4ED but executed earlier */ + teco_ed = (teco_ed & ~TECO_ED_AUTOEOL) | TECO_ED_DEFAULT_ANSI; + /* * Theoretically, QReg tables should only be initialized * after the interface, since they contain Scintilla documents. 
diff --git a/src/parser.c b/src/parser.c index aef6223..ed21740 100644 --- a/src/parser.c +++ b/src/parser.c @@ -724,7 +724,7 @@ teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escap teco_machine_init(&ctx->parent, &teco_state_stringbuilding_start, must_undo); ctx->escape_char = escape_char; ctx->qreg_table_locals = locals; - ctx->codepage = SC_CP_UTF8; + ctx->codepage = teco_default_codepage(); } void @@ -767,7 +767,7 @@ gboolean teco_state_expectstring_initial(teco_machine_main_t *ctx, GError **error) { if (ctx->mode == TECO_MODE_NORMAL) - teco_undo_guint(ctx->expectstring.machine.codepage) = SC_CP_UTF8; + teco_undo_guint(ctx->expectstring.machine.codepage) = teco_default_codepage(); return TRUE; } diff --git a/src/qreg-commands.c b/src/qreg-commands.c index e8be384..0e07944 100644 --- a/src/qreg-commands.c +++ b/src/qreg-commands.c @@ -372,7 +372,7 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, gint args = teco_expressions_args(); if (args > 0) { - guint codepage = SC_CP_UTF8; + guint codepage = teco_default_codepage(); if (colon_modified && !qreg->vtable->get_string(qreg, NULL, NULL, &codepage, error)) return NULL; @@ -415,7 +415,7 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, /* set register */ if (!qreg->vtable->undo_set_string(qreg, error) || !qreg->vtable->set_string(qreg, buffer, len, - SC_CP_UTF8, error)) + codepage, error)) return NULL; } } @@ -429,7 +429,7 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx, /* set register */ if (!qreg->vtable->undo_set_string(qreg, error) || !qreg->vtable->set_string(qreg, str->data, str->len, - SC_CP_UTF8, error)) + teco_default_codepage(), error)) return NULL; } @@ -127,11 +127,11 @@ teco_qreg_set_eol_mode(teco_qreg_t *qreg, gint mode) if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); 
teco_view_ssm(teco_qreg_view, SCI_SETEOLMODE, mode, 0); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); } /** @memberof teco_qreg_t */ @@ -144,7 +144,7 @@ teco_qreg_load(teco_qreg_t *qreg, const gchar *filename, GError **error) if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); teco_doc_reset(&qreg->string); /* @@ -162,7 +162,7 @@ teco_qreg_load(teco_qreg_t *qreg, const gchar *filename, GError **error) return FALSE; if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); return TRUE; } @@ -174,18 +174,14 @@ teco_qreg_save(teco_qreg_t *qreg, const gchar *filename, GError **error) if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); - if (!teco_view_save(teco_qreg_view, filename, error)) { - if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); - return FALSE; - } + gboolean ret = teco_view_save(teco_qreg_view, filename, error); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); - return TRUE; + return ret; } static gboolean @@ -239,14 +235,14 @@ teco_qreg_plain_append_string(teco_qreg_t *qreg, const gchar *str, gsize len, GE if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); teco_view_ssm(teco_qreg_view, SCI_BEGINUNDOACTION, 0, 0); teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)str); teco_view_ssm(teco_qreg_view, SCI_ENDUNDOACTION, 0, 0); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); return TRUE; } @@ -262,27 +258,24 @@ 
static gboolean teco_qreg_plain_get_character(teco_qreg_t *qreg, teco_int_t position, teco_int_t *chr, GError **error) { - gboolean ret = TRUE; - if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); gssize off = teco_view_glyphs2bytes(teco_qreg_view, position); - if (off < 0 || off == len) { + gboolean ret = off >= 0 && off != len; + if (!ret) g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE, "Position %" TECO_INT_FORMAT " out of range", position); - ret = FALSE; /* make sure we still restore the current Q-Register */ - } else { + else *chr = teco_view_get_character(teco_qreg_view, off, len); - } if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); return ret; } @@ -293,13 +286,13 @@ teco_qreg_plain_get_length(teco_qreg_t *qreg, GError **error) if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); teco_int_t ret = teco_view_bytes2glyphs(teco_qreg_view, len); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); return ret; } @@ -326,7 +319,7 @@ teco_qreg_plain_edit(teco_qreg_t *qreg, GError **error) if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(&qreg->string); + teco_doc_edit(&qreg->string, teco_default_codepage()); teco_interface_show_view(teco_qreg_view); teco_interface_info_update(qreg); @@ -549,7 +542,7 @@ teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, */ *len = teco_ring_current->filename ? 
strlen(teco_ring_current->filename) : 0; if (codepage) - *codepage = SC_CP_UTF8; + *codepage = teco_default_codepage(); return TRUE; } @@ -647,7 +640,7 @@ teco_qreg_workingdir_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, else g_free(dir); if (codepage) - *codepage = SC_CP_UTF8; + *codepage = teco_default_codepage(); return TRUE; } @@ -798,7 +791,7 @@ teco_qreg_clipboard_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, teco_string_clear(&str_converted); *len = str_converted.len; if (codepage) - *codepage = SC_CP_UTF8; + *codepage = teco_default_codepage(); return TRUE; } @@ -910,7 +903,7 @@ teco_qreg_table_set_environ(teco_qreg_table_t *table, GError **error) } if (!qreg->vtable->set_string(qreg, value, strlen(value), - SC_CP_UTF8, error)) + teco_default_codepage(), error)) return FALSE; } diff --git a/src/sciteco.h b/src/sciteco.h index 7f420e8..09dea3b 100644 --- a/src/sciteco.h +++ b/src/sciteco.h @@ -21,6 +21,8 @@ #include <glib.h> +#include <Scintilla.h> + #if TECO_INTEGER == 32 typedef gint32 teco_int_t; #define TECO_INT_FORMAT G_GINT32_FORMAT @@ -83,6 +85,7 @@ teco_is_failure(teco_bool_t x) * This is not a bitfield, since it is set from SciTECO. */ enum { + TECO_ED_DEFAULT_ANSI = (1 << 2), TECO_ED_AUTOCASEFOLD = (1 << 3), TECO_ED_AUTOEOL = (1 << 4), TECO_ED_HOOKS = (1 << 5), @@ -94,6 +97,12 @@ enum { /* in main.c */ extern teco_int_t teco_ed; +static inline guint +teco_default_codepage(void) +{ + return teco_ed & TECO_ED_DEFAULT_ANSI ? 
SC_CHARSET_ANSI : SC_CP_UTF8; +} + /* in main.c */ extern volatile sig_atomic_t teco_interrupted; diff --git a/src/search.c b/src/search.c index cf26c7f..c1dd542 100644 --- a/src/search.c +++ b/src/search.c @@ -678,7 +678,7 @@ teco_state_search_done(teco_machine_main_t *ctx, const teco_string_t *str, GErro if (!search_reg->vtable->undo_set_string(search_reg, error) || !search_reg->vtable->set_string(search_reg, str->data, str->len, - SC_CP_UTF8, error)) + teco_default_codepage(), error)) return NULL; teco_interface_ssm(SCI_SETANCHOR, anchor, 0); @@ -1078,7 +1078,7 @@ teco_state_replace_default_insert_done_overwrite(teco_machine_main_t *ctx, const if (str->len > 0) { if (!replace_reg->vtable->undo_set_string(replace_reg, error) || !replace_reg->vtable->set_string(replace_reg, str->data, str->len, - SC_CP_UTF8, error)) + teco_default_codepage(), error)) return NULL; } else { g_auto(teco_string_t) replace_str = {NULL, 0}; @@ -1111,7 +1111,7 @@ teco_state_replace_default_ignore_done(teco_machine_main_t *ctx, const teco_stri if (!replace_reg->vtable->undo_set_string(replace_reg, error) || !replace_reg->vtable->set_string(replace_reg, str->data, str->len, - SC_CP_UTF8, error)) + teco_default_codepage(), error)) return NULL; return &teco_state_start; diff --git a/src/spawn.c b/src/spawn.c index c6dd779..6d3a441 100644 --- a/src/spawn.c +++ b/src/spawn.c @@ -165,9 +165,10 @@ teco_state_execute_initial(teco_machine_main_t *ctx, GError **error) return TRUE; /* - * Command-lines and file names are always assumed to be UTF-8. + * Command-lines and file names are always assumed to be UTF-8, + * unless we set TECO_ED_DEFAULT_ANSI. 
*/ - teco_undo_guint(ctx->expectstring.machine.codepage) = SC_CP_UTF8; + teco_undo_guint(ctx->expectstring.machine.codepage) = teco_default_codepage(); if (!teco_expressions_eval(FALSE, error)) return FALSE; @@ -702,7 +703,7 @@ teco_spawn_stdout_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer da } else { if (!qreg->vtable->undo_set_string(qreg, &teco_spawn_ctx.error) || !qreg->vtable->set_string(qreg, buffer.data, buffer.len, - SC_CP_UTF8, &teco_spawn_ctx.error)) + teco_default_codepage(), &teco_spawn_ctx.error)) goto error; } } else { @@ -72,6 +72,27 @@ teco_view_setup(teco_view_t *ctx) */ teco_view_ssm(ctx, SCI_SETMARGINWIDTHN, 1, 0); + if (teco_ed & TECO_ED_DEFAULT_ANSI) { + /* + * Configure a single-byte codepage/charset. + * This requires setting it on all of the possible styles. + * Fortunately, we can do it before SCI_STYLECLEARALL. + * This is important only for display purposes - other than that + * all single-byte encodings are handled the same. + */ + teco_view_ssm(ctx, SCI_STYLESETCHARACTERSET, STYLE_DEFAULT, SC_CHARSET_ANSI); + /* 0 is used for ALL single-byte encodings */ + teco_view_ssm(ctx, SCI_SETCODEPAGE, 0, 0); + } else { + /* + * Documents are UTF-8 by default and all UTF-8 documents + * are expected to have a character index. + * This is a property of the document, instead of the view. + */ + teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); + } + /* * Set some basic styles in order to provide * a consistent look across UIs if no profile @@ -118,14 +139,6 @@ teco_view_setup(teco_view_t *ctx) * the representations only once. */ teco_view_set_representations(ctx); - - /* - * Documents are UTF-8 by default and all UTF-8 documents - * are expected to have a character index. - * This is a property of the document, instead of the view. 
- */ - teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX, - SC_LINECHARACTERINDEX_UTF32, 0); } TECO_DEFINE_UNDO_CALL(teco_view_ssm, teco_view_t *, unsigned int, uptr_t, sptr_t); @@ -145,6 +158,28 @@ teco_view_set_representations(teco_view_t *ctx) gchar buf[] = {(gchar)cc, '\0'}; teco_view_ssm(ctx, SCI_SETREPRESENTATION, (uptr_t)buf, (sptr_t)reps[cc]); } + + if (teco_ed & TECO_ED_DEFAULT_ANSI) { + /* + * Non-ANSI chars should be visible somehow. + * This would best be done always when changing the + * encoding to 0, but it would be kind of expensive. + * + * FIXME: On the other hand, this could cause problems + * when setting SC_CP_UTF8 later on. + */ + for (guint cc = 0x80; cc <= 0xFF; cc++) { + gchar buf[] = {(gchar)cc, '\0'}; + gchar rep[2+1]; + /* + * Hexadecimal is poorly supported in SciTECO, but + * multiple decimal numbers one after another look + * confusing, esp. in Curses. + */ + g_snprintf(rep, sizeof(rep), "%02X", cc); + teco_view_ssm(ctx, SCI_SETREPRESENTATION, (uptr_t)buf, (sptr_t)rep); + } + } } /** |