diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-09-09 16:54:26 +0200 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-09-09 18:22:21 +0200 |
commit | 4f231871a0208ec9bcc2679fce25d3b9795d1597 (patch) | |
tree | bdc9055166fe236f009c6640acf53b6706310c27 /src/doc.c | |
parent | 41ab5cf0289dab60ac1ddc97cf9680ee2468ea6c (diff) | |
download | sciteco-4f231871a0208ec9bcc2679fce25d3b9795d1597.tar.gz |
added raw ANSI mode to facilitate 8-bit clean editing (refs #5)
* When enabled with bit 2 in the ED flags (0,4ED),
all registers and buffers will get the raw ANSI encoding (as if 0EE had been
called on them).
You can still manually change the encoding, eg. by calling 65001EE afterwards.
* Also the ANSI mode sets up character representations for all bytes >= 0x80.
This is currently done only depending on the ED flag, not when setting 0EE.
* Since setting 16,4ED for 8-bit clean editing in a macro can be tricky -
the default unnamed buffer will still be at UTF-8 and at least a bunch
of environment registers as well - we added the command line option
`--8bit` (short `-8`) which configures the ED flags very early on.
As another advantage you can mung the profile in 8-bit mode as well
when using SciTECO as a sort of interactive hex editor.
* Disable UTF-8 checks in 8-bit clean mode (sample.teco_ini).
Diffstat (limited to 'src/doc.c')
-rw-r--r-- | src/doc.c | 91 |
1 files changed, 41 insertions, 50 deletions
@@ -60,10 +60,19 @@ teco_doc_get_scintilla(teco_doc_t *ctx) return ctx->doc; } -/** @memberof teco_doc_t */ +/** + * Edit the given document in the Q-Register view. + * + * @param ctx The document to edit. + * @param default_cp The codepage to configure if the document is new. + * + * @memberof teco_doc_t + */ void -teco_doc_edit(teco_doc_t *ctx) +teco_doc_edit(teco_doc_t *ctx, guint default_cp) { + gboolean new_doc = ctx->doc == NULL; + teco_view_ssm(teco_qreg_view, SCI_SETDOCPOINTER, 0, (sptr_t)teco_doc_get_scintilla(ctx)); teco_view_ssm(teco_qreg_view, SCI_SETFIRSTVISIBLELINE, ctx->first_line, 0); @@ -77,22 +86,33 @@ teco_doc_edit(teco_doc_t *ctx) */ //teco_view_set_representations(teco_qreg_view); - /* - * All UTF-8 documents are expected to have a character index. - * This allocates nothing if the document is not UTF-8. - * But it is reference counted, so it must not be allocated - * more than once. - * - * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER - * (although I don't know why and where). - * Recalculating it could be inefficient. - * The index is reference-counted. Perhaps we could just allocate - * one more time, so it doesn't get freed when changing documents. - */ - if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0) - & SC_LINECHARACTERINDEX_UTF32)) + if (new_doc && default_cp != SC_CP_UTF8) { + /* + * There is a chance the user will see this buffer even if we + * are currently in batch mode. + */ + for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++) + teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET, + style, default_cp); + /* 0 is used for ALL single-byte encodings */ + teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0); + } else if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0) + & SC_LINECHARACTERINDEX_UTF32)) { + /* + * All UTF-8 documents are expected to have a character index. + * This allocates nothing if the document is not UTF-8. + * But it is reference counted, so it must not be allocated + * more than once. + * + * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER + * (although I don't know why and where). + * Recalculating it could be inefficient. + * The index is reference-counted. Perhaps we could just allocate + * one more time, so it doesn't get freed when changing documents. + */ teco_view_ssm(teco_qreg_view, SCI_ALLOCATELINECHARACTERINDEX, SC_LINECHARACTERINDEX_UTF32, 0); + } } /** @memberof teco_doc_t */ @@ -122,41 +142,12 @@ teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage ctx->doc = NULL; teco_doc_reset(ctx); - teco_doc_edit(ctx); + teco_doc_edit(ctx, codepage); teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)(str ? : "")); - if (codepage != SC_CP_UTF8) { - /* - * We have a new UTF-8 document and - * teco_doc_edit() currently always initializes an index. - */ - teco_view_ssm(teco_qreg_view, SCI_RELEASELINECHARACTERINDEX, - SC_LINECHARACTERINDEX_UTF32, 0); - g_assert(!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0) - & SC_LINECHARACTERINDEX_UTF32)); - - /* - * Configure a single-byte codepage/charset. - * This requires setting it on all of the possible styles. - * Unfortunately there can theoretically even be 255 (STYLE_MAX) styles. - * This is important only for display purposes - other than that - * all single-byte encodings are handled the same. - * - * FIXME: Should we avoid this if codepage == 0? - * It will be used for raw byte handling mostly. - * Perhaps we should even set char representations appropriately - * for all non-ANSI codepoints in the 0 codepage. - * But this would also be costly... - */ - for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++) - teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET, style, codepage); - /* 0 is used for ALL single-byte encodings */ - teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0); - } - if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); } /** @memberof teco_doc_t */ @@ -201,14 +192,14 @@ teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage if (outlen) *outlen = 0; if (codepage) - *codepage = SC_CP_UTF8; + *codepage = teco_default_codepage(); return; } if (teco_qreg_current) teco_doc_update(&teco_qreg_current->string, teco_qreg_view); - teco_doc_edit(ctx); + teco_doc_edit(ctx, teco_default_codepage()); gsize len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0); if (str) { @@ -221,7 +212,7 @@ teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage *codepage = teco_view_get_codepage(teco_qreg_view); if (teco_qreg_current) - teco_doc_edit(&teco_qreg_current->string); + teco_doc_edit(&teco_qreg_current->string, 0); } /** @memberof teco_doc_t */ |