added raw ANSI mode to facilitate 8-bit clean editing (refs #5)

* When enabled with bit 2 in the ED flags (0,4ED), all registers and buffers will get the raw ANSI encoding (as if 0EE had been called on them). You can still manually change the encoding, eg. by calling 65001EE afterwards. * Also the ANSI mode sets up character representations for all bytes >= 0x80. This is currently done only depending on the ED flag, not when setting 0EE. * Since setting 16,4ED for 8-bit clean editing in a macro can be tricky - the default unnamed buffer will still be at UTF-8 and at least a bunch of environment registers as well - we added the command line option `--8bit` (short `-8`) which configures the ED flags very early on. As another advantage you can mung the profile in 8-bit mode as well when using SciTECO as a sort of interactive hex editor. * Disable UTF-8 checks in 8-bit clean mode (sample.teco_ini).
author: Robin Haberkorn <robin.haberkorn@googlemail.com> 2024-09-09 16:54:26 +0200
committer: Robin Haberkorn <robin.haberkorn@googlemail.com> 2024-09-09 18:22:21 +0200
commit: 4f231871a0208ec9bcc2679fce25d3b9795d1597 (patch)
tree: bdc9055166fe236f009c6640acf53b6706310c27 /src/doc.c
parent: 41ab5cf0289dab60ac1ddc97cf9680ee2468ea6c (diff)
download: sciteco-4f231871a0208ec9bcc2679fce25d3b9795d1597.tar.gz
1 files changed, 41 insertions, 50 deletions
diff --git a/src/doc.c b/src/doc.c
index 516dadb..a1ebe2c 100644
--- a/src/doc.c
+++ b/src/doc.c
@@ -60,10 +60,19 @@ teco_doc_get_scintilla(teco_doc_t *ctx)
 	return ctx->doc;
 }
 
-/** @memberof teco_doc_t */
+/**
+ * Edit the given document in the Q-Register view.
+ *
+ * @param ctx The document to edit.
+ * @param default_cp The codepage to configure if the document is new.
+ *
+ * @memberof teco_doc_t
+ */
 void
-teco_doc_edit(teco_doc_t *ctx)
+teco_doc_edit(teco_doc_t *ctx, guint default_cp)
 {
+	gboolean new_doc = ctx->doc == NULL;
+
 	teco_view_ssm(teco_qreg_view, SCI_SETDOCPOINTER, 0,
 	              (sptr_t)teco_doc_get_scintilla(ctx));
 	teco_view_ssm(teco_qreg_view, SCI_SETFIRSTVISIBLELINE, ctx->first_line, 0);
@@ -77,22 +86,33 @@ teco_doc_edit(teco_doc_t *ctx)
 	 */
 	//teco_view_set_representations(teco_qreg_view);
 
-	/*
-	 * All UTF-8 documents are expected to have a character index.
-	 * This allocates nothing if the document is not UTF-8.
-	 * But it is reference counted, so it must not be allocated
-	 * more than once.
-	 *
-	 * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER
-	 * (although I don't know why and where).
-	 * Recalculating it could be inefficient.
-	 * The index is reference-counted. Perhaps we could just allocate
-	 * one more time, so it doesn't get freed when changing documents.
-	 */
-	if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0)
-						& SC_LINECHARACTERINDEX_UTF32))
+	if (new_doc && default_cp != SC_CP_UTF8) {
+		/*
+		 * There is a chance the user will see this buffer even if we
+		 * are currently in batch mode.
+		 */
+		for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++)
+			teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET,
+			              style, default_cp);
+		/* 0 is used for ALL single-byte encodings */
+		teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0);
+	} else if (!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0)
+							& SC_LINECHARACTERINDEX_UTF32)) {
+		/*
+		 * All UTF-8 documents are expected to have a character index.
+		 * This allocates nothing if the document is not UTF-8.
+		 * But it is reference counted, so it must not be allocated
+		 * more than once.
+		 *
+		 * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER
+		 * (although I don't know why and where).
+		 * Recalculating it could be inefficient.
+		 * The index is reference-counted. Perhaps we could just allocate
+		 * one more time, so it doesn't get freed when changing documents.
+		 */
 		teco_view_ssm(teco_qreg_view, SCI_ALLOCATELINECHARACTERINDEX,
 		              SC_LINECHARACTERINDEX_UTF32, 0);
+	}
 }
 
 /** @memberof teco_doc_t */
@@ -122,41 +142,12 @@ teco_doc_set_string(teco_doc_t *ctx, const gchar *str, gsize len, guint codepage
 	ctx->doc = NULL;
 
 	teco_doc_reset(ctx);
-	teco_doc_edit(ctx);
+	teco_doc_edit(ctx, codepage);
 
 	teco_view_ssm(teco_qreg_view, SCI_APPENDTEXT, len, (sptr_t)(str ? : ""));
 
-	if (codepage != SC_CP_UTF8) {
-		/*
-		 * We have a new UTF-8 document and
-		 * teco_doc_edit() currently always initializes an index.
-		 */
-		teco_view_ssm(teco_qreg_view, SCI_RELEASELINECHARACTERINDEX,
-		              SC_LINECHARACTERINDEX_UTF32, 0);
-		g_assert(!(teco_view_ssm(teco_qreg_view, SCI_GETLINECHARACTERINDEX, 0, 0)
-							& SC_LINECHARACTERINDEX_UTF32));
-
-		/*
-		 * Configure a single-byte codepage/charset.
-		 * This requires setting it on all of the possible styles.
-		 * Unfortunately there can theoretically even be 255 (STYLE_MAX) styles.
-		 * This is important only for display purposes - other than that
-		 * all single-byte encodings are handled the same.
-		 *
-		 * FIXME: Should we avoid this if codepage == 0?
-		 * It will be used for raw byte handling mostly.
-		 * Perhaps we should even set char representations appropriately
-		 * for all non-ANSI codepoints in the 0 codepage.
-		 * But this would also be costly...
-		 */
-		for (gint style = 0; style <= STYLE_LASTPREDEFINED; style++)
-			teco_view_ssm(teco_qreg_view, SCI_STYLESETCHARACTERSET, style, codepage);
-		/* 0 is used for ALL single-byte encodings */
-		teco_view_ssm(teco_qreg_view, SCI_SETCODEPAGE, 0, 0);
-	}
-
 	if (teco_qreg_current)
-		teco_doc_edit(&teco_qreg_current->string);
+		teco_doc_edit(&teco_qreg_current->string, 0);
 }
 
 /** @memberof teco_doc_t */
@@ -201,14 +192,14 @@ teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage
 		if (outlen)
 			*outlen = 0;
 		if (codepage)
-			*codepage = SC_CP_UTF8;
+			*codepage = teco_default_codepage();
 		return;
 	}
 
 	if (teco_qreg_current)
 		teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
 
-	teco_doc_edit(ctx);
+	teco_doc_edit(ctx, teco_default_codepage());
 
 	gsize len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
 	if (str) {
@@ -221,7 +212,7 @@ teco_doc_get_string(teco_doc_t *ctx, gchar **str, gsize *outlen, guint *codepage
 		*codepage = teco_view_get_codepage(teco_qreg_view);
 
 	if (teco_qreg_current)
-		teco_doc_edit(&teco_qreg_current->string);
+		teco_doc_edit(&teco_qreg_current->string, 0);
 }
 
 /** @memberof teco_doc_t */
author	Robin Haberkorn <robin.haberkorn@googlemail.com>	2024-09-09 16:54:26 +0200
committer	Robin Haberkorn <robin.haberkorn@googlemail.com>	2024-09-09 18:22:21 +0200
commit	4f231871a0208ec9bcc2679fce25d3b9795d1597 (patch)
tree	bdc9055166fe236f009c6640acf53b6706310c27 /src/doc.c
parent	41ab5cf0289dab60ac1ddc97cf9680ee2468ea6c (diff)
download	sciteco-4f231871a0208ec9bcc2679fce25d3b9795d1597.tar.gz