aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--doc/sciteco.7.template6
-rw-r--r--src/core-commands.c9
-rw-r--r--src/interface.h6
-rw-r--r--src/parser.c37
-rw-r--r--src/parser.h12
-rw-r--r--src/qreg-commands.c18
-rw-r--r--src/qreg.c6
-rw-r--r--src/qreg.h2
-rw-r--r--src/search.c12
-rw-r--r--src/spawn.c5
-rw-r--r--src/view.h7
11 files changed, 104 insertions, 16 deletions
diff --git a/doc/sciteco.7.template b/doc/sciteco.7.template
index a6cca40..ca23c93 100644
--- a/doc/sciteco.7.template
+++ b/doc/sciteco.7.template
@@ -1647,6 +1647,12 @@ Expands to the character whose code is stored in the numeric
part of Q-Register \fIq\fP.
For instance if register \(lqA\(rq contains the code 66,
\(lq^EUa\(rq expands to the character \(lqB\(rq.
+The interpretation of this code depends on the context.
+Within inserts and searches (\fBI\fP, \fBS\fP, etc.) bytes or Unicode codepoints
+are expected depending on the buffer's encoding.
+Operations on registers (\fBEU\fP) similarly consult the
+register's encoding.
+Everything else expects Unicode codepoints.
.TP
.SCITECO_TOPIC ^EQ ^EQq
.BI ^EQ q
diff --git a/src/core-commands.c b/src/core-commands.c
index ef4621f..a84d0ef 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -2494,8 +2494,7 @@ teco_state_ecommand_encoding(teco_machine_main_t *ctx, GError **error)
gboolean colon_modified = teco_machine_main_eval_colon(ctx);
- sptr_t old_cp = teco_interface_ssm(SCI_GETCODEPAGE, 0, 0)
- ? : teco_interface_ssm(SCI_STYLEGETCHARACTERSET, STYLE_DEFAULT, 0);
+ guint old_cp = teco_interface_get_codepage();
if (!teco_expressions_args()) {
/* get current code page */
@@ -2745,6 +2744,12 @@ teco_state_insert_initial(teco_machine_main_t *ctx, GError **error)
if (ctx->mode > TECO_MODE_NORMAL)
return TRUE;
+ /*
+ * Current document's encoding determines the behaviour of
+ * string building constructs.
+ */
+ teco_undo_guint(ctx->expectstring.machine.codepage) = teco_interface_get_codepage();
+
if (!teco_expressions_eval(FALSE, error))
return FALSE;
guint args = teco_expressions_args();
diff --git a/src/interface.h b/src/interface.h
index cbe10bd..bbefe88 100644
--- a/src/interface.h
+++ b/src/interface.h
@@ -154,6 +154,12 @@ void teco_interface_process_notify(SCNotification *notify);
/** @pure */
void teco_interface_cleanup(void);
+static inline guint
+teco_interface_get_codepage(void) /* codepage of teco_interface_current_view */
+{
+ return teco_view_get_codepage(teco_interface_current_view); /* thin wrapper around the per-view getter */
+}
+
static inline gssize
teco_glyphs2bytes(teco_int_t pos)
{
diff --git a/src/parser.c b/src/parser.c
index 8d3cc92..29519b0 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -410,6 +410,7 @@ teco_state_stringbuilding_escaped_input(teco_machine_stringbuilding_t *ctx, gcha
/* parse-only mode */
return &teco_state_stringbuilding_start;
+ /* FIXME: Consult ctx->codepage once we have a Unicode-conforming parser */
switch (ctx->mode) {
case TECO_STRINGBUILDING_MODE_UPPER:
chr = g_ascii_toupper(chr);
@@ -442,6 +443,7 @@ teco_state_stringbuilding_lower_input(teco_machine_stringbuilding_t *ctx, gchar
teco_undo_guint(ctx->mode);
ctx->mode = TECO_STRINGBUILDING_MODE_LOWER;
} else {
+ /* FIXME: Consult ctx->codepage once we have a Unicode-conforming parser */
teco_string_append_c(ctx->result, g_ascii_tolower(chr));
}
@@ -465,6 +467,7 @@ teco_state_stringbuilding_upper_input(teco_machine_stringbuilding_t *ctx, gchar
teco_undo_guint(ctx->mode);
ctx->mode = TECO_STRINGBUILDING_MODE_UPPER;
} else {
+ /* FIXME: Consult ctx->codepage once we have a Unicode-conforming parser */
teco_string_append_c(ctx->result, g_ascii_toupper(chr));
}
@@ -576,15 +579,28 @@ teco_state_stringbuilding_ctle_u_input(teco_machine_stringbuilding_t *ctx, gchar
teco_int_t value;
if (!qreg->vtable->get_integer(qreg, &value, error))
return NULL;
- if (value < 0 || value > 0xFF) {
- g_autofree gchar *name_printable = teco_string_echo(qreg->head.name.data, qreg->head.name.len);
- g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED,
- "Q-Register \"%s\" does not contain a valid character", name_printable);
- return NULL;
+
+ if (ctx->codepage == SC_CP_UTF8) {
+ if (value < 0 || !g_unichar_validate(value))
+ goto error_codepoint;
+ /* 4 bytes should be enough, but we better follow the documentation */
+ gchar buf[6];
+ gsize len = g_unichar_to_utf8(value, buf);
+ teco_string_append(ctx->result, buf, len);
+ } else {
+ if (value < 0 || value > 0xFF)
+ goto error_codepoint;
+ teco_string_append_c(ctx->result, (gchar)value);
}
- teco_string_append_c(ctx->result, (gchar)value);
return &teco_state_stringbuilding_start;
+
+error_codepoint: {
+ g_autofree gchar *name_printable = teco_string_echo(qreg->head.name.data, qreg->head.name.len);
+ g_set_error(error, TECO_ERROR, TECO_ERROR_CODEPOINT,
+ "Q-Register \"%s\" does not contain a valid codepoint", name_printable);
+ return NULL;
+}
}
TECO_DEFINE_STATE_STRINGBUILDING_QREG(teco_state_stringbuilding_ctle_u);
@@ -708,6 +724,7 @@ teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escap
teco_machine_init(&ctx->parent, &teco_state_stringbuilding_start, must_undo);
ctx->escape_char = escape_char;
ctx->qreg_table_locals = locals;
+ ctx->codepage = SC_CP_UTF8;
}
void
@@ -746,6 +763,14 @@ teco_machine_stringbuilding_clear(teco_machine_stringbuilding_t *ctx)
teco_machine_qregspec_free(ctx->machine_qregspec);
}
+gboolean
+teco_state_expectstring_initial(teco_machine_main_t *ctx, GError **error) /* initial callback: resets the string building codepage */
+{
+ if (ctx->mode == TECO_MODE_NORMAL) /* any other mode leaves the codepage untouched */
+ teco_undo_guint(ctx->expectstring.machine.codepage) = SC_CP_UTF8; /* same value teco_machine_stringbuilding_init() assigns */
+ return TRUE; /* always succeeds; error is never set */
+}
+
teco_state_t *
teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **error)
{
diff --git a/src/parser.h b/src/parser.h
index 4b4a3a0..ba6054f 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -309,9 +309,6 @@ typedef enum {
/**
* A stringbuilding state machine.
*
- * @fixme Should contain the escape char (currently in teco_machine_expectstring_t),
- * so that we can escape it via ^Q.
- *
* @extends teco_machine_t
*/
typedef struct teco_machine_stringbuilding_t {
@@ -350,6 +347,13 @@ typedef struct teco_machine_stringbuilding_t {
* (see teco_state_stringbuilding_start_process_edit_cmd()).
*/
teco_string_t *result;
+
+ /**
+ * Encoding of string in `result`.
+ * This is inherited from the embedding command and may depend on
+ * the buffer's or Q-Register's encoding.
+ */
+ guint codepage;
} teco_machine_stringbuilding_t;
void teco_machine_stringbuilding_init(teco_machine_stringbuilding_t *ctx, gchar escape_char,
@@ -508,6 +512,7 @@ void teco_machine_main_clear(teco_machine_main_t *ctx);
G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(teco_machine_main_t, teco_machine_main_clear);
+gboolean teco_state_expectstring_initial(teco_machine_main_t *ctx, GError **error);
teco_state_t *teco_state_expectstring_input(teco_machine_main_t *ctx, gchar chr, GError **error);
gboolean teco_state_expectstring_refresh(teco_machine_main_t *ctx, GError **error);
@@ -533,6 +538,7 @@ gboolean teco_state_expectstring_process_edit_cmd(teco_machine_main_t *ctx, teco
return teco_state_expectstring_input(ctx, chr, error); \
} \
TECO_DEFINE_STATE(NAME, \
+ .initial_cb = (teco_state_initial_cb_t)teco_state_expectstring_initial, \
.refresh_cb = (teco_state_refresh_cb_t)teco_state_expectstring_refresh, \
.process_edit_cmd_cb = (teco_state_process_edit_cmd_cb_t) \
teco_state_expectstring_process_edit_cmd, \
diff --git a/src/qreg-commands.c b/src/qreg-commands.c
index 09b2b90..1bde944 100644
--- a/src/qreg-commands.c
+++ b/src/qreg-commands.c
@@ -470,6 +470,23 @@ TECO_DEFINE_STATE_EXPECTQREG(teco_state_eucommand,
.expectqreg.type = TECO_QREG_OPTIONAL_INIT
);
+static gboolean
+teco_state_setqregstring_building_initial(teco_machine_main_t *ctx, GError **error) /* initial callback of teco_state_setqregstring_building */
+{
+ if (ctx->mode > TECO_MODE_NORMAL) /* nothing to do in modes beyond TECO_MODE_NORMAL */
+ return TRUE;
+
+ teco_qreg_t *qreg;
+ teco_machine_qregspec_get_results(ctx->expectqreg, &qreg, NULL); /* only the register result is needed, hence NULL */
+
+ /*
+ * String building constructs (e.g. ^EUq) consult the machine's codepage,
+ * so adopt the codepage reported by the Q-Register itself.
+ */
+ teco_undo_guint(ctx->expectstring.machine.codepage) = qreg->vtable->get_codepage(qreg);
+ return TRUE; /* always succeeds; error is never set */
+}
+
static teco_state_t *
teco_state_setqregstring_building_done(teco_machine_main_t *ctx, const teco_string_t *str, GError **error)
{
@@ -487,6 +504,7 @@ teco_state_setqregstring_building_done(teco_machine_main_t *ctx, const teco_stri
* characters \fBenabled\fP.
*/
TECO_DEFINE_STATE_EXPECTSTRING(teco_state_setqregstring_building,
+ .initial_cb = (teco_state_initial_cb_t)teco_state_setqregstring_building_initial,
.expectstring.string_building = TRUE
);
diff --git a/src/qreg.c b/src/qreg.c
index c3ab1a5..2c2b6ad 100644
--- a/src/qreg.c
+++ b/src/qreg.c
@@ -204,14 +204,14 @@ teco_qreg_plain_get_integer(teco_qreg_t *qreg, teco_int_t *ret, GError **error)
return TRUE;
}
-static gint
+static guint
teco_qreg_plain_get_codepage(teco_qreg_t *qreg)
{
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
teco_doc_edit(&qreg->string);
- gint ret = teco_view_ssm(teco_qreg_view, SCI_GETCODEPAGE, 0, 0);
+ guint ret = teco_view_get_codepage(teco_qreg_view);
if (teco_qreg_current)
teco_doc_edit(&teco_qreg_current->string);
@@ -408,7 +408,7 @@ teco_qreg_external_edit(teco_qreg_t *qreg, GError **error)
return TRUE;
}
-static gint
+static guint
teco_qreg_external_get_codepage(teco_qreg_t *qreg)
{
/*
diff --git a/src/qreg.h b/src/qreg.h
index 7a150ea..f87b877 100644
--- a/src/qreg.h
+++ b/src/qreg.h
@@ -47,7 +47,7 @@ typedef const struct {
gboolean (*undo_set_integer)(teco_qreg_t *qreg, GError **error);
gboolean (*get_integer)(teco_qreg_t *qreg, teco_int_t *ret, GError **error);
- gint (*get_codepage)(teco_qreg_t *qreg);
+ guint (*get_codepage)(teco_qreg_t *qreg);
gboolean (*set_string)(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error);
gboolean (*undo_set_string)(teco_qreg_t *qreg, GError **error);
gboolean (*append_string)(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error);
diff --git a/src/search.c b/src/search.c
index 88b0e16..f72616d 100644
--- a/src/search.c
+++ b/src/search.c
@@ -60,6 +60,8 @@ teco_state_search_initial(teco_machine_main_t *ctx, GError **error)
if (ctx->mode > TECO_MODE_NORMAL)
return TRUE;
+ teco_undo_guint(ctx->expectstring.machine.codepage) = teco_interface_get_codepage();
+
if (G_UNLIKELY(!teco_search_qreg_machine))
teco_search_qreg_machine = teco_machine_qregspec_new(TECO_QREG_REQUIRED, ctx->qreg_table_locals,
ctx->parent.must_undo);
@@ -978,11 +980,19 @@ teco_state_search_delete_done(teco_machine_main_t *ctx, const teco_string_t *str
*/
TECO_DEFINE_STATE_SEARCH(teco_state_search_delete);
+static gboolean
+teco_state_replace_insert_initial(teco_machine_main_t *ctx, GError **error) /* initial callback of teco_state_replace_insert */
+{
+ if (ctx->mode == TECO_MODE_NORMAL) /* any other mode leaves the codepage untouched */
+ teco_undo_guint(ctx->expectstring.machine.codepage) = teco_interface_get_codepage(); /* follow the codepage of the current view */
+ return TRUE; /* always succeeds; error is never set */
+}
+
/*
* FIXME: Could be static
*/
TECO_DEFINE_STATE_INSERT(teco_state_replace_insert,
- .initial_cb = NULL
+ .initial_cb = (teco_state_initial_cb_t)teco_state_replace_insert_initial
);
static teco_state_t *
diff --git a/src/spawn.c b/src/spawn.c
index c1fb426..4317288 100644
--- a/src/spawn.c
+++ b/src/spawn.c
@@ -164,6 +164,11 @@ teco_state_execute_initial(teco_machine_main_t *ctx, GError **error)
if (ctx->mode > TECO_MODE_NORMAL)
return TRUE;
+ /*
+ * Command-lines and file names are always assumed to be UTF-8.
+ */
+ teco_undo_guint(ctx->expectstring.machine.codepage) = SC_CP_UTF8;
+
if (!teco_expressions_eval(FALSE, error))
return FALSE;
diff --git a/src/view.h b/src/view.h
index 882a33c..8f54fdd 100644
--- a/src/view.h
+++ b/src/view.h
@@ -71,6 +71,13 @@ gboolean teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError
/** @pure @memberof teco_view_t */
void teco_view_free(teco_view_t *ctx);
+static inline guint
+teco_view_get_codepage(teco_view_t *ctx) /* codepage of the view, or STYLE_DEFAULT's character set when the codepage is 0 */
+{
+ return teco_view_ssm(ctx, SCI_GETCODEPAGE, 0, 0) /* a non-zero codepage is returned as is */
+ ? : teco_view_ssm(ctx, SCI_STYLEGETCHARACTERSET, STYLE_DEFAULT, 0); /* GNU `?:` extension: fallback is only evaluated when SCI_GETCODEPAGE yields 0 */
+}
+
gssize teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos);
teco_int_t teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos);
gssize teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n);