aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/core-commands.c 3
-rw-r--r-- src/doc.c 10
-rw-r--r-- src/doc.h 2
-rw-r--r-- src/interface.c 84
-rw-r--r-- src/interface.h 22
-rw-r--r-- src/qreg-commands.c 47
-rw-r--r-- src/qreg.c 141
-rw-r--r-- src/qreg.h 6
-rw-r--r-- src/view.c 100
-rw-r--r-- src/view.h 4
10 files changed, 274 insertions, 145 deletions
diff --git a/src/core-commands.c b/src/core-commands.c
index 7e6dbfa..9281d0d 100644
--- a/src/core-commands.c
+++ b/src/core-commands.c
@@ -1010,6 +1010,9 @@ teco_state_start_delete_chars(teco_machine_main_t *ctx, GError **error)
* This can be an ASCII <code> or Unicode codepoint
* depending on Scintilla's encoding of the current
* buffer.
+ * Invalid Unicode byte sequences are reported as
+ * -1 or -2.
+ *
* - If <n> is 0, return the <code> of the character
* pointed to by dot.
* - If <n> is 1, return the <code> of the character
diff --git a/src/doc.c b/src/doc.c
index fb0661e..0360b43 100644
--- a/src/doc.c
+++ b/src/doc.c
@@ -53,6 +53,16 @@ teco_doc_edit(teco_doc_t *ctx)
* initialized only once.
*/
//teco_view_set_representations(teco_qreg_view);
+
+ /*
+ * Documents are UTF-8 by default and all UTF-8 documents
+ * are expected to have a character index.
+ *
+ * FIXME: This apparently gets reset with every SCI_SETDOCPOINTER.
+ * Does that mean the index needs to be recalculated repeatedly as well?
+ */
+ teco_view_ssm(teco_qreg_view, SCI_ALLOCATELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
}
/** @memberof teco_doc_t */
diff --git a/src/doc.h b/src/doc.h
index 6b0721e..9dc1665 100644
--- a/src/doc.h
+++ b/src/doc.h
@@ -42,7 +42,7 @@ typedef struct teco_doc_scintilla_t teco_doc_scintilla_t;
typedef struct {
/**
* Underlying Scintilla document.
- * It is created on demand in teco_doc_maybe_create_document(),
+ * It is created on demand in teco_doc_get_scintilla(),
* so that we don't waste memory on integer-only Q-Registers.
*/
teco_doc_scintilla_t *doc;
diff --git a/src/interface.c b/src/interface.c
index e21cbb4..2e2d64e 100644
--- a/src/interface.c
+++ b/src/interface.c
@@ -118,87 +118,3 @@ teco_interface_process_notify(SCNotification *notify)
g_printf("SCINTILLA NOTIFY: code=%d\n", notify->nmhdr.code);
#endif
}
-
-/**
- * Convert a glyph index to a byte offset as used by Scintilla.
- *
- * This is optimized with the "line character index",
- * which must always be enabled in UTF-8 documents.
- *
- * It is also used to validate glyph indexes.
- *
- * @param pos Position in glyphs/characters.
- * @return Position in bytes or -1 if pos is out of bounds.
- */
-gssize
-teco_glyphs2bytes(teco_int_t pos)
-{
- if (pos < 0)
- return -1; /* invalid position */
- if (!pos)
- return 0;
-
- if (!(teco_interface_ssm(SCI_GETLINECHARACTERINDEX, 0, 0) &
- SC_LINECHARACTERINDEX_UTF32))
- /* assume single-byte encoding */
- return pos <= teco_interface_ssm(SCI_GETLENGTH, 0, 0) ? pos : -1;
-
- sptr_t line = teco_interface_ssm(SCI_LINEFROMINDEXPOSITION, pos,
- SC_LINECHARACTERINDEX_UTF32);
- sptr_t line_bytes = teco_interface_ssm(SCI_POSITIONFROMLINE, line, 0);
- pos -= teco_interface_ssm(SCI_INDEXPOSITIONFROMLINE, line,
- SC_LINECHARACTERINDEX_UTF32);
- return teco_interface_ssm(SCI_POSITIONRELATIVE, line_bytes, pos) ? : -1;
-}
-
-/**
- * Convert byte offset to glyph/character index without bounds checking.
- */
-teco_int_t
-teco_bytes2glyphs(gsize pos)
-{
- if (!pos)
- return 0;
-
- if (!(teco_interface_ssm(SCI_GETLINECHARACTERINDEX, 0, 0) &
- SC_LINECHARACTERINDEX_UTF32))
- /* assume single-byte encoding */
- return pos;
-
- sptr_t line = teco_interface_ssm(SCI_LINEFROMPOSITION, pos, 0);
- sptr_t line_bytes = teco_interface_ssm(SCI_POSITIONFROMLINE, line, 0);
- return teco_interface_ssm(SCI_INDEXPOSITIONFROMLINE, line,
- SC_LINECHARACTERINDEX_UTF32) +
- teco_interface_ssm(SCI_COUNTCHARACTERS, line_bytes, pos);
-}
-
-#define TECO_RELATIVE_LIMIT 1024
-
-/**
- * Convert a glyph index relative to a byte position to
- * a byte position.
- *
- * Can be used to implement commands with relative character
- * ranges.
- * As an optimization, this always counts characters for deltas
- * smaller than TECO_RELATIVE_LIMIT, so it will be fast
- * even where the character-index based lookup is too slow
- * (as on exceedingly long lines).
- *
- * @param pos Byte position to start.
- * @param n Number of glyphs/characters to the left (negative) or
- * right (positive) of pos.
- * @return Position in bytes or -1 if the resulting position is out of bounds.
- */
-gssize
-teco_glyphs2bytes_relative(gsize pos, teco_int_t n)
-{
- if (!n)
- return pos;
- if (ABS(n) > TECO_RELATIVE_LIMIT)
- return teco_glyphs2bytes(teco_bytes2glyphs(pos) + n);
-
- sptr_t res = teco_interface_ssm(SCI_POSITIONRELATIVE, pos, n);
- /* SCI_POSITIONRELATIVE may return 0 even if the offset is valid */
- return res ? : n > 0 ? -1 : teco_bytes2glyphs(pos)+n >= 0 ? 0 : -1;
-}
diff --git a/src/interface.h b/src/interface.h
index 6a391aa..c975525 100644
--- a/src/interface.h
+++ b/src/interface.h
@@ -154,16 +154,30 @@ void teco_interface_process_notify(SCNotification *notify);
/** @pure */
void teco_interface_cleanup(void);
+/**
+ * Convert a glyph index to a byte offset in the current view.
+ *
+ * Convenience wrapper around teco_view_glyphs2bytes() operating on
+ * teco_interface_current_view.
+ *
+ * @param pos Position in glyphs/characters.
+ * @return The result of teco_view_glyphs2bytes() for the current view.
+ */
+static inline gssize
+teco_glyphs2bytes(teco_int_t pos)
+{
+	teco_view_t *view = teco_interface_current_view;
+
+	return teco_view_glyphs2bytes(view, pos);
+}
+
+/**
+ * Convert a byte offset to a glyph/character index in the current view.
+ *
+ * Convenience wrapper around teco_view_bytes2glyphs() operating on
+ * teco_interface_current_view.
+ *
+ * @param pos Position in bytes.
+ * @return The result of teco_view_bytes2glyphs() for the current view.
+ */
+static inline teco_int_t
+teco_bytes2glyphs(gsize pos)
+{
+	teco_view_t *view = teco_interface_current_view;
+
+	return teco_view_bytes2glyphs(view, pos);
+}
+
+/**
+ * Convert a glyph offset relative to a byte position into a byte
+ * position in the current view.
+ *
+ * Convenience wrapper around teco_view_glyphs2bytes_relative()
+ * operating on teco_interface_current_view.
+ *
+ * @param pos Byte position to start from.
+ * @param n Number of glyphs/characters relative to pos.
+ * @return The result of teco_view_glyphs2bytes_relative() for the
+ *         current view.
+ */
+static inline gssize
+teco_glyphs2bytes_relative(gsize pos, teco_int_t n)
+{
+	teco_view_t *view = teco_interface_current_view;
+
+	return teco_view_glyphs2bytes_relative(view, pos, n);
+}
+
/*
* The following functions are here for lack of a better place.
* They could also be in sciteco.h, but only if declared as non-inline
* since sciteco.h should not depend on interface.h.
*/
-gssize teco_glyphs2bytes(teco_int_t pos);
-teco_int_t teco_bytes2glyphs(gsize pos);
-gssize teco_glyphs2bytes_relative(gsize pos, teco_int_t n);
-
static inline gboolean
teco_validate_line(teco_int_t n)
{
diff --git a/src/qreg-commands.c b/src/qreg-commands.c
index 34f3164..089f2a5 100644
--- a/src/qreg-commands.c
+++ b/src/qreg-commands.c
@@ -259,9 +259,9 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
if (teco_machine_main_eval_colon(ctx)) {
/* Query Q-Register's existence or string size */
if (qreg) {
- gsize len;
-
- if (!qreg->vtable->get_string(qreg, NULL, &len, error))
+ /* get_string() would return the size in bytes */
+ teco_int_t len = qreg->vtable->get_length(qreg, error);
+ if (len < 0)
return NULL;
teco_expressions_push(len);
} else {
@@ -281,10 +281,9 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
return NULL;
}
- gint c = qreg->vtable->get_character(qreg, pos, error);
- if (c < 0)
+ teco_int_t c;
+ if (!qreg->vtable->get_character(qreg, pos, &c, error))
return NULL;
-
teco_expressions_push(c);
} else {
/* Query integer */
@@ -311,6 +310,8 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg,
* Positions are handled like buffer positions \(em they
* begin at 0 up to the length of the string minus 1.
* An error is thrown for invalid positions.
+ * If <q> is Unicode-encoded, -1 or -2 could be returned for
+ * invalid byte sequences.
* Both non-colon-modified forms of Q require register <q>
* to be defined and fail otherwise.
*
@@ -369,24 +370,40 @@ teco_state_setqregstring_nobuilding_done(teco_machine_main_t *ctx,
gint args = teco_expressions_args();
if (args > 0) {
- g_autofree gchar *buffer = g_malloc(args);
-
- for (gint i = args; i > 0; i--) {
- teco_int_t v;
- if (!teco_expressions_pop_num_calc(&v, 0, error))
- return NULL;
- buffer[i-1] = (gchar)v;
+ g_autofree gchar *buffer = NULL;
+ gsize len = 0;
+
+ if (qreg->vtable->get_codepage(qreg) == SC_CP_UTF8) {
+ buffer = g_malloc(6*args);
+ for (gint i = args; i > 0; i--) {
+ teco_int_t v;
+ if (!teco_expressions_pop_num_calc(&v, 0, error))
+ return NULL;
+ if (!g_unichar_validate(v)) {
+ teco_error_codepoint_set(error, "^U");
+ return NULL;
+ }
+ len += g_unichar_to_utf8(v, buffer+len);
+ }
+ } else {
+ buffer = g_malloc(args);
+ for (gint i = args; i > 0; i--) {
+ teco_int_t v;
+ if (!teco_expressions_pop_num_calc(&v, 0, error))
+ return NULL;
+ buffer[len++] = v;
+ }
}
if (colon_modified) {
/* append to register */
if (!qreg->vtable->undo_append_string(qreg, error) ||
- !qreg->vtable->append_string(qreg, buffer, args, error))
+ !qreg->vtable->append_string(qreg, buffer, len, error))
return NULL;
} else {
/* set register */
if (!qreg->vtable->undo_set_string(qreg, error) ||
- !qreg->vtable->set_string(qreg, buffer, args, error))
+ !qreg->vtable->set_string(qreg, buffer, len, error))
return NULL;
}
}
diff --git a/src/qreg.c b/src/qreg.c
index f058aff..e17bf4d 100644
--- a/src/qreg.c
+++ b/src/qreg.c
@@ -204,6 +204,21 @@ teco_qreg_plain_get_integer(teco_qreg_t *qreg, teco_int_t *ret, GError **error)
return TRUE;
}
+/**
+ * Query the Scintilla code page of a plain Q-Register's document.
+ *
+ * The register's document is temporarily edited in teco_qreg_view.
+ * If a Q-Register is currently being edited (teco_qreg_current),
+ * its document is updated beforehand and edited again afterwards.
+ *
+ * @param qreg Register to query.
+ * @return The value reported by SCI_GETCODEPAGE.
+ */
+static gint
+teco_qreg_plain_get_codepage(teco_qreg_t *qreg)
+{
+	gint codepage;
+
+	if (teco_qreg_current != NULL) {
+		teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
+	}
+
+	/* switch the view to the queried register's document */
+	teco_doc_edit(&qreg->string);
+	codepage = teco_view_ssm(teco_qreg_view, SCI_GETCODEPAGE, 0, 0);
+
+	/* switch back to the register that was being edited, if any */
+	if (teco_qreg_current != NULL) {
+		teco_doc_edit(&teco_qreg_current->string);
+	}
+
+	return codepage;
+}
+
static gboolean
teco_qreg_plain_set_string(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error)
{
@@ -250,23 +265,64 @@ teco_qreg_plain_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GError **
return TRUE;
}
-static gint
-teco_qreg_plain_get_character(teco_qreg_t *qreg, guint position, GError **error)
+/**
+ * Get the character at a glyph position of a plain Q-Register's string.
+ *
+ * The register's document is temporarily edited in teco_qreg_view.
+ * The document of teco_qreg_current (if any) is edited again before
+ * returning, on the success as well as on the error path.
+ *
+ * @param qreg Register to query.
+ * @param position Position in glyphs/characters.
+ * @param chr Where to store the result: the decoded codepoint for
+ *            UTF-8 documents (-1 or -2 for invalid byte sequences),
+ *            otherwise the byte value as an unsigned char.
+ *            Not written if the position is out of range.
+ * @param error Set to TECO_ERROR_RANGE if position is out of range.
+ * @return FALSE if position is out of range, TRUE otherwise.
+ */
+static gboolean
+teco_qreg_plain_get_character(teco_qreg_t *qreg, teco_int_t position,
+ teco_int_t *chr, GError **error)
{
- gint ret = -1;
+ gboolean ret = TRUE;
if (teco_qreg_current)
teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
teco_doc_edit(&qreg->string);
- if (position < teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0))
- /* internally, values are casted to signed char */
- ret = (guchar)teco_view_ssm(teco_qreg_view, SCI_GETCHARAT, position, 0);
- else
+ sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
+ gssize off = teco_view_glyphs2bytes(teco_qreg_view, position);
+
+ /*
+ * off == len is the end of the document: a valid position,
+ * but there is no character to return there.
+ */
+ if (off < 0 || off == len) {
g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE,
- "Position %u out of range", position);
+ "Position %" TECO_INT_FORMAT " out of range", position);
+ ret = FALSE;
/* make sure we still restore the current Q-Register */
+ } else if (teco_view_ssm(teco_qreg_view, SCI_GETCODEPAGE, 0, 0) == SC_CP_UTF8) {
+ gchar buf[6+1];
+ struct Sci_TextRangeFull range = {
+ .chrg = {off, MIN(len, off+sizeof(buf)-1)},
+ .lpstrText = buf
+ };
+ /*
+ * Probably faster than SCI_GETRANGEPOINTER+SCI_GETGAPPOSITION
+ * or repeatedly calling SCI_GETCHARAT.
+ */
+ teco_view_ssm(teco_qreg_view, SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range);
+ /*
+ * Make sure that the -1/-2 error values are preserved.
+ * The sign bit in UCS-4/UTF-32 is unused, so this will even
+ * suffice if TECO_INTEGER == 32.
+ */
+ *chr = (gint32)g_utf8_get_char_validated(buf, -1);
+ } else {
+ // FIXME: Everything else is a single-byte encoding?
+ /* internally, the character is cast to signed char */
+ *chr = (guchar)teco_view_ssm(teco_qreg_view, SCI_GETCHARAT, off, 0);
+ }
+
+ if (teco_qreg_current)
+ teco_doc_edit(&teco_qreg_current->string);
+
+ return ret;
+}
+
+static teco_int_t
+teco_qreg_plain_get_length(teco_qreg_t *qreg, GError **error)
+{
+ if (teco_qreg_current)
+ teco_doc_update(&teco_qreg_current->string, teco_qreg_view);
+
+ teco_doc_edit(&qreg->string);
+
+ sptr_t len = teco_view_ssm(teco_qreg_view, SCI_GETLENGTH, 0, 0);
+ teco_int_t ret = teco_view_bytes2glyphs(teco_qreg_view, len);
if (teco_qreg_current)
teco_doc_edit(&teco_qreg_current->string);
@@ -329,12 +385,14 @@ teco_qreg_plain_undo_edit(teco_qreg_t *qreg, GError **error)
.set_integer = teco_qreg_plain_set_integer, \
.undo_set_integer = teco_qreg_plain_undo_set_integer, \
.get_integer = teco_qreg_plain_get_integer, \
+ .get_codepage = teco_qreg_plain_get_codepage, \
.set_string = teco_qreg_plain_set_string, \
.undo_set_string = teco_qreg_plain_undo_set_string, \
.append_string = teco_qreg_plain_append_string, \
.undo_append_string = teco_qreg_plain_undo_set_string, \
.get_string = teco_qreg_plain_get_string, \
.get_character = teco_qreg_plain_get_character, \
+ .get_length = teco_qreg_plain_get_length, \
.exchange_string = teco_qreg_plain_exchange_string, \
.undo_exchange_string = teco_qreg_plain_undo_exchange_string, \
.edit = teco_qreg_plain_edit, \
@@ -369,6 +427,15 @@ teco_qreg_external_edit(teco_qreg_t *qreg, GError **error)
return TRUE;
}
+/**
+ * Report the code page of an external Q-Register.
+ *
+ * External registers are always assumed to be UTF-8-encoded,
+ * so the register itself is never consulted.
+ *
+ * @param qreg Register to query (unused).
+ * @return Always SC_CP_UTF8.
+ */
+static gint
+teco_qreg_external_get_codepage(teco_qreg_t *qreg)
+{
+	const gint codepage = SC_CP_UTF8;
+
+	return codepage;
+}
+
static gboolean
teco_qreg_external_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GError **error)
{
@@ -394,21 +461,40 @@ teco_qreg_external_undo_exchange_string(teco_qreg_t *qreg, teco_doc_t *src, GErr
return TRUE;
}
-static gint
-teco_qreg_external_get_character(teco_qreg_t *qreg, guint position, GError **error)
+/**
+ * Get the character at a glyph position of an external Q-Register.
+ *
+ * The string is fetched with the register's get_string() callback
+ * and is treated as UTF-8.
+ *
+ * @param qreg Register to query.
+ * @param position Position in glyphs/characters.
+ * @param chr Where to store the decoded codepoint
+ *            (-1 or -2 for invalid byte sequences).
+ *            Not written on failure.
+ * @param error Set by get_string() on failure or to TECO_ERROR_RANGE
+ *              if position is out of range.
+ * @return FALSE if get_string() fails or position is out of range,
+ *         TRUE otherwise.
+ */
+static gboolean
+teco_qreg_external_get_character(teco_qreg_t *qreg, teco_int_t position,
+ teco_int_t *chr, GError **error)
{
g_auto(teco_string_t) str = {NULL, 0};
if (!qreg->vtable->get_string(qreg, &str.data, &str.len, error))
- return -1;
+ return FALSE;
- if (position >= str.len) {
+ if (position < 0 || position >= g_utf8_strlen(str.data, str.len)) {
g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE,
- "Position %u out of range", position);
- return -1;
+ "Position %" TECO_INT_FORMAT " out of range", position);
+ return FALSE;
}
+ const gchar *p = g_utf8_offset_to_pointer(str.data, position);
- return (guchar)str.data[position];
+ /*
+ * Make sure that the -1/-2 error values are preserved.
+ * The sign bit in UCS-4/UTF-32 is unused, so this will even
+ * suffice if TECO_INTEGER == 32.
+ */
+ *chr = (gint32)g_utf8_get_char_validated(p, -1);
+ return TRUE;
+}
+
+/**
+ * Get the length of an external Q-Register's string in glyphs.
+ *
+ * The string is fetched with the register's get_string() callback
+ * and counted as UTF-8.
+ *
+ * @param qreg Register to query.
+ * @param error Set by get_string() on failure.
+ * @return Number of glyphs/characters or -1 if get_string() failed.
+ */
+static teco_int_t
+teco_qreg_external_get_length(teco_qreg_t *qreg, GError **error)
+{
+	g_auto(teco_string_t) contents = {NULL, 0};
+	gboolean fetched = qreg->vtable->get_string(qreg, &contents.data,
+	                                            &contents.len, error);
+
+	return fetched ? g_utf8_strlen(contents.data, contents.len) : -1;
}
/**
@@ -416,10 +502,12 @@ teco_qreg_external_get_character(teco_qreg_t *qreg, guint position, GError **err
* These rely on custom implementations of get_string() and set_string().
*/
#define TECO_INIT_QREG_EXTERNAL(...) TECO_INIT_QREG( \
+ .get_codepage = teco_qreg_external_get_codepage, \
.exchange_string = teco_qreg_external_exchange_string, \
.undo_exchange_string = teco_qreg_external_undo_exchange_string, \
.edit = teco_qreg_external_edit, \
.get_character = teco_qreg_external_get_character, \
+ .get_length = teco_qreg_external_get_length, \
##__VA_ARGS__ \
)
@@ -497,23 +585,6 @@ teco_qreg_bufferinfo_get_string(teco_qreg_t *qreg, gchar **str, gsize *len, GErr
return TRUE;
}
-static gint
-teco_qreg_bufferinfo_get_character(teco_qreg_t *qreg, guint position, GError **error)
-{
- gsize max_len;
-
- if (!teco_qreg_bufferinfo_get_string(qreg, NULL, &max_len, error))
- return -1;
-
- if (position >= max_len) {
- g_set_error(error, TECO_ERROR, TECO_ERROR_RANGE,
- "Position %u out of range", position);
- return -1;
- }
-
- return (guchar)teco_ring_current->filename[position];
-}
-
/** @static @memberof teco_qreg_t */
teco_qreg_t *
teco_qreg_bufferinfo_new(void)
@@ -527,9 +598,11 @@ teco_qreg_bufferinfo_new(void)
.append_string = teco_qreg_bufferinfo_append_string,
.undo_append_string = teco_qreg_bufferinfo_undo_append_string,
.get_string = teco_qreg_bufferinfo_get_string,
- .get_character = teco_qreg_bufferinfo_get_character,
/* we don't want to inherit all the other stuff from TECO_INIT_QREG_EXTERNAL(). */
- .edit = teco_qreg_external_edit
+ .get_codepage = teco_qreg_external_get_codepage,
+ .edit = teco_qreg_external_edit,
+ .get_character = teco_qreg_external_get_character,
+ .get_length = teco_qreg_external_get_length
);
return teco_qreg_new(&vtable, "*", 1);
diff --git a/src/qreg.h b/src/qreg.h
index 4867dc2..7a150ea 100644
--- a/src/qreg.h
+++ b/src/qreg.h
@@ -47,13 +47,17 @@ typedef const struct {
gboolean (*undo_set_integer)(teco_qreg_t *qreg, GError **error);
gboolean (*get_integer)(teco_qreg_t *qreg, teco_int_t *ret, GError **error);
+ gint (*get_codepage)(teco_qreg_t *qreg);
gboolean (*set_string)(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error);
gboolean (*undo_set_string)(teco_qreg_t *qreg, GError **error);
gboolean (*append_string)(teco_qreg_t *qreg, const gchar *str, gsize len, GError **error);
gboolean (*undo_append_string)(teco_qreg_t *qreg, GError **error);
gboolean (*get_string)(teco_qreg_t *qreg, gchar **str, gsize *len, GError **error);
- gint (*get_character)(teco_qreg_t *qreg, guint position, GError **error);
+ gboolean (*get_character)(teco_qreg_t *qreg, teco_int_t position,
+ teco_int_t *chr, GError **error);
+ /* always returns length in glyphs in contrast to get_string() */
+ teco_int_t (*get_length)(teco_qreg_t *qreg, GError **error);
/*
* These callbacks exist only to optimize teco_qreg_stack_push|pop()
diff --git a/src/view.c b/src/view.c
index 6fecdc0..4f959a3 100644
--- a/src/view.c
+++ b/src/view.c
@@ -112,18 +112,20 @@ teco_view_setup(teco_view_t *ctx)
teco_view_ssm(ctx, SCI_STYLESETBACK, STYLE_CALLTIP, 0xFFFFFF);
/*
- * Documents are UTF-8 by default and all UTF-8 documents
- * are expected to have a character index.
- */
- teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX, SC_LINECHARACTERINDEX_UTF32, 0);
-
- /*
* Since we have patched out Scintilla's original SetRepresentations(),
* it no longer resets them on SCI_SETDOCPOINTER.
* Therefore it is sufficient for all kinds of views to initialize
* the representations only once.
*/
teco_view_set_representations(ctx);
+
+ /*
+ * Documents are UTF-8 by default and all UTF-8 documents
+ * are expected to have a character index.
+ * This is a property of the document, instead of the view.
+ */
+ teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
}
TECO_DEFINE_UNDO_CALL(teco_view_ssm, teco_view_t *, unsigned int, uptr_t, sptr_t);
@@ -455,3 +457,89 @@ teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError **error)
return TRUE;
}
+
+/**
+ * Translate a glyph (character) index into a Scintilla byte offset.
+ *
+ * Documents that carry a UTF-32 "line character index" are resolved
+ * by locating the line containing the glyph and then stepping the
+ * remaining glyphs from the beginning of that line.
+ * Documents without such an index are assumed to use a single-byte
+ * encoding, so glyph index and byte offset coincide.
+ *
+ * This doubles as a validity check for glyph indexes.
+ *
+ * @param ctx The view to operate on.
+ * @param pos Position in glyphs/characters.
+ * @return Position in bytes or -1 if pos is out of bounds.
+ */
+gssize
+teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos)
+{
+	if (pos <= 0)
+		/* negative positions are invalid, 0 always maps to 0 */
+		return pos < 0 ? -1 : 0;
+
+	if (!(teco_view_ssm(ctx, SCI_GETLINECHARACTERINDEX, 0, 0) &
+	      SC_LINECHARACTERINDEX_UTF32)) {
+		/* assume single-byte encoding */
+		sptr_t total_bytes = teco_view_ssm(ctx, SCI_GETLENGTH, 0, 0);
+
+		return pos > total_bytes ? -1 : pos;
+	}
+
+	sptr_t line = teco_view_ssm(ctx, SCI_LINEFROMINDEXPOSITION, pos,
+	                            SC_LINECHARACTERINDEX_UTF32);
+	sptr_t line_start = teco_view_ssm(ctx, SCI_POSITIONFROMLINE, line, 0);
+
+	/* glyphs left to walk from the beginning of the line */
+	pos -= teco_view_ssm(ctx, SCI_INDEXPOSITIONFROMLINE, line,
+	                     SC_LINECHARACTERINDEX_UTF32);
+
+	sptr_t byte_off = teco_view_ssm(ctx, SCI_POSITIONRELATIVE, line_start, pos);
+
+	/* a result of 0 is reported as out of bounds */
+	return byte_off ? byte_off : -1;
+}
+
+/**
+ * Translate a byte offset into a glyph/character index.
+ *
+ * No bounds checking is performed on pos.
+ * Documents without a UTF-32 line character index are assumed
+ * to use a single-byte encoding, so pos is returned unchanged.
+ *
+ * @param ctx The view to operate on.
+ * @param pos Position in bytes.
+ * @return Position in glyphs/characters.
+ */
+teco_int_t
+teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos)
+{
+	/*
+	 * Offset 0 is glyph 0 in every encoding.
+	 * Without a character index, assume single-byte encoding.
+	 */
+	if (pos == 0 ||
+	    !(teco_view_ssm(ctx, SCI_GETLINECHARACTERINDEX, 0, 0) &
+	      SC_LINECHARACTERINDEX_UTF32))
+		return pos;
+
+	sptr_t line = teco_view_ssm(ctx, SCI_LINEFROMPOSITION, pos, 0);
+	sptr_t line_start = teco_view_ssm(ctx, SCI_POSITIONFROMLINE, line, 0);
+
+	/* glyphs before the line plus glyphs between line start and pos */
+	sptr_t glyphs_before_line = teco_view_ssm(ctx, SCI_INDEXPOSITIONFROMLINE, line,
+	                                          SC_LINECHARACTERINDEX_UTF32);
+	sptr_t glyphs_in_line = teco_view_ssm(ctx, SCI_COUNTCHARACTERS, line_start, pos);
+
+	return glyphs_before_line + glyphs_in_line;
+}
+
+#define TECO_RELATIVE_LIMIT 1024
+
+/**
+ * Resolve a glyph offset relative to a byte position into
+ * an absolute byte position.
+ *
+ * Useful for implementing commands that take relative
+ * character ranges.
+ * Deltas not exceeding TECO_RELATIVE_LIMIT glyphs are resolved by
+ * counting characters from pos, which stays fast even where the
+ * character-index based lookup is too slow (as on exceedingly
+ * long lines).
+ * Larger deltas go through the absolute glyph index.
+ *
+ * @param ctx The view to operate on.
+ * @param pos Byte position to start.
+ * @param n Number of glyphs/characters to the left (negative) or
+ *          right (positive) of pos.
+ * @return Position in bytes or -1 if the resulting position is out of bounds.
+ */
+gssize
+teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n)
+{
+	if (n == 0)
+		return pos;
+
+	if (ABS(n) > TECO_RELATIVE_LIMIT) {
+		teco_int_t target = teco_view_bytes2glyphs(ctx, pos) + n;
+
+		return teco_view_glyphs2bytes(ctx, target);
+	}
+
+	sptr_t byte_off = teco_view_ssm(ctx, SCI_POSITIONRELATIVE, pos, n);
+	if (byte_off)
+		return byte_off;
+
+	/*
+	 * SCI_POSITIONRELATIVE may return 0 even if the offset is valid.
+	 * This can only be the case when moving to the left and
+	 * ending up exactly at the beginning of the document.
+	 */
+	if (n > 0)
+		return -1;
+	return teco_view_bytes2glyphs(ctx, pos)+n >= 0 ? 0 : -1;
+}
diff --git a/src/view.h b/src/view.h
index 15a09cd..a395dcf 100644
--- a/src/view.h
+++ b/src/view.h
@@ -70,3 +70,7 @@ gboolean teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError
/** @pure @memberof teco_view_t */
void teco_view_free(teco_view_t *ctx);
+
+gssize teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos);
+teco_int_t teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos);
+gssize teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n);