aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/view.c
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2024-08-30 04:15:36 +0200
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2024-09-09 18:22:21 +0200
commit7507ad3e1816f3bc9004dceb39bb303804287438 (patch)
tree3c4b2746b56a0538564dbf0416745012b51228fa /src/view.c
parent90bad24f96deeaaa2255f0ad89ece21d5397b78b (diff)
downloadsciteco-7507ad3e1816f3bc9004dceb39bb303804287438.tar.gz
Unicode support for the Q-Register commands (refs #5)
* this required adding several Q-Register vtable methods * it should still be investigated whether the repeated calling of SCI_ALLOCATELINECHARACTERINDEX causes any overhead.
Diffstat (limited to 'src/view.c')
-rw-r--r--src/view.c100
1 files changed, 94 insertions, 6 deletions
diff --git a/src/view.c b/src/view.c
index 6fecdc0..4f959a3 100644
--- a/src/view.c
+++ b/src/view.c
@@ -112,18 +112,20 @@ teco_view_setup(teco_view_t *ctx)
teco_view_ssm(ctx, SCI_STYLESETBACK, STYLE_CALLTIP, 0xFFFFFF);
/*
- * Documents are UTF-8 by default and all UTF-8 documents
- * are expected to have a character index.
- */
- teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX, SC_LINECHARACTERINDEX_UTF32, 0);
-
- /*
* Since we have patched out Scintilla's original SetRepresentations(),
* it no longer resets them on SCI_SETDOCPOINTER.
* Therefore it is sufficient for all kinds of views to initialize
* the representations only once.
*/
teco_view_set_representations(ctx);
+
+ /*
+ * Documents are UTF-8 by default and all UTF-8 documents
+ * are expected to have a character index.
+ * This is a property of the document, instead of the view.
+ */
+ teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
}
TECO_DEFINE_UNDO_CALL(teco_view_ssm, teco_view_t *, unsigned int, uptr_t, sptr_t);
@@ -455,3 +457,89 @@ teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError **error)
return TRUE;
}
+
+/**
+ * Convert a glyph index to a byte offset as used by Scintilla.
+ *
+ * This is optimized with the "line character index",
+ * which must always be enabled in UTF-8 documents.
+ *
+ * It is also used to validate glyph indexes.
+ *
+ * @param ctx The view to operate on.
+ * @param pos Position in glyphs/characters.
+ * @return Position in bytes or -1 if pos is out of bounds.
+ */
+gssize
+teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos)
+{
+ if (pos < 0)
+ return -1; /* invalid position */
+ if (!pos)
+ return 0;
+
+ if (!(teco_view_ssm(ctx, SCI_GETLINECHARACTERINDEX, 0, 0) &
+ SC_LINECHARACTERINDEX_UTF32))
+ /* assume single-byte encoding */
+ return pos <= teco_view_ssm(ctx, SCI_GETLENGTH, 0, 0) ? pos : -1;
+
+ sptr_t line = teco_view_ssm(ctx, SCI_LINEFROMINDEXPOSITION, pos,
+ SC_LINECHARACTERINDEX_UTF32);
+ sptr_t line_bytes = teco_view_ssm(ctx, SCI_POSITIONFROMLINE, line, 0);
+ pos -= teco_view_ssm(ctx, SCI_INDEXPOSITIONFROMLINE, line,
+ SC_LINECHARACTERINDEX_UTF32);
+ return teco_view_ssm(ctx, SCI_POSITIONRELATIVE, line_bytes, pos) ? : -1;
+}
+
+/**
+ * Convert byte offset to glyph/character index without bounds checking.
+ */
+teco_int_t
+teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos)
+{
+ if (!pos)
+ return 0;
+
+ if (!(teco_view_ssm(ctx, SCI_GETLINECHARACTERINDEX, 0, 0) &
+ SC_LINECHARACTERINDEX_UTF32))
+ /* assume single-byte encoding */
+ return pos;
+
+ sptr_t line = teco_view_ssm(ctx, SCI_LINEFROMPOSITION, pos, 0);
+ sptr_t line_bytes = teco_view_ssm(ctx, SCI_POSITIONFROMLINE, line, 0);
+ return teco_view_ssm(ctx, SCI_INDEXPOSITIONFROMLINE, line,
+ SC_LINECHARACTERINDEX_UTF32) +
+ teco_view_ssm(ctx, SCI_COUNTCHARACTERS, line_bytes, pos);
+}
+
+#define TECO_RELATIVE_LIMIT 1024
+
+/**
+ * Convert a glyph index relative to a byte position to
+ * a byte position.
+ *
+ * Can be used to implement commands with relative character
+ * ranges.
+ * As an optimization, this always counts characters for deltas
+ * smaller than TECO_RELATIVE_LIMIT, so it will be fast
+ * even where the character-index based lookup is too slow
+ * (as on exceedingly long lines).
+ *
+ * @param ctx The view to operate on.
+ * @param pos Byte position to start.
+ * @param n Number of glyphs/characters to the left (negative) or
+ * right (positive) of pos.
+ * @return Position in bytes or -1 if the resulting position is out of bounds.
+ */
+gssize
+teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n)
+{
+ if (!n)
+ return pos;
+ if (ABS(n) > TECO_RELATIVE_LIMIT)
+ return teco_view_glyphs2bytes(ctx, teco_view_bytes2glyphs(ctx, pos) + n);
+
+ sptr_t res = teco_view_ssm(ctx, SCI_POSITIONRELATIVE, pos, n);
+ /* SCI_POSITIONRELATIVE may return 0 even if the offset is valid */
+ return res ? : n > 0 ? -1 : teco_view_bytes2glyphs(ctx, pos)+n >= 0 ? 0 : -1;
+}