about summary refs log tree commit diff homepage
path: root/src/view.c
diff options
context:
space:
mode:
author Robin Haberkorn <robin.haberkorn@googlemail.com> 2024-08-31 23:01:42 +0200
committer Robin Haberkorn <robin.haberkorn@googlemail.com> 2024-09-09 18:22:21 +0200
commit b146ccd7a7513b910affd86848c704d600df3090 (patch)
tree 5c9e2f65d43aee57c8fc0e9e2616754781857276 /src/view.c
parent db5ab171995bded490c65ed299c9ff066c41c413 (diff)
download sciteco-b146ccd7a7513b910affd86848c704d600df3090.tar.gz
avoid redundancies between teco_qreg_plain_get_character() and teco_state_start_get() (refs #5)
Diffstat (limited to 'src/view.c')
-rw-r--r-- src/view.c 40
1 files changed, 40 insertions, 0 deletions
diff --git a/src/view.c b/src/view.c
index 4f959a3..291c06b 100644
--- a/src/view.c
+++ b/src/view.c
@@ -543,3 +543,43 @@ teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n)
/* SCI_POSITIONRELATIVE may return 0 even if the offset is valid */
return res ? : n > 0 ? -1 : teco_view_bytes2glyphs(ctx, pos)+n >= 0 ? 0 : -1;
}
+
+/**
+ * Get the codepoint starting at a given byte offset of the view's document.
+ *
+ * @param ctx The view to operate on.
+ * @param pos Byte position where the glyph starts (not validated here).
+ * @param len Document length in bytes; caps the end of the range read.
+ * @return The byte at pos as 0..255 unless the code page is SC_CP_UTF8.
+ * In UTF-8 documents: the decoded codepoint, or the decoder's error
+ * values -1 (incomplete sequence) or -2 (invalid byte sequence).
+ */
+teco_int_t
+teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len)
+{
+ if (teco_view_ssm(ctx, SCI_GETCODEPAGE, 0, 0) != SC_CP_UTF8)
+ /*
+ * Asian multi-byte encodings are unsupported, so any code page
+ * other than UTF-8 is treated as single-byte.
+ * NOTE: Internally, the character is cast to signed char and may
+ * become negative; the guchar cast maps it back to 0..255.
+ */
+ return (guchar)teco_view_ssm(ctx, SCI_GETCHARAT, pos, 0);
+
+ gchar buf[4+1]; /* at most 4 bytes are requested; the extra byte is presumably for the NUL terminator */
+ struct Sci_TextRangeFull range = {
+ .chrg = {pos, MIN(len, pos+sizeof(buf)-1)}, /* bytes [pos, pos+4), end clamped to len */
+ .lpstrText = buf
+ };
+ /*
+ * Copy the range into buf with a single message; probably faster than
+ * SCI_GETRANGEPOINTER+SCI_GETGAPPOSITION or repeated SCI_GETCHARAT.
+ */
+ teco_view_ssm(ctx, SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range);
+ /*
+ * The gint32 cast keeps the decoder's -1/-2 error values negative
+ * when converted to teco_int_t. The sign bit of UCS-4/UTF-32 is
+ * unused, so this even suffices if TECO_INTEGER == 32.
+ */
+ return (gint32)g_utf8_get_char_validated(buf, -1);
+}