aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/interface.c
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2024-08-29 01:56:50 +0200
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2024-09-09 18:22:21 +0200
commit7c592561af3bbbad2eaf865247811ba2bd590c2e (patch)
tree75b27cf40fb9ba8d646eb00e05be5f91d116b493 /src/interface.c
parentc71ed30cf0c554d288edfe87842082cc9ec393a7 (diff)
downloadsciteco-7c592561af3bbbad2eaf865247811ba2bd590c2e.tar.gz
Glyph to byte offset mapping is now using the line character index (refs #5)
* This works reasonably well unless lines are exceedingly long (as on a line we always count characters). The following test case is still slow (on Unicode buffers): 10000<@I/XX/> <%a-1:J;> While the following is now also fast: 10000<@I/X^J/> <%a-1:J;> * Commands with relative character offsets (C, R, A, D) have a special optimization where they always count characters beginning at dot, as long as the argument is now exceedingly large. This means they are fast even on exceedingly long lines. * The remaining commands (search, EC/EG, Xq) now accept glyph indexes.
Diffstat (limited to 'src/interface.c')
-rw-r--r--src/interface.c84
1 files changed, 84 insertions, 0 deletions
diff --git a/src/interface.c b/src/interface.c
index 2e2d64e..e21cbb4 100644
--- a/src/interface.c
+++ b/src/interface.c
@@ -118,3 +118,87 @@ teco_interface_process_notify(SCNotification *notify)
g_printf("SCINTILLA NOTIFY: code=%d\n", notify->nmhdr.code);
#endif
}
+
+/**
+ * Convert a glyph index to a byte offset as used by Scintilla.
+ *
+ * This is optimized with the "line character index",
+ * which must always be enabled in UTF-8 documents.
+ *
+ * It is also used to validate glyph indexes.
+ *
+ * @param pos Position in glyphs/characters.
+ * @return Position in bytes or -1 if pos is out of bounds.
+ */
+gssize
+teco_glyphs2bytes(teco_int_t pos)
+{
+ if (pos < 0)
+ return -1; /* invalid position */
+ if (!pos)
+ return 0;
+
+ if (!(teco_interface_ssm(SCI_GETLINECHARACTERINDEX, 0, 0) &
+ SC_LINECHARACTERINDEX_UTF32))
+ /* assume single-byte encoding */
+ return pos <= teco_interface_ssm(SCI_GETLENGTH, 0, 0) ? pos : -1;
+
+ sptr_t line = teco_interface_ssm(SCI_LINEFROMINDEXPOSITION, pos,
+ SC_LINECHARACTERINDEX_UTF32);
+ sptr_t line_bytes = teco_interface_ssm(SCI_POSITIONFROMLINE, line, 0);
+ pos -= teco_interface_ssm(SCI_INDEXPOSITIONFROMLINE, line,
+ SC_LINECHARACTERINDEX_UTF32);
+ return teco_interface_ssm(SCI_POSITIONRELATIVE, line_bytes, pos) ? : -1;
+}
+
+/**
+ * Convert byte offset to glyph/character index without bounds checking.
+ */
+teco_int_t
+teco_bytes2glyphs(gsize pos)
+{
+ if (!pos)
+ return 0;
+
+ if (!(teco_interface_ssm(SCI_GETLINECHARACTERINDEX, 0, 0) &
+ SC_LINECHARACTERINDEX_UTF32))
+ /* assume single-byte encoding */
+ return pos;
+
+ sptr_t line = teco_interface_ssm(SCI_LINEFROMPOSITION, pos, 0);
+ sptr_t line_bytes = teco_interface_ssm(SCI_POSITIONFROMLINE, line, 0);
+ return teco_interface_ssm(SCI_INDEXPOSITIONFROMLINE, line,
+ SC_LINECHARACTERINDEX_UTF32) +
+ teco_interface_ssm(SCI_COUNTCHARACTERS, line_bytes, pos);
+}
+
+#define TECO_RELATIVE_LIMIT 1024
+
+/**
+ * Convert a glyph index relative to a byte position to
+ * a byte position.
+ *
+ * Can be used to implement commands with relative character
+ * ranges.
+ * As an optimization, this always counts characters for deltas
+ * smaller than TECO_RELATIVE_LIMIT, so it will be fast
+ * even where the character-index based lookup is too slow
+ * (as on exceedingly long lines).
+ *
+ * @param pos Byte position to start.
+ * @param n Number of glyphs/characters to the left (negative) or
+ * right (positive) of pos.
+ * @return Position in bytes or -1 if the resulting position is out of bounds.
+ */
+gssize
+teco_glyphs2bytes_relative(gsize pos, teco_int_t n)
+{
+ if (!n)
+ return pos;
+ if (ABS(n) > TECO_RELATIVE_LIMIT)
+ return teco_glyphs2bytes(teco_bytes2glyphs(pos) + n);
+
+ sptr_t res = teco_interface_ssm(SCI_POSITIONRELATIVE, pos, n);
+ /* SCI_POSITIONRELATIVE may return 0 even if the offset is valid */
+ return res ? : n > 0 ? -1 : teco_bytes2glyphs(pos)+n >= 0 ? 0 : -1;
+}