diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-08-29 01:56:50 +0200 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-09-09 18:22:21 +0200 |
commit | 7c592561af3bbbad2eaf865247811ba2bd590c2e (patch) | |
tree | 75b27cf40fb9ba8d646eb00e05be5f91d116b493 /src/interface.c | |
parent | c71ed30cf0c554d288edfe87842082cc9ec393a7 (diff) | |
download | sciteco-7c592561af3bbbad2eaf865247811ba2bd590c2e.tar.gz |
Glyph to byte offset mapping is now using the line character index (refs #5)
* This works reasonably well unless lines are exceedingly long
(as on a line we always count characters).
The following test case is still slow (on Unicode buffers):
10000<@I/XX/> <%a-1:J;>
While the following is now also fast:
10000<@I/X^J/> <%a-1:J;>
* Commands with relative character offsets (C, R, A, D) have
a special optimization where they always count characters beginning
at dot, as long as the argument is not exceedingly large.
This means they are fast even on exceedingly long lines.
* The remaining commands (search, EC/EG, Xq) now accept glyph indexes.
Diffstat (limited to 'src/interface.c')
-rw-r--r-- | src/interface.c | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/src/interface.c b/src/interface.c
index 2e2d64e..e21cbb4 100644
--- a/src/interface.c
+++ b/src/interface.c
@@ -118,3 +118,87 @@ teco_interface_process_notify(SCNotification *notify)
 	g_printf("SCINTILLA NOTIFY: code=%d\n", notify->nmhdr.code);
 #endif
 }
+
+/**
+ * Convert a glyph index to a byte offset as used by Scintilla.
+ *
+ * This is optimized with the "line character index", which must always
+ * be enabled in UTF-8 documents: only the glyphs on the line containing
+ * pos are counted. Without the index, a single-byte encoding is assumed.
+ * It is also used to validate glyph indexes.
+ *
+ * @param pos Position in glyphs/characters.
+ * @return Position in bytes or -1 if pos is out of bounds.
+ */
+gssize
+teco_glyphs2bytes(teco_int_t pos)
+{
+	if (pos < 0)
+		return -1; /* invalid position */
+	if (!pos)
+		return 0;
+
+	if (!(teco_interface_ssm(SCI_GETLINECHARACTERINDEX, 0, 0) &
+	      SC_LINECHARACTERINDEX_UTF32))
+		/* no UTF-32 line character index: assume single-byte encoding */
+		return pos <= teco_interface_ssm(SCI_GETLENGTH, 0, 0) ? pos : -1;
+
+	sptr_t line = teco_interface_ssm(SCI_LINEFROMINDEXPOSITION, pos,
+	                                 SC_LINECHARACTERINDEX_UTF32);
+	sptr_t line_bytes = teco_interface_ssm(SCI_POSITIONFROMLINE, line, 0);
+	pos -= teco_interface_ssm(SCI_INDEXPOSITIONFROMLINE, line,
+	                          SC_LINECHARACTERINDEX_UTF32);
+	return teco_interface_ssm(SCI_POSITIONRELATIVE, line_bytes, pos) ? : -1; /* a result of 0 becomes -1 */
+}
+
+/**
+ * Convert byte offset to glyph/character index without bounds checking.
+ * @param pos Position in bytes.
+ * @return Position in glyphs/characters.
+ */
+teco_int_t
+teco_bytes2glyphs(gsize pos)
+{
+	if (!pos)
+		return 0;
+
+	if (!(teco_interface_ssm(SCI_GETLINECHARACTERINDEX, 0, 0) &
+	      SC_LINECHARACTERINDEX_UTF32))
+		/* no UTF-32 line character index: assume single-byte encoding */
+		return pos;
+
+	sptr_t line = teco_interface_ssm(SCI_LINEFROMPOSITION, pos, 0);
+	sptr_t line_bytes = teco_interface_ssm(SCI_POSITIONFROMLINE, line, 0);
+	return teco_interface_ssm(SCI_INDEXPOSITIONFROMLINE, line,
+	                          SC_LINECHARACTERINDEX_UTF32) +
+	       teco_interface_ssm(SCI_COUNTCHARACTERS, line_bytes, pos);
+}
+
+#define TECO_RELATIVE_LIMIT 1024
+
+/**
+ * Convert a glyph index relative to a byte position to a byte position.
+ *
+ * Can be used to implement commands with relative character ranges.
+ * As an optimization, this always counts characters for deltas of up to
+ * TECO_RELATIVE_LIMIT glyphs, so it will be fast even where the
+ * character-index based lookup is too slow (as on exceedingly long lines).
+ * NOTE(review): ABS(n) and teco_bytes2glyphs(pos) + n can overflow for
+ * extreme n -- presumably callers bound n; confirm.
+ *
+ * @param pos Byte position to start.
+ * @param n Number of glyphs/characters to the left (negative) or right (positive) of pos.
+ * @return Position in bytes or -1 if the resulting position is out of bounds.
+ */
+gssize
+teco_glyphs2bytes_relative(gsize pos, teco_int_t n)
+{
+	if (!n)
+		return pos;
+	if (ABS(n) > TECO_RELATIVE_LIMIT)
+		return teco_glyphs2bytes(teco_bytes2glyphs(pos) + n);
+
+	sptr_t res = teco_interface_ssm(SCI_POSITIONRELATIVE, pos, n);
+	/* SCI_POSITIONRELATIVE may return 0 even if the offset is valid, so for n < 0 the target glyph index is checked */
+	return res ? : n > 0 ? -1 : teco_bytes2glyphs(pos)+n >= 0 ? 0 : -1;
+} |