diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-08-30 04:15:36 +0200 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-09-09 18:22:21 +0200 |
commit | 7507ad3e1816f3bc9004dceb39bb303804287438 (patch) | |
tree | 3c4b2746b56a0538564dbf0416745012b51228fa /src/view.c | |
parent | 90bad24f96deeaaa2255f0ad89ece21d5397b78b (diff) | |
download | sciteco-7507ad3e1816f3bc9004dceb39bb303804287438.tar.gz |
Unicode support for the Q-Register commands (refs #5)
* this required adding several Q-Register vtable methods
* it should still be investigated whether the repeated calling of
SCI_ALLOCATELINECHARACTERINDEX causes any overhead.
Diffstat (limited to 'src/view.c')
-rw-r--r-- | src/view.c | 100 |
1 files changed, 94 insertions, 6 deletions
@@ -112,18 +112,20 @@ teco_view_setup(teco_view_t *ctx) teco_view_ssm(ctx, SCI_STYLESETBACK, STYLE_CALLTIP, 0xFFFFFF); /* - * Documents are UTF-8 by default and all UTF-8 documents - * are expected to have a character index. - */ - teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX, SC_LINECHARACTERINDEX_UTF32, 0); - - /* * Since we have patched out Scintilla's original SetRepresentations(), * it no longer resets them on SCI_SETDOCPOINTER. * Therefore it is sufficient for all kinds of views to initialize * the representations only once. */ teco_view_set_representations(ctx); + + /* + * Documents are UTF-8 by default and all UTF-8 documents + * are expected to have a character index. + * This is a property of the document, instead of the view. + */ + teco_view_ssm(ctx, SCI_ALLOCATELINECHARACTERINDEX, + SC_LINECHARACTERINDEX_UTF32, 0); } TECO_DEFINE_UNDO_CALL(teco_view_ssm, teco_view_t *, unsigned int, uptr_t, sptr_t); @@ -455,3 +457,89 @@ teco_view_save_to_file(teco_view_t *ctx, const gchar *filename, GError **error) return TRUE; } + +/** + * Convert a glyph index to a byte offset as used by Scintilla. + * + * This is optimized with the "line character index", + * which must always be enabled in UTF-8 documents. + * + * It is also used to validate glyph indexes. + * + * If SCI_GETLINECHARACTERINDEX does not report SC_LINECHARACTERINDEX_UTF32, + * a single-byte encoding is assumed and pos is only checked against + * SCI_GETLENGTH. + * Otherwise the line containing the glyph is looked up in the index and + * the remaining glyphs are counted from the beginning of that line. + * + * @param ctx The view to operate on. + * @param pos Position in glyphs/characters. + * @return Position in bytes or -1 if pos is out of bounds. + * Negative positions and a zero result of SCI_POSITIONRELATIVE + * are both reported as -1. + */ +gssize +teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos) +{ + if (pos < 0) + return -1; /* invalid position */ + if (!pos) + return 0; + + if (!(teco_view_ssm(ctx, SCI_GETLINECHARACTERINDEX, 0, 0) & + SC_LINECHARACTERINDEX_UTF32)) + /* assume single-byte encoding */ + return pos <= teco_view_ssm(ctx, SCI_GETLENGTH, 0, 0) ? 
pos : -1; + + sptr_t line = teco_view_ssm(ctx, SCI_LINEFROMINDEXPOSITION, pos, + SC_LINECHARACTERINDEX_UTF32); + sptr_t line_bytes = teco_view_ssm(ctx, SCI_POSITIONFROMLINE, line, 0); + pos -= teco_view_ssm(ctx, SCI_INDEXPOSITIONFROMLINE, line, + SC_LINECHARACTERINDEX_UTF32); + return teco_view_ssm(ctx, SCI_POSITIONRELATIVE, line_bytes, pos) ? : -1; +} + +/** + * Convert byte offset to glyph/character index without bounds checking. + * + * If SCI_GETLINECHARACTERINDEX does not report SC_LINECHARACTERINDEX_UTF32, + * a single-byte encoding is assumed and pos is returned unchanged. + * Otherwise the result is the glyph index of the beginning of the line + * containing pos plus the number of characters counted from there to pos. + * + * @param ctx The view to operate on. + * @param pos Position in bytes. + * @return Position in glyphs/characters. + */ +teco_int_t +teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos) +{ + if (!pos) + return 0; + + if (!(teco_view_ssm(ctx, SCI_GETLINECHARACTERINDEX, 0, 0) & + SC_LINECHARACTERINDEX_UTF32)) + /* assume single-byte encoding */ + return pos; + + sptr_t line = teco_view_ssm(ctx, SCI_LINEFROMPOSITION, pos, 0); + sptr_t line_bytes = teco_view_ssm(ctx, SCI_POSITIONFROMLINE, line, 0); + return teco_view_ssm(ctx, SCI_INDEXPOSITIONFROMLINE, line, + SC_LINECHARACTERINDEX_UTF32) + + teco_view_ssm(ctx, SCI_COUNTCHARACTERS, line_bytes, pos); +} + +#define TECO_RELATIVE_LIMIT 1024 + +/** + * Convert a glyph index relative to a byte position to + * a byte position. + * + * Can be used to implement commands with relative character + * ranges. + * As an optimization, this always counts characters for deltas + * of at most TECO_RELATIVE_LIMIT glyphs, so it will be fast + * even where the character-index based lookup is too slow + * (as on exceedingly long lines). + * Larger deltas are resolved via teco_view_bytes2glyphs() and + * teco_view_glyphs2bytes(). + * + * @param ctx The view to operate on. + * @param pos Byte position to start. + * @param n Number of glyphs/characters to the left (negative) or + * right (positive) of pos. + * @return Position in bytes or -1 if the resulting position is out of bounds. 
 + */ +gssize +teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n) +{ + if (!n) + return pos; + if (ABS(n) > TECO_RELATIVE_LIMIT) + return teco_view_glyphs2bytes(ctx, teco_view_bytes2glyphs(ctx, pos) + n); + + sptr_t res = teco_view_ssm(ctx, SCI_POSITIONRELATIVE, pos, n); + /* SCI_POSITIONRELATIVE may return 0 even if the offset is valid + * A zero result is therefore only accepted when moving left (n < 0) + * and the target glyph index is not negative; + * in all other cases it is reported as -1. + */ + return res ? : n > 0 ? -1 : teco_view_bytes2glyphs(ctx, pos)+n >= 0 ? 0 : -1; +} |