diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-08-31 23:01:42 +0200 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2024-09-09 18:22:21 +0200 |
commit | b146ccd7a7513b910affd86848c704d600df3090 (patch) | |
tree | 5c9e2f65d43aee57c8fc0e9e2616754781857276 /src | |
parent | db5ab171995bded490c65ed299c9ff066c41c413 (diff) | |
download | sciteco-b146ccd7a7513b910affd86848c704d600df3090.tar.gz |
avoid redundancies between teco_qreg_plain_get_character() and teco_state_start_get() (refs #5)
Diffstat (limited to 'src')
-rw-r--r-- | src/core-commands.c | 27 | ||||
-rw-r--r-- | src/interface.h | 6 | ||||
-rw-r--r-- | src/qreg-commands.c | 6 | ||||
-rw-r--r-- | src/qreg.c | 21 | ||||
-rw-r--r-- | src/view.c | 40 | ||||
-rw-r--r-- | src/view.h | 2 |
6 files changed, 54 insertions, 48 deletions
diff --git a/src/core-commands.c b/src/core-commands.c index 951e001..98097bb 100644 --- a/src/core-commands.c +++ b/src/core-commands.c @@ -1045,32 +1045,7 @@ teco_state_start_get(teco_machine_main_t *ctx, GError **error) return; } - teco_int_t ret; - - if (teco_interface_ssm(SCI_GETCODEPAGE, 0, 0) == SC_CP_UTF8) { - gchar buf[4+1]; - struct Sci_TextRangeFull range = { - .chrg = {get_pos, MIN(len, get_pos+sizeof(buf)-1)}, - .lpstrText = buf - }; - /* - * Probably faster than SCI_GETRANGEPOINTER+SCI_GETGAPPOSITION - * or repeatedly calling SCI_GETCHARAT. - */ - teco_interface_ssm(SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range); - /* - * Make sure that the -1/-2 error values are preserved. - * The sign bit in UCS-4/UTF-32 is unused, so this will even - * suffice if TECO_INTEGER == 32. - */ - ret = (gint32)g_utf8_get_char_validated(buf, -1); - } else { - // FIXME: Everything else is a single-byte encoding? - /* internally, the character is casted to signed char */ - ret = (guchar)teco_interface_ssm(SCI_GETCHARAT, get_pos, 0); - } - - teco_expressions_push(ret); + teco_expressions_push(teco_interface_get_character(get_pos, len)); } static teco_state_t * diff --git a/src/interface.h b/src/interface.h index c975525..cbe10bd 100644 --- a/src/interface.h +++ b/src/interface.h @@ -172,6 +172,12 @@ teco_glyphs2bytes_relative(gsize pos, teco_int_t n) return teco_view_glyphs2bytes_relative(teco_interface_current_view, pos, n); } +static inline teco_int_t +teco_interface_get_character(gsize pos, gsize len) +{ + return teco_view_get_character(teco_interface_current_view, pos, len); +} + /* * The following functions are here for lack of a better place. 
* They could also be in sciteco.h, but only if declared as non-inline diff --git a/src/qreg-commands.c b/src/qreg-commands.c index d7bfafe..09b2b90 100644 --- a/src/qreg-commands.c +++ b/src/qreg-commands.c @@ -310,8 +310,10 @@ teco_state_queryqreg_got_register(teco_machine_main_t *ctx, teco_qreg_t *qreg, * Positions are handled like buffer positions \(em they * begin at 0 up to the length of the string minus 1. * An error is thrown for invalid positions. - * If <q> is Unicode-encoded, -1 or -2 could be returned for - * invalid byte sequences. + * If <q> is encoded as UTF-8 and there is + * an incomplete sequence at the requested position, + * -1 is returned. + * All other invalid Unicode sequences are returned as -2. * Both non-colon-modified forms of Q require register <q> * to be defined and fail otherwise. * @@ -284,27 +284,8 @@ teco_qreg_plain_get_character(teco_qreg_t *qreg, teco_int_t position, "Position %" TECO_INT_FORMAT " out of range", position); ret = FALSE; /* make sure we still restore the current Q-Register */ - } else if (teco_view_ssm(teco_qreg_view, SCI_GETCODEPAGE, 0, 0) == SC_CP_UTF8) { - gchar buf[4+1]; - struct Sci_TextRangeFull range = { - .chrg = {off, MIN(len, off+sizeof(buf)-1)}, - .lpstrText = buf - }; - /* - * Probably faster than SCI_GETRANGEPOINTER+SCI_GETGAPPOSITION - * or repeatedly calling SCI_GETCHARAT. - */ - teco_view_ssm(teco_qreg_view, SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range); - /* - * Make sure that the -1/-2 error values are preserved. - * The sign bit in UCS-4/UTF-32 is unused, so this will even - * suffice if TECO_INTEGER == 32. - */ - *chr = (gint32)g_utf8_get_char_validated(buf, -1); } else { - // FIXME: Everything else is a single-byte encoding? 
- /* internally, the character is casted to signed char */ - *chr = (guchar)teco_view_ssm(teco_qreg_view, SCI_GETCHARAT, off, 0); + *chr = teco_view_get_character(teco_qreg_view, off, len); } if (teco_qreg_current) @@ -543,3 +543,43 @@ teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n) /* SCI_POSITIONRELATIVE may return 0 even if the offset is valid */ return res ? : n > 0 ? -1 : teco_view_bytes2glyphs(ctx, pos)+n >= 0 ? 0 : -1; } + +/** + * Get codepoint at given byte offset. + * + * @param ctx The view to operate on. + * @param pos The glyph's byte position + * @param len The length of the document in bytes + * @return The requested codepoint. + * In UTF-8 encoded documents, this might be -1 (incomplete sequence) + * or -2 (invalid byte sequence). + */ +teco_int_t +teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len) +{ + if (teco_view_ssm(ctx, SCI_GETCODEPAGE, 0, 0) != SC_CP_UTF8) + /* + * We don't support the asiatic multi-byte encodings, + * so everything else is single-byte codepages. + * NOTE: Internally, the character is casted to signed char + * and may therefore become negative. + */ + return (guchar)teco_view_ssm(ctx, SCI_GETCHARAT, pos, 0); + + gchar buf[4+1]; + struct Sci_TextRangeFull range = { + .chrg = {pos, MIN(len, pos+sizeof(buf)-1)}, + .lpstrText = buf + }; + /* + * Probably faster than SCI_GETRANGEPOINTER+SCI_GETGAPPOSITION + * or repeatedly calling SCI_GETCHARAT. + */ + teco_view_ssm(ctx, SCI_GETTEXTRANGEFULL, 0, (sptr_t)&range); + /* + * Make sure that the -1/-2 error values are preserved. + * The sign bit in UCS-4/UTF-32 is unused, so this will even + * suffice if TECO_INTEGER == 32. 
+ */ + return (gint32)g_utf8_get_char_validated(buf, -1); +} @@ -74,3 +74,5 @@ void teco_view_free(teco_view_t *ctx); gssize teco_view_glyphs2bytes(teco_view_t *ctx, teco_int_t pos); teco_int_t teco_view_bytes2glyphs(teco_view_t *ctx, gsize pos); gssize teco_view_glyphs2bytes_relative(teco_view_t *ctx, gsize pos, teco_int_t n); + +teco_int_t teco_view_get_character(teco_view_t *ctx, gsize pos, gsize len); |