From abfbeb17e56bd9abc275de0f7ace6c197e00e3bf Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Mon, 21 Oct 2024 02:10:44 +0300 Subject: fixed EOL conversion on UTF-8 texts * Fixed the old bug of saving gchars in gints, which allowed teco_eol_reader_t::last_char to become negative. * When converting from a UTF-8 text with CRLF linebreaks, we could have data loss and corruption. * On strings ending in UTF-8 characters, teco_eol_reader_t::offset would overflow, resulting in invalid reads and potential insertion of data garbage. I observed this with G~ on Gtk. * Test case updated. Couldn't reproduce the bug with the test suite, though. --- src/eol.c | 2 +- tests/autoeol-input.txt | 6 +++--- tests/autoeol-output.txt | 4 ++-- tests/testsuite.at | 2 ++ 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/eol.c b/src/eol.c index 0063bbd..3439869 100644 --- a/src/eol.c +++ b/src/eol.c @@ -239,7 +239,7 @@ teco_eol_reader_convert(teco_eol_reader_t *ctx, gchar **ret, gsize *data_len, GE else if (ctx->eol_style != SC_EOL_CR) ctx->eol_style_inconsistent = TRUE; } - ctx->last_char = buffer[i]; + ctx->last_char = (guchar)buffer[i]; break; } } diff --git a/tests/autoeol-input.txt b/tests/autoeol-input.txt index 237e7d1..705845d 100644 --- a/tests/autoeol-input.txt +++ b/tests/autoeol-input.txt @@ -1,4 +1,4 @@ -Hello -world FOO -BAR \ No newline at end of file +BAR +Здравствуй +мир \ No newline at end of file diff --git a/tests/autoeol-output.txt b/tests/autoeol-output.txt index 5fb4c8e..9c1ef29 100644 --- a/tests/autoeol-output.txt +++ b/tests/autoeol-output.txt @@ -1,4 +1,4 @@ -Hello -world FOO BAR +Здравствуй +мир \ No newline at end of file diff --git a/tests/testsuite.at b/tests/testsuite.at index 7769cb7..729b3c8 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -134,6 +134,8 @@ AT_SETUP([Automatic EOL normalization]) AT_CHECK([$SCITECO -e "@EB'${srcdir}/autoeol-input.txt' EL-2\"N(0/0)' 2LR 13@I'' 0EL @EW'autoeol-sciteco.txt'"], 0, ignore, ignore) 
AT_CHECK([cmp autoeol-sciteco.txt ${srcdir}/autoeol-output.txt], 0, ignore, ignore) +AT_CHECK([$SCITECO -e "@EB'autoeol-sciteco.txt' EL-0\"N(0/0)' 2EL @EW''"], 0, ignore, ignore) +AT_CHECK([cmp autoeol-sciteco.txt ${srcdir}/autoeol-input.txt], 0, ignore, ignore) AT_CLEANUP AT_SETUP([Memory limiting]) -- cgit v1.2.3