aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/view.c
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2024-12-17 12:48:41 +0300
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2024-12-17 12:48:41 +0300
commit715d7e151eea9126ead42834cf9f9a05a7226597 (patch)
tree1c68f8ec53f972ba9bfc33e141255df4912f3e62 /src/view.c
parent9cd8d404d246774f53b6f5d318ccbd4bd82a922d (diff)
downloadsciteco-715d7e151eea9126ead42834cf9f9a05a7226597.tar.gz
sped up opening very large UTF-8 files by temporarily disabling the line-character index
* checks for character consistency (of UTF-8 byte sequences) were slowing down things significantly in Scintilla * It got even worse if the file indeed contained non-ANSI codepoints as reading in chunks of 1024 would sometimes mean that incomplete byte sequences would be read. Some large 160mb test files wouldn't load even after minutes. They now load in seconds. * This does NOT yet solve the slowdowns when operating on very long lines.
Diffstat (limited to 'src/view.c')
-rw-r--r--src/view.c44
1 files changed, 33 insertions, 11 deletions
diff --git a/src/view.c b/src/view.c
index 836ffdc..1f09ce6 100644
--- a/src/view.c
+++ b/src/view.c
@@ -206,9 +206,24 @@ teco_view_set_representations(teco_view_t *ctx)
gboolean
teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **error)
{
+ gboolean ret = TRUE;
+
g_auto(teco_eol_reader_t) reader;
teco_eol_reader_init_gio(&reader, channel);
+ /*
+ * Temporarily disable the line character index.
+ * This tremendously speeds up reading UTF-8 documents.
+ * The reason is, that UTF-8 consistency checks are rather
+ * costly. Also, when reading in chunks of 1024 bytes,
+ * we can very well add incomplete UTF-8 sequences,
+ * resulting in unnecessary recalculations of the line index.
+ */
+ guint cp = teco_view_get_codepage(ctx);
+ if (cp == SC_CP_UTF8)
+ teco_interface_ssm(SCI_RELEASELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
+
teco_view_ssm(ctx, SCI_BEGINUNDOACTION, 0, 0);
teco_view_ssm(ctx, SCI_CLEARALL, 0, 0);
@@ -222,8 +237,9 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro
struct stat stat_buf = {.st_size = 0};
if (!fstat(g_io_channel_unix_get_fd(channel), &stat_buf) &&
stat_buf.st_size > 0) {
- if (!teco_memory_check(stat_buf.st_size, error))
- goto error;
+ ret = teco_memory_check(stat_buf.st_size, error);
+ if (!ret)
+ goto cleanup;
teco_view_ssm(ctx, SCI_ALLOCATE, stat_buf.st_size, 0);
}
@@ -235,8 +251,10 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro
teco_string_t str;
GIOStatus rc = teco_eol_reader_convert(&reader, &str.data, &str.len, error);
- if (rc == G_IO_STATUS_ERROR)
- goto error;
+ if (rc == G_IO_STATUS_ERROR) {
+ ret = FALSE;
+ goto cleanup;
+ }
if (rc == G_IO_STATUS_EOF)
break;
@@ -246,12 +264,14 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro
* Even if we checked initially, knowing the file size,
* Scintilla could allocate much more bytes.
*/
- if (!teco_memory_check(0, error))
- goto error;
+ ret = teco_memory_check(0, error);
+ if (!ret)
+ goto cleanup;
if (G_UNLIKELY(teco_interface_is_interrupted())) {
teco_error_interrupted_set(error);
- goto error;
+ ret = FALSE;
+ goto cleanup;
}
}
@@ -272,12 +292,14 @@ teco_view_load_from_channel(teco_view_t *ctx, GIOChannel *channel, GError **erro
teco_interface_msg(TECO_MSG_WARNING,
"Inconsistent EOL styles normalized");
+cleanup:
teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0);
- return TRUE;
-error:
- teco_view_ssm(ctx, SCI_ENDUNDOACTION, 0, 0);
- return FALSE;
+ if (cp == SC_CP_UTF8)
+ teco_interface_ssm(SCI_ALLOCATELINECHARACTERINDEX,
+ SC_LINECHARACTERINDEX_UTF32, 0);
+
+ return ret;
}
/**