From 4038aa23ab80bf52e98ffe69442ccd5e0cf79a89 Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Thu, 23 Mar 2017 09:02:34 +0100 Subject: fixed and optimized piping very large buffers via EC/EG * Test case: HECcat$ on a large buffer (>= 64kb) truncates the buffer or repeats its beginning. * It turns out that the incremental writing to the process' stdin was broken. We were always writing data from the beginning of the buffer, which fails if the stdin watcher must be activated more than once. * Also, EOLWriter::convert() can validly return 0, even if bytes have been written to the data sink, so this value cannot be used to check whether the process has closed its stdin. We now make sure that the entire buffer range is written to stdin. * Piping large buffers no longer removes the buffer gap. This makes little difference when filtering via EC since it will change the buffer gap anyway. It can make a huge difference when not touching the buffer, though (e.g. HEGAcat$). * I did not add a test suite case since that requires a very large test file and it cannot be easily generated automatically. --- src/eol.cpp | 3 +-- src/spawn.cpp | 28 ++++++++++++++++------------ 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/eol.cpp b/src/eol.cpp index 85c69ba..de503e5 100644 --- a/src/eol.cpp +++ b/src/eol.cpp @@ -249,8 +249,7 @@ EOLReaderMem::convert_all(gsize *out_len) * * @param buffer The buffer to convert. * @parem buffer_len The length of the data in buffer. - * @return The number of bytes written to the data sink, - * i.e. the size of the EOL-normalized data written. + * @return The number of bytes consumed/converted from buffer. 
*/ gsize EOLWriter::convert(const gchar *buffer, gsize buffer_len) diff --git a/src/spawn.cpp b/src/spawn.cpp index 0d6d18a..b3a8823 100644 --- a/src/spawn.cpp +++ b/src/spawn.cpp @@ -326,11 +326,6 @@ StateExecuteCommand::initial(void) } } -/* - * FIXME: `xclip -selection clipboard -in` hangs -- the - * stdout watcher is never activated! - * Workaround is to pipe to /dev/null - */ State * StateExecuteCommand::done(const gchar *str) { @@ -567,28 +562,37 @@ stdin_watch_cb(GIOChannel *chan, GIOCondition condition, gpointer data) StateExecuteCommand::Context &ctx = *(StateExecuteCommand::Context *)data; + sptr_t gap; + gsize convert_len; const gchar *buffer; gsize bytes_written; + if (!(condition & G_IO_OUT)) + /* stdin might be closed prematurely */ + goto remove; + /* we always read from the current view */ + gap = interface.ssm(SCI_GETGAPPOSITION); + convert_len = ctx.start < gap && gap < ctx.to + ? gap - ctx.start : ctx.to - ctx.start; buffer = (const gchar *)interface.ssm(SCI_GETRANGEPOINTER, - ctx.from, (sptr_t)(ctx.to - ctx.start)); + ctx.start, convert_len); try { /* - * This cares about automatic EOL conversion + * This cares about automatic EOL conversion and + * returns the number of consumed bytes. + * If it can only write a part of the EOL sequence (ie. CR of CRLF) + * it may return a short byte count (possibly 0) which ensures that + * we do not yet remove the source. */ - bytes_written = ctx.stdin_writer->convert(buffer, ctx.to - ctx.start); + bytes_written = ctx.stdin_writer->convert(buffer, convert_len); } catch (Error &e) { ctx.error = new Error(e); /* do not yet quit -- we still have to reap the child */ goto remove; } - if (bytes_written == 0) - /* EOF: process closed stdin preliminarily? */ - goto remove; - ctx.start += bytes_written; if (ctx.start == ctx.to) -- cgit v1.2.3