diff options
Diffstat (limited to 'src/eol.cpp')
-rw-r--r-- | src/eol.cpp | 359 |
1 files changed, 0 insertions, 359 deletions
diff --git a/src/eol.cpp b/src/eol.cpp deleted file mode 100644 index 2dea3ef..0000000 --- a/src/eol.cpp +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (C) 2012-2017 Robin Haberkorn - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include <glib.h> - -#include "sciteco.h" -#include "error.h" -#include "eol.h" - -namespace SciTECO { - -/** - * Read data with automatic EOL translation. - * - * This gets the next data block from the converter - * implementation, performs EOL translation (if enabled) - * in a more or less efficient manner and returns - * a chunk of EOL-normalized data. - * - * Since the underlying data source may have to be - * queried repeatedly and because EOLReader avoids - * reassembling the EOL-normalized data by returning - * references into the modified data source, it is - * necessary to call this function repeatedly until - * it returns NULL. - * - * Errors reading the data source are propagated - * (as exceptions). - * - * @param data_len The length of the data chunk returned - * by this function. Set on return. - * @return A pointer to a chunk of EOL-normalized - * data of length data_len. - * It is NOT null-terminated. - * NULL is returned when all data has been converted. - */ -const gchar * -EOLReader::convert(gsize &data_len) -{ - if (last_char < 0) { - /* a CRLF was last translated */ - block_len++; - last_char = '\n'; - } - offset += block_len; - - if (offset == read_len) { - offset = 0; - - /* - * NOTE: This throws in case of errors - */ - if (!this->read(buffer, read_len)) { - /* EOF */ - if (last_char == '\r') { - /* - * Very last character read is CR. - * If this is the only EOL so far, the - * EOL style is MAC. - * This is also executed if auto-eol is disabled - * but it doesn't hurt. - */ - if (eol_style < 0) - eol_style = SC_EOL_CR; - else if (eol_style != SC_EOL_CR) - eol_style_inconsistent = TRUE; - } - - return NULL; - } - - if (!(Flags::ed & Flags::ED_AUTOEOL)) { - /* - * No EOL translation - always return entire - * buffer - */ - data_len = block_len = read_len; - return buffer; - } - } - - /* - * Return data with automatic EOL translation. - * Every EOL sequence is normalized to LF and - * the first sequence determines the documents - * EOL style. - * This loop is executed for every byte of the - * file/stream, so it was important to optimize - * it. Specifically, the number of returns - * is minimized by keeping a pointer to - * the beginning of a block of data in the buffer - * which already has LFs (offset). - * Mac EOLs can be converted to UNIX EOLs directly - * in the buffer. - * So if their EOLs are consistent, the function - * will return one block for the entire buffer. - * When reading a file with DOS EOLs, there will - * be one call per line which is significantly slower. - */ - for (guint i = offset; i < read_len; i++) { - switch (buffer[i]) { - case '\n': - if (last_char == '\r') { - if (eol_style < 0) - eol_style = SC_EOL_CRLF; - else if (eol_style != SC_EOL_CRLF) - eol_style_inconsistent = TRUE; - - /* - * Return block. CR has already - * been made LF in `buffer`. - */ - data_len = block_len = i-offset; - /* next call will skip the CR */ - last_char = -1; - return buffer + offset; - } - - if (eol_style < 0) - eol_style = SC_EOL_LF; - else if (eol_style != SC_EOL_LF) - eol_style_inconsistent = TRUE; - /* - * No conversion necessary and no need to - * return block yet. - */ - last_char = '\n'; - break; - - case '\r': - if (last_char == '\r') { - if (eol_style < 0) - eol_style = SC_EOL_CR; - else if (eol_style != SC_EOL_CR) - eol_style_inconsistent = TRUE; - } - - /* - * Convert CR to LF in `buffer`. - * This way more than one line using - * Mac EOLs can be returned at once. - */ - buffer[i] = '\n'; - last_char = '\r'; - break; - - default: - if (last_char == '\r') { - if (eol_style < 0) - eol_style = SC_EOL_CR; - else if (eol_style != SC_EOL_CR) - eol_style_inconsistent = TRUE; - } - last_char = buffer[i]; - break; - } - } - - /* - * Return remaining block. - * With UNIX/MAC EOLs, this will usually be the - * entire `buffer` - */ - data_len = block_len = read_len-offset; - return buffer + offset; -} - -bool -EOLReaderGIO::read(gchar *buffer, gsize &read_len) -{ - GError *error = NULL; - - switch (g_io_channel_read_chars(channel, buffer, - sizeof(EOLReaderGIO::buffer), - &read_len, &error)) { - case G_IO_STATUS_ERROR: - throw GlibError(error); - case G_IO_STATUS_EOF: - return false; - case G_IO_STATUS_NORMAL: - case G_IO_STATUS_AGAIN: - break; - } - - return true; -} - -bool -EOLReaderMem::read(gchar *buffer, gsize &read_len) -{ - read_len = buffer_len; - buffer_len = 0; - /* - * On the first call, returns true, - * later false (no more data). - */ - return read_len != 0; -} - -/* - * This could be in EOLReader as well, but this way, we - * make use of the buffer_len to avoid unnecessary allocations. - */ -gchar * -EOLReaderMem::convert_all(gsize *out_len) -{ - GString *str = g_string_sized_new(buffer_len); - const gchar *data; - gsize data_len; - - try { - while ((data = convert(data_len))) - g_string_append_len(str, data, data_len); - } catch (...) { - g_string_free(str, TRUE); - throw; /* forward */ - } - - if (out_len) - *out_len = str->len; - return g_string_free(str, FALSE); -} - -/** - * Perform EOL-normalization on a buffer (if enabled) and - * pass it to the underlying data sink. - * - * This can be called repeatedly to transform a larger - * document - the buffer provided does not have to be - * well-formed with regard to EOL sequences. - * - * @param buffer The buffer to convert. - * @param buffer_len The length of the data in buffer. - * @return The number of bytes consumed/converted from buffer. - */ -gsize -EOLWriter::convert(const gchar *buffer, gsize buffer_len) -{ - gsize bytes_written; - guint i = 0; - guint block_start; - gsize block_written; - - if (!(Flags::ed & Flags::ED_AUTOEOL)) - /* - * Write without EOL-translation: - * `state` is not required - * NOTE: This throws in case of errors - */ - return this->write(buffer, buffer_len); - - /* - * Write to stream with EOL-translation. - * The document's EOL mode tells us what was guessed - * when its content was read in (presumably from a file) - * but might have been changed manually by the user. - * NOTE: This code assumes that the output stream is - * buffered, since otherwise it would be slower - * (has been benchmarked). - * NOTE: The loop is executed for every character - * in `buffer` and has been optimized for minimal - * function (i.e. GIOChannel) calls. - */ - bytes_written = 0; - if (state == STATE_WRITE_LF) { - /* complete writing a CRLF sequence */ - if (this->write("\n", 1) < 1) - return 0; - state = STATE_START; - bytes_written++; - i++; - } - - block_start = i; - while (i < buffer_len) { - switch (buffer[i]) { - case '\n': - if (last_c == '\r') { - /* EOL sequence already written */ - bytes_written++; - block_start = i+1; - break; - } - /* fall through */ - case '\r': - block_written = this->write(buffer+block_start, i-block_start); - bytes_written += block_written; - if (block_written < i-block_start) - return bytes_written; - - block_written = this->write(eol_seq, eol_seq_len); - if (block_written == 0) - return bytes_written; - if (block_written < eol_seq_len) { - /* incomplete EOL seq - we have written CR of CRLF */ - state = STATE_WRITE_LF; - return bytes_written; - } - bytes_written++; - - block_start = i+1; - break; - } - - last_c = buffer[i++]; - } - - /* - * Write out remaining block (i.e. line) - */ - bytes_written += this->write(buffer+block_start, buffer_len-block_start); - return bytes_written; -} - -gsize -EOLWriterGIO::write(const gchar *buffer, gsize buffer_len) -{ - gsize bytes_written; - GError *error = NULL; - - switch (g_io_channel_write_chars(channel, buffer, buffer_len, - &bytes_written, &error)) { - case G_IO_STATUS_ERROR: - throw GlibError(error); - case G_IO_STATUS_EOF: - case G_IO_STATUS_NORMAL: - case G_IO_STATUS_AGAIN: - break; - } - - return bytes_written; -} - -gsize -EOLWriterMem::write(const gchar *buffer, gsize buffer_len) -{ - g_string_append_len(str, buffer, buffer_len); - return buffer_len; -} - -} /* namespace SciTECO */ |