/*
* Copyright (C) 2012-2024 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <glib.h>

#include "sciteco.h"
#include "undo.h"
#include "string-utils.h"
/**
 * Get echoable (printable) version of a given string.
 *
 * Every byte for which TECO_IS_CTL() holds is written as a caret (`^`)
 * followed by TECO_CTL_ECHO() of that byte, all other bytes are
 * copied unchanged.
 * This converts all control characters to printable
 * characters without tabs, line feeds, etc.
 * That's also why it can safely return a null-terminated string.
 * Useful for displaying Q-Register names and TECO code.
 *
 * @param str Bytes to convert. Only the first len bytes are read.
 * @param len Number of bytes in str.
 * @return Newly allocated null-terminated string.
 *         It was allocated with g_malloc(), so release it with g_free().
 *
 * @memberof teco_string_t
 */
gchar *
teco_string_echo(const gchar *str, gsize len)
{
	/* worst case: each input byte expands to two characters, plus the terminator */
	gchar *ret = g_malloc(len*2 + 1);
	gchar *p = ret;

	/*
	 * The index must be as wide as len:
	 * A narrower counter would wrap around before reaching
	 * very large lengths and overrun the output buffer.
	 */
	for (gsize i = 0; i < len; i++) {
		if (TECO_IS_CTL(str[i])) {
			*p++ = '^';
			*p++ = TECO_CTL_ECHO(str[i]);
		} else {
			*p++ = str[i];
		}
	}
	*p = '\0';

	return ret;
}
/**
 * Get character coordinates for a given byte index.
 *
 * The given string must be valid UTF-8.
 * The string is walked character by character from its beginning
 * up to the byte offset.
 * A line feed, a carriage return and a carriage return immediately
 * followed by a line feed each count as one line break and as
 * one character.
 *
 * @param str UTF-8 string to scan. Only bytes before off are read.
 * @param off Byte offset into str at which to stop.
 * @param pos Receives the number of characters before off (starts at 0).
 * @param line Receives the line number (starts at 1).
 * @param column Receives the column in characters (starts at 1,
 *               reset to 1 after every line break).
 *
 * @memberof teco_string_t
 */
void
teco_string_get_coord(const gchar *str, gsize off, guint *pos, guint *line, guint *column)
{
	*pos = 0;
	*line = *column = 1;

	/* byte index; as wide as off, so that large offsets are not truncated */
	for (gsize i = 0; i < off; i = g_utf8_next_char(str+i) - str) {
		switch (str[i]) {
		case '\r':
			/*
			 * Swallow the line feed of a CRLF pair.
			 * The look-ahead is limited to bytes before off,
			 * so nothing outside of the requested range is read.
			 * A line feed located exactly at off would end the
			 * loop anyway, so the results are not affected.
			 */
			if (i+1 < off && str[i+1] == '\n')
				i++;
			/* fall through */
		case '\n':
			(*line)++;
			(*column) = 1;
			break;
		default:
			(*column)++;
			break;
		}

		(*pos)++;
	}
}
/**
 * Get the length of the prefix common to two strings.
 * Works with UTF-8 and single-byte encodings.
 *
 * The strings are compared byte by byte, so the result is
 * always a byte count and never exceeds the length of
 * the shorter string.
 *
 * @param a Left string.
 * @param b Right string.
 * @param b_len Length of right string.
 * @return Length of the common prefix in bytes.
 *
 * @memberof teco_string_t
 */
gsize
teco_string_diff(const teco_string_t *a, const gchar *b, gsize b_len)
{
	/* the common prefix cannot be longer than the shorter string */
	const gsize limit = a->len < b_len ? a->len : b_len;
	gsize matched;

	for (matched = 0; matched < limit; matched++) {
		if (a->data[matched] != b[matched])
			break;
	}

	return matched;
}
/**
 * Get the length of the prefix common to two UTF-8 strings
 * without considering case.
 *
 * The UTF-8 strings must be validated, which should be the case
 * for help labels and short Q-Register names.
 *
 * Characters are compared after folding them with g_unichar_tolower().
 * The common prefix ends at the first pair of characters that
 * fold to different code points or that are encoded with a different
 * number of bytes, so the result is always a character boundary
 * in both strings.
 *
 * @param a Left UTF-8 string.
 * @param b Right UTF-8 string.
 * @param b_len Length of right UTF-8 string.
 * @return Length of the common prefix in bytes.
 *
 * @memberof teco_string_t
 */
gsize
teco_string_casediff(const teco_string_t *a, const gchar *b, gsize b_len)
{
	gsize len = 0;

	while (len < a->len && len < b_len) {
		const gchar *a_cur = a->data + len;
		const gchar *b_cur = b + len;

		if (g_unichar_tolower(g_utf8_get_char(a_cur)) !=
		    g_unichar_tolower(g_utf8_get_char(b_cur)))
			break;

		/*
		 * Characters that are equal after case folding are not
		 * necessarily encoded with the same number of bytes.
		 * Since a single byte offset is used for both strings,
		 * continuing would leave one of them in the middle of
		 * a multi-byte sequence.
		 */
		gsize a_width = g_utf8_next_char(a_cur) - a_cur;
		gsize b_width = g_utf8_next_char(b_cur) - b_cur;
		if (a_width != b_width)
			break;

		len += b_width;
	}

	return len;
}
/**
 * Compare two strings byte by byte.
 *
 * NOTE(review): The bytes are subtracted as gchar values.
 * Presumably the relative order of bytes >= 0x80 therefore depends
 * on the signedness of char on the target platform -- confirm
 * whether any caller relies on a particular order.
 *
 * @param a Left string.
 * @param b Right string.
 * @param b_len Length of right string in bytes.
 * @return 0 if both strings have the same length and contents.
 *         A positive value if b is a proper prefix of a (1) or the
 *         first differing byte is greater in a.
 *         A negative value if a is a proper prefix of b (-1) or the
 *         first differing byte is smaller in a.
 *
 * @memberof teco_string_t
 */
gint
teco_string_cmp(const teco_string_t *a, const gchar *b, gsize b_len)
{
	/* the index is as wide as the lengths, so it cannot wrap around on long strings */
	for (gsize i = 0; i < a->len; i++) {
		if (i == b_len)
			/* b is a prefix of a */
			return 1;

		gint ret = (gint)a->data[i] - (gint)b[i];
		if (ret != 0)
			/* a and b have a common prefix of length i */
			return ret;
	}

	/* all of a matched: either the strings are equal or a is a prefix of b */
	return a->len == b_len ? 0 : -1;
}
/**
 * Compare two strings byte by byte without considering
 * the case of ASCII letters.
 *
 * Every byte is passed through g_ascii_tolower() before it is compared,
 * so this does not perform Unicode case folding.
 *
 * @param a Left string.
 * @param b Right string.
 * @param b_len Length of right string in bytes.
 * @return 0 if both strings have the same length and their contents
 *         are equal after lower-casing.
 *         A positive value if b is a proper prefix of a (1) or the
 *         first differing lower-cased byte is greater in a.
 *         A negative value if a is a proper prefix of b (-1) or the
 *         first differing lower-cased byte is smaller in a.
 *
 * @memberof teco_string_t
 */
gint
teco_string_casecmp(const teco_string_t *a, const gchar *b, gsize b_len)
{
	/* the index is as wide as the lengths, so it cannot wrap around on long strings */
	for (gsize i = 0; i < a->len; i++) {
		if (i == b_len)
			/* b is a prefix of a */
			return 1;

		gint ret = (gint)g_ascii_tolower(a->data[i]) - (gint)g_ascii_tolower(b[i]);
		if (ret != 0)
			/* a and b have a common prefix of length i */
			return ret;
	}

	/* all of a matched: either the strings are equal or a is a prefix of b */
	return a->len == b_len ? 0 : -1;
}
/**
 * Find string after the last occurrence of any in a set of characters.
 *
 * The set is processed one character at a time.
 * Whenever a character is found in the remaining suffix, the suffix
 * is shortened to begin right after its last occurrence.
 * If no character of the set occurs, the beginning of the
 * string's data is returned.
 *
 * @param str String to search through.
 * @param chars Null-terminated set of characters.
 *   The null-byte itself is always considered part of the set.
 * @return A pointer into the data of str (null-terminated as long
 *   as str itself is) or NULL if str is empty.
 *
 * @memberof teco_string_t
 */
const gchar *
teco_string_last_occurrence(const teco_string_t *str, const gchar *chars)
{
	if (!str->len)
		return NULL;

	/* shallow copy that is narrowed down to the resulting suffix */
	teco_string_t tail = *str;

	for (const gchar *cur = chars; ; cur++) {
		gint idx = teco_string_rindex(&tail, *cur);
		if (idx >= 0) {
			tail.data += idx+1;
			tail.len -= idx+1;
		}

		/* the terminating null-byte has been searched for as well */
		if (!*cur)
			break;
	}

	return tail.data;
}
/*
 * Undo support generated by macros that are not defined in this file.
 * NOTE(review): Judging by the arguments, the first line presumably
 * generates an undoable variant of teco_string_truncate() taking a
 * teco_string_t pointer and a gsize -- confirm against the
 * macro definition (probably in undo.h).
 * NOTE(review): The second line presumably generates undo handling for
 * plain C strings ("cstring") that are duplicated with g_strdup()
 * and released with g_free() -- confirm against the macro definition.
 */
TECO_DEFINE_UNDO_CALL(teco_string_truncate, teco_string_t *, gsize);
TECO_DEFINE_UNDO_OBJECT(cstring, gchar *, g_strdup, g_free);
/**
 * Create a copy of a string passed by value.
 *
 * The result is set up by teco_string_init() from the
 * data pointer and length of the given string.
 *
 * @param str String to copy.
 * @return The string initialized by teco_string_init().
 */
static inline teco_string_t
teco_string_copy(const teco_string_t str)
{
	teco_string_t duplicate;

	teco_string_init(&duplicate, str.data, str.len);

	return duplicate;
}
/*
 * Temporary helper that clears a teco_string_t given by value.
 * It takes the address of its argument since teco_string_clear()
 * is called with a pointer.
 * It is only needed for the two macro invocations below
 * and is undefined again afterwards.
 */
#define DELETE(X) teco_string_clear(&(X))
/*
 * NOTE(review): These macros are not defined in this file.
 * Judging by the arguments, "string" presumably copies the
 * teco_string_t with teco_string_copy() while "string_own" presumably
 * takes ownership without copying; both clean up via DELETE() --
 * confirm against the macro definitions (probably in undo.h).
 */
TECO_DEFINE_UNDO_OBJECT(string, teco_string_t, teco_string_copy, DELETE);
TECO_DEFINE_UNDO_OBJECT_OWN(string_own, teco_string_t, DELETE);
#undef DELETE