aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2024-08-28 12:59:05 +0200
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2024-09-09 18:16:07 +0200
commit4c6b6814abfc9c022c6ea8d1e23097c2a774fde5 (patch)
tree26ea9ad6d2777c080c1733b55fc7d30180c335f5
parentfdc185b8faaae44d67f85d2c5a9b9fa48d3e2859 (diff)
downloadsciteco-4c6b6814abfc9c022c6ea8d1e23097c2a774fde5.tar.gz
input and displaying of Unicode characters is now possible (refs #5)
* All non-ASCII characters are inserted as Unicode. On Curses, this also requires a properly set up locale. * We still do not need any widechar Curses, as waddch() handles multibyte characters on ncurses. We will see whether there is any Curses variant that strictly requires wadd_wch(). If such a variant turns out to be an exception, we might keep both widechar and non-widechar support. * By convention, gsize is used exclusively for byte sizes. Character offsets or lengths use int or long.
-rw-r--r--src/interface-curses/curses-utils.c70
-rw-r--r--src/interface-curses/curses-utils.h4
-rw-r--r--src/interface-curses/interface.c10
-rw-r--r--src/interface-gtk/interface.c8
-rw-r--r--src/sciteco.h2
-rw-r--r--src/string-utils.h6
6 files changed, 73 insertions, 27 deletions
diff --git a/src/interface-curses/curses-utils.c b/src/interface-curses/curses-utils.c
index e7c8659..c751afd 100644
--- a/src/interface-curses/curses-utils.c
+++ b/src/interface-curses/curses-utils.c
@@ -29,7 +29,21 @@
#include "string-utils.h"
#include "curses-utils.h"
-gsize
+/**
+ * Render UTF-8 string with TECO character representations.
+ *
+ * Strings are cut off with `...` at the end if necessary.
+ * The mapping is similar to teco_view_set_representations().
+ *
+ * @param win The Curses window to write to.
+ * @param str The string to format.
+ * @param len The length of the string in bytes.
+ * @param max_width The maximum width to consume in
+ * the window in characters. If smaller than 0, take the
+ * entire remaining space in the window.
+ * @return Number of characters actually written.
+ */
+guint
teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width)
{
int old_x, old_y;
@@ -42,6 +56,12 @@ teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width)
while (len > 0) {
/*
+ * NOTE: It shouldn't be possible to encounter any string
+ * that is not valid UTF-8.
+ */
+ gsize clen = g_utf8_next_char(str) - str;
+
+ /*
* NOTE: This mapping is similar to
* teco_view_set_representations().
*/
@@ -85,12 +105,18 @@ teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width)
chars_added++;
if (chars_added > max_width)
goto truncate;
- waddch(win, *str);
+ /*
+ * FIXME: This works with UTF-8 on ncurses,
+ * since it detects multi-byte characters.
+ * However on other platforms wadd_wch() may be
+ * necessary, which requires a widechar Curses variant.
+ */
+ waddnstr(win, str, clen);
}
}
- str++;
- len--;
+ str += clen;
+ len -= clen;
}
return getcurx(win) - old_x;
@@ -108,23 +134,43 @@ truncate:
return getcurx(win) - old_x;
}
-gsize
-teco_curses_format_filename(WINDOW *win, const gchar *filename,
- gint max_width)
+/**
+ * Render UTF-8 filename.
+ *
+ * This cuts off overlong filenames with `...` at the beginning,
+ * possibly skipping any drive letter.
+ * Control characters are escaped, but not highlighted.
+ *
+ * @param win The Curses window to write to.
+ * @param filename Null-terminated filename to render.
+ * @param max_width The maximum width to consume in
+ * the window in characters. If smaller than 0, take the
+ * entire remaining space in the window.
+ * @return Number of characters actually written.
+ */
+guint
+teco_curses_format_filename(WINDOW *win, const gchar *filename, gint max_width)
{
int old_x = getcurx(win);
g_autofree gchar *filename_printable = teco_string_echo(filename, strlen(filename));
- size_t filename_len = strlen(filename_printable);
+ glong filename_len = g_utf8_strlen(filename_printable, -1);
if (max_width < 0)
max_width = getmaxx(win) - old_x;
- if (filename_len <= (size_t)max_width) {
+ if (filename_len <= max_width) {
+ /*
+ * FIXME: This works with UTF-8 on ncurses,
+ * since it detects multi-byte characters.
+ * However on other platforms wadd_wch() may be
+ * necessary, which requires a widechar Curses variant.
+ */
waddstr(win, filename_printable);
- } else {
- const gchar *keep_post = filename_printable + filename_len -
- max_width + 3;
+ } else if (filename_len >= 3) {
+ const gchar *keep_post;
+ keep_post = g_utf8_offset_to_pointer(filename_printable + strlen(filename_printable),
+ -max_width + 3);
#ifdef G_OS_WIN32
const gchar *keep_pre = g_path_skip_root(filename_printable);
diff --git a/src/interface-curses/curses-utils.h b/src/interface-curses/curses-utils.h
index 35d9582..28c6f9b 100644
--- a/src/interface-curses/curses-utils.h
+++ b/src/interface-curses/curses-utils.h
@@ -20,6 +20,6 @@
#include <curses.h>
-gsize teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width);
+guint teco_curses_format_str(WINDOW *win, const gchar *str, gsize len, gint max_width);
-gsize teco_curses_format_filename(WINDOW *win, const gchar *filename, gint max_width);
+guint teco_curses_format_filename(WINDOW *win, const gchar *filename, gint max_width);
diff --git a/src/interface-curses/interface.c b/src/interface-curses/interface.c
index 8de744f..89be588 100644
--- a/src/interface-curses/interface.c
+++ b/src/interface-curses/interface.c
@@ -344,7 +344,7 @@ static struct {
WINDOW *msg_window;
WINDOW *cmdline_window, *cmdline_pad;
- gsize cmdline_len, cmdline_rubout_len;
+ guint cmdline_len, cmdline_rubout_len;
GQueue *input_queue;
@@ -680,7 +680,7 @@ teco_interface_init_interactive(GError **error)
#endif
/* for displaying UTF-8 characters properly */
- setlocale(LC_CTYPE, "");
+ setlocale(LC_ALL, "");
teco_interface_init_screen();
@@ -1044,7 +1044,8 @@ teco_interface_cmdline_update(const teco_cmdline_t *cmdline)
* We don't know if it is similar to the last one,
* so resizing makes no sense.
* We approximate the size of the new formatted command-line,
- * wasting a few bytes for control characters.
+ * wasting a few bytes for control characters and
+ * multi-byte Unicode sequences.
*/
if (teco_interface.cmdline_pad)
delwin(teco_interface.cmdline_pad);
@@ -1626,6 +1627,7 @@ teco_interface_event_loop_iter(void)
/*
* Function key macros
+ * FIXME: What about keyname()?
*/
#define FN(KEY) \
case KEY_##KEY: \
@@ -1660,7 +1662,7 @@ teco_interface_event_loop_iter(void)
* Control keys and keys with printable representation
*/
default:
- if (key < 0x80 &&
+ if (key <= 0xFF &&
!teco_cmdline_keypress_c(key, &teco_interface.event_loop_error))
return;
}
diff --git a/src/interface-gtk/interface.c b/src/interface-gtk/interface.c
index bddd51f..ed027a2 100644
--- a/src/interface-gtk/interface.c
+++ b/src/interface-gtk/interface.c
@@ -876,14 +876,6 @@ teco_interface_cmdline_commit_cb(GtkIMContext *context, gchar *str, gpointer use
{
g_autoptr(GError) error = NULL;
- /*
- * FIXME: This is only for consistency as long as we
- * do not support Unicode.
- */
- for (char *p = str; *p != '\0'; p = g_utf8_next_char(p))
- if (g_utf8_get_char(p) >= 0x80)
- return;
-
if (!teco_cmdline_keypress(str, strlen(str), &error) &&
g_error_matches(error, TECO_ERROR, TECO_ERROR_QUIT))
gtk_main_quit();
diff --git a/src/sciteco.h b/src/sciteco.h
index 55ffd6c..7f420e8 100644
--- a/src/sciteco.h
+++ b/src/sciteco.h
@@ -69,7 +69,7 @@ teco_is_failure(teco_bool_t x)
#endif
/** TRUE if C is a control character */
-#define TECO_IS_CTL(C) ((C) < ' ')
+#define TECO_IS_CTL(C) ((guchar)(C) < ' ')
/** ASCII character to echo control character C */
#define TECO_CTL_ECHO(C) ((C) | 0x40)
/**
diff --git a/src/string-utils.h b/src/string-utils.h
index 973b954..efc6fc5 100644
--- a/src/string-utils.h
+++ b/src/string-utils.h
@@ -51,6 +51,12 @@ teco_ascii_toupper(gchar chr)
* A target teco_string_t::data is always null-terminated and thus safe to pass
* to functions expecting traditional null-terminated C strings if you can
* guarantee that it contains no null-character other than the trailing one.
+ *
+ * @warning For consistency with C idioms the underlying character type is
+ * `char`, which might be signed!
+ * Accessing individual characters may yield signed integers and that sign
+ * might be preserved when upcasting to a larger signed integer.
+ * In this case you should always cast to `guchar` first.
*/
typedef struct {
/**