From 6618b77947f29748dd65af62c51c47ce94f87860 Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Sat, 16 Mar 2019 18:38:44 +0100 Subject: allow lookups containing accentuation character (Unicode 0x0301) * they are simply ignored * in openrussian.lua, we delegate Unicode handling to SQLite * for Bash completions, we ignore accents during matching but (have to) preserve them in the prefix -- see comments. * also properly escape SQL strings preventing accidental code injection. This means we do not necessarily have to port openrussian-completion.bash to Lua. --- openrussian-completion.bash | 20 ++++++++++++++++---- openrussian.lua | 5 ++++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/openrussian-completion.bash b/openrussian-completion.bash index 0facd31..fd27c06 100644 --- a/openrussian-completion.bash +++ b/openrussian-completion.bash @@ -3,13 +3,25 @@ # NOTE: Uses SQL for matching instead of compgen, since it's probably not # a good idea to always retrieve all the words, although this happens anyway # when completing an empty word. -# -# FIXME: sqlite3 command-line tool does not provide a safe method for embedding -# strings, so we should either properly escape $2 or write an external Lua script. _openrussian_completions() { - SQL="SELECT bare FROM words WHERE bare LIKE \"$2%\"" + # NOTE: sqlite3's command-line tool does not provide a way to properly + # embed strings, so we try to escape it here. + WORD=$(echo -n "$2" | sed 's/"/""/g') + + # This is a rather convoluted way of writing + # SELECT bare FROM words WHERE bare LIKE "${WORD}%"; + # but allowing $WORD to contain accentuation characters, which can easily + # happen when cutting/pasting words from the openrussian.lua manpages or the + # internet. + # NOTE: This is merely a workaround since all completions will begin with + # $WORD including accents and end without accents, so the suggested completions + # will likely be with wrong accents. 
+ # It seems to be impossible, at least in Bash, to rubout $WORD first. + SQL=$(printf 'SELECT "%s" || SUBSTR(bare, LENGTH(REPLACE("%s", "\u0301", ""))+1) + FROM words WHERE bare LIKE REPLACE("%s%%", "\u0301", "")' \ + "$WORD" "$WORD" "$WORD") # Calculate database path based on the installation path of the `openrussian` # CLI tool. This way, we can avoid preprocessing the script during installation. diff --git a/openrussian.lua b/openrussian.lua index 3ca060e..4b37ea3 100755 --- a/openrussian.lua +++ b/openrussian.lua @@ -202,9 +202,12 @@ function format.other(word_id, accented) map_accented(accented), '\n') end +-- NOTE: This lets SQL strip the accent char from the input, which +-- allows users to cut and paste from generated output while we don't +-- have to deal with Unicode in Lua. local cur = assert(con:execute(string.format([[ SELECT accented, type, words.id AS word_id - FROM words WHERE bare = "%s" + FROM words WHERE bare = REPLACE("%s", CHAR(0x0301), "") ]], search_word))) local row = cur:fetch({}, "a") cur:close() -- cgit v1.2.3