aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2019-03-16 18:38:44 +0100
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2019-03-16 18:38:44 +0100
commit6618b77947f29748dd65af62c51c47ce94f87860 (patch)
tree1d94f414a0c41ab082aeab087f3abab942acce98
parent7e2ff6435cd1a5a504eab34823a4f63c7a46df1f (diff)
downloadopenrussian-cli-6618b77947f29748dd65af62c51c47ce94f87860.tar.gz
allow lookups containing accentuation character (Unicode 0x0301)
* they are simply ignored
* in openrussian.lua, we delegate Unicode handling to SQLite
* for Bash completions, we ignore accents during matching but (have to) preserve them in the prefix -- see comments.
* also properly escape SQL strings, preventing accidental code injection.
  This means we do not necessarily have to port openrussian-completion.bash to Lua.
-rw-r--r--openrussian-completion.bash20
-rwxr-xr-xopenrussian.lua5
2 files changed, 20 insertions, 5 deletions
diff --git a/openrussian-completion.bash b/openrussian-completion.bash
index 0facd31..fd27c06 100644
--- a/openrussian-completion.bash
+++ b/openrussian-completion.bash
@@ -3,13 +3,25 @@
# NOTE: Uses SQL for matching instead of compgen, since it's probably not
# a good idea to always retrieve all the words, although this happens anyway
# when completing an empty word.
-#
-# FIXME: sqlite3 command-line tool does not provide a safe method for embedding
-# strings, so we should either properly escape $2 or write an external Lua script.
_openrussian_completions()
{
- SQL="SELECT bare FROM words WHERE bare LIKE \"$2%\""
+ # NOTE: sqlite3's command-line tool does not provide a way to properly
+ # embed strings, so we try to escape it here.
+ WORD=$(echo -n "$2" | sed 's/"/""/g')
+
+ # This is a rather convoluted way of writing
+ # SELECT bare FROM words WHERE bare LIKE "${WORD}%";
+ # but allowing $WORD to contain accentuation characters, which can easily
+ # happen when cutting/pasting words from the openrussian.lua manpages or the
+ # internet.
+ # NOTE: This is merely a workaround since all completions will begin with
+ # $WORD including accents and end without accents, so the suggested completions
+ # will likely be with wrong accents.
+ # It seems to be impossible, at least in Bash, to rubout $WORD first.
+ SQL=$(printf 'SELECT "%s" || SUBSTR(bare, LENGTH(REPLACE("%s", "\u0301", ""))+1)
+ FROM words WHERE bare LIKE REPLACE("%s%%", "\u0301", "")' \
+ "$WORD" "$WORD" "$WORD")
# Calculate database path based on the installation path of the `openrussian`
# CLI tool. This way, we can avoid preprocessing the script during installation.
diff --git a/openrussian.lua b/openrussian.lua
index 3ca060e..4b37ea3 100755
--- a/openrussian.lua
+++ b/openrussian.lua
@@ -202,9 +202,12 @@ function format.other(word_id, accented)
map_accented(accented), '\n')
end
+-- NOTE: This lets SQL strip the accent char from the input, which
+-- allows users to cut and paste from generated output while we don't
+-- have to deal with Unicode in Lua.
local cur = assert(con:execute(string.format([[
SELECT accented, type, words.id AS word_id
- FROM words WHERE bare = "%s"
+ FROM words WHERE bare = REPLACE("%s", CHAR(0x0301), "")
]], search_word)))
local row = cur:fetch({}, "a")
cur:close()