diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2019-03-16 18:38:44 +0100 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2019-03-16 18:38:44 +0100 |
commit | 6618b77947f29748dd65af62c51c47ce94f87860 (patch) | |
tree | 1d94f414a0c41ab082aeab087f3abab942acce98 | |
parent | 7e2ff6435cd1a5a504eab34823a4f63c7a46df1f (diff) | |
download | openrussian-cli-6618b77947f29748dd65af62c51c47ce94f87860.tar.gz |
allow lookups containing the accentuation character (Unicode U+0301)
* they are simply ignored
* in openrussian.lua, we delegate Unicode handling to SQLite
* for Bash completions, we ignore accents during matching but (have to)
preserve them in the prefix -- see comments.
* also properly escape SQL strings, preventing accidental code injection.
This means we do not necessarily have to port openrussian-completion.bash
to Lua.
-rw-r--r-- | openrussian-completion.bash | 20 | ||||
-rwxr-xr-x | openrussian.lua | 5 |
2 files changed, 20 insertions, 5 deletions
diff --git a/openrussian-completion.bash b/openrussian-completion.bash index 0facd31..fd27c06 100644 --- a/openrussian-completion.bash +++ b/openrussian-completion.bash @@ -3,13 +3,25 @@ # NOTE: Uses SQL for matching instead of compgen, since it's probably not # a good idea to always retrieve all the words, although this happens anyway # when completing an empty word. -# -# FIXME: sqlite3 command-line tool does not provide a safe method for embedding -# strings, so we should either properly escape $2 or write an external Lua script. _openrussian_completions() { - SQL="SELECT bare FROM words WHERE bare LIKE \"$2%\"" + # NOTE: sqlite3's command-line tool does not provide a way to properly + # embed strings, so we try to escape it here. + WORD=$(echo -n "$2" | sed 's/"/""/g') + + # This is a rather convoluted way of writing + # SELECT bare FROM words WHERE bare LIKE "${WORD}%"; + # but allowing $WORD to contain accentuation characters, which can easily + # happen when cutting/pasting words from the openrussian.lua manpages or the + # internet. + # NOTE: This is merely a workaround since all completions will begin with + # $WORD including accents and end without accents, so the suggested completions + # will likely be with wrong accents. + # It seems to be impossible, at least in Bash, to rubout $WORD first. + SQL=$(printf 'SELECT "%s" || SUBSTR(bare, LENGTH(REPLACE("%s", "\u0301", ""))+1) + FROM words WHERE bare LIKE REPLACE("%s%%", "\u0301", "")' \ + "$WORD" "$WORD" "$WORD") # Calculate database path based on the installation path of the `openrussian` # CLI tool. This way, we can avoid preprocessing the script during installation. 
diff --git a/openrussian.lua b/openrussian.lua index 3ca060e..4b37ea3 100755 --- a/openrussian.lua +++ b/openrussian.lua @@ -202,9 +202,12 @@ function format.other(word_id, accented) map_accented(accented), '\n') end +-- NOTE: This lets SQL strip the accent char from the input, which +-- allows users to cut and paste from generated output while we don't +-- have to deal with Unicode in Lua. local cur = assert(con:execute(string.format([[ SELECT accented, type, words.id AS word_id - FROM words WHERE bare = "%s" + FROM words WHERE bare = REPLACE("%s", CHAR(0x0301), "") ]], search_word))) local row = cur:fetch({}, "a") cur:close() |