diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2019-04-18 23:20:46 +0300 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2019-04-18 23:20:46 +0300 |
commit | 481c69be02c95fbf20567d8de617315484e2d692 (patch) | |
tree | 71bf406543586f278a5e779a0befa57f0fe529e9 /postprocess.sql | |
parent | 07d4fa1fe85eba554becb20bb4a872acb320a423 (diff) | |
download | openrussian-cli-481c69be02c95fbf20567d8de617315484e2d692.tar.gz |
major changes (unfortunately not done systematically)
* keep the openrussian-sql.zip to avoid bitrot (sudden breakage when building the
project against a recent DB)
* allow lookups against inflections.
A special table `bare_inflections` was added to the DB in order to speed this up.
* authentic autocompletions taking every parameter into account, using a magic `-C` parameter
* language selection via locale or -L<lang>
* translation lookup
* allow multiple search terms on the command line without escaping
* all SQL strings are properly escaped now to avoid (accidental) code insertion
* luautf8 is mandatory now and used much more, which is safer
than handling Unicode strings with the builtin functions
* ignore "disable" database entries
* added option -V (verbatim)
* more protections against empty fields
* print "DERIVED FROM", "AUDIO", "USAGE" and various word relation sections
* print word ranking and level (A1-C2...)
Diffstat (limited to 'postprocess.sql')
-rw-r--r-- | postprocess.sql | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/postprocess.sql b/postprocess.sql new file mode 100644 index 0000000..5976cb1 --- /dev/null +++ b/postprocess.sql @@ -0,0 +1,78 @@ +-- Drop unused tables, hopefully saving a few megabytes. +-- `expressions_words` is for excercises??? +DROP TABLE expressions_words; +-- `categories_words2` references `categories` but is not in the database!? +DROP TABLE categories_words2; + +-- This table is especially added to aid lookups and autocompletions on all +-- possible inflections. +-- This is significantly faster than using the existing tables. +-- +-- FIXME: Some of the columns contain multiple comma-separated values +-- (sometimes in braces), so we probably need a Lua script for initializion. +-- TODO: Translations should probably be in this table, or a separate one as well. +-- But see above. +CREATE TABLE bare_inflections (`word_id` INTEGER NOT NULL, `bare` VARCHAR(100) NOT NULL COLLATE BINARY); +-- Indexing the `bare` column allows ultra-fast lookups even via GLOB +-- in most common cases. For this optimization to work, the column must +-- also be BINARY collated. See also: +-- https://www.sqlite.org/optoverview.html#the_like_optimization +CREATE INDEX idx_bare_inflections_bare ON bare_inflections (`bare`); +INSERT INTO bare_inflections SELECT word_id, REPLACE(temp, "'", "") AS bare +FROM ( + -- Search word might be a noun or adjective declension + SELECT word_id, nom AS temp FROM declensions + UNION ALL + SELECT word_id, gen AS temp FROM declensions + UNION ALL + SELECT word_id, dat AS temp FROM declensions + UNION ALL + SELECT word_id, acc AS temp FROM declensions + UNION ALL + SELECT word_id, inst AS temp FROM declensions + UNION ALL + SELECT word_id, prep AS temp FROM declensions + UNION ALL + -- Search word might be a special adjective inflection + SELECT word_id, comparative AS temp FROM adjectives + UNION ALL + SELECT word_id, superlative AS temp FROM adjectives + UNION ALL + SELECT word_id, short_m AS temp FROM adjectives + UNION ALL + SELECT word_id, short_f AS temp FROM adjectives + UNION ALL + SELECT word_id, short_n AS temp FROM adjectives + UNION ALL + SELECT word_id, short_pl AS temp FROM adjectives + UNION ALL + -- Search word might be a verb imperative or past form + SELECT word_id, imperative_sg AS temp FROM verbs + UNION ALL + SELECT word_id, imperative_pl AS temp FROM verbs + UNION ALL + SELECT word_id, past_m AS temp FROM verbs + UNION ALL + SELECT word_id, past_f AS temp FROM verbs + UNION ALL + SELECT word_id, past_n AS temp FROM verbs + UNION ALL + SELECT word_id, past_pl AS temp FROM verbs + UNION ALL + -- Search word might be a verb conjugation + SELECT word_id, sg1 AS temp FROM conjugations + UNION ALL + SELECT word_id, sg2 AS temp FROM conjugations + UNION ALL + SELECT word_id, sg3 AS temp FROM conjugations + UNION ALL + SELECT word_id, pl1 AS temp FROM conjugations + UNION ALL + SELECT word_id, pl2 AS temp FROM conjugations + UNION ALL + SELECT word_id, pl3 AS temp FROM conjugations +) +WHERE bare <> ""; + +-- Saves a few megabytes +VACUUM; |