aboutsummaryrefslogtreecommitdiff
path: root/postprocess.sql
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2019-04-18 23:20:46 +0300
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2019-04-18 23:20:46 +0300
commit481c69be02c95fbf20567d8de617315484e2d692 (patch)
tree71bf406543586f278a5e779a0befa57f0fe529e9 /postprocess.sql
parent07d4fa1fe85eba554becb20bb4a872acb320a423 (diff)
downloadopenrussian-cli-481c69be02c95fbf20567d8de617315484e2d692.tar.gz
major changes (unfortunately not done systematically)
* keep the openrussian-sql.zip to avoid bitrot (sudden breakage when building the project against a recent DB) * allow lookups against inflections. A special table `bare_inflections` was added to the DB in order to speed this up. * authentic autocompletions taking every parameter into account, using a magic `-C` parameter * language selection via locale or -L<lang> * translation lookup * allow multiple search terms on the command line without escaping * all SQL strings are properly escaped now to avoid (accidental) code insertion * luautf8 is mandatory now and used much more, which is safer than handling Unicode strings with the builtin functions * ignore "disable" database entries * added option -V (verbatim) * more protections against empty fields * print "DERIVED FROM", "AUDIO", "USAGE" and various word relation sections * print word ranking and level (A1-C2...)
Diffstat (limited to 'postprocess.sql')
-rw-r--r--postprocess.sql78
1 files changed, 78 insertions, 0 deletions
diff --git a/postprocess.sql b/postprocess.sql
new file mode 100644
index 0000000..5976cb1
--- /dev/null
+++ b/postprocess.sql
@@ -0,0 +1,78 @@
+-- Drop unused tables, hopefully saving a few megabytes.
+-- `expressions_words` is for excercises???
+DROP TABLE expressions_words;
+-- `categories_words2` references `categories` but is not in the database!?
+DROP TABLE categories_words2;
+
+-- This table is especially added to aid lookups and autocompletions on all
+-- possible inflections.
+-- This is significantly faster than using the existing tables.
+--
+-- FIXME: Some of the columns contain multiple comma-separated values
+-- (sometimes in braces), so we probably need a Lua script for initializion.
+-- TODO: Translations should probably be in this table, or a separate one as well.
+-- But see above.
+CREATE TABLE bare_inflections (`word_id` INTEGER NOT NULL, `bare` VARCHAR(100) NOT NULL COLLATE BINARY);
+-- Indexing the `bare` column allows ultra-fast lookups even via GLOB
+-- in most common cases. For this optimization to work, the column must
+-- also be BINARY collated. See also:
+-- https://www.sqlite.org/optoverview.html#the_like_optimization
+CREATE INDEX idx_bare_inflections_bare ON bare_inflections (`bare`);
+INSERT INTO bare_inflections SELECT word_id, REPLACE(temp, "'", "") AS bare
+FROM (
+ -- Search word might be a noun or adjective declension
+ SELECT word_id, nom AS temp FROM declensions
+ UNION ALL
+ SELECT word_id, gen AS temp FROM declensions
+ UNION ALL
+ SELECT word_id, dat AS temp FROM declensions
+ UNION ALL
+ SELECT word_id, acc AS temp FROM declensions
+ UNION ALL
+ SELECT word_id, inst AS temp FROM declensions
+ UNION ALL
+ SELECT word_id, prep AS temp FROM declensions
+ UNION ALL
+ -- Search word might be a special adjective inflection
+ SELECT word_id, comparative AS temp FROM adjectives
+ UNION ALL
+ SELECT word_id, superlative AS temp FROM adjectives
+ UNION ALL
+ SELECT word_id, short_m AS temp FROM adjectives
+ UNION ALL
+ SELECT word_id, short_f AS temp FROM adjectives
+ UNION ALL
+ SELECT word_id, short_n AS temp FROM adjectives
+ UNION ALL
+ SELECT word_id, short_pl AS temp FROM adjectives
+ UNION ALL
+ -- Search word might be a verb imperative or past form
+ SELECT word_id, imperative_sg AS temp FROM verbs
+ UNION ALL
+ SELECT word_id, imperative_pl AS temp FROM verbs
+ UNION ALL
+ SELECT word_id, past_m AS temp FROM verbs
+ UNION ALL
+ SELECT word_id, past_f AS temp FROM verbs
+ UNION ALL
+ SELECT word_id, past_n AS temp FROM verbs
+ UNION ALL
+ SELECT word_id, past_pl AS temp FROM verbs
+ UNION ALL
+ -- Search word might be a verb conjugation
+ SELECT word_id, sg1 AS temp FROM conjugations
+ UNION ALL
+ SELECT word_id, sg2 AS temp FROM conjugations
+ UNION ALL
+ SELECT word_id, sg3 AS temp FROM conjugations
+ UNION ALL
+ SELECT word_id, pl1 AS temp FROM conjugations
+ UNION ALL
+ SELECT word_id, pl2 AS temp FROM conjugations
+ UNION ALL
+ SELECT word_id, pl3 AS temp FROM conjugations
+)
+WHERE bare <> "";
+
+-- Saves a few megabytes
+VACUUM;