diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2019-02-02 18:20:10 +0300 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2019-02-02 18:20:10 +0300 |
commit | 56e9becdd43373c34769336077569ba5a8bafd05 (patch) | |
tree | 47574672bc3d397aad52a3a891ee354a7448949f | |
download | openrussian-cli-56e9becdd43373c34769336077569ba5a8bafd05.tar.gz |
initial commit
* matches have to be direct, which is not always practical
* there is no reverse search (from English search terms)
* missing Makefile
* missing Bash completions
-rw-r--r-- | .gitignore | 3 | ||||
-rwxr-xr-x | openrussian.lua | 262 | ||||
-rwxr-xr-x | update-sqlite3 | 15 |
3 files changed, 280 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4dd1371
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+openrussian-sql.zip
+openrussian.sql
+openrussian-sqlite3.db
diff --git a/openrussian.lua b/openrussian.lua
new file mode 100755
index 0000000..d8c9be5
--- /dev/null
+++ b/openrussian.lua
@@ -0,0 +1,310 @@
+#!/usr/bin/lua5.2
+--
+-- Looks up a Russian word in a local OpenRussian SQLite3 database
+-- and writes the entry to stdout as man page source.
+--
+
+local driver = require "luasql.sqlite3"
+
+if #arg < 1 then
+	error("Search word required!")
+end
+-- All command-line arguments together form the search term.
+local search_word = table.concat(arg, " ")
+local out_stream
+local lang = "en"
+
+local env = assert(driver.sqlite3())
+local con = assert(env:connect("openrussian-sqlite3.db"))
+
+-- Returns str as a single-quoted SQL string literal.
+-- Embedded single quotes are doubled, so that user input cannot
+-- break out of the literal.
+local function sql_quote(str)
+	return "'"..(str:gsub("'", "''")).."'"
+end
+
+-- Executes a query and returns an iterator over its result rows.
+-- Every row is a table indexed by column name.
+-- The cursor is closed once all rows have been consumed.
+local function rows(sql)
+	local cur = assert(con:execute(sql))
+	return function()
+		local row = cur:fetch({}, "a")
+		if not row then cur:close() end
+		return row
+	end
+end
+
+-- Executes a query and returns only its first result row
+-- (indexed by column name) or nil if there is none.
+local function fetch_row(sql)
+	local cur = assert(con:execute(sql))
+	local row = cur:fetch({}, "a")
+	cur:close()
+	return row
+end
+
+-- Turns a character followed by apostrophe into a combined
+-- accented character.
+-- A missing (nil) string is treated like an empty one.
+local function map_accented(str)
+	return ((str or ""):gsub("'", "\\[u0301]"))
+end
+
+-- Sets accented characters in italics, for use in tables.
+-- The two bytes in front of the apostrophe are taken to be the accented
+-- character, which presumably relies on Cyrillic letters in UTF-8.
+-- A missing (nil) string is treated like an empty one.
+--
+-- FIXME: map_accented() does not work for tables since tbl will count the
+-- combined character as two. Theoretically, Groff has composite characters
+-- like \[u043E_0301] but they don't work for all the cyrillic
+-- vowels.
+local function map_tbl(str)
+	return ((str or ""):gsub("(..)'", "\\fI%1\\fP"))
+end
+
+-- Protects free-form database text for use on a Troff text line.
+-- Backslashes are escaped and a leading \& prevents text starting with
+-- "." or "'" from being interpreted as a request.
+-- A missing (nil) string is treated like an empty one.
+local function map_text(str)
+	return "\\&"..((str or ""):gsub("\\", "\\e"))
+end
+
+-- Writes one table row: the label tag, the six cases of the declension
+-- decl_id and, if given, the short form.
+-- Cases missing in the database result in empty cells.
+local function format_declension(tag, decl_id, short_form)
+	local row = decl_id and fetch_row(string.format([[
+		SELECT * FROM declensions WHERE id = %d
+	]], decl_id)) or {}
+	out_stream:write(tag, ';', map_tbl(row.nom), ';', map_tbl(row.gen), ';',
+		map_tbl(row.dat), ';', map_tbl(row.acc), ';',
+		map_tbl(row.inst), ';', map_tbl(row.prep))
+	if short_form then out_stream:write(';', map_tbl(short_form)) end
+	out_stream:write('\n')
+end
+
+-- Writes one table row for an indeclinable word: the label tag followed
+-- by the same accented form in all six cases.
+local function format_dummy_declension(tag, accented)
+	accented = map_tbl(accented)
+	out_stream:write(tag)
+	for _ = 1, 6 do out_stream:write(';', accented) end
+	out_stream:write('\n')
+end
+
+-- Formatter functions by word category.
+-- They are called with the id of the word in the words table and its
+-- accented form and write the word-specific man page sections.
+local format = {}
+
+function format.noun(word_id, accented)
+	local row = fetch_row(string.format([[
+		SELECT * FROM nouns WHERE word_id = %d
+	]], word_id))
+	-- Without grammatical details, there is nothing more to print
+	-- than for any other word.
+	if not row then return format.other(word_id, accented) end
+
+	out_stream:write('.SH WORD\n',
+		map_accented(accented), ' \\-\\- noun, ')
+	if row.gender and row.gender ~= "" then
+		local genders = {m = "male", f = "female", n = "neuter"}
+		-- Unknown gender codes are printed verbatim.
+		out_stream:write(genders[row.gender] or row.gender, ', ')
+	end
+	out_stream:write(row.animate == 1 and 'animate' or 'inanimate', '\n')
+
+	if row.partner and row.partner ~= "" then
+		-- FIXME: What exactly is a noun "partner"?
+		-- Seems to be used mostly for male/female pairs etc.
+		out_stream:write('.SH PARTNER\n',
+			map_text(row.partner), '\n')
+	end
+
+	out_stream:write('.SH DECLENSION\n',
+		'.TS\n',
+		'allbox,tab(;);\n',
+		'L LB LB LB LB LB LB\n',
+		'LB L L L L L L.\n',
+		';Nominative;Genitive;Dative;Accusative;Instrumental;Prepositive\n')
+	if row.pl_only == 0 then
+		if row.indeclinable == 1 then
+			format_dummy_declension('Singular', accented)
+		else
+			format_declension('Singular', row.decl_sg_id)
+		end
+	end
+	if row.sg_only == 0 then
+		if row.indeclinable == 1 then
+			format_dummy_declension('Plural', accented)
+		else
+			format_declension('Plural', row.decl_pl_id)
+		end
+	end
+	out_stream:write('.TE\n')
+end
+
+function format.adjective(word_id, accented)
+	local row = fetch_row(string.format([[
+		SELECT * FROM adjectives WHERE word_id = %d
+	]], word_id))
+	if not row then return format.other(word_id, accented) end
+
+	out_stream:write('.SH WORD\n',
+		map_accented(accented), ' \\-\\- adjective\n')
+
+	out_stream:write('.SH DECLENSION\n',
+		'.TS\n',
+		'allbox,tab(;);\n',
+		'L LB LB LB LB LB LB LB\n',
+		'LB L L L L L L L.\n',
+		';Nominative;Genitive;Dative;Accusative;Instrumental;Prepositive;Short\n')
+	format_declension('Male', row.decl_m_id, row.short_m)
+	format_declension('Neutral', row.decl_n_id, row.short_n)
+	format_declension('Female', row.decl_f_id, row.short_f)
+	format_declension('Plural', row.decl_pl_id, row.short_pl)
+	out_stream:write('.TE\n')
+
+	-- Empty strings are skipped like missing values (cf. noun partners).
+	if row.comparative and row.comparative ~= "" then
+		out_stream:write('.SH COMPARATIVE\n',
+			map_accented(row.comparative), '\n')
+	end
+
+	if row.superlative and row.superlative ~= "" then
+		out_stream:write('.SH SUPERLATIVE\n',
+			map_accented(row.superlative), '\n')
+	end
+end
+
+function format.verb(word_id, accented)
+	-- The LEFT JOIN keeps verbs without a present/future conjugation.
+	local row = fetch_row(string.format([[
+		SELECT * FROM verbs LEFT JOIN conjugations ON verbs.presfut_conj_id = conjugations.id
+		WHERE verbs.word_id = %d
+	]], word_id))
+	if not row then return format.other(word_id, accented) end
+
+	out_stream:write('.SH WORD\n',
+		map_accented(accented), ' \\-\\- verb')
+	if row.aspect then out_stream:write(', ', row.aspect) end
+	out_stream:write('\n')
+
+	if row.partner and row.partner ~= "" then
+		-- NOTE: Verb partners seem to be the aspect partners
+		out_stream:write('.SH PARTNER\n',
+			map_text(row.partner), '\n')
+	end
+
+	-- FIXME: Can we assume that verbs without specified aspect are always
+	-- perfective?
+	out_stream:write('.SH ', row.aspect == "imperfective" and 'PRESENT\n' or 'FUTURE\n',
+		map_accented("\\[u042F] "), map_accented(row.sg1), '.\n.br\n',
+		map_accented("\\[u0422]\\[u044B] "), map_accented(row.sg2), '.\n.br\n',
+		map_accented("\\[u041E]\\[u043D]/\\[u041E]\\[u043D]\\[u0430]'/\\[u041E]\\[u043D]\\[u043E]' "),
+		map_accented(row.sg3), '.\n.br\n',
+		map_accented("\\[u041C]\\[u044B] "), map_accented(row.pl1), '.\n.br\n',
+		map_accented("\\[u0412]\\[u044B] "), map_accented(row.pl2), '.\n.br\n',
+		map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.pl3), '.\n.br\n')
+
+	out_stream:write('.SH PAST\n',
+		map_accented("\\[u041E]\\[u043D] "), map_accented(row.past_m), '.\n.br\n',
+		map_accented("\\[u041E]\\[u043D]\\[u0430]' "), map_accented(row.past_f), '.\n.br\n',
+		map_accented("\\[u041E]\\[u043D]\\[u043E]' "), map_accented(row.past_n), '.\n.br\n',
+		map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.past_pl), '.\n')
+
+	-- FIXME: Is the singular/plural distinction always obvious?
+	out_stream:write('.SH IMPERATIVE\n',
+		map_accented(row.imperative_sg), '! / ',
+		map_accented(row.imperative_pl), '!\n')
+end
+
+-- Also used for all categories without a dedicated formatter.
+function format.other(word_id, accented)
+	out_stream:write('.SH WORD\n',
+		map_accented(accented), '\n')
+end
+
+local row = fetch_row(string.format([[
+	SELECT bare_id, accented, type, words.id AS word_id
+	FROM bares JOIN words ON bares.id = bare_id
+	WHERE bare = %s
+]], sql_quote(search_word)))
+
+if not row then
+	io.stderr:write('Word "', search_word, '" not found!\n')
+else
+	local bare_id, word_id = row.bare_id, row.word_id
+	local word_type = row.type or "other"
+	-- FIXME: Some words (e.g. personal pronouns) apparently do not
+	-- come with accents!?
+	local word_accented = row.accented or search_word
+
+	-- Open stream only now, after no more messages have to be written to
+	-- stdout/stderr.
+	out_stream = io.stdout
+	--out_stream = io.popen("man /dev/stdin", "w")
+
+	out_stream:write('.\\" t\n',
+		'.TH "', search_word, '" "', word_type, '"\n')
+
+	--
+	-- Word-specific sections
+	--
+	-- Not every word type has a formatter of its own.
+	local formatter = format[word_type] or format.other
+	formatter(word_id, word_accented)
+
+	--
+	-- Generic sections
+	--
+	-- FIXME: Print other translations if primary
+	-- language is not available
+	local translations = {}
+	for tl_row in rows(string.format([[
+		SELECT tl FROM translations
+		WHERE word_id = %d AND lang = %s
+	]], word_id, sql_quote(lang))) do
+		translations[#translations + 1] = map_text(tl_row.tl)
+	end
+	if #translations > 0 then
+		out_stream:write('.SH TRANSLATION\n',
+			table.concat(translations, ', '), '\n')
+	end
+
+	--
+	-- NOTE: There can be many examples, so print them last.
+	--
+	local have_examples = false
+	for example in rows(string.format([[
+		SELECT ru, start, length, tl
+		FROM sentences_words JOIN sentences ON sentence_id = sentences.id
+		WHERE bare_id = %d AND lang = %s
+	]], bare_id, sql_quote(lang))) do
+		if not have_examples then
+			out_stream:write('.SH EXAMPLES\n')
+			have_examples = true
+		end
+
+		-- FIXME: Highlight search word in sentences.
+		-- start/length are apparently in characters
+		-- instead of bytes.
+		--[[
+		local ru_hl = example.ru:sub(1, example.start)..'\\fI'..
+			example.ru:sub(example.start+1, example.start+1+example.length)..'\\fP'..
+			example.ru:sub(example.start+1+example.length+1)
+		]]
+		out_stream:write('.TP\n',
+			'\\&', map_accented(example.ru), '\n',
+			map_text(example.tl), '\n')
+	end
+end
+
+con:close()
+env:close()
+
+if out_stream then out_stream:close() end
diff --git a/update-sqlite3 b/update-sqlite3
new file mode 100755
index 0000000..da86baf
--- /dev/null
+++ b/update-sqlite3
@@ -0,0 +1,18 @@
+#!/bin/sh
+# Downloads the latest OpenRussian MySQL database dump and converts
+# it to Sqlite3.
+#
+# NOTE: unzip cannot read from stdin yet. bsdtar could in principle,
+# but does not allow extracting to stdout.
+
+# Abort if the download fails instead of importing an empty dump.
+set -e
+
+TMPFILE=`mktemp`
+# Remove the temporary archive however the script exits.
+trap 'rm -f "$TMPFILE"' EXIT
+
+wget -O "$TMPFILE" https://en.openrussian.org/downloads/openrussian-sql.zip
+unzip -p "$TMPFILE" openrussian.sql | \
+~/working-copies/mysql2sqlite/mysql2sqlite - | \
+sqlite3 openrussian-sqlite3.db