initial commit

* matches have to be direct, which is not always practical * there is no reverse search (from English search terms) * missing Makefile * missing Bash completions
author: Robin Haberkorn <robin.haberkorn@googlemail.com> 2019-02-02 18:20:10 +0300
committer: Robin Haberkorn <robin.haberkorn@googlemail.com> 2019-02-02 18:20:10 +0300
commit: 56e9becdd43373c34769336077569ba5a8bafd05 (patch)
tree: 47574672bc3d397aad52a3a891ee354a7448949f
download: openrussian-cli-56e9becdd43373c34769336077569ba5a8bafd05.tar.gz
3 files changed, 280 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4dd1371
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+openrussian-sql.zip
+openrussian.sql
+openrussian-sqlite3.db
diff --git a/openrussian.lua b/openrussian.lua
new file mode 100755
index 0000000..d8c9be5
--- /dev/null
+++ b/openrussian.lua
@@ -0,0 +1,262 @@
+#!/usr/bin/lua5.2
+
+local driver = require "luasql.sqlite3"
+
+if #arg < 1 then
+	error("Search word required!")
+end
+local search_word = table.concat(arg, " ") -- all arguments together form one search term
+local out_stream -- assigned only after the word has been found
+local lang = "en" -- language code used to filter translations and example sentences
+
+local env = assert(driver.sqlite3())
+local con = assert(env:connect("openrussian-sqlite3.db")) -- relative path: looked up in the working directory
+
+-- Turns each character followed by an apostrophe into a combined accented
+-- character (Groff \[u0301] escape). A nil str (NULL column) is treated as "".
+local function map_accented(str)
+	return ((str or ""):gsub("'", "\\[u0301]"))
+end
+-- Italicizes the single UTF-8 character preceding each apostrophe; nil str gives "".
+-- FIXME: Combining accents do not work for tables since tbl will count the
+-- combined character as two. Theoretically, Groff has composite characters
+-- like \[u043E_0301] but they don't work for all the Cyrillic vowels.
+local function map_tbl(str)
+	return ((str or ""):gsub("([^\128-\191][\128-\191]*)'", "\\fI%1\\fP"))
+end
+
+-- Writes one ';'-separated tbl row: tag, the six case forms of decl_id, optional short form.
+local function format_declension(tag, decl_id, short_form)
+	local cursor = assert(con:execute(string.format([[
+		SELECT * FROM declensions WHERE id = %d
+	]], decl_id)))
+	local forms = assert(cursor:fetch({}, "a"))
+	cursor:close()
+	local cells = {tag, map_tbl(forms.nom), map_tbl(forms.gen), map_tbl(forms.dat),
+	               map_tbl(forms.acc), map_tbl(forms.inst), map_tbl(forms.prep)}
+	if short_form then cells[#cells + 1] = map_tbl(short_form) end
+	out_stream:write(table.concat(cells, ';'), '\n')
+end
+
+-- Writes a tbl row for an indeclinable word: the tag followed by the same
+-- (italic-accented) form repeated in all six case columns.
+local function format_dummy_declension(tag, accented)
+	local cell = ';' .. map_tbl(accented)
+	out_stream:write(tag, cell:rep(6), '\n')
+end
+
+local format = {} -- formatter functions by word category
+
+-- Prints the WORD, optional PARTNER and DECLENSION sections for the noun word_id.
+function format.noun(word_id, accented)
+	local cur = assert(con:execute(string.format([[
+		SELECT * FROM nouns WHERE word_id = %d
+	]], word_id)))
+	local row = assert(cur:fetch({}, "a"))
+	cur:close()
+	out_stream:write('.SH WORD\n',
+	                 map_accented(accented), ' \\-\\- noun, ')
+	if row.gender and row.gender ~= "" then
+		local genders = {m = "male", f = "female", n = "neuter"}
+		out_stream:write(genders[row.gender] or row.gender, ', ') -- unmapped codes are printed as-is
+	end
+	out_stream:write(row.animate == 1 and 'animate' or 'inanimate', '\n')
+
+	if row.partner and row.partner ~= "" then
+		-- FIXME: What exactly is a noun "partner"?
+		-- Seems to be used mostly for male/female pairs etc.
+		out_stream:write('.SH PARTNER\n',
+		                 row.partner, '\n')
+	end
+
+	out_stream:write('.SH DECLENSION\n',
+	                 '.TS\n',
+	                 'allbox,tab(;);\n',
+	                 'L  LB LB LB LB LB LB\n',
+	                 'LB L  L  L  L  L  L.\n',
+	                 ';Nominative;Genitive;Dative;Accusative;Instrumental;Prepositive\n')
+	if row.pl_only == 0 then
+		if row.indeclinable == 1 then
+			format_dummy_declension('Singular', accented)
+		else
+			format_declension('Singular', row.decl_sg_id)
+		end
+	end
+	if row.sg_only == 0 then
+		if row.indeclinable == 1 then
+			format_dummy_declension('Plural', accented)
+		else
+			format_declension('Plural', row.decl_pl_id)
+		end
+	end
+	out_stream:write('.TE\n')
+end
+
+-- Prints the WORD, DECLENSION and (when non-empty) COMPARATIVE/SUPERLATIVE sections.
+function format.adjective(word_id, accented)
+	local cur = assert(con:execute(string.format([[
+		SELECT * FROM adjectives WHERE word_id = %d
+	]], word_id)))
+	local row = assert(cur:fetch({}, "a"))
+	cur:close()
+	out_stream:write('.SH WORD\n',
+	                 map_accented(accented), ' \\-\\- adjective\n')
+
+	out_stream:write('.SH DECLENSION\n',
+	                 '.TS\n',
+	                 'allbox,tab(;);\n',
+	                 'L  LB LB LB LB LB LB LB\n',
+	                 'LB L  L  L  L  L  L  L.\n',
+	                 ';Nominative;Genitive;Dative;Accusative;Instrumental;Prepositive;Short\n')
+	format_declension('Male', row.decl_m_id, row.short_m)
+	format_declension('Neutral', row.decl_n_id, row.short_n)
+	format_declension('Female', row.decl_f_id, row.short_f)
+	format_declension('Plural', row.decl_pl_id, row.short_pl)
+	out_stream:write('.TE\n')
+
+	if row.comparative and row.comparative ~= "" then -- skip empty strings like the partner checks do
+		out_stream:write('.SH COMPARATIVE\n',
+		                 map_accented(row.comparative), '\n')
+	end
+
+	if row.superlative and row.superlative ~= "" then
+		out_stream:write('.SH SUPERLATIVE\n',
+		                 map_accented(row.superlative), '\n')
+	end
+end
+
+-- Prints the WORD, optional PARTNER, PRESENT or FUTURE, PAST and IMPERATIVE sections for a verb.
+function format.verb(word_id, accented)
+	local cur = assert(con:execute(string.format([[
+		SELECT * FROM verbs JOIN conjugations ON verbs.presfut_conj_id = conjugations.id
+		WHERE verbs.word_id = %d
+	]], word_id)))
+	local row = assert(cur:fetch({}, "a"))
+	cur:close()
+	out_stream:write('.SH WORD\n',
+	                 map_accented(accented), ' \\-\\- verb')
+	if row.aspect then out_stream:write(', ', row.aspect) end
+	out_stream:write('\n')
+
+	if row.partner and row.partner ~= "" then
+		-- NOTE: Verb partners seem to be the aspect partners
+		out_stream:write('.SH PARTNER\n',
+		                 row.partner, '\n')
+	end
+
+	-- FIXME: Can we assume that verbs without specified aspect are always
+	-- perfective?
+	out_stream:write('.SH ', row.aspect == "imperfective" and 'PRESENT\n' or 'FUTURE\n',
+	                 map_accented("\\[u042F] "), map_accented(row.sg1), '.\n.br\n', -- Ya (I)
+	                 map_accented("\\[u0422]\\[u044B] "), map_accented(row.sg2), '.\n.br\n', -- Ty (you, singular)
+	                 map_accented("\\[u041E]\\[u043D]/\\[u041E]\\[u043D]\\[u0430]'/\\[u041E]\\[u043D]\\[u043E]' "),
+	                         map_accented(row.sg3), '.\n.br\n', -- On/Ona/Ono (he/she/it)
+	                 map_accented("\\[u041C]\\[u044B] "), map_accented(row.pl1), '.\n.br\n', -- My (we)
+	                 map_accented("\\[u0412]\\[u044B] "), map_accented(row.pl2), '.\n.br\n', -- Vy (you, plural)
+	                 map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.pl3), '.\n.br\n') -- Oni (they)
+
+	out_stream:write('.SH PAST\n',
+	                 map_accented("\\[u041E]\\[u043D] "), map_accented(row.past_m), '.\n.br\n', -- On (he)
+	                 map_accented("\\[u041E]\\[u043D]\\[u0430]' "), map_accented(row.past_f), '.\n.br\n', -- Ona (she)
+	                 map_accented("\\[u041E]\\[u043D]\\[u043E]' "), map_accented(row.past_n), '.\n.br\n', -- Ono (it)
+	                 map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.past_pl), '.\n') -- Oni (they)
+
+	-- FIXME: Is the singular/plural distinction always obvious?
+	out_stream:write('.SH IMPERATIVE\n',
+	                 map_accented(row.imperative_sg), '! / ',
+	                 map_accented(row.imperative_pl), '!\n')
+end
+
+-- Formatter for words of type "other": prints only the WORD section (word_id is unused).
+function format.other(word_id, accented)
+	out_stream:write('.SH WORD\n' .. map_accented(accented) .. '\n')
+end
+
+-- search_word is untrusted: embed it as an SQL string literal ('...' with each ' doubled).
+local cur = assert(con:execute(string.format([[
+	SELECT bare_id, accented, type, words.id AS word_id
+	FROM bares JOIN words ON bares.id = bare_id WHERE bare = '%s'
+]], (search_word:gsub("'", "''")))))
+local row = cur:fetch({}, "a")
+cur:close()
+
+if not row then
+	io.stderr:write('Word "', search_word, '" not found!\n')
+else
+	local bare_id, word_id = row.bare_id, row.word_id
+	local word_type = row.type or "other"
+	-- FIXME: Some words (e.g. personal pronouns) apparently do not
+	-- come with accents!?
+	local word_accented = row.accented or search_word
+
+	-- Open stream only now, after no more messages have to be written to
+	-- stdout/stderr.
+	out_stream = io.stdout
+	--out_stream = io.popen("man /dev/stdin", "w")
+
+	out_stream:write('.\\" t\n',
+	                 '.TH "', search_word, '" "', word_type, '"\n')
+
+	--
+	-- Word-specific sections (types without a formatter are printed like "other")
+	--
+	local formatter = format[word_type] or format.other
+	formatter(word_id, word_accented)
+
+	--
+	-- Generic sections
+	--
+	-- FIXME: Print other translations if primary
+	-- language is not available
+	cur = assert(con:execute(string.format([[
+		SELECT tl FROM translations
+		WHERE word_id = %d AND lang = '%s'
+	]], word_id, lang)))
+	row = cur:fetch({}, "a")
+	if row then
+		out_stream:write('.SH TRANSLATION\n')
+
+		repeat
+			out_stream:write(row.tl)
+			row = cur:fetch({}, "a")
+			if row then out_stream:write(', ') end
+		until not row
+
+		out_stream:write('\n')
+	end
+	cur:close()
+
+	--
+	-- NOTE: There can be many examples, so print them last.
+	--
+	cur = assert(con:execute(string.format([[
+		SELECT ru, start, length, tl
+		FROM sentences_words JOIN sentences ON sentence_id = sentences.id
+		WHERE bare_id = %d AND lang = '%s'
+	]], bare_id, lang)))
+	row = cur:fetch({}, "a")
+	if row then
+		out_stream:write('.SH EXAMPLES\n')
+
+		repeat
+			-- FIXME: Highlight search word in sentences.
+			-- start/length are apparently in characters
+			-- instead of bytes.
+			--[[
+			local ru_hl = row.ru:sub(1, row.start)..'\\fI'..
+			              row.ru:sub(row.start+1, row.start+1+row.length)..'\\fP'..
+			              row.ru:sub(row.start+1+row.length+1)
+			]]
+			out_stream:write('.TP\n',
+			                 map_accented(row.ru), '\n',
+			                 row.tl, '\n')
+			row = cur:fetch({}, "a")
+		until not row
+	end
+	cur:close()
+end
+
+con:close()
+env:close()
+
+if out_stream then out_stream:close() end
diff --git a/update-sqlite3 b/update-sqlite3
new file mode 100755
index 0000000..da86baf
--- /dev/null
+++ b/update-sqlite3
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Downloads the latest OpenRussian MySQL database dump and converts
+# it to SQLite3. The conversion is skipped if the download fails.
+#
+# NOTE: unzip cannot read from stdin yet. bsdtar could in principle,
+# but does not allow extracting to stdout.
+
+TMPFILE=`mktemp` || exit 1
+
+wget -O "$TMPFILE" https://en.openrussian.org/downloads/openrussian-sql.zip &&
+unzip -p "$TMPFILE" openrussian.sql | \
+~/working-copies/mysql2sqlite/mysql2sqlite - | \
+sqlite3 openrussian-sqlite3.db
+
+rm -f "$TMPFILE"
author	Robin Haberkorn <robin.haberkorn@googlemail.com>	2019-02-02 18:20:10 +0300
committer	Robin Haberkorn <robin.haberkorn@googlemail.com>	2019-02-02 18:20:10 +0300
commit	56e9becdd43373c34769336077569ba5a8bafd05 (patch)
tree	47574672bc3d397aad52a3a891ee354a7448949f
download	openrussian-cli-56e9becdd43373c34769336077569ba5a8bafd05.tar.gz