#!/usr/bin/lua5.2

local driver = require "luasql.sqlite3"

-- Writes a one-line synopsis of the command-line interface to `stream`,
-- which must provide a file-like write method (e.g. io.stderr).
local function usage(stream)
	local script_name = arg[0]
	stream:write("Usage: " .. script_name .. " [-p] <word>\n")
end

-- Command-line state.
-- Both variables are declared local up front so that the option loop
-- below assigns to them instead of silently creating globals.
local use_stdout = false -- set by -p: write the page to stdout instead of piping it to man
local search_word        -- the word to look up (last non-option argument wins)

for i = 1, #arg do
	local param = arg[i]
	if param:sub(1, 1) == "-" then
		-- "-p" is the only option; anything else starting with "-" is an error.
		if param == "-p" then
			use_stdout = true
		else
			usage(io.stderr)
			os.exit(false)
		end
	else
		search_word = param
	end
end

-- A word to search for is mandatory.
if not search_word then
	usage(io.stderr)
	os.exit(false)
end

-- Returns the directory part of `path`: everything before the last "/"
-- that is still followed by at least one character.
-- Falls back to "." when `path` contains no such slash.
local function dirname(path)
	local parent = path:match("^(.*)/.+$")
	if parent then
		return parent
	end
	return "."
end

-- Calculate the installation prefix at runtime, in order to locate
-- the installed database.
-- This way, we don't have to preprocess the script during installation.
local PREFIX = dirname(arg[0]).."/.."

-- Prefer the installed database; fall back to a file of the same name in
-- the current working directory when the installed one cannot be opened.
local database = PREFIX.."/share/openrussian/openrussian-sqlite3.db"
do
	-- io.open() only serves as a readability probe here, so the handle is
	-- closed right away instead of being left open until garbage collection.
	local probe = io.open(database)
	if probe then
		probe:close()
	else
		database = "openrussian-sqlite3.db"
	end
end

local out_stream  -- output handle; assigned once the word to display is known
local lang = "en" -- language code used to select translations and example sentences

local env = assert(driver.sqlite3())
local con = assert(env:connect(database))

-- Turns every character followed by an apostrophe into a combined
-- accented character by replacing the apostrophe with the combining
-- acute accent.
-- NOTE: The accent (u0301) is emitted as its raw UTF-8 bytes, so the
-- result can be printed to stdout or embedded into Troff code.
local function map_accented(str)
	local accented = str:gsub("'", "\xCC\x81")
	return accented
end
-- Variant of the accent mapping for use inside tbl tables: the character
-- in front of each apostrophe is set in italics (\fI...\fP) and the
-- apostrophe is dropped.
-- FIXME: Combining accents do not work for tables since tbl will count the
-- combined character as two. Theoretically, Groff has composite characters
-- like \u[u043E_0301] but they don't work for all the Cyrillic
-- vowels.
local function map_tbl(str)
	-- Capture exactly one UTF-8 encoded character (a lead byte followed by
	-- any continuation bytes) instead of a fixed two bytes, so that
	-- single-byte and three-byte characters are handled as well.
	return (str:gsub("([\1-\127\194-\244][\128-\191]*)'", "\\fI%1\\fP"))
end

-- Writes one tbl data row for the declension with id `decl_id`: `tag`
-- followed by the six case forms, each passed through map_tbl() and
-- separated by ';'. If `short_form` is given, it is appended as a
-- seventh cell.
-- Raises an error if the query fails or yields no row.
local function format_declension(tag, decl_id, short_form)
	local sql = string.format([[
		SELECT * FROM declensions WHERE id = %d
	]], decl_id)
	local cursor = assert(con:execute(sql))
	local forms = assert(cursor:fetch({}, "a"))
	cursor:close()

	local cells = {
		map_tbl(forms.nom), map_tbl(forms.gen), map_tbl(forms.dat),
		map_tbl(forms.acc), map_tbl(forms.inst), map_tbl(forms.prep),
	}
	out_stream:write(tag, ';', table.concat(cells, ';'))
	if short_form then
		out_stream:write(';', map_tbl(short_form))
	end
	out_stream:write('\n')
end

-- Writes a tbl data row for a word whose form is the same in every case:
-- `tag` followed by six identical cells holding `accented` (passed
-- through map_tbl()).
local function format_dummy_declension(tag, accented)
	local cell = ';' .. map_tbl(accented)
	out_stream:write(tag, cell:rep(6), '\n')
end

-- Formatter functions, keyed by word category name.
-- Each one is invoked as format[category](word_id, accented).
local format = {}

-- Formatter for nouns: writes the WORD header (gender, animacy), an
-- optional PARTNER section and the DECLENSION table.
-- `word_id` selects the row in the nouns table, `accented` is the word
-- with apostrophes marking the stressed characters.
-- Raises an error if a query fails or yields no row.
function format.noun(word_id, accented)
	local cur = assert(con:execute(string.format([[
		SELECT * FROM nouns WHERE word_id = %d
	]], word_id)))
	local row = assert(cur:fetch({}, "a"))
	cur:close()

	out_stream:write('.SH WORD\n',
	                 map_accented(accented), ' \\-\\- noun, ')
	if row.gender and row.gender ~= "" then
		local genders = {m = "male", f = "female", n = "neuter"}
		-- Fall back to the raw gender code: an unmapped code would
		-- otherwise pass nil to write() and abort the whole page.
		out_stream:write(genders[row.gender] or row.gender, ', ')
	end
	out_stream:write(row.animate == 1 and 'animate' or 'inanimate', '\n')

	if row.partner and row.partner ~= "" then
		-- FIXME: What exactly is a noun "partner"?
		-- Seems to be used mostly for male/female pairs etc.
		out_stream:write('.SH PARTNER\n',
		                 row.partner, '\n')
	end

	out_stream:write('.SH DECLENSION\n',
	                 '.TS\n',
	                 'allbox,tab(;);\n',
	                 'L  LB LB LB LB LB LB\n',
	                 'LB L  L  L  L  L  L.\n',
	                 ';Nominative;Genitive;Dative;Accusative;Instrumental;Prepositive\n')

	-- Writes one table row: indeclinable nouns repeat the word itself in
	-- every case, all others are looked up by declension id.
	local function declension_row(tag, decl_id)
		if row.indeclinable == 1 then
			format_dummy_declension(tag, accented)
		else
			format_declension(tag, decl_id)
		end
	end

	-- Plural-only nouns get no singular row and vice versa.
	if row.pl_only == 0 then declension_row('Singular', row.decl_sg_id) end
	if row.sg_only == 0 then declension_row('Plural', row.decl_pl_id) end
	out_stream:write('.TE\n')
end

-- Formatter for adjectives: writes the WORD header, the DECLENSION table
-- (one row per gender plus plural, each with its short form) and the
-- optional COMPARATIVE and SUPERLATIVE sections.
-- `word_id` selects the row in the adjectives table, `accented` is the
-- word with apostrophes marking the stressed characters.
-- Raises an error if a query fails or yields no row.
function format.adjective(word_id, accented)
	local cur = assert(con:execute(string.format([[
		SELECT * FROM adjectives WHERE word_id = %d
	]], word_id)))
	local row = assert(cur:fetch({}, "a"))
	cur:close()

	out_stream:write('.SH WORD\n',
	                 map_accented(accented), ' \\-\\- adjective\n')

	out_stream:write('.SH DECLENSION\n',
	                 '.TS\n',
	                 'allbox,tab(;);\n',
	                 'L  LB LB LB LB LB LB LB\n',
	                 'LB L  L  L  L  L  L  L.\n',
	                 ';Nominative;Genitive;Dative;Accusative;Instrumental;Prepositive;Short\n')
	format_declension('Male', row.decl_m_id, row.short_m)
	format_declension('Neutral', row.decl_n_id, row.short_n)
	format_declension('Female', row.decl_f_id, row.short_f)
	format_declension('Plural', row.decl_pl_id, row.short_pl)
	out_stream:write('.TE\n')

	-- Empty strings are skipped just like missing values (the same test
	-- the PARTNER sections use), so no section is emitted without content.
	if row.comparative and row.comparative ~= "" then
		out_stream:write('.SH COMPARATIVE\n',
		                 map_accented(row.comparative), '\n')
	end

	if row.superlative and row.superlative ~= "" then
		out_stream:write('.SH SUPERLATIVE\n',
		                 map_accented(row.superlative), '\n')
	end
end

-- Formatter for verbs: writes the WORD header (with the aspect, if any),
-- an optional PARTNER section, the present/future conjugation, the past
-- tense forms and the imperative.
-- Raises an error if the query fails or yields no row.
function format.verb(word_id, accented)
	local sql = string.format([[
		SELECT * FROM verbs JOIN conjugations ON verbs.presfut_conj_id = conjugations.id
		WHERE verbs.word_id = %d
	]], word_id)
	local cursor = assert(con:execute(sql))
	local verb = assert(cursor:fetch({}, "a"))
	cursor:close()

	-- Renders a list of {pronoun, form} pairs as "<pronoun><form>." lines
	-- with a .br request between consecutive lines. The complete text is
	-- built before anything is written.
	local function conjugation(entries)
		local lines = {}
		for i, entry in ipairs(entries) do
			lines[i] = map_accented(entry[1]) .. map_accented(entry[2]) .. '.\n'
		end
		return table.concat(lines, '.br\n')
	end

	out_stream:write('.SH WORD\n',
	                 map_accented(accented), ' \\-\\- verb')
	if verb.aspect then
		out_stream:write(', ', verb.aspect)
	end
	out_stream:write('\n')

	local partner = verb.partner
	if partner and partner ~= "" then
		-- NOTE: Verb partners seem to be the aspect partners
		out_stream:write('.SH PARTNER\n', partner, '\n')
	end

	-- FIXME: Can we assume that verbs without specified aspect are always
	-- perfective?
	local tense
	if verb.aspect == "imperfective" then
		tense = 'PRESENT\n'
	else
		tense = 'FUTURE\n'
	end
	out_stream:write('.SH ', tense, conjugation{
		{"\\[u042F] ", verb.sg1},
		{"\\[u0422]\\[u044B] ", verb.sg2},
		{"\\[u041E]\\[u043D]/\\[u041E]\\[u043D]\\[u0430]'/\\[u041E]\\[u043D]\\[u043E]' ", verb.sg3},
		{"\\[u041C]\\[u044B] ", verb.pl1},
		{"\\[u0412]\\[u044B] ", verb.pl2},
		{"\\[u041E]\\[u043D]\\[u0438]' ", verb.pl3},
	}, '.br\n')

	out_stream:write('.SH PAST\n', conjugation{
		{"\\[u041E]\\[u043D] ", verb.past_m},
		{"\\[u041E]\\[u043D]\\[u0430]' ", verb.past_f},
		{"\\[u041E]\\[u043D]\\[u043E]' ", verb.past_n},
		{"\\[u041E]\\[u043D]\\[u0438]' ", verb.past_pl},
	})

	-- FIXME: Is the singular/plural distinction always obvious?
	local imperative_sg = map_accented(verb.imperative_sg)
	local imperative_pl = map_accented(verb.imperative_pl)
	out_stream:write('.SH IMPERATIVE\n',
	                 imperative_sg, '! / ', imperative_pl, '!\n')
end

-- Generic formatter: writes only the WORD section with the accented word.
-- `word_id` is accepted for signature compatibility with the other
-- formatters but is not used.
function format.other(word_id, accented)
	local word = map_accented(accented)
	out_stream:write('.SH WORD\n', word, '\n')
end

-- Returns a list (possibly empty) with the translations of the word
-- identified by `word_id` in the language selected by `lang`.
-- Raises an error if the query fails.
local function get_translations(word_id)
	local ret = {}

	-- FIXME: Fetch other translations if primary
	-- language is not available
	-- NOTE: The language code is passed as a single-quoted SQL string
	-- literal (embedded quotes doubled). SQLite parses a double-quoted
	-- token as an identifier first and only falls back to a string.
	local cur = assert(con:execute(string.format([[
		SELECT tl FROM translations
		WHERE word_id = %d AND lang = '%s'
	]], word_id, (lang:gsub("'", "''")))))
	local row = cur:fetch({}, "a")
	while row do
		ret[#ret + 1] = row.tl
		row = cur:fetch({}, "a")
	end
	cur:close()

	return ret
end

-- NOTE: This lets SQL strip the accent char from the input, which
-- allows users to cut and paste from generated output while we don't
-- have to deal with Unicode in Lua.
-- The word comes straight from the command line, so it is embedded as a
-- single-quoted SQL string literal with embedded quotes doubled. A
-- double-quoted token would be parsed as an identifier first, so a word
-- equal to a column name would be compared against that column, and a
-- double quote inside the word would break the statement.
local cur = assert(con:execute(string.format([[
	SELECT bare, accented, type, words.id AS word_id
	FROM words WHERE bare = REPLACE('%s', CHAR(0x0301), '')
]], (search_word:gsub("'", "''")))))

-- Collect all matching rows; `row` is nil again once the loop is done.
local rows = {}
local row = cur:fetch({}, "a")
while row do
	rows[#rows + 1] = row
	row = cur:fetch({}, "a")
end

cur:close()

if #rows == 0 then
	io.stderr:write('Word "', search_word, '" not found!\n')
	os.exit(false)
end

-- Pick the row to display: a single match is taken as is, otherwise the
-- candidates are listed with their translations and the user is asked to
-- choose one by number.
if #rows == 1 then
	row = rows[1]
else
	for i, candidate in ipairs(rows) do
		local word_accented = candidate.accented or candidate.bare
		local tl = get_translations(candidate.word_id)

		io.stdout:write(i, ") ", map_accented(word_accented))
		if #tl > 0 then io.stdout:write(" (", table.concat(tl, ", "), ")") end
		io.stdout:write("\n")
	end

	repeat
		io.stdout:write("Show [1..", #rows, ", press enter to cancel]? "):flush()
		local choice = io.stdin:read()
		-- read() yields nil at end of input (closed pipe, Ctrl-D);
		-- treat that like a cancel instead of indexing nil.
		if not choice then
			io.stdout:write("\n")
			os.exit()
		end
		choice = choice:lower()
		if choice == "" or choice == "q" then os.exit() end
		-- Anything that is not a valid index leaves `row` nil and
		-- repeats the prompt.
		row = rows[tonumber(choice)]
	until row
end

local word_id = row.word_id
local word_type = row.type or "other"
-- NOTE: Some words (e.g. personal pronouns) apparently do not
-- come with accents!?
local word_accented = row.accented or row.bare

-- Open stream only now, after no more messages have to be written to
-- stdout/stderr.
out_stream = assert(use_stdout and io.stdout or io.popen("man /dev/stdin", "w"))

-- The leading comment line with "t" makes man run the page through tbl.
out_stream:write('.\\" t\n',
                 '.TH "', search_word, '" "', word_type, '"\n')

--
-- Word-specific sections
--
-- Word types without a dedicated formatter are rendered by the generic
-- one instead of failing on a call to a nil value.
local formatter = format[word_type] or format.other
formatter(word_id, word_accented)

--
-- Generic sections
--
-- The TRANSLATION section is only written when at least one translation
-- exists; the translations are listed comma-separated on a single line.
local tl = get_translations(word_id)
local has_translations = #tl > 0
if has_translations then
	local joined = table.concat(tl, ', ')
	out_stream:write('.SH TRANSLATION\n', joined, '\n')
end

--
-- NOTE: There can be many examples, so print them last.
--
-- The language code is passed as a single-quoted SQL string literal
-- (embedded quotes doubled); SQLite parses a double-quoted token as an
-- identifier first and only falls back to a string.
cur = assert(con:execute(string.format([[
	SELECT ru, start, length, tl
	FROM sentences_words JOIN sentences ON sentence_id = sentences.id
	WHERE word_id = %d AND lang = '%s'
]], word_id, (lang:gsub("'", "''")))))
row = cur:fetch({}, "a")
-- The section heading is only written if there is at least one example.
if row then
	out_stream:write('.SH EXAMPLES\n')

	repeat
		-- FIXME: Highlight search word in sentences.
		-- start/length are apparently in characters
		-- instead of bytes.
		--[[
		local ru_hl = row.ru:sub(1, row.start)..'\\fI'..
		              row.ru:sub(row.start+1, row.start+1+row.length)..'\\fP'..
		              row.ru:sub(row.start+1+row.length+1)
		]]
		-- One tagged paragraph per example: the Russian sentence,
		-- followed by its translation.
		out_stream:write('.TP\n',
		                 map_accented(row.ru), '\n',
		                 row.tl, '\n')
		row = cur:fetch({}, "a")
	until not row

end
cur:close()

--
-- Cleanup
-- NOTE: Not strictly necessary, as everything is garbage-collected anyway
--
-- Release the database connection first, then its environment.
con:close()
env:close()

-- Close the output stream last.
if out_stream then
	out_stream:close()
end