1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
|
#!/usr/bin/lua5.2
local driver = require "luasql.sqlite3"
local lutf8 = require "lua-utf8"
-- U+0301 COMBINING ACUTE ACCENT (as UTF-8 bytes), used to mark word stress.
local ACCENT = lutf8.char(0x0301) -- Accent combining character
-- Guess the output language from the ctype locale, e.g. "de_DE.UTF-8" -> "de".
local lang = os.setlocale(nil, "ctype"):match("^([^_]+)")
-- All non-option command line arguments (later concatenated into one term).
local search_words = {}
-- Write the command line synopsis and option summary to `stream`
-- (usually io.stderr, since it is printed on argument errors).
local function usage(stream)
	local text = {
		"Usage: ", arg[0], " [-L<lang>] [-V] [-p] <pattern...>\n",
		"\t-L<lang> Set language to <lang> (currently en or de, guessed from locale)\n",
		"\t-V Verbatim matching (no case folding and inflections)\n",
		"\t-p Print Troff code to stdout\n"
	}
	stream:write(table.unpack(text))
end
-- Parse command line options; anything else is collected as a search word.
-- The globals `verbatim`, `use_stdout` and `auto_complete` are consumed
-- further down in the script.
-- NOTE: Assigning to the control variable of a numeric `for` loop has NO
-- effect on the iteration in Lua, so the original `for`-based parser could
-- not skip the separate argument of the "-L <lang>" form -- the language
-- name was additionally treated as a search word. A `while` loop fixes that.
local i = 1
while i <= #arg do
	if arg[i]:sub(1, 1) == "-" then
		local opt = arg[i]:sub(2)
		if opt:sub(1, 1) == "L" then
			if #opt > 1 then
				-- Attached form: -Lde
				lang = opt:sub(2)
			elseif i == #arg then
				-- Separated form, but the language argument is missing.
				usage(io.stderr)
				os.exit(false)
			else
				-- Separated form: -L de
				lang = arg[i+1]
				i = i + 1 -- consume the language argument
			end
		elseif opt == "V" then
			verbatim = true
		elseif opt == "p" then
			use_stdout = true
		elseif opt == "C" then
			-- This is a "secret" command used for implementing
			-- auto-completions.
			-- It will usually be the first argument.
			auto_complete = true
		else
			usage(io.stderr)
			os.exit(false)
		end
	else
		table.insert(search_words, arg[i])
	end
	i = i + 1
end
if #search_words == 0 then
	usage(io.stderr)
	os.exit(false)
end
-- Allowing multiple arguments to be concat into the search words
-- is useful when searching for a translation which may contain
-- spaces without quoting the entire search term.
-- For auto-completion a trailing "*" glob is appended, so the typed
-- text is matched as a prefix.
local search_word = table.concat(search_words, " ")..
	(auto_complete and "*" or "")
-- FIXME: Currently only English and German are actually
-- contained in the database, but this might change.
-- Perhaps query the availability dynamically.
if lang ~= "en" and lang ~= "de" then lang = "en" end
-- Return the directory component of `path`, or "." when the path
-- contains no slash (greedy match keeps everything up to the last '/').
local function dirname(path)
	local dir = path:match("^(.*)/.+$")
	return dir or "."
end
-- Calculate the installation prefix at runtime, in order to locate
-- the installed data base.
-- This way, we don't have to preprocess the script during installation
local PREFIX = dirname(arg[0]).."/.."
local database = PREFIX.."/share/openrussian/openrussian-sqlite3.db"
-- Probe whether the installed database exists; otherwise fall back to
-- the current directory. The probe handle is closed explicitly instead
-- of being leaked until garbage collection.
do
	local db_probe = io.open(database)
	if db_probe then
		db_probe:close()
	else
		database = "openrussian-sqlite3.db"
	end
end
-- Troff output sink; opened late (see below), after all interactive
-- stdout/stderr messages have been written.
local out_stream
local env = assert(driver.sqlite3())
local con = assert(env:connect(database))
-- A SQL-compatible globber: delegate the match to SQLite itself, so the
-- semantics are exactly those of the GLOB operator used in the queries.
--
-- NOTE: This may be reimplemented more efficiently by translating
-- the glob pattern to a Lua pattern.
-- Unfortunately, the Glob pattern syntax appears to be undefined,
-- probably because it defaults to the system glob.
--
-- Alternatively, we might override the MATCH function with Lua patterns
-- and use MATCH instead of GLOB, but this might be inefficient.
-- In order to make use of the query optimizer, we must either use
-- LIKE or GLOB.
--
-- Yet another alternative might be to parse all translations into
-- a separate index, speeding up translation lookups and avoiding
-- the need for globbing in Lua here.
function glob(pattern, str)
	local query = string.format([[
SELECT '%s' GLOB '%s'
]], con:escape(str), con:escape(pattern))
	local cur = assert(con:execute(query))
	local matched = assert(cur:fetch())
	cur:close()
	return matched ~= 0
end
-- Turns a character followed by apostroph into a combined
-- accented character.
-- NOTE: This encodes the accent (u0301) in bytes, so it can be
-- used for printing to stdout or into Troff code.
local function map_accented(str)
	-- Assign to a local so only gsub's first return value (the string,
	-- not the substitution count) is propagated.
	local mapped = lutf8.gsub(str, "'", ACCENT)
	return mapped
end
-- FIXME: map_accented() does not work for tables since tbl will count the
-- combined character as two. Theoretically, Groff has composite characters
-- like \u[u043E_0301] but they don't work for all the cyrillic
-- vocals.
-- If we really wanted to, we could replace every accented character
-- with an inline macro that is defined at Troff runtime depending on the
-- output device, so we could get accented characters in PDF tables at least.
-- Therefore, inside tables the stressed character is merely italicized.
local function map_tbl(str)
	-- Keep only gsub's string result (drop the substitution count).
	local marked = lutf8.gsub(str, "(.)'", "\\fI%1\\fP")
	return marked
end
-- FIXME: Apparently, there are entries without declension or empty declension
-- entries, e.g. kosha4ij.
-- These should be detected and the entire section should be omitted.
-- Write one ';'-separated row of a Troff tbl declension table.
-- @param tag Row label, e.g. "Singular" or "Male"
-- @param decl_id Primary key into the `declensions` table
-- @param short_form Optional extra column (adjective short form); nil for
--        nouns, which have no "Short" column in their table layout.
local function format_declension(tag, decl_id, short_form)
local cur = assert(con:execute(string.format([[
SELECT * FROM declensions WHERE id = %d
]], decl_id)))
local row = assert(cur:fetch({}, "a"))
cur:close()
-- Missing case forms are rendered as "-" placeholders.
out_stream:write(tag, ';', map_tbl(row.nom or "-"), ';',
map_tbl(row.gen or "-"), ';',
map_tbl(row.dat or "-"), ';',
map_tbl(row.acc or "-"), ';',
map_tbl(row.inst or "-"), ';',
map_tbl(row.prep or "-"))
if short_form then out_stream:write(';', map_tbl(short_form)) end
out_stream:write('\n')
end
-- Write a declension-table row for indeclinable words: the same
-- (tbl-marked) form is repeated across all six case columns.
local function format_dummy_declension(tag, accented)
	local cell = map_tbl(accented)
	local chunks = { tag }
	for _ = 1, 6 do
		chunks[#chunks + 1] = ';'
		chunks[#chunks + 1] = cell
	end
	chunks[#chunks + 1] = '\n'
	out_stream:write(table.unpack(chunks))
end
local format = {} -- formatter functions by word category
-- Format noun-specific manpage sections: GENDER, PARTNER and the
-- DECLENSION table. `accented` is only needed for indeclinable nouns
-- (to fill the dummy declension rows).
function format.noun(word_id, accented)
	local cur = assert(con:execute(string.format([[
SELECT * FROM nouns WHERE word_id = %d
]], word_id)))
	local row = cur:fetch({}, "a")
	cur:close()
	-- NOTE: This can probably happen as with any other word category
	-- (example?)
	if not row then return end
	out_stream:write('.SH GENDER\n')
	if row.gender and row.gender ~= "" then
		local genders = {m = "male", f = "female", n = "neuter"}
		-- Fall back to the raw database value for unexpected gender
		-- codes; passing nil to write() would raise an error.
		out_stream:write(genders[row.gender] or row.gender, ', ')
	end
	out_stream:write(row.animate == 1 and 'animate' or 'inanimate', '\n')
	if row.partner and row.partner ~= "" then
		-- NOTE: Noun "partners" seem to be male/female counterparts.
		-- FIXME: It would also be nice to include an accented version,
		-- but since the DB lists the partner as a string instead of
		-- word_id, finding the right entry could be unreliable
		out_stream:write('.SH PARTNER\n',
			row.partner, '\n')
	end
	-- FIXME: Rotate this table (only two columns: singular and plural).
	-- Lines should be short.
	out_stream:write('.SH DECLENSION\n',
		'.TS\n',
		'allbox,tab(;);\n',
		'L LB LB LB LB LB LB\n',
		'LB L L L L L L.\n',
		';Nominative;Genitive;Dative;Accusative;Instrumental;Prepositive\n')
	if row.pl_only == 0 then
		if row.indeclinable == 1 then
			format_dummy_declension('Singular', accented)
		else
			format_declension('Singular', row.decl_sg_id)
		end
	end
	if row.sg_only == 0 then
		if row.indeclinable == 1 then
			format_dummy_declension('Plural', accented)
		else
			format_declension('Plural', row.decl_pl_id)
		end
	end
	out_stream:write('.TE\n')
end
-- Format adjective-specific sections: the DECLENSION table (one row per
-- gender plus plural, each with its short form) and, when present,
-- COMPARATIVE and SUPERLATIVE. `accented` is unused here.
function format.adjective(word_id, accented)
local cur = assert(con:execute(string.format([[
SELECT * FROM adjectives WHERE word_id = %d
]], word_id)))
local row = cur:fetch({}, "a")
cur:close()
-- NOTE: Seldomly (e.g. nesomnenno), there is no entry in adjectives
if not row then return end
--out_stream:write('.SH CATEGORY\n',
-- 'adjective\n')
-- FIXME: Rotate this table (columns will be gender+plural).
-- Lines should be short.
-- FIXME: Short form not always present
out_stream:write('.SH DECLENSION\n',
'.TS\n',
'allbox,tab(;);\n',
'L LB LB LB LB LB LB LB\n',
'LB L L L L L L L.\n',
';Nominative;Genitive;Dative;Accusative;Instrumental;Prepositive;Short\n')
format_declension('Male', row.decl_m_id, row.short_m)
format_declension('Neutral', row.decl_n_id, row.short_n)
format_declension('Female', row.decl_f_id, row.short_f)
format_declension('Plural', row.decl_pl_id, row.short_pl)
out_stream:write('.TE\n')
if row.comparative and row.comparative ~= "" then
out_stream:write('.SH COMPARATIVE\n',
map_accented(row.comparative), '\n')
end
if row.superlative and row.superlative ~= "" then
out_stream:write('.SH SUPERLATIVE\n',
map_accented(row.superlative), '\n')
end
end
-- NOTE: There is no separate table for adverbs
-- Currently, we wouldn't print more than the category, which is also in the
-- header, so it is omitted.
-- Intentional no-op: it exists so the format[] dispatch finds an entry
-- for the "adverb" category.
function format.adverb(word_id, accented)
--out_stream:write('.SH CATEGORY\n',
-- 'adverb\n')
end
-- Format verb-specific sections: ASPECT, PARTNER (aspect partner),
-- PRESENT/FUTURE conjugation, PAST forms and IMPERATIVE.
-- The "\[uXXXX]" sequences are Troff Unicode escapes for the Cyrillic
-- personal pronouns heading each conjugated form.
function format.verb(word_id, accented)
local cur = assert(con:execute(string.format([[
SELECT * FROM verbs JOIN conjugations ON verbs.presfut_conj_id = conjugations.id
WHERE verbs.word_id = %d
]], word_id)))
local row = cur:fetch({}, "a")
cur:close()
-- NOTE: Seldomly (e.g. est' -- to be), there is no entry in verbs
if not row then return end
-- NOTE(review): unlike the other fields, aspect is only checked for
-- nil, not for "" -- presumably it is either NULL or meaningful; confirm.
if row.aspect then
out_stream:write('.SH ASPECT\n',
row.aspect, '\n')
end
if row.partner and row.partner ~= "" then
-- NOTE: Verb partners seem to be the aspect partners.
-- They are either comma or semicolon separated.
-- FIXME: It would also be nice to include an accented version,
-- but since the DB lists the partner as a string instead of
-- word_id, finding the right entry could be unreliable
out_stream:write('.SH PARTNER\n',
lutf8.gsub(row.partner, "[;,]", ", "), '\n')
end
-- FIXME: Conjugation sometimes empty (e.g. widat')
-- FIXME: Can we assume that verbs without specified aspect are always
-- perfective?
-- Imperfective verbs conjugate in the present tense, otherwise the
-- conjugation table holds future forms.
out_stream:write('.SH ', row.aspect == "imperfective" and 'PRESENT\n' or 'FUTURE\n',
map_accented("\\[u042F] "), map_accented(row.sg1), '.\n.br\n',
map_accented("\\[u0422]\\[u044B] "), map_accented(row.sg2), '.\n.br\n',
map_accented("\\[u041E]\\[u043D]/\\[u041E]\\[u043D]\\[u0430]'/\\[u041E]\\[u043D]\\[u043E]' "),
map_accented(row.sg3), '.\n.br\n',
map_accented("\\[u041C]\\[u044B] "), map_accented(row.pl1), '.\n.br\n',
map_accented("\\[u0412]\\[u044B] "), map_accented(row.pl2), '.\n.br\n',
map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.pl3), '.\n.br\n')
out_stream:write('.SH PAST\n',
map_accented("\\[u041E]\\[u043D] "), map_accented(row.past_m), '.\n.br\n',
map_accented("\\[u041E]\\[u043D]\\[u0430]' "), map_accented(row.past_f), '.\n.br\n',
map_accented("\\[u041E]\\[u043D]\\[u043E]' "), map_accented(row.past_n), '.\n.br\n',
map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.past_pl), '.\n')
-- FIXME: Is the singular/plural distinction always obvious?
out_stream:write('.SH IMPERATIVE\n',
map_accented(row.imperative_sg), '! / ',
map_accented(row.imperative_pl), '!\n')
end
-- Intentional no-op: catch-all formatter for word categories without
-- any category-specific sections (see the format[] dispatch below).
function format.other(word_id, accented)
--out_stream:write('.SH CATEGORY\n',
-- 'other\n')
end
-- Return all translations of `word_id` into the active language `lang`
-- as a flat array of strings.
local function get_translations(word_id)
local ret = {}
-- FIXME: Fetch other translations if primary
-- language is not available
local cur = assert(con:execute(string.format([[
SELECT tl FROM translations
WHERE word_id = %d AND lang = '%s'
]], word_id, con:escape(lang))))
local row = cur:fetch({}, "a")
while row do
-- NOTE: One entry might contain many comma-separated
-- translations
-- Appending the ", " sentinel lets the non-greedy capture split
-- the final item as well.
for word in lutf8.gmatch(row.tl..", ", "(.-), ") do
table.insert(ret, word)
end
row = cur:fetch({}, "a")
end
cur:close()
return ret
end
-- Format reference to row from the words-table, e.g. "slóvo(noun)".
-- Falls back to the bare form when no accented form exists, and to
-- the "other" category when `type` is unset.
-- FIXME: Not printed bold since bold text and accents
-- don't work together (URxvt).
local function get_reference(word_row)
	local display = word_row.accented or word_row.bare
	local category = word_row.type or "other"
	return string.format("%s(%s)", map_accented(display), category)
end
-- NOTE: This strips the accent char, so users can cut and paste from
-- generated output.
-- This is done from Lua, since the right-hand side of GLOB should be a constant
-- to allow optimizations:
-- https://www.sqlite.org/optoverview.html#the_like_optimization
--
-- TODO: Double-check whether the GLOB is actually optimized.
-- Theoretically, we need COLLATE BINARY for that.
--
-- FIXME: Case-folding UTF8 / Collating is not supported by SQLite3.
-- If we want to support case-insensitive matching, it is mandatory, though.
-- Could be done using the ICU extension:
-- https://www.sqlite.org/src/artifact?ci=trunk&filename=ext/icu/README.txt
-- First lookup: match the search term against the base ("bare") forms.
local cur = assert(con:execute(string.format([[
SELECT bare AS completions, * FROM words
WHERE LIKELY(disabled = 0) AND bare GLOB '%s'
ORDER BY rank
]], con:escape(lutf8.gsub(search_word, ACCENT, "")))))
local rows = {}
local row
-- Collect all result rows.
-- NOTE: table.insert(rows, nil) on the terminating (nil) fetch leaves
-- the array unchanged, so no hole is created.
repeat
row = cur:fetch({}, "a")
table.insert(rows, row)
until not row
cur:close()
-- Unless matching verbatim (-V), additionally match the search term
-- against all known inflections of every word.
if not verbatim then
--[==[
-- FIXME: These queries are tooo sloooow! Perhaps that's why the openrussian.org
-- website does not allow searching by declension prefixes.
-- This is because of the need for string-concatenations for every possible word
-- and because the GLOBbing cannot be optimized, even in the most common cases.
-- FIXME: This does not find braced-terms. Glob patterns are simply not powerful
-- enough to express "optional brace".
-- We'd probably need regexp for that.
cur = assert(con:execute(string.format([[
SELECT REPLACE(temp, "'", "") AS completions, words.* FROM words JOIN (
-- Search word might be a noun or adjective declension
SELECT nom||","||gen||","||dat||","||acc||","||inst||","||prep AS temp, word_id
FROM declensions
UNION
-- Search word might be a special adjective inflection
SELECT comparative||","||superlative||","||
short_m||","||short_f||","||short_n||","||short_pl AS temp, word_id
FROM adjectives
UNION
-- Search word might be a verb imperative, past form or conjugation
SELECT imperative_sg||","||imperative_pl||","||past_m||","||past_f||","||past_n||","||past_pl||
sg1||","||sg2||","||sg3||","||pl1||","||pl2||","||pl3 AS temp, verbs.word_id
FROM verbs LEFT JOIN conjugations ON presfut_conj_id = conjugations.id
) ON words.id = word_id
WHERE LIKELY(disabled = 0) AND ","||completions||"," GLOB '*,%s,*'
ORDER BY rank
]], con:escape(lutf8.gsub(search_word, ACCENT, "")))))
-- This is an alternative to the above query.
-- It eliminates the concatenations, but has to iterate many tables redundantly.
-- Effectively it is twice as slow as the above query...
cur = assert(con:execute(string.format([[
SELECT REPLACE(temp, "'", "") AS completions, words.* FROM words JOIN (
-- Search word might be a noun or adjective declension
SELECT nom AS temp, word_id FROM declensions
UNION ALL
SELECT gen AS temp, word_id FROM declensions
UNION ALL
SELECT dat AS temp, word_id FROM declensions
UNION ALL
SELECT acc AS temp, word_id FROM declensions
UNION ALL
SELECT inst AS temp, word_id FROM declensions
UNION ALL
SELECT prep AS temp, word_id FROM declensions
UNION ALL
-- Search word might be a special adjective inflection
SELECT comparative AS temp, word_id FROM adjectives
UNION ALL
SELECT superlative AS temp, word_id FROM adjectives
UNION ALL
SELECT short_m AS temp, word_id FROM adjectives
UNION ALL
SELECT short_f AS temp, word_id FROM adjectives
UNION ALL
SELECT short_n AS temp, word_id FROM adjectives
UNION ALL
SELECT short_pl AS temp, word_id FROM adjectives
UNION ALL
-- Search word might be a verb imperative or past form
SELECT imperative_sg AS temp, word_id FROM verbs
UNION ALL
SELECT imperative_pl AS temp, word_id FROM verbs
UNION ALL
SELECT past_m AS temp, word_id FROM verbs
UNION ALL
SELECT past_f AS temp, word_id FROM verbs
UNION ALL
SELECT past_n AS temp, word_id FROM verbs
UNION ALL
SELECT past_pl AS temp, word_id FROM verbs
UNION ALL
-- Search word might be a verb conjugation
SELECT sg1 AS temp, word_id FROM conjugations
UNION ALL
SELECT sg2 AS temp, word_id FROM conjugations
UNION ALL
SELECT sg3 AS temp, word_id FROM conjugations
UNION ALL
SELECT pl1 AS temp, word_id FROM conjugations
UNION ALL
SELECT pl2 AS temp, word_id FROM conjugations
UNION ALL
SELECT pl3 AS temp, word_id FROM conjugations
) ON words.id = word_id
WHERE LIKELY(disabled = 0) AND completions GLOB '%s'
ORDER BY rank
]], con:escape(lutf8.gsub(search_word, ACCENT, "")))))
]==]
-- This query uses a new `bare_inflections` table, since all queries
-- using existing tables (see above) are way too slow, especially for
-- autocompletions.
-- NOTE: The right-hand side of GLOB must be a constant, so that it can be
-- optimized using the index.
cur = assert(con:execute(string.format([[
SELECT bare_inflections.bare AS completions, words.*
FROM words JOIN bare_inflections ON words.id = word_id
WHERE LIKELY(disabled = 0) AND completions GLOB '%s'
ORDER BY rank
]], con:escape(lutf8.gsub(search_word, ACCENT, "")))))
-- Append inflection matches to the base-form matches collected above.
repeat
row = cur:fetch({}, "a")
table.insert(rows, row)
until not row
cur:close()
end
-- Only if we do not find a Russian word, we try to find a translation.
-- This is not wrapped with the above query into one using a LEFT JOIN since
-- two queries are significantly faster - probably because of having to perform less
-- string concatenations.
if #rows == 0 then
-- NOTE: The translation entry frequently contains a comma-separated
-- list of translations
--
-- FIXME: Case folding only works for ASCII, which should be sufficient for
-- German/English text (almost)...
-- FIXME: The string concatenation is a real slow-down and the GLOB cannot
-- be optimized.
-- Perhaps the translations should be in their own (new) indexed table.
-- The outer %s placeholders expand to "LOWER" for case-folded matching,
-- or to "" (i.e. a plain parenthesized expression) when -V was given.
cur = assert(con:execute(string.format([[
SELECT %s(", "||tl||", ") AS completions, words.*
FROM words JOIN translations ON words.id = word_id
WHERE LIKELY(disabled = 0) AND lang = '%s' AND completions GLOB %s('*, %s, *')
ORDER BY rank
]], verbatim and "" or "LOWER", con:escape(lang), verbatim and "" or "LOWER", con:escape(search_word))))
repeat
row = cur:fetch({}, "a")
table.insert(rows, row)
until not row
cur:close()
end
-- In auto-completion mode (-C), print one completion per line and exit.
if auto_complete then
-- FIXME: See above for notes on case-folding
local search_word_bare = lutf8.gsub(search_word, ACCENT, "")
search_word_bare = verbatim and search_word_bare or search_word_bare:lower()
for _, row in ipairs(rows) do
-- NOTE: This code is reused for Russian base words, inflections and translated lookups,
-- so there is a common `completions` column.
-- Russian words can be treated like single-word translations.
-- Terms in this column can be comma-separated with and without spaces and
-- there may be braces.
for word in lutf8.gmatch(row.completions..",", " *%(?(.-)%)?,") do
if glob(search_word, word) then
-- Print the last typed word followed by the completion suffix.
-- NOTE(review): search_word_bare ends in the "*" appended for -C, so
-- lutf8.sub() starts right after the typed prefix; this assumes the
-- matched word begins with exactly that prefix -- TODO confirm for
-- case-folded matches.
io.stdout:write(search_words[#search_words],
lutf8.sub(word, lutf8.len(search_word_bare)), "\n")
end
end
end
os.exit(true)
end
-- Nothing matched at all: report and signal failure via the exit code.
if #rows == 0 then
	io.stderr:write('Word "', search_word, '" not found!\n')
	os.exit(false)
end
-- Filter out duplicates (the same word may match both as a base form
-- and as one of its inflections), keeping the first occurrence.
local seen_ids = {}
local unique_rows = {}
for _, candidate in ipairs(rows) do
	if not seen_ids[candidate.id] then
		seen_ids[candidate.id] = true
		unique_rows[#unique_rows + 1] = candidate
	end
end
-- A single match is shown directly; multiple matches present a numbered
-- interactive chooser on stdout/stdin.
if #unique_rows == 1 then
	row = unique_rows[1]
else
	for i, row in ipairs(unique_rows) do
		local word_accented = row.accented or row.bare
		local tl = get_translations(row.id)
		io.stdout:write(i, ") ", map_accented(word_accented))
		if #tl > 0 then io.stdout:write(" (", table.concat(tl, ", "), ")") end
		io.stdout:write("\n")
	end
	repeat
		io.stdout:write("Show [1..", #unique_rows, ", press enter to cancel]? "):flush()
		-- read() returns nil on EOF (e.g. Ctrl-D or a closed stdin);
		-- treat that like an empty line (cancel) instead of crashing
		-- on indexing nil with :lower().
		local choice = (io.stdin:read() or ""):lower()
		if choice == "" or choice == "q" then os.exit() end
		row = unique_rows[tonumber(choice)]
	until row
end
local word_id = row.id
-- NOTE: Some words (e.g. personal pronouns) apparently do not
-- come with accents!?
local word_accented = row.accented or row.bare
local word_derived_from = row.derived_from_word_id
local word_audio = row.audio
-- Usage notes are stored per language in columns usage_en/usage_de.
local word_usage = row["usage_"..lang]
local word_type = row.type or "other"
-- Open stream only now, after no more messages have to be written to
-- stdout/stderr.
out_stream = assert(use_stdout and io.stdout or io.popen("man /dev/stdin", "w"))
-- NOTE: The headers and footers shouldn't contain critical information
-- since they might not be printed at all.
out_stream:write('.\\" t\n',
	'.TH "', row.bare, '" "', word_type, '" "')
if row.rank then
	out_stream:write('#', row.rank, row.level and ' ('..row.level..')' or '')
else
	-- row.level can be missing as well; write() would raise on nil.
	out_stream:write(row.level or '')
end
out_stream:write('" "openrussian.lua" "openrussian.org"\n')
--
-- Generic WORD section with translation.
--
out_stream:write('.SH WORD\n',
	map_accented(word_accented))
local tl = get_translations(word_id)
if #tl > 0 then
	out_stream:write(' \\-\\- ', table.concat(tl, ', '))
end
out_stream:write('\n')
--
-- Word-specific sections
-- NOTE: word_accented is required only for format.noun() and could be
-- avoided altogether.
--
-- Fall back to the no-op "other" formatter for any category without a
-- dedicated entry; indexing format[] with an unknown type string would
-- otherwise yield nil and crash on the call.
local formatter = format[word_type] or format.other
formatter(word_id, word_accented)
--
-- Generic sections
--
if word_usage then
	out_stream:write('.SH USAGE\n',
		word_usage, '\n')
end
-- FIXME: Perhaps this should rather be part of the SEE ALSO section
if word_derived_from then
	cur = assert(con:execute(string.format([[
SELECT bare, accented, type FROM words
WHERE LIKELY(disabled = 0) AND id = %d
]], word_derived_from)))
	row = assert(cur:fetch({}, "a"))
	cur:close()
	out_stream:write('.SH DERIVED FROM\n',
		get_reference(row), '\n')
end
--
-- NOTE: There can be many examples, so print them late.
--
cur = assert(con:execute(string.format([[
SELECT ru, start, length, tl
FROM sentences_words JOIN sentences ON sentence_id = sentences.id
WHERE word_id = %d AND lang = '%s'
]], word_id, con:escape(lang))))
row = cur:fetch({}, "a")
if row then
out_stream:write('.SH EXAMPLES\n')
repeat
-- FIXME: The accent is not always available in the default
-- italic font when formatting for PDF.
-- Italicize the occurrence of the word within the example sentence.
-- NOTE(review): `start` appears to be a 0-based character offset and
-- `length` a character count -- confirm against the DB schema.
local ru_hl = lutf8.sub(row.ru, 1, row.start)..'\\fI'..
lutf8.sub(row.ru, row.start+1, row.start+1+row.length-1)..'\\fP'..
lutf8.sub(row.ru, row.start+1+row.length)
out_stream:write('.TP\n',
map_accented(ru_hl), '\n',
row.tl, '\n')
row = cur:fetch({}, "a")
until not row
end
cur:close()
-- Audio recordings might be useful occasionally, but this is an offline/terminal
-- application, so it makes sense to print them last (like URLs in manpages).
--
-- NOTE: There is an UE man-macro, but it doesn't seem to be very helpful here and
-- seems to bring no advantages when formatting as a PDF.
-- It could be typset in the default fixed-width font (\fC), but it does not contain
-- cyrillic characters, so we don't do that either.
if word_audio then
out_stream:write('.SH AUDIO\n',
word_audio, '\n')
end
-- Disable adjusting (space-stretching) for the related-word lists.
-- Don't forget to enable this again if something follows these sections.
out_stream:write('.na\n')
-- NOTE: The results are grouped by relation, so that they can be
-- easily printed in one section per relation.
-- Unfortunately, we cannot define custom collating sequences with LuaSQL.
-- FIXME: Print this under a single SEE ALSO master section?
-- FIXME: Results should perhaps be ordered by `type`?
cur = assert(con:execute(string.format([[
SELECT bare, accented, type, relation
FROM words_rels JOIN words ON rel_word_id = words.id
WHERE LIKELY(disabled = 0) AND words_rels.word_id = %d
ORDER BY relation, rank
]], word_id)))
local cur_relation
row = cur:fetch({}, "a")
while row do
-- Start a new section whenever the relation changes (rows are
-- ordered by relation).
if cur_relation ~= row.relation then
cur_relation = row.relation
out_stream:write('.SH ', cur_relation:upper(), '\n')
end
out_stream:write(get_reference(row))
-- Fetch the NEXT row before deciding the separator: a comma joins
-- entries of the same relation, a newline terminates the section.
row = cur:fetch({}, "a")
out_stream:write(row and row.relation == cur_relation and ', ' or '\n')
end
cur:close()
--
-- Cleanup
-- NOTE: Not strictly necessary, as everything is garbage-collected anyway
--
con:close()
env:close()
if out_stream then out_stream:close() end
|