make check: test page generation for all words; various fixes

* Using the new `make check` target, various bugs have been discovered. All of them were related to missing database fields.
author: Robin Haberkorn <robin.haberkorn@googlemail.com> 2019-04-19 02:21:54 +0300
committer: Robin Haberkorn <robin.haberkorn@googlemail.com> 2019-04-19 02:21:54 +0300
commit: 54a5a4267090bb80d3337f1aa7419c8ba49c30b7 (patch)
tree: 0bf93b0b4225664adc421b2c8b27171f00cc3a76
parent: 57e82811588e37b247ece678f503655bd06aa090 (diff)
download: openrussian-cli-54a5a4267090bb80d3337f1aa7419c8ba49c30b7.tar.gz
3 files changed, 73 insertions, 51 deletions
diff --git a/Makefile b/Makefile
index a6b777f..55f7bf2 100644
--- a/Makefile
+++ b/Makefile
@@ -28,6 +28,14 @@ openrussian-sqlite3.db : openrussian-sql.zip mysql2sqlite postprocess.sql
 	unzip -p $< openrussian.sql | ./mysql2sqlite - | sqlite3 $@
 	sqlite3 $@ -batch <postprocess.sql
 
+# Try to generate all possible pages
+check : openrussian-sqlite3.db openrussian
+	sqlite3 $< 'SELECT bare FROM words WHERE LIKELY(disabled = 0)' | \
+	while read -r bare; do \
+		./openrussian -V -p "$$bare" </dev/null >/dev/null || \
+		echo "Error generating \"$$bare\"" >/dev/stderr; \
+	done
+
 # NOTE: Installation of the Bash completions depends on the Debain bash-completion
 # package being installed or something similar
 install : openrussian openrussian-sqlite3.db openrussian-completion.bash
diff --git a/README.md b/README.md
index f7670db..91249b7 100644
--- a/README.md
+++ b/README.md
@@ -53,11 +53,15 @@ Building is straight forward:
 If you want to redownload the latest [openrussian.org](https://en.openrussian.org/)
 database:
 
-    make clean all
+    make clean all check
 
 **Warning:** While the database content might be newer, the database schema
 might also at any time become incompatible with the existing script.
-But you are of course welcome to contribute fixes/updates. :-)
+That is why a `check` is performed after building everything in the above
+example.
+If it returns lots of errors, you should probably stay with the original database.
+Otherwise, the error messages might help in fixing/upgrading the script.
+You are of course welcome to contribute patches. :-)
 
 ## Examples
 
diff --git a/openrussian.lua b/openrussian.lua
index a689769..9a0dff5 100755
--- a/openrussian.lua
+++ b/openrussian.lua
@@ -126,31 +126,28 @@ local function map_tbl(str)
 	return (lutf8.gsub(str, "(.)'", "\\fI%1\\fP"))
 end
 
--- FIXME: Apparently, there are entries without declension or empty declension
--- entries, e.g. kosha4ij.
--- These should be detected and the entire section should be omitted.
 local function format_declensions(...)
 	local decl = {}
 
 	for i, decl_id in ipairs{...} do
-		if type(decl_id) == "string" then
-			for _, case in ipairs{"nom", "gen", "dat", "acc", "inst", "prep"} do
-				decl[case] = decl[case] or {}
-				decl[case][i] = map_tbl(decl_id)
-			end
-		else
+		if type(decl_id) == "number" then
 			local cur = assert(con:execute(string.format([[
 				SELECT nom, gen, dat, acc, inst, prep FROM declensions WHERE id = %d
 			]], decl_id)))
 			local row = assert(cur:fetch({}, "a"))
 			cur:close()
 
-			for case, val in pairs(row) do
+			for _, case in ipairs{"nom", "gen", "dat", "acc", "inst", "prep"} do
 				decl[case] = decl[case] or {}
-				val = lutf8.gsub(val or '-', "[;,]%(", " (")
+				local val = lutf8.gsub(row[case] or '-', "[;,]%(", " (")
 				val = lutf8.gsub(val, "[;,]", ", ")
 				decl[case][i] = map_tbl(val)
 			end
+		else
+			for _, case in ipairs{"nom", "gen", "dat", "acc", "inst", "prep"} do
+				decl[case] = decl[case] or {}
+				decl[case][i] = map_tbl(decl_id or '-')
+			end
 		end
 	end
 
@@ -178,7 +175,7 @@ function format.noun(word_id, accented)
 	out_stream:write('.SH GENDER\n')
 	if row.gender and row.gender ~= "" then
 		local genders = {m = "male", f = "female", n = "neuter"}
-		out_stream:write(genders[row.gender], ', ')
+		out_stream:write(genders[row.gender] or row.gender, ', ')
 	end
 	out_stream:write(row.animate == 1 and 'animate' or 'inanimate', '\n')
 
@@ -218,9 +215,6 @@ function format.adjective(word_id, accented)
 	-- NOTE: Seldomly (e.g. nesomnenno), there is no entry in adjectives
 	if not row then return end
 
-	--out_stream:write('.SH CATEGORY\n',
-	--                 'adjective\n')
-
 	out_stream:write('.SH DECLENSION\n',
 	                 '.TS\n',
 	                 'allbox,tab(;);\n',
@@ -228,11 +222,13 @@ function format.adjective(word_id, accented)
 	                 'LB L  L  L  L.\n',
 	                 ';Male;Neutral;Female;Plural\n')
 	format_declensions(row.decl_m_id, row.decl_n_id, row.decl_f_id, row.decl_pl_id)
-	out_stream:write('Short;',
-	                 map_tbl(row.short_m or '-'), ';',
-	                 map_tbl(row.short_n or '-'), ';',
-	                 map_tbl(row.short_f or '-'), ';',
-	                 map_tbl(row.short_pl or '-'), '\n')
+	if row.short_m or row.short_n or row.short_f or row.short_pl then
+		out_stream:write('Short;',
+		                 map_tbl(row.short_m or '-'), ';',
+		                 map_tbl(row.short_n or '-'), ';',
+		                 map_tbl(row.short_f or '-'), ';',
+		                 map_tbl(row.short_pl or '-'), '\n')
+	end
 	-- NOTE: It is unclear why the trailing .sp is necessary
 	out_stream:write('.TE\n',
 	                 '.sp\n')
@@ -251,10 +247,7 @@ end
 -- NOTE: There is no separate table for adverbs
 -- Currently, we wouldn't print more than the category, which is also in the
 -- header, so it is omitted.
-function format.adverb(word_id, accented)
-	--out_stream:write('.SH CATEGORY\n',
-	--                 'adverb\n')
-end
+function format.adverb(word_id, accented) end
 
 function format.verb(word_id, accented)
 	local cur = assert(con:execute(string.format([[
@@ -282,34 +275,49 @@ function format.verb(word_id, accented)
 		                 lutf8.gsub(row.partner, "[;,]", ", "), '\n')
 	end
 
-	-- FIXME: Conjugation sometimes empty (e.g. widat')
 	-- FIXME: Can we assume that verbs without specified aspect are always
 	-- perfective?
-	out_stream:write('.SH ', row.aspect == "imperfective" and 'PRESENT\n' or 'FUTURE\n',
-	                 map_accented("\\[u042F] "), map_accented(row.sg1), '.\n.br\n',
-	                 map_accented("\\[u0422]\\[u044B] "), map_accented(row.sg2), '.\n.br\n',
-	                 map_accented("\\[u041E]\\[u043D]/\\[u041E]\\[u043D]\\[u0430]'/\\[u041E]\\[u043D]\\[u043E]' "),
-	                         map_accented(row.sg3), '.\n.br\n',
-	                 map_accented("\\[u041C]\\[u044B] "), map_accented(row.pl1), '.\n.br\n',
-	                 map_accented("\\[u0412]\\[u044B] "), map_accented(row.pl2), '.\n.br\n',
-	                 map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.pl3), '.\n.br\n')
-
-	out_stream:write('.SH PAST\n',
-	                 map_accented("\\[u041E]\\[u043D] "), map_accented(row.past_m), '.\n.br\n',
-	                 map_accented("\\[u041E]\\[u043D]\\[u0430]' "), map_accented(row.past_f), '.\n.br\n',
-	                 map_accented("\\[u041E]\\[u043D]\\[u043E]' "), map_accented(row.past_n), '.\n.br\n',
-	                 map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.past_pl), '.\n')
+	-- NOTE: Very rarely (e.g. widat'), all conjugations are missing.
+	-- Sometimes only the first person singular is missing.
+	if row.sg1 or row.sg2 or row.sg3 or row.pl1 or row.pl2 or row.pl3 then
+		out_stream:write('.SH ', row.aspect == "imperfective" and 'PRESENT\n' or 'FUTURE\n')
+		if row.sg1 then out_stream:write(map_accented("\\[u042F] "), map_accented(row.sg1), '.\n', '.br\n') end
+		out_stream:write(map_accented("\\[u0422]\\[u044B] "), map_accented(row.sg2), '.\n', '.br\n',
+		                 map_accented("\\[u041E]\\[u043D]/\\[u041E]\\[u043D]\\[u0430]'/\\[u041E]\\[u043D]\\[u043E]' "),
+		                         map_accented(row.sg3), '.\n', '.br\n',
+		                 map_accented("\\[u041C]\\[u044B] "), map_accented(row.pl1), '.\n', '.br\n',
+		                 map_accented("\\[u0412]\\[u044B] "), map_accented(row.pl2), '.\n', '.br\n',
+		                 map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.pl3), '.\n', '.br\n')
+	end
+
+	if row.past_m or row.past_f or row.past_n or row.past_pl then
+		out_stream:write('.SH PAST\n',
+		                 map_accented("\\[u041E]\\[u043D] "),
+		                         map_accented(lutf8.gsub(row.past_m, ",", "/")), '.\n', '.br\n',
+		                 map_accented("\\[u041E]\\[u043D]\\[u0430]' "),
+		                         map_accented(lutf8.gsub(row.past_f, ",", "/")), '.\n', '.br\n',
+		                 map_accented("\\[u041E]\\[u043D]\\[u043E]' "),
+		                         map_accented(lutf8.gsub(row.past_n, ",", "/")), '.\n', '.br\n',
+		                 map_accented("\\[u041E]\\[u043D]\\[u0438]' "),
+		                         map_accented(lutf8.gsub(row.past_pl, ",", "/")), '.\n')
+	end
 
 	-- FIXME: Is the singular/plural distinction always obvious?
-	out_stream:write('.SH IMPERATIVE\n',
-	                 map_accented(row.imperative_sg), '! / ',
-	                 map_accented(row.imperative_pl), '!\n')
+	-- FIXME: Rarely (e.g. sxodit'), the plural is missing, but this may be a general
+	-- bug in the entry.
+	if row.imperative_sg or row.imperative_pl then
+		out_stream:write('.SH IMPERATIVE\n',
+		                 map_accented(row.imperative_sg), '! / ',
+		                 map_accented(row.imperative_pl), '!\n')
+	end
 end
 
-function format.other(word_id, accented)
-	--out_stream:write('.SH CATEGORY\n',
-	--                 'other\n')
-end
+-- NOTE: There is no separate table for expressions
+-- Currently, we wouldn't print more than the category, which is also in the
+-- header, so it is omitted.
+function format.expression(word_id, accented) end
+
+function format.other(word_id, accented) end
 
 local function get_translations(word_id)
 	local ret = {}
@@ -561,8 +569,10 @@ else
 
 	repeat
 		io.stdout:write("Show [1..", #unique_rows, ", press enter to cancel]? "):flush()
-		local choice = io.stdin:read():lower()
-		if choice == "" or choice == "q" then os.exit() end
+		-- If stdin is not available we always assume 1.
+		-- This can especially happen when using `make check`.
+		local choice = io.stdin:read() or "1"
+		if choice == "" or choice:lower() == "q" then os.exit() end
 		row = unique_rows[tonumber(choice)]
 	until row
 end
@@ -586,7 +596,7 @@ out_stream:write('.\\" t\n',
                  '.TH "', row.bare, '" "', word_type, '" "')
 if row.rank then
 	out_stream:write('#', row.rank, row.level and ' ('..row.level..')' or '')
-else
+elseif row.level then
 	out_stream:write(row.level)
 end
 out_stream:write('" "openrussian.lua" "openrussian.org"\n')
author	Robin Haberkorn <robin.haberkorn@googlemail.com>	2019-04-19 02:21:54 +0300
committer	Robin Haberkorn <robin.haberkorn@googlemail.com>	2019-04-19 02:21:54 +0300
commit	54a5a4267090bb80d3337f1aa7419c8ba49c30b7 (patch)
tree	0bf93b0b4225664adc421b2c8b27171f00cc3a76
parent	57e82811588e37b247ece678f503655bd06aa090 (diff)
download	openrussian-cli-54a5a4267090bb80d3337f1aa7419c8ba49c30b7.tar.gz