aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Robin Haberkorn <robin.haberkorn@googlemail.com> 2019-04-19 02:21:54 +0300
committer Robin Haberkorn <robin.haberkorn@googlemail.com> 2019-04-19 02:21:54 +0300
commit 54a5a4267090bb80d3337f1aa7419c8ba49c30b7 (patch)
tree 0bf93b0b4225664adc421b2c8b27171f00cc3a76
parent 57e82811588e37b247ece678f503655bd06aa090 (diff)
download openrussian-cli-54a5a4267090bb80d3337f1aa7419c8ba49c30b7.tar.gz
make check: test page generation for all words; various fixes
* The new `make check` target uncovered various bugs. All of them were related to missing database fields.
-rw-r--r-- Makefile 8
-rw-r--r-- README.md 8
-rwxr-xr-x openrussian.lua 108
3 files changed, 73 insertions, 51 deletions
diff --git a/Makefile b/Makefile
index a6b777f..55f7bf2 100644
--- a/Makefile
+++ b/Makefile
@@ -28,6 +28,14 @@ openrussian-sqlite3.db : openrussian-sql.zip mysql2sqlite postprocess.sql
unzip -p $< openrussian.sql | ./mysql2sqlite - | sqlite3 $@
sqlite3 $@ -batch <postprocess.sql
+# Try to generate all possible pages
+check : openrussian-sqlite3.db openrussian
+ sqlite3 $< 'SELECT bare FROM words WHERE LIKELY(disabled = 0)' | \
+ while read -r bare; do \
+ ./openrussian -V -p "$$bare" </dev/null >/dev/null || \
+ echo "Error generating \"$$bare\"" >/dev/stderr; \
+ done
+
# NOTE: Installation of the Bash completions depends on the Debian bash-completion
# package being installed or something similar
install : openrussian openrussian-sqlite3.db openrussian-completion.bash
diff --git a/README.md b/README.md
index f7670db..91249b7 100644
--- a/README.md
+++ b/README.md
@@ -53,11 +53,15 @@ Building is straight forward:
If you want to redownload the latest [openrussian.org](https://en.openrussian.org/)
database:
- make clean all
+ make clean all check
**Warning:** While the database content might be newer, the database schema
might also at any time become incompatible with the existing script.
-But you are of course welcome to contribute fixes/updates. :-)
+That is why a `check` is performed after building everything in the above
+example.
+If it returns lots of errors, you should probably stay with the original database.
+Otherwise, the error messages might help in fixing/upgrading the script.
+You are of course welcome to contribute patches. :-)
## Examples
diff --git a/openrussian.lua b/openrussian.lua
index a689769..9a0dff5 100755
--- a/openrussian.lua
+++ b/openrussian.lua
@@ -126,31 +126,28 @@ local function map_tbl(str)
return (lutf8.gsub(str, "(.)'", "\\fI%1\\fP"))
end
--- FIXME: Apparently, there are entries without declension or empty declension
--- entries, e.g. kosha4ij.
--- These should be detected and the entire section should be omitted.
local function format_declensions(...)
local decl = {}
for i, decl_id in ipairs{...} do
- if type(decl_id) == "string" then
- for _, case in ipairs{"nom", "gen", "dat", "acc", "inst", "prep"} do
- decl[case] = decl[case] or {}
- decl[case][i] = map_tbl(decl_id)
- end
- else
+ if type(decl_id) == "number" then
local cur = assert(con:execute(string.format([[
SELECT nom, gen, dat, acc, inst, prep FROM declensions WHERE id = %d
]], decl_id)))
local row = assert(cur:fetch({}, "a"))
cur:close()
- for case, val in pairs(row) do
+ for _, case in ipairs{"nom", "gen", "dat", "acc", "inst", "prep"} do
decl[case] = decl[case] or {}
- val = lutf8.gsub(val or '-', "[;,]%(", " (")
+ local val = lutf8.gsub(row[case] or '-', "[;,]%(", " (")
val = lutf8.gsub(val, "[;,]", ", ")
decl[case][i] = map_tbl(val)
end
+ else
+ for _, case in ipairs{"nom", "gen", "dat", "acc", "inst", "prep"} do
+ decl[case] = decl[case] or {}
+ decl[case][i] = map_tbl(decl_id or '-')
+ end
end
end
@@ -178,7 +175,7 @@ function format.noun(word_id, accented)
out_stream:write('.SH GENDER\n')
if row.gender and row.gender ~= "" then
local genders = {m = "male", f = "female", n = "neuter"}
- out_stream:write(genders[row.gender], ', ')
+ out_stream:write(genders[row.gender] or row.gender, ', ')
end
out_stream:write(row.animate == 1 and 'animate' or 'inanimate', '\n')
@@ -218,9 +215,6 @@ function format.adjective(word_id, accented)
-- NOTE: Rarely (e.g. nesomnenno), there is no entry in adjectives
if not row then return end
- --out_stream:write('.SH CATEGORY\n',
- -- 'adjective\n')
-
out_stream:write('.SH DECLENSION\n',
'.TS\n',
'allbox,tab(;);\n',
@@ -228,11 +222,13 @@ function format.adjective(word_id, accented)
'LB L L L L.\n',
';Male;Neutral;Female;Plural\n')
format_declensions(row.decl_m_id, row.decl_n_id, row.decl_f_id, row.decl_pl_id)
- out_stream:write('Short;',
- map_tbl(row.short_m or '-'), ';',
- map_tbl(row.short_n or '-'), ';',
- map_tbl(row.short_f or '-'), ';',
- map_tbl(row.short_pl or '-'), '\n')
+ if row.short_m or row.short_n or row.short_f or row.short_pl then
+ out_stream:write('Short;',
+ map_tbl(row.short_m or '-'), ';',
+ map_tbl(row.short_n or '-'), ';',
+ map_tbl(row.short_f or '-'), ';',
+ map_tbl(row.short_pl or '-'), '\n')
+ end
-- NOTE: It is unclear why the trailing .sp is necessary
out_stream:write('.TE\n',
'.sp\n')
@@ -251,10 +247,7 @@ end
-- NOTE: There is no separate table for adverbs
-- Currently, we wouldn't print more than the category, which is also in the
-- header, so it is omitted.
-function format.adverb(word_id, accented)
- --out_stream:write('.SH CATEGORY\n',
- -- 'adverb\n')
-end
+function format.adverb(word_id, accented) end
function format.verb(word_id, accented)
local cur = assert(con:execute(string.format([[
@@ -282,34 +275,49 @@ function format.verb(word_id, accented)
lutf8.gsub(row.partner, "[;,]", ", "), '\n')
end
- -- FIXME: Conjugation sometimes empty (e.g. widat')
-- FIXME: Can we assume that verbs without specified aspect are always
-- perfective?
- out_stream:write('.SH ', row.aspect == "imperfective" and 'PRESENT\n' or 'FUTURE\n',
- map_accented("\\[u042F] "), map_accented(row.sg1), '.\n.br\n',
- map_accented("\\[u0422]\\[u044B] "), map_accented(row.sg2), '.\n.br\n',
- map_accented("\\[u041E]\\[u043D]/\\[u041E]\\[u043D]\\[u0430]'/\\[u041E]\\[u043D]\\[u043E]' "),
- map_accented(row.sg3), '.\n.br\n',
- map_accented("\\[u041C]\\[u044B] "), map_accented(row.pl1), '.\n.br\n',
- map_accented("\\[u0412]\\[u044B] "), map_accented(row.pl2), '.\n.br\n',
- map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.pl3), '.\n.br\n')
-
- out_stream:write('.SH PAST\n',
- map_accented("\\[u041E]\\[u043D] "), map_accented(row.past_m), '.\n.br\n',
- map_accented("\\[u041E]\\[u043D]\\[u0430]' "), map_accented(row.past_f), '.\n.br\n',
- map_accented("\\[u041E]\\[u043D]\\[u043E]' "), map_accented(row.past_n), '.\n.br\n',
- map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.past_pl), '.\n')
+ -- NOTE: Very rarely (e.g. widat'), all conjugations are missing.
+ -- Sometimes, only the first person singular is missing.
+ if row.sg1 or row.sg2 or row.sg3 or row.pl1 or row.pl2 or row.pl3 then
+ out_stream:write('.SH ', row.aspect == "imperfective" and 'PRESENT\n' or 'FUTURE\n')
+ if row.sg1 then out_stream:write(map_accented("\\[u042F] "), map_accented(row.sg1), '.\n', '.br\n') end
+ out_stream:write(map_accented("\\[u0422]\\[u044B] "), map_accented(row.sg2), '.\n', '.br\n',
+ map_accented("\\[u041E]\\[u043D]/\\[u041E]\\[u043D]\\[u0430]'/\\[u041E]\\[u043D]\\[u043E]' "),
+ map_accented(row.sg3), '.\n', '.br\n',
+ map_accented("\\[u041C]\\[u044B] "), map_accented(row.pl1), '.\n', '.br\n',
+ map_accented("\\[u0412]\\[u044B] "), map_accented(row.pl2), '.\n', '.br\n',
+ map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.pl3), '.\n', '.br\n')
+ end
+
+ if row.past_m or row.past_f or row.past_n or row.past_pl then
+ out_stream:write('.SH PAST\n',
+ map_accented("\\[u041E]\\[u043D] "),
+ map_accented(lutf8.gsub(row.past_m, ",", "/")), '.\n', '.br\n',
+ map_accented("\\[u041E]\\[u043D]\\[u0430]' "),
+ map_accented(lutf8.gsub(row.past_f, ",", "/")), '.\n', '.br\n',
+ map_accented("\\[u041E]\\[u043D]\\[u043E]' "),
+ map_accented(lutf8.gsub(row.past_n, ",", "/")), '.\n', '.br\n',
+ map_accented("\\[u041E]\\[u043D]\\[u0438]' "),
+ map_accented(lutf8.gsub(row.past_pl, ",", "/")), '.\n')
+ end
-- FIXME: Is the singular/plural distinction always obvious?
- out_stream:write('.SH IMPERATIVE\n',
- map_accented(row.imperative_sg), '! / ',
- map_accented(row.imperative_pl), '!\n')
+ -- FIXME: Seldom (e.g. sxodit'), the plural is missing, but this may be a general
+ -- bug in the entry.
+ if row.imperative_sg or row.imperative_pl then
+ out_stream:write('.SH IMPERATIVE\n',
+ map_accented(row.imperative_sg), '! / ',
+ map_accented(row.imperative_pl), '!\n')
+ end
end
-function format.other(word_id, accented)
- --out_stream:write('.SH CATEGORY\n',
- -- 'other\n')
-end
+-- NOTE: There is no separate table for expressions
+-- Currently, we wouldn't print more than the category, which is also in the
+-- header, so it is omitted.
+function format.expression(word_id, accented) end
+
+function format.other(word_id, accented) end
local function get_translations(word_id)
local ret = {}
@@ -561,8 +569,10 @@ else
repeat
io.stdout:write("Show [1..", #unique_rows, ", press enter to cancel]? "):flush()
- local choice = io.stdin:read():lower()
- if choice == "" or choice == "q" then os.exit() end
+ -- If stdin is not available we always assume 1.
+ -- This can especially happen when using `make check`.
+ local choice = io.stdin:read() or "1"
+ if choice == "" or choice:lower() == "q" then os.exit() end
row = unique_rows[tonumber(choice)]
until row
end
@@ -586,7 +596,7 @@ out_stream:write('.\\" t\n',
'.TH "', row.bare, '" "', word_type, '" "')
if row.rank then
out_stream:write('#', row.rank, row.level and ' ('..row.level..')' or '')
-else
+elseif row.level then
out_stream:write(row.level)
end
out_stream:write('" "openrussian.lua" "openrussian.org"\n')