diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2019-04-19 02:21:54 +0300 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2019-04-19 02:21:54 +0300 |
commit | 54a5a4267090bb80d3337f1aa7419c8ba49c30b7 (patch) | |
tree | 0bf93b0b4225664adc421b2c8b27171f00cc3a76 | |
parent | 57e82811588e37b247ece678f503655bd06aa090 (diff) | |
download | openrussian-cli-54a5a4267090bb80d3337f1aa7419c8ba49c30b7.tar.gz |
make check: test page generation for all words; various fixes
* Using the new `make check` target, various bugs have been discovered.
All of them were related to missing database fields.
-rw-r--r-- | Makefile | 8 | ||||
-rw-r--r-- | README.md | 8 | ||||
-rwxr-xr-x | openrussian.lua | 108 |
3 files changed, 73 insertions, 51 deletions
@@ -28,6 +28,14 @@ openrussian-sqlite3.db : openrussian-sql.zip mysql2sqlite postprocess.sql unzip -p $< openrussian.sql | ./mysql2sqlite - | sqlite3 $@ sqlite3 $@ -batch <postprocess.sql +# Try to generate all possible pages +check : openrussian-sqlite3.db openrussian + sqlite3 $< 'SELECT bare FROM words WHERE LIKELY(disabled = 0)' | \ + while read -r bare; do \ + ./openrussian -V -p "$$bare" </dev/null >/dev/null || \ + echo "Error generating \"$$bare\"" >/dev/stderr; \ + done + # NOTE: Installation of the Bash completions depends on the Debain bash-completion # package being installed or something similar install : openrussian openrussian-sqlite3.db openrussian-completion.bash @@ -53,11 +53,15 @@ Building is straight forward: If you want to redownload the latest [openrussian.org](https://en.openrussian.org/) database: - make clean all + make clean all check **Warning:** While the database content might be newer, the database schema might also at any time become incompatible with the existing script. -But you are of course welcome to contribute fixes/updates. :-) +That is why a `check` is performed after building everything in the above +example. +If it returns lots of errors, you should probably stay with the original database. +Otherwise, the error messages might help in fixing/upgrading the script. +You are of course welcome to contribute patches. :-) ## Examples diff --git a/openrussian.lua b/openrussian.lua index a689769..9a0dff5 100755 --- a/openrussian.lua +++ b/openrussian.lua @@ -126,31 +126,28 @@ local function map_tbl(str) return (lutf8.gsub(str, "(.)'", "\\fI%1\\fP")) end --- FIXME: Apparently, there are entries without declension or empty declension --- entries, e.g. kosha4ij. --- These should be detected and the entire section should be omitted. local function format_declensions(...) 
local decl = {} for i, decl_id in ipairs{...} do - if type(decl_id) == "string" then - for _, case in ipairs{"nom", "gen", "dat", "acc", "inst", "prep"} do - decl[case] = decl[case] or {} - decl[case][i] = map_tbl(decl_id) - end - else + if type(decl_id) == "number" then local cur = assert(con:execute(string.format([[ SELECT nom, gen, dat, acc, inst, prep FROM declensions WHERE id = %d ]], decl_id))) local row = assert(cur:fetch({}, "a")) cur:close() - for case, val in pairs(row) do + for _, case in ipairs{"nom", "gen", "dat", "acc", "inst", "prep"} do decl[case] = decl[case] or {} - val = lutf8.gsub(val or '-', "[;,]%(", " (") + local val = lutf8.gsub(row[case] or '-', "[;,]%(", " (") val = lutf8.gsub(val, "[;,]", ", ") decl[case][i] = map_tbl(val) end + else + for _, case in ipairs{"nom", "gen", "dat", "acc", "inst", "prep"} do + decl[case] = decl[case] or {} + decl[case][i] = map_tbl(decl_id or '-') + end end end @@ -178,7 +175,7 @@ function format.noun(word_id, accented) out_stream:write('.SH GENDER\n') if row.gender and row.gender ~= "" then local genders = {m = "male", f = "female", n = "neuter"} - out_stream:write(genders[row.gender], ', ') + out_stream:write(genders[row.gender] or row.gender, ', ') end out_stream:write(row.animate == 1 and 'animate' or 'inanimate', '\n') @@ -218,9 +215,6 @@ function format.adjective(word_id, accented) -- NOTE: Seldomly (e.g. 
nesomnenno), there is no entry in adjectives if not row then return end - --out_stream:write('.SH CATEGORY\n', - -- 'adjective\n') - out_stream:write('.SH DECLENSION\n', '.TS\n', 'allbox,tab(;);\n', @@ -228,11 +222,13 @@ function format.adjective(word_id, accented) 'LB L L L L.\n', ';Male;Neutral;Female;Plural\n') format_declensions(row.decl_m_id, row.decl_n_id, row.decl_f_id, row.decl_pl_id) - out_stream:write('Short;', - map_tbl(row.short_m or '-'), ';', - map_tbl(row.short_n or '-'), ';', - map_tbl(row.short_f or '-'), ';', - map_tbl(row.short_pl or '-'), '\n') + if row.short_m or row.short_n or row.short_f or row.short_pl then + out_stream:write('Short;', + map_tbl(row.short_m or '-'), ';', + map_tbl(row.short_n or '-'), ';', + map_tbl(row.short_f or '-'), ';', + map_tbl(row.short_pl or '-'), '\n') + end -- NOTE: It is unclear why the trailing .sp is necessary out_stream:write('.TE\n', '.sp\n') @@ -251,10 +247,7 @@ end -- NOTE: There is no separate table for adverbs -- Currently, we wouldn't print more than the category, which is also in the -- header, so it is omitted. -function format.adverb(word_id, accented) - --out_stream:write('.SH CATEGORY\n', - -- 'adverb\n') -end +function format.adverb(word_id, accented) end function format.verb(word_id, accented) local cur = assert(con:execute(string.format([[ @@ -282,34 +275,49 @@ function format.verb(word_id, accented) lutf8.gsub(row.partner, "[;,]", ", "), '\n') end - -- FIXME: Conjugation sometimes empty (e.g. widat') -- FIXME: Can we assume that verbs without specified aspect are always -- perfective? 
- out_stream:write('.SH ', row.aspect == "imperfective" and 'PRESENT\n' or 'FUTURE\n', - map_accented("\\[u042F] "), map_accented(row.sg1), '.\n.br\n', - map_accented("\\[u0422]\\[u044B] "), map_accented(row.sg2), '.\n.br\n', - map_accented("\\[u041E]\\[u043D]/\\[u041E]\\[u043D]\\[u0430]'/\\[u041E]\\[u043D]\\[u043E]' "), - map_accented(row.sg3), '.\n.br\n', - map_accented("\\[u041C]\\[u044B] "), map_accented(row.pl1), '.\n.br\n', - map_accented("\\[u0412]\\[u044B] "), map_accented(row.pl2), '.\n.br\n', - map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.pl3), '.\n.br\n') - - out_stream:write('.SH PAST\n', - map_accented("\\[u041E]\\[u043D] "), map_accented(row.past_m), '.\n.br\n', - map_accented("\\[u041E]\\[u043D]\\[u0430]' "), map_accented(row.past_f), '.\n.br\n', - map_accented("\\[u041E]\\[u043D]\\[u043E]' "), map_accented(row.past_n), '.\n.br\n', - map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.past_pl), '.\n') + -- NOTE: Very seldomly (eg. widat'), all conjugations are missing. + -- Sometimes only the first person singular is missing. 
+ if row.sg1 or row.sg2 or row.sg3 or row.pl1 or row.pl2 or row.pl3 then + out_stream:write('.SH ', row.aspect == "imperfective" and 'PRESENT\n' or 'FUTURE\n') + if row.sg1 then out_stream:write(map_accented("\\[u042F] "), map_accented(row.sg1), '.\n', '.br\n') end + out_stream:write(map_accented("\\[u0422]\\[u044B] "), map_accented(row.sg2), '.\n', '.br\n', + map_accented("\\[u041E]\\[u043D]/\\[u041E]\\[u043D]\\[u0430]'/\\[u041E]\\[u043D]\\[u043E]' "), + map_accented(row.sg3), '.\n', '.br\n', + map_accented("\\[u041C]\\[u044B] "), map_accented(row.pl1), '.\n', '.br\n', + map_accented("\\[u0412]\\[u044B] "), map_accented(row.pl2), '.\n', '.br\n', + map_accented("\\[u041E]\\[u043D]\\[u0438]' "), map_accented(row.pl3), '.\n', '.br\n') + end + + if row.past_m or row.past_f or row.past_n or row.past_pl then + out_stream:write('.SH PAST\n', + map_accented("\\[u041E]\\[u043D] "), + map_accented(lutf8.gsub(row.past_m, ",", "/")), '.\n', '.br\n', + map_accented("\\[u041E]\\[u043D]\\[u0430]' "), + map_accented(lutf8.gsub(row.past_f, ",", "/")), '.\n', '.br\n', + map_accented("\\[u041E]\\[u043D]\\[u043E]' "), + map_accented(lutf8.gsub(row.past_n, ",", "/")), '.\n', '.br\n', + map_accented("\\[u041E]\\[u043D]\\[u0438]' "), + map_accented(lutf8.gsub(row.past_pl, ",", "/")), '.\n') + end -- FIXME: Is the singular/plural distinction always obvious? - out_stream:write('.SH IMPERATIVE\n', - map_accented(row.imperative_sg), '! / ', - map_accented(row.imperative_pl), '!\n') + -- FIXME: Seldom (eg. sxodit'), the plural is missing, but this may be a general + -- bug of the entry. + if row.imperative_sg or row.imperative_pl then + out_stream:write('.SH IMPERATIVE\n', + map_accented(row.imperative_sg), '! 
/ ', + map_accented(row.imperative_pl), '!\n') + end end -function format.other(word_id, accented) - --out_stream:write('.SH CATEGORY\n', - -- 'other\n') -end +-- NOTE: There is no separate table for expressions +-- Currently, we wouldn't print more than the category, which is also in the +-- header, so it is omitted. +function format.expression(word_id, accented) end + +function format.other(word_id, accented) end local function get_translations(word_id) local ret = {} @@ -561,8 +569,10 @@ else repeat io.stdout:write("Show [1..", #unique_rows, ", press enter to cancel]? "):flush() - local choice = io.stdin:read():lower() - if choice == "" or choice == "q" then os.exit() end + -- If stdin is not available we always assume 1. + -- This can especially happen when using `make check`. + local choice = io.stdin:read() or "1" + if choice == "" or choice:lower() == "q" then os.exit() end row = unique_rows[tonumber(choice)] until row end @@ -586,7 +596,7 @@ out_stream:write('.\\" t\n', '.TH "', row.bare, '" "', word_type, '" "') if row.rank then out_stream:write('#', row.rank, row.level and ' ('..row.level..')' or '') -else +elseif row.level then out_stream:write(row.level) end out_stream:write('" "openrussian.lua" "openrussian.org"\n') |