diff options
author | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2019-04-19 04:42:19 +0300 |
---|---|---|
committer | Robin Haberkorn <robin.haberkorn@googlemail.com> | 2019-04-19 04:42:19 +0300 |
commit | 96b3ecb3baa0acced9816f5749d14b05e18a3b09 (patch) | |
tree | 950460e5d3568b148b990999666c6e248088688c | |
parent | 54a5a4267090bb80d3337f1aa7419c8ba49c30b7 (diff) | |
download | openrussian-cli-96b3ecb3baa0acced9816f5749d14b05e18a3b09.tar.gz |
improved normalization of lists
* Single VARCHAR cells will sometimes contain comma- or semicolon-separated lists - sometimes
with and sometimes without additional whitespace after the separator.
They are now always normalized to ", ".
-rwxr-xr-x | openrussian.lua | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/openrussian.lua b/openrussian.lua index 9a0dff5..476d99d 100755 --- a/openrussian.lua +++ b/openrussian.lua @@ -139,8 +139,8 @@ local function format_declensions(...) for _, case in ipairs{"nom", "gen", "dat", "acc", "inst", "prep"} do decl[case] = decl[case] or {} - local val = lutf8.gsub(row[case] or '-', "[;,]%(", " (") - val = lutf8.gsub(val, "[;,]", ", ") + local val = lutf8.gsub(row[case] or '-', "[;,] *%(", " (") + val = lutf8.gsub(val, "[;,] *", ", ") decl[case][i] = map_tbl(val) end else @@ -235,12 +235,12 @@ function format.adjective(word_id, accented) if row.comparative and row.comparative ~= "" then out_stream:write('.SH COMPARATIVE\n', - map_accented(lutf8.gsub(row.comparative, "[;,]", ", ")), '\n') + map_accented(lutf8.gsub(row.comparative, "[;,] *", ", ")), '\n') end if row.superlative and row.superlative ~= "" then out_stream:write('.SH SUPERLATIVE\n', - map_accented(lutf8.gsub(row.superlative, "[;,]", ", ")), '\n') + map_accented(lutf8.gsub(row.superlative, "[;,] *", ", ")), '\n') end end @@ -272,7 +272,7 @@ function format.verb(word_id, accented) -- but since the DB lists the partner as a string instead of -- word_id, finding the right entry could be unreliable out_stream:write('.SH PARTNER\n', - lutf8.gsub(row.partner, "[;,]", ", "), '\n') + lutf8.gsub(row.partner, "[;,] *", ", "), '\n') end -- FIXME: Can we assume that verbs without specified aspect are always |