aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2019-05-17 15:44:37 +0300
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2019-05-17 15:44:37 +0300
commitfa4d21193f17938b08ed23b0f6ed2805d4c46a7f (patch)
tree879512d6fad1a1c9650f403ba0e08d91db217cfe
parent8464dc3053056a1e0a8f386fb57783ed715a0249 (diff)
downloadopenrussian-cli-fa4d21193f17938b08ed23b0f6ed2805d4c46a7f.tar.gz
allow multiple translation languages to be specified via -L
* useful for maximizing the information on each page if you speak both English and German
-rw-r--r--README.md20
-rw-r--r--openrussian.119
-rwxr-xr-xopenrussian.lua123
3 files changed, 97 insertions, 65 deletions
diff --git a/README.md b/README.md
index ef506df..f8de0ff 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
# An Offline Console Russian Dictionary (based on openrussian.org)
This is an offline console and terminal-friendly Russian dictionary,
-based on the database of [https://en.openrussian.org/](openrussian.org).
+based on the database of [openrussian.org](https://en.openrussian.org/).
* Works offline (without internet access) and is very fast.
This really pays off if you browse Russian words a lot.
@@ -24,11 +24,15 @@ based on the database of [https://en.openrussian.org/](openrussian.org).
Possible future features:
-* Bilingual modes (German and English translations on the same generated page)
- to increase the amount of information if you happen to speak both of these languages.
+* Limit the number of results (by default to 1000) - the sheer number of results
+ can slow down auto-completions.
+* Not all terminals can display the accent correctly (linux console), so we should have
+ a fallback.
+ Ideally this can be detected, or we simply whitelist terminal emulators via $TERM.
* Better internationalization when generating German pages (`-Lde`).
-* Lookups via popular ASCII-cyrillic transliterations - would be useful without
- a Russian/cyrillic keyboard layout.
+* Lookups via popular
+ [ASCII-cyrillic transliterations](https://en.wikipedia.org/wiki/Informal_romanizations_of_Cyrillic) -
+ would be useful without a Russian/cyrillic keyboard layout.
* Be tolerant to typing mistakes.
* Accented characters are still broken in nroff tables
(see https://lists.gnu.org/archive/html/groff/2018-08/msg00000.html).
@@ -44,7 +48,7 @@ Run-time dependencies:
sudo apt-get install lua5.2 lua-sql-sqlite3 man-db bash-completion
Furthermore, you will need the [luautf8 library](https://github.com/starwing/luautf8).
-Using luarocks, it may be installed as follows:
+Using [LuaRocks](https://luarocks.org/), it may be installed as follows:
sudo luarocks-5.2 install luautf8
@@ -76,6 +80,10 @@ Display the German translation:
openrussian -Lde саморазрушение
+Display both German and English translations, giving precedence to German:
+
+ openrussian -Lde -Len саморазрушение
+
If you are unsure which consonants appear in this word:
openrussian самора[сз]ру[шщ]ение
diff --git a/openrussian.1 b/openrussian.1
index 84c4970..5c4bdc4 100644
--- a/openrussian.1
+++ b/openrussian.1
@@ -38,9 +38,9 @@ nominative male adjectives);
word inflections (conjugations, imperatives, declinations),
unless the \fB\-V\fP (verbatim) option is specified;
.IP \(bu
-translations according to the translation language.
-The translation language (currently \fIen\fP or \fIde\fP)
-is guessed from the system
+translations according to the translation languages.
+The translation languages (currently \fIen\fP, \fIde\fP or a combination thereof)
+are guessed from the system
.BR locale (5),
but can be overwritten using the \fB\-L\fP option.
.RE
@@ -62,10 +62,14 @@ This may be useful for storing them, generating PDF files, etc.
.SH OPTIONS
.
.IP "\fB-L\fR \fIlang\fR"
-Overwrite the translation language.
-Either \fIen\fP or \fIde\fP may be specified.
+Overwrite the translation languages.
+Either \fIen\fP (English) or \fIde\fP (German) may be specified.
The space after \fB-L\fP is optional,
so you may also for instance write \(lq-Lde\(rq.
+Multiple translations can be displayed on the same page by specifying
+\fB-L\fP multiple times.
+The order \fB-L\fP flags also matters and determines the order of
+translated material on the result page.
.IP "\fB-V\fR"
Turn on verbatim matching, ie. turn off any kind of inflection matching
and case folding.
@@ -92,6 +96,11 @@ Display the German translation:
openrussian -Lde саморазрушение
.EE
.TP
+Display both German and English translations, giving precedence to German:
+.EX
+openrussian -Lde -Len саморазрушение
+.EE
+.TP
If you are unsure which consonants appear in this word:
.EX
openrussian самора[сз]ру[шщ]ение
diff --git a/openrussian.lua b/openrussian.lua
index b03ce0b..62215e4 100755
--- a/openrussian.lua
+++ b/openrussian.lua
@@ -5,13 +5,13 @@ local lutf8 = require "lua-utf8"
local ACCENT = lutf8.char(0x0301) -- Accent combining character
-local lang = os.setlocale(nil, "ctype"):match("^([^_]+)")
+local langs = {}
local search_words = {}
local function usage(stream)
stream:write("Usage: ", arg[0], " [-L<lang>] [-V] [-p] <pattern...>\n",
- "\t-L<lang> Set language to <lang> (currently en or de, guessed from locale)\n",
+ "\t-L<lang> Overwrite translation language (currently en or de)\n",
"\t-V Verbatim matching (no case folding and inflections)\n",
"\t-p Print Troff code to stdout\n")
end
@@ -22,12 +22,12 @@ for i = 1, #arg do
if opt:sub(1, 1) == "L" then
if #opt > 1 then
- lang = opt:sub(2)
+ table.insert(langs, opt:sub(2))
elseif i == #arg then
usage(io.stderr)
os.exit(false)
else
- lang = arg[i+1]
+ table.insert(langs, arg[i+1])
i = i + 1
end
elseif opt == "V" then
@@ -59,10 +59,12 @@ end
local search_word = table.concat(search_words, " ")..
(auto_complete and "*" or "")
+if #langs == 0 then langs = {os.setlocale(nil, "ctype"):match("^([^_]+)")} end
+
-- FIXME: Currently only English and German are actually
-- contained in the database, but this might change.
-- Perhaps query the availability dynamically.
-if lang ~= "en" and lang ~= "de" then lang = "en" end
+if langs[1] ~= "en" and langs[1] ~= "de" then langs = {"en"} end
local function dirname(path)
return path:match("^(.*)/.+$") or "."
@@ -322,22 +324,22 @@ function format.other(word_id, accented) end
local function get_translations(word_id)
local ret = {}
- -- FIXME: Fetch other translations if primary
- -- language is not available
- local cur = assert(con:execute(string.format([[
- SELECT tl FROM translations
- WHERE word_id = %d AND lang = '%s'
- ]], word_id, con:escape(lang))))
- local row = cur:fetch({}, "a")
- while row do
- -- NOTE: One entry might contain many comma-separated
- -- translations
- for word in lutf8.gmatch(row.tl..", ", "(.-), ") do
- table.insert(ret, word)
+ for _, lang in ipairs(langs) do
+ local cur = assert(con:execute(string.format([[
+ SELECT tl FROM translations
+ WHERE word_id = %d AND lang = '%s'
+ ]], word_id, con:escape(lang))))
+ local row = cur:fetch({}, "a")
+ while row do
+ -- NOTE: One entry might contain many comma-separated
+ -- translations
+ for word in lutf8.gmatch(row.tl..", ", "(.-), ") do
+ table.insert(ret, word)
+ end
+ row = cur:fetch({}, "a")
end
- row = cur:fetch({}, "a")
+ cur:close()
end
- cur:close()
return ret
end
@@ -494,27 +496,29 @@ end
-- two queries are significantly faster - probably because of having to perform less
-- string concatenations.
if #rows == 0 then
- -- NOTE: The translation entry frequently contains a comma-separated
- -- list of translations
- --
- -- FIXME: Case folding only works for ASCII, which should be sufficient for
- -- German/English text (almost)...
- -- FIXME: The string concatenation is a real slow-down and the GLOB cannot
- -- be optimized.
- -- Perhaps the translations should be in their own (new) indexed table.
- cur = assert(con:execute(string.format([[
- SELECT %s(", "||tl||", ") AS completions, words.*
- FROM words JOIN translations ON words.id = word_id
- WHERE LIKELY(disabled = 0) AND lang = '%s' AND completions GLOB %s('*, %s, *')
- ORDER BY rank
- ]], verbatim and "" or "LOWER", con:escape(lang), verbatim and "" or "LOWER", con:escape(search_word))))
-
- repeat
- row = cur:fetch({}, "a")
- table.insert(rows, row)
- until not row
-
- cur:close()
+ for _, lang in ipairs(langs) do
+ -- NOTE: The translation entry frequently contains a comma-separated
+ -- list of translations
+ --
+ -- FIXME: Case folding only works for ASCII, which should be sufficient for
+ -- German/English text (almost)...
+ -- FIXME: The string concatenation is a real slow-down and the GLOB cannot
+ -- be optimized.
+ -- Perhaps the translations should be in their own (new) indexed table.
+ cur = assert(con:execute(string.format([[
+ SELECT %s(", "||tl||", ") AS completions, words.*
+ FROM words JOIN translations ON words.id = word_id
+ WHERE LIKELY(disabled = 0) AND lang = '%s' AND completions GLOB %s('*, %s, *')
+ ORDER BY rank
+ ]], verbatim and "" or "LOWER", con:escape(lang), verbatim and "" or "LOWER", con:escape(search_word))))
+
+ repeat
+ row = cur:fetch({}, "a")
+ table.insert(rows, row)
+ until not row
+
+ cur:close()
+ end
end
if auto_complete then
@@ -583,8 +587,12 @@ local word_id = row.id
local word_accented = row.accented or row.bare
local word_derived_from = row.derived_from_word_id
local word_audio = row.audio
-local word_usage = row["usage_"..lang]
local word_type = row.type or "other"
+local word_usages = {}
+
+for _, lang in ipairs(langs) do
+ table.insert(word_usages, row["usage_"..lang])
+end
-- Open stream only now, after no more messages have to be written to
-- stdout/stderr.
@@ -622,9 +630,9 @@ format[word_type](word_id, word_accented)
--
-- Generic sections
--
-if word_usage then
+if #word_usages > 0 then
out_stream:write('.SH USAGE\n',
- word_usage, '\n')
+ table.concat(word_usages, ', '), '\n')
end
-- FIXME: Perhaps this should rather be part of the SEE ALSO section
@@ -643,16 +651,25 @@ end
--
-- NOTE: There can be many examples, so print them late.
--
-cur = assert(con:execute(string.format([[
- SELECT ru, start, length, tl
- FROM sentences_words JOIN sentences ON sentence_id = sentences.id
- WHERE word_id = %d AND lang = '%s'
-]], word_id, con:escape(lang))))
-row = cur:fetch({}, "a")
-if row then
- out_stream:write('.SH EXAMPLES\n')
+rows = {}
+for _, lang in ipairs(langs) do
+ cur = assert(con:execute(string.format([[
+ SELECT ru, start, length, tl
+ FROM sentences_words JOIN sentences ON sentence_id = sentences.id
+ WHERE word_id = %d AND lang = '%s'
+ ]], word_id, con:escape(lang))))
repeat
+ row = cur:fetch({}, "a")
+ table.insert(rows, row)
+ until not row
+
+ cur:close()
+end
+if #rows > 0 then
+ out_stream:write('.SH EXAMPLES\n')
+
+ for _, row in ipairs(rows) do
-- FIXME: The accent is not always available in the default
-- italic font when formatting for PDF.
local ru_hl = lutf8.sub(row.ru, 1, row.start)..'\\fI'..
@@ -662,10 +679,8 @@ if row then
out_stream:write('.TP\n',
map_accented(ru_hl), '\n',
row.tl, '\n')
- row = cur:fetch({}, "a")
- until not row
+ end
end
-cur:close()
-- Audio recordings might be useful occasionally, but this is an offline/terminal
-- application, so it makes sense to print them last (like URLs in manpages).