summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilipp Gesang <pgesang@ix.urz.uni-heidelberg.de>2010-12-24 00:31:40 +0100
committerPhilipp Gesang <pgesang@ix.urz.uni-heidelberg.de>2010-12-24 00:31:40 +0100
commit07ded7bc27e19ad200c5e14fd632a7ae8a36ac82 (patch)
treeb2160f6c58e5a151f248c007f51f68980eb68ca2
parented5c1a6441bc9498a65fec5b48c0cf110ec796ba (diff)
downloadtransliterator-07ded7bc27e19ad200c5e14fd632a7ae8a36ac82.tar.gz
moved lua stuff from main file to separate file
-rw-r--r--tex/context/third/transliterator/t-transliterator.mkiv272
-rw-r--r--tex/context/third/transliterator/transliterator.lua220
2 files changed, 227 insertions, 265 deletions
diff --git a/tex/context/third/transliterator/t-transliterator.mkiv b/tex/context/third/transliterator/t-transliterator.mkiv
index b4a3c86..25c4231 100644
--- a/tex/context/third/transliterator/t-transliterator.mkiv
+++ b/tex/context/third/transliterator/t-transliterator.mkiv
@@ -1,6 +1,6 @@
%D \module
%D [ file=t-transliterator,
-%D version=2010.03.10,
+%D version=2010-12-23 22:10:09+0100,
%D title=\CONTEXT\ User Module,
%D subtitle=The Transliterator,
%D author=Philipp Gesang,
@@ -16,6 +16,9 @@
\startmodule[transliterator]
+%\ctxlua{environment.loadluafile ("transliterator")}
+\ctxlua{dofile ("transliterator.lua")}
+
\unprotect
%D Use the Transliterator by adding \type{\usemodule[transliterator]} somewhere
%D before \type{\starttext}. Adjust the Transliterator through the
@@ -45,260 +48,6 @@
%D made up my mind concerning Greek transliteration, any suggestions are
%D welcome.
-%D For clarity's sake we'll stuff everything Lua into one table.
-\startluacode
-translit = translit or {}
-translit.debug_count = 0
-\stopluacode
-
-%D We want to keep track of all the tables we'll create so we put them into
-%D a separate dictionary accompanied by a description string.
-\startluacode
-translit.tables = translit.tables or {}
-\stopluacode
-
-
-%D Next we define respective lists of vowels and consonants as used in the
-%D russian alphabet. They are needed later when substitution tables for some
-%D idiosyncratic transcriptions are generated.
-\startluacode
--- If you haven't heard of cyrillic scripts until now you might want to read
--- at least the first 15 pages of
--- http://www.uni-giessen.de/partosch/eurotex99/berdnikov2.pdf
--- before you continue reading this file.
-
-translit.ru_vowels = {"а", "е", "ё", "и", "й", "о", "у", "ы", "э", "ю", "я",
- "А", "Е", "Ё", "И", "Й", "О", "У", "Ы", "Э", "Ю", "Я"}
-translit.ru_consonants = {"б", "в", "г", "д", "ж", "з", "к", "л", "м", "н",
- "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш", "щ",
- "Б", "В", "Г", "Д", "Ж", "З", "К", "Л", "М", "Н",
- "П", "Р", "С", "Т", "Ф", "Х", "Ц", "Ч", "Ш", "Щ"}
-\stopluacode
-
-%D Substitution tables are the very heart of the Transliterator. Due to the
-%D nature of languages and scripts exhaustive substitution is the simplest
-%D method for transliterations and transcriptions unless they are one-to-one
-%D mappings like those defined in ISO~9.
-%D
-%D To achieve better reusability we split the tables into segments, the most
-%D obvious being the \type{*_low} and \type{*_upp} variants for sets of lowercase
-%D and uppercase characters. Another set is constituted by e.~g. the
-%D \type{ru_old*} tables that allow adding transcription of historical
-%D characters if needed; by the way those are included in the default
-%D transliteration mode \type{ru_old}.
-
-%D Tables have been migrated into separate lua files.
-%D See {\tt
-%D trans_tables_glag.lua
-%D trans_tables_gr.lua
-%D trans_tables_iso9.lua
-%D trans_tables_scntfc.lua
-%D and
-%D trans_tables_trsc.lua.}
-
-%D Metatables allow for easy concatenation.
-\startluacode
-
-do
- -- This returns the Union of both key sets for the “+” operator.
- -- The values of the first table will be updated (read: overridden) by those given in the second.
- local Dict_add = {
- __add = function (dict_a, dict_b)
- assert (type(dict_a) == "table" and type(dict_b) == "table")
- local dict_result = setmetatable({}, Dict_add)
-
- for key, val in pairs(dict_a) do
- dict_result[key] = val
- end
-
- for key, val in pairs(dict_b) do
- dict_result[key] = val
- end
- return dict_result
- end
- }
-
- function translit.make_add_dict(dict)
- return setmetatable(dict, Dict_add)
- end
-end
-
-\stopluacode
-
-%D The function \type{translit.subst(s, t)} is used to replace any key of
-%D \type{t} that occurs in \type{s} with the according value of \type{t}.
-
-\startluacode
-
-function translit.subst (text, tab)
- for symbol, replacement in pairs(tab) do
- -- using ordinary gsub as suggested by Taco
- text = string.gsub(text, symbol, replacement)
- end
- return text
-end
-
-\stopluacode
-%D \type{translit.add_table(t, ...)} is used to build the final substitution tables
-%D from those we defined earlier; any keys in the previous table \type{t} are
-%D overwritten if they exist in the added tables \type{ta}, too.
-\startluacode
-
-
---function translit.add_table (...)
- --print ("---------------HERE---------------")
- --print (type(arg) .. ", Länge: " .. #arg)
- --local t = translit.make_add_dict{}
- --local cnt = 0
- --for _, tab in pairs(arg) do
- --cnt=cnt+1
- --print(cnt .. " - " .. type(tab))
- --t = t + tab
- --end
- --print(cnt)
- --for i,j in pairs(t) do print (i,j) end
- --return t
---end
-
-\stopluacode
-%D We might want to have all the table data nicely formatted by \CONTEXT\
-%D itself, here's how we'll do it. \type{translit.show_tab(t)} handles a
-%D single table \type{t}, builds a Natural TABLE out of its content and
-%D hands it down to the machine for typesetting. For debugging purposes it
-%D does not only print the replacement pairs but shows their code points as
-%D well.
-\startluacode
-
-function translit.show_tab (tab)
- -- Output a transliteration table, nicely formatted with natural tables.
- local cnt = 0
- context.setupTABLE({"r"}, {"each"}, {style="\\tfx", align="center"})
- context.setupTABLE({"c"}, {"each"}, {frame="off"})
- context.setupTABLE({"r"}, {"each"}, {frame="off"})
- context.setupTABLE({"c"}, {"first"}, {style="italic"})
- context.setupTABLE({"r"}, {"first"}, {style="bold", topframe="on", bottomframe="on"})
- context.setupTABLE({"r"}, {"last"}, {style="bold", topframe="on", bottomframe="on"})
- context.bTABLE({split="yes", option="stretch"})
- context.bTABLEhead()
- context.bTR()
- context.bTH() context("number") context.eTH()
- context.bTH() context("letters") context.eTH()
- context.bTH() context("n") context.eTH()
- context.bTH() context("replacement") context.eTH()
- context.bTH() context("n") context.eTH()
- context.bTH() context("bytes") context.eTH()
- context.bTH() context("repl. bytes") context.eTH()
- context.eTR()
- context.eTABLEhead()
- context.bTABLEbody()
- for key, val in pairs(tab) do
- local strempty = function (s)
- -- Some characters might not be replaced but removed, others might be
- -- multi-char sequences.
- if #s == 0 then return "nil"
- else
- local i = 0
- local r = ""
- -- The following loop could be replaced by checking the string length with utf.len(s) …
- repeat
- i = i + 1
- if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. " " end
- until (false)
- return r
- end
- end
- cnt = cnt + 1
- context.bTR()
- context.bTC() context(cnt) context.eTC()
- context.bTC() context(key) context.eTC()
- context.bTC() context(string.len(key)) context.eTC()
- context.bTC() context(val) context.eTC()
- context.bTC() context(string.len(val)) context.eTC()
- context.bTC() context(strempty(key)) context.eTC()
- context.bTC() context(strempty(val)) context.eTC()
- context.eTR()
- end
- context.eTABLEbody()
- context.bTABLEfoot() context.bTR()
- context.bTC() context("number") context.eTC()
- context.bTC() context("letters") context.eTC()
- context.bTC() context("n") context.eTC()
- context.bTC() context("replacement") context.eTC()
- context.bTC() context("n") context.eTC()
- context.bTC() context("bytes") context.eTC()
- context.bTC() context("repl. bytes") context.eTC()
- context.eTR()
- context.eTABLEfoot()
- context.eTABLE()
-end
-
-\stopluacode
-%D Having to pick out single tables for printing can be tedious, therefore we
-%D let Lua do the job in our stead. \type{translit.show_all_tabs()} calls
-%D \type{translit.show_tab} on every table that is registered with
-%D \type{translit.table} -- and uses its registered key as table heading.
-\startluacode
-
-function translit.show_all_tabs ()
- environment.loadluafile ("trans_tables_iso9")
- environment.loadluafile ("trans_tables_trsc")
- environment.loadluafile ("trans_tables_scntfc")
- environment.loadluafile ("trans_tables_trsc")
- environment.loadluafile ("trans_tables_glag")
- environment.loadluafile ("trans_tables_gr")
- translit.gen_rules_en()
- translit.gen_rules_de()
- -- Output all translation tables that are registered within translit.tables.
- -- This will be quite unordered.
- context ("\\chapter{Transliterator Showing All Tables}")
- for key, val in pairs(translit.tables) do
- context ("\\section{" .. key .. "}")
- translit.show_tab (val)
- end
-end
-
-\stopluacode
-%D \type{translit.transliterate(m, t)} constitutes the
-%D metafunction that is called by the \type{\transliterate} command.
-%D It loads the transliteration tables according to \type{method} and calls the
-%D corresponding function.
-\startluacode
-
-function translit.transliterate (method, text)
- if method == "ru" or
- method == "ru_old" or
- method == "ru_old_jer_hack" or
- method == "all"
- then
- environment.loadluafile ("trans_tables_iso9")
- text = translit.iso9 (method, text)
- elseif method == "ru_transcript_de" or
- method == "ru_transcript_de_exp" or -- experimental lpeg
- method == "ru_transcript_en" or
- method == "ru_transcript_en_sub" or -- old multiple substitution
- method == "ru_cz" or
- method == "ocs_cz"
- then
- environment.loadluafile ("trans_tables_trsc")
- text = translit.transcript (method, text)
- elseif method == "iso9_ocs" or
- method == "iso9_ocs_hack" or
- method == "ocs" or
- method == "ocs_gla"
- then
- environment.loadluafile ("trans_tables_scntfc")
- text = translit.scientific (method, text)
- elseif method == "gr" or
- method == "gr_n"
- then
- environment.loadluafile ("trans_tables_gr")
- text = translit.dogreek (method, text)
- end
- context (text)
-end
-\stopluacode
-
-
%D The following will help debugging and reviewing tables. Make sure your
%D typescript can handle the characters, in general it's no use with Latin
%D Modern which unfortunately provides only a restricted set of the unicode
@@ -307,7 +56,7 @@ end
%D The user-level command to output a single substitution table is
%D \type{\showOneTranslitTab{#1}}.
\define[1]\showOneTranslitTab{%
- \ctxlua{
+ \startluacode
environment.loadluafile ("trans_tables_iso9")
environment.loadluafile ("trans_tables_trsc")
environment.loadluafile ("trans_tables_scntfc")
@@ -316,8 +65,8 @@ end
environment.loadluafile ("trans_tables_gr")
translit.gen_rules_en()
translit.gen_rules_de()
- translit.show_tab(#1)
- }%
+ translit.show_tab("\luaescapestring{#1}")
+ \stopluacode
}
%D The user-level command to output all defined tables is
@@ -326,13 +75,6 @@ end
\ctxlua{translit.show_all_tabs()}%
}
-\startluacode
-function translit.debug_next ()
- translit.debug_count = translit.debug_count + 1
- tex.print("\\tfxx{\\bf translit debug msg. nr.~" .. translit.debug_count .. "}")
-end
-\stopluacode
-
\def\translitDebug#1{%
\doif{\TRLdebug}{true}{%
%\inmargin{\ctxlua{translit.debug_next()} #1}% Unreadable with too many debug messages.
diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua
new file mode 100644
index 0000000..1bcbb32
--- /dev/null
+++ b/tex/context/third/transliterator/transliterator.lua
@@ -0,0 +1,220 @@
+#!/usr/bin/env texlua
+--------------------------------------------------------------------------------
+-- FILE: transliterator.lua
+-- USAGE: to be called by t-transliterator.mkiv
+-- DESCRIPTION: basic lua environment for the Transliterator module
+-- REQUIREMENTS: latest ConTeXt MkIV
+-- AUTHOR: Philipp Gesang (Phg), <megas.kapaneus@gmail.com>
+-- CREATED: 2010-12-23 22:12:31+0100
+--------------------------------------------------------------------------------
+--
+
+-- Namespace table for everything the Transliterator defines on the Lua side;
+-- an already existing table of that name is reused rather than replaced.
+translit = translit or {}
+-- Registry of substitution tables; translit.show_all_tabs prints every entry
+-- found in here, using the key as heading.
+translit.tables = translit.tables or {}
+
+--------------------------------------------------------------------------------
+-- Predefining vowel and consonant lists
+--------------------------------------------------------------------------------
+-- If you haven't heard of cyrillic scripts until now you might want to read
+-- at least the first 15 pages of
+-- http://www.uni-giessen.de/partosch/eurotex99/berdnikov2.pdf
+-- before you continue reading this file.
+-- Letters of the Russian alphabet treated as vowels: lowercase forms first,
+-- then the corresponding uppercase forms.
+translit.ru_vowels = {"а", "е", "ё", "и", "й", "о", "у", "ы", "э", "ю", "я",
+ "А", "Е", "Ё", "И", "Й", "О", "У", "Ы", "Э", "Ю", "Я"}
+-- Letters of the Russian alphabet treated as consonants, again lowercase
+-- first and uppercase second.
+translit.ru_consonants = {"б", "в", "г", "д", "ж", "з", "к", "л", "м", "н",
+ "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш", "щ",
+ "Б", "В", "Г", "Д", "Ж", "З", "К", "Л", "М", "Н",
+ "П", "Р", "С", "Т", "Ф", "Х", "Ц", "Ч", "Ш", "Щ"}
+
+-- Substitution tables are the very heart of the Transliterator. Due to the
+-- nature of languages and scripts exhaustive substitution is the simplest
+-- method for transliterations and transcriptions unless they are one-to-one
+-- mappings like those defined in ISO~9.
+--
+-- To achieve better reusability we split the tables into segments, the most
+-- obvious being the \type{*_low} and \type{*_upp} variants for sets of lowercase
+-- and uppercase characters. Another set is constituted by e.~g. the
+-- \type{ru_old*} tables that allow adding transcription of historical
+-- characters if needed; by the way those are included in the default
+-- transliteration mode \type{ru_old}.
+
+-- Tables can be found in separate Lua files.
+-- See {\tt
+-- trans_tables_glag.lua
+-- trans_tables_gr.lua
+-- trans_tables_iso9.lua
+-- trans_tables_scntfc.lua
+-- and
+-- trans_tables_trsc.lua.}
+
+-- Metatables allow for lazy concatenation.
+
+do
+  -- Metatable shared by all "addable" dictionaries.  It is created empty and
+  -- filled in afterwards so that the __add handler below can refer to it:
+  -- inside a `local Dict_add = { ... }` constructor the local name is not in
+  -- scope yet and would silently resolve to an undefined global, leaving the
+  -- result of an addition without any metatable.
+  local Dict_add = {}
+
+  -- "+" operator: returns a new dictionary holding the union of both key
+  -- sets.  Where a key occurs in both operands the value of the second
+  -- (right-hand) operand wins.  Neither operand is modified.  The result
+  -- carries the same metatable, so additions can be chained.
+  -- Raises an assertion error unless both operands are tables.
+  Dict_add.__add = function (dict_a, dict_b)
+    assert (type(dict_a) == "table" and type(dict_b) == "table")
+    local dict_result = setmetatable({}, Dict_add)
+
+    for key, val in pairs(dict_a) do
+      dict_result[key] = val
+    end
+
+    for key, val in pairs(dict_b) do
+      dict_result[key] = val
+    end
+    return dict_result
+  end
+
+  -- Attaches the "+" behaviour to `dict` (in place) and returns that same
+  -- table.
+  function translit.make_add_dict(dict)
+    return setmetatable(dict, Dict_add)
+  end
+end
+
+-- We might want to have all the table data nicely formatted by \CONTEXT\
+-- itself, here's how we'll do it. \type{translit.show_tab(t)} handles a
+-- single table \type{t}, builds a Natural TABLE out of its content and
+-- hands it down to the machine for typesetting. For debugging purposes it
+-- does not only print the replacement pairs but shows their code points as
+-- well.
+
+-- Describe a key or replacement string for the "bytes" columns printed by
+-- translit.show_tab.
+--   s: the string to describe
+-- Returns the literal text "nil" for the empty string (some characters are
+-- not replaced but removed); otherwise the values utf.byte reports for
+-- positions 1, 2, ... of s, each followed by a single space.
+local strempty = function (s)
+  if s == "" then
+    return "nil"
+  end
+  -- A replacement can consist of more than one character (e.g. with
+  -- composed diacritics), so keep collecting until utf.byte has nothing
+  -- left to report.
+  local codes = ""
+  local pos = 1
+  repeat
+    codes = codes .. utf.byte(s, pos) .. " "
+    pos = pos + 1
+  until utf.byte(s, pos) == nil
+  return codes
+end
+
+--function translit.show_tab (tab_name)
+ --local tab = translit[tab_name]
+function translit.show_tab (tab)
+  -- Output a transliteration table, nicely formatted with natural tables.
+  -- Lots of calls to context() but as it’s only a goodie this doesn’t
+  -- really matter.
+  --
+  --   tab: the substitution table to print (keys are the input strings,
+  --        values their replacements), or the name of such a table.  The
+  --        name form is what \showOneTranslitTab passes in; it is looked up
+  --        in translit.tables first and in translit itself second.
+  -- Raises an error when no table can be found for the argument.
+  if type(tab) == "string" then
+    local name = tab
+    tab = translit.tables[name] or translit[name]
+    assert (type(tab) == "table",
+            "translit.show_tab: unknown table '" .. name .. "'")
+  end
+  assert (type(tab) == "table", "translit.show_tab: table or table name expected")
+
+  -- Column captions, shared by the head and the foot of the table.
+  local captions = { "number", "letters", "n", "replacement", "n", "bytes", "repl. bytes" }
+
+  -- Typeset one table row; `kind` selects the cell type ("TH" or "TC").
+  local function emit_row (kind, cells)
+    context.bTR()
+    for i = 1, #cells do
+      context["b" .. kind]() context(cells[i]) context["e" .. kind]()
+    end
+    context.eTR()
+  end
+
+  local cnt = 0
+  context.setupTABLE({"r"}, {"each"}, {style="\\tfx", align="center"})
+  context.setupTABLE({"c"}, {"each"}, {frame="off"})
+  context.setupTABLE({"r"}, {"each"}, {frame="off"})
+  context.setupTABLE({"c"}, {"first"}, {style="italic"})
+  context.setupTABLE({"r"}, {"first"}, {style="bold", topframe="on", bottomframe="on"})
+  context.setupTABLE({"r"}, {"last"}, {style="bold", topframe="on", bottomframe="on"})
+  context.bTABLE({split="yes", option="stretch"})
+
+  context.bTABLEhead()
+  emit_row ("TH", captions)
+  context.eTABLEhead()
+
+  context.bTABLEbody()
+  -- One row per substitution pair: running number, the pair itself with the
+  -- string.len of each side, and what strempty reports for each side.
+  for key, val in next,tab do
+    cnt = cnt + 1
+    emit_row ("TC", {
+      cnt,
+      key, string.len(key),
+      val, string.len(val),
+      strempty(key),
+      strempty(val),
+    })
+  end
+  context.eTABLEbody()
+
+  context.bTABLEfoot()
+  emit_row ("TC", captions)
+  context.eTABLEfoot()
+  context.eTABLE()
+end
+
+-- Having to pick out single tables for printing can be tedious, therefore we
+-- let Lua do the job in our stead. \type{translit.show_all_tabs()} calls
+-- \type{translit.show_tab} on every table that is registered with
+-- \type{translit.tables} -- and uses its registered key as section heading.
+
+function translit.show_all_tabs ()
+  -- Load the table files and run the rule generators before anything is
+  -- printed.  (translit.gen_rules_en/de are not defined in this file;
+  -- presumably the loaded table files provide them -- confirm.)
+  -- NOTE(review): "trans_tables_trsc" occurs twice in this sequence; that
+  -- looks redundant but is kept as is -- confirm before removing.
+  local table_files = {
+    "trans_tables_iso9",
+    "trans_tables_trsc",
+    "trans_tables_scntfc",
+    "trans_tables_trsc",
+    "trans_tables_glag",
+    "trans_tables_gr",
+  }
+  for i = 1, #table_files do
+    environment.loadluafile (table_files[i])
+  end
+  translit.gen_rules_en()
+  translit.gen_rules_de()
+
+  -- One section per entry of translit.tables, headed by the entry's key.
+  -- pairs() guarantees no particular order, so the sections come out
+  -- unordered.
+  context.chapter("Transliterator Showing All Tables")
+  for name, tab in pairs(translit.tables) do
+    context.section(name)
+    translit.show_tab (tab)
+  end
+end
+
+
+-- \type{translit.transliterate(m, t)} constitutes the
+-- metafunction that is called by the \type{\transliterate} command.
+-- It loads the transliteration tables according to \type{method} and calls the
+-- corresponding function.
+
+-- Lookup used by translit.transliterate: method name -> the table file to
+-- load and the name of the translit.* function that does the actual work.
+local translit_methods = {}
+do
+  local function register (file, handler, names)
+    for i = 1, #names do
+      translit_methods[names[i]] = { file = file, handler = handler }
+    end
+  end
+
+  register ("trans_tables_iso9", "iso9", {
+    "ru",
+    "ru_old",
+    "ru_old_jer_hack",
+    "all",
+  })
+  register ("trans_tables_trsc", "transcript", {
+    "ru_transcript_de",
+    "ru_transcript_de_exp", -- experimental lpeg
+    "ru_transcript_en",
+    "ru_transcript_en_sub", -- old multiple substitution
+    "ru_cz",
+    "ocs_cz",
+  })
+  register ("trans_tables_scntfc", "scientific", {
+    "iso9_ocs",
+    "iso9_ocs_hack",
+    "ocs",
+    "ocs_gla",
+  })
+  register ("trans_tables_gr", "dogreek", {
+    "gr",
+    "gr_n",
+  })
+end
+
+-- Entry point behind the \transliterate command.
+--   method: name of the transliteration method
+--   text:   the string to convert
+-- For a known method the matching table file is loaded and the text is run
+-- through the corresponding translit.* function; for any other method the
+-- text is left as it is.  Either way the text is then handed to context().
+function translit.transliterate (method, text)
+  local entry = translit_methods[method]
+  if entry then
+    environment.loadluafile (entry.file)
+    -- The handler is looked up only now, after the table file has been loaded.
+    text = translit[entry.handler] (method, text)
+  end
+  context (text)
+end
+
+-- Number of debug messages produced so far.
+translit.debug_count = 0
+
+-- For internal use only: advance the message counter and pass a label
+-- carrying the new number on to context().
+function translit.debug_next ()
+  local nr = translit.debug_count + 1
+  translit.debug_count = nr
+  context("\\tfxx{\\bf translit debug msg. nr.~" .. nr .. "}")
+end