From 07ded7bc27e19ad200c5e14fd632a7ae8a36ac82 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 24 Dec 2010 00:31:40 +0100 Subject: moved lua stuff from main file to separate file --- .../third/transliterator/t-transliterator.mkiv | 272 +-------------------- 1 file changed, 7 insertions(+), 265 deletions(-) (limited to 'tex/context/third/transliterator/t-transliterator.mkiv') diff --git a/tex/context/third/transliterator/t-transliterator.mkiv b/tex/context/third/transliterator/t-transliterator.mkiv index b4a3c86..25c4231 100644 --- a/tex/context/third/transliterator/t-transliterator.mkiv +++ b/tex/context/third/transliterator/t-transliterator.mkiv @@ -1,6 +1,6 @@ %D \module %D [ file=t-transliterator, -%D version=2010.03.10, +%D version=2010-12-23 22:10:09+0100, %D title=\CONTEXT\ User Module, %D subtitle=The Transliterator, %D author=Philipp Gesang, @@ -16,6 +16,9 @@ \startmodule[transliterator] +%\ctxlua{environment.loadluafile ("transliterator")} +\ctxlua{dofile ("transliterator.lua")} + \unprotect %D Use the Transliterator by adding \type{\usemodule[transliterator]} somewhere %D before \type{\starttext}. Adjust the Transliterator through the @@ -45,260 +48,6 @@ %D made up my mind concerning Greek transliteration, any suggestions are %D welcome. -%D For clarity's sake we'll stuff everything Lua into one table. -\startluacode -translit = translit or {} -translit.debug_count = 0 -\stopluacode - -%D We want to keep track of all the tables we'll create so we put them into -%D a separate dictionary accompanied by a description string. -\startluacode -translit.tables = translit.tables or {} -\stopluacode - - -%D Next we define respective lists of vowels and consonants as used in the -%D russian alphabet. They are needed later when substitution tables for some -%D idiosyncratic transcriptions are generated. -\startluacode --- If you haven't heard of cyrillic scripts until now you might want to read --- at least the first 15 pages of --- http://www.uni-giessen.de/partosch/eurotex99/berdnikov2.pdf --- before you continue reading this file. - -translit.ru_vowels = {"а", "е", "ё", "и", "й", "о", "у", "ы", "э", "ю", "я", - "А", "Е", "Ё", "И", "Й", "О", "У", "Ы", "Э", "Ю", "Я"} -translit.ru_consonants = {"б", "в", "г", "д", "ж", "з", "к", "л", "м", "н", - "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш", "щ", - "Б", "В", "Г", "Д", "Ж", "З", "К", "Л", "М", "Н", - "П", "Р", "С", "Т", "Ф", "Х", "Ц", "Ч", "Ш", "Щ"} -\stopluacode - -%D Substitution tables are the very heart of the Transliterator. Due to the -%D nature of languages and scripts exhaustive substitution is the simplest -%D method for transliterations and transcriptions unless they are one-to-one -%D mappings like those defined in ISO~9. -%D -%D To achieve better reusability we split the tables into segments, the most -%D obvious being the \type{*_low} and \type{*_upp} variants for sets of lowercase -%D and uppercase characters. Another set is constituted by e.~g. the -%D \type{ru_old*} tables that allow adding transcription of historical -%D characters if needed; by the way those are included in the default -%D transliteration mode \type{ru_old}. - -%D Tables have been migrated into separate lua files. -%D See {\tt -%D trans_tables_glag.lua -%D trans_tables_gr.lua -%D trans_tables_iso9.lua -%D trans_tables_scntfc.lua -%D and -%D trans_tables_trsc.lua.} - -%D Metatables allow for easy concatenation. -\startluacode - -do - -- This returns the Union of both key sets for the “+” operator. - -- The values of the first table will be updated (read: overridden) by those given in the second. - local Dict_add = { - __add = function (dict_a, dict_b) - assert (type(dict_a) == "table" and type(dict_b) == "table") - local dict_result = setmetatable({}, Dict_add) - - for key, val in pairs(dict_a) do - dict_result[key] = val - end - - for key, val in pairs(dict_b) do - dict_result[key] = val - end - return dict_result - end - } - - function translit.make_add_dict(dict) - return setmetatable(dict, Dict_add) - end -end - -\stopluacode - -%D The function \type{translit.subst(s, t)} is used to replace any key of -%D \type{t} that occurs in \type{s} with the according value of \type{t}. - -\startluacode - -function translit.subst (text, tab) - for symbol, replacement in pairs(tab) do - -- using ordinary gsub as suggested by Taco - text = string.gsub(text, symbol, replacement) - end - return text -end - -\stopluacode -%D \type{translit.add_table(t, ...)} is used to build the final substitution tables -%D from those we defined earlier; any keys in the previous table \type{t} are -%D overwritten if they exist in the added tables \type{ta}, too. -\startluacode - - ---function translit.add_table (...) - --print ("---------------HERE---------------") - --print (type(arg) .. ", Länge: " .. #arg) - --local t = translit.make_add_dict{} - --local cnt = 0 - --for _, tab in pairs(arg) do - --cnt=cnt+1 - --print(cnt .. " - " .. type(tab)) - --t = t + tab - --end - --print(cnt) - --for i,j in pairs(t) do print (i,j) end - --return t ---end - -\stopluacode -%D We might want to have all the table data nicely formatted by \CONTEXT\ -%D itself, here's how we'll do it. \type{translit.show_tab(t)} handles a -%D single table \type{t}, builds a Natural TABLE out of its content and -%D hands it down to the machine for typesetting. For debugging purposes it -%D does not only print the replacement pairs but shows their code points as -%D well. -\startluacode - -function translit.show_tab (tab) - -- Output a transliteration table, nicely formatted with natural tables. - local cnt = 0 - context.setupTABLE({"r"}, {"each"}, {style="\\tfx", align="center"}) - context.setupTABLE({"c"}, {"each"}, {frame="off"}) - context.setupTABLE({"r"}, {"each"}, {frame="off"}) - context.setupTABLE({"c"}, {"first"}, {style="italic"}) - context.setupTABLE({"r"}, {"first"}, {style="bold", topframe="on", bottomframe="on"}) - context.setupTABLE({"r"}, {"last"}, {style="bold", topframe="on", bottomframe="on"}) - context.bTABLE({split="yes", option="stretch"}) - context.bTABLEhead() - context.bTR() - context.bTH() context("number") context.eTH() - context.bTH() context("letters") context.eTH() - context.bTH() context("n") context.eTH() - context.bTH() context("replacement") context.eTH() - context.bTH() context("n") context.eTH() - context.bTH() context("bytes") context.eTH() - context.bTH() context("repl. bytes") context.eTH() - context.eTR() - context.eTABLEhead() - context.bTABLEbody() - for key, val in pairs(tab) do - local strempty = function (s) - -- Some characters might not be replaced but removed, others might be - -- multi-char sequences. - if #s == 0 then return "nil" - else - local i = 0 - local r = "" - -- The following loop could be replaced by checking the string length with utf.len(s) … - repeat - i = i + 1 - if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. " " end - until (false) - return r - end - end - cnt = cnt + 1 - context.bTR() - context.bTC() context(cnt) context.eTC() - context.bTC() context(key) context.eTC() - context.bTC() context(string.len(key)) context.eTC() - context.bTC() context(val) context.eTC() - context.bTC() context(string.len(val)) context.eTC() - context.bTC() context(strempty(key)) context.eTC() - context.bTC() context(strempty(val)) context.eTC() - context.eTR() - end - context.eTABLEbody() - context.bTABLEfoot() context.bTR() - context.bTC() context("number") context.eTC() - context.bTC() context("letters") context.eTC() - context.bTC() context("n") context.eTC() - context.bTC() context("replacement") context.eTC() - context.bTC() context("n") context.eTC() - context.bTC() context("bytes") context.eTC() - context.bTC() context("repl. bytes") context.eTC() - context.eTR() - context.eTABLEfoot() - context.eTABLE() -end - -\stopluacode -%D Having to pick out single tables for printing can be tedious, therefore we -%D let Lua do the job in our stead. \type{translit.show_all_tabs()} calls -%D \type{translit.show_tab} on every table that is registered with -%D \type{translit.table} -- and uses its registered key as table heading. -\startluacode - -function translit.show_all_tabs () - environment.loadluafile ("trans_tables_iso9") - environment.loadluafile ("trans_tables_trsc") - environment.loadluafile ("trans_tables_scntfc") - environment.loadluafile ("trans_tables_trsc") - environment.loadluafile ("trans_tables_glag") - environment.loadluafile ("trans_tables_gr") - translit.gen_rules_en() - translit.gen_rules_de() - -- Output all translation tables that are registered within translit.tables. - -- This will be quite unordered. - context ("\\chapter{Transliterator Showing All Tables}") - for key, val in pairs(translit.tables) do - context ("\\section{" .. key .. "}") - translit.show_tab (val) - end -end - -\stopluacode -%D \type{translit.transliterate(m, t)} constitutes the -%D metafunction that is called by the \type{\transliterate} command. -%D It loads the transliteration tables according to \type{method} and calls the -%D corresponding function. -\startluacode - -function translit.transliterate (method, text) - if method == "ru" or - method == "ru_old" or - method == "ru_old_jer_hack" or - method == "all" - then - environment.loadluafile ("trans_tables_iso9") - text = translit.iso9 (method, text) - elseif method == "ru_transcript_de" or - method == "ru_transcript_de_exp" or -- experimental lpeg - method == "ru_transcript_en" or - method == "ru_transcript_en_sub" or -- old multiple substitution - method == "ru_cz" or - method == "ocs_cz" - then - environment.loadluafile ("trans_tables_trsc") - text = translit.transcript (method, text) - elseif method == "iso9_ocs" or - method == "iso9_ocs_hack" or - method == "ocs" or - method == "ocs_gla" - then - environment.loadluafile ("trans_tables_scntfc") - text = translit.scientific (method, text) - elseif method == "gr" or - method == "gr_n" - then - environment.loadluafile ("trans_tables_gr") - text = translit.dogreek (method, text) - end - context (text) -end -\stopluacode - - %D The following will help debugging and reviewing tables. Make sure your %D typescript can handle the characters, in general it's no use with Latin %D Modern which unfortunately provides only a restricted set of the unicode @@ -307,7 +56,7 @@ end %D The user-level command to output a single substitution table is %D \type{\showOneTranslitTab{#1}}. \define[1]\showOneTranslitTab{% - \ctxlua{ + \startluacode environment.loadluafile ("trans_tables_iso9") environment.loadluafile ("trans_tables_trsc") environment.loadluafile ("trans_tables_scntfc") @@ -316,8 +65,8 @@ end environment.loadluafile ("trans_tables_gr") translit.gen_rules_en() translit.gen_rules_de() - translit.show_tab(#1) - }% + translit.show_tab("\luaescapestring{#1}") + \stopluacode } %D The user-level command to output all defined tables is @@ -326,13 +75,6 @@ end \ctxlua{translit.show_all_tabs()}% } -\startluacode -function translit.debug_next () - translit.debug_count = translit.debug_count + 1 - tex.print("\\tfxx{\\bf translit debug msg. nr.~" .. translit.debug_count .. "}") -end -\stopluacode - \def\translitDebug#1{% \doif{\TRLdebug}{true}{% %\inmargin{\ctxlua{translit.debug_next()} #1}% Unreadable with too many debug messages. -- cgit v1.2.3