diff options
| author | Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> | 2010-12-24 00:31:40 +0100 | 
|---|---|---|
| committer | Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> | 2010-12-24 00:31:40 +0100 | 
| commit | 07ded7bc27e19ad200c5e14fd632a7ae8a36ac82 (patch) | |
| tree | b2160f6c58e5a151f248c007f51f68980eb68ca2 /tex | |
| parent | ed5c1a6441bc9498a65fec5b48c0cf110ec796ba (diff) | |
| download | transliterator-07ded7bc27e19ad200c5e14fd632a7ae8a36ac82.tar.gz | |
moved lua stuff from main file to separate file
Diffstat (limited to 'tex')
| -rw-r--r-- | tex/context/third/transliterator/t-transliterator.mkiv | 272 | ||||
| -rw-r--r-- | tex/context/third/transliterator/transliterator.lua | 220 | 
2 files changed, 227 insertions, 265 deletions
diff --git a/tex/context/third/transliterator/t-transliterator.mkiv b/tex/context/third/transliterator/t-transliterator.mkiv index b4a3c86..25c4231 100644 --- a/tex/context/third/transliterator/t-transliterator.mkiv +++ b/tex/context/third/transliterator/t-transliterator.mkiv @@ -1,6 +1,6 @@  %D \module  %D   [      file=t-transliterator, -%D        version=2010.03.10, +%D        version=2010-12-23 22:10:09+0100,  %D          title=\CONTEXT\ User Module,  %D       subtitle=The Transliterator,  %D         author=Philipp Gesang, @@ -16,6 +16,9 @@  \startmodule[transliterator] +%\ctxlua{environment.loadluafile ("transliterator")} +\ctxlua{dofile ("transliterator.lua")} +  \unprotect  %D Use the Transliterator by adding \type{\usemodule[transliterator]} somewhere  %D before \type{\starttext}.  Adjust the Transliterator through the @@ -45,260 +48,6 @@  %D made up my mind concerning Greek transliteration, any suggestions are  %D welcome. -%D For clarity's sake we'll stuff everything Lua into one table. -\startluacode -translit = translit or {} -translit.debug_count = 0 -\stopluacode - -%D We want to keep track of all the tables we'll create so we put them into -%D a separate dictionary accompanied by a description string. -\startluacode -translit.tables = translit.tables or {} -\stopluacode - - -%D Next we define respective lists of vowels and consonants  as used in the -%D russian alphabet.  They are needed later when substitution tables for some -%D idiosyncratic transcriptions are generated. -\startluacode --- If you haven't heard of cyrillic scripts until now you might want to read --- at least the first 15 pages of  --- http://www.uni-giessen.de/partosch/eurotex99/berdnikov2.pdf --- before you continue reading this file. - -translit.ru_vowels = {"а", "е", "ё", "и", "й", "о", "у", "ы", "э", "ю", "я", -                      "А", "Е", "Ё", "И", "Й", "О", "У", "Ы", "Э", "Ю", "Я"} -translit.ru_consonants = {"б", "в", "г", "д", "ж", "з", "к", "л", "м", "н",  -                          "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш", "щ", -                          "Б", "В", "Г", "Д", "Ж", "З", "К", "Л", "М", "Н",  -                          "П", "Р", "С", "Т", "Ф", "Х", "Ц", "Ч", "Ш", "Щ"} -\stopluacode - -%D Substitution tables are the very heart of the Transliterator.  Due to the -%D nature of languages and scripts exhaustive substitution is the simplest -%D method for transliterations and transcriptions unless they are one-to-one -%D mappings like those defined in ISO~9. -%D -%D To achieve better reusability we split the tables into segments, the most -%D obvious being the \type{*_low} and \type{*_upp} variants for sets of lowercase -%D and uppercase characters.  Another set is constituted by e.~g. the -%D \type{ru_old*} tables that allow adding transcription of historical -%D characters if needed; by the way those are included in the default -%D transliteration mode \type{ru_old}. - -%D Tables have been migrated into separate lua files. -%D See {\tt -%D trans_tables_glag.lua -%D trans_tables_gr.lua -%D trans_tables_iso9.lua -%D trans_tables_scntfc.lua -%D and -%D trans_tables_trsc.lua.} - -%D Metatables allow for easy concatenation. -\startluacode - -do -    -- This returns the Union of both key sets for the “+” operator. -    -- The values of the first table will be updated (read: overridden) by those given in the second. -    local Dict_add = { -        __add = function (dict_a, dict_b) -            assert (type(dict_a) == "table" and type(dict_b) == "table") -            local dict_result = setmetatable({}, Dict_add) - -            for key, val in pairs(dict_a) do -                dict_result[key] = val -            end - -            for key, val in pairs(dict_b) do -                dict_result[key] = val -            end -            return dict_result -        end -    } - -    function translit.make_add_dict(dict) -        return setmetatable(dict, Dict_add) -    end -end - -\stopluacode - -%D The function \type{translit.subst(s, t)} is used to replace any key of -%D \type{t} that occurs in \type{s} with the according value of \type{t}. - -\startluacode - -function translit.subst (text, tab) -  for symbol, replacement in pairs(tab) do -    -- using ordinary gsub as suggested by Taco -    text = string.gsub(text, symbol, replacement) -  end -  return text -end - -\stopluacode -%D \type{translit.add_table(t, ...)} is used to build the final substitution tables -%D from those we defined earlier; any keys in the previous table \type{t} are -%D overwritten if they exist in the added tables \type{ta}, too. -\startluacode - - ---function translit.add_table (...) -  --print ("---------------HERE---------------") -  --print (type(arg) .. ", Länge: " .. #arg) -  --local t = translit.make_add_dict{} -  --local cnt = 0 -  --for _, tab in pairs(arg) do -    --cnt=cnt+1 -    --print(cnt .. " - " .. type(tab)) -    --t = t + tab -  --end -  --print(cnt) -  --for i,j in pairs(t) do print (i,j) end -  --return t ---end - -\stopluacode -%D We might want to have all the table data nicely formatted by \CONTEXT\  -%D itself, here's how we'll do it.  \type{translit.show_tab(t)} handles a -%D single table \type{t}, builds a Natural TABLE out of its content and -%D hands it down to the machine for typesetting.  For debugging purposes it -%D does not only print the replacement pairs but shows their code points as -%D well. -\startluacode - -function translit.show_tab (tab) -  -- Output a transliteration table, nicely formatted with natural tables. -  local cnt = 0 -  context.setupTABLE({"r"}, {"each"},     {style="\\tfx", align="center"}) -  context.setupTABLE({"c"}, {"each"},     {frame="off"}) -  context.setupTABLE({"r"}, {"each"},     {frame="off"}) -  context.setupTABLE({"c"}, {"first"},    {style="italic"}) -  context.setupTABLE({"r"}, {"first"},    {style="bold", topframe="on", bottomframe="on"}) -  context.setupTABLE({"r"}, {"last"},     {style="bold", topframe="on", bottomframe="on"}) -  context.bTABLE({split="yes", option="stretch"}) -    context.bTABLEhead() -      context.bTR() -        context.bTH() context("number")         context.eTH() -        context.bTH() context("letters")        context.eTH() -        context.bTH() context("n")              context.eTH() -        context.bTH() context("replacement")    context.eTH() -        context.bTH() context("n")              context.eTH() -        context.bTH() context("bytes")          context.eTH() -        context.bTH() context("repl. bytes")    context.eTH() -      context.eTR() -    context.eTABLEhead() -    context.bTABLEbody() -      for key, val in pairs(tab) do -        local strempty = function (s)  -          -- Some characters might not be replaced but removed, others might be -          -- multi-char sequences. -          if #s == 0 then return "nil" -          else  -            local i = 0 -            local r = "" -            -- The following loop could be replaced by checking the string length with utf.len(s) … -            repeat -              i = i + 1 -              if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. " "  end -            until (false) -            return r -          end -        end -        cnt = cnt + 1 -        context.bTR() -          context.bTC() context(cnt)           context.eTC() -          context.bTC() context(key)           context.eTC() -          context.bTC() context(string.len(key))  context.eTC() -          context.bTC() context(val)           context.eTC() -          context.bTC() context(string.len(val))  context.eTC() -          context.bTC() context(strempty(key)) context.eTC() -          context.bTC() context(strempty(val)) context.eTC() -        context.eTR() -      end -    context.eTABLEbody() -    context.bTABLEfoot() context.bTR() -      context.bTC() context("number")       context.eTC() -      context.bTC() context("letters")      context.eTC() -      context.bTC() context("n")            context.eTC() -      context.bTC() context("replacement")  context.eTC() -      context.bTC() context("n")            context.eTC() -      context.bTC() context("bytes")        context.eTC() -      context.bTC() context("repl. bytes")  context.eTC() -      context.eTR() -    context.eTABLEfoot() -  context.eTABLE() -end - -\stopluacode -%D Having to pick out single tables for printing can be tedious, therefore we -%D let Lua do the job in our stead.  \type{translit.show_all_tabs()} calls -%D \type{translit.show_tab} on every table that is registered with -%D \type{translit.table} -- and uses its registered key as table heading. -\startluacode - -function translit.show_all_tabs () -    environment.loadluafile ("trans_tables_iso9") -    environment.loadluafile ("trans_tables_trsc") -    environment.loadluafile ("trans_tables_scntfc") -    environment.loadluafile ("trans_tables_trsc") -    environment.loadluafile ("trans_tables_glag") -    environment.loadluafile ("trans_tables_gr") -    translit.gen_rules_en() -    translit.gen_rules_de() -    -- Output all translation tables that are registered within translit.tables. -    -- This will be quite unordered.  -    context ("\\chapter{Transliterator Showing All Tables}") -    for key, val in pairs(translit.tables) do -        context ("\\section{" .. key .. "}") -        translit.show_tab (val) -    end -end - -\stopluacode -%D \type{translit.transliterate(m, t)} constitutes the -%D metafunction that is called by the \type{\transliterate} command. -%D It loads the transliteration tables according to \type{method} and calls the -%D corresponding function. -\startluacode - -function translit.transliterate (method, text) -  if      method == "ru"                or -          method == "ru_old"            or  -          method == "ru_old_jer_hack"   or  -          method == "all"  -          then -    environment.loadluafile ("trans_tables_iso9") -    text = translit.iso9 (method, text) -  elseif  method == "ru_transcript_de"      or -          method == "ru_transcript_de_exp"  or -- experimental lpeg -          method == "ru_transcript_en"      or -          method == "ru_transcript_en_sub"  or -- old multiple substitution -          method == "ru_cz"                 or -          method == "ocs_cz" -          then -    environment.loadluafile ("trans_tables_trsc") -    text = translit.transcript (method, text) -  elseif  method == "iso9_ocs"      or -          method == "iso9_ocs_hack" or -          method == "ocs"           or -          method == "ocs_gla" -          then -    environment.loadluafile ("trans_tables_scntfc") -    text = translit.scientific (method, text) -  elseif  method == "gr"        or -          method == "gr_n" -          then -    environment.loadluafile ("trans_tables_gr") -    text = translit.dogreek (method, text) -  end - context (text) -end -\stopluacode - -  %D The following will help debugging and reviewing tables.  Make sure your  %D typescript can handle the characters, in general it's no use with Latin  %D Modern which unfortunately provides only a restricted set of the unicode @@ -307,7 +56,7 @@ end  %D The user-level command to output a single substitution table is  %D \type{\showOneTranslitTab{#1}}.  \define[1]\showOneTranslitTab{% -  \ctxlua{ +  \startluacode      environment.loadluafile ("trans_tables_iso9")      environment.loadluafile ("trans_tables_trsc")      environment.loadluafile ("trans_tables_scntfc") @@ -316,8 +65,8 @@ end      environment.loadluafile ("trans_tables_gr")      translit.gen_rules_en()      translit.gen_rules_de() -    translit.show_tab(#1) -  }% +    translit.show_tab("\luaescapestring{#1}") +  \stopluacode  }  %D The user-level command to output all defined tables is @@ -326,13 +75,6 @@ end    \ctxlua{translit.show_all_tabs()}%  } -\startluacode -function translit.debug_next () -  translit.debug_count = translit.debug_count + 1 -  tex.print("\\tfxx{\\bf translit debug msg. nr.~" .. translit.debug_count ..  "}") -end -\stopluacode -  \def\translitDebug#1{%    \doif{\TRLdebug}{true}{%      %\inmargin{\ctxlua{translit.debug_next()} #1}% Unreadable with too many debug messages. diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua new file mode 100644 index 0000000..1bcbb32 --- /dev/null +++ b/tex/context/third/transliterator/transliterator.lua @@ -0,0 +1,220 @@ +#!/usr/bin/env texlua +-------------------------------------------------------------------------------- +--         FILE:  transliterator.lua +--        USAGE:  to be called by t-transliterator.mkiv  +--  DESCRIPTION:  basic lua environment for the Transliterator module +-- REQUIREMENTS:  latest ConTeXt MkIV +--       AUTHOR:  Philipp Gesang (Phg), <megas.kapaneus@gmail.com> +--      CREATED:  2010-12-23 22:12:31+0100 +-------------------------------------------------------------------------------- +-- + +translit = translit or {} +translit.tables = translit.tables or {} + +-------------------------------------------------------------------------------- +-- Predefining vowel lists +-------------------------------------------------------------------------------- +-- If you haven't heard of cyrillic scripts until now you might want to read +-- at least the first 15 pages of  +-- http://www.uni-giessen.de/partosch/eurotex99/berdnikov2.pdf +-- before you continue reading this file. +translit.ru_vowels = {"а", "е", "ё", "и", "й", "о", "у", "ы", "э", "ю", "я", +                      "А", "Е", "Ё", "И", "Й", "О", "У", "Ы", "Э", "Ю", "Я"} +translit.ru_consonants = {"б", "в", "г", "д", "ж", "з", "к", "л", "м", "н",  +                          "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш", "щ", +                          "Б", "В", "Г", "Д", "Ж", "З", "К", "Л", "М", "Н",  +                          "П", "Р", "С", "Т", "Ф", "Х", "Ц", "Ч", "Ш", "Щ"} + +-- Substitution tables are the very heart of the Transliterator.  Due to the +-- nature of languages and scripts exhaustive substitution is the simplest +-- method for transliterations and transcriptions unless they are one-to-one +-- mappings like those defined in ISO~9. +-- +-- To achieve better reusability we split the tables into segments, the most +-- obvious being the \type{*_low} and \type{*_upp} variants for sets of lowercase +-- and uppercase characters.  Another set is constituted by e.~g. the +-- \type{ru_old*} tables that allow adding transcription of historical +-- characters if needed; by the way those are included in the default +-- transliteration mode \type{ru_old}. + +-- Tables can be found in separate Lua files. +-- See {\tt +-- trans_tables_glag.lua +-- trans_tables_gr.lua +-- trans_tables_iso9.lua +-- trans_tables_scntfc.lua +-- and +-- trans_tables_trsc.lua.} + +-- Metatables allow for lazy concatenation. + +do +    -- This returns the Union of both key sets for the “+” operator. +    -- The values of the first table will be updated (read: overridden) by those given in the second. +    local Dict_add = { +        __add = function (dict_a, dict_b) +            assert (type(dict_a) == "table" and type(dict_b) == "table") +            local dict_result = setmetatable({}, Dict_add) + +            for key, val in pairs(dict_a) do +                dict_result[key] = val +            end + +            for key, val in pairs(dict_b) do +                dict_result[key] = val +            end +            return dict_result +        end +    } + +    function translit.make_add_dict(dict) +        return setmetatable(dict, Dict_add) +    end +end + +-- We might want to have all the table data nicely formatted by \CONTEXT\  +-- itself, here's how we'll do it.  \type{translit.show_tab(t)} handles a +-- single table \type{t}, builds a Natural TABLE out of its content and +-- hands it down to the machine for typesetting.  For debugging purposes it +-- does not only print the replacement pairs but shows their code points as +-- well. + +-- handle the input chars and replacement values +local strempty = function (s)  +    if s == "" then return "nil" +    else  +        -- add the unicode positions of the replacements (can be more +        -- than one with composed diacritics +        local i = 1 +        local r = "" +        repeat +            r = r .. utf.byte(s,i) .. " " +            i = i + 1 +        until utf.byte(s,i) == nil +        return r +    end +end + +--function translit.show_tab (tab_name) +    --local tab = translit[tab_name] +function translit.show_tab (tab) +    -- Output a transliteration table, nicely formatted with natural tables. +    -- Lots of calls to context() but as it’s only a goodie this doesn’t +    -- really matter. +    local cnt = 0 +    context.setupTABLE({"r"}, {"each"},     {style="\\tfx", align="center"}) +    context.setupTABLE({"c"}, {"each"},     {frame="off"}) +    context.setupTABLE({"r"}, {"each"},     {frame="off"}) +    context.setupTABLE({"c"}, {"first"},    {style="italic"}) +    context.setupTABLE({"r"}, {"first"},    {style="bold", topframe="on", bottomframe="on"}) +    context.setupTABLE({"r"}, {"last"},     {style="bold", topframe="on", bottomframe="on"}) +    context.bTABLE({split="yes", option="stretch"}) +        context.bTABLEhead() +        context.bTR() +            context.bTH() context("number")         context.eTH() +            context.bTH() context("letters")        context.eTH() +            context.bTH() context("n")              context.eTH() +            context.bTH() context("replacement")    context.eTH() +            context.bTH() context("n")              context.eTH() +            context.bTH() context("bytes")          context.eTH() +            context.bTH() context("repl. bytes")    context.eTH() +        context.eTR() +        context.eTABLEhead() +        context.bTABLEbody() + +        for key, val in next,tab do +            cnt = cnt + 1 +            context.bTR() +            context.bTC() context(cnt)              context.eTC() +            context.bTC() context(key)              context.eTC() +            context.bTC() context(string.len(key))  context.eTC() +            context.bTC() context(val)              context.eTC() +            context.bTC() context(string.len(val))  context.eTC() +            context.bTC() context(strempty(key))    context.eTC() +            context.bTC() context(strempty(val))    context.eTC() +            context.eTR() +        end + +        context.eTABLEbody() +        context.bTABLEfoot() context.bTR() +        context.bTC() context("number")       context.eTC() +        context.bTC() context("letters")      context.eTC() +        context.bTC() context("n")            context.eTC() +        context.bTC() context("replacement")  context.eTC() +        context.bTC() context("n")            context.eTC() +        context.bTC() context("bytes")        context.eTC() +        context.bTC() context("repl. bytes")  context.eTC() +        context.eTR() +        context.eTABLEfoot() +    context.eTABLE() +end + +-- Having to pick out single tables for printing can be tedious, therefore we +-- let Lua do the job in our stead.  \type{translit.show_all_tabs()} calls +-- \type{translit.show_tab} on every table that is registered with +-- \type{translit.table} -- and uses its registered key as table heading. + +function translit.show_all_tabs () +    environment.loadluafile ("trans_tables_iso9") +    environment.loadluafile ("trans_tables_trsc") +    environment.loadluafile ("trans_tables_scntfc") +    environment.loadluafile ("trans_tables_trsc") +    environment.loadluafile ("trans_tables_glag") +    environment.loadluafile ("trans_tables_gr") +    translit.gen_rules_en() +    translit.gen_rules_de() +    -- Output all translation tables that are registered within translit.tables. +    -- This will be quite unordered.  +    context.chapter("Transliterator Showing All Tables") +    for key, val in pairs(translit.tables) do +        context.section(key) +        translit.show_tab (val) +    end +end + + +-- \type{translit.transliterate(m, t)} constitutes the +-- metafunction that is called by the \type{\transliterate} command. +-- It loads the transliteration tables according to \type{method} and calls the +-- corresponding function. + +function translit.transliterate (method, text) +    if      method == "ru"                or +            method == "ru_old"            or  +            method == "ru_old_jer_hack"   or  +            method == "all"  +            then +        environment.loadluafile ("trans_tables_iso9") +        text = translit.iso9 (method, text) +    elseif  method == "ru_transcript_de"      or +            method == "ru_transcript_de_exp"  or -- experimental lpeg +            method == "ru_transcript_en"      or +            method == "ru_transcript_en_sub"  or -- old multiple substitution +            method == "ru_cz"                 or +            method == "ocs_cz" +            then +        environment.loadluafile ("trans_tables_trsc") +        text = translit.transcript (method, text) +    elseif  method == "iso9_ocs"      or +            method == "iso9_ocs_hack" or +            method == "ocs"           or +            method == "ocs_gla" +            then +        environment.loadluafile ("trans_tables_scntfc") +        text = translit.scientific (method, text) +    elseif  method == "gr"        or +            method == "gr_n" +            then +        environment.loadluafile ("trans_tables_gr") +        text = translit.dogreek (method, text) +    end +    context (text) +end + +translit.debug_count = 0 +-- for internal use only +function translit.debug_next () +    translit.debug_count = translit.debug_count + 1 +    context("\\tfxx{\\bf translit debug msg. nr.~" .. translit.debug_count ..  "}") +end  | 
