diff options
Diffstat (limited to 'tex/context/third')
| -rw-r--r-- | tex/context/third/transliterator/t-transliterator.mkiv | 155 | 
1 files changed, 86 insertions, 69 deletions
diff --git a/tex/context/third/transliterator/t-transliterator.mkiv b/tex/context/third/transliterator/t-transliterator.mkiv index fc07c1d..8f8cadf 100644 --- a/tex/context/third/transliterator/t-transliterator.mkiv +++ b/tex/context/third/transliterator/t-transliterator.mkiv @@ -11,7 +11,7 @@  %D          email={pgesang at ix dot urz dot uni-heidelberg dot de}]  %D This module is licensed under the conditions of the BSD license with   %D two clauses: http://www.freebsd.org/copyright/freebsd-license.html. -%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/.\newpage +%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/.  \writestatus{loading}{Transliteration from non-Latin scripts} @@ -766,6 +766,8 @@ translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp  translit.ru_trsc_cz_add_low = {    ["ѕ"] = "dz", +  ["з"] = "z", +  ["ꙁ"] = "z",    ["і"] = "ï",    ["ѹ"] = "u",    ["ѡ"] = "ō", @@ -790,6 +792,8 @@ translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit  translit.ru_trsc_cz_add_upp = {    ["Ѕ"] = "Dz", +  ["З"] = "Z", +  ["Ꙁ"] = "Z",    ["І"] = "Ï",    ["Ѹ"] = "U",    ["Ѡ"] = "Ō", @@ -824,6 +828,15 @@ translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit  -- Source p. 77 of  -- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf +----------------------------------------------------------------------- +-- Lowercase and uppercase letter Uk -- “scientific transliteration” -- +----------------------------------------------------------------------- + +translit.ocs_uk = { +  ["oу"] = "u", +  ["оу"] = "u", +  ["Оу"] = "U", +}  -----------------------------------------------------------------------------  -- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” --  ----------------------------------------------------------------------------- @@ -853,7 +866,6 @@ translit.ocs_low = {    ["р"] = "r",    ["с"] = "s",    ["т"] = "t", -  ["оу"] = "u",    ["ѹ"] = "u",    ["ꙋ"] = "u",    ["ф"] = "f", @@ -915,7 +927,6 @@ translit.ocs_upp = {    ["Р"] = "R",    ["С"] = "S",    ["Т"] = "T", -  ["Оу"] = "U",    ["Ѹ"] = "U",    ["ꙋ"] = "U",    ["Ф"] = "F", @@ -1803,61 +1814,64 @@ end  function translit.show_tab (tab)    -- Output a transliteration table, nicely formatted with natural tables.    local cnt = 0 -  context ("\\setupTABLE[r][each]   [style=\\tfx,align=center] ") -  context ("\\setupTABLE[c][each]   [frame=off]") -  context ("\\setupTABLE[r][each]   [frame=off]") -  context ("\\setupTABLE[c][first]  [style=italic]") -  context ("\\setupTABLE[r][first]  [style=bold,topframe=on,bottomframe=on]") -  context ("\\setupTABLE[r][last]   [style=bold,topframe=on,bottomframe=on]") -  context ("\\bTABLE [split=yes,option=stretch]") -  context ("\\bTABLEhead\\bTR".. -    "\\bTH Number\\eTH" .. -    "\\bTH letters\\eTH" ..  -    "\\bTH n\\eTH" ..  -    "\\bTH replacement\\eTH".. -    "\\bTH n\\eTH" ..  -    "\\bTH bytes\\eTH".. -    "\\bTH repl. bytes".. -    "\\eTH\\eTR\\eTABLEhead ") -  context("\\bTABLEbody") -  for key, val in pairs(tab) do -    local strempty = function (s)  -      -- Some characters might not be replaced but removed, others might be -      -- multi-char sequences. -      if #s == 0 then return "nil" -      else  -        local i = 0 -        local r = "" -        -- The following loop could be replaced by checking the string length with utf.len(s) … -        repeat -          i = i + 1 -          if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. " "  end -        until (false) -        return r +  context.setupTABLE({"r"}, {"each"},     {style="\\tfx", align="center"}) +  context.setupTABLE({"c"}, {"each"},     {frame="off"}) +  context.setupTABLE({"r"}, {"each"},     {frame="off"}) +  context.setupTABLE({"c"}, {"first"},    {style="italic"}) +  context.setupTABLE({"r"}, {"first"},    {style="bold", topframe="on", bottomframe="on"}) +  context.setupTABLE({"r"}, {"last"},     {style="bold", topframe="on", bottomframe="on"}) +  context.bTABLE({split="yes", option="stretch"}) +    context.bTABLEhead() +      context.bTR() +        context.bTH() context("number")         context.eTH() +        context.bTH() context("letters")        context.eTH() +        context.bTH() context("n")              context.eTH() +        context.bTH() context("replacement")    context.eTH() +        context.bTH() context("n")              context.eTH() +        context.bTH() context("bytes")          context.eTH() +        context.bTH() context("repl. bytes")    context.eTH() +      context.eTR() +    context.eTABLEhead() +    context.bTABLEbody() +      for key, val in pairs(tab) do +        local strempty = function (s)  +          -- Some characters might not be replaced but removed, others might be +          -- multi-char sequences. +          if #s == 0 then return "nil" +          else  +            local i = 0 +            local r = "" +            -- The following loop could be replaced by checking the string length with utf.len(s) … +            repeat +              i = i + 1 +              if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. " "  end +            until (false) +            return r +          end +        end +        cnt = cnt + 1 +        context.bTR() +          context.bTC() context(cnt)           context.eTC() +          context.bTC() context(key)           context.eTC() +          context.bTC() context(utf.len(key))  context.eTC() +          context.bTC() context(val)           context.eTC() +          context.bTC() context(utf.len(val))  context.eTC() +          context.bTC() context(strempty(key)) context.eTC() +          context.bTC() context(strempty(val)) context.eTC() +        context.eTR()        end -    end -    cnt = cnt + 1 -    context ("\\bTR\\bTC " .. cnt .. "\\eTC") -    context ("\\bTC " ..  -      key .. "\\eTC\\bTC " ..  -      utf.len(key) .. "\\eTC\\bTC " ..  -      val .. "\\eTC\\bTC " ..   -      utf.len(val) .. "\\eTC\\bTC " ..   -      strempty(key) .. "\\eTC\\bTC " ..   -      strempty(val) .. "\\eTC") -    context ("\\eTR ") -  end -  context("\\eTABLEbody") -  context ("\\bTABLEfoot\\bTR".. -    "\\bTC Number\\eTC" .. -    "\\bTC letters\\eTC" ..  -    "\\bTC n\\eTC" ..  -    "\\bTC replacement\\eTC".. -    "\\bTC n\\eTC" ..  -    "\\bTC bytes\\eTC".. -    "\\bTC repl. bytes".. -    "\\eTC\\eTR\\eTABLEfoot ") -  context ("\\eTABLE ") +    context.eTABLEbody() +    context.bTABLEfoot() context.bTR() +      context.bTC() context("number")       context.eTC() +      context.bTC() context("letters")      context.eTC() +      context.bTC() context("n")            context.eTC() +      context.bTC() context("replacement")  context.eTC() +      context.bTC() context("n")            context.eTC() +      context.bTC() context("bytes")        context.eTC() +      context.bTC() context("repl. bytes")  context.eTC() +      context.eTR() +    context.eTABLEfoot() +  context.eTABLE()  end  \stopluacode @@ -1891,6 +1905,18 @@ function translit.transliterate (method, text)    if method == "ru" then      translit.add_table(repl_tab, translit.ru_upp)      translit.add_table(repl_tab, translit.ru_low) +  elseif method == "ru_old" then +    translit.add_table(repl_tab, translit.ru_upp) +    translit.add_table(repl_tab, translit.ru_low)  +    translit.add_table(repl_tab, translit.ru_old_upp)  +    translit.add_table(repl_tab, translit.ru_old_low)  +  elseif method == "all" then +    translit.add_table(repl_tab, translit.ru_upp) +    translit.add_table(repl_tab, translit.ru_low)  +    translit.add_table(repl_tab, translit.ru_old_upp)  +    translit.add_table(repl_tab, translit.ru_old_low)  +    translit.add_table(repl_tab, translit.non_ru_upp) +    translit.add_table(repl_tab, translit.non_ru_low)     elseif method == "ru_transcript_de" then      text = translit.subst (text, translit.ru_trsc_jrule)      text = translit.subst (text, translit.ru_trsc_irule) @@ -1910,18 +1936,6 @@ function translit.transliterate (method, text)      translit.add_table(repl_tab, translit.ru_trsc_en_low_first)      translit.add_table(repl_tab, translit.ru_trsc_en_upp)      translit.add_table(repl_tab, translit.ru_trsc_en_low) -  elseif method == "ru_old" then -    translit.add_table(repl_tab, translit.ru_upp) -    translit.add_table(repl_tab, translit.ru_low)  -    translit.add_table(repl_tab, translit.ru_old_upp)  -    translit.add_table(repl_tab, translit.ru_old_low)  -  elseif method == "all" then -    translit.add_table(repl_tab, translit.ru_upp) -    translit.add_table(repl_tab, translit.ru_low)  -    translit.add_table(repl_tab, translit.ru_old_upp)  -    translit.add_table(repl_tab, translit.ru_old_low)  -    translit.add_table(repl_tab, translit.non_ru_upp) -    translit.add_table(repl_tab, translit.non_ru_low)     elseif method == "iso9_ocs" then      translit.add_table(repl_tab, translit.ru_upp)      translit.add_table(repl_tab, translit.ru_low)  @@ -1930,6 +1944,9 @@ function translit.transliterate (method, text)      translit.add_table(repl_tab, translit.ocs_add_upp)      translit.add_table(repl_tab, translit.ocs_add_low)     elseif method == "ocs" then +    translit.add_table(repl_tab, translit.ocs_uk)  +    text = translit.subst (text, repl_tab) +    repl_tab = {}      translit.add_table(repl_tab, translit.ocs_low)       translit.add_table(repl_tab, translit.ocs_upp)     elseif method == "ocs_gla" then  | 
