diff options
author | Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> | 2010-03-02 14:06:04 +0100 |
---|---|---|
committer | Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> | 2010-03-02 14:06:04 +0100 |
commit | f0765800a77932422dcdb02766a06ddd6c7541d1 (patch) | |
tree | d7b6b908284d6d65bc276eb8370c6e45031c6eaa /tex/context/third | |
parent | fea0bcccf81692ab4e7bcb2a2e60793a2247daff (diff) | |
download | transliterator-f0765800a77932422dcdb02766a06ddd6c7541d1.tar.gz |
Continued documentation
Diffstat (limited to 'tex/context/third')
-rw-r--r-- | tex/context/third/transliterator/t-transliterator.mkiv | 155 |
1 files changed, 86 insertions, 69 deletions
diff --git a/tex/context/third/transliterator/t-transliterator.mkiv b/tex/context/third/transliterator/t-transliterator.mkiv index fc07c1d..8f8cadf 100644 --- a/tex/context/third/transliterator/t-transliterator.mkiv +++ b/tex/context/third/transliterator/t-transliterator.mkiv @@ -11,7 +11,7 @@ %D email={pgesang at ix dot urz dot uni-heidelberg dot de}] %D This module is licensed under the conditions of the BSD license with %D two clauses: http://www.freebsd.org/copyright/freebsd-license.html. -%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/.\newpage +%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/. \writestatus{loading}{Transliteration from non-Latin scripts} @@ -766,6 +766,8 @@ translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp translit.ru_trsc_cz_add_low = { ["ѕ"] = "dz", + ["з"] = "z", + ["ꙁ"] = "z", ["і"] = "ï", ["ѹ"] = "u", ["ѡ"] = "ō", @@ -790,6 +792,8 @@ translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit translit.ru_trsc_cz_add_upp = { ["Ѕ"] = "Dz", + ["З"] = "Z", + ["Ꙁ"] = "Z", ["І"] = "Ï", ["Ѹ"] = "U", ["Ѡ"] = "Ō", @@ -824,6 +828,15 @@ translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit -- Source p. 77 of -- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf +----------------------------------------------------------------------- +-- Lowercase and uppercase letter Uk -- “scientific transliteration” -- +----------------------------------------------------------------------- + +translit.ocs_uk = { + ["oу"] = "u", + ["оу"] = "u", + ["Оу"] = "U", +} ----------------------------------------------------------------------------- -- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” -- ----------------------------------------------------------------------------- @@ -853,7 +866,6 @@ translit.ocs_low = { ["р"] = "r", ["с"] = "s", ["т"] = "t", - ["оу"] = "u", ["ѹ"] = "u", ["ꙋ"] = "u", ["ф"] = "f", @@ -915,7 +927,6 @@ translit.ocs_upp = { ["Р"] = "R", ["С"] = "S", ["Т"] = "T", - ["Оу"] = "U", ["Ѹ"] = "U", ["ꙋ"] = "U", ["Ф"] = "F", @@ -1803,61 +1814,64 @@ end function translit.show_tab (tab) -- Output a transliteration table, nicely formatted with natural tables. local cnt = 0 - context ("\\setupTABLE[r][each] [style=\\tfx,align=center] ") - context ("\\setupTABLE[c][each] [frame=off]") - context ("\\setupTABLE[r][each] [frame=off]") - context ("\\setupTABLE[c][first] [style=italic]") - context ("\\setupTABLE[r][first] [style=bold,topframe=on,bottomframe=on]") - context ("\\setupTABLE[r][last] [style=bold,topframe=on,bottomframe=on]") - context ("\\bTABLE [split=yes,option=stretch]") - context ("\\bTABLEhead\\bTR".. - "\\bTH Number\\eTH" .. - "\\bTH letters\\eTH" .. - "\\bTH n\\eTH" .. - "\\bTH replacement\\eTH".. - "\\bTH n\\eTH" .. - "\\bTH bytes\\eTH".. - "\\bTH repl. bytes".. - "\\eTH\\eTR\\eTABLEhead ") - context("\\bTABLEbody") - for key, val in pairs(tab) do - local strempty = function (s) - -- Some characters might not be replaced but removed, others might be - -- multi-char sequences. - if #s == 0 then return "nil" - else - local i = 0 - local r = "" - -- The following loop could be replaced by checking the string length with utf.len(s) … - repeat - i = i + 1 - if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. " " end - until (false) - return r + context.setupTABLE({"r"}, {"each"}, {style="\\tfx", align="center"}) + context.setupTABLE({"c"}, {"each"}, {frame="off"}) + context.setupTABLE({"r"}, {"each"}, {frame="off"}) + context.setupTABLE({"c"}, {"first"}, {style="italic"}) + context.setupTABLE({"r"}, {"first"}, {style="bold", topframe="on", bottomframe="on"}) + context.setupTABLE({"r"}, {"last"}, {style="bold", topframe="on", bottomframe="on"}) + context.bTABLE({split="yes", option="stretch"}) + context.bTABLEhead() + context.bTR() + context.bTH() context("number") context.eTH() + context.bTH() context("letters") context.eTH() + context.bTH() context("n") context.eTH() + context.bTH() context("replacement") context.eTH() + context.bTH() context("n") context.eTH() + context.bTH() context("bytes") context.eTH() + context.bTH() context("repl. bytes") context.eTH() + context.eTR() + context.eTABLEhead() + context.bTABLEbody() + for key, val in pairs(tab) do + local strempty = function (s) + -- Some characters might not be replaced but removed, others might be + -- multi-char sequences. + if #s == 0 then return "nil" + else + local i = 0 + local r = "" + -- The following loop could be replaced by checking the string length with utf.len(s) … + repeat + i = i + 1 + if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. " " end + until (false) + return r + end + end + cnt = cnt + 1 + context.bTR() + context.bTC() context(cnt) context.eTC() + context.bTC() context(key) context.eTC() + context.bTC() context(utf.len(key)) context.eTC() + context.bTC() context(val) context.eTC() + context.bTC() context(utf.len(val)) context.eTC() + context.bTC() context(strempty(key)) context.eTC() + context.bTC() context(strempty(val)) context.eTC() + context.eTR() end - end - cnt = cnt + 1 - context ("\\bTR\\bTC " .. cnt .. "\\eTC") - context ("\\bTC " .. - key .. "\\eTC\\bTC " .. - utf.len(key) .. "\\eTC\\bTC " .. - val .. "\\eTC\\bTC " .. - utf.len(val) .. "\\eTC\\bTC " .. - strempty(key) .. "\\eTC\\bTC " .. - strempty(val) .. "\\eTC") - context ("\\eTR ") - end - context("\\eTABLEbody") - context ("\\bTABLEfoot\\bTR".. - "\\bTC Number\\eTC" .. - "\\bTC letters\\eTC" .. - "\\bTC n\\eTC" .. - "\\bTC replacement\\eTC".. - "\\bTC n\\eTC" .. - "\\bTC bytes\\eTC".. - "\\bTC repl. bytes".. - "\\eTC\\eTR\\eTABLEfoot ") - context ("\\eTABLE ") + context.eTABLEbody() + context.bTABLEfoot() context.bTR() + context.bTC() context("number") context.eTC() + context.bTC() context("letters") context.eTC() + context.bTC() context("n") context.eTC() + context.bTC() context("replacement") context.eTC() + context.bTC() context("n") context.eTC() + context.bTC() context("bytes") context.eTC() + context.bTC() context("repl. bytes") context.eTC() + context.eTR() + context.eTABLEfoot() + context.eTABLE() end \stopluacode @@ -1891,6 +1905,18 @@ function translit.transliterate (method, text) if method == "ru" then translit.add_table(repl_tab, translit.ru_upp) translit.add_table(repl_tab, translit.ru_low) + elseif method == "ru_old" then + translit.add_table(repl_tab, translit.ru_upp) + translit.add_table(repl_tab, translit.ru_low) + translit.add_table(repl_tab, translit.ru_old_upp) + translit.add_table(repl_tab, translit.ru_old_low) + elseif method == "all" then + translit.add_table(repl_tab, translit.ru_upp) + translit.add_table(repl_tab, translit.ru_low) + translit.add_table(repl_tab, translit.ru_old_upp) + translit.add_table(repl_tab, translit.ru_old_low) + translit.add_table(repl_tab, translit.non_ru_upp) + translit.add_table(repl_tab, translit.non_ru_low) elseif method == "ru_transcript_de" then text = translit.subst (text, translit.ru_trsc_jrule) text = translit.subst (text, translit.ru_trsc_irule) @@ -1910,18 +1936,6 @@ function translit.transliterate (method, text) translit.add_table(repl_tab, translit.ru_trsc_en_low_first) translit.add_table(repl_tab, translit.ru_trsc_en_upp) translit.add_table(repl_tab, translit.ru_trsc_en_low) - elseif method == "ru_old" then - translit.add_table(repl_tab, translit.ru_upp) - translit.add_table(repl_tab, translit.ru_low) - translit.add_table(repl_tab, translit.ru_old_upp) - translit.add_table(repl_tab, translit.ru_old_low) - elseif method == "all" then - translit.add_table(repl_tab, translit.ru_upp) - translit.add_table(repl_tab, translit.ru_low) - translit.add_table(repl_tab, translit.ru_old_upp) - translit.add_table(repl_tab, translit.ru_old_low) - translit.add_table(repl_tab, translit.non_ru_upp) - translit.add_table(repl_tab, translit.non_ru_low) elseif method == "iso9_ocs" then translit.add_table(repl_tab, translit.ru_upp) translit.add_table(repl_tab, translit.ru_low) @@ -1930,6 +1944,9 @@ function translit.transliterate (method, text) translit.add_table(repl_tab, translit.ocs_add_upp) translit.add_table(repl_tab, translit.ocs_add_low) elseif method == "ocs" then + translit.add_table(repl_tab, translit.ocs_uk) + text = translit.subst (text, repl_tab) + repl_tab = {} translit.add_table(repl_tab, translit.ocs_low) translit.add_table(repl_tab, translit.ocs_upp) elseif method == "ocs_gla" then |