diff options
Diffstat (limited to 'tex/context')
| -rw-r--r-- | tex/context/interface/third/t-transliterator.xml | 3 | ||||
| -rw-r--r-- | tex/context/third/transliterator/t-transliterator.mkiv | 1922 | ||||
| -rw-r--r-- | tex/context/third/transliterator/trans_tables_glag.lua | 122 | ||||
| -rw-r--r-- | tex/context/third/transliterator/trans_tables_gr.lua | 693 | ||||
| -rw-r--r-- | tex/context/third/transliterator/trans_tables_iso9.lua | 288 | ||||
| -rw-r--r-- | tex/context/third/transliterator/trans_tables_scntfc.lua | 256 | ||||
| -rw-r--r-- | tex/context/third/transliterator/trans_tables_trsc.lua | 704 | 
7 files changed, 2148 insertions, 1840 deletions
diff --git a/tex/context/interface/third/t-transliterator.xml b/tex/context/interface/third/t-transliterator.xml index 2217e12..b5b189f 100644 --- a/tex/context/interface/third/t-transliterator.xml +++ b/tex/context/interface/third/t-transliterator.xml @@ -2,7 +2,7 @@  <cd:interface xmlns:cd="http://www.pragma-ade.com/commands"     name="context" language="en"  -  version="2010.02.27"> +  version="2010.3.10">    <cd:command name="setuptransliterator" file="t-transliterator.mkiv">      <cd:sequence> @@ -14,6 +14,7 @@            <cd:constant type="ru_old" default="yes"/>            <cd:constant type="ru"/>            <cd:constant type="ru_transcript_de"/> +          <cd:constant type="ru_transcript_de_exp"/>            <cd:constant type="ru_transcript_en"/>            <cd:constant type="all"/>            <cd:constant type="iso9_ocs"/> diff --git a/tex/context/third/transliterator/t-transliterator.mkiv b/tex/context/third/transliterator/t-transliterator.mkiv index deca291..474af75 100644 --- a/tex/context/third/transliterator/t-transliterator.mkiv +++ b/tex/context/third/transliterator/t-transliterator.mkiv @@ -1,7 +1,6 @@ -%D \enableregime[utf] -%D \module                                                                                                                  +%D \module  %D   [      file=t-transliterator, -%D        version=2010.03.07, +%D        version=2010.03.10,  %D          title=\CONTEXT\ User Module,  %D       subtitle=The Transliterator,  %D         author=Philipp Gesang, @@ -10,8 +9,8 @@  %D        license=2-clause BSD,  %D          email={pgesang at ix dot urz dot uni-heidelberg dot de}]  %D This module is licensed under the conditions of the BSD license with  -%D two clauses: http://www.freebsd.org/copyright/freebsd-license.html. -%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/. +%D two clauses, there is a copy it in a file named "COPYING" in the +%D transliterator source tree.  
\writestatus{loading}{Transliteration from non-Latin scripts} @@ -82,1705 +81,14 @@ translit.ru_consonants = {"б", "в", "г", "д", "ж", "з", "к", "л", "м",  %D characters if needed; by the way those are included in the default  %D transliteration mode \type{ru_old}. -%-===========================================================================-- -%-           ISO 9.1995(E) standardized transliteration for cyrillic         -- -%-===========================================================================-- - -\startluacode ------------------------------------------ --- Lowercase russian cyrillic alphabet -- ------------------------------------------ -translit.ru_low = { -  ["а"] = "a", -- U+0430 -> U+0061 -  ["б"] = "b", -- U+0431 -> U+0062 -  ["в"] = "v", -- U+0432 -> U+0076 -  ["г"] = "g", -- U+0433 -> U+0067 -  ["д"] = "d", -- U+0434 -> U+0064 -  ["е"] = "e", -- U+0435 -> U+0065 -  ["ё"] = "ë", -- U+0451 -> U+00eb -  ["ж"] = "ž", -- U+0436 -> U+017e -  ["з"] = "z", -- U+0437 -> U+007a -  ["и"] = "i", -- U+0438 -> U+0069 -  ["й"] = "j", -- U+0439 -> U+006a -  ["к"] = "k", -- U+043a -> U+006b -  ["л"] = "l", -- U+043b -> U+006c -  ["м"] = "m", -- U+043c -> U+006d -  ["н"] = "n", -- U+043d -> U+006e -  ["о"] = "o", -- U+043e -> U+006f -  ["п"] = "p", -- U+043f -> U+0070 -  ["р"] = "r", -- U+0440 -> U+0072 -  ["с"] = "s", -- U+0441 -> U+0073 -  ["т"] = "t", -- U+0442 -> U+0074 -  ["у"] = "u", -- U+0443 -> U+0075 -  ["ф"] = "f", -- U+0444 -> U+0066 -  ["х"] = "h", -- U+0445 -> U+0068 -  ["ц"] = "c", -- U+0446 -> U+0063 -  ["ч"] = "č", -- U+0447 -> U+010d -  ["ш"] = "š", -- U+0448 -> U+0161 -  ["щ"] = "ŝ", -- U+0449 -> U+015d -  ["ъ"] = "ʺ", -- U+044a -> U+02ba <- That's somewhat ambiguous as 0x2ba is -  ["ы"] = "y", -- U+044b -> U+0079    used for uppercase, too. -  ["ь"] = "ʹ", -- U+044c -> U+02b9 <- Same here with 0x2b9. 
-  ["э"] = "è", -- U+044d -> U+00e8 -  ["ю"] = "û", -- U+044e -> U+00fb -  ["я"] = "â"  -- U+044f -> U+00e2 -} - -translit.tables["russian lowercase ISO~9"] = translit.ru_low - ------------------------------------------ --- Uppercase russian cyrillic alphabet -- ------------------------------------------ - -translit.ru_upp = { -  ["А"] = "A", -- U+0410 -> U+0041 -  ["Б"] = "B", -- U+0411 -> U+0042 -  ["В"] = "V", -- U+0412 -> U+0056 -  ["Г"] = "G", -- U+0413 -> U+0047 -  ["Д"] = "D", -- U+0414 -> U+0044 -  ["Е"] = "E", -- U+0415 -> U+0045 -  ["Ё"] = "Ë", -- U+0401 -> U+00cb -  ["Ж"] = "Ž", -- U+0416 -> U+017d -  ["З"] = "Z", -- U+0417 -> U+005a -  ["И"] = "I", -- U+0418 -> U+0049 -  ["Й"] = "J", -- U+0419 -> U+004a -  ["К"] = "K", -- U+041a -> U+004b -  ["Л"] = "L", -- U+041b -> U+004c -  ["М"] = "M", -- U+041c -> U+004d -  ["Н"] = "N", -- U+041d -> U+004e -  ["О"] = "O", -- U+041e -> U+004f -  ["П"] = "P", -- U+041f -> U+0050 -  ["Р"] = "R", -- U+0420 -> U+0052 -  ["С"] = "S", -- U+0421 -> U+0053 -  ["Т"] = "T", -- U+0422 -> U+0054 -  ["У"] = "U", -- U+0423 -> U+0055 -  ["Ф"] = "F", -- U+0424 -> U+0046 -  ["Х"] = "H", -- U+0425 -> U+0048 -  ["Ц"] = "C", -- U+0426 -> U+0043 -  ["Ч"] = "Č", -- U+0427 -> U+010c -  ["Ш"] = "Š", -- U+0428 -> U+0160 -  ["Щ"] = "Ŝ", -- U+0429 -> U+015c -  ["Ъ"] = "ʺ", -- U+042a -> U+02ba -  ["Ы"] = "Y", -- U+042b -> U+0059 -  ["Ь"] = "ʹ", -- U+042c -> U+02b9 -  ["Э"] = "È", -- U+042d -> U+00c8 -  ["Ю"] = "Û", -- U+042e -> U+00db -  ["Я"] = "Â"  -- U+042f -> U+00c2 -} - -translit.tables["russian uppercase ISO~9"] = translit.ru_upp - ----------------------------------------------------------- --- Lowercase pre-1918 russian cyrillic additional chars -- ----------------------------------------------------------- --- cf. 
http://www.russportal.ru/index.php?id=oldorth.decret1917 - -translit.ru_old_low = { -  ["ѣ"] = "ě", -- U+048d -> U+011b -  ["і"] = "ì", -- U+0456 -> U+00ec -  ["ѳ"] = "f", -- U+0473 -> U+0066 -  ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 -} - -translit.tables["russian pre-1918 lowercase ISO~9"] = translit.ru_low - -translit.ru_old_upp = { -  ["Ѣ"] = "Ě", -- U+048c -> U+011a -  ["І"] = "Ì", -- U+0406 -> U+00cc -  ["Ѳ"] = "F", -- U+0424 -> U+0046 -  ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2 -} - -translit.tables["russian pre-1918 uppercase ISO~9"] = translit.ru_upp - ---------------------------------------------------------- --- Lowercase characters from other cyrillic alphabets  -- ---------------------------------------------------------- - -translit.non_ru_low = { -  ["ӑ"] = "ă", -- U+04d1 -> U+0103 -  ["ӓ"] = "ä", -- U+04d3 -> U+00e4 -  ["ә"] = "a̋", -- u+04d9 -> U+0061+030b -  ["ґ"] = "g̀", -- u+0491 -> U+0067+0300 -  ["ҕ"] = "ğ", -- U+0495 -> U+011f -  ["ғ"] = "ġ", -- U+0493 -> U+0121 -  ["ђ"] = "đ", -- U+0452 -> U+0111 -  ["ѓ"] = "ǵ", -- U+0453 -> U+01f5 -  ["ӗ"] = "ĕ", -- U+04d7 -> U+0115 -  ["є"] = "ê", -- U+0454 -> U+00ea -  ["ҽ"] = "c̆", -- U+04bd -> U+0063+0306 -  ["ҿ"] = "ç̆", -- U+04bf -> U+00e7+0306 -  ["ӂ"] = "z̆", -- U+04c2 -> U+007a+0306 -  ["ӝ"] = "z̄", -- U+04dd -> U+007a+0304 -  ["җ"] = "ž̧", -- U+0497 -> U+017e+0327 -  ["ӟ"] = "z̈", -- U+04df -> U+007a+0308 -  ["ѕ"] = "ẑ", -- U+0455 -> U+1e91          -- Mapped to dz in old cyrillic non-ISO. 
-  ["ӡ"] = "ź", -- U+04e1 -> U+017a -  ["ӥ"] = "î", -- U+04e5 -> U+00ee -  ["і"] = "ì", -- U+0456 -> U+00ec -  ["ї"] = "ï", -- U+0457 -> U+00ef -  ["ј"] = "ǰ", -- U+0458 -> U+01f0 -  ["қ"] = "ķ", -- U+049b -> U+0137 -  ["ҟ"] = "k̄", -- U+049f -> U+006b+0304 -  ["љ"] = "l̂", -- U+0459 -> U+006c+0302 -  ["њ"] = "n̂", -- U+045a -> U+006e+0302 -  ["ҥ"] = "ṅ", -- U+04a5 -> U+1e45 -  ["ң"] = "ṇ", -- U+04a3 -> U+1e47 -  ["ӧ"] = "ö", -- U+04e7 -> U+00f6 -  ["ө"] = "ô", -- U+04e9 -> U+00f4 -  ["ҧ"] = "ṕ", -- U+04a7 -> U+1e55 -  ["ҫ"] = "ç", -- U+04ab -> U+00e7 -  ["ҭ"] = "ţ", -- U+04ad -> U+0163 -  ["ћ"] = "ć", -- U+045b -> U+0107 -  ["ќ"] = "ḱ", -- U+045c -> U+1e31 -  ["у́"] = "ú", -- U+0443+ -> U+00fA -  ["ў"] = "ŭ", -- U+045e -> U+016d -  ["ӱ"] = "ü", -- U+04f1 -> U+00fc -  ["ӳ"] = "ű", -- U+04f3 -> U+0171 -  ["ү"] = "ù", -- U+04af -> U+00f9 -  ["ҳ"] = "ḩ", -- U+04b3 -> U+1e29 -  ["һ"] = "ḥ", -- U+04bb -> U+1e25 -  ["ҵ"] = "c̄", -- U+04b5 -> U+0063+0304 -  ["ӵ"] = "c̈", -- U+04f5 -> U+0063+0308 -  ["ҷ"] = "ç", -- U+04cc -> U+00e7 -  ["џ"] = "d̂", -- U+045f -> U+0064+0302 -  ["ӹ"] = "ÿ", -- U+04f9 -> U+00ff -  ["ѣ"] = "ě", -- U+048d -> U+011b -  ["ѫ"] = "ǎ", -- U+046b -> U+01ce      -- Mapped to ǫ in non-ISO old cyrillic. -  ["ѳ"] = "f̀", -- U+0473 -> U+0066+0300 -- This is mapped to ‘f’ in ru_old. 
-  ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 -  ["ҩ"] = "ò", -- U+04a9 -> U+00f2 -  ["Ӏ"] = "‡"  -- U+04cf -> U+2021 -} - -translit.tables["cyrillic other lowercase ISO~9"] = translit.non_ru_low - ---------------------------------------------------------- --- Uppercase characters from other cyrillic alphabets  -- ---------------------------------------------------------- - -translit.non_ru_upp = { -  ["Ӑ"] = "Ă", -- U+04d0 -> U+0102 -  ["Ӓ"] = "Ä", -- U+04d2 -> U+00c4 -  ["Ә"] = "A̋", -- U+04d8 -> U+0041+030b -  ["Ґ"] = "G̀", -- U+0490 -> U+0047+0300 -  ["Ҕ"] = "Ğ", -- U+0494 -> U+011e -  ["Ғ"] = "Ġ", -- U+0492 -> U+0120 -  ["Ђ"] = "Đ", -- U+0402 -> U+0110 -  ["Ѓ"] = "Ǵ", -- U+0403 -> U+01f4 -  ["Ӗ"] = "Ĕ", -- U+04d6 -> U+0114 -  ["Є"] = "Ê", -- U+0404 -> U+00ca -  ["Ҽ"] = "C̆", -- U+04bc -> U+0043+0306 -  ["Ҿ"] = "Ç̆", -- U+04be -> U+00c7+0306 -  ["Ӂ"] = "Z̆", -- U+04c1 -> U+005a+0306 -  ["Ӝ"] = "Z̄", -- U+04dc -> U+005a+0304 -  ["Җ"] = "Ž̦", -- U+0496 -> U+017d+0326 -  ["Ӟ"] = "Z̈", -- U+04de -> U+005a+0308 -  ["Ѕ"] = "Ẑ", -- U+0405 -> U+1e90 -  ["Ӡ"] = "Ź", -- U+04e0 -> U+0179 -  ["Ӥ"] = "Î", -- U+04e4 -> U+00ce -  ["І"] = "Ì", -- U+0406 -> U+00cc -  ["Ї"] = "Ï", -- U+0407 -> U+00cf -  ["Ј"] = "J̌", -- U+0408 -> U+004a+030c -  ["Қ"] = "Ķ", -- U+049a -> U+0136 -  ["Ҟ"] = "K̄", -- U+049e -> U+004b+0304 -  ["Љ"] = "L̂", -- U+0409 -> U+004c+0302 -  ["Њ"] = "N̂", -- U+040a -> U+004e+0302 -  ["Ҥ"] = "Ṅ", -- U+04a4 -> U+1e44 -  ["Ң"] = "Ṇ", -- U+04a2 -> U+1e46 -  ["Ӧ"] = "Ö", -- U+04e6 -> U+00d6 -  ["Ө"] = "Ô", -- U+04e8 -> U+00d4 -  ["Ҧ"] = "Ṕ", -- U+04a6 -> U+1e54 -  ["Ҫ"] = "Ç", -- U+04aa -> U+00c7 -  ["Ҭ"] = "Ţ", -- U+04ac -> U+0162 -  ["Ћ"] = "Ć", -- U+040b -> U+0106 -  ["Ќ"] = "Ḱ", -- U+040c -> U+1e30 -  ["У́"] = "Ú", -- U+0423 -> U+00da -  ["Ў"] = "Ŭ", -- U+040e -> U+016c -  ["Ӱ"] = "Ü", -- U+04f0 -> U+00dc -  ["Ӳ"] = "Ű", -- U+04f2 -> U+0170 -  ["Ү"] = "Ù", -- U+04ae -> U+00d9 -  ["Ҳ"] = "Ḩ", -- U+04b2 -> U+1e28 -  ["Һ"] = "Ḥ", -- U+04ba -> U+1e24 -  ["Ҵ"] = "C̄", -- 
U+04b4 -> U+0043+0304 -  ["Ӵ"] = "C̈", -- U+04f4 -> U+0043+0308 -  ["Ҷ"] = "Ç", -- U+04cb -> U+00c7 -  ["Џ"] = "D̂", -- U+040f -> U+0044+0302 -  ["Ӹ"] = "Ÿ", -- U+04f8 -> U+0178 -  ["Ѣ"] = "Ě", -- U+048c -> U+011a -  ["Ѫ"] = "Ǎ", -- U+046a -> U+01cd -  ["Ѳ"] = "F̀", -- U+0472 -> U+0046+0300 -  ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2 -  ["Ҩ"] = "Ò", -- U+04a8 -> U+00d2 -  ["’"] = "‵", -- U+2035 -> U+2019 -  ["Ӏ"] = "‡"  -- U+04c0 -> U+2021 -} - -translit.tables["cyrillic other uppercase ISO~9"] = translit.non_ru_upp - -\stopluacode - -\startluacode - ---===========================================================================-- ---                      Legacy national transliterations                     -- ---===========================================================================-- --- Note: --- Use these only as a last resort.  ‘Vulgar’ transcription is ugly and --- chauvinistic. - ---------------------------------- --- German simple transcription -- ---------------------------------- --- Reference:   „DUDEN. Rechtschreibung der deutschen Sprache“; 20. Aufl., ---              Mannheim et. al. 1991. - --------------------------------------------------------- --- Lowercase German simple transcription---first pass -- --------------------------------------------------------- - -translit.ru_trsc_low_first = { -  [" е"] = " je", -  ["ъе"] = "je", -  ["ье"] = "je", -  [" ё"] = " jo", -  ["ъё"] = "jo", -  ["ьё"] = "jo", -  ["жё"] = "scho", -  ["чё"] = "tscho", -  ["шё"] = "scho", -  ["щё"] = "schtscho", -  ["ье"] = "je", -  ["ьи"] = "ji", -  ["ьо"] = "jo", -  ["ий"] = "i", -  ["ый"] = "y", -  ["кс"] = "x" -- Extraordinarily stupid one. 
-} - -translit.tables["German transcription first pass lowercase"] = translit.ru_trsc_low_first - --------------------------------------------------------- --- Uppercase German simple transcription---first pass -- --------------------------------------------------------- - -translit.ru_trsc_upp_first = { -  [" Е"] = " Je", -  ["Ъe"] = "Je",  -- Pedantic, isn't it? -  ["Ье"] = "Je", -  [" Ё"]  = "Jo", -  ["Ъё"] = "Jo", -  ["Ьё"] = "Jo", -  ["Жё"] = "Scho", -  ["Чё"] = "Tscho", -  ["Шё"] = "Scho", -  ["Щё"] = "Schtscho", -  ["Кс"] = "ks" -} - -translit.tables["German transcription first pass uppercase"] = translit.ru_trsc_upp_first - -------------------------------------------- --- Lowercase German simple transcription -- -------------------------------------------- - -translit.ru_trsc_low = { -  ["а"] = "a", -  ["б"] = "b", -  ["в"] = "w", -  ["г"] = "g", -  ["д"] = "d", -  ["е"] = "e", -  ["ё"] = "jo", -  ["ж"] = "sch", -  ["з"] = "s", -  ["и"] = "i", -  ["й"] = "i", -  ["к"] = "k", -  ["л"] = "l", -  ["м"] = "m", -  ["н"] = "n", -  ["о"] = "o", -  ["п"] = "p", -  ["р"] = "r", -  ["с"] = "s", -  ["т"] = "t", -  ["у"] = "u", -  ["ф"] = "f", -  ["х"] = "ch", -  ["ц"] = "z", -  ["ч"] = "tsch", -  ["ш"] = "sch", -  ["щ"] = "schtsch", -  ["ъ"] = "", -  ["ы"] = "y", -  ["ь"] = "", -  ["э"] = "e", -  ["ю"] = "ju", -  ["я"] = "ja"  -} - -translit.tables["German transcription second pass lowercase"] = translit.ru_trsc_low - -------------------------------------------- --- Uppercase German simple transcription -- -------------------------------------------- - -translit.ru_trsc_upp = { -  ["А"] = "A", -  ["Б"] = "B", -  ["В"] = "W", -  ["Г"] = "G", -  ["Д"] = "D", -  ["Е"] = "E", -  ["Ё"] = "Jo", -  ["Ж"] = "Sch", -  ["З"] = "S", -  ["И"] = "I", -  ["Й"] = "J", -  ["К"] = "K", -  ["Л"] = "L", -  ["М"] = "M", -  ["Н"] = "N", -  ["О"] = "O", -  ["П"] = "P", -  ["Р"] = "R", -  ["С"] = "S", -  ["Т"] = "T", -  ["У"] = "U", -  ["Ф"] = "F", -  ["Х"] = "Ch", -  ["Ц"] = "Z", -  ["Ч"] 
= "Tsch", -  ["Ш"] = "Sch", -  ["Щ"] = "Schtsch", -  ["Ъ"] = "", -  ["Ы"] = "Y", -  ["Ь"] = "", -  ["Э"] = "E", -  ["Ю"] = "Ju", -  ["Я"] = "Ja"  -} - -translit.tables["German transcription second pass uppercase"] = translit.ru_trsc_upp - -\stopluacode - -%D The following are more interesting than the previous tables because they -%D implement various rules.  For instance the table -%D \type{translit.ru_trsc_irule} holds a substitution dictionary for all -%D possible combinations (including nonsense galore) of a vowel preceding an -%D “й” (Russian short i) preceding a consonant; here we access the sets of -%D Russian vowels as well consonants that were defined earlier. - -\startluacode --- The й-rule, VйC -> ViC -translit.ru_trsc_irule = {} -for i, vow in ipairs(translit.ru_vowels) do -  for j, cons in ipairs(translit.ru_consonants) do -    local new_ante = vow .. "й" .. cons -    local new_post = vow .. "i" .. cons -    translit.ru_trsc_irule[new_ante] = new_post -  end -end - -translit.tables["German transcription i-rule"] = translit.ru_trsc_irule - --- The second й-rule, йV -> jV && [иы]йC -> [иы]jC -translit.ru_trsc_jrule = {} -for i, vow in ipairs(translit.ru_vowels) do -  local new_ante = "й" .. vow -  local new_post = "j" .. vow -  translit.ru_trsc_jrule[new_ante] = new_post -end - -translit.ru_trsc_iy = {"и", "ы", "И", "Ы"} -for i, cons in ipairs(translit.ru_consonants) do -  for j, iy in ipairs(translit.ru_trsc_iy) do -    local new_ante = iy .. "й" .. cons -    local new_post = iy .. "j" .. cons -    translit.ru_trsc_jrule[new_ante] = new_post -  end -end - -translit.tables["German transcription j-rule"] = translit.ru_trsc_jrule - --- The с-rule, VсV -> VssV -translit.ru_trsc_srule = {} -for i, vow_1 in ipairs(translit.ru_vowels) do -for j, vow_2 in ipairs(translit.ru_vowels) do -  local new_ante = vow_1 .. "с" .. vow_2 -  local new_post = vow_1 .. "ss" .. 
vow_2 -    translit.ru_trsc_srule[new_ante] = new_post -  end -end - -translit.tables["German transcription s-rule"] = translit.ru_trsc_srule - --- The sharp-s-rule, Vсх -> Vßх -translit.ru_trsc_sharpsrule = {} -for i, vow in ipairs(translit.ru_vowels) do -  local new_ante = vow .. "сх" -  local new_post = vow .. "ßх" -  translit.ru_trsc_sharpsrule[new_ante] = new_post -end - -translit.tables["German transcription sharp-s-rule"] = translit.ru_trsc_sharpsrule - --- The е-rule, Vе -> Vje -translit.ru_trsc_jerule = {} -for i, vow in ipairs(translit.ru_vowels) do -  local new_ante = vow .. "е" -  local new_post = vow .. "je" -  translit.ru_trsc_jerule[new_ante] = new_post -end - -translit.tables["German transcription je-rule"] = translit.ru_trsc_jerule - --- The ё-rule, Vё -> Vjo --- This should be redundant as [жцчшщ]ё -> o, else ё -> jo . --- Somebody should teach those DUDEN-guys parsimony. -translit.ru_trsc_jorule = {} -for i, vow in ipairs(translit.ru_vowels) do -  local new_ante = vow .. "ё" -  local new_post = vow .. 
"jo" -  translit.ru_trsc_jorule[new_ante] = new_post -end - -translit.tables["German transcription (redundant) jo-rule"] = translit.ru_trsc_jorule - -\stopluacode - -\startluacode - ---------------------------------------------------------- --- Lowercase English simple transcription---first pass -- ---------------------------------------------------------- - -translit.ru_trsc_en_low_first = { -  [" е"] = " ye", -  ["ъе"] = "ye", -  ["ье"] = "ye", -  ["ье"] = "ye", -  ["ьи"] = "yi", -} - -translit.tables["English transcription lowercase first pass"] = translit.ru_trsc_en_low_first - ---------------------------------------------------------- --- Uppercase English simple transcription---first pass -- ---------------------------------------------------------- - -translit.ru_trsc_en_upp_first = { -  [" Е"] = " Ye", -  ["Ъe"] = "Ye", -  ["Ье"] = "Ye", -} - -translit.tables["English transcription uppercase first pass"] = translit.ru_trsc_en_upp_first - --------------------------------------------- --- Lowercase English simple transcription -- --------------------------------------------- - -translit.ru_trsc_en_low = { -  ["а"] = "a", -  ["б"] = "b", -  ["в"] = "v", -  ["г"] = "g", -  ["д"] = "d", -  ["е"] = "e", -  ["ё"] = "e", -  ["ж"] = "zh", -  ["з"] = "z", -  ["и"] = "i", -  ["й"] = "y", -  ["к"] = "k", -  ["л"] = "l", -  ["м"] = "m", -  ["н"] = "n", -  ["о"] = "o", -  ["п"] = "p", -  ["р"] = "r", -  ["с"] = "s", -  ["т"] = "t", -  ["у"] = "u", -  ["ф"] = "f", -  ["х"] = "kh", -  ["ц"] = "ts", -  ["ч"] = "ch", -  ["ш"] = "sh", -  ["щ"] = "shsh", -  ["ъ"] = "", -  ["ы"] = "y", -  ["ь"] = "", -  ["э"] = "e", -  ["ю"] = "yu", -  ["я"] = "ya"  -} - -translit.tables["English transcription lowercase second pass"] = translit.ru_trsc_en_low - --------------------------------------------- --- Uppercase English simple transcription -- --------------------------------------------- - -translit.ru_trsc_en_upp = { -  ["А"] = "A", -  ["Б"] = "B", -  ["В"] = "V", -  ["Г"] = "G", -  
["Д"] = "D", -  ["Е"] = "E", -  ["Ё"] = "E", -  ["Ж"] = "Zh", -  ["З"] = "Z", -  ["И"] = "I", -  ["Й"] = "Y", -  ["К"] = "K", -  ["Л"] = "L", -  ["М"] = "M", -  ["Н"] = "N", -  ["О"] = "O", -  ["П"] = "P", -  ["Р"] = "R", -  ["С"] = "S", -  ["Т"] = "T", -  ["У"] = "U", -  ["Ф"] = "F", -  ["Х"] = "Kh", -  ["Ц"] = "Ts", -  ["Ч"] = "Ch", -  ["Ш"] = "Sh", -  ["Щ"] = "Shsh", -  ["Ъ"] = "", -  ["Ы"] = "Y", -  ["Ь"] = "", -  ["Э"] = "E", -  ["Ю"] = "Yu", -  ["Я"] = "Ya"  -} - -translit.tables["English transcription uppercase second pass"] = translit.ru_trsc_en_upp - --- The english е-rule, Vе -> Vye -translit.ru_trsc_en_jerule = {} -for i, vow in ipairs(translit.ru_vowels) do -  local new_ante = vow .. "е" -  local new_post = vow .. "ye" -  translit.ru_trsc_en_jerule[new_ante] = new_post -end - -translit.tables["English transcription ye-rule"] = translit.ru_trsc_en_jerule - -\stopluacode - -\startluacode - ------------------------------------ --- Lowercase Czech transcription -- ------------------------------------ - -translit.ru_trsc_cz_low = { -  ["а"] = "a", -  ["б"] = "b", -  ["в"] = "v", -  ["г"] = "g", -  ["д"] = "d", -  ["е"] = "e", -  ["ё"] = "ë", -  ["ж"] = "ž", -  ["з"] = "z", -  ["и"] = "i", -  ["й"] = "j", -  ["к"] = "k", -  ["л"] = "l", -  ["м"] = "m", -  ["н"] = "n", -  ["о"] = "o", -  ["п"] = "p", -  ["р"] = "r", -  ["с"] = "s", -  ["т"] = "t", -  ["у"] = "u", -  ["ф"] = "f", -  ["х"] = "ch", -  ["ц"] = "c", -  ["ч"] = "č", -  ["ш"] = "š", -  ["щ"] = "šč", -  ["ъ"] = "ъ", -  ["ы"] = "y", -  ["ь"] = "ь", -  ["э"] = "è", -  ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and тя -> ťa, but -  ["я"] = "ja"  -- that would complicate things a bit and linguists might not -}               -- agree. 
- -translit.tables["Czech transcription lowercase"] = translit.ru_trsc_cz_low - ------------------------------------ --- Uppercase Czech transcription -- ------------------------------------ - -translit.ru_trsc_cz_upp = { -  ["А"] = "A", -  ["Б"] = "B", -  ["В"] = "V", -  ["Г"] = "G", -  ["Д"] = "D", -  ["Е"] = "E", -  ["Ё"] = "Ë", -  ["Ж"] = "Ž", -  ["З"] = "Z", -  ["И"] = "I", -  ["Й"] = "J", -  ["К"] = "K", -  ["Л"] = "L", -  ["М"] = "M", -  ["Н"] = "N", -  ["О"] = "O", -  ["П"] = "P", -  ["Р"] = "R", -  ["С"] = "S", -  ["Т"] = "T", -  ["У"] = "U", -  ["Ф"] = "F", -  ["Х"] = "Ch", -  ["Ц"] = "C", -  ["Ч"] = "Č", -  ["Ш"] = "Š", -  ["Щ"] = "Šč", -  ["Ъ"] = "Ъ", -  ["Ы"] = "Y", -  ["Ь"] = "Ь", -  ["Э"] = "È", -  ["Ю"] = "Ju", -  ["Я"] = "Ja"  -} - -translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp - ----------------------------------------------- --- Lowercase Additional Czech Transcription -- ----------------------------------------------- - -translit.ru_trsc_cz_add_low = { -  ["ѕ"] = "dz", -  ["з"] = "z", -  ["ꙁ"] = "z", -  ["і"] = "ï", -  ["ѹ"] = "u", -  ["ѡ"] = "ō", -  ["ѣ"] = "ě", -  ["ѥ"] = "je", -  ["ѧ"] = "ę", -  ["ѩ"] = "ję", -  ["ѫ"] = "ǫ", -  ["ѭ"] = "jǫ", -  ["ѯ"] = "ks", -  ["ѱ"] = "ps", -  ["ѳ"] = "th", -  ["ѵ"] = "ÿ", -} - -translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit.ru_trsc_cz_add_low - - ----------------------------------------------- --- Uppercase Additional Czech Transcription -- ----------------------------------------------- - -translit.ru_trsc_cz_add_upp = { -  ["Ѕ"] = "Dz", -  ["З"] = "Z", -  ["Ꙁ"] = "Z", -  ["І"] = "Ï", -  ["Ѹ"] = "U", -  ["Ѡ"] = "Ō", -  ["Ѣ"] = "Ě", -  ["Ѥ"] = "Je", -  ["Ѧ"] = "Ę", -  ["Ѩ"] = "Ję", -  ["Ѫ"] = "Ǫ", -  ["Ѭ"] = "Jǫ", -  ["Ѯ"] = "Ks", -  ["Ѱ"] = "Ps", -  ["Ѳ"] = "Th", -  ["Ѵ"] = "Ÿ", -} - -translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit.ru_trsc_cz_add_upp - -\stopluacode - 
-%-===========================================================================-- -%-                      Other transliterations                               -- -%-===========================================================================-- - -\startluacode - --- The following are needed because ISO 9 does not cover old Slavonic --- characters that became obsolete before the advent of гражданский шрифт. - --- Please note that these mappings are not bijective so don't expect the result  --- to be easily revertible (by machines). - --- Source p. 77 of --- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf - ------------------------------------------------------------------------ --- Lowercase and uppercase letter Uk -- “scientific transliteration” -- ------------------------------------------------------------------------ - -translit.ocs_uk = { -  ["oу"] = "u", -  ["оу"] = "u", -  ["Оу"] = "U", -} ------------------------------------------------------------------------------ --- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” -- ------------------------------------------------------------------------------ - -translit.ocs_low = { -  ["а"] = "a", -  ["б"] = "b", -  ["в"] = "v", -  ["г"] = "g", -  ["д"] = "d", -  ["є"] = "e", -  ["ж"] = "ž", -  ["ꙃ"] = "ʒ",      -- U+0292, alternative: dz U+01f3 -  ["ѕ"] = "ʒ", -  ["ꙁ"] = "z", -  ["з"] = "z", -  ["и"] = "i", -  ["і"] = "i", -  ["ї"] = "i", -  ["ћ"] = "g’", -  ["к"] = "k", -  ["л"] = "l", -  ["м"] = "m", -  ["н"] = "n", -  ["о"] = "o", -  ["п"] = "p", -  ["р"] = "r", -  ["с"] = "s", -  ["т"] = "t", -  ["у"] = "u", -  ["ѹ"] = "u", -  ["ꙋ"] = "u", -  ["ф"] = "f", -  ["х"] = "x", -  ["ѡ"] = "o", --"ō", -  ["ѿ"] = "ot",     -- U+047f -  ["ѽ"] = "o!",     -- U+047d -  ["ꙍ"] = "o!",     -- U+064D -  ["ц"] = "c", -  ["ч"] = "č", -  ["ш"] = "š", -  ["щ"] = "št", -  ["ъ"] = "ъ", -  ["ы"] = "y", -  ["ꙑ"] = "y",      -- Old jery (U+a651) as used e.g. by the OCS Wikipedia. 
-  ["ь"] = "ь", -  ["ѣ"] = "ě", -  ["ю"] = "ju", -  ["ꙗ"] = "ja", -  ["ѥ"] = "je", -  ["ѧ"] = "ę", -  ["ѩ"] = "ję", -  ["ѫ"] = "ǫ", -  ["ѭ"] = "jǫ", -  ["ѯ"] = "ks", -  ["ѱ"] = "ps", -  ["ѳ"] = "th", -  ["ѵ"] = "ü", -} - -translit.tables["OCS \\quotation{scientific} transliteration lowercase"] = translit.ocs_low - ------------------------------------------------------------------------------ --- Uppercase pre-Peter cyrillic characters -- “scientific transliteration” -- ------------------------------------------------------------------------------ - -translit.ocs_upp = { -  ["А"] = "A", -  ["Б"] = "B", -  ["В"] = "V", -  ["Г"] = "G", -  ["Д"] = "D", -  ["Є"] = "E", -  ["Ж"] = "Ž", -  ["Ꙃ"] = "Ʒ",      -- U+01b7, alternative: Dz U+01f2 -  ["Ѕ"] = "Ʒ", -  ["Ꙁ"] = "Z", -  ["З"] = "Z", -  ["И"] = "I", -  ["І"] = "I", -  ["Ї"] = "I", -  ["Ћ"] = "G’", -  ["К"] = "K", -  ["Л"] = "L", -  ["М"] = "M", -  ["Н"] = "N", -  ["О"] = "O", -  ["П"] = "P", -  ["Р"] = "R", -  ["С"] = "S", -  ["Т"] = "T", -  ["У"] = "u", -  ["Ѹ"] = "U", -  ["ꙋ"] = "U", -  ["Ф"] = "F", -  ["Х"] = "X", -  ["Ѡ"] = "Ō", -  ["Ѿ"] = "Ot",     -- U+047c -  ["Ѽ"] = "O!",     -- U+047e -  ["Ꙍ"] = "O!",     -- U+064C -  ["Ц"] = "C", -  ["Ч"] = "Č", -  ["Ш"] = "Š", -  ["Щ"] = "Št", -  ["Ъ"] = "Ŭ", -  ["Ы"] = "Y", -  ["Ꙑ"] = "Y",  -- U+a650 -  ["Ь"] = "Ĭ", -  ["Ѣ"] = "Ě", -  ["Ю"] = "Ju", -  ["Ꙗ"] = "Ja", -  ["Ѥ"] = "Je", -  ["Ѧ"] = "Ę", -  ["Ѩ"] = "Ję", -  ["Ѫ"] = "Ǫ", -  ["Ѭ"] = "Jǫ", -  ["Ѯ"] = "Ks", -  ["Ѱ"] = "Ps", -  ["Ѳ"] = "Th", -  ["Ѵ"] = "Ü", -} - -translit.tables["OCS \\quotation{scientific} transliteration uppercase"] = translit.ocs_upp - --- Note on the additional tables: these cover characters that are not defined --- in ISO 9 but have a “scientific” transliteration.  You may use them as --- complementary mapping to ISO 9, trading off homogenity for completeness. 
- ----------------------------------------------------------------------------------------- --- Lowercase additional pre-Peter cyrillic characters -- “scientific transliteration” -- ----------------------------------------------------------------------------------------- - -translit.ocs_add_low = { -  ["ѕ"] = "dz",         -- Mapped to ẑ in ISO 9 (Macedonian …) -  ["ѯ"] = "ks", -  ["ѱ"] = "ps", -  ["ѡ"] = "ô", -  ["ѿ"] = "ot",     -- U+047f -  ["ѫ"] = "ǫ",          -- Mapped to ǎ in ISO 9. -  ["ѧ"] = "ę", -  ["ѭ"] = "jǫ", -  ["ѩ"] = "ję", -  ["ѥ"] = "je", -  ["ѹ"] = "u",          -- Digraph uk. -  ["ꙋ"] = "u",          -- Monograph uk, U+a64b.  (No glyph yet in the "fixed" font in February 2010 …) -  ["ꙑ"] = "y",          -- U+a651 -} - -translit.tables["OCS \\quotation{scientific} transliteration additional lowercase"] = translit.ocs_add_low - ----------------------------------------------------------------------------------------- --- Uppercase additional pre-Peter cyrillic characters -- “scientific transliteration” -- ----------------------------------------------------------------------------------------- - -translit.ocs_add_upp = { -  ["Ѕ"] = "Dz", -  ["Ѯ"] = "Ks", -  ["Ѱ"] = "Ps", -  ["Ѡ"] = "Ô", -  ["Ѿ"] = "ot", -  ["Ѫ"] = "Ǫ", -  ["Ѧ"] = "Ę", -  ["Ѭ"] = "Jǫ", -  ["Ѩ"] = "Ję", -  ["Ѥ"] = "Je", -  ["Ѹ"] = "U",          -- Digraph uk. -  ["Ꙋ"] = "U",          -- Monograph Uk, U+a64a. 
-  ["Ꙑ"] = "Y",  -- U+a650 -} - -translit.tables["OCS \\quotation{scientific} transliteration additional uppercase"] = translit.ocs_add_upp - - -\stopluacode - -%-===========================================================================-- -%-                              Glagolica                                    -- -%-===========================================================================-- - -\startluacode - -------------------------------------------- --- Lowercase Glagolitic Transliteration  -- -------------------------------------------- - -translit.ocs_gla_low = { -  ["ⰰ"] = "a",  -- GLAGOLITIC SMALL LETTER AZU -  ["ⰱ"] = "b",  -- GLAGOLITIC SMALL LETTER BUKY -  ["ⰲ"] = "v",  -- GLAGOLITIC SMALL LETTER VEDE -  ["ⰳ"] = "g",  -- GLAGOLITIC SMALL LETTER GLAGOLI -  ["ⰴ"] = "d",  -- GLAGOLITIC SMALL LETTER DOBRO -  ["ⰵ"] = "e",  -- GLAGOLITIC SMALL LETTER YESTU -  ["ⰶ"] = "ž",  -- GLAGOLITIC SMALL LETTER ZHIVETE -  ["ⰷ"] = "ʒ",  -- GLAGOLITIC SMALL LETTER DZELO -  ["ⰸ"] = "z",  -- GLAGOLITIC SMALL LETTER ZEMLJA -  ["ⰹ"] = "i",  -- GLAGOLITIC SMALL LETTER IZHE -  ["ⰺ"] = "i",  -- GLAGOLITIC SMALL LETTER INITIAL IZHE -  ["ⰻ"] = "i",  -- GLAGOLITIC SMALL LETTER I -  ["ⰼ"] = "g’", -- GLAGOLITIC SMALL LETTER DJERVI -  ["ⰽ"] = "k",  -- GLAGOLITIC SMALL LETTER KAKO -  ["ⰾ"] = "l",  -- GLAGOLITIC SMALL LETTER LJUDIJE -  ["ⰿ"] = "m",  -- GLAGOLITIC SMALL LETTER MYSLITE -  ["ⱀ"] = "n",  -- GLAGOLITIC SMALL LETTER NASHI -  ["ⱁ"] = "o",  -- GLAGOLITIC SMALL LETTER ONU -  ["ⱂ"] = "p",  -- GLAGOLITIC SMALL LETTER POKOJI -  ["ⱃ"] = "r",  -- GLAGOLITIC SMALL LETTER RITSI -  ["ⱄ"] = "s",  -- GLAGOLITIC SMALL LETTER SLOVO -  ["ⱅ"] = "t",  -- GLAGOLITIC SMALL LETTER TVRIDO -  ["ⱆ"] = "u",  -- GLAGOLITIC SMALL LETTER UKU -  ["ⱇ"] = "f",  -- GLAGOLITIC SMALL LETTER FRITU -  ["ⱈ"] = "x",  -- GLAGOLITIC SMALL LETTER HERU -  ["ⱉ"] = "o",  -- GLAGOLITIC SMALL LETTER OTU -  ["ⱊ"] = "?",  -- GLAGOLITIC SMALL LETTER PE -  ["ⱋ"] = "št", -- GLAGOLITIC SMALL LETTER SHTA -  ["ⱌ"] = "c",  
-- GLAGOLITIC SMALL LETTER TSI -  ["ⱍ"] = "č",  -- GLAGOLITIC SMALL LETTER CHRIVI -  ["ⱎ"] = "š",  -- GLAGOLITIC SMALL LETTER SHA -  ["ⱏ"] = "ъ",  -- GLAGOLITIC SMALL LETTER YERU -  ["ⱐ"] = "ь",  -- GLAGOLITIC SMALL LETTER YERI -  ["ⱑ"] = "ě",  -- GLAGOLITIC SMALL LETTER YATI -  ["ⱒ"] = "x",  -- GLAGOLITIC SMALL LETTER SPIDERY HA -  ["ⱓ"] = "ju", -- GLAGOLITIC SMALL LETTER YU -  ["ⱔ"] = "ę",  -- GLAGOLITIC SMALL LETTER SMALL YUS -  ["ⱕ"] = "y̨",  -- GLAGOLITIC SMALL LETTER SMALL YUS WITH TAIL  -  ["ⱖ"] = "??", -- GLAGOLITIC SMALL LETTER YO -  ["ⱗ"] = "ję", -- GLAGOLITIC SMALL LETTER IOTATED SMALL YU -  ["ⱘ"] = "ǫ",  -- GLAGOLITIC SMALL LETTER BIG YUS -  ["ⱙ"] = "jǫ", -- GLAGOLITIC SMALL LETTER IOTATED BIG YUS -  ["ⱚ"] = "th", -- GLAGOLITIC SMALL LETTER FITA -  ["ⱛ"] = "ü",  -- GLAGOLITIC SMALL LETTER IZHITSA -  ["ⱜ"] = "??", -- GLAGOLITIC SMALL LETTER SHTAPIC -  ["ⱝ"] = "??", -- GLAGOLITIC SMALL LETTER TROKUTASTI A -  ["ⱞ"] = "m",  -- GLAGOLITIC SMALL LETTER LATINATE MYSLITE -} - -translit.tables["Glagolica transliteration for OCS lowercase"] = translit.ocs_gla_low - ------------------------------------------------- --- Uppercase (?!) 
Glagolitic Transliteration  -- ------------------------------------------------- - -translit.ocs_gla_upp = { -  ["Ⰰ"] = "A",  -- GLAGOLITIC CAPITAL LETTER AZU -  ["Ⰱ"] = "B",  -- GLAGOLITIC CAPITAL LETTER BUKY -  ["Ⰲ"] = "V",  -- GLAGOLITIC CAPITAL LETTER VEDE -  ["Ⰳ"] = "G",  -- GLAGOLITIC CAPITAL LETTER GLAGOLI -  ["Ⰴ"] = "D",  -- GLAGOLITIC CAPITAL LETTER DOBRO -  ["Ⰵ"] = "E",  -- GLAGOLITIC CAPITAL LETTER YESTU -  ["Ⰶ"] = "Ž",  -- GLAGOLITIC CAPITAL LETTER ZHIVETE -  ["Ⰷ"] = "Ʒ",  -- GLAGOLITIC CAPITAL LETTER DZELO -  ["Ⰸ"] = "Z",  -- GLAGOLITIC CAPITAL LETTER ZEMLJA -  ["Ⰹ"] = "I",  -- GLAGOLITIC CAPITAL LETTER IZHE -  ["Ⰺ"] = "I",  -- GLAGOLITIC CAPITAL LETTER INITIAL IZHE -  ["Ⰻ"] = "I",  -- GLAGOLITIC CAPITAL LETTER I -  ["Ⰼ"] = "G’", -- GLAGOLITIC CAPITAL LETTER DJERVI -  ["Ⰽ"] = "K",  -- GLAGOLITIC CAPITAL LETTER KAKO -  ["Ⰾ"] = "L",  -- GLAGOLITIC CAPITAL LETTER LJUDIJE -  ["Ⰿ"] = "M",  -- GLAGOLITIC CAPITAL LETTER MYSLITE -  ["Ⱀ"] = "N",  -- GLAGOLITIC CAPITAL LETTER NASHI -  ["Ⱁ"] = "O",  -- GLAGOLITIC CAPITAL LETTER ONU -  ["Ⱂ"] = "P",  -- GLAGOLITIC CAPITAL LETTER POKOJI -  ["Ⱃ"] = "R",  -- GLAGOLITIC CAPITAL LETTER RITSI -  ["Ⱄ"] = "S",  -- GLAGOLITIC CAPITAL LETTER SLOVO -  ["Ⱅ"] = "T",  -- GLAGOLITIC CAPITAL LETTER TVRIDO -  ["Ⱆ"] = "U",  -- GLAGOLITIC CAPITAL LETTER UKU -  ["Ⱇ"] = "F",  -- GLAGOLITIC CAPITAL LETTER FRITU -  ["Ⱈ"] = "X",  -- GLAGOLITIC CAPITAL LETTER HERU -  ["Ⱉ"] = "O",  -- GLAGOLITIC CAPITAL LETTER OTU -  ["Ⱊ"] = "?",  -- GLAGOLITIC CAPITAL LETTER PE -  ["Ⱋ"] = "Št", -- GLAGOLITIC CAPITAL LETTER SHTA -  ["Ⱌ"] = "C",  -- GLAGOLITIC CAPITAL LETTER TSI -  ["Ⱍ"] = "Č",  -- GLAGOLITIC CAPITAL LETTER CHRIVI -  ["Ⱎ"] = "Š",  -- GLAGOLITIC CAPITAL LETTER SHA -  ["Ⱏ"] = "Ъ",  -- GLAGOLITIC CAPITAL LETTER YERU -  ["Ⱐ"] = "Ь",  -- GLAGOLITIC CAPITAL LETTER YERI -  ["Ⱑ"] = "Ě",  -- GLAGOLITIC CAPITAL LETTER YATI -  ["Ⱒ"] = "X",  -- GLAGOLITIC CAPITAL LETTER SPIDERY HA -  ["Ⱓ"] = "Ju", -- GLAGOLITIC CAPITAL LETTER YU -  ["Ⱔ"] = "Ę",  -- 
GLAGOLITIC CAPITAL LETTER SMALL YUS -  ["Ⱕ"] = "Y̨",  -- GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL -  ["Ⱖ"] = "??", -- GLAGOLITIC CAPITAL LETTER YO -  ["Ⱗ"] = "Ję", -- GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS -  ["Ⱘ"] = "Ǫ",  -- GLAGOLITIC CAPITAL LETTER BIG YUS -  ["Ⱙ"] = "Jǫ", -- GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS -  ["Ⱚ"] = "Th", -- GLAGOLITIC CAPITAL LETTER FITA -  ["Ⱛ"] = "Ü",  -- GLAGOLITIC CAPITAL LETTER IZHITSA -  ["Ⱜ"] = "??", -- GLAGOLITIC CAPITAL LETTER SHTAPIC -  ["Ⱝ"] = "??", -- GLAGOLITIC CAPITAL LETTER TROKUTASTI A -  ["Ⱞ"] = "M",  -- GLAGOLITIC CAPITAL LETTER LATINATE MYSLIT -} - -translit.tables["Glagolica transliteration for OCS uppercase"] = translit.ocs_gla_upp - -\stopluacode - -%-===========================================================================-- -%-                              Greek                                        -- -%-===========================================================================-- - -\startluacode - --- Note that the Greek transliteration mapping isn't bijective so transliterated --- texts won't be reversible.  (Shouldn't be impossible to make one up using --- diacritics on latin characters to represent all possible combinations of --- Greek breathings + accents.)  
- --- Good reading on composed / precombined unicode: ---  http://www.tlg.uci.edu/~opoudjis/unicode/unicode_gaps.html#precomposed - -------------------------------------------------- --- Lowercase Greek Initial Position Diphthongs -- -------------------------------------------------- - -translit.gr_di_in_low = { -  [" αὑ"] = " hau", -  [" αὕ"] = " hau", -  [" αὓ"] = " hau", -  [" αὗ"] = " hau", -  [" εὑ"] = " heu", -  [" εὕ"] = " heu", -  [" εὓ"] = " heu", -  [" εὗ"] = " heu", -  [" ηὑ"] = " hēu", -  [" ηὕ"] = " hēu", -  [" ηὓ"] = " hēu", -  [" ηὗ"] = " hēu", -  [" οὑ"] = " hu", -  [" οὕ"] = " hu", -  [" οὓ"] = " hu", -  [" οὗ"] = " hu", -  [" ωὑ"] = " hōu", -  [" ωὕ"] = " hōu", -  [" ωὓ"] = " hōu", -  [" ωὗ"] = " hōu" -} - -translit.tables["Greek transliteration initial breathing diphthongs lowercase"] = translit.gr_di_in_low - -------------------------------------------------- --- Uppercase Greek Initial Position Diphthongs -- -------------------------------------------------- - -translit.gr_di_in_upp = { -  [" Αὑ"] = " Hau", -  [" Αὕ"] = " Hau", -  [" Αὓ"] = " Hau", -  [" Αὗ"] = " Hau", -  [" Εὑ"] = " Heu", -  [" Εὕ"] = " Heu", -  [" Εὓ"] = " Heu", -  [" Εὗ"] = " Heu", -  [" Ηὑ"] = " Hēu", -  [" Ηὕ"] = " Hēu", -  [" Ηὓ"] = " Hēu", -  [" Ηὗ"] = " Hēu", -  [" Οὑ"] = " Hu", -  [" Οὕ"] = " Hu", -  [" Οὓ"] = " Hu", -  [" Οὗ"] = " Hu", -  [" Ωὑ"] = " Hōu", -  [" Ωὕ"] = " Hōu", -  [" Ωὓ"] = " Hōu", -  [" Ωὗ"] = " Hōu" -} - -translit.tables["Greek transliteration initial breathing diphthongs uppercase"] = translit.gr_di_in_upp - ---------------------------------------- --- Lowercase Greek Initial Position  -- ---------------------------------------- - -translit.gr_in_low = { -  [" ἁ"] = " ha", -  [" ἅ"] = " ha", -  [" ἃ"] = " ha", -  [" ἇ"] = " ha", -  [" ᾁ"] = " ha", -  [" ᾅ"] = " ha", -  [" ᾃ"] = " ha", -  [" ᾇ"] = " ha", -  [" ἑ"] = " he", -  [" ἕ"] = " he", -  [" ἓ"] = " he", -  [" ἡ"] = " hē", -  [" ἥ"] = " hē", -  [" ἣ"] = " hē", -  [" ἧ"] = " hē", -  [" ᾑ"] = " 
hē", -  [" ᾕ"] = " hē", -  [" ᾓ"] = " hē", -  [" ᾗ"] = " hē", -  [" ἱ"] = " hi", -  [" ἵ"] = " hi", -  [" ἳ"] = " hi", -  [" ἷ"] = " hi", -  [" ὁ"] = " ho", -  [" ὅ"] = " ho", -  [" ὃ"] = " ho", -  [" ὑ"] = " hy", -  [" ὕ"] = " hy", -  [" ὓ"] = " hy", -  [" ὗ"] = " hy", -  [" ὡ"] = " hō", -  [" ὥ"] = " hō", -  [" ὣ"] = " hō", -  [" ὧ"] = " hō", -  [" ᾡ"] = " hō", -  [" ᾥ"] = " hō", -  [" ᾣ"] = " hō", -  [" ᾧ"] = " hō", -} - -translit.tables["Greek transliteration initial breathing lowercase"] = translit.gr_in_low - ---------------------------------------- --- Uppercase Greek Initial Position  -- ---------------------------------------- - -translit.gr_in_upp = { -  [" Ἁ"] = " Ha", -  [" Ἅ"] = " Ha", -  [" Ἃ"] = " Ha", -  [" Ἇ"] = " Ha", -  [" ᾉ"] = " Ha", -  [" ᾍ"] = " Ha", -  [" ᾋ"] = " Ha", -  [" ᾏ"] = " Ha", -  [" Ἑ"] = " He", -  [" Ἕ"] = " He", -  [" Ἓ"] = " He", -  [" Ἡ"] = " Hē", -  [" Ἥ"] = " Hē", -  [" Ἣ"] = " Hē", -  [" Ἧ"] = " Hē", -  [" ᾙ"] = " Hē", -  [" ᾝ"] = " Hē", -  [" ᾛ"] = " Hē", -  [" ᾟ"] = " Hē", -  [" Ἱ"] = " Hi", -  [" Ἵ"] = " Hi", -  [" Ἳ"] = " Hi", -  [" Ἷ"] = " Hi", -  [" Ὁ"] = " Ho", -  [" Ὅ"] = " Ho", -  [" Ὃ"] = " Ho", -  [" Ὑ"] = " Hy", -  [" Ὕ"] = " Hy", -  [" Ὓ"] = " Hy", -  [" Ὗ"] = " Hy", -  [" Ὡ"] = " Hō", -  [" Ὥ"] = " Hō", -  [" Ὣ"] = " Hō", -  [" Ὧ"] = " Hō", -  [" ᾩ"] = " Hō", -  [" ᾭ"] = " Hō", -  [" ᾫ"] = " Hō", -  [" ᾯ"] = " Hō", -} - -translit.tables["Greek transliteration initial breathing uppercase"] = translit.gr_in_upp - ---------------------------------- --- Lowercase Greek Diphthongs  -- ---------------------------------- - -translit.gr_di_low = { -  ["αυ"] = "au", -  ["αύ"] = "au", -  ["αὺ"] = "au", -  ["αῦ"] = "au", -  ["αὐ"] = "au", -  ["αὔ"] = "au", -  ["αὒ"] = "au", -  ["αὖ"] = "au", -  ["αὑ"] = "au", -  ["αὕ"] = "au", -  ["αὓ"] = "au", -  ["αὗ"] = "au", -  ["ευ"] = "eu", -  ["εύ"] = "eu", -  ["εὺ"] = "eu", -  ["εῦ"] = "eu", -  ["εὐ"] = "eu", -  ["εὔ"] = "eu", -  ["εὒ"] = "eu", -  ["εὖ"] = "eu", -  ["εὑ"] = "eu", 
-  ["εὕ"] = "eu", -  ["εὓ"] = "eu", -  ["εὗ"] = "eu", -  ["ηυ"] = "ēu", -  ["ηύ"] = "ēu", -  ["ηὺ"] = "ēu", -  ["ηῦ"] = "ēu", -  ["ηὐ"] = "ēu", -  ["ηὔ"] = "ēu", -  ["ηὒ"] = "ēu", -  ["ηὖ"] = "ēu", -  ["ηὑ"] = "ēu", -  ["ηὕ"] = "ēu", -  ["ηὓ"] = "ēu", -  ["ηὗ"] = "ēu", -  ["ου"] = "u", -  ["ου"] = "u", -  ["ου"] = "u", -  ["ού"] = "u", -  ["οὺ"] = "u", -  ["οῦ"] = "u", -  ["οὐ"] = "u", -  ["οὔ"] = "u", -  ["οὒ"] = "u", -  ["οὖ"] = "u", -  ["οὑ"] = "u", -  ["οὕ"] = "u", -  ["οὓ"] = "u", -  ["οὗ"] = "u", -  ["ωυ"] = "ōu", -  ["ωύ"] = "ōu", -  ["ωὺ"] = "ōu", -  ["ωῦ"] = "ōu", -  ["ωὐ"] = "ōu", -  ["ωὔ"] = "ōu", -  ["ωὒ"] = "ōu", -  ["ωὖ"] = "ōu", -  ["ωὑ"] = "ōu", -  ["ωὕ"] = "ōu", -  ["ωὓ"] = "ōu", -  ["ωὗ"] = "ōu", -  ["ῤῥ"] = "rrh", -} - -translit.tables["Greek transliteration diphthongs lowercase"] = translit.gr_in_low - ---------------------------------- --- Uppercase Greek Diphthongs  -- ---------------------------------- - -translit.gr_di_upp = { -  ["Αυ"] = "Au", -  ["Αύ"] = "Au", -  ["Αὺ"] = "Au", -  ["Αῦ"] = "Au", -  ["Αὐ"] = "Au", -  ["Αὔ"] = "Au", -  ["Αὒ"] = "Au", -  ["Αὖ"] = "Au", -  ["Αὑ"] = "Au", -  ["Αὕ"] = "Au", -  ["Αὓ"] = "Au", -  ["Αὗ"] = "Au", -  ["Ευ"] = "Eu", -  ["Εύ"] = "Eu", -  ["Εὺ"] = "Eu", -  ["Εῦ"] = "Eu", -  ["Εὐ"] = "Eu", -  ["Εὔ"] = "Eu", -  ["Εὒ"] = "Eu", -  ["Εὖ"] = "Eu", -  ["Εὑ"] = "Eu", -  ["Εὕ"] = "Eu", -  ["Εὓ"] = "Eu", -  ["Εὗ"] = "Eu", -  ["Ηυ"] = "Ēu", -  ["Ηύ"] = "Ēu", -  ["Ηὺ"] = "Ēu", -  ["Ηῦ"] = "Ēu", -  ["Ηὐ"] = "Ēu", -  ["Ηὔ"] = "Ēu", -  ["Ηὒ"] = "Ēu", -  ["Ηὖ"] = "Ēu", -  ["Ηὑ"] = "Ēu", -  ["Ηὕ"] = "Ēu", -  ["Ηὓ"] = "Ēu", -  ["Ηὗ"] = "Ēu", -  ["Ου"] = "U", -  ["Ου"] = "U", -  ["Ου"] = "U", -  ["Ού"] = "U", -  ["Οὺ"] = "U", -  ["Οῦ"] = "U", -  ["Οὐ"] = "U", -  ["Οὔ"] = "U", -  ["Οὒ"] = "U", -  ["Οὖ"] = "U", -  ["Οὑ"] = "U", -  ["Οὕ"] = "U", -  ["Οὓ"] = "U", -  ["Οὗ"] = "U", -  ["Ωυ"] = "Ōu", -  ["Ωύ"] = "Ōu", -  ["Ωὺ"] = "Ōu", -  ["Ωῦ"] = "Ōu", -  ["Ωὐ"] = "Ōu", -  ["Ωὔ"] = "Ōu", -  ["Ωὒ"] = "Ōu", -  ["Ωὖ"] = "Ōu", -  
["Ωὑ"] = "Ōu", -  ["Ωὕ"] = "Ōu", -  ["Ωὓ"] = "Ōu", -  ["Ωὗ"] = "Ōu", -} - -translit.tables["Greek transliteration diphthongs uppercase"] = translit.gr_in_upp - --- The following will be used in an option that ensures transcription of --- nasalization, e.g. Ἁγχίσης -> “Anchises” (instead of “Agchises”) -translit.gr_nrule = { -  ["γγ"] = "ng", -  ["γκ"] = "nk", -  ["γξ"] = "nx", -  ["γχ"] = "nch", -} - -translit.tables["Greek transliteration optional nasalization"] = translit.gr_nrule - -\stopluacode - -\startluacode - --------------------------------------- --- Lowercase Greek Transliteration  -- --------------------------------------- - -translit.gr_low = { -  ["α"] = "a", -  ["ά"] = "a", -  ["ὰ"] = "a", -  ["ᾶ"] = "a", -  ["ᾳ"] = "a", -  ["ἀ"] = "a", -  ["ἁ"] = "a", -  ["ἄ"] = "a", -  ["ἂ"] = "a", -  ["ἆ"] = "a", -  ["ἁ"] = "a", -  ["ἅ"] = "a", -  ["ἃ"] = "a", -  ["ἇ"] = "a", -  ["ᾁ"] = "a", -  ["ᾴ"] = "a", -  ["ᾲ"] = "a", -  ["ᾷ"] = "a", -  ["ᾄ"] = "a", -  ["ᾂ"] = "a", -  ["ᾅ"] = "a", -  ["ᾃ"] = "a", -  ["ᾆ"] = "a", -  ["ᾇ"] = "a", -  ["β"] = "b", -  ["γ"] = "g", -  ["δ"] = "d", -  ["ε"] = "e", -  ["έ"] = "e", -  ["ὲ"] = "e", -  ["ἐ"] = "e", -  ["ἔ"] = "e", -  ["ἒ"] = "e", -  ["ἑ"] = "e", -  ["ἕ"] = "e", -  ["ἓ"] = "e", -  ["ζ"] = "z", -  ["η"] = "ē", -  ["η"] = "ē", -  ["ή"] = "ē", -  ["ὴ"] = "ē", -  ["ῆ"] = "ē", -  ["ῃ"] = "ē", -  ["ἠ"] = "ē", -  ["ἤ"] = "ē", -  ["ἢ"] = "ē", -  ["ἦ"] = "ē", -  ["ᾐ"] = "ē", -  ["ἡ"] = "ē", -  ["ἥ"] = "ē", -  ["ἣ"] = "ē", -  ["ἧ"] = "ē", -  ["ᾑ"] = "ē", -  ["ῄ"] = "ē", -  ["ῂ"] = "ē", -  ["ῇ"] = "ē", -  ["ᾔ"] = "ē", -  ["ᾒ"] = "ē", -  ["ᾕ"] = "ē", -  ["ᾓ"] = "ē", -  ["ᾖ"] = "ē", -  ["ᾗ"] = "ē", -  ["θ"] = "th", -  ["ι"] = "i", -  ["ί"] = "i", -  ["ὶ"] = "i", -  ["ῖ"] = "i", -  ["ἰ"] = "i", -  ["ἴ"] = "i", -  ["ἲ"] = "i", -  ["ἶ"] = "i", -  ["ἱ"] = "i", -  ["ἵ"] = "i", -  ["ἳ"] = "i", -  ["ἷ"] = "i", -  ["ϊ"] = "i", -  ["ΐ"] = "i", -  ["ῒ"] = "i", -  ["ῗ"] = "i", -  ["κ"] = "k", -  ["λ"] = "l", -  ["μ"] = "m", -  ["ν"] = "n", -  
["ξ"] = "x", -  ["ο"] = "o", -  ["ό"] = "o", -  ["ὸ"] = "o", -  ["ὀ"] = "o", -  ["ὄ"] = "o", -  ["ὂ"] = "o", -  ["ὁ"] = "o", -  ["ὅ"] = "o", -  ["ὃ"] = "o", -  ["π"] = "p", -  ["ρ"] = "r", -  ["ῤ"] = "r", -  ["ῥ"] = "rh", -  ["σ"] = "s", -  ["ς"] = "s", -  ["τ"] = "t", -  ["υ"] = "y", -  ["ύ"] = "y", -  ["ὺ"] = "y", -  ["ῦ"] = "y", -  ["ὐ"] = "y", -  ["ὔ"] = "y", -  ["ὒ"] = "y", -  ["ὖ"] = "y", -  ["ὑ"] = "y", -  ["ὕ"] = "y", -  ["ὓ"] = "y", -  ["ὗ"] = "y", -  ["ϋ"] = "y", -  ["ΰ"] = "y", -  ["ῢ"] = "y", -  ["ῧ"] = "y", -  ["φ"] = "ph", -  ["χ"] = "ch", -  ["ψ"] = "ps", -  ["ω"] = "ō", -  ["ώ"] = "ō", -  ["ὼ"] = "ō", -  ["ῶ"] = "ō", -  ["ῳ"] = "ō", -  ["ὠ"] = "ō", -  ["ὤ"] = "ō", -  ["ὢ"] = "ō", -  ["ὦ"] = "ō", -  ["ᾠ"] = "ō", -  ["ὡ"] = "ō", -  ["ὥ"] = "ō", -  ["ὣ"] = "ō", -  ["ὧ"] = "ō", -  ["ᾡ"] = "ō", -  ["ῴ"] = "ō", -  ["ῲ"] = "ō", -  ["ῷ"] = "ō", -  ["ᾤ"] = "ō", -  ["ᾢ"] = "ō", -  ["ᾥ"] = "ō", -  ["ᾣ"] = "ō", -  ["ᾦ"] = "ō", -  ["ᾧ"] = "ō", -} - -translit.tables["Greek transliteration lowercase"] = translit.gr_low - --------------------------------------- --- Uppercase Greek Transliteration  -- --------------------------------------- - -translit.gr_upp = { -  ["Α"] = "A", -  ["Ά"] = "A", -  ["Ὰ"] = "A", ---["ᾶ"] = "A", -  ["ᾼ"] = "A", -  ["Ἀ"] = "A", -  ["Ἁ"] = "A", -  ["Ἄ"] = "A", -  ["Ἂ"] = "A", -  ["Ἆ"] = "A", -  ["Ἁ"] = "A", -  ["Ἅ"] = "A", -  ["Ἃ"] = "A", -  ["Ἇ"] = "A", -  ["ᾉ"] = "A", ---["ᾴ"] = "A", -- I’d be very happy if anybody could explain to me ---["ᾲ"] = "A", -- why there's Ά, ᾌ and ᾼ but no “A + iota subscript ---["ᾷ"] = "A", -- + acute” …, same for Η, Υ and Ω + diacritica. 
-  ["ᾌ"] = "A", -  ["ᾊ"] = "A", -  ["ᾍ"] = "A", -  ["ᾋ"] = "A", -  ["ᾎ"] = "A", -  ["ᾏ"] = "A", -  ["Β"] = "B", -  ["Γ"] = "G", -  ["Δ"] = "D", -  ["Ε"] = "E", -  ["Έ"] = "E", -  ["Ὲ"] = "E", -  ["Ἐ"] = "E", -  ["Ἔ"] = "E", -  ["Ἒ"] = "E", -  ["Ἑ"] = "E", -  ["Ἕ"] = "E", -  ["Ἓ"] = "E", -  ["Ζ"] = "Z", -  ["Η"] = "Ē", -  ["Η"] = "Ē", -  ["Ή"] = "Ē", -  ["Ὴ"] = "Ē", ---["ῆ"] = "Ē", -  ["ῌ"] = "Ē", -  ["Ἠ"] = "Ē", -  ["Ἤ"] = "Ē", -  ["Ἢ"] = "Ē", -  ["Ἦ"] = "Ē", -  ["ᾘ"] = "Ē", -  ["Ἡ"] = "Ē", -  ["Ἥ"] = "Ē", -  ["Ἣ"] = "Ē", -  ["Ἧ"] = "Ē", -  ["ᾙ"] = "Ē", ---["ῄ"] = "Ē", ---["ῂ"] = "Ē", ---["ῇ"] = "Ē", -  ["ᾜ"] = "Ē", -  ["ᾚ"] = "Ē", -  ["ᾝ"] = "Ē", -  ["ᾛ"] = "Ē", -  ["ᾞ"] = "Ē", -  ["ᾟ"] = "Ē", -  ["Θ"] = "Th", -  ["Ι"] = "I", -  ["Ί"] = "I", -  ["Ὶ"] = "I", ---["ῖ"] = "I", -  ["Ἰ"] = "I", -  ["Ἴ"] = "I", -  ["Ἲ"] = "I", -  ["Ἶ"] = "I", -  ["Ἱ"] = "I", -  ["Ἵ"] = "I", -  ["Ἳ"] = "I", -  ["Ἷ"] = "I", -  ["Ϊ"] = "I", ---["ΐ"] = "I", ---["ῒ"] = "I", ---["ῗ"] = "I", -  ["Κ"] = "K", -  ["Λ"] = "L", -  ["Μ"] = "M", -  ["Ν"] = "N", -  ["Ξ"] = "X", -  ["Ο"] = "O", -  ["Ό"] = "O", -  ["Ὸ"] = "O", -  ["Ὀ"] = "O", -  ["Ὄ"] = "O", -  ["Ὂ"] = "O", -  ["Ὁ"] = "O", -  ["Ὅ"] = "O", -  ["Ὃ"] = "O", -  ["Π"] = "P", -  ["Ρ"] = "R", ---["ῤ"] = "R", -  ["Ῥ"] = "Rh", -  ["Σ"] = "S", -  ["Σ"] = "S", -  ["Τ"] = "T", -  ["Υ"] = "Y", -  ["Ύ"] = "Y", -  ["Ὺ"] = "Y", ---["ῦ"] = "Y", ---["ὐ"] = "Y", ---["ὔ"] = "Y", ---["ὒ"] = "Y", ---["ὖ"] = "Y", -  ["Ὑ"] = "Y", -  ["Ὕ"] = "Y", -  ["Ὓ"] = "Y", -  ["Ὗ"] = "Y", -  ["Ϋ"] = "Y", ---["ΰ"] = "Y", ---["ῢ"] = "Y", ---["ῧ"] = "Y", -  ["Φ"] = "Ph", -  ["Χ"] = "Ch", -  ["Ψ"] = "Ps", -  ["Ω"] = "Ō", -  ["Ώ"] = "Ō", -  ["Ὼ"] = "Ō", ---["ῶ"] = "Ō", -  ["ῼ"] = "Ō", -  ["Ὠ"] = "Ō", -  ["Ὤ"] = "Ō", -  ["Ὢ"] = "Ō", -  ["Ὦ"] = "Ō", -  ["ᾨ"] = "Ō", -  ["Ὡ"] = "Ō", -  ["Ὥ"] = "Ō", -  ["Ὣ"] = "Ō", -  ["Ὧ"] = "Ō", -  ["ᾩ"] = "Ō", ---["ῴ"] = "Ō", ---["ῲ"] = "Ō", ---["ῷ"] = "Ō", -  ["ᾬ"] = "Ō", -  ["ᾪ"] = "Ō", -  ["ᾭ"] = "Ō", -  ["ᾫ"] = "Ō", -  ["ᾮ"] = "Ō", -  ["ᾯ"] = 
"Ō", -} - -translit.tables["Greek transliteration uppercase"] = translit.gr_upp - ------------- --- Varia  -- ------------- - -translit.gr_other = { -  ["ϝ"] = "w", -  ["Ϝ"] = "W", -  ["ϙ"] = "q", -  ["Ϙ"] = "Q", -  ["ϡ"] = "ss", -  ["Ϡ"] = "Ss", -} - -translit.tables["Greek transliteration archaic characters"] = translit.gr_other - -\stopluacode - -%-===========================================================================-- -%-                              End Of Tables                                -- -%-===========================================================================-- +%D Tables have been migrated into separate lua files. +%D See {\tt +%D trans_tables_glag.lua +%D trans_tables_gr.lua +%D trans_tables_iso9.lua +%D trans_tables_scntfc.lua +%D and +%D trans_tables_trsc.lua.}  %D The function \type{translit.subst(s, t)} is used to replace any key of  %D \type{t} that occurs in \type{s} with the according value of \type{t}. @@ -1796,14 +104,17 @@ function translit.subst (text, tab)  end  \stopluacode -%D \type{translit.add_table(t, ta)} is used to build the final substitution tables +%D \type{translit.add_table(t, ...)} is used to build the final substitution tables  %D from those we defined earlier; any keys in the previous table \type{t} are -%D overwritten if they exist in the added table \type{ta}, too. +%D overwritten if they exist in the added tables \type{ta}, too.  \startluacode -function translit.add_table (t, t_add) -  for key, value in pairs (t_add) do -    t[key] = value + +function translit.add_table (t, ...) 
+  for _,tab in ipairs(arg) do +    for key, value in pairs (tab) do +      t[key] = value +    end    end    return t  end @@ -1859,9 +170,9 @@ function translit.show_tab (tab)          context.bTR()            context.bTC() context(cnt)           context.eTC()            context.bTC() context(key)           context.eTC() -          context.bTC() context(utf.len(key))  context.eTC() +          context.bTC() context(string.len(key))  context.eTC()            context.bTC() context(val)           context.eTC() -          context.bTC() context(utf.len(val))  context.eTC() +          context.bTC() context(string.len(val))  context.eTC()            context.bTC() context(strempty(key)) context.eTC()            context.bTC() context(strempty(val)) context.eTC()          context.eTR() @@ -1888,121 +199,63 @@ end  \startluacode  function translit.show_all_tabs () -  -- Output all translation tables that are registered within translit.tables. -  -- This will be quite unordered.  -  context ("\\chapter{Transliterator Showing All Tables}") -  for key, val in pairs(translit.tables) do -    context ("\\section{" .. key .. "}") -    translit.show_tab (val) -  end +    dofile "trans_tables_iso9.lua" +    dofile "trans_tables_trsc.lua" +    dofile "trans_tables_scntfc.lua" +    dofile "trans_tables_trsc.lua" +    dofile "trans_tables_glag.lua" +    dofile "trans_tables_gr.lua" +    translit.gen_rules_en() +    translit.gen_rules_de() +    -- Output all translation tables that are registered within translit.tables. +    -- This will be quite unordered.  +    context ("\\chapter{Transliterator Showing All Tables}") +    for key, val in pairs(translit.tables) do +        context ("\\section{" .. key .. 
"}") +        translit.show_tab (val) +    end  end  \stopluacode -%D Now to the function \type{translit.transliterate(m, t)}: it constitutes the -%D metafunction that is called by the \type{\transliterate} command and itself -%D chooses the substitution tables according to the \type{m} parameter and -%D applies them in a given order on the string \type{m}. -%D (The correct order in multi-pass substitution has to be enforced because the -%D tables are in fact unordered dictionaries.) +%D \type{translit.transliterate(m, t)} constitutes the +%D metafunction that is called by the \type{\transliterate} command. +%D It loads the transliteration tables according to \type{method} and calls the +%D corresponding function.  \startluacode  function translit.transliterate (method, text) -  local repl_tab = {} -  if method == "ru" then -    translit.add_table(repl_tab, translit.ru_upp) -    translit.add_table(repl_tab, translit.ru_low) -  elseif method == "ru_old" then -    translit.add_table(repl_tab, translit.ru_upp) -    translit.add_table(repl_tab, translit.ru_low)  -    translit.add_table(repl_tab, translit.ru_old_upp)  -    translit.add_table(repl_tab, translit.ru_old_low)  -  elseif method == "all" then -    translit.add_table(repl_tab, translit.ru_upp) -    translit.add_table(repl_tab, translit.ru_low)  -    translit.add_table(repl_tab, translit.ru_old_upp)  -    translit.add_table(repl_tab, translit.ru_old_low)  -    translit.add_table(repl_tab, translit.non_ru_upp) -    translit.add_table(repl_tab, translit.non_ru_low)  -  elseif method == "ru_transcript_de" then -    text = translit.subst (text, translit.ru_trsc_jrule) -    text = translit.subst (text, translit.ru_trsc_irule) -    text = translit.subst (text, translit.ru_trsc_srule) -    text = translit.subst (text, translit.ru_trsc_sharpsrule) -    text = translit.subst (text, translit.ru_trsc_jerule) ---  text = translit.subst (text, translit.ru_trsc_jorule) -    translit.add_table(repl_tab, 
translit.ru_trsc_upp_first) -    translit.add_table(repl_tab, translit.ru_trsc_low_first) -    text = translit.subst (text, repl_tab) -    repl_tab = {} -    translit.add_table(repl_tab, translit.ru_trsc_upp) -    translit.add_table(repl_tab, translit.ru_trsc_low) -  elseif method == "ru_transcript_en" then -    text = translit.subst (text, translit.ru_trsc_en_jerule) -    translit.add_table(repl_tab, translit.ru_trsc_en_upp_first) -    translit.add_table(repl_tab, translit.ru_trsc_en_low_first) -    translit.add_table(repl_tab, translit.ru_trsc_en_upp) -    translit.add_table(repl_tab, translit.ru_trsc_en_low) -  elseif method == "iso9_ocs" then -    translit.add_table(repl_tab, translit.ru_upp) -    translit.add_table(repl_tab, translit.ru_low)  -    translit.add_table(repl_tab, translit.ru_old_upp) -    translit.add_table(repl_tab, translit.ru_old_low)  -    translit.add_table(repl_tab, translit.non_ru_upp) -    translit.add_table(repl_tab, translit.non_ru_low)  -    translit.add_table(repl_tab, translit.ocs_add_upp) -    translit.add_table(repl_tab, translit.ocs_add_low)  -  elseif method == "ocs" then -    translit.add_table(repl_tab, translit.ocs_uk)  -    text = translit.subst (text, repl_tab) -    repl_tab = {} -    translit.add_table(repl_tab, translit.ocs_low)  -    translit.add_table(repl_tab, translit.ocs_upp)  -  elseif method == "ocs_gla" then -    translit.add_table(repl_tab, translit.ocs_gla_low)  -    translit.add_table(repl_tab, translit.ocs_gla_upp)  -  elseif method == "ru_cz" then -    translit.add_table(repl_tab, translit.ru_trsc_cz_low)  -    translit.add_table(repl_tab, translit.ru_trsc_cz_upp)  -  elseif method == "ocs_cz" then -    translit.add_table(repl_tab, translit.ru_trsc_cz_low)  -    translit.add_table(repl_tab, translit.ru_trsc_cz_upp)  -    translit.add_table(repl_tab, translit.ru_trsc_cz_add_low)  -    translit.add_table(repl_tab, translit.ru_trsc_cz_add_upp)  -  elseif method == "gr" then -    translit.add_table(repl_tab, 
translit.gr_di_in_low)  -    translit.add_table(repl_tab, translit.gr_di_in_upp)  -    translit.add_table(repl_tab, translit.gr_in_low)  -    translit.add_table(repl_tab, translit.gr_in_upp)  -    text = translit.subst (text, repl_tab) -    repl_tab = {} -    translit.add_table(repl_tab, translit.gr_di_low)  -    translit.add_table(repl_tab, translit.gr_di_upp)  -    text = translit.subst (text, repl_tab) -    repl_tab = {} -    translit.add_table(repl_tab, translit.gr_low)  -    translit.add_table(repl_tab, translit.gr_upp)  -    translit.add_table(repl_tab, translit.gr_other)  -  elseif method == "gr_n" then -    translit.add_table(repl_tab, translit.gr_di_in_low)  -    translit.add_table(repl_tab, translit.gr_di_in_upp)  -    translit.add_table(repl_tab, translit.gr_in_low)  -    translit.add_table(repl_tab, translit.gr_in_upp)  -    text = translit.subst (text, repl_tab) -    repl_tab = {} -    translit.add_table(repl_tab, translit.gr_di_low)  -    translit.add_table(repl_tab, translit.gr_di_upp)  -    translit.add_table(repl_tab, translit.gr_nrule)  -    text = translit.subst (text, repl_tab) -    repl_tab = {} -    translit.add_table(repl_tab, translit.gr_low)  -    translit.add_table(repl_tab, translit.gr_upp)  -    translit.add_table(repl_tab, translit.gr_other)  +  if      method == "ru"      or +          method == "ru_old"  or  +          method == "all"  +          then +    dofile "trans_tables_iso9.lua" +    text = translit.iso9 (method, text) +  elseif  method == "ru_transcript_de"      or +          method == "ru_transcript_de_exp"  or -- experimental lpeg +          method == "ru_transcript_en"      or +          method == "ru_transcript_en_sub"  or -- old multiple substitution +          method == "ru_cz"                 or +          method == "ocs_cz" +          then +    dofile "trans_tables_trsc.lua" +    text = translit.transcript (method, text) +  elseif  method == "iso9_ocs" or +          method == "ocs"      or +          method == 
"ocs_gla" +          then +    dofile "trans_tables_scntfc.lua" +    text = translit.scientific (method, text) +  elseif  method == "gr"        or +          method == "gr_n" +          then +    dofile "trans_tables_gr.lua" +    text = translit.dogreek (method, text)    end -  text = translit.subst (text, repl_tab) -  tex.print (text) + context (text)  end  \stopluacode +  %D The following will help debugging and reviewing tables.  Make sure your  %D typescript can handle the characters, in general it's no use with Latin  %D Modern which unfortunately provides only a restricted set of the unicode @@ -2011,11 +264,21 @@ end  %D The user-level command to output a single substitution table is  %D \type{\showOneTranslitTab{#1}}.  \define[1]\showOneTranslitTab{% -  \ctxlua{translit.show_tab(#1)}% +  \ctxlua{ +    dofile "trans_tables_iso9.lua" +    dofile "trans_tables_trsc.lua" +    dofile "trans_tables_scntfc.lua" +    dofile "trans_tables_trsc.lua" +    dofile "trans_tables_glag.lua" +    dofile "trans_tables_gr.lua" +    translit.gen_rules_en() +    translit.gen_rules_de() +    translit.show_tab(#1) +  }%  }  %D The user-level command to output all defined tables is -%D \type{\showTranslitTabs{#1}}. +%D \type{\showTranslitTabs}.  \define\showTranslitTabs{%    \ctxlua{translit.show_all_tabs()}%  } @@ -2047,8 +310,8 @@ end  %D leaves the latter unchanged.  Thus, in order to permanently switch to  %D another transliteration style the user would have to set it by calling  %D \type{\setuptransliterate} again. +% Credits for rewriting the TeX code go to Wolfgang as well. -% thanks again, Wolfgang!  
\def\dotransliterate[#1]#2{%    \bgroup%    \iffirstargument @@ -2059,28 +322,8 @@ end    \egroup%  } -%\definestartstop[transliterate][% -  %before={\startbuffer}, -  %after={\stopbuffer\transliterate{\getbuffer}} -%] -  \def\transliterate{\dosingleempty\dotransliterate} -%\def\starttransliterate {% -  %\bgroup\dostarttransliterate% -%} - -%\def\stoptransliterate {% -  %\egroup -  %\@EA\transliterate{% -    %\getbuffer[trl]% -  %}% -%} - -%\def\dostarttransliterate{% -  %\dostartbuffer[trl][starttransliterate][stoptransliterate]% -%} -  \def\starttransliterate{%    \bgroup%    \dosingleempty\dostarttransliterate @@ -2091,6 +334,7 @@ end       \setuptransliterate[#1]%     \fi     \language[\TRLhyphenate]% +   %\ctxlua{translit.transliterate("\TRLmode","\luaescapestring{#2}")}%     \ctxlua{translit.transliterate("\TRLmode","\luaescapestring{#2}")}%    \egroup%  } diff --git a/tex/context/third/transliterator/trans_tables_glag.lua b/tex/context/third/transliterator/trans_tables_glag.lua new file mode 100644 index 0000000..1b1ff18 --- /dev/null +++ b/tex/context/third/transliterator/trans_tables_glag.lua @@ -0,0 +1,122 @@ + +--===========================================================================-- +--                              Glagolica                                    -- +--===========================================================================-- + +------------------------------------------- +-- Lowercase Glagolitic Transliteration  -- +------------------------------------------- + +translit.ocs_gla_low = { +  ["ⰰ"] = "a",  -- GLAGOLITIC SMALL LETTER AZU +  ["ⰱ"] = "b",  -- GLAGOLITIC SMALL LETTER BUKY +  ["ⰲ"] = "v",  -- GLAGOLITIC SMALL LETTER VEDE +  ["ⰳ"] = "g",  -- GLAGOLITIC SMALL LETTER GLAGOLI +  ["ⰴ"] = "d",  -- GLAGOLITIC SMALL LETTER DOBRO +  ["ⰵ"] = "e",  -- GLAGOLITIC SMALL LETTER YESTU +  ["ⰶ"] = "ž",  -- GLAGOLITIC SMALL LETTER ZHIVETE +  ["ⰷ"] = "ʒ",  -- GLAGOLITIC SMALL LETTER DZELO +  ["ⰸ"] = "z",  -- GLAGOLITIC SMALL LETTER ZEMLJA + 
 ["ⰹ"] = "i",  -- GLAGOLITIC SMALL LETTER IZHE +  ["ⰺ"] = "i",  -- GLAGOLITIC SMALL LETTER INITIAL IZHE +  ["ⰻ"] = "i",  -- GLAGOLITIC SMALL LETTER I +  ["ⰼ"] = "g’", -- GLAGOLITIC SMALL LETTER DJERVI +  ["ⰽ"] = "k",  -- GLAGOLITIC SMALL LETTER KAKO +  ["ⰾ"] = "l",  -- GLAGOLITIC SMALL LETTER LJUDIJE +  ["ⰿ"] = "m",  -- GLAGOLITIC SMALL LETTER MYSLITE +  ["ⱀ"] = "n",  -- GLAGOLITIC SMALL LETTER NASHI +  ["ⱁ"] = "o",  -- GLAGOLITIC SMALL LETTER ONU +  ["ⱂ"] = "p",  -- GLAGOLITIC SMALL LETTER POKOJI +  ["ⱃ"] = "r",  -- GLAGOLITIC SMALL LETTER RITSI +  ["ⱄ"] = "s",  -- GLAGOLITIC SMALL LETTER SLOVO +  ["ⱅ"] = "t",  -- GLAGOLITIC SMALL LETTER TVRIDO +  ["ⱆ"] = "u",  -- GLAGOLITIC SMALL LETTER UKU +  ["ⱇ"] = "f",  -- GLAGOLITIC SMALL LETTER FRITU +  ["ⱈ"] = "x",  -- GLAGOLITIC SMALL LETTER HERU +  ["ⱉ"] = "o",  -- GLAGOLITIC SMALL LETTER OTU +  ["ⱊ"] = "?",  -- GLAGOLITIC SMALL LETTER PE +  ["ⱋ"] = "št", -- GLAGOLITIC SMALL LETTER SHTA +  ["ⱌ"] = "c",  -- GLAGOLITIC SMALL LETTER TSI +  ["ⱍ"] = "č",  -- GLAGOLITIC SMALL LETTER CHRIVI +  ["ⱎ"] = "š",  -- GLAGOLITIC SMALL LETTER SHA +  ["ⱏ"] = "ъ",  -- GLAGOLITIC SMALL LETTER YERU +  ["ⱐ"] = "ь",  -- GLAGOLITIC SMALL LETTER YERI +  ["ⱑ"] = "ě",  -- GLAGOLITIC SMALL LETTER YATI +  ["ⱒ"] = "x",  -- GLAGOLITIC SMALL LETTER SPIDERY HA +  ["ⱓ"] = "ju", -- GLAGOLITIC SMALL LETTER YU +  ["ⱔ"] = "ę",  -- GLAGOLITIC SMALL LETTER SMALL YUS +  ["ⱕ"] = "y̨",  -- GLAGOLITIC SMALL LETTER SMALL YUS WITH TAIL  +  ["ⱖ"] = "??", -- GLAGOLITIC SMALL LETTER YO +  ["ⱗ"] = "ję", -- GLAGOLITIC SMALL LETTER IOTATED SMALL YU +  ["ⱘ"] = "ǫ",  -- GLAGOLITIC SMALL LETTER BIG YUS +  ["ⱙ"] = "jǫ", -- GLAGOLITIC SMALL LETTER IOTATED BIG YUS +  ["ⱚ"] = "th", -- GLAGOLITIC SMALL LETTER FITA +  ["ⱛ"] = "ü",  -- GLAGOLITIC SMALL LETTER IZHITSA +  ["ⱜ"] = "??", -- GLAGOLITIC SMALL LETTER SHTAPIC +  ["ⱝ"] = "??", -- GLAGOLITIC SMALL LETTER TROKUTASTI A +  ["ⱞ"] = "m",  -- GLAGOLITIC SMALL LETTER LATINATE MYSLITE +} + +translit.tables["Glagolica 
transliteration for OCS lowercase"] = translit.ocs_gla_low + +------------------------------------------------ +-- Uppercase (?!) Glagolitic Transliteration  -- +------------------------------------------------ + +translit.ocs_gla_upp = { +  ["Ⰰ"] = "A",  -- GLAGOLITIC CAPITAL LETTER AZU +  ["Ⰱ"] = "B",  -- GLAGOLITIC CAPITAL LETTER BUKY +  ["Ⰲ"] = "V",  -- GLAGOLITIC CAPITAL LETTER VEDE +  ["Ⰳ"] = "G",  -- GLAGOLITIC CAPITAL LETTER GLAGOLI +  ["Ⰴ"] = "D",  -- GLAGOLITIC CAPITAL LETTER DOBRO +  ["Ⰵ"] = "E",  -- GLAGOLITIC CAPITAL LETTER YESTU +  ["Ⰶ"] = "Ž",  -- GLAGOLITIC CAPITAL LETTER ZHIVETE +  ["Ⰷ"] = "Ʒ",  -- GLAGOLITIC CAPITAL LETTER DZELO +  ["Ⰸ"] = "Z",  -- GLAGOLITIC CAPITAL LETTER ZEMLJA +  ["Ⰹ"] = "I",  -- GLAGOLITIC CAPITAL LETTER IZHE +  ["Ⰺ"] = "I",  -- GLAGOLITIC CAPITAL LETTER INITIAL IZHE +  ["Ⰻ"] = "I",  -- GLAGOLITIC CAPITAL LETTER I +  ["Ⰼ"] = "G’", -- GLAGOLITIC CAPITAL LETTER DJERVI +  ["Ⰽ"] = "K",  -- GLAGOLITIC CAPITAL LETTER KAKO +  ["Ⰾ"] = "L",  -- GLAGOLITIC CAPITAL LETTER LJUDIJE +  ["Ⰿ"] = "M",  -- GLAGOLITIC CAPITAL LETTER MYSLITE +  ["Ⱀ"] = "N",  -- GLAGOLITIC CAPITAL LETTER NASHI +  ["Ⱁ"] = "O",  -- GLAGOLITIC CAPITAL LETTER ONU +  ["Ⱂ"] = "P",  -- GLAGOLITIC CAPITAL LETTER POKOJI +  ["Ⱃ"] = "R",  -- GLAGOLITIC CAPITAL LETTER RITSI +  ["Ⱄ"] = "S",  -- GLAGOLITIC CAPITAL LETTER SLOVO +  ["Ⱅ"] = "T",  -- GLAGOLITIC CAPITAL LETTER TVRIDO +  ["Ⱆ"] = "U",  -- GLAGOLITIC CAPITAL LETTER UKU +  ["Ⱇ"] = "F",  -- GLAGOLITIC CAPITAL LETTER FRITU +  ["Ⱈ"] = "X",  -- GLAGOLITIC CAPITAL LETTER HERU +  ["Ⱉ"] = "O",  -- GLAGOLITIC CAPITAL LETTER OTU +  ["Ⱊ"] = "?",  -- GLAGOLITIC CAPITAL LETTER PE +  ["Ⱋ"] = "Št", -- GLAGOLITIC CAPITAL LETTER SHTA +  ["Ⱌ"] = "C",  -- GLAGOLITIC CAPITAL LETTER TSI +  ["Ⱍ"] = "Č",  -- GLAGOLITIC CAPITAL LETTER CHRIVI +  ["Ⱎ"] = "Š",  -- GLAGOLITIC CAPITAL LETTER SHA +  ["Ⱏ"] = "Ъ",  -- GLAGOLITIC CAPITAL LETTER YERU +  ["Ⱐ"] = "Ь",  -- GLAGOLITIC CAPITAL LETTER YERI +  ["Ⱑ"] = "Ě",  -- GLAGOLITIC CAPITAL LETTER YATI 
+  ["Ⱒ"] = "X",  -- GLAGOLITIC CAPITAL LETTER SPIDERY HA +  ["Ⱓ"] = "Ju", -- GLAGOLITIC CAPITAL LETTER YU +  ["Ⱔ"] = "Ę",  -- GLAGOLITIC CAPITAL LETTER SMALL YUS +  ["Ⱕ"] = "Y̨",  -- GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL +  ["Ⱖ"] = "??", -- GLAGOLITIC CAPITAL LETTER YO +  ["Ⱗ"] = "Ję", -- GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS +  ["Ⱘ"] = "Ǫ",  -- GLAGOLITIC CAPITAL LETTER BIG YUS +  ["Ⱙ"] = "Jǫ", -- GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS +  ["Ⱚ"] = "Th", -- GLAGOLITIC CAPITAL LETTER FITA +  ["Ⱛ"] = "Ü",  -- GLAGOLITIC CAPITAL LETTER IZHITSA +  ["Ⱜ"] = "??", -- GLAGOLITIC CAPITAL LETTER SHTAPIC +  ["Ⱝ"] = "??", -- GLAGOLITIC CAPITAL LETTER TROKUTASTI A +  ["Ⱞ"] = "M",  -- GLAGOLITIC CAPITAL LETTER LATINATE MYSLIT +} + +translit.tables["Glagolica transliteration for OCS uppercase"] = translit.ocs_gla_upp + +--===========================================================================-- +--                              End Of Tables                                -- +--===========================================================================-- + + diff --git a/tex/context/third/transliterator/trans_tables_gr.lua b/tex/context/third/transliterator/trans_tables_gr.lua new file mode 100644 index 0000000..7ce6855 --- /dev/null +++ b/tex/context/third/transliterator/trans_tables_gr.lua @@ -0,0 +1,693 @@ + +--===========================================================================-- +--                              Greek                                        -- +--===========================================================================-- + + +-- Note that the Greek transliteration mapping isn't bijective so transliterated +-- texts won't be reversible.  (Shouldn't be impossible to make one up using +-- diacritics on latin characters to represent all possible combinations of +-- Greek breathings + accents.)  

-- Good reading on composed / precombined unicode:
--  http://www.tlg.uci.edu/~opoudjis/unicode/unicode_gaps.html#precomposed

-------------------------------------------------
-- Lowercase Greek Initial Position Diphthongs --
-------------------------------------------------

translit.gr_di_in_low = {
  [" αὑ"] = " hau",
  [" αὕ"] = " hau",
  [" αὓ"] = " hau",
  [" αὗ"] = " hau",
  [" εὑ"] = " heu",
  [" εὕ"] = " heu",
  [" εὓ"] = " heu",
  [" εὗ"] = " heu",
  [" ηὑ"] = " hēu",
  [" ηὕ"] = " hēu",
  [" ηὓ"] = " hēu",
  [" ηὗ"] = " hēu",
  [" οὑ"] = " hu",
  [" οὕ"] = " hu",
  [" οὓ"] = " hu",
  [" οὗ"] = " hu",
  [" ωὑ"] = " hōu",
  [" ωὕ"] = " hōu",
  [" ωὓ"] = " hōu",
  [" ωὗ"] = " hōu"
}

translit.tables["Greek transliteration initial breathing diphthongs lowercase"] = translit.gr_di_in_low

-------------------------------------------------
-- Uppercase Greek Initial Position Diphthongs --
-------------------------------------------------

translit.gr_di_in_upp = {
  [" Αὑ"] = " Hau",
  [" Αὕ"] = " Hau",
  [" Αὓ"] = " Hau",
  [" Αὗ"] = " Hau",
  [" Εὑ"] = " Heu",
  [" Εὕ"] = " Heu",
  [" Εὓ"] = " Heu",
  [" Εὗ"] = " Heu",
  [" Ηὑ"] = " Hēu",
  [" Ηὕ"] = " Hēu",
  [" Ηὓ"] = " Hēu",
  [" Ηὗ"] = " Hēu",
  [" Οὑ"] = " Hu",
  [" Οὕ"] = " Hu",
  [" Οὓ"] = " Hu",
  [" Οὗ"] = " Hu",
  [" Ωὑ"] = " Hōu",
  [" Ωὕ"] = " Hōu",
  [" Ωὓ"] = " Hōu",
  [" Ωὗ"] = " Hōu"
}

translit.tables["Greek transliteration initial breathing diphthongs uppercase"] = translit.gr_di_in_upp

---------------------------------------
-- Lowercase Greek Initial Position  --
---------------------------------------

translit.gr_in_low = {
  [" ἁ"] = " ha",
  [" ἅ"] = " ha",
  [" ἃ"] = " ha",
  [" ἇ"] = " ha",
  [" ᾁ"] = " ha",
  [" ᾅ"] = " ha",
  [" ᾃ"] = " ha",
  [" ᾇ"] = " ha",
  [" ἑ"] = " he",
  [" ἕ"] = " he",
  [" ἓ"] = " he",
  [" ἡ"] = " hē",
  [" ἥ"] = " hē",
  [" ἣ"] = " hē",
  [" ἧ"] = " hē",
  [" ᾑ"] = " hē",
  [" ᾕ"] = " hē",
  [" ᾓ"] = " hē",
  [" ᾗ"] = " hē",
  [" ἱ"] = " hi",
  [" ἵ"] = " hi",
  [" ἳ"] = " hi",
  [" ἷ"] = " hi",
  [" ὁ"] = " ho",
  [" ὅ"] = " ho",
  [" ὃ"] = " ho",
  [" ὑ"] = " hy",
  [" ὕ"] = " hy",
  [" ὓ"] = " hy",
  [" ὗ"] = " hy",
  [" ὡ"] = " hō",
  [" ὥ"] = " hō",
  [" ὣ"] = " hō",
  [" ὧ"] = " hō",
  [" ᾡ"] = " hō",
  [" ᾥ"] = " hō",
  [" ᾣ"] = " hō",
  [" ᾧ"] = " hō",
}

translit.tables["Greek transliteration initial breathing lowercase"] = translit.gr_in_low

---------------------------------------
-- Uppercase Greek Initial Position  --
---------------------------------------

translit.gr_in_upp = {
  [" Ἁ"] = " Ha",
  [" Ἅ"] = " Ha",
  [" Ἃ"] = " Ha",
  [" Ἇ"] = " Ha",
  [" ᾉ"] = " Ha",
  [" ᾍ"] = " Ha",
  [" ᾋ"] = " Ha",
  [" ᾏ"] = " Ha",
  [" Ἑ"] = " He",
  [" Ἕ"] = " He",
  [" Ἓ"] = " He",
  [" Ἡ"] = " Hē",
  [" Ἥ"] = " Hē",
  [" Ἣ"] = " Hē",
  [" Ἧ"] = " Hē",
  [" ᾙ"] = " Hē",
  [" ᾝ"] = " Hē",
  [" ᾛ"] = " Hē",
  [" ᾟ"] = " Hē",
  [" Ἱ"] = " Hi",
  [" Ἵ"] = " Hi",
  [" Ἳ"] = " Hi",
  [" Ἷ"] = " Hi",
  [" Ὁ"] = " Ho",
  [" Ὅ"] = " Ho",
  [" Ὃ"] = " Ho",
  [" Ὑ"] = " Hy",
  [" Ὕ"] = " Hy",
  [" Ὓ"] = " Hy",
  [" Ὗ"] = " Hy",
  [" Ὡ"] = " Hō",
  [" Ὥ"] = " Hō",
  [" Ὣ"] = " Hō",
  [" Ὧ"] = " Hō",
  [" ᾩ"] = " Hō",
  [" ᾭ"] = " Hō",
  [" ᾫ"] = " Hō",
  [" ᾯ"] = " Hō",
}

translit.tables["Greek transliteration initial breathing uppercase"] = translit.gr_in_upp

---------------------------------
-- Lowercase Greek Diphthongs  --
---------------------------------

translit.gr_di_low = {
  ["αυ"] = "au",
  ["αύ"] = "au",
  ["αὺ"] = "au",
  ["αῦ"] = "au",
  ["αὐ"] = "au",
  ["αὔ"] = "au",
  ["αὒ"] = "au",
  ["αὖ"] = "au",
  ["αὑ"] = "au",
  ["αὕ"] = "au",
  ["αὓ"] = "au",
  ["αὗ"] = "au",
  ["ευ"] = "eu",
  ["εύ"] = "eu",
  ["εὺ"] = "eu",
  ["εῦ"] = "eu",
  ["εὐ"] = "eu",
  ["εὔ"] = "eu",
  ["εὒ"] = "eu",
  ["εὖ"] = "eu",
  ["εὑ"] = "eu",
  ["εὕ"] = "eu",
  ["εὓ"] = "eu",
  ["εὗ"] = "eu",
  ["ηυ"] = "ēu",
  ["ηύ"] = "ēu",
  ["ηὺ"] = "ēu",
  ["ηῦ"] = "ēu",
  ["ηὐ"] = "ēu",
  ["ηὔ"] = "ēu",
  ["ηὒ"] = "ēu",
  ["ηὖ"] = "ēu",
  ["ηὑ"] = "ēu",
  ["ηὕ"] = "ēu",
  ["ηὓ"] = "ēu",
  ["ηὗ"] = "ēu",
  ["ου"] = "u",
  ["ου"] = "u",
  ["ου"] = "u",
  ["ού"] = "u",
  ["οὺ"] = "u",
  ["οῦ"] = "u",
  ["οὐ"] = "u",
  ["οὔ"] = "u",
  ["οὒ"] = "u",
  ["οὖ"] = "u",
  ["οὑ"] = "u",
  ["οὕ"] = "u",
  ["οὓ"] = "u",
  ["οὗ"] = "u",
  ["ωυ"] = "ōu",
  ["ωύ"] = "ōu",
  ["ωὺ"] = "ōu",
  ["ωῦ"] = "ōu",
  ["ωὐ"] = "ōu",
  ["ωὔ"] = "ōu",
  ["ωὒ"] = "ōu",
  ["ωὖ"] = "ōu",
  ["ωὑ"] = "ōu",
  ["ωὕ"] = "ōu",
  ["ωὓ"] = "ōu",
  ["ωὗ"] = "ōu",
  ["ῤῥ"] = "rrh",
}

-- Fixed: this used to register translit.gr_in_low under the diphthong name.
translit.tables["Greek transliteration diphthongs lowercase"] = translit.gr_di_low

---------------------------------
-- Uppercase Greek Diphthongs  --
---------------------------------

translit.gr_di_upp = {
  ["Αυ"] = "Au",
  ["Αύ"] = "Au",
  ["Αὺ"] = "Au",
  ["Αῦ"] = "Au",
  ["Αὐ"] = "Au",
  ["Αὔ"] = "Au",
  ["Αὒ"] = "Au",
  ["Αὖ"] = "Au",
  ["Αὑ"] = "Au",
  ["Αὕ"] = "Au",
  ["Αὓ"] = "Au",
  ["Αὗ"] = "Au",
  ["Ευ"] = "Eu",
  ["Εύ"] = "Eu",
  ["Εὺ"] = "Eu",
  ["Εῦ"] = "Eu",
  ["Εὐ"] = "Eu",
  ["Εὔ"] = "Eu",
  ["Εὒ"] = "Eu",
  ["Εὖ"] = "Eu",
  ["Εὑ"] = "Eu",
  ["Εὕ"] = "Eu",
  ["Εὓ"] = "Eu",
  ["Εὗ"] = "Eu",
  ["Ηυ"] = "Ēu",
  ["Ηύ"] = "Ēu",
  ["Ηὺ"] = "Ēu",
  ["Ηῦ"] = "Ēu",
  ["Ηὐ"] = "Ēu",
  ["Ηὔ"] = "Ēu",
  ["Ηὒ"] = "Ēu",
  ["Ηὖ"] = "Ēu",
  ["Ηὑ"] = "Ēu",
  ["Ηὕ"] = "Ēu",
  ["Ηὓ"] = "Ēu",
  ["Ηὗ"] = "Ēu",
  ["Ου"] = "U",
  ["Ου"] = "U",
  ["Ου"] = "U",
  ["Ού"] = "U",
  ["Οὺ"] = "U",
  ["Οῦ"] = "U",
  ["Οὐ"] = "U",
  ["Οὔ"] = "U",
  ["Οὒ"] = "U",
  ["Οὖ"] = "U",
  ["Οὑ"] = "U",
  ["Οὕ"] = "U",
  ["Οὓ"] = "U",
  ["Οὗ"] = "U",
  ["Ωυ"] = "Ōu",
  ["Ωύ"] = "Ōu",
  ["Ωὺ"] = "Ōu",
  ["Ωῦ"] = "Ōu",
  ["Ωὐ"] = "Ōu",
  ["Ωὔ"] = "Ōu",
  ["Ωὒ"] = "Ōu",
  ["Ωὖ"] = "Ōu",
  ["Ωὑ"] = "Ōu",
  ["Ωὕ"] = "Ōu",
  ["Ωὓ"] = "Ōu",
  ["Ωὗ"] = "Ōu",
}

-- Fixed: this used to register translit.gr_in_upp under the diphthong name.
translit.tables["Greek transliteration diphthongs uppercase"] = translit.gr_di_upp

-- The following will be used in an option that ensures transcription of
-- nasalization, e.g. Ἁγχίσης -> “Anchises” (instead of “Agchises”)
translit.gr_nrule = {
  ["γγ"] = "ng",
  ["γκ"] = "nk",
  ["γξ"] = "nx",
  ["γχ"] = "nch",
}

translit.tables["Greek transliteration optional nasalization"] = translit.gr_nrule


--------------------------------------
-- Lowercase Greek Transliteration  --
--------------------------------------

translit.gr_low = {
  ["α"] = "a",
  ["ά"] = "a",
  ["ὰ"] = "a",
  ["ᾶ"] = "a",
  ["ᾳ"] = "a",
  ["ἀ"] = "a",
  ["ἁ"] = "a",
  ["ἄ"] = "a",
  ["ἂ"] = "a",
  ["ἆ"] = "a",
  ["ᾀ"] = "a",  -- was missing (the η and ω series have ᾐ / ᾠ in this slot)
  ["ἁ"] = "a",
  ["ἅ"] = "a",
  ["ἃ"] = "a",
  ["ἇ"] = "a",
  ["ᾁ"] = "a",
  ["ᾴ"] = "a",
  ["ᾲ"] = "a",
  ["ᾷ"] = "a",
  ["ᾄ"] = "a",
  ["ᾂ"] = "a",
  ["ᾅ"] = "a",
  ["ᾃ"] = "a",
  ["ᾆ"] = "a",
  ["ᾇ"] = "a",
  ["β"] = "b",
  ["γ"] = "g",
  ["δ"] = "d",
  ["ε"] = "e",
  ["έ"] = "e",
  ["ὲ"] = "e",
  ["ἐ"] = "e",
  ["ἔ"] = "e",
  ["ἒ"] = "e",
  ["ἑ"] = "e",
  ["ἕ"] = "e",
  ["ἓ"] = "e",
  ["ζ"] = "z",
  ["η"] = "ē",
  ["η"] = "ē",
  ["ή"] = "ē",
  ["ὴ"] = "ē",
  ["ῆ"] = "ē",
  ["ῃ"] = "ē",
  ["ἠ"] = "ē",
  ["ἤ"] = "ē",
  ["ἢ"] = "ē",
  ["ἦ"] = "ē",
  ["ᾐ"] = "ē",
  ["ἡ"] = "ē",
  ["ἥ"] = "ē",
  ["ἣ"] = "ē",
  ["ἧ"] = "ē",
  ["ᾑ"] = "ē",
  ["ῄ"] = "ē",
  ["ῂ"] = "ē",
  ["ῇ"] = "ē",
  ["ᾔ"] = "ē",
  ["ᾒ"] = "ē",
  ["ᾕ"] = "ē",
  ["ᾓ"] = "ē",
  ["ᾖ"] = "ē",
  ["ᾗ"] = "ē",
  ["θ"] = "th",
  ["ι"] = "i",
  ["ί"] = "i",
  ["ὶ"] = "i",
  ["ῖ"] = "i",
  ["ἰ"] = "i",
  ["ἴ"] = "i",
  ["ἲ"] = "i",
  ["ἶ"] = "i",
  ["ἱ"] = "i",
  ["ἵ"] = "i",
  ["ἳ"] = "i",
  ["ἷ"] = "i",
  ["ϊ"] = "i",
  ["ΐ"] = "i",
  ["ῒ"] = "i",
  ["ῗ"] = "i",
  ["κ"] = "k",
  ["λ"] = "l",
  ["μ"] = "m",
  ["ν"] = "n",
  ["ξ"] = "x",
  ["ο"] = "o",
  ["ό"] = "o",
  ["ὸ"] = "o",
  ["ὀ"] = "o",
  ["ὄ"] = "o",
  ["ὂ"] = "o",
  ["ὁ"] = "o",
  ["ὅ"] = "o",
  ["ὃ"] = "o",
  ["π"] = "p",
  ["ρ"] = "r",
  ["ῤ"] = "r",
  ["ῥ"] = "rh",
  ["σ"] = "s",
  ["ς"] = "s",
  ["τ"] = "t",
  ["υ"] = "y",
  ["ύ"] = "y",
  ["ὺ"] = "y",
  ["ῦ"] = "y",
  ["ὐ"] = "y",
  ["ὔ"] = "y",
  ["ὒ"] = "y",
  ["ὖ"] = "y",
  ["ὑ"] = "y",
  ["ὕ"] = "y",
  ["ὓ"] = "y",
  ["ὗ"] = "y",
  ["ϋ"] = "y",
  ["ΰ"] = "y",
  ["ῢ"] = "y",
  ["ῧ"] = "y",
  ["φ"] = "ph",
  ["χ"] = "ch",
  ["ψ"] = "ps",
  ["ω"] = "ō",
  ["ώ"] = "ō",
  ["ὼ"] = "ō",
  ["ῶ"] = "ō",
  ["ῳ"] = "ō",
  ["ὠ"] = "ō",
  ["ὤ"] = "ō",
  ["ὢ"] = "ō",
  ["ὦ"] = "ō",
  ["ᾠ"] = "ō",
  ["ὡ"] = "ō",
  ["ὥ"] = "ō",
  ["ὣ"] = "ō",
  ["ὧ"] = "ō",
  ["ᾡ"] = "ō",
  ["ῴ"] = "ō",
  ["ῲ"] = "ō",
  ["ῷ"] = "ō",
  ["ᾤ"] = "ō",
  ["ᾢ"] = "ō",
  ["ᾥ"] = "ō",
  ["ᾣ"] = "ō",
  ["ᾦ"] = "ō",
  ["ᾧ"] = "ō",
}

translit.tables["Greek transliteration lowercase"] = translit.gr_low

--------------------------------------
-- Uppercase Greek Transliteration  --
--------------------------------------

translit.gr_upp = {
  ["Α"] = "A",
  ["Ά"] = "A",
  ["Ὰ"] = "A",
--["ᾶ"] = "A",
  ["ᾼ"] = "A",
  ["Ἀ"] = "A",
  ["Ἁ"] = "A",
  ["Ἄ"] = "A",
  ["Ἂ"] = "A",
  ["Ἆ"] = "A",
  ["ᾈ"] = "A",  -- was missing (the Η and Ω series have ᾘ / ᾨ in this slot)
  ["Ἁ"] = "A",
  ["Ἅ"] = "A",
  ["Ἃ"] = "A",
  ["Ἇ"] = "A",
  ["ᾉ"] = "A",
--["ᾴ"] = "A", -- I’d be very happy if anybody could explain to me
--["ᾲ"] = "A", -- why there's Ά, ᾌ and ᾼ but no “A + iota subscript
--["ᾷ"] = "A", -- + acute” …, same for Η, Υ and Ω + diacritica.
  ["ᾌ"] = "A",
  ["ᾊ"] = "A",
  ["ᾍ"] = "A",
  ["ᾋ"] = "A",
  ["ᾎ"] = "A",
  ["ᾏ"] = "A",
  ["Β"] = "B",
  ["Γ"] = "G",
  ["Δ"] = "D",
  ["Ε"] = "E",
  ["Έ"] = "E",
  ["Ὲ"] = "E",
  ["Ἐ"] = "E",
  ["Ἔ"] = "E",
  ["Ἒ"] = "E",
  ["Ἑ"] = "E",
  ["Ἕ"] = "E",
  ["Ἓ"] = "E",
  ["Ζ"] = "Z",
  ["Η"] = "Ē",
  ["Η"] = "Ē",
  ["Ή"] = "Ē",
  ["Ὴ"] = "Ē",
--["ῆ"] = "Ē",
  ["ῌ"] = "Ē",
  ["Ἠ"] = "Ē",
  ["Ἤ"] = "Ē",
  ["Ἢ"] = "Ē",
  ["Ἦ"] = "Ē",
  ["ᾘ"] = "Ē",
  ["Ἡ"] = "Ē",
  ["Ἥ"] = "Ē",
  ["Ἣ"] = "Ē",
  ["Ἧ"] = "Ē",
  ["ᾙ"] = "Ē",
--["ῄ"] = "Ē",
--["ῂ"] = "Ē",
--["ῇ"] = "Ē",
  ["ᾜ"] = "Ē",
  ["ᾚ"] = "Ē",
  ["ᾝ"] = "Ē",
  ["ᾛ"] = "Ē",
  ["ᾞ"] = "Ē",
  ["ᾟ"] = "Ē",
  ["Θ"] = "Th",
  ["Ι"] = "I",
  ["Ί"] = "I",
  ["Ὶ"] = "I",
--["ῖ"] = "I",
  ["Ἰ"] = "I",
  ["Ἴ"] = "I",
  ["Ἲ"] = "I",
  ["Ἶ"] = "I",
  ["Ἱ"] = "I",
  ["Ἵ"] = "I",
  ["Ἳ"] = "I",
  ["Ἷ"] = "I",
  ["Ϊ"] = "I",
--["ΐ"] = "I",
--["ῒ"] = "I",
--["ῗ"] = "I",
  ["Κ"] = "K",
  ["Λ"] = "L",
  ["Μ"] = "M",
  ["Ν"] = "N",
  ["Ξ"] = "X",
  ["Ο"] = "O",
  ["Ό"] = "O",
  ["Ὸ"] = "O",
  ["Ὀ"] = "O",
  ["Ὄ"] = "O",
  ["Ὂ"] = "O",
  ["Ὁ"] = "O",
  ["Ὅ"] = "O",
  ["Ὃ"] = "O",
  ["Π"] = "P",
  ["Ρ"] = "R",
--["ῤ"] = "R",
  ["Ῥ"] = "Rh",
  ["Σ"] = "S",
  ["Σ"] = "S",
  ["Τ"] = "T",
  ["Υ"] = "Y",
  ["Ύ"] = "Y",
  ["Ὺ"] = "Y",
--["ῦ"] = "Y",
--["ὐ"] = "Y",
--["ὔ"] = "Y",
--["ὒ"] = "Y",
--["ὖ"] = "Y",
  ["Ὑ"] = "Y",
  ["Ὕ"] = "Y",
  ["Ὓ"] = "Y",
  ["Ὗ"] = "Y",
  ["Ϋ"] = "Y",
--["ΰ"] = "Y",
--["ῢ"] = "Y",
--["ῧ"] = "Y",
  ["Φ"] = "Ph",
  ["Χ"] = "Ch",
  ["Ψ"] = "Ps",
  ["Ω"] = "Ō",
  ["Ώ"] = "Ō",
  ["Ὼ"] = "Ō",
--["ῶ"] = "Ō",
  ["ῼ"] = "Ō",
  ["Ὠ"] = "Ō",
  ["Ὤ"] = "Ō",
  ["Ὢ"] = "Ō",
  ["Ὦ"] = "Ō",
  ["ᾨ"] = "Ō",
  ["Ὡ"] = "Ō",
  ["Ὥ"] = "Ō",
  ["Ὣ"] = "Ō",
  ["Ὧ"] = "Ō",
  ["ᾩ"] = "Ō",
--["ῴ"] = "Ō",
--["ῲ"] = "Ō",
--["ῷ"] = "Ō",
  ["ᾬ"] = "Ō",
  ["ᾪ"] = "Ō",
  ["ᾭ"] = "Ō",
  ["ᾫ"] = "Ō",
  ["ᾮ"] = "Ō",
  ["ᾯ"] = "Ō",
}

translit.tables["Greek transliteration uppercase"] = translit.gr_upp

------------
-- Varia  --
------------

translit.gr_other = {
  ["ϝ"] = "w",
  ["Ϝ"] = "W",
  ["ϙ"] = "q",
  ["Ϙ"] = "Q",
  ["ϡ"] = "ss",
  ["Ϡ"] = "Ss",
}

translit.tables["Greek transliteration archaic characters"] = translit.gr_other

--===========================================================================--
--                              End Of Tables                                --
--===========================================================================--

-- Transliterate Greek text to Latin script using the tables above.
--
--   mode   "gr"   plain transliteration;
--          "gr_n" additionally applies translit.gr_nrule (γγ -> ng etc.).
--   text   UTF-8 encoded input string.
--
-- Returns the transliterated string.  For any other mode nothing is done and
-- nil is returned (unchanged from the original behaviour).
--
-- Matching priority at every position: initial-breathing diphthongs, initial
-- breathing single letters (both keyed with a leading space), diphthongs,
-- single letters, and finally any other UTF-8 character, which is copied
-- through untouched.
function translit.dogreek (mode, text)
    local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs

    -- One UTF-8 encoded character, based on
    -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html
    -- Lead bytes 224..239 start three byte sequences, 240..244 four byte
    -- sequences.  The previous split (224..240 / 241..244) consumed only
    -- three bytes of a character starting with \240; the dangling
    -- continuation byte then stopped the parser and the rest of the text
    -- was silently dropped.
    local cont    = R("\128\191")
    local utfchar = R("\000\127") +
        R("\194\223") * cont +
        R("\224\239") * cont * cont +
        R("\240\244") * cont * cont * cont

    -- Add keys of a dictionary to a ruleset.
    -- Kept local (it used to leak into the global namespace).  Keys are
    -- appended longest first, ties in byte order, so that the ordered choice
    -- is deterministic and a short key can never shadow a longer key it is a
    -- prefix of; pairs() alone gives no ordering guarantee.
    local function addrules (dict, rules)
        local keys = {}
        for key in pairs(dict) do
            keys[#keys + 1] = key
        end
        table.sort(keys, function (a, b)
            if #a ~= #b then return #a > #b end
            return a < b
        end)
        for _, key in ipairs(keys) do
            if rules == nil then rules = P(key)
            else rules = rules + P(key)
            end
        end
        return rules
    end

    if mode == "gr" or mode == "gr_n" then

        -- Merge lower and upper case tables into one lookup per rule class.
        local gr_di_in = translit.add_table( {}, translit.gr_di_in_low, translit.gr_di_in_upp )
        local gr_in    = translit.add_table( {}, translit.gr_in_low,    translit.gr_in_upp    )
        local gr_di    = translit.add_table( {}, translit.gr_di_low,    translit.gr_di_upp    )
        local gr       = translit.add_table( {}, translit.gr_low,       translit.gr_upp, translit.gr_other )

        if mode == "gr_n" then gr_di = translit.add_table( gr_di, translit.gr_nrule ) end

        -- Each pattern matches any key of its table and replaces the match
        -- with the table value.
        local init_diph = Cs(addrules(gr_di_in) / gr_di_in)
        local init      = Cs(addrules(gr_in)    / gr_in   )
        local diph      = Cs(addrules(gr_di)    / gr_di   )
        local other     = Cs(addrules(gr)       / gr      )

        local g = Cs((init_diph + init + diph + other + utfchar)^0)

        text = g:match(text)
        return text
    end
end

-- (patch header retained from the diff view)
-- diff --git a/tex/context/third/transliterator/trans_tables_iso9.lua b/tex/context/third/transliterator/trans_tables_iso9.lua
-- new file mode 100644
-- index 0000000..f85ed35
-- --- /dev/null
-- +++ b/tex/context/third/transliterator/trans_tables_iso9.lua
-- @@ -0,0 +1,288 @@

--===========================================================================--
--           ISO 9.1995(E) standardized transliteration for cyrillic         --
--===========================================================================--

-----------------------------------------
-- Lowercase russian cyrillic alphabet --
-----------------------------------------
translit.ru_low = {
  ["а"] = "a", -- U+0430 -> U+0061
  ["б"] = "b", -- U+0431 -> U+0062
  ["в"] = "v", -- U+0432 -> U+0076
  ["г"] = "g", -- U+0433 -> U+0067
  ["д"] = "d", -- U+0434 -> U+0064
  ["е"] = "e", -- U+0435 -> U+0065
  ["ё"] = "ë", -- U+0451 -> U+00eb
  ["ж"] = "ž", -- U+0436 -> U+017e
  ["з"] = "z", -- U+0437 -> U+007a
  ["и"] = "i", -- U+0438 -> U+0069
  ["й"] = "j", -- U+0439 -> U+006a
  ["к"] = "k", -- U+043a -> U+006b
  ["л"] = "l", -- U+043b -> U+006c
  ["м"] = "m", -- U+043c -> U+006d
  ["н"] = "n", -- U+043d -> U+006e
  ["о"] = "o", -- U+043e -> U+006f
  ["п"] = "p", -- U+043f -> U+0070
  ["р"] = "r", -- U+0440 -> U+0072
  ["с"] = "s", -- U+0441 -> U+0073
  ["т"] = "t", -- U+0442 -> U+0074
  ["у"] = "u", -- U+0443 -> U+0075
  ["ф"] = "f", -- U+0444 -> U+0066
  ["х"] = "h", -- U+0445 -> U+0068
  ["ц"] = "c", -- U+0446 -> U+0063
  ["ч"] = "č", -- U+0447 -> U+010d
  ["ш"] = "š", -- U+0448 -> U+0161
  ["щ"] = "ŝ", -- U+0449 -> U+015d
  ["ъ"] = "ʺ", -- U+044a -> U+02ba <- That's somewhat ambiguous as 0x2ba is
  ["ы"] = "y", -- U+044b -> U+0079    used for uppercase, too.
  ["ь"] = "ʹ", -- U+044c -> U+02b9 <- Same here with 0x2b9.
  ["э"] = "è", -- U+044d -> U+00e8
  ["ю"] = "û", -- U+044e -> U+00fb
  ["я"] = "â"  -- U+044f -> U+00e2
}

translit.tables["russian lowercase ISO~9"] = translit.ru_low

-----------------------------------------
-- Uppercase russian cyrillic alphabet --
-----------------------------------------

translit.ru_upp = {
  ["А"] = "A", -- U+0410 -> U+0041
  ["Б"] = "B", -- U+0411 -> U+0042
  ["В"] = "V", -- U+0412 -> U+0056
  ["Г"] = "G", -- U+0413 -> U+0047
  ["Д"] = "D", -- U+0414 -> U+0044
  ["Е"] = "E", -- U+0415 -> U+0045
  ["Ё"] = "Ë", -- U+0401 -> U+00cb
  ["Ж"] = "Ž", -- U+0416 -> U+017d
  ["З"] = "Z", -- U+0417 -> U+005a
  ["И"] = "I", -- U+0418 -> U+0049
  ["Й"] = "J", -- U+0419 -> U+004a
  ["К"] = "K", -- U+041a -> U+004b
  ["Л"] = "L", -- U+041b -> U+004c
  ["М"] = "M", -- U+041c -> U+004d
  ["Н"] = "N", -- U+041d -> U+004e
  ["О"] = "O", -- U+041e -> U+004f
  ["П"] = "P", -- U+041f -> U+0050
  ["Р"] = "R", -- U+0420 -> U+0052
  ["С"] = "S", -- U+0421 -> U+0053
  ["Т"] = "T", -- U+0422 -> U+0054
  ["У"] = "U", -- U+0423 -> U+0055
  ["Ф"] = "F", -- U+0424 -> U+0046
  ["Х"] = "H", -- U+0425 -> U+0048
  ["Ц"] = "C", -- U+0426 -> U+0043
  ["Ч"] = "Č", -- U+0427 -> U+010c
  ["Ш"] = "Š", -- U+0428 -> U+0160
  ["Щ"] = "Ŝ", -- U+0429 -> U+015c
  ["Ъ"] = "ʺ", -- U+042a -> U+02ba
  ["Ы"] = "Y", -- U+042b -> U+0059
  ["Ь"] = "ʹ", -- U+042c -> U+02b9
  ["Э"] = "È", -- U+042d -> U+00c8
  ["Ю"] = "Û", -- U+042e -> U+00db
  ["Я"] = "Â"  -- U+042f -> U+00c2
}

translit.tables["russian uppercase ISO~9"] = translit.ru_upp

----------------------------------------------------------
-- Lowercase pre-1918 russian cyrillic additional chars --
----------------------------------------------------------
-- cf. http://www.russportal.ru/index.php?id=oldorth.decret1917

translit.ru_old_low = {
  ["ѣ"] = "ě", -- U+0463 -> U+011b -- 2-byte
  ["і"] = "ì", -- U+0456 -> U+00ec -- 2-byte
  ["ѳ"] = "f", -- U+0473 -> U+0066 -- 2-byte
  ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 -- 3-byte
}

translit.tables["russian pre-1918 lowercase ISO~9 2 byte"] = translit.ru_old_low

translit.ru_old_upp = {
  ["Ѣ"] = "Ě", -- U+0462 -> U+011a -- 2-byte
  ["І"] = "Ì", -- U+0406 -> U+00cc -- 2-byte
  ["Ѳ"] = "F", -- U+0472 -> U+0046 -- 2-byte
  ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2 -- 3-byte
}

translit.tables["russian pre-1918 uppercase ISO~9 2 byte"] = translit.ru_old_upp

---------------------------------------------------------
-- Lowercase characters from other cyrillic alphabets  --
---------------------------------------------------------

translit.non_ru_low = {
  ["ӑ"] = "ă", -- U+04d1 -> U+0103
  ["ӓ"] = "ä", -- U+04d3 -> U+00e4
  ["ә"] = "a̋", -- u+04d9 -> U+0061+030b
  ["ґ"] = "g̀", -- u+0491 -> U+0067+0300
  ["ҕ"] = "ğ", -- U+0495 -> U+011f
  ["ғ"] = "ġ", -- U+0493 -> U+0121
  ["ђ"] = "đ", -- U+0452 -> U+0111
  ["ѓ"] = "ǵ", -- U+0453 -> U+01f5
  ["ӗ"] = "ĕ", -- U+04d7 -> U+0115
  ["є"] = "ê", -- U+0454 -> U+00ea
  ["ҽ"] = "c̆", -- U+04bd -> U+0063+0306
  ["ҿ"] = "ç̆", -- U+04bf -> U+00e7+0306
  ["ӂ"] = "z̆", -- U+04c2 -> U+007a+0306
  ["ӝ"] = "z̄", -- U+04dd -> U+007a+0304
  ["җ"] = "ž̧", -- U+0497 -> U+017e+0327
  ["ӟ"] = "z̈", -- U+04df -> U+007a+0308
  ["ѕ"] = "ẑ", -- U+0455 -> U+1e91          -- Mapped to dz in old cyrillic non-ISO.
  ["ӡ"] = "ź", -- U+04e1 -> U+017a
  ["ӥ"] = "î", -- U+04e5 -> U+00ee
  ["і"] = "ì", -- U+0456 -> U+00ec
  ["ї"] = "ï", -- U+0457 -> U+00ef
  ["ј"] = "ǰ", -- U+0458 -> U+01f0
  ["қ"] = "ķ", -- U+049b -> U+0137
  ["ҟ"] = "k̄", -- U+049f -> U+006b+0304
  ["љ"] = "l̂", -- U+0459 -> U+006c+0302
  ["њ"] = "n̂", -- U+045a -> U+006e+0302
  ["ҥ"] = "ṅ", -- U+04a5 -> U+1e45
  ["ң"] = "ṇ", -- U+04a3 -> U+1e47
  ["ӧ"] = "ö", -- U+04e7 -> U+00f6
  ["ө"] = "ô", -- U+04e9 -> U+00f4
  ["ҧ"] = "ṕ", -- U+04a7 -> U+1e55
  ["ҫ"] = "ç", -- U+04ab -> U+00e7
  ["ҭ"] = "ţ", -- U+04ad -> U+0163
  ["ћ"] = "ć", -- U+045b -> U+0107
  ["ќ"] = "ḱ", -- U+045c -> U+1e31
  ["у́"] = "ú", -- U+0443+ -> U+00fA
  ["ў"] = "ŭ", -- U+045e -> U+016d
  ["ӱ"] = "ü", -- U+04f1 -> U+00fc
  ["ӳ"] = "ű", -- U+04f3 -> U+0171
  ["ү"] = "ù", -- U+04af -> U+00f9
  ["ҳ"] = "ḩ", -- U+04b3 -> U+1e29
  ["һ"] = "ḥ", -- U+04bb -> U+1e25
  ["ҵ"] = "c̄", -- U+04b5 -> U+0063+0304
  ["ӵ"] = "c̈", -- U+04f5 -> U+0063+0308
  ["ҷ"] = "ç", -- U+04b7 -> U+00e7
  ["џ"] = "d̂", -- U+045f -> U+0064+0302
  ["ӹ"] = "ÿ", -- U+04f9 -> U+00ff
  ["ѣ"] = "ě", -- U+0463 -> U+011b
  ["ѫ"] = "ǎ", -- U+046b -> U+01ce      -- Mapped to ǫ in non-ISO old cyrillic.
  ["ѳ"] = "f̀", -- U+0473 -> U+0066+0300 -- This is mapped to ‘f’ in ru_old.
  ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3
  ["ҩ"] = "ò", -- U+04a9 -> U+00f2
  ["Ӏ"] = "‡"  -- U+04cf -> U+2021
}

translit.tables["cyrillic other lowercase ISO~9"] = translit.non_ru_low

---------------------------------------------------------
-- Uppercase characters from other cyrillic alphabets  --
---------------------------------------------------------

translit.non_ru_upp = {
  ["Ӑ"] = "Ă", -- U+04d0 -> U+0102
  ["Ӓ"] = "Ä", -- U+04d2 -> U+00c4
  ["Ә"] = "A̋", -- U+04d8 -> U+0041+030b
  ["Ґ"] = "G̀", -- U+0490 -> U+0047+0300
  ["Ҕ"] = "Ğ", -- U+0494 -> U+011e
  ["Ғ"] = "Ġ", -- U+0492 -> U+0120
  ["Ђ"] = "Đ", -- U+0402 -> U+0110
  ["Ѓ"] = "Ǵ", -- U+0403 -> U+01f4
  ["Ӗ"] = "Ĕ", -- U+04d6 -> U+0114
  ["Є"] = "Ê", -- U+0404 -> U+00ca
  ["Ҽ"] = "C̆", -- U+04bc -> U+0043+0306
  ["Ҿ"] = "Ç̆", -- U+04be -> U+00c7+0306
  ["Ӂ"] = "Z̆", -- U+04c1 -> U+005a+0306
  ["Ӝ"] = "Z̄", -- U+04dc -> U+005a+0304
  ["Җ"] = "Ž̦", -- U+0496 -> U+017d+0326
  ["Ӟ"] = "Z̈", -- U+04de -> U+005a+0308
  ["Ѕ"] = "Ẑ", -- U+0405 -> U+1e90
  ["Ӡ"] = "Ź", -- U+04e0 -> U+0179
  ["Ӥ"] = "Î", -- U+04e4 -> U+00ce
  ["І"] = "Ì", -- U+0406 -> U+00cc
  ["Ї"] = "Ï", -- U+0407 -> U+00cf
  ["Ј"] = "J̌", -- U+0408 -> U+004a+030c
  ["Қ"] = "Ķ", -- U+049a -> U+0136
  ["Ҟ"] = "K̄", -- U+049e -> U+004b+0304
  ["Љ"] = "L̂", -- U+0409 -> U+004c+0302
  ["Њ"] = "N̂", -- U+040a -> U+004e+0302
  ["Ҥ"] = "Ṅ", -- U+04a4 -> U+1e44
  ["Ң"] = "Ṇ", -- U+04a2 -> U+1e46
  ["Ӧ"] = "Ö", -- U+04e6 -> U+00d6
  ["Ө"] = "Ô", -- U+04e8 -> U+00d4
  ["Ҧ"] = "Ṕ", -- U+04a6 -> U+1e54
  ["Ҫ"] = "Ç", -- U+04aa -> U+00c7
  ["Ҭ"] = "Ţ", -- U+04ac -> U+0162
  ["Ћ"] = "Ć", -- U+040b -> U+0106
  ["Ќ"] = "Ḱ", -- U+040c -> U+1e30
  ["У́"] = "Ú", -- U+0423 -> U+00da
  ["Ў"] = "Ŭ", -- U+040e -> U+016c
  ["Ӱ"] = "Ü", -- U+04f0 -> U+00dc
  ["Ӳ"] = "Ű", -- U+04f2 -> U+0170
  ["Ү"] = "Ù", -- U+04ae -> U+00d9
  ["Ҳ"] = "Ḩ", -- U+04b2 -> U+1e28
  ["Һ"] = "Ḥ", -- U+04ba -> U+1e24
  ["Ҵ"] = "C̄", -- U+04b4 -> U+0043+0304
  ["Ӵ"] = "C̈", -- U+04f4 -> U+0043+0308
  ["Ҷ"] = "Ç", -- U+04b6 -> U+00c7
  ["Џ"] = "D̂", -- U+040f -> U+0044+0302
  ["Ӹ"] = "Ÿ", -- U+04f8 -> U+0178
  ["Ѣ"] = "Ě", -- U+0462 -> U+011a
  ["Ѫ"] = "Ǎ", -- U+046a -> U+01cd
  ["Ѳ"] = "F̀", -- U+0472 -> U+0046+0300
  ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2
  ["Ҩ"] = "Ò", -- U+04a8 -> U+00d2
  ["’"] = "‵", -- U+2019 -> U+2035
  ["Ӏ"] = "‡"  -- U+04c0 -> U+2021
}

translit.tables["cyrillic other uppercase ISO~9"] = translit.non_ru_upp

--===========================================================================--
--                              End Of Tables                                --
--===========================================================================--

-- Transliterate cyrillic text according to ISO 9.
--
--   mode   "ru_old" adds the pre-1918 characters to the Russian alphabet;
--          "all" additionally adds the characters of other cyrillic
--          alphabets; any other value uses the modern Russian tables only.
--   text   UTF-8 encoded input string.
--
-- Returns the transliterated string; characters without a mapping are copied
-- through unchanged.
function translit.iso9 (mode, text)
    local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs

    -- One UTF-8 encoded character, based on
    -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html
    -- Lead bytes 224..239 start three byte sequences, 240..244 four byte
    -- sequences.  The previous split (224..240 / 241..244) consumed only
    -- three bytes of a character starting with \240; the dangling
    -- continuation byte then stopped the parser and the rest of the text
    -- was silently dropped.
    local cont    = R("\128\191")
    local utfchar = R("\000\127") +
        R("\194\223") * cont +
        R("\224\239") * cont * cont +
        R("\240\244") * cont * cont * cont

    -- Add keys of a dictionary to a ruleset.
    -- Kept local (it used to leak into the global namespace).  Keys are
    -- appended longest first, ties in byte order: the ordered choice becomes
    -- deterministic and multi code point keys such as "у́" (у + combining
    -- acute) are tried before the plain "у" they start with.  With the bare
    -- pairs() order the short key could win and the long one never matched.
    local function addrules (dict, rules)
        local keys = {}
        for key in pairs(dict) do
            keys[#keys + 1] = key
        end
        table.sort(keys, function (a, b)
            if #a ~= #b then return #a > #b end
            return a < b
        end)
        for _, key in ipairs(keys) do
            if rules == nil then rules = P(key)
            else rules = rules + P(key)
            end
        end
        return rules
    end

    -- Collect the substitution dictionary requested by mode.
    local iso9 = translit.add_table( {},
        translit.ru_upp,
        translit.ru_low
    )
    if mode == "ru_old" or mode == "all" then
        iso9 = translit.add_table(iso9,
            translit.ru_old_upp,
            translit.ru_old_low
        )
        if mode == "all" then
            iso9 = translit.add_table(iso9,
                translit.non_ru_upp,
                translit.non_ru_low
            )
        end
    end

    -- Match any key of the dictionary and replace it with its value.
    local p_cyr = Cs(addrules(iso9)) / iso9

    local iso9_parser = Cs((p_cyr + utfchar)^0)
    text = iso9_parser:match(text)

    return text
end

-- (patch header retained from the diff view)
-- diff --git a/tex/context/third/transliterator/trans_tables_scntfc.lua b/tex/context/third/transliterator/trans_tables_scntfc.lua
-- new file mode 100644
-- index 0000000..bf9a510
-- --- /dev/null
-- +++ b/tex/context/third/transliterator/trans_tables_scntfc.lua
-- @@ -0,0 +1,256 @@

--===========================================================================--
--                      Other transliterations                               --
--===========================================================================--


-- The following are needed because ISO 9 does not cover old Slavonic
-- characters that became obsolete before the advent of гражданский шрифт.

-- Please note that these mappings are not bijective so don't expect the result
-- to be easily revertible (by machines).

-- Source p.
77 of +-- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf + +----------------------------------------------------------------------- +-- Lowercase and uppercase letter Uk -- “scientific transliteration” -- +----------------------------------------------------------------------- + +translit.ocs_uk = { +  ["oу"] = "u", +  ["оу"] = "u", +  ["Оу"] = "U", +} +----------------------------------------------------------------------------- +-- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” -- +----------------------------------------------------------------------------- + +translit.ocs_low = { +  ["а"] = "a", +  ["б"] = "b", +  ["в"] = "v", +  ["г"] = "g", +  ["д"] = "d", +  ["є"] = "e", +  ["ж"] = "ž", +  ["ꙃ"] = "ʒ",      -- U+0292, alternative: dz U+01f3 +  ["ѕ"] = "ʒ", +  ["ꙁ"] = "z", +  ["з"] = "z", +  ["и"] = "i", +  ["і"] = "i", +  ["ї"] = "i", +  ["ћ"] = "g’", +  ["к"] = "k", +  ["л"] = "l", +  ["м"] = "m", +  ["н"] = "n", +  ["о"] = "o", +  ["п"] = "p", +  ["р"] = "r", +  ["с"] = "s", +  ["т"] = "t", +  ["у"] = "u", +  ["ѹ"] = "u", +  ["ꙋ"] = "u", +  ["ф"] = "f", +  ["х"] = "x", +  ["ѡ"] = "o", --"ō", +  ["ѿ"] = "ot",     -- U+047f +  ["ѽ"] = "o!",     -- U+047d +  ["ꙍ"] = "o!",     -- U+064D +  ["ц"] = "c", +  ["ч"] = "č", +  ["ш"] = "š", +  ["щ"] = "št", +  ["ъ"] = "ъ", +  ["ы"] = "y", +  ["ꙑ"] = "y",      -- Old jery (U+a651) as used e.g. by the OCS Wikipedia. 
+  ["ь"] = "ь", +  ["ѣ"] = "ě", +  ["ю"] = "ju", +  ["ꙗ"] = "ja", +  ["ѥ"] = "je", +  ["ѧ"] = "ę", +  ["ѩ"] = "ję", +  ["ѫ"] = "ǫ", +  ["ѭ"] = "jǫ", +  ["ѯ"] = "ks", +  ["ѱ"] = "ps", +  ["ѳ"] = "th", +  ["ѵ"] = "ü", +} + +translit.tables["OCS \\quotation{scientific} transliteration lowercase"] = translit.ocs_low + +----------------------------------------------------------------------------- +-- Uppercase pre-Peter cyrillic characters -- “scientific transliteration” -- +----------------------------------------------------------------------------- + +translit.ocs_upp = { +  ["А"] = "A", +  ["Б"] = "B", +  ["В"] = "V", +  ["Г"] = "G", +  ["Д"] = "D", +  ["Є"] = "E", +  ["Ж"] = "Ž", +  ["Ꙃ"] = "Ʒ",      -- U+01b7, alternative: Dz U+01f2 +  ["Ѕ"] = "Ʒ", +  ["Ꙁ"] = "Z", +  ["З"] = "Z", +  ["И"] = "I", +  ["І"] = "I", +  ["Ї"] = "I", +  ["Ћ"] = "G’", +  ["К"] = "K", +  ["Л"] = "L", +  ["М"] = "M", +  ["Н"] = "N", +  ["О"] = "O", +  ["П"] = "P", +  ["Р"] = "R", +  ["С"] = "S", +  ["Т"] = "T", +  ["У"] = "u", +  ["Ѹ"] = "U", +  ["ꙋ"] = "U", +  ["Ф"] = "F", +  ["Х"] = "X", +  ["Ѡ"] = "Ō", +  ["Ѿ"] = "Ot",     -- U+047c +  ["Ѽ"] = "O!",     -- U+047e +  ["Ꙍ"] = "O!",     -- U+064C +  ["Ц"] = "C", +  ["Ч"] = "Č", +  ["Ш"] = "Š", +  ["Щ"] = "Št", +  ["Ъ"] = "Ŭ", +  ["Ы"] = "Y", +  ["Ꙑ"] = "Y",  -- U+a650 +  ["Ь"] = "Ĭ", +  ["Ѣ"] = "Ě", +  ["Ю"] = "Ju", +  ["Ꙗ"] = "Ja", +  ["Ѥ"] = "Je", +  ["Ѧ"] = "Ę", +  ["Ѩ"] = "Ję", +  ["Ѫ"] = "Ǫ", +  ["Ѭ"] = "Jǫ", +  ["Ѯ"] = "Ks", +  ["Ѱ"] = "Ps", +  ["Ѳ"] = "Th", +  ["Ѵ"] = "Ü", +} + +translit.tables["OCS \\quotation{scientific} transliteration uppercase"] = translit.ocs_upp + +-- Note on the additional tables: these cover characters that are not defined +-- in ISO 9 but have a “scientific” transliteration.  You may use them as +-- complementary mapping to ISO 9, trading off homogenity for completeness. 
+ +---------------------------------------------------------------------------------------- +-- Lowercase additional pre-Peter cyrillic characters -- “scientific transliteration” -- +---------------------------------------------------------------------------------------- + +translit.ocs_add_low = { +  ["ѕ"] = "dz",         -- Mapped to ẑ in ISO 9 (Macedonian …) +  ["ѯ"] = "ks", +  ["ѱ"] = "ps", +  ["ѡ"] = "ô", +  ["ѿ"] = "ot",     -- U+047f +  ["ѫ"] = "ǫ",          -- Mapped to ǎ in ISO 9. +  ["ѧ"] = "ę", +  ["ѭ"] = "jǫ", +  ["ѩ"] = "ję", +  ["ѥ"] = "je", +  ["ѹ"] = "u",          -- Digraph uk. +  ["ꙋ"] = "u",          -- Monograph uk, U+a64b.  (No glyph yet in the "fixed" font in February 2010 …) +  ["ꙑ"] = "y",          -- U+a651 +} + +translit.tables["OCS \\quotation{scientific} transliteration additional lowercase"] = translit.ocs_add_low + +---------------------------------------------------------------------------------------- +-- Uppercase additional pre-Peter cyrillic characters -- “scientific transliteration” -- +---------------------------------------------------------------------------------------- + +translit.ocs_add_upp = { +  ["Ѕ"] = "Dz", +  ["Ѯ"] = "Ks", +  ["Ѱ"] = "Ps", +  ["Ѡ"] = "Ô", +  ["Ѿ"] = "ot", +  ["Ѫ"] = "Ǫ", +  ["Ѧ"] = "Ę", +  ["Ѭ"] = "Jǫ", +  ["Ѩ"] = "Ję", +  ["Ѥ"] = "Je", +  ["Ѹ"] = "U",          -- Digraph uk. +  ["Ꙋ"] = "U",          -- Monograph Uk, U+a64a. 
+  ["Ꙑ"] = "Y",  -- U+a650 +} + +translit.tables["OCS \\quotation{scientific} transliteration additional uppercase"] = translit.ocs_add_upp + +--===========================================================================-- +--                              End Of Tables                                -- +--===========================================================================-- + +function translit.scientific (mode, text) +    local P, R, S, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cs +    local loc = lpeg.locale () + +    -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html +    local utfchar = R("\000\127") +  +        R("\194\223") * R("\128\191") +  +        R("\224\240") * R("\128\191") * R("\128\191") +  +        R("\241\244") * R("\128\191") * R("\128\191") * R("\128\191") + +    local cyr = {} +    local cyruk, p_cyruk, p_cyr +    local scientific_parser +    if mode == ("iso9_ocs") then + +        dofile "trans_tables_iso9.lua" +        translit.add_table( cyr, +            translit.ru_upp, +            translit.ru_low, +            translit.ru_old_upp, +            translit.ru_old_low, +            translit.non_ru_upp, +            translit.non_ru_low, +            translit.ocs_add_low, +            translit.ocs_add_upp +        ) + +        p_cyr               = Cs(utfchar) / cyr +        scientific_parser   = Cs((p_cyr + utfchar)^0) + +    elseif mode == ("ocs") then + +        for i,_ in pairs(translit.ocs_uk) do  +            if cyruk == nil then cyruk = P(i)       -- is this The Right Way build  patterns from a table? 
+            else cyruk = cyruk + P(i) +            end +        end +        translit.add_table( cyr, translit.ocs_low, translit.ocs_upp ) + +        p_cyruk     = Cs(P(cyruk))  / translit.ocs_uk +        p_cyr       = Cs(utfchar)   / cyr + +        scientific_parser = Cs((p_cyruk + p_cyr + utfchar)^0) + +    elseif mode == ("ocs_gla") then +        dofile "trans_tables_glag.lua" +        translit.add_table( cyr, translit.ocs_gla_low, translit.ocs_gla_upp ) +        p_cyr = Cs(utfchar) / cyr +        scientific_parser = Cs((p_cyr + utfchar)^0) +    end + +    text = scientific_parser:match(text) + +    return text +end + diff --git a/tex/context/third/transliterator/trans_tables_trsc.lua b/tex/context/third/transliterator/trans_tables_trsc.lua new file mode 100644 index 0000000..b3061c0 --- /dev/null +++ b/tex/context/third/transliterator/trans_tables_trsc.lua @@ -0,0 +1,704 @@ +--===========================================================================-- +--                      Legacy national transliterations                     -- +--===========================================================================-- +--------------------------------- +-- German simple transcription -- +--------------------------------- +-- Reference:   „DUDEN. Rechtschreibung der deutschen Sprache“; 20. Aufl., +--              Mannheim et. al. 1991. + +-------------------------------------------------------- +-- Lowercase German simple transcription---first pass -- +-------------------------------------------------------- + +translit.ru_trsc_low_first = { +  [" е"] = " je", +  ["ъе"] = "je", +  ["ье"] = "je", +  [" ё"] = " jo", +  ["ъё"] = "jo", +  ["ьё"] = "jo", +  ["жё"] = "scho", +  ["чё"] = "tscho", +  ["шё"] = "scho", +  ["щё"] = "schtscho", +  ["ье"] = "je", +  ["ьи"] = "ji", +  ["ьо"] = "jo", +  ["ий"] = "i", +  ["ый"] = "y", +  ["кс"] = "x" -- Extraordinarily stupid one. 
}

translit.tables["German transcription first pass lowercase"] = translit.ru_trsc_low_first

--------------------------------------------------------
-- Uppercase German simple transcription---first pass --
--------------------------------------------------------

-- Multi-character contexts that are rewritten before the one-letter table
-- below is applied.  Keys that start with a space stand for a word-initial
-- letter; their replacements keep that space so that words stay separated.
translit.ru_trsc_upp_first = {
  [" Е"] = " Je",
  ["Ъе"] = "Je",  -- Pedantic, isn't it?
  ["Ье"] = "Je",
  [" Ё"] = " Jo",
  ["Ъё"] = "Jo",
  ["Ьё"] = "Jo",
  ["Жё"] = "Scho",
  ["Чё"] = "Tscho",
  ["Шё"] = "Scho",
  ["Щё"] = "Schtscho",
  -- NOTE(review): the replacement is all lowercase although the key is
  -- capitalised; confirm the intended value against the lowercase table.
  ["Кс"] = "ks"
}

translit.tables["German transcription first pass uppercase"] = translit.ru_trsc_upp_first

-------------------------------------------
-- Lowercase German simple transcription --
-------------------------------------------

-- One-letter replacements; the hard and soft signs map to the empty string.
translit.ru_trsc_low = {
  ["а"] = "a",
  ["б"] = "b",
  ["в"] = "w",
  ["г"] = "g",
  ["д"] = "d",
  ["е"] = "e",
  ["ё"] = "jo",
  ["ж"] = "sch",
  ["з"] = "s",
  ["и"] = "i",
  ["й"] = "i",
  ["к"] = "k",
  ["л"] = "l",
  ["м"] = "m",
  ["н"] = "n",
  ["о"] = "o",
  ["п"] = "p",
  ["р"] = "r",
  ["с"] = "s",
  ["т"] = "t",
  ["у"] = "u",
  ["ф"] = "f",
  ["х"] = "ch",
  ["ц"] = "z",
  ["ч"] = "tsch",
  ["ш"] = "sch",
  ["щ"] = "schtsch",
  ["ъ"] = "",
  ["ы"] = "y",
  ["ь"] = "",
  ["э"] = "e",
  ["ю"] = "ju",
  ["я"] = "ja"
}

translit.tables["German transcription second pass lowercase"] = translit.ru_trsc_low

-------------------------------------------
-- Uppercase German simple transcription --
-------------------------------------------

translit.ru_trsc_upp = {
  ["А"] = "A",
  ["Б"] = "B",
  ["В"] = "W",
  ["Г"] = "G",
  ["Д"] = "D",
  ["Е"] = "E",
  ["Ё"] = "Jo",
  ["Ж"] = "Sch",
  ["З"] = "S",
  ["И"] = "I",
  ["Й"] = "J",
  ["К"] = "K",
  ["Л"] = "L",
  ["М"] = "M",
  ["Н"] = "N",
  ["О"] = "O",
  ["П"] = "P",
  ["Р"] = "R",
  ["С"] = "S",
  ["Т"] = "T",
  ["У"] = "U",
  ["Ф"] = "F",
  ["Х"] = "Ch",
  ["Ц"] = "Z",
  ["Ч"] = "Tsch",
  ["Ш"] = "Sch",
  ["Щ"] = "Schtsch",
  ["Ъ"] = "",
  ["Ы"] = "Y",
  ["Ь"] = "",
  ["Э"] = "E",
  ["Ю"] = "Ju",
  ["Я"] = "Ja"
}

translit.tables["German transcription second pass uppercase"] = translit.ru_trsc_upp

-- The letters after which "й" followed by a consonant is written "j".
translit.ru_trsc_iy = {"и", "ы", "И", "Ы"}

-- Generate the context-dependent German substitution tables from
-- translit.ru_vowels and translit.ru_consonants (both defined earlier in
-- this file) and register each of them in translit.tables.  Every call
-- rebuilds the tables from scratch.  Takes no arguments, returns nothing.
function translit.gen_rules_de()
    -- The following are more interesting than the previous tables because they
    -- implement various rules.  For instance the table
    -- \type{translit.ru_trsc_irule} holds a substitution dictionary for all
    -- possible combinations (including nonsense galore) of a vowel preceding an
    -- “й” (Russian short i) preceding a consonant; here we access the sets of
    -- Russian vowels as well as consonants that were defined earlier.
    local vowels     = translit.ru_vowels
    local consonants = translit.ru_consonants

    -- The й-rule, VйC -> ViC
    translit.ru_trsc_irule = {}
    for _, vow in ipairs(vowels) do
        for _, cons in ipairs(consonants) do
            translit.ru_trsc_irule[vow .. "й" .. cons] = vow .. "i" .. cons
        end
    end

    translit.tables["German transcription i-rule"] = translit.ru_trsc_irule

    -- The second й-rule, йV -> jV && [иы]йC -> [иы]jC
    translit.ru_trsc_jrule = {}
    for _, vow in ipairs(vowels) do
        translit.ru_trsc_jrule["й" .. vow] = "j" .. vow
    end

    for _, cons in ipairs(consonants) do
        for _, iy in ipairs(translit.ru_trsc_iy) do
            translit.ru_trsc_jrule[iy .. "й" .. cons] = iy .. "j" .. cons
        end
    end

    translit.tables["German transcription j-rule"] = translit.ru_trsc_jrule

    -- The с-rule, VсV -> VssV
    translit.ru_trsc_srule = {}
    for _, vow_1 in ipairs(vowels) do
        for _, vow_2 in ipairs(vowels) do
            translit.ru_trsc_srule[vow_1 .. "с" .. vow_2] = vow_1 .. "ss" .. vow_2
        end
    end

    translit.tables["German transcription s-rule"] = translit.ru_trsc_srule

    -- The sharp-s-rule, Vсх -> Vßх
    -- The Cyrillic х is kept in the replacement; the one-letter tables turn
    -- it into "ch" in a later pass.
    translit.ru_trsc_sharpsrule = {}
    for _, vow in ipairs(vowels) do
        translit.ru_trsc_sharpsrule[vow .. "сх"] = vow .. "ßх"
    end

    translit.tables["German transcription sharp-s-rule"] = translit.ru_trsc_sharpsrule

    -- The е-rule, Vе -> Vje
    translit.ru_trsc_jerule = {}
    for _, vow in ipairs(vowels) do
        translit.ru_trsc_jerule[vow .. "е"] = vow .. "je"
    end

    translit.tables["German transcription je-rule"] = translit.ru_trsc_jerule

    -- The ё-rule, Vё -> Vjo
    -- This should be redundant as [жцчшщ]ё -> o, else ё -> jo .
    -- Somebody should teach those DUDEN-guys parsimony.
    translit.ru_trsc_jorule = {}
    for _, vow in ipairs(vowels) do
        translit.ru_trsc_jorule[vow .. "ё"] = vow .. "jo"
    end

    translit.tables["German transcription (redundant) jo-rule"] = translit.ru_trsc_jorule

end

---------------------------------------------------------
-- Lowercase English simple transcription---first pass --
---------------------------------------------------------

translit.ru_trsc_en_low_first = {
  [" е"] = " ye",
  ["ъе"] = "ye",
  ["ье"] = "ye",
  ["ьи"] = "yi",
}

translit.tables["English transcription lowercase first pass"] = translit.ru_trsc_en_low_first

---------------------------------------------------------
-- Uppercase English simple transcription---first pass --
---------------------------------------------------------

translit.ru_trsc_en_upp_first = {
  [" Е"] = " Ye",
  ["Ъе"] = "Ye",
  ["Ье"] = "Ye",
}

translit.tables["English transcription uppercase first pass"] = translit.ru_trsc_en_upp_first

--------------------------------------------
-- Lowercase English simple transcription --
--------------------------------------------

translit.ru_trsc_en_low = {
  ["а"] = "a",
  ["б"] = "b",
  ["в"] = "v",
  ["г"] = "g",
  ["д"] = "d",
  ["е"] = "e",
  ["ё"] = "e",
  ["ж"] = "zh",
  ["з"] = "z",
  ["и"] = "i",
  ["й"] = "y",
  ["к"] = "k",
  ["л"] = "l",
  ["м"] = "m",
  ["н"] = "n",
  ["о"] = "o",
  ["п"] = "p",
  ["р"] = "r",
  ["с"] = "s",
  ["т"] = "t",
  ["у"] = "u",
  ["ф"] = "f",
  ["х"] = "kh",
  ["ц"] = "ts",
  ["ч"] = "ch",
  ["ш"] = "sh",
  ["щ"] = "shsh",
  ["ъ"] = "",
  ["ы"] = "y",
  ["ь"] = "",
  ["э"] = "e",
  ["ю"] = "yu",
  ["я"] = "ya"
}

translit.tables["English transcription lowercase second pass"] = translit.ru_trsc_en_low

--------------------------------------------
-- Uppercase English simple transcription --
--------------------------------------------

translit.ru_trsc_en_upp = {
  ["А"] = "A",
  ["Б"] = "B",
  ["В"] = "V",
  ["Г"] = "G",
  ["Д"] = "D",
  ["Е"] = "E",
  ["Ё"] = "E",
  ["Ж"] = "Zh",
  ["З"] = "Z",
  ["И"] = "I",
  ["Й"] = "Y",
  ["К"] = "K",
  ["Л"] = "L",
  ["М"] = "M",
  ["Н"] = "N",
  ["О"] = "O",
  ["П"] = "P",
  ["Р"] = "R",
  ["С"] = "S",
  ["Т"] = "T",
  ["У"] = "U",
  ["Ф"] = "F",
  ["Х"] = "Kh",
  ["Ц"] = "Ts",
  ["Ч"] = "Ch",
  ["Ш"] = "Sh",
  ["Щ"] = "Shsh",
  ["Ъ"] = "",
  ["Ы"] = "Y",
  ["Ь"] = "",
  ["Э"] = "E",
  ["Ю"] = "Yu",
  ["Я"] = "Ya"
}

translit.tables["English transcription uppercase second pass"] = translit.ru_trsc_en_upp


-- Generate the context-dependent English substitution table from
-- translit.ru_vowels and register it in translit.tables.  Takes no
-- arguments, returns nothing.
function translit.gen_rules_en ()
    -- The english е-rule, Vе -> Vye
    translit.ru_trsc_en_jerule = {}
    for _, vow in ipairs(translit.ru_vowels) do
        translit.ru_trsc_en_jerule[vow .. "е"] = vow .. "ye"
    end

    translit.tables["English transcription ye-rule"] = translit.ru_trsc_en_jerule
end


-----------------------------------
-- Lowercase Czech transcription --
-----------------------------------

-- The hard and soft signs are mapped onto themselves, i.e. they are kept.
translit.ru_trsc_cz_low = {
  ["а"] = "a",
  ["б"] = "b",
  ["в"] = "v",
  ["г"] = "g",
  ["д"] = "d",
  ["е"] = "e",
  ["ё"] = "ë",
  ["ж"] = "ž",
  ["з"] = "z",
  ["и"] = "i",
  ["й"] = "j",
  ["к"] = "k",
  ["л"] = "l",
  ["м"] = "m",
  ["н"] = "n",
  ["о"] = "o",
  ["п"] = "p",
  ["р"] = "r",
  ["с"] = "s",
  ["т"] = "t",
  ["у"] = "u",
  ["ф"] = "f",
  ["х"] = "ch",
  ["ц"] = "c",
  ["ч"] = "č",
  ["ш"] = "š",
  ["щ"] = "šč",
  ["ъ"] = "ъ",
  ["ы"] = "y",
  ["ь"] = "ь",
  ["э"] = "è",
  ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and тя -> ťa, but
  ["я"] = "ja"  -- that would complicate things a bit and linguists might not
}               -- agree.

translit.tables["Czech transcription lowercase"] = translit.ru_trsc_cz_low

-----------------------------------
-- Uppercase Czech transcription --
-----------------------------------

do
    -- Capital counterpart of the lowercase Czech table; the hard and soft
    -- signs are mapped onto themselves.
    local cz_upp = {
        ["А"] = "A",  ["Б"] = "B",  ["В"] = "V",  ["Г"] = "G",
        ["Д"] = "D",  ["Е"] = "E",  ["Ё"] = "Ë",  ["Ж"] = "Ž",
        ["З"] = "Z",  ["И"] = "I",  ["Й"] = "J",  ["К"] = "K",
        ["Л"] = "L",  ["М"] = "M",  ["Н"] = "N",  ["О"] = "O",
        ["П"] = "P",  ["Р"] = "R",  ["С"] = "S",  ["Т"] = "T",
        ["У"] = "U",  ["Ф"] = "F",  ["Х"] = "Ch", ["Ц"] = "C",
        ["Ч"] = "Č",  ["Ш"] = "Š",  ["Щ"] = "Šč", ["Ъ"] = "Ъ",
        ["Ы"] = "Y",  ["Ь"] = "Ь",  ["Э"] = "È",  ["Ю"] = "Ju",
        ["Я"] = "Ja",
    }

    translit.ru_trsc_cz_upp = cz_upp
    translit.tables["Czech transcription uppercase"] = cz_upp
end

----------------------------------------------
-- Lowercase Additional Czech Transcription --
----------------------------------------------

do
    -- Extra lowercase letters, registered for OCS and pre-1918 text.
    local cz_add_low = {
        ["ѕ"] = "dz", ["з"] = "z",  ["ꙁ"] = "z",  ["і"] = "ï",
        ["ѹ"] = "u",  ["ѡ"] = "ō",  ["ѣ"] = "ě",  ["ѥ"] = "je",
        ["ѧ"] = "ę",  ["ѩ"] = "ję", ["ѫ"] = "ǫ",  ["ѭ"] = "jǫ",
        ["ѯ"] = "ks", ["ѱ"] = "ps", ["ѳ"] = "th", ["ѵ"] = "ÿ",
    }

    translit.ru_trsc_cz_add_low = cz_add_low
    translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = cz_add_low
end

----------------------------------------------
-- Uppercase Additional Czech Transcription --
----------------------------------------------

do
    -- Extra capital letters, registered for OCS and pre-1918 text.
    local cz_add_upp = {
        ["Ѕ"] = "Dz", ["З"] = "Z",  ["Ꙁ"] = "Z",  ["І"] = "Ï",
        ["Ѹ"] = "U",  ["Ѡ"] = "Ō",  ["Ѣ"] = "Ě",  ["Ѥ"] = "Je",
        ["Ѧ"] = "Ę",  ["Ѩ"] = "Ję", ["Ѫ"] = "Ǫ",  ["Ѭ"] = "Jǫ",
        ["Ѯ"] = "Ks", ["Ѱ"] = "Ps", ["Ѳ"] = "Th", ["Ѵ"] = "Ÿ",
    }

    translit.ru_trsc_cz_add_upp = cz_add_upp
    translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = cz_add_upp
end

--===========================================================================--
--                              End Of Tables                                --
--===========================================================================--

-- Transcribe Cyrillic text according to one of the transcription modes.
--
-- mode: one of "ru_transcript_de", "ru_transcript_de_exp",
--       "ru_transcript_en", "ru_transcript_en_exp", "ru_cz", "ocs_cz".
-- text: the UTF-8 encoded string to transcribe.
--
-- Returns the transcribed string.  For any other mode nothing is returned
-- (i.e. nil), as before.
--
-- The plain modes run one substitution pass per rule table over the whole
-- text; the "_exp" modes do everything in a single LPeg pass.
function translit.transcript (mode, text)
    local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs

    -- One UTF-8 encoded character, after
    -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html
    -- Lead bytes 224..239 open three-byte sequences and 240..244 open
    -- four-byte sequences.  Treating 240 as a three-byte lead would leave a
    -- stray continuation byte behind that no alternative matches, which
    -- makes the substitution stop right there and truncate the text.
    local utfchar = R("\000\127") +
        R("\194\223") * R("\128\191") +
        R("\224\239") * R("\128\191") * R("\128\191") +
        R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191")

    -- Add the keys of a dictionary to a ruleset, i.e. an ordered choice of
    -- literal patterns.  rules may be nil to start a new ruleset; the result
    -- is nil only if rules was nil and dict is empty.
    local function addrules (dict, rules)
        for key in pairs(dict) do
            if rules == nil then rules = P(key)
            else rules = rules + P(key)
            end
        end
        return rules
    end

    -- Replace every occurrence of a key of the given tables in s by its
    -- value; all other characters are copied unchanged.
    local function tab_subst (s, ...)
        local dict = {}
        for _, tab in ipairs({...}) do
            translit.add_table(dict, tab)
        end
        local keys = addrules(dict)
        if keys == nil then
            return s -- nothing to substitute
        end
        local fp = Cs((Cs(keys / dict) + utfchar)^0)
        return fp:match(s)
    end

    -- The following is needed because lpeg.S doesn't work with utf: build an
    -- ordered choice out of a list of (multi-byte) characters.
    local function any_of (chars)
        local patt
        for _, ch in ipairs(chars) do
            if patt == nil then patt = P(ch)
            else patt = patt + P(ch)
            end
        end
        return patt
    end

    if mode == "ru_transcript_de_exp" then

        local vow = any_of(translit.ru_vowels)
        local con = any_of(translit.ru_consonants)
        local iy  = any_of(translit.ru_trsc_iy)

        local de_low_upp = translit.add_table( {}, translit.ru_trsc_upp, translit.ru_trsc_low )

        local twochar
        twochar = addrules( translit.ru_trsc_low_first, twochar )
        twochar = addrules( translit.ru_trsc_upp_first, twochar )

        local tworepl = translit.add_table( {}, translit.ru_trsc_low_first, translit.ru_trsc_upp_first )

        -- Transcription of the n-th character of a match; a character that
        -- is missing from the one-letter table is passed through unchanged
        -- instead of raising an error.
        local function nth (s, n)
            local ch = utf.sub(s, n, n)
            return de_low_upp[ch] or ch
        end

        -- The й-rule, VйC -> ViC
        local function V_i_C (s)
            return nth(s, 1) .. "i" .. nth(s, 3)
        end

        -- The second й-rule, йV -> jV && [иы]йC -> [иы]jC
        local function iy_j_C (s)
            return nth(s, 1) .. "j" .. nth(s, 3)
        end

        local function j_V (s)
            return "j" .. nth(s, 2)
        end

        -- The с-rule, VсV -> VssV
        local function V_ss_V (s)
            return nth(s, 1) .. "ss" .. nth(s, 3)
        end

        -- The sharp-s-rule, Vсх -> Vßх
        local function V_sz_ch (s)
            return nth(s, 1) .. "ßch"
        end

        -- The е-rule, Vе -> Vje
        local function V_je (s)
            return nth(s, 1) .. "je"
        end

        -- Reapplying V_je on its result + next char would make the following
        -- rule obsolete.
        local function V_jeje (s)
            return nth(s, 1) .. "jeje"
        end

        -- The ё-rule, Vё -> Vjo
        -- This should be redundant as [жцчшщ]ё -> o, else ё -> jo .
        -- Somebody should teach those DUDEN guys parsimony.
        local function V_jo (s)
            return nth(s, 1) .. "jo"
        end

        local iyrule    = Cs((iy * "й" * con)   / iy_j_C)
        local jrule     = Cs(("й" * vow)        / j_V)
        local irule     = Cs((vow * "й" * con)  / V_i_C)

        local ssrule    = Cs((vow * "с" * vow)  / V_ss_V)
        local szrule    = Cs((vow * "сх")       / V_sz_ch)

        -- The doubled letter has to be tried before the single one.
        local jjrule    = Cs((vow * "ее")       / V_jeje)
        local jerule    = Cs((vow * "е")        / V_je)
        local jorule    = Cs((vow * "ё")        / V_jo)

        local dvoje     = Cs(twochar            / tworepl)
        local other     = Cs((utfchar)          / de_low_upp)

        local izhe      = iyrule + jrule + irule
        local slovo     = ssrule + szrule
        local jest      = jjrule + jerule + jorule

        local g = Cs((izhe + slovo + jest + dvoje + other + utfchar)^0)

        return g:match(text)

    elseif mode == "ru_transcript_de" then

        translit.gen_rules_de()

        -- This is possibly slower than using string:gsub.
        -- The order of the passes matters: the context rules still need
        -- Cyrillic input, so the one-letter tables come last.
        text = tab_subst(text, translit.ru_trsc_jrule)
        text = tab_subst(text, translit.ru_trsc_irule)
        text = tab_subst(text, translit.ru_trsc_jerule)
        text = tab_subst(text, translit.ru_trsc_srule)
        text = tab_subst(text, translit.ru_trsc_sharpsrule)
        text = tab_subst(text, translit.ru_trsc_jorule)
        text = tab_subst(text, translit.ru_trsc_upp_first, translit.ru_trsc_low_first)
        text = tab_subst(text, translit.ru_trsc_upp, translit.ru_trsc_low)

        return text

    elseif mode == "ru_transcript_en_exp" then

        local vow = any_of(translit.ru_vowels)

        local en_low_upp = translit.add_table( {}, translit.ru_trsc_en_low, translit.ru_trsc_en_upp )

        local twochar
        twochar = addrules( translit.ru_trsc_en_low_first, twochar )
        twochar = addrules( translit.ru_trsc_en_upp_first, twochar )

        local tworepl = translit.add_table( {}, translit.ru_trsc_en_low_first, translit.ru_trsc_en_upp_first )

        -- The е-rule, Vе -> Vye; an unmapped vowel is passed through
        -- unchanged instead of raising an error.
        local function V_je (s)
            local ante = utf.sub(s, 1, 1)
            return (en_low_upp[ante] or ante) .. "ye"
        end

        local jerule    = Cs((vow * "е")        / V_je)

        local dvoje     = Cs(twochar            / tworepl)
        local other     = Cs((utfchar)          / en_low_upp)

        local g = Cs((dvoje + jerule + other + utfchar)^0)

        return g:match(text)

    elseif mode == "ru_transcript_en" then

        translit.gen_rules_en()

        text = tab_subst(text, translit.ru_trsc_en_jerule)
        text = tab_subst(text, translit.ru_trsc_en_low_first,   translit.ru_trsc_en_upp_first)
        text = tab_subst(text, translit.ru_trsc_en_low,         translit.ru_trsc_en_upp)

        return text

    elseif mode == "ru_cz" or mode == "ocs_cz" then

        text = tab_subst(text, translit.ru_trsc_cz_low, translit.ru_trsc_cz_upp)
        if mode == "ocs_cz" then
            text = tab_subst(text, translit.ru_trsc_cz_add_low, translit.ru_trsc_cz_add_upp)
        end

        return text
    end

end
