diff options
author | Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> | 2010-03-10 09:32:20 +0100 |
---|---|---|
committer | Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> | 2010-03-10 09:32:20 +0100 |
commit | 53e1c8df1422d0e8b75dca725ec8a141698810ae (patch) | |
tree | ff2d0bb36aa7bb760abfb9f259abae3e58c88ae5 /tex/context | |
parent | ee0e10b17847045b623b0fd69d41a7a948616e71 (diff) | |
download | transliterator-53e1c8df1422d0e8b75dca725ec8a141698810ae.tar.gz |
Now using LPeg, tables and functions in different files.
Diffstat (limited to 'tex/context')
-rw-r--r-- | tex/context/interface/third/t-transliterator.xml | 3 | ||||
-rw-r--r-- | tex/context/third/transliterator/t-transliterator.mkiv | 1922 | ||||
-rw-r--r-- | tex/context/third/transliterator/trans_tables_glag.lua | 122 | ||||
-rw-r--r-- | tex/context/third/transliterator/trans_tables_gr.lua | 693 | ||||
-rw-r--r-- | tex/context/third/transliterator/trans_tables_iso9.lua | 288 | ||||
-rw-r--r-- | tex/context/third/transliterator/trans_tables_scntfc.lua | 256 | ||||
-rw-r--r-- | tex/context/third/transliterator/trans_tables_trsc.lua | 704 |
7 files changed, 2148 insertions, 1840 deletions
diff --git a/tex/context/interface/third/t-transliterator.xml b/tex/context/interface/third/t-transliterator.xml index 2217e12..b5b189f 100644 --- a/tex/context/interface/third/t-transliterator.xml +++ b/tex/context/interface/third/t-transliterator.xml @@ -2,7 +2,7 @@ <cd:interface xmlns:cd="http://www.pragma-ade.com/commands" name="context" language="en" - version="2010.02.27"> + version="2010.3.10"> <cd:command name="setuptransliterator" file="t-transliterator.mkiv"> <cd:sequence> @@ -14,6 +14,7 @@ <cd:constant type="ru_old" default="yes"/> <cd:constant type="ru"/> <cd:constant type="ru_transcript_de"/> + <cd:constant type="ru_transcript_de_exp"/> <cd:constant type="ru_transcript_en"/> <cd:constant type="all"/> <cd:constant type="iso9_ocs"/> diff --git a/tex/context/third/transliterator/t-transliterator.mkiv b/tex/context/third/transliterator/t-transliterator.mkiv index deca291..474af75 100644 --- a/tex/context/third/transliterator/t-transliterator.mkiv +++ b/tex/context/third/transliterator/t-transliterator.mkiv @@ -1,7 +1,6 @@ -%D \enableregime[utf] -%D \module +%D \module %D [ file=t-transliterator, -%D version=2010.03.07, +%D version=2010.03.10, %D title=\CONTEXT\ User Module, %D subtitle=The Transliterator, %D author=Philipp Gesang, @@ -10,8 +9,8 @@ %D license=2-clause BSD, %D email={pgesang at ix dot urz dot uni-heidelberg dot de}] %D This module is licensed under the conditions of the BSD license with -%D two clauses: http://www.freebsd.org/copyright/freebsd-license.html. -%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/. +%D two clauses, there is a copy it in a file named "COPYING" in the +%D transliterator source tree. \writestatus{loading}{Transliteration from non-Latin scripts} @@ -82,1705 +81,14 @@ translit.ru_consonants = {"б", "в", "г", "д", "ж", "з", "к", "л", "м", %D characters if needed; by the way those are included in the default %D transliteration mode \type{ru_old}. 
-%-===========================================================================-- -%- ISO 9.1995(E) standardized transliteration for cyrillic -- -%-===========================================================================-- - -\startluacode ------------------------------------------ --- Lowercase russian cyrillic alphabet -- ------------------------------------------ -translit.ru_low = { - ["а"] = "a", -- U+0430 -> U+0061 - ["б"] = "b", -- U+0431 -> U+0062 - ["в"] = "v", -- U+0432 -> U+0076 - ["г"] = "g", -- U+0433 -> U+0067 - ["д"] = "d", -- U+0434 -> U+0064 - ["е"] = "e", -- U+0435 -> U+0065 - ["ё"] = "ë", -- U+0451 -> U+00eb - ["ж"] = "ž", -- U+0436 -> U+017e - ["з"] = "z", -- U+0437 -> U+007a - ["и"] = "i", -- U+0438 -> U+0069 - ["й"] = "j", -- U+0439 -> U+006a - ["к"] = "k", -- U+043a -> U+006b - ["л"] = "l", -- U+043b -> U+006c - ["м"] = "m", -- U+043c -> U+006d - ["н"] = "n", -- U+043d -> U+006e - ["о"] = "o", -- U+043e -> U+006f - ["п"] = "p", -- U+043f -> U+0070 - ["р"] = "r", -- U+0440 -> U+0072 - ["с"] = "s", -- U+0441 -> U+0073 - ["т"] = "t", -- U+0442 -> U+0074 - ["у"] = "u", -- U+0443 -> U+0075 - ["ф"] = "f", -- U+0444 -> U+0066 - ["х"] = "h", -- U+0445 -> U+0068 - ["ц"] = "c", -- U+0446 -> U+0063 - ["ч"] = "č", -- U+0447 -> U+010d - ["ш"] = "š", -- U+0448 -> U+0161 - ["щ"] = "ŝ", -- U+0449 -> U+015d - ["ъ"] = "ʺ", -- U+044a -> U+02ba <- That's somewhat ambiguous as 0x2ba is - ["ы"] = "y", -- U+044b -> U+0079 used for uppercase, too. - ["ь"] = "ʹ", -- U+044c -> U+02b9 <- Same here with 0x2b9. 
- ["э"] = "è", -- U+044d -> U+00e8 - ["ю"] = "û", -- U+044e -> U+00fb - ["я"] = "â" -- U+044f -> U+00e2 -} - -translit.tables["russian lowercase ISO~9"] = translit.ru_low - ------------------------------------------ --- Uppercase russian cyrillic alphabet -- ------------------------------------------ - -translit.ru_upp = { - ["А"] = "A", -- U+0410 -> U+0041 - ["Б"] = "B", -- U+0411 -> U+0042 - ["В"] = "V", -- U+0412 -> U+0056 - ["Г"] = "G", -- U+0413 -> U+0047 - ["Д"] = "D", -- U+0414 -> U+0044 - ["Е"] = "E", -- U+0415 -> U+0045 - ["Ё"] = "Ë", -- U+0401 -> U+00cb - ["Ж"] = "Ž", -- U+0416 -> U+017d - ["З"] = "Z", -- U+0417 -> U+005a - ["И"] = "I", -- U+0418 -> U+0049 - ["Й"] = "J", -- U+0419 -> U+004a - ["К"] = "K", -- U+041a -> U+004b - ["Л"] = "L", -- U+041b -> U+004c - ["М"] = "M", -- U+041c -> U+004d - ["Н"] = "N", -- U+041d -> U+004e - ["О"] = "O", -- U+041e -> U+004f - ["П"] = "P", -- U+041f -> U+0050 - ["Р"] = "R", -- U+0420 -> U+0052 - ["С"] = "S", -- U+0421 -> U+0053 - ["Т"] = "T", -- U+0422 -> U+0054 - ["У"] = "U", -- U+0423 -> U+0055 - ["Ф"] = "F", -- U+0424 -> U+0046 - ["Х"] = "H", -- U+0425 -> U+0048 - ["Ц"] = "C", -- U+0426 -> U+0043 - ["Ч"] = "Č", -- U+0427 -> U+010c - ["Ш"] = "Š", -- U+0428 -> U+0160 - ["Щ"] = "Ŝ", -- U+0429 -> U+015c - ["Ъ"] = "ʺ", -- U+042a -> U+02ba - ["Ы"] = "Y", -- U+042b -> U+0059 - ["Ь"] = "ʹ", -- U+042c -> U+02b9 - ["Э"] = "È", -- U+042d -> U+00c8 - ["Ю"] = "Û", -- U+042e -> U+00db - ["Я"] = "Â" -- U+042f -> U+00c2 -} - -translit.tables["russian uppercase ISO~9"] = translit.ru_upp - ----------------------------------------------------------- --- Lowercase pre-1918 russian cyrillic additional chars -- ----------------------------------------------------------- --- cf. 
http://www.russportal.ru/index.php?id=oldorth.decret1917 - -translit.ru_old_low = { - ["ѣ"] = "ě", -- U+048d -> U+011b - ["і"] = "ì", -- U+0456 -> U+00ec - ["ѳ"] = "f", -- U+0473 -> U+0066 - ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 -} - -translit.tables["russian pre-1918 lowercase ISO~9"] = translit.ru_low - -translit.ru_old_upp = { - ["Ѣ"] = "Ě", -- U+048c -> U+011a - ["І"] = "Ì", -- U+0406 -> U+00cc - ["Ѳ"] = "F", -- U+0424 -> U+0046 - ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2 -} - -translit.tables["russian pre-1918 uppercase ISO~9"] = translit.ru_upp - ---------------------------------------------------------- --- Lowercase characters from other cyrillic alphabets -- ---------------------------------------------------------- - -translit.non_ru_low = { - ["ӑ"] = "ă", -- U+04d1 -> U+0103 - ["ӓ"] = "ä", -- U+04d3 -> U+00e4 - ["ә"] = "a̋", -- u+04d9 -> U+0061+030b - ["ґ"] = "g̀", -- u+0491 -> U+0067+0300 - ["ҕ"] = "ğ", -- U+0495 -> U+011f - ["ғ"] = "ġ", -- U+0493 -> U+0121 - ["ђ"] = "đ", -- U+0452 -> U+0111 - ["ѓ"] = "ǵ", -- U+0453 -> U+01f5 - ["ӗ"] = "ĕ", -- U+04d7 -> U+0115 - ["є"] = "ê", -- U+0454 -> U+00ea - ["ҽ"] = "c̆", -- U+04bd -> U+0063+0306 - ["ҿ"] = "ç̆", -- U+04bf -> U+00e7+0306 - ["ӂ"] = "z̆", -- U+04c2 -> U+007a+0306 - ["ӝ"] = "z̄", -- U+04dd -> U+007a+0304 - ["җ"] = "ž̧", -- U+0497 -> U+017e+0327 - ["ӟ"] = "z̈", -- U+04df -> U+007a+0308 - ["ѕ"] = "ẑ", -- U+0455 -> U+1e91 -- Mapped to dz in old cyrillic non-ISO. 
- ["ӡ"] = "ź", -- U+04e1 -> U+017a - ["ӥ"] = "î", -- U+04e5 -> U+00ee - ["і"] = "ì", -- U+0456 -> U+00ec - ["ї"] = "ï", -- U+0457 -> U+00ef - ["ј"] = "ǰ", -- U+0458 -> U+01f0 - ["қ"] = "ķ", -- U+049b -> U+0137 - ["ҟ"] = "k̄", -- U+049f -> U+006b+0304 - ["љ"] = "l̂", -- U+0459 -> U+006c+0302 - ["њ"] = "n̂", -- U+045a -> U+006e+0302 - ["ҥ"] = "ṅ", -- U+04a5 -> U+1e45 - ["ң"] = "ṇ", -- U+04a3 -> U+1e47 - ["ӧ"] = "ö", -- U+04e7 -> U+00f6 - ["ө"] = "ô", -- U+04e9 -> U+00f4 - ["ҧ"] = "ṕ", -- U+04a7 -> U+1e55 - ["ҫ"] = "ç", -- U+04ab -> U+00e7 - ["ҭ"] = "ţ", -- U+04ad -> U+0163 - ["ћ"] = "ć", -- U+045b -> U+0107 - ["ќ"] = "ḱ", -- U+045c -> U+1e31 - ["у́"] = "ú", -- U+0443+ -> U+00fA - ["ў"] = "ŭ", -- U+045e -> U+016d - ["ӱ"] = "ü", -- U+04f1 -> U+00fc - ["ӳ"] = "ű", -- U+04f3 -> U+0171 - ["ү"] = "ù", -- U+04af -> U+00f9 - ["ҳ"] = "ḩ", -- U+04b3 -> U+1e29 - ["һ"] = "ḥ", -- U+04bb -> U+1e25 - ["ҵ"] = "c̄", -- U+04b5 -> U+0063+0304 - ["ӵ"] = "c̈", -- U+04f5 -> U+0063+0308 - ["ҷ"] = "ç", -- U+04cc -> U+00e7 - ["џ"] = "d̂", -- U+045f -> U+0064+0302 - ["ӹ"] = "ÿ", -- U+04f9 -> U+00ff - ["ѣ"] = "ě", -- U+048d -> U+011b - ["ѫ"] = "ǎ", -- U+046b -> U+01ce -- Mapped to ǫ in non-ISO old cyrillic. - ["ѳ"] = "f̀", -- U+0473 -> U+0066+0300 -- This is mapped to ‘f’ in ru_old. 
- ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 - ["ҩ"] = "ò", -- U+04a9 -> U+00f2 - ["Ӏ"] = "‡" -- U+04cf -> U+2021 -} - -translit.tables["cyrillic other lowercase ISO~9"] = translit.non_ru_low - ---------------------------------------------------------- --- Uppercase characters from other cyrillic alphabets -- ---------------------------------------------------------- - -translit.non_ru_upp = { - ["Ӑ"] = "Ă", -- U+04d0 -> U+0102 - ["Ӓ"] = "Ä", -- U+04d2 -> U+00c4 - ["Ә"] = "A̋", -- U+04d8 -> U+0041+030b - ["Ґ"] = "G̀", -- U+0490 -> U+0047+0300 - ["Ҕ"] = "Ğ", -- U+0494 -> U+011e - ["Ғ"] = "Ġ", -- U+0492 -> U+0120 - ["Ђ"] = "Đ", -- U+0402 -> U+0110 - ["Ѓ"] = "Ǵ", -- U+0403 -> U+01f4 - ["Ӗ"] = "Ĕ", -- U+04d6 -> U+0114 - ["Є"] = "Ê", -- U+0404 -> U+00ca - ["Ҽ"] = "C̆", -- U+04bc -> U+0043+0306 - ["Ҿ"] = "Ç̆", -- U+04be -> U+00c7+0306 - ["Ӂ"] = "Z̆", -- U+04c1 -> U+005a+0306 - ["Ӝ"] = "Z̄", -- U+04dc -> U+005a+0304 - ["Җ"] = "Ž̦", -- U+0496 -> U+017d+0326 - ["Ӟ"] = "Z̈", -- U+04de -> U+005a+0308 - ["Ѕ"] = "Ẑ", -- U+0405 -> U+1e90 - ["Ӡ"] = "Ź", -- U+04e0 -> U+0179 - ["Ӥ"] = "Î", -- U+04e4 -> U+00ce - ["І"] = "Ì", -- U+0406 -> U+00cc - ["Ї"] = "Ï", -- U+0407 -> U+00cf - ["Ј"] = "J̌", -- U+0408 -> U+004a+030c - ["Қ"] = "Ķ", -- U+049a -> U+0136 - ["Ҟ"] = "K̄", -- U+049e -> U+004b+0304 - ["Љ"] = "L̂", -- U+0409 -> U+004c+0302 - ["Њ"] = "N̂", -- U+040a -> U+004e+0302 - ["Ҥ"] = "Ṅ", -- U+04a4 -> U+1e44 - ["Ң"] = "Ṇ", -- U+04a2 -> U+1e46 - ["Ӧ"] = "Ö", -- U+04e6 -> U+00d6 - ["Ө"] = "Ô", -- U+04e8 -> U+00d4 - ["Ҧ"] = "Ṕ", -- U+04a6 -> U+1e54 - ["Ҫ"] = "Ç", -- U+04aa -> U+00c7 - ["Ҭ"] = "Ţ", -- U+04ac -> U+0162 - ["Ћ"] = "Ć", -- U+040b -> U+0106 - ["Ќ"] = "Ḱ", -- U+040c -> U+1e30 - ["У́"] = "Ú", -- U+0423 -> U+00da - ["Ў"] = "Ŭ", -- U+040e -> U+016c - ["Ӱ"] = "Ü", -- U+04f0 -> U+00dc - ["Ӳ"] = "Ű", -- U+04f2 -> U+0170 - ["Ү"] = "Ù", -- U+04ae -> U+00d9 - ["Ҳ"] = "Ḩ", -- U+04b2 -> U+1e28 - ["Һ"] = "Ḥ", -- U+04ba -> U+1e24 - ["Ҵ"] = "C̄", -- U+04b4 -> U+0043+0304 - ["Ӵ"] = "C̈", -- U+04f4 -> 
U+0043+0308 - ["Ҷ"] = "Ç", -- U+04cb -> U+00c7 - ["Џ"] = "D̂", -- U+040f -> U+0044+0302 - ["Ӹ"] = "Ÿ", -- U+04f8 -> U+0178 - ["Ѣ"] = "Ě", -- U+048c -> U+011a - ["Ѫ"] = "Ǎ", -- U+046a -> U+01cd - ["Ѳ"] = "F̀", -- U+0472 -> U+0046+0300 - ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2 - ["Ҩ"] = "Ò", -- U+04a8 -> U+00d2 - ["’"] = "‵", -- U+2035 -> U+2019 - ["Ӏ"] = "‡" -- U+04c0 -> U+2021 -} - -translit.tables["cyrillic other uppercase ISO~9"] = translit.non_ru_upp - -\stopluacode - -\startluacode - ---===========================================================================-- --- Legacy national transliterations -- ---===========================================================================-- --- Note: --- Use these only as a last resort. ‘Vulgar’ transcription is ugly and --- chauvinistic. - ---------------------------------- --- German simple transcription -- ---------------------------------- --- Reference: „DUDEN. Rechtschreibung der deutschen Sprache“; 20. Aufl., --- Mannheim et. al. 1991. - --------------------------------------------------------- --- Lowercase German simple transcription---first pass -- --------------------------------------------------------- - -translit.ru_trsc_low_first = { - [" е"] = " je", - ["ъе"] = "je", - ["ье"] = "je", - [" ё"] = " jo", - ["ъё"] = "jo", - ["ьё"] = "jo", - ["жё"] = "scho", - ["чё"] = "tscho", - ["шё"] = "scho", - ["щё"] = "schtscho", - ["ье"] = "je", - ["ьи"] = "ji", - ["ьо"] = "jo", - ["ий"] = "i", - ["ый"] = "y", - ["кс"] = "x" -- Extraordinarily stupid one. -} - -translit.tables["German transcription first pass lowercase"] = translit.ru_trsc_low_first - --------------------------------------------------------- --- Uppercase German simple transcription---first pass -- --------------------------------------------------------- - -translit.ru_trsc_upp_first = { - [" Е"] = " Je", - ["Ъe"] = "Je", -- Pedantic, isn't it? 
- ["Ье"] = "Je", - [" Ё"] = "Jo", - ["Ъё"] = "Jo", - ["Ьё"] = "Jo", - ["Жё"] = "Scho", - ["Чё"] = "Tscho", - ["Шё"] = "Scho", - ["Щё"] = "Schtscho", - ["Кс"] = "ks" -} - -translit.tables["German transcription first pass uppercase"] = translit.ru_trsc_upp_first - -------------------------------------------- --- Lowercase German simple transcription -- -------------------------------------------- - -translit.ru_trsc_low = { - ["а"] = "a", - ["б"] = "b", - ["в"] = "w", - ["г"] = "g", - ["д"] = "d", - ["е"] = "e", - ["ё"] = "jo", - ["ж"] = "sch", - ["з"] = "s", - ["и"] = "i", - ["й"] = "i", - ["к"] = "k", - ["л"] = "l", - ["м"] = "m", - ["н"] = "n", - ["о"] = "o", - ["п"] = "p", - ["р"] = "r", - ["с"] = "s", - ["т"] = "t", - ["у"] = "u", - ["ф"] = "f", - ["х"] = "ch", - ["ц"] = "z", - ["ч"] = "tsch", - ["ш"] = "sch", - ["щ"] = "schtsch", - ["ъ"] = "", - ["ы"] = "y", - ["ь"] = "", - ["э"] = "e", - ["ю"] = "ju", - ["я"] = "ja" -} - -translit.tables["German transcription second pass lowercase"] = translit.ru_trsc_low - -------------------------------------------- --- Uppercase German simple transcription -- -------------------------------------------- - -translit.ru_trsc_upp = { - ["А"] = "A", - ["Б"] = "B", - ["В"] = "W", - ["Г"] = "G", - ["Д"] = "D", - ["Е"] = "E", - ["Ё"] = "Jo", - ["Ж"] = "Sch", - ["З"] = "S", - ["И"] = "I", - ["Й"] = "J", - ["К"] = "K", - ["Л"] = "L", - ["М"] = "M", - ["Н"] = "N", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["У"] = "U", - ["Ф"] = "F", - ["Х"] = "Ch", - ["Ц"] = "Z", - ["Ч"] = "Tsch", - ["Ш"] = "Sch", - ["Щ"] = "Schtsch", - ["Ъ"] = "", - ["Ы"] = "Y", - ["Ь"] = "", - ["Э"] = "E", - ["Ю"] = "Ju", - ["Я"] = "Ja" -} - -translit.tables["German transcription second pass uppercase"] = translit.ru_trsc_upp - -\stopluacode - -%D The following are more interesting than the previous tables because they -%D implement various rules. 
For instance the table -%D \type{translit.ru_trsc_irule} holds a substitution dictionary for all -%D possible combinations (including nonsense galore) of a vowel preceding an -%D “й” (Russian short i) preceding a consonant; here we access the sets of -%D Russian vowels as well consonants that were defined earlier. - -\startluacode --- The й-rule, VйC -> ViC -translit.ru_trsc_irule = {} -for i, vow in ipairs(translit.ru_vowels) do - for j, cons in ipairs(translit.ru_consonants) do - local new_ante = vow .. "й" .. cons - local new_post = vow .. "i" .. cons - translit.ru_trsc_irule[new_ante] = new_post - end -end - -translit.tables["German transcription i-rule"] = translit.ru_trsc_irule - --- The second й-rule, йV -> jV && [иы]йC -> [иы]jC -translit.ru_trsc_jrule = {} -for i, vow in ipairs(translit.ru_vowels) do - local new_ante = "й" .. vow - local new_post = "j" .. vow - translit.ru_trsc_jrule[new_ante] = new_post -end - -translit.ru_trsc_iy = {"и", "ы", "И", "Ы"} -for i, cons in ipairs(translit.ru_consonants) do - for j, iy in ipairs(translit.ru_trsc_iy) do - local new_ante = iy .. "й" .. cons - local new_post = iy .. "j" .. cons - translit.ru_trsc_jrule[new_ante] = new_post - end -end - -translit.tables["German transcription j-rule"] = translit.ru_trsc_jrule - --- The с-rule, VсV -> VssV -translit.ru_trsc_srule = {} -for i, vow_1 in ipairs(translit.ru_vowels) do -for j, vow_2 in ipairs(translit.ru_vowels) do - local new_ante = vow_1 .. "с" .. vow_2 - local new_post = vow_1 .. "ss" .. vow_2 - translit.ru_trsc_srule[new_ante] = new_post - end -end - -translit.tables["German transcription s-rule"] = translit.ru_trsc_srule - --- The sharp-s-rule, Vсх -> Vßх -translit.ru_trsc_sharpsrule = {} -for i, vow in ipairs(translit.ru_vowels) do - local new_ante = vow .. "сх" - local new_post = vow .. 
"ßх" - translit.ru_trsc_sharpsrule[new_ante] = new_post -end - -translit.tables["German transcription sharp-s-rule"] = translit.ru_trsc_sharpsrule - --- The е-rule, Vе -> Vje -translit.ru_trsc_jerule = {} -for i, vow in ipairs(translit.ru_vowels) do - local new_ante = vow .. "е" - local new_post = vow .. "je" - translit.ru_trsc_jerule[new_ante] = new_post -end - -translit.tables["German transcription je-rule"] = translit.ru_trsc_jerule - --- The ё-rule, Vё -> Vjo --- This should be redundant as [жцчшщ]ё -> o, else ё -> jo . --- Somebody should teach those DUDEN-guys parsimony. -translit.ru_trsc_jorule = {} -for i, vow in ipairs(translit.ru_vowels) do - local new_ante = vow .. "ё" - local new_post = vow .. "jo" - translit.ru_trsc_jorule[new_ante] = new_post -end - -translit.tables["German transcription (redundant) jo-rule"] = translit.ru_trsc_jorule - -\stopluacode - -\startluacode - ---------------------------------------------------------- --- Lowercase English simple transcription---first pass -- ---------------------------------------------------------- - -translit.ru_trsc_en_low_first = { - [" е"] = " ye", - ["ъе"] = "ye", - ["ье"] = "ye", - ["ье"] = "ye", - ["ьи"] = "yi", -} - -translit.tables["English transcription lowercase first pass"] = translit.ru_trsc_en_low_first - ---------------------------------------------------------- --- Uppercase English simple transcription---first pass -- ---------------------------------------------------------- - -translit.ru_trsc_en_upp_first = { - [" Е"] = " Ye", - ["Ъe"] = "Ye", - ["Ье"] = "Ye", -} - -translit.tables["English transcription uppercase first pass"] = translit.ru_trsc_en_upp_first - --------------------------------------------- --- Lowercase English simple transcription -- --------------------------------------------- - -translit.ru_trsc_en_low = { - ["а"] = "a", - ["б"] = "b", - ["в"] = "v", - ["г"] = "g", - ["д"] = "d", - ["е"] = "e", - ["ё"] = "e", - ["ж"] = "zh", - ["з"] = "z", - ["и"] = "i", - ["й"] = 
"y", - ["к"] = "k", - ["л"] = "l", - ["м"] = "m", - ["н"] = "n", - ["о"] = "o", - ["п"] = "p", - ["р"] = "r", - ["с"] = "s", - ["т"] = "t", - ["у"] = "u", - ["ф"] = "f", - ["х"] = "kh", - ["ц"] = "ts", - ["ч"] = "ch", - ["ш"] = "sh", - ["щ"] = "shsh", - ["ъ"] = "", - ["ы"] = "y", - ["ь"] = "", - ["э"] = "e", - ["ю"] = "yu", - ["я"] = "ya" -} - -translit.tables["English transcription lowercase second pass"] = translit.ru_trsc_en_low - --------------------------------------------- --- Uppercase English simple transcription -- --------------------------------------------- - -translit.ru_trsc_en_upp = { - ["А"] = "A", - ["Б"] = "B", - ["В"] = "V", - ["Г"] = "G", - ["Д"] = "D", - ["Е"] = "E", - ["Ё"] = "E", - ["Ж"] = "Zh", - ["З"] = "Z", - ["И"] = "I", - ["Й"] = "Y", - ["К"] = "K", - ["Л"] = "L", - ["М"] = "M", - ["Н"] = "N", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["У"] = "U", - ["Ф"] = "F", - ["Х"] = "Kh", - ["Ц"] = "Ts", - ["Ч"] = "Ch", - ["Ш"] = "Sh", - ["Щ"] = "Shsh", - ["Ъ"] = "", - ["Ы"] = "Y", - ["Ь"] = "", - ["Э"] = "E", - ["Ю"] = "Yu", - ["Я"] = "Ya" -} - -translit.tables["English transcription uppercase second pass"] = translit.ru_trsc_en_upp - --- The english е-rule, Vе -> Vye -translit.ru_trsc_en_jerule = {} -for i, vow in ipairs(translit.ru_vowels) do - local new_ante = vow .. "е" - local new_post = vow .. 
"ye" - translit.ru_trsc_en_jerule[new_ante] = new_post -end - -translit.tables["English transcription ye-rule"] = translit.ru_trsc_en_jerule - -\stopluacode - -\startluacode - ------------------------------------ --- Lowercase Czech transcription -- ------------------------------------ - -translit.ru_trsc_cz_low = { - ["а"] = "a", - ["б"] = "b", - ["в"] = "v", - ["г"] = "g", - ["д"] = "d", - ["е"] = "e", - ["ё"] = "ë", - ["ж"] = "ž", - ["з"] = "z", - ["и"] = "i", - ["й"] = "j", - ["к"] = "k", - ["л"] = "l", - ["м"] = "m", - ["н"] = "n", - ["о"] = "o", - ["п"] = "p", - ["р"] = "r", - ["с"] = "s", - ["т"] = "t", - ["у"] = "u", - ["ф"] = "f", - ["х"] = "ch", - ["ц"] = "c", - ["ч"] = "č", - ["ш"] = "š", - ["щ"] = "šč", - ["ъ"] = "ъ", - ["ы"] = "y", - ["ь"] = "ь", - ["э"] = "è", - ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and тя -> ťa, but - ["я"] = "ja" -- that would complicate things a bit and linguists might not -} -- agree. - -translit.tables["Czech transcription lowercase"] = translit.ru_trsc_cz_low - ------------------------------------ --- Uppercase Czech transcription -- ------------------------------------ - -translit.ru_trsc_cz_upp = { - ["А"] = "A", - ["Б"] = "B", - ["В"] = "V", - ["Г"] = "G", - ["Д"] = "D", - ["Е"] = "E", - ["Ё"] = "Ë", - ["Ж"] = "Ž", - ["З"] = "Z", - ["И"] = "I", - ["Й"] = "J", - ["К"] = "K", - ["Л"] = "L", - ["М"] = "M", - ["Н"] = "N", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["У"] = "U", - ["Ф"] = "F", - ["Х"] = "Ch", - ["Ц"] = "C", - ["Ч"] = "Č", - ["Ш"] = "Š", - ["Щ"] = "Šč", - ["Ъ"] = "Ъ", - ["Ы"] = "Y", - ["Ь"] = "Ь", - ["Э"] = "È", - ["Ю"] = "Ju", - ["Я"] = "Ja" -} - -translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp - ----------------------------------------------- --- Lowercase Additional Czech Transcription -- ----------------------------------------------- - -translit.ru_trsc_cz_add_low = { - ["ѕ"] = "dz", - ["з"] = "z", - ["ꙁ"] = "z", - ["і"] = "ï", - 
["ѹ"] = "u", - ["ѡ"] = "ō", - ["ѣ"] = "ě", - ["ѥ"] = "je", - ["ѧ"] = "ę", - ["ѩ"] = "ję", - ["ѫ"] = "ǫ", - ["ѭ"] = "jǫ", - ["ѯ"] = "ks", - ["ѱ"] = "ps", - ["ѳ"] = "th", - ["ѵ"] = "ÿ", -} - -translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit.ru_trsc_cz_add_low - - ----------------------------------------------- --- Uppercase Additional Czech Transcription -- ----------------------------------------------- - -translit.ru_trsc_cz_add_upp = { - ["Ѕ"] = "Dz", - ["З"] = "Z", - ["Ꙁ"] = "Z", - ["І"] = "Ï", - ["Ѹ"] = "U", - ["Ѡ"] = "Ō", - ["Ѣ"] = "Ě", - ["Ѥ"] = "Je", - ["Ѧ"] = "Ę", - ["Ѩ"] = "Ję", - ["Ѫ"] = "Ǫ", - ["Ѭ"] = "Jǫ", - ["Ѯ"] = "Ks", - ["Ѱ"] = "Ps", - ["Ѳ"] = "Th", - ["Ѵ"] = "Ÿ", -} - -translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit.ru_trsc_cz_add_upp - -\stopluacode - -%-===========================================================================-- -%- Other transliterations -- -%-===========================================================================-- - -\startluacode - --- The following are needed because ISO 9 does not cover old Slavonic --- characters that became obsolete before the advent of гражданский шрифт. - --- Please note that these mappings are not bijective so don't expect the result --- to be easily revertible (by machines). - --- Source p. 
77 of --- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf - ------------------------------------------------------------------------ --- Lowercase and uppercase letter Uk -- “scientific transliteration” -- ------------------------------------------------------------------------ - -translit.ocs_uk = { - ["oу"] = "u", - ["оу"] = "u", - ["Оу"] = "U", -} ------------------------------------------------------------------------------ --- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” -- ------------------------------------------------------------------------------ - -translit.ocs_low = { - ["а"] = "a", - ["б"] = "b", - ["в"] = "v", - ["г"] = "g", - ["д"] = "d", - ["є"] = "e", - ["ж"] = "ž", - ["ꙃ"] = "ʒ", -- U+0292, alternative: dz U+01f3 - ["ѕ"] = "ʒ", - ["ꙁ"] = "z", - ["з"] = "z", - ["и"] = "i", - ["і"] = "i", - ["ї"] = "i", - ["ћ"] = "g’", - ["к"] = "k", - ["л"] = "l", - ["м"] = "m", - ["н"] = "n", - ["о"] = "o", - ["п"] = "p", - ["р"] = "r", - ["с"] = "s", - ["т"] = "t", - ["у"] = "u", - ["ѹ"] = "u", - ["ꙋ"] = "u", - ["ф"] = "f", - ["х"] = "x", - ["ѡ"] = "o", --"ō", - ["ѿ"] = "ot", -- U+047f - ["ѽ"] = "o!", -- U+047d - ["ꙍ"] = "o!", -- U+064D - ["ц"] = "c", - ["ч"] = "č", - ["ш"] = "š", - ["щ"] = "št", - ["ъ"] = "ъ", - ["ы"] = "y", - ["ꙑ"] = "y", -- Old jery (U+a651) as used e.g. by the OCS Wikipedia. 
- ["ь"] = "ь", - ["ѣ"] = "ě", - ["ю"] = "ju", - ["ꙗ"] = "ja", - ["ѥ"] = "je", - ["ѧ"] = "ę", - ["ѩ"] = "ję", - ["ѫ"] = "ǫ", - ["ѭ"] = "jǫ", - ["ѯ"] = "ks", - ["ѱ"] = "ps", - ["ѳ"] = "th", - ["ѵ"] = "ü", -} - -translit.tables["OCS \\quotation{scientific} transliteration lowercase"] = translit.ocs_low - ------------------------------------------------------------------------------ --- Uppercase pre-Peter cyrillic characters -- “scientific transliteration” -- ------------------------------------------------------------------------------ - -translit.ocs_upp = { - ["А"] = "A", - ["Б"] = "B", - ["В"] = "V", - ["Г"] = "G", - ["Д"] = "D", - ["Є"] = "E", - ["Ж"] = "Ž", - ["Ꙃ"] = "Ʒ", -- U+01b7, alternative: Dz U+01f2 - ["Ѕ"] = "Ʒ", - ["Ꙁ"] = "Z", - ["З"] = "Z", - ["И"] = "I", - ["І"] = "I", - ["Ї"] = "I", - ["Ћ"] = "G’", - ["К"] = "K", - ["Л"] = "L", - ["М"] = "M", - ["Н"] = "N", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["У"] = "u", - ["Ѹ"] = "U", - ["ꙋ"] = "U", - ["Ф"] = "F", - ["Х"] = "X", - ["Ѡ"] = "Ō", - ["Ѿ"] = "Ot", -- U+047c - ["Ѽ"] = "O!", -- U+047e - ["Ꙍ"] = "O!", -- U+064C - ["Ц"] = "C", - ["Ч"] = "Č", - ["Ш"] = "Š", - ["Щ"] = "Št", - ["Ъ"] = "Ŭ", - ["Ы"] = "Y", - ["Ꙑ"] = "Y", -- U+a650 - ["Ь"] = "Ĭ", - ["Ѣ"] = "Ě", - ["Ю"] = "Ju", - ["Ꙗ"] = "Ja", - ["Ѥ"] = "Je", - ["Ѧ"] = "Ę", - ["Ѩ"] = "Ję", - ["Ѫ"] = "Ǫ", - ["Ѭ"] = "Jǫ", - ["Ѯ"] = "Ks", - ["Ѱ"] = "Ps", - ["Ѳ"] = "Th", - ["Ѵ"] = "Ü", -} - -translit.tables["OCS \\quotation{scientific} transliteration uppercase"] = translit.ocs_upp - --- Note on the additional tables: these cover characters that are not defined --- in ISO 9 but have a “scientific” transliteration. You may use them as --- complementary mapping to ISO 9, trading off homogenity for completeness. 
- ----------------------------------------------------------------------------------------- --- Lowercase additional pre-Peter cyrillic characters -- “scientific transliteration” -- ----------------------------------------------------------------------------------------- - -translit.ocs_add_low = { - ["ѕ"] = "dz", -- Mapped to ẑ in ISO 9 (Macedonian …) - ["ѯ"] = "ks", - ["ѱ"] = "ps", - ["ѡ"] = "ô", - ["ѿ"] = "ot", -- U+047f - ["ѫ"] = "ǫ", -- Mapped to ǎ in ISO 9. - ["ѧ"] = "ę", - ["ѭ"] = "jǫ", - ["ѩ"] = "ję", - ["ѥ"] = "je", - ["ѹ"] = "u", -- Digraph uk. - ["ꙋ"] = "u", -- Monograph uk, U+a64b. (No glyph yet in the "fixed" font in February 2010 …) - ["ꙑ"] = "y", -- U+a651 -} - -translit.tables["OCS \\quotation{scientific} transliteration additional lowercase"] = translit.ocs_add_low - ----------------------------------------------------------------------------------------- --- Uppercase additional pre-Peter cyrillic characters -- “scientific transliteration” -- ----------------------------------------------------------------------------------------- - -translit.ocs_add_upp = { - ["Ѕ"] = "Dz", - ["Ѯ"] = "Ks", - ["Ѱ"] = "Ps", - ["Ѡ"] = "Ô", - ["Ѿ"] = "ot", - ["Ѫ"] = "Ǫ", - ["Ѧ"] = "Ę", - ["Ѭ"] = "Jǫ", - ["Ѩ"] = "Ję", - ["Ѥ"] = "Je", - ["Ѹ"] = "U", -- Digraph uk. - ["Ꙋ"] = "U", -- Monograph Uk, U+a64a. 
- ["Ꙑ"] = "Y", -- U+a650 -} - -translit.tables["OCS \\quotation{scientific} transliteration additional uppercase"] = translit.ocs_add_upp - - -\stopluacode - -%-===========================================================================-- -%- Glagolica -- -%-===========================================================================-- - -\startluacode - -------------------------------------------- --- Lowercase Glagolitic Transliteration -- -------------------------------------------- - -translit.ocs_gla_low = { - ["ⰰ"] = "a", -- GLAGOLITIC SMALL LETTER AZU - ["ⰱ"] = "b", -- GLAGOLITIC SMALL LETTER BUKY - ["ⰲ"] = "v", -- GLAGOLITIC SMALL LETTER VEDE - ["ⰳ"] = "g", -- GLAGOLITIC SMALL LETTER GLAGOLI - ["ⰴ"] = "d", -- GLAGOLITIC SMALL LETTER DOBRO - ["ⰵ"] = "e", -- GLAGOLITIC SMALL LETTER YESTU - ["ⰶ"] = "ž", -- GLAGOLITIC SMALL LETTER ZHIVETE - ["ⰷ"] = "ʒ", -- GLAGOLITIC SMALL LETTER DZELO - ["ⰸ"] = "z", -- GLAGOLITIC SMALL LETTER ZEMLJA - ["ⰹ"] = "i", -- GLAGOLITIC SMALL LETTER IZHE - ["ⰺ"] = "i", -- GLAGOLITIC SMALL LETTER INITIAL IZHE - ["ⰻ"] = "i", -- GLAGOLITIC SMALL LETTER I - ["ⰼ"] = "g’", -- GLAGOLITIC SMALL LETTER DJERVI - ["ⰽ"] = "k", -- GLAGOLITIC SMALL LETTER KAKO - ["ⰾ"] = "l", -- GLAGOLITIC SMALL LETTER LJUDIJE - ["ⰿ"] = "m", -- GLAGOLITIC SMALL LETTER MYSLITE - ["ⱀ"] = "n", -- GLAGOLITIC SMALL LETTER NASHI - ["ⱁ"] = "o", -- GLAGOLITIC SMALL LETTER ONU - ["ⱂ"] = "p", -- GLAGOLITIC SMALL LETTER POKOJI - ["ⱃ"] = "r", -- GLAGOLITIC SMALL LETTER RITSI - ["ⱄ"] = "s", -- GLAGOLITIC SMALL LETTER SLOVO - ["ⱅ"] = "t", -- GLAGOLITIC SMALL LETTER TVRIDO - ["ⱆ"] = "u", -- GLAGOLITIC SMALL LETTER UKU - ["ⱇ"] = "f", -- GLAGOLITIC SMALL LETTER FRITU - ["ⱈ"] = "x", -- GLAGOLITIC SMALL LETTER HERU - ["ⱉ"] = "o", -- GLAGOLITIC SMALL LETTER OTU - ["ⱊ"] = "?", -- GLAGOLITIC SMALL LETTER PE - ["ⱋ"] = "št", -- GLAGOLITIC SMALL LETTER SHTA - ["ⱌ"] = "c", -- GLAGOLITIC SMALL LETTER TSI - ["ⱍ"] = "č", -- GLAGOLITIC SMALL LETTER CHRIVI - ["ⱎ"] = "š", -- GLAGOLITIC SMALL LETTER 
SHA - ["ⱏ"] = "ъ", -- GLAGOLITIC SMALL LETTER YERU - ["ⱐ"] = "ь", -- GLAGOLITIC SMALL LETTER YERI - ["ⱑ"] = "ě", -- GLAGOLITIC SMALL LETTER YATI - ["ⱒ"] = "x", -- GLAGOLITIC SMALL LETTER SPIDERY HA - ["ⱓ"] = "ju", -- GLAGOLITIC SMALL LETTER YU - ["ⱔ"] = "ę", -- GLAGOLITIC SMALL LETTER SMALL YUS - ["ⱕ"] = "y̨", -- GLAGOLITIC SMALL LETTER SMALL YUS WITH TAIL - ["ⱖ"] = "??", -- GLAGOLITIC SMALL LETTER YO - ["ⱗ"] = "ję", -- GLAGOLITIC SMALL LETTER IOTATED SMALL YU - ["ⱘ"] = "ǫ", -- GLAGOLITIC SMALL LETTER BIG YUS - ["ⱙ"] = "jǫ", -- GLAGOLITIC SMALL LETTER IOTATED BIG YUS - ["ⱚ"] = "th", -- GLAGOLITIC SMALL LETTER FITA - ["ⱛ"] = "ü", -- GLAGOLITIC SMALL LETTER IZHITSA - ["ⱜ"] = "??", -- GLAGOLITIC SMALL LETTER SHTAPIC - ["ⱝ"] = "??", -- GLAGOLITIC SMALL LETTER TROKUTASTI A - ["ⱞ"] = "m", -- GLAGOLITIC SMALL LETTER LATINATE MYSLITE -} - -translit.tables["Glagolica transliteration for OCS lowercase"] = translit.ocs_gla_low - ------------------------------------------------- --- Uppercase (?!) Glagolitic Transliteration -- ------------------------------------------------- - -translit.ocs_gla_upp = { - ["Ⰰ"] = "A", -- GLAGOLITIC CAPITAL LETTER AZU - ["Ⰱ"] = "B", -- GLAGOLITIC CAPITAL LETTER BUKY - ["Ⰲ"] = "V", -- GLAGOLITIC CAPITAL LETTER VEDE - ["Ⰳ"] = "G", -- GLAGOLITIC CAPITAL LETTER GLAGOLI - ["Ⰴ"] = "D", -- GLAGOLITIC CAPITAL LETTER DOBRO - ["Ⰵ"] = "E", -- GLAGOLITIC CAPITAL LETTER YESTU - ["Ⰶ"] = "Ž", -- GLAGOLITIC CAPITAL LETTER ZHIVETE - ["Ⰷ"] = "Ʒ", -- GLAGOLITIC CAPITAL LETTER DZELO - ["Ⰸ"] = "Z", -- GLAGOLITIC CAPITAL LETTER ZEMLJA - ["Ⰹ"] = "I", -- GLAGOLITIC CAPITAL LETTER IZHE - ["Ⰺ"] = "I", -- GLAGOLITIC CAPITAL LETTER INITIAL IZHE - ["Ⰻ"] = "I", -- GLAGOLITIC CAPITAL LETTER I - ["Ⰼ"] = "G’", -- GLAGOLITIC CAPITAL LETTER DJERVI - ["Ⰽ"] = "K", -- GLAGOLITIC CAPITAL LETTER KAKO - ["Ⰾ"] = "L", -- GLAGOLITIC CAPITAL LETTER LJUDIJE - ["Ⰿ"] = "M", -- GLAGOLITIC CAPITAL LETTER MYSLITE - ["Ⱀ"] = "N", -- GLAGOLITIC CAPITAL LETTER NASHI - ["Ⱁ"] = "O", -- GLAGOLITIC 
CAPITAL LETTER ONU - ["Ⱂ"] = "P", -- GLAGOLITIC CAPITAL LETTER POKOJI - ["Ⱃ"] = "R", -- GLAGOLITIC CAPITAL LETTER RITSI - ["Ⱄ"] = "S", -- GLAGOLITIC CAPITAL LETTER SLOVO - ["Ⱅ"] = "T", -- GLAGOLITIC CAPITAL LETTER TVRIDO - ["Ⱆ"] = "U", -- GLAGOLITIC CAPITAL LETTER UKU - ["Ⱇ"] = "F", -- GLAGOLITIC CAPITAL LETTER FRITU - ["Ⱈ"] = "X", -- GLAGOLITIC CAPITAL LETTER HERU - ["Ⱉ"] = "O", -- GLAGOLITIC CAPITAL LETTER OTU - ["Ⱊ"] = "?", -- GLAGOLITIC CAPITAL LETTER PE - ["Ⱋ"] = "Št", -- GLAGOLITIC CAPITAL LETTER SHTA - ["Ⱌ"] = "C", -- GLAGOLITIC CAPITAL LETTER TSI - ["Ⱍ"] = "Č", -- GLAGOLITIC CAPITAL LETTER CHRIVI - ["Ⱎ"] = "Š", -- GLAGOLITIC CAPITAL LETTER SHA - ["Ⱏ"] = "Ъ", -- GLAGOLITIC CAPITAL LETTER YERU - ["Ⱐ"] = "Ь", -- GLAGOLITIC CAPITAL LETTER YERI - ["Ⱑ"] = "Ě", -- GLAGOLITIC CAPITAL LETTER YATI - ["Ⱒ"] = "X", -- GLAGOLITIC CAPITAL LETTER SPIDERY HA - ["Ⱓ"] = "Ju", -- GLAGOLITIC CAPITAL LETTER YU - ["Ⱔ"] = "Ę", -- GLAGOLITIC CAPITAL LETTER SMALL YUS - ["Ⱕ"] = "Y̨", -- GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL - ["Ⱖ"] = "??", -- GLAGOLITIC CAPITAL LETTER YO - ["Ⱗ"] = "Ję", -- GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS - ["Ⱘ"] = "Ǫ", -- GLAGOLITIC CAPITAL LETTER BIG YUS - ["Ⱙ"] = "Jǫ", -- GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS - ["Ⱚ"] = "Th", -- GLAGOLITIC CAPITAL LETTER FITA - ["Ⱛ"] = "Ü", -- GLAGOLITIC CAPITAL LETTER IZHITSA - ["Ⱜ"] = "??", -- GLAGOLITIC CAPITAL LETTER SHTAPIC - ["Ⱝ"] = "??", -- GLAGOLITIC CAPITAL LETTER TROKUTASTI A - ["Ⱞ"] = "M", -- GLAGOLITIC CAPITAL LETTER LATINATE MYSLIT -} - -translit.tables["Glagolica transliteration for OCS uppercase"] = translit.ocs_gla_upp - -\stopluacode - -%-===========================================================================-- -%- Greek -- -%-===========================================================================-- - -\startluacode - --- Note that the Greek transliteration mapping isn't bijective so transliterated --- texts won't be reversible. 
(Shouldn't be impossible to make one up using --- diacritics on latin characters to represent all possible combinations of --- Greek breathings + accents.) - --- Good reading on composed / precombined unicode: --- http://www.tlg.uci.edu/~opoudjis/unicode/unicode_gaps.html#precomposed - -------------------------------------------------- --- Lowercase Greek Initial Position Diphthongs -- -------------------------------------------------- - -translit.gr_di_in_low = { - [" αὑ"] = " hau", - [" αὕ"] = " hau", - [" αὓ"] = " hau", - [" αὗ"] = " hau", - [" εὑ"] = " heu", - [" εὕ"] = " heu", - [" εὓ"] = " heu", - [" εὗ"] = " heu", - [" ηὑ"] = " hēu", - [" ηὕ"] = " hēu", - [" ηὓ"] = " hēu", - [" ηὗ"] = " hēu", - [" οὑ"] = " hu", - [" οὕ"] = " hu", - [" οὓ"] = " hu", - [" οὗ"] = " hu", - [" ωὑ"] = " hōu", - [" ωὕ"] = " hōu", - [" ωὓ"] = " hōu", - [" ωὗ"] = " hōu" -} - -translit.tables["Greek transliteration initial breathing diphthongs lowercase"] = translit.gr_di_in_low - -------------------------------------------------- --- Uppercase Greek Initial Position Diphthongs -- -------------------------------------------------- - -translit.gr_di_in_upp = { - [" Αὑ"] = " Hau", - [" Αὕ"] = " Hau", - [" Αὓ"] = " Hau", - [" Αὗ"] = " Hau", - [" Εὑ"] = " Heu", - [" Εὕ"] = " Heu", - [" Εὓ"] = " Heu", - [" Εὗ"] = " Heu", - [" Ηὑ"] = " Hēu", - [" Ηὕ"] = " Hēu", - [" Ηὓ"] = " Hēu", - [" Ηὗ"] = " Hēu", - [" Οὑ"] = " Hu", - [" Οὕ"] = " Hu", - [" Οὓ"] = " Hu", - [" Οὗ"] = " Hu", - [" Ωὑ"] = " Hōu", - [" Ωὕ"] = " Hōu", - [" Ωὓ"] = " Hōu", - [" Ωὗ"] = " Hōu" -} - -translit.tables["Greek transliteration initial breathing diphthongs uppercase"] = translit.gr_di_in_upp - ---------------------------------------- --- Lowercase Greek Initial Position -- ---------------------------------------- - -translit.gr_in_low = { - [" ἁ"] = " ha", - [" ἅ"] = " ha", - [" ἃ"] = " ha", - [" ἇ"] = " ha", - [" ᾁ"] = " ha", - [" ᾅ"] = " ha", - [" ᾃ"] = " ha", - [" ᾇ"] = " ha", - [" ἑ"] = " he", - [" ἕ"] = " he", - [" 
ἓ"] = " he", - [" ἡ"] = " hē", - [" ἥ"] = " hē", - [" ἣ"] = " hē", - [" ἧ"] = " hē", - [" ᾑ"] = " hē", - [" ᾕ"] = " hē", - [" ᾓ"] = " hē", - [" ᾗ"] = " hē", - [" ἱ"] = " hi", - [" ἵ"] = " hi", - [" ἳ"] = " hi", - [" ἷ"] = " hi", - [" ὁ"] = " ho", - [" ὅ"] = " ho", - [" ὃ"] = " ho", - [" ὑ"] = " hy", - [" ὕ"] = " hy", - [" ὓ"] = " hy", - [" ὗ"] = " hy", - [" ὡ"] = " hō", - [" ὥ"] = " hō", - [" ὣ"] = " hō", - [" ὧ"] = " hō", - [" ᾡ"] = " hō", - [" ᾥ"] = " hō", - [" ᾣ"] = " hō", - [" ᾧ"] = " hō", -} - -translit.tables["Greek transliteration initial breathing lowercase"] = translit.gr_in_low - ---------------------------------------- --- Uppercase Greek Initial Position -- ---------------------------------------- - -translit.gr_in_upp = { - [" Ἁ"] = " Ha", - [" Ἅ"] = " Ha", - [" Ἃ"] = " Ha", - [" Ἇ"] = " Ha", - [" ᾉ"] = " Ha", - [" ᾍ"] = " Ha", - [" ᾋ"] = " Ha", - [" ᾏ"] = " Ha", - [" Ἑ"] = " He", - [" Ἕ"] = " He", - [" Ἓ"] = " He", - [" Ἡ"] = " Hē", - [" Ἥ"] = " Hē", - [" Ἣ"] = " Hē", - [" Ἧ"] = " Hē", - [" ᾙ"] = " Hē", - [" ᾝ"] = " Hē", - [" ᾛ"] = " Hē", - [" ᾟ"] = " Hē", - [" Ἱ"] = " Hi", - [" Ἵ"] = " Hi", - [" Ἳ"] = " Hi", - [" Ἷ"] = " Hi", - [" Ὁ"] = " Ho", - [" Ὅ"] = " Ho", - [" Ὃ"] = " Ho", - [" Ὑ"] = " Hy", - [" Ὕ"] = " Hy", - [" Ὓ"] = " Hy", - [" Ὗ"] = " Hy", - [" Ὡ"] = " Hō", - [" Ὥ"] = " Hō", - [" Ὣ"] = " Hō", - [" Ὧ"] = " Hō", - [" ᾩ"] = " Hō", - [" ᾭ"] = " Hō", - [" ᾫ"] = " Hō", - [" ᾯ"] = " Hō", -} - -translit.tables["Greek transliteration initial breathing uppercase"] = translit.gr_in_upp - ---------------------------------- --- Lowercase Greek Diphthongs -- ---------------------------------- - -translit.gr_di_low = { - ["αυ"] = "au", - ["αύ"] = "au", - ["αὺ"] = "au", - ["αῦ"] = "au", - ["αὐ"] = "au", - ["αὔ"] = "au", - ["αὒ"] = "au", - ["αὖ"] = "au", - ["αὑ"] = "au", - ["αὕ"] = "au", - ["αὓ"] = "au", - ["αὗ"] = "au", - ["ευ"] = "eu", - ["εύ"] = "eu", - ["εὺ"] = "eu", - ["εῦ"] = "eu", - ["εὐ"] = "eu", - ["εὔ"] = "eu", - ["εὒ"] = "eu", - ["εὖ"] = "eu", - 
["εὑ"] = "eu", - ["εὕ"] = "eu", - ["εὓ"] = "eu", - ["εὗ"] = "eu", - ["ηυ"] = "ēu", - ["ηύ"] = "ēu", - ["ηὺ"] = "ēu", - ["ηῦ"] = "ēu", - ["ηὐ"] = "ēu", - ["ηὔ"] = "ēu", - ["ηὒ"] = "ēu", - ["ηὖ"] = "ēu", - ["ηὑ"] = "ēu", - ["ηὕ"] = "ēu", - ["ηὓ"] = "ēu", - ["ηὗ"] = "ēu", - ["ου"] = "u", - ["ου"] = "u", - ["ου"] = "u", - ["ού"] = "u", - ["οὺ"] = "u", - ["οῦ"] = "u", - ["οὐ"] = "u", - ["οὔ"] = "u", - ["οὒ"] = "u", - ["οὖ"] = "u", - ["οὑ"] = "u", - ["οὕ"] = "u", - ["οὓ"] = "u", - ["οὗ"] = "u", - ["ωυ"] = "ōu", - ["ωύ"] = "ōu", - ["ωὺ"] = "ōu", - ["ωῦ"] = "ōu", - ["ωὐ"] = "ōu", - ["ωὔ"] = "ōu", - ["ωὒ"] = "ōu", - ["ωὖ"] = "ōu", - ["ωὑ"] = "ōu", - ["ωὕ"] = "ōu", - ["ωὓ"] = "ōu", - ["ωὗ"] = "ōu", - ["ῤῥ"] = "rrh", -} - -translit.tables["Greek transliteration diphthongs lowercase"] = translit.gr_in_low - ---------------------------------- --- Uppercase Greek Diphthongs -- ---------------------------------- - -translit.gr_di_upp = { - ["Αυ"] = "Au", - ["Αύ"] = "Au", - ["Αὺ"] = "Au", - ["Αῦ"] = "Au", - ["Αὐ"] = "Au", - ["Αὔ"] = "Au", - ["Αὒ"] = "Au", - ["Αὖ"] = "Au", - ["Αὑ"] = "Au", - ["Αὕ"] = "Au", - ["Αὓ"] = "Au", - ["Αὗ"] = "Au", - ["Ευ"] = "Eu", - ["Εύ"] = "Eu", - ["Εὺ"] = "Eu", - ["Εῦ"] = "Eu", - ["Εὐ"] = "Eu", - ["Εὔ"] = "Eu", - ["Εὒ"] = "Eu", - ["Εὖ"] = "Eu", - ["Εὑ"] = "Eu", - ["Εὕ"] = "Eu", - ["Εὓ"] = "Eu", - ["Εὗ"] = "Eu", - ["Ηυ"] = "Ēu", - ["Ηύ"] = "Ēu", - ["Ηὺ"] = "Ēu", - ["Ηῦ"] = "Ēu", - ["Ηὐ"] = "Ēu", - ["Ηὔ"] = "Ēu", - ["Ηὒ"] = "Ēu", - ["Ηὖ"] = "Ēu", - ["Ηὑ"] = "Ēu", - ["Ηὕ"] = "Ēu", - ["Ηὓ"] = "Ēu", - ["Ηὗ"] = "Ēu", - ["Ου"] = "U", - ["Ου"] = "U", - ["Ου"] = "U", - ["Ού"] = "U", - ["Οὺ"] = "U", - ["Οῦ"] = "U", - ["Οὐ"] = "U", - ["Οὔ"] = "U", - ["Οὒ"] = "U", - ["Οὖ"] = "U", - ["Οὑ"] = "U", - ["Οὕ"] = "U", - ["Οὓ"] = "U", - ["Οὗ"] = "U", - ["Ωυ"] = "Ōu", - ["Ωύ"] = "Ōu", - ["Ωὺ"] = "Ōu", - ["Ωῦ"] = "Ōu", - ["Ωὐ"] = "Ōu", - ["Ωὔ"] = "Ōu", - ["Ωὒ"] = "Ōu", - ["Ωὖ"] = "Ōu", - ["Ωὑ"] = "Ōu", - ["Ωὕ"] = "Ōu", - ["Ωὓ"] = "Ōu", - ["Ωὗ"] = "Ōu", -} - 
-translit.tables["Greek transliteration diphthongs uppercase"] = translit.gr_in_upp - --- The following will be used in an option that ensures transcription of --- nasalization, e.g. Ἁγχίσης -> “Anchises” (instead of “Agchises”) -translit.gr_nrule = { - ["γγ"] = "ng", - ["γκ"] = "nk", - ["γξ"] = "nx", - ["γχ"] = "nch", -} - -translit.tables["Greek transliteration optional nasalization"] = translit.gr_nrule - -\stopluacode - -\startluacode - --------------------------------------- --- Lowercase Greek Transliteration -- --------------------------------------- - -translit.gr_low = { - ["α"] = "a", - ["ά"] = "a", - ["ὰ"] = "a", - ["ᾶ"] = "a", - ["ᾳ"] = "a", - ["ἀ"] = "a", - ["ἁ"] = "a", - ["ἄ"] = "a", - ["ἂ"] = "a", - ["ἆ"] = "a", - ["ἁ"] = "a", - ["ἅ"] = "a", - ["ἃ"] = "a", - ["ἇ"] = "a", - ["ᾁ"] = "a", - ["ᾴ"] = "a", - ["ᾲ"] = "a", - ["ᾷ"] = "a", - ["ᾄ"] = "a", - ["ᾂ"] = "a", - ["ᾅ"] = "a", - ["ᾃ"] = "a", - ["ᾆ"] = "a", - ["ᾇ"] = "a", - ["β"] = "b", - ["γ"] = "g", - ["δ"] = "d", - ["ε"] = "e", - ["έ"] = "e", - ["ὲ"] = "e", - ["ἐ"] = "e", - ["ἔ"] = "e", - ["ἒ"] = "e", - ["ἑ"] = "e", - ["ἕ"] = "e", - ["ἓ"] = "e", - ["ζ"] = "z", - ["η"] = "ē", - ["η"] = "ē", - ["ή"] = "ē", - ["ὴ"] = "ē", - ["ῆ"] = "ē", - ["ῃ"] = "ē", - ["ἠ"] = "ē", - ["ἤ"] = "ē", - ["ἢ"] = "ē", - ["ἦ"] = "ē", - ["ᾐ"] = "ē", - ["ἡ"] = "ē", - ["ἥ"] = "ē", - ["ἣ"] = "ē", - ["ἧ"] = "ē", - ["ᾑ"] = "ē", - ["ῄ"] = "ē", - ["ῂ"] = "ē", - ["ῇ"] = "ē", - ["ᾔ"] = "ē", - ["ᾒ"] = "ē", - ["ᾕ"] = "ē", - ["ᾓ"] = "ē", - ["ᾖ"] = "ē", - ["ᾗ"] = "ē", - ["θ"] = "th", - ["ι"] = "i", - ["ί"] = "i", - ["ὶ"] = "i", - ["ῖ"] = "i", - ["ἰ"] = "i", - ["ἴ"] = "i", - ["ἲ"] = "i", - ["ἶ"] = "i", - ["ἱ"] = "i", - ["ἵ"] = "i", - ["ἳ"] = "i", - ["ἷ"] = "i", - ["ϊ"] = "i", - ["ΐ"] = "i", - ["ῒ"] = "i", - ["ῗ"] = "i", - ["κ"] = "k", - ["λ"] = "l", - ["μ"] = "m", - ["ν"] = "n", - ["ξ"] = "x", - ["ο"] = "o", - ["ό"] = "o", - ["ὸ"] = "o", - ["ὀ"] = "o", - ["ὄ"] = "o", - ["ὂ"] = "o", - ["ὁ"] = "o", - ["ὅ"] = "o", - ["ὃ"] = "o", - ["π"] = "p", - 
["ρ"] = "r", - ["ῤ"] = "r", - ["ῥ"] = "rh", - ["σ"] = "s", - ["ς"] = "s", - ["τ"] = "t", - ["υ"] = "y", - ["ύ"] = "y", - ["ὺ"] = "y", - ["ῦ"] = "y", - ["ὐ"] = "y", - ["ὔ"] = "y", - ["ὒ"] = "y", - ["ὖ"] = "y", - ["ὑ"] = "y", - ["ὕ"] = "y", - ["ὓ"] = "y", - ["ὗ"] = "y", - ["ϋ"] = "y", - ["ΰ"] = "y", - ["ῢ"] = "y", - ["ῧ"] = "y", - ["φ"] = "ph", - ["χ"] = "ch", - ["ψ"] = "ps", - ["ω"] = "ō", - ["ώ"] = "ō", - ["ὼ"] = "ō", - ["ῶ"] = "ō", - ["ῳ"] = "ō", - ["ὠ"] = "ō", - ["ὤ"] = "ō", - ["ὢ"] = "ō", - ["ὦ"] = "ō", - ["ᾠ"] = "ō", - ["ὡ"] = "ō", - ["ὥ"] = "ō", - ["ὣ"] = "ō", - ["ὧ"] = "ō", - ["ᾡ"] = "ō", - ["ῴ"] = "ō", - ["ῲ"] = "ō", - ["ῷ"] = "ō", - ["ᾤ"] = "ō", - ["ᾢ"] = "ō", - ["ᾥ"] = "ō", - ["ᾣ"] = "ō", - ["ᾦ"] = "ō", - ["ᾧ"] = "ō", -} - -translit.tables["Greek transliteration lowercase"] = translit.gr_low - --------------------------------------- --- Uppercase Greek Transliteration -- --------------------------------------- - -translit.gr_upp = { - ["Α"] = "A", - ["Ά"] = "A", - ["Ὰ"] = "A", ---["ᾶ"] = "A", - ["ᾼ"] = "A", - ["Ἀ"] = "A", - ["Ἁ"] = "A", - ["Ἄ"] = "A", - ["Ἂ"] = "A", - ["Ἆ"] = "A", - ["Ἁ"] = "A", - ["Ἅ"] = "A", - ["Ἃ"] = "A", - ["Ἇ"] = "A", - ["ᾉ"] = "A", ---["ᾴ"] = "A", -- I’d be very happy if anybody could explain to me ---["ᾲ"] = "A", -- why there's Ά, ᾌ and ᾼ but no “A + iota subscript ---["ᾷ"] = "A", -- + acute” …, same for Η, Υ and Ω + diacritica. 
- ["ᾌ"] = "A", - ["ᾊ"] = "A", - ["ᾍ"] = "A", - ["ᾋ"] = "A", - ["ᾎ"] = "A", - ["ᾏ"] = "A", - ["Β"] = "B", - ["Γ"] = "G", - ["Δ"] = "D", - ["Ε"] = "E", - ["Έ"] = "E", - ["Ὲ"] = "E", - ["Ἐ"] = "E", - ["Ἔ"] = "E", - ["Ἒ"] = "E", - ["Ἑ"] = "E", - ["Ἕ"] = "E", - ["Ἓ"] = "E", - ["Ζ"] = "Z", - ["Η"] = "Ē", - ["Η"] = "Ē", - ["Ή"] = "Ē", - ["Ὴ"] = "Ē", ---["ῆ"] = "Ē", - ["ῌ"] = "Ē", - ["Ἠ"] = "Ē", - ["Ἤ"] = "Ē", - ["Ἢ"] = "Ē", - ["Ἦ"] = "Ē", - ["ᾘ"] = "Ē", - ["Ἡ"] = "Ē", - ["Ἥ"] = "Ē", - ["Ἣ"] = "Ē", - ["Ἧ"] = "Ē", - ["ᾙ"] = "Ē", ---["ῄ"] = "Ē", ---["ῂ"] = "Ē", ---["ῇ"] = "Ē", - ["ᾜ"] = "Ē", - ["ᾚ"] = "Ē", - ["ᾝ"] = "Ē", - ["ᾛ"] = "Ē", - ["ᾞ"] = "Ē", - ["ᾟ"] = "Ē", - ["Θ"] = "Th", - ["Ι"] = "I", - ["Ί"] = "I", - ["Ὶ"] = "I", ---["ῖ"] = "I", - ["Ἰ"] = "I", - ["Ἴ"] = "I", - ["Ἲ"] = "I", - ["Ἶ"] = "I", - ["Ἱ"] = "I", - ["Ἵ"] = "I", - ["Ἳ"] = "I", - ["Ἷ"] = "I", - ["Ϊ"] = "I", ---["ΐ"] = "I", ---["ῒ"] = "I", ---["ῗ"] = "I", - ["Κ"] = "K", - ["Λ"] = "L", - ["Μ"] = "M", - ["Ν"] = "N", - ["Ξ"] = "X", - ["Ο"] = "O", - ["Ό"] = "O", - ["Ὸ"] = "O", - ["Ὀ"] = "O", - ["Ὄ"] = "O", - ["Ὂ"] = "O", - ["Ὁ"] = "O", - ["Ὅ"] = "O", - ["Ὃ"] = "O", - ["Π"] = "P", - ["Ρ"] = "R", ---["ῤ"] = "R", - ["Ῥ"] = "Rh", - ["Σ"] = "S", - ["Σ"] = "S", - ["Τ"] = "T", - ["Υ"] = "Y", - ["Ύ"] = "Y", - ["Ὺ"] = "Y", ---["ῦ"] = "Y", ---["ὐ"] = "Y", ---["ὔ"] = "Y", ---["ὒ"] = "Y", ---["ὖ"] = "Y", - ["Ὑ"] = "Y", - ["Ὕ"] = "Y", - ["Ὓ"] = "Y", - ["Ὗ"] = "Y", - ["Ϋ"] = "Y", ---["ΰ"] = "Y", ---["ῢ"] = "Y", ---["ῧ"] = "Y", - ["Φ"] = "Ph", - ["Χ"] = "Ch", - ["Ψ"] = "Ps", - ["Ω"] = "Ō", - ["Ώ"] = "Ō", - ["Ὼ"] = "Ō", ---["ῶ"] = "Ō", - ["ῼ"] = "Ō", - ["Ὠ"] = "Ō", - ["Ὤ"] = "Ō", - ["Ὢ"] = "Ō", - ["Ὦ"] = "Ō", - ["ᾨ"] = "Ō", - ["Ὡ"] = "Ō", - ["Ὥ"] = "Ō", - ["Ὣ"] = "Ō", - ["Ὧ"] = "Ō", - ["ᾩ"] = "Ō", ---["ῴ"] = "Ō", ---["ῲ"] = "Ō", ---["ῷ"] = "Ō", - ["ᾬ"] = "Ō", - ["ᾪ"] = "Ō", - ["ᾭ"] = "Ō", - ["ᾫ"] = "Ō", - ["ᾮ"] = "Ō", - ["ᾯ"] = "Ō", -} - -translit.tables["Greek transliteration uppercase"] = translit.gr_upp - ------------- --- 
Varia -- ------------- - -translit.gr_other = { - ["ϝ"] = "w", - ["Ϝ"] = "W", - ["ϙ"] = "q", - ["Ϙ"] = "Q", - ["ϡ"] = "ss", - ["Ϡ"] = "Ss", -} - -translit.tables["Greek transliteration archaic characters"] = translit.gr_other - -\stopluacode - -%-===========================================================================-- -%- End Of Tables -- -%-===========================================================================-- +%D Tables have been migrated into separate lua files. +%D See {\tt +%D trans_tables_glag.lua +%D trans_tables_gr.lua +%D trans_tables_iso9.lua +%D trans_tables_scntfc.lua +%D and +%D trans_tables_trsc.lua.} %D The function \type{translit.subst(s, t)} is used to replace any key of %D \type{t} that occurs in \type{s} with the according value of \type{t}. @@ -1796,14 +104,17 @@ function translit.subst (text, tab) end \stopluacode -%D \type{translit.add_table(t, ta)} is used to build the final substitution tables +%D \type{translit.add_table(t, ...)} is used to build the final substitution tables %D from those we defined earlier; any keys in the previous table \type{t} are -%D overwritten if they exist in the added table \type{ta}, too. +%D overwritten if they exist in any of the tables passed after \type{t}, too. \startluacode -function translit.add_table (t, t_add) - for key, value in pairs (t_add) do - t[key] = value + +function translit.add_table (t, ...) 
+ for _,tab in ipairs({...}) do + for key, value in pairs (tab) do + t[key] = value + end end return t end @@ -1859,9 +170,9 @@ function translit.show_tab (tab) context.bTR() context.bTC() context(cnt) context.eTC() context.bTC() context(key) context.eTC() - context.bTC() context(utf.len(key)) context.eTC() + context.bTC() context(string.len(key)) context.eTC() context.bTC() context(val) context.eTC() - context.bTC() context(utf.len(val)) context.eTC() + context.bTC() context(string.len(val)) context.eTC() context.bTC() context(strempty(key)) context.eTC() context.bTC() context(strempty(val)) context.eTC() context.eTR() @@ -1888,121 +199,63 @@ end \startluacode function translit.show_all_tabs () - -- Output all translation tables that are registered within translit.tables. - -- This will be quite unordered. - context ("\\chapter{Transliterator Showing All Tables}") - for key, val in pairs(translit.tables) do - context ("\\section{" .. key .. "}") - translit.show_tab (val) - end + dofile "trans_tables_iso9.lua" + dofile "trans_tables_trsc.lua" + dofile "trans_tables_scntfc.lua" + dofile "trans_tables_trsc.lua" + dofile "trans_tables_glag.lua" + dofile "trans_tables_gr.lua" + translit.gen_rules_en() + translit.gen_rules_de() + -- Output all translation tables that are registered within translit.tables. + -- This will be quite unordered. + context ("\\chapter{Transliterator Showing All Tables}") + for key, val in pairs(translit.tables) do + context ("\\section{" .. key .. "}") + translit.show_tab (val) + end end \stopluacode -%D Now to the function \type{translit.transliterate(m, t)}: it constitutes the -%D metafunction that is called by the \type{\transliterate} command and itself -%D chooses the substitution tables according to the \type{m} parameter and -%D applies them in a given order on the string \type{m}. -%D (The correct order in multi-pass substitution has to be enforced because the -%D tables are in fact unordered dictionaries.) 
+%D \type{translit.transliterate(m, t)} constitutes the +%D metafunction that is called by the \type{\transliterate} command. +%D It loads the transliteration tables according to \type{method} and calls the +%D corresponding function. \startluacode function translit.transliterate (method, text) - local repl_tab = {} - if method == "ru" then - translit.add_table(repl_tab, translit.ru_upp) - translit.add_table(repl_tab, translit.ru_low) - elseif method == "ru_old" then - translit.add_table(repl_tab, translit.ru_upp) - translit.add_table(repl_tab, translit.ru_low) - translit.add_table(repl_tab, translit.ru_old_upp) - translit.add_table(repl_tab, translit.ru_old_low) - elseif method == "all" then - translit.add_table(repl_tab, translit.ru_upp) - translit.add_table(repl_tab, translit.ru_low) - translit.add_table(repl_tab, translit.ru_old_upp) - translit.add_table(repl_tab, translit.ru_old_low) - translit.add_table(repl_tab, translit.non_ru_upp) - translit.add_table(repl_tab, translit.non_ru_low) - elseif method == "ru_transcript_de" then - text = translit.subst (text, translit.ru_trsc_jrule) - text = translit.subst (text, translit.ru_trsc_irule) - text = translit.subst (text, translit.ru_trsc_srule) - text = translit.subst (text, translit.ru_trsc_sharpsrule) - text = translit.subst (text, translit.ru_trsc_jerule) --- text = translit.subst (text, translit.ru_trsc_jorule) - translit.add_table(repl_tab, translit.ru_trsc_upp_first) - translit.add_table(repl_tab, translit.ru_trsc_low_first) - text = translit.subst (text, repl_tab) - repl_tab = {} - translit.add_table(repl_tab, translit.ru_trsc_upp) - translit.add_table(repl_tab, translit.ru_trsc_low) - elseif method == "ru_transcript_en" then - text = translit.subst (text, translit.ru_trsc_en_jerule) - translit.add_table(repl_tab, translit.ru_trsc_en_upp_first) - translit.add_table(repl_tab, translit.ru_trsc_en_low_first) - translit.add_table(repl_tab, translit.ru_trsc_en_upp) - translit.add_table(repl_tab, 
translit.ru_trsc_en_low) - elseif method == "iso9_ocs" then - translit.add_table(repl_tab, translit.ru_upp) - translit.add_table(repl_tab, translit.ru_low) - translit.add_table(repl_tab, translit.ru_old_upp) - translit.add_table(repl_tab, translit.ru_old_low) - translit.add_table(repl_tab, translit.non_ru_upp) - translit.add_table(repl_tab, translit.non_ru_low) - translit.add_table(repl_tab, translit.ocs_add_upp) - translit.add_table(repl_tab, translit.ocs_add_low) - elseif method == "ocs" then - translit.add_table(repl_tab, translit.ocs_uk) - text = translit.subst (text, repl_tab) - repl_tab = {} - translit.add_table(repl_tab, translit.ocs_low) - translit.add_table(repl_tab, translit.ocs_upp) - elseif method == "ocs_gla" then - translit.add_table(repl_tab, translit.ocs_gla_low) - translit.add_table(repl_tab, translit.ocs_gla_upp) - elseif method == "ru_cz" then - translit.add_table(repl_tab, translit.ru_trsc_cz_low) - translit.add_table(repl_tab, translit.ru_trsc_cz_upp) - elseif method == "ocs_cz" then - translit.add_table(repl_tab, translit.ru_trsc_cz_low) - translit.add_table(repl_tab, translit.ru_trsc_cz_upp) - translit.add_table(repl_tab, translit.ru_trsc_cz_add_low) - translit.add_table(repl_tab, translit.ru_trsc_cz_add_upp) - elseif method == "gr" then - translit.add_table(repl_tab, translit.gr_di_in_low) - translit.add_table(repl_tab, translit.gr_di_in_upp) - translit.add_table(repl_tab, translit.gr_in_low) - translit.add_table(repl_tab, translit.gr_in_upp) - text = translit.subst (text, repl_tab) - repl_tab = {} - translit.add_table(repl_tab, translit.gr_di_low) - translit.add_table(repl_tab, translit.gr_di_upp) - text = translit.subst (text, repl_tab) - repl_tab = {} - translit.add_table(repl_tab, translit.gr_low) - translit.add_table(repl_tab, translit.gr_upp) - translit.add_table(repl_tab, translit.gr_other) - elseif method == "gr_n" then - translit.add_table(repl_tab, translit.gr_di_in_low) - translit.add_table(repl_tab, translit.gr_di_in_upp) - 
translit.add_table(repl_tab, translit.gr_in_low) - translit.add_table(repl_tab, translit.gr_in_upp) - text = translit.subst (text, repl_tab) - repl_tab = {} - translit.add_table(repl_tab, translit.gr_di_low) - translit.add_table(repl_tab, translit.gr_di_upp) - translit.add_table(repl_tab, translit.gr_nrule) - text = translit.subst (text, repl_tab) - repl_tab = {} - translit.add_table(repl_tab, translit.gr_low) - translit.add_table(repl_tab, translit.gr_upp) - translit.add_table(repl_tab, translit.gr_other) + if method == "ru" or + method == "ru_old" or + method == "all" + then + dofile "trans_tables_iso9.lua" + text = translit.iso9 (method, text) + elseif method == "ru_transcript_de" or + method == "ru_transcript_de_exp" or -- experimental lpeg + method == "ru_transcript_en" or + method == "ru_transcript_en_sub" or -- old multiple substitution + method == "ru_cz" or + method == "ocs_cz" + then + dofile "trans_tables_trsc.lua" + text = translit.transcript (method, text) + elseif method == "iso9_ocs" or + method == "ocs" or + method == "ocs_gla" + then + dofile "trans_tables_scntfc.lua" + text = translit.scientific (method, text) + elseif method == "gr" or + method == "gr_n" + then + dofile "trans_tables_gr.lua" + text = translit.dogreek (method, text) end - text = translit.subst (text, repl_tab) - tex.print (text) + context (text) end \stopluacode + %D The following will help debugging and reviewing tables. Make sure your %D typescript can handle the characters, in general it's no use with Latin %D Modern which unfortunately provides only a restricted set of the unicode @@ -2011,11 +264,21 @@ end %D The user-level command to output a single substitution table is %D \type{\showOneTranslitTab{#1}}. 
\define[1]\showOneTranslitTab{% - \ctxlua{translit.show_tab(#1)}% + \ctxlua{ + dofile "trans_tables_iso9.lua" + dofile "trans_tables_trsc.lua" + dofile "trans_tables_scntfc.lua" + dofile "trans_tables_trsc.lua" + dofile "trans_tables_glag.lua" + dofile "trans_tables_gr.lua" + translit.gen_rules_en() + translit.gen_rules_de() + translit.show_tab(#1) + }% } %D The user-level command to output all defined tables is -%D \type{\showTranslitTabs{#1}}. +%D \type{\showTranslitTabs}. \define\showTranslitTabs{% \ctxlua{translit.show_all_tabs()}% } @@ -2047,8 +310,8 @@ end %D leaves the latter unchanged. Thus, in order to permanently switch to %D another transliteration style the user would have to set it by calling %D \type{\setuptransliterate} again. +% Credits for rewriting the TeX code go to Wolfgang as well. -% thanks again, Wolfgang! \def\dotransliterate[#1]#2{% \bgroup% \iffirstargument @@ -2059,28 +322,8 @@ end \egroup% } -%\definestartstop[transliterate][% - %before={\startbuffer}, - %after={\stopbuffer\transliterate{\getbuffer}} -%] - \def\transliterate{\dosingleempty\dotransliterate} -%\def\starttransliterate {% - %\bgroup\dostarttransliterate% -%} - -%\def\stoptransliterate {% - %\egroup - %\@EA\transliterate{% - %\getbuffer[trl]% - %}% -%} - -%\def\dostarttransliterate{% - %\dostartbuffer[trl][starttransliterate][stoptransliterate]% -%} - \def\starttransliterate{% \bgroup% \dosingleempty\dostarttransliterate @@ -2091,6 +334,7 @@ end \setuptransliterate[#1]% \fi \language[\TRLhyphenate]% + %\ctxlua{translit.transliterate("\TRLmode","\luaescapestring{#2}")}% \ctxlua{translit.transliterate("\TRLmode","\luaescapestring{#2}")}% \egroup% } diff --git a/tex/context/third/transliterator/trans_tables_glag.lua b/tex/context/third/transliterator/trans_tables_glag.lua new file mode 100644 index 0000000..1b1ff18 --- /dev/null +++ b/tex/context/third/transliterator/trans_tables_glag.lua @@ -0,0 +1,122 @@ + 
+--===========================================================================-- +-- Glagolica -- +--===========================================================================-- + +------------------------------------------- +-- Lowercase Glagolitic Transliteration -- +------------------------------------------- + +translit.ocs_gla_low = { + ["ⰰ"] = "a", -- GLAGOLITIC SMALL LETTER AZU + ["ⰱ"] = "b", -- GLAGOLITIC SMALL LETTER BUKY + ["ⰲ"] = "v", -- GLAGOLITIC SMALL LETTER VEDE + ["ⰳ"] = "g", -- GLAGOLITIC SMALL LETTER GLAGOLI + ["ⰴ"] = "d", -- GLAGOLITIC SMALL LETTER DOBRO + ["ⰵ"] = "e", -- GLAGOLITIC SMALL LETTER YESTU + ["ⰶ"] = "ž", -- GLAGOLITIC SMALL LETTER ZHIVETE + ["ⰷ"] = "ʒ", -- GLAGOLITIC SMALL LETTER DZELO + ["ⰸ"] = "z", -- GLAGOLITIC SMALL LETTER ZEMLJA + ["ⰹ"] = "i", -- GLAGOLITIC SMALL LETTER IZHE + ["ⰺ"] = "i", -- GLAGOLITIC SMALL LETTER INITIAL IZHE + ["ⰻ"] = "i", -- GLAGOLITIC SMALL LETTER I + ["ⰼ"] = "g’", -- GLAGOLITIC SMALL LETTER DJERVI + ["ⰽ"] = "k", -- GLAGOLITIC SMALL LETTER KAKO + ["ⰾ"] = "l", -- GLAGOLITIC SMALL LETTER LJUDIJE + ["ⰿ"] = "m", -- GLAGOLITIC SMALL LETTER MYSLITE + ["ⱀ"] = "n", -- GLAGOLITIC SMALL LETTER NASHI + ["ⱁ"] = "o", -- GLAGOLITIC SMALL LETTER ONU + ["ⱂ"] = "p", -- GLAGOLITIC SMALL LETTER POKOJI + ["ⱃ"] = "r", -- GLAGOLITIC SMALL LETTER RITSI + ["ⱄ"] = "s", -- GLAGOLITIC SMALL LETTER SLOVO + ["ⱅ"] = "t", -- GLAGOLITIC SMALL LETTER TVRIDO + ["ⱆ"] = "u", -- GLAGOLITIC SMALL LETTER UKU + ["ⱇ"] = "f", -- GLAGOLITIC SMALL LETTER FRITU + ["ⱈ"] = "x", -- GLAGOLITIC SMALL LETTER HERU + ["ⱉ"] = "o", -- GLAGOLITIC SMALL LETTER OTU + ["ⱊ"] = "?", -- GLAGOLITIC SMALL LETTER PE + ["ⱋ"] = "št", -- GLAGOLITIC SMALL LETTER SHTA + ["ⱌ"] = "c", -- GLAGOLITIC SMALL LETTER TSI + ["ⱍ"] = "č", -- GLAGOLITIC SMALL LETTER CHRIVI + ["ⱎ"] = "š", -- GLAGOLITIC SMALL LETTER SHA + ["ⱏ"] = "ъ", -- GLAGOLITIC SMALL LETTER YERU + ["ⱐ"] = "ь", -- GLAGOLITIC SMALL LETTER YERI + ["ⱑ"] = "ě", -- GLAGOLITIC SMALL LETTER YATI + ["ⱒ"] = "x", -- GLAGOLITIC 
SMALL LETTER SPIDERY HA + ["ⱓ"] = "ju", -- GLAGOLITIC SMALL LETTER YU + ["ⱔ"] = "ę", -- GLAGOLITIC SMALL LETTER SMALL YUS + ["ⱕ"] = "y̨", -- GLAGOLITIC SMALL LETTER SMALL YUS WITH TAIL + ["ⱖ"] = "??", -- GLAGOLITIC SMALL LETTER YO + ["ⱗ"] = "ję", -- GLAGOLITIC SMALL LETTER IOTATED SMALL YUS + ["ⱘ"] = "ǫ", -- GLAGOLITIC SMALL LETTER BIG YUS + ["ⱙ"] = "jǫ", -- GLAGOLITIC SMALL LETTER IOTATED BIG YUS + ["ⱚ"] = "th", -- GLAGOLITIC SMALL LETTER FITA + ["ⱛ"] = "ü", -- GLAGOLITIC SMALL LETTER IZHITSA + ["ⱜ"] = "??", -- GLAGOLITIC SMALL LETTER SHTAPIC + ["ⱝ"] = "??", -- GLAGOLITIC SMALL LETTER TROKUTASTI A + ["ⱞ"] = "m", -- GLAGOLITIC SMALL LETTER LATINATE MYSLITE +} + +translit.tables["Glagolica transliteration for OCS lowercase"] = translit.ocs_gla_low + +------------------------------------------------ +-- Uppercase (?!) Glagolitic Transliteration -- +------------------------------------------------ + +translit.ocs_gla_upp = { + ["Ⰰ"] = "A", -- GLAGOLITIC CAPITAL LETTER AZU + ["Ⰱ"] = "B", -- GLAGOLITIC CAPITAL LETTER BUKY + ["Ⰲ"] = "V", -- GLAGOLITIC CAPITAL LETTER VEDE + ["Ⰳ"] = "G", -- GLAGOLITIC CAPITAL LETTER GLAGOLI + ["Ⰴ"] = "D", -- GLAGOLITIC CAPITAL LETTER DOBRO + ["Ⰵ"] = "E", -- GLAGOLITIC CAPITAL LETTER YESTU + ["Ⰶ"] = "Ž", -- GLAGOLITIC CAPITAL LETTER ZHIVETE + ["Ⰷ"] = "Ʒ", -- GLAGOLITIC CAPITAL LETTER DZELO + ["Ⰸ"] = "Z", -- GLAGOLITIC CAPITAL LETTER ZEMLJA + ["Ⰹ"] = "I", -- GLAGOLITIC CAPITAL LETTER IZHE + ["Ⰺ"] = "I", -- GLAGOLITIC CAPITAL LETTER INITIAL IZHE + ["Ⰻ"] = "I", -- GLAGOLITIC CAPITAL LETTER I + ["Ⰼ"] = "G’", -- GLAGOLITIC CAPITAL LETTER DJERVI + ["Ⰽ"] = "K", -- GLAGOLITIC CAPITAL LETTER KAKO + ["Ⰾ"] = "L", -- GLAGOLITIC CAPITAL LETTER LJUDIJE + ["Ⰿ"] = "M", -- GLAGOLITIC CAPITAL LETTER MYSLITE + ["Ⱀ"] = "N", -- GLAGOLITIC CAPITAL LETTER NASHI + ["Ⱁ"] = "O", -- GLAGOLITIC CAPITAL LETTER ONU + ["Ⱂ"] = "P", -- GLAGOLITIC CAPITAL LETTER POKOJI + ["Ⱃ"] = "R", -- GLAGOLITIC CAPITAL LETTER RITSI + ["Ⱄ"] = "S", -- GLAGOLITIC CAPITAL LETTER SLOVO + ["Ⱅ"] 
= "T", -- GLAGOLITIC CAPITAL LETTER TVRIDO + ["Ⱆ"] = "U", -- GLAGOLITIC CAPITAL LETTER UKU + ["Ⱇ"] = "F", -- GLAGOLITIC CAPITAL LETTER FRITU + ["Ⱈ"] = "X", -- GLAGOLITIC CAPITAL LETTER HERU + ["Ⱉ"] = "O", -- GLAGOLITIC CAPITAL LETTER OTU + ["Ⱊ"] = "?", -- GLAGOLITIC CAPITAL LETTER PE + ["Ⱋ"] = "Št", -- GLAGOLITIC CAPITAL LETTER SHTA + ["Ⱌ"] = "C", -- GLAGOLITIC CAPITAL LETTER TSI + ["Ⱍ"] = "Č", -- GLAGOLITIC CAPITAL LETTER CHRIVI + ["Ⱎ"] = "Š", -- GLAGOLITIC CAPITAL LETTER SHA + ["Ⱏ"] = "Ъ", -- GLAGOLITIC CAPITAL LETTER YERU + ["Ⱐ"] = "Ь", -- GLAGOLITIC CAPITAL LETTER YERI + ["Ⱑ"] = "Ě", -- GLAGOLITIC CAPITAL LETTER YATI + ["Ⱒ"] = "X", -- GLAGOLITIC CAPITAL LETTER SPIDERY HA + ["Ⱓ"] = "Ju", -- GLAGOLITIC CAPITAL LETTER YU + ["Ⱔ"] = "Ę", -- GLAGOLITIC CAPITAL LETTER SMALL YUS + ["Ⱕ"] = "Y̨", -- GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL + ["Ⱖ"] = "??", -- GLAGOLITIC CAPITAL LETTER YO + ["Ⱗ"] = "Ję", -- GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS + ["Ⱘ"] = "Ǫ", -- GLAGOLITIC CAPITAL LETTER BIG YUS + ["Ⱙ"] = "Jǫ", -- GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS + ["Ⱚ"] = "Th", -- GLAGOLITIC CAPITAL LETTER FITA + ["Ⱛ"] = "Ü", -- GLAGOLITIC CAPITAL LETTER IZHITSA + ["Ⱜ"] = "??", -- GLAGOLITIC CAPITAL LETTER SHTAPIC + ["Ⱝ"] = "??", -- GLAGOLITIC CAPITAL LETTER TROKUTASTI A + ["Ⱞ"] = "M", -- GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +} + +translit.tables["Glagolica transliteration for OCS uppercase"] = translit.ocs_gla_upp + +--===========================================================================-- +-- End Of Tables -- +--===========================================================================-- + + diff --git a/tex/context/third/transliterator/trans_tables_gr.lua b/tex/context/third/transliterator/trans_tables_gr.lua new file mode 100644 index 0000000..7ce6855 --- /dev/null +++ b/tex/context/third/transliterator/trans_tables_gr.lua @@ -0,0 +1,693 @@ + +--===========================================================================-- +-- Greek -- 
+--===========================================================================-- + + +-- Note that the Greek transliteration mapping isn't bijective so transliterated +-- texts won't be reversible. (Shouldn't be impossible to make one up using +-- diacritics on latin characters to represent all possible combinations of +-- Greek breathings + accents.) + +-- Good reading on composed / precombined unicode: +-- http://www.tlg.uci.edu/~opoudjis/unicode/unicode_gaps.html#precomposed + +------------------------------------------------- +-- Lowercase Greek Initial Position Diphthongs -- +------------------------------------------------- + +translit.gr_di_in_low = { + [" αὑ"] = " hau", + [" αὕ"] = " hau", + [" αὓ"] = " hau", + [" αὗ"] = " hau", + [" εὑ"] = " heu", + [" εὕ"] = " heu", + [" εὓ"] = " heu", + [" εὗ"] = " heu", + [" ηὑ"] = " hēu", + [" ηὕ"] = " hēu", + [" ηὓ"] = " hēu", + [" ηὗ"] = " hēu", + [" οὑ"] = " hu", + [" οὕ"] = " hu", + [" οὓ"] = " hu", + [" οὗ"] = " hu", + [" ωὑ"] = " hōu", + [" ωὕ"] = " hōu", + [" ωὓ"] = " hōu", + [" ωὗ"] = " hōu" +} + +translit.tables["Greek transliteration initial breathing diphthongs lowercase"] = translit.gr_di_in_low + +------------------------------------------------- +-- Uppercase Greek Initial Position Diphthongs -- +------------------------------------------------- + +translit.gr_di_in_upp = { + [" Αὑ"] = " Hau", + [" Αὕ"] = " Hau", + [" Αὓ"] = " Hau", + [" Αὗ"] = " Hau", + [" Εὑ"] = " Heu", + [" Εὕ"] = " Heu", + [" Εὓ"] = " Heu", + [" Εὗ"] = " Heu", + [" Ηὑ"] = " Hēu", + [" Ηὕ"] = " Hēu", + [" Ηὓ"] = " Hēu", + [" Ηὗ"] = " Hēu", + [" Οὑ"] = " Hu", + [" Οὕ"] = " Hu", + [" Οὓ"] = " Hu", + [" Οὗ"] = " Hu", + [" Ωὑ"] = " Hōu", + [" Ωὕ"] = " Hōu", + [" Ωὓ"] = " Hōu", + [" Ωὗ"] = " Hōu" +} + +translit.tables["Greek transliteration initial breathing diphthongs uppercase"] = translit.gr_di_in_upp + +--------------------------------------- +-- Lowercase Greek Initial Position -- +--------------------------------------- + 
+-- Rough-breathing vowels preceded by a space. The keys include that leading
+-- space, and so do the replacements.
+translit.gr_in_low = {
+    [" ἁ"] = " ha",
+    [" ἅ"] = " ha",
+    [" ἃ"] = " ha",
+    [" ἇ"] = " ha",
+    [" ᾁ"] = " ha",
+    [" ᾅ"] = " ha",
+    [" ᾃ"] = " ha",
+    [" ᾇ"] = " ha",
+    [" ἑ"] = " he",
+    [" ἕ"] = " he",
+    [" ἓ"] = " he",
+    [" ἡ"] = " hē",
+    [" ἥ"] = " hē",
+    [" ἣ"] = " hē",
+    [" ἧ"] = " hē",
+    [" ᾑ"] = " hē",
+    [" ᾕ"] = " hē",
+    [" ᾓ"] = " hē",
+    [" ᾗ"] = " hē",
+    [" ἱ"] = " hi",
+    [" ἵ"] = " hi",
+    [" ἳ"] = " hi",
+    [" ἷ"] = " hi",
+    [" ὁ"] = " ho",
+    [" ὅ"] = " ho",
+    [" ὃ"] = " ho",
+    [" ὑ"] = " hy",
+    [" ὕ"] = " hy",
+    [" ὓ"] = " hy",
+    [" ὗ"] = " hy",
+    [" ὡ"] = " hō",
+    [" ὥ"] = " hō",
+    [" ὣ"] = " hō",
+    [" ὧ"] = " hō",
+    [" ᾡ"] = " hō",
+    [" ᾥ"] = " hō",
+    [" ᾣ"] = " hō",
+    [" ᾧ"] = " hō",
+}
+
+translit.tables["Greek transliteration initial breathing lowercase"] = translit.gr_in_low
+
+---------------------------------------
+-- Uppercase Greek Initial Position  --
+---------------------------------------
+
+translit.gr_in_upp = {
+    [" Ἁ"] = " Ha",
+    [" Ἅ"] = " Ha",
+    [" Ἃ"] = " Ha",
+    [" Ἇ"] = " Ha",
+    [" ᾉ"] = " Ha",
+    [" ᾍ"] = " Ha",
+    [" ᾋ"] = " Ha",
+    [" ᾏ"] = " Ha",
+    [" Ἑ"] = " He",
+    [" Ἕ"] = " He",
+    [" Ἓ"] = " He",
+    [" Ἡ"] = " Hē",
+    [" Ἥ"] = " Hē",
+    [" Ἣ"] = " Hē",
+    [" Ἧ"] = " Hē",
+    [" ᾙ"] = " Hē",
+    [" ᾝ"] = " Hē",
+    [" ᾛ"] = " Hē",
+    [" ᾟ"] = " Hē",
+    [" Ἱ"] = " Hi",
+    [" Ἵ"] = " Hi",
+    [" Ἳ"] = " Hi",
+    [" Ἷ"] = " Hi",
+    [" Ὁ"] = " Ho",
+    [" Ὅ"] = " Ho",
+    [" Ὃ"] = " Ho",
+    [" Ὑ"] = " Hy",
+    [" Ὕ"] = " Hy",
+    [" Ὓ"] = " Hy",
+    [" Ὗ"] = " Hy",
+    [" Ὡ"] = " Hō",
+    [" Ὥ"] = " Hō",
+    [" Ὣ"] = " Hō",
+    [" Ὧ"] = " Hō",
+    [" ᾩ"] = " Hō",
+    [" ᾭ"] = " Hō",
+    [" ᾫ"] = " Hō",
+    [" ᾯ"] = " Hō",
+}
+
+translit.tables["Greek transliteration initial breathing uppercase"] = translit.gr_in_upp
+
+---------------------------------
+-- Lowercase Greek Diphthongs  --
+---------------------------------
+
+-- Two-letter sequences (vowel + upsilon, plus the double rho) that are
+-- replaced as a unit. Repeated keys are kept exactly as they were.
+translit.gr_di_low = {
+    ["αυ"] = "au",
+    ["αύ"] = "au",
+    ["αὺ"] = "au",
+    ["αῦ"] = "au",
+    ["αὐ"] = "au",
+    ["αὔ"] = "au",
+    ["αὒ"] = "au",
+    ["αὖ"] = "au",
+    ["αὑ"] = "au",
+    ["αὕ"] = "au",
+    ["αὓ"] = "au",
+    ["αὗ"] = "au",
+    ["ευ"] = "eu",
+    ["εύ"] = "eu",
+    ["εὺ"] = "eu",
+    ["εῦ"] = "eu",
+    ["εὐ"] = "eu",
+    ["εὔ"] = "eu",
+    ["εὒ"] = "eu",
+    ["εὖ"] = "eu",
+    ["εὑ"] = "eu",
+    ["εὕ"] = "eu",
+    ["εὓ"] = "eu",
+    ["εὗ"] = "eu",
+    ["ηυ"] = "ēu",
+    ["ηύ"] = "ēu",
+    ["ηὺ"] = "ēu",
+    ["ηῦ"] = "ēu",
+    ["ηὐ"] = "ēu",
+    ["ηὔ"] = "ēu",
+    ["ηὒ"] = "ēu",
+    ["ηὖ"] = "ēu",
+    ["ηὑ"] = "ēu",
+    ["ηὕ"] = "ēu",
+    ["ηὓ"] = "ēu",
+    ["ηὗ"] = "ēu",
+    ["ου"] = "u",
+    ["ου"] = "u",
+    ["ου"] = "u",
+    ["ού"] = "u",
+    ["οὺ"] = "u",
+    ["οῦ"] = "u",
+    ["οὐ"] = "u",
+    ["οὔ"] = "u",
+    ["οὒ"] = "u",
+    ["οὖ"] = "u",
+    ["οὑ"] = "u",
+    ["οὕ"] = "u",
+    ["οὓ"] = "u",
+    ["οὗ"] = "u",
+    ["ωυ"] = "ōu",
+    ["ωύ"] = "ōu",
+    ["ωὺ"] = "ōu",
+    ["ωῦ"] = "ōu",
+    ["ωὐ"] = "ōu",
+    ["ωὔ"] = "ōu",
+    ["ωὒ"] = "ōu",
+    ["ωὖ"] = "ōu",
+    ["ωὑ"] = "ōu",
+    ["ωὕ"] = "ōu",
+    ["ωὓ"] = "ōu",
+    ["ωὗ"] = "ōu",
+    ["ῤῥ"] = "rrh",
+}
+
+-- Register the diphthong table itself; this entry used to point at
+-- translit.gr_in_low, so the registry listed the initial-breathing table twice.
+translit.tables["Greek transliteration diphthongs lowercase"] = translit.gr_di_low
+
+---------------------------------
+-- Uppercase Greek Diphthongs  --
+---------------------------------
+
+translit.gr_di_upp = {
+    ["Αυ"] = "Au",
+    ["Αύ"] = "Au",
+    ["Αὺ"] = "Au",
+    ["Αῦ"] = "Au",
+    ["Αὐ"] = "Au",
+    ["Αὔ"] = "Au",
+    ["Αὒ"] = "Au",
+    ["Αὖ"] = "Au",
+    ["Αὑ"] = "Au",
+    ["Αὕ"] = "Au",
+    ["Αὓ"] = "Au",
+    ["Αὗ"] = "Au",
+    ["Ευ"] = "Eu",
+    ["Εύ"] = "Eu",
+    ["Εὺ"] = "Eu",
+    ["Εῦ"] = "Eu",
+    ["Εὐ"] = "Eu",
+    ["Εὔ"] = "Eu",
+    ["Εὒ"] = "Eu",
+    ["Εὖ"] = "Eu",
+    ["Εὑ"] = "Eu",
+    ["Εὕ"] = "Eu",
+    ["Εὓ"] = "Eu",
+    ["Εὗ"] = "Eu",
+    ["Ηυ"] = "Ēu",
+    ["Ηύ"] = "Ēu",
+    ["Ηὺ"] = "Ēu",
+    ["Ηῦ"] = "Ēu",
+    ["Ηὐ"] = "Ēu",
+    ["Ηὔ"] = "Ēu",
+    ["Ηὒ"] = "Ēu",
+    ["Ηὖ"] = "Ēu",
+    ["Ηὑ"] = "Ēu",
+    ["Ηὕ"] = "Ēu",
+    ["Ηὓ"] = "Ēu",
+    ["Ηὗ"] = "Ēu",
+    ["Ου"] = "U",
+    ["Ου"] = "U",
+    ["Ου"] = "U",
+    ["Ού"] = "U",
+    ["Οὺ"] = "U",
+    ["Οῦ"] = "U",
+    ["Οὐ"] = "U",
+    ["Οὔ"] = "U",
+    ["Οὒ"] = "U",
+    ["Οὖ"] = "U",
+    ["Οὑ"] = "U",
+    ["Οὕ"] = "U",
+    ["Οὓ"] = "U",
+    ["Οὗ"] = "U",
+    ["Ωυ"] = "Ōu",
+    ["Ωύ"] = "Ōu",
+    ["Ωὺ"] = "Ōu",
+    ["Ωῦ"] = "Ōu",
+    ["Ωὐ"] = "Ōu",
+    ["Ωὔ"] = "Ōu",
+    ["Ωὒ"] = "Ōu",
+    ["Ωὖ"] = "Ōu",
+    ["Ωὑ"] = "Ōu",
+    ["Ωὕ"] = "Ōu",
+    ["Ωὓ"] = "Ōu",
+    ["Ωὗ"] = "Ōu",
+}
+
+-- Register the diphthong table itself; this entry used to point at
+-- translit.gr_in_upp.
+translit.tables["Greek transliteration diphthongs uppercase"] = translit.gr_di_upp
+
+-- The following will be used in an option that ensures transcription of
+-- nasalization, e.g. Ἁγχίσης -> “Anchises” (instead of “Agchises”)
+translit.gr_nrule = {
+    ["γγ"] = "ng",
+    ["γκ"] = "nk",
+    ["γξ"] = "nx",
+    ["γχ"] = "nch",
+}
+
+translit.tables["Greek transliteration optional nasalization"] = translit.gr_nrule
+
+
+--------------------------------------
+-- Lowercase Greek Transliteration  --
+--------------------------------------
+
+translit.gr_low = {
+    ["α"] = "a",
+    ["ά"] = "a",
+    ["ὰ"] = "a",
+    ["ᾶ"] = "a",
+    ["ᾳ"] = "a",
+    ["ἀ"] = "a",
+    ["ἁ"] = "a",
+    ["ἄ"] = "a",
+    ["ἂ"] = "a",
+    ["ἆ"] = "a",
+    ["ἁ"] = "a",
+    ["ἅ"] = "a",
+    ["ἃ"] = "a",
+    ["ἇ"] = "a",
+    ["ᾁ"] = "a",
+    ["ᾴ"] = "a",
+    ["ᾲ"] = "a",
+    ["ᾷ"] = "a",
+    ["ᾄ"] = "a",
+    ["ᾂ"] = "a",
+    ["ᾅ"] = "a",
+    ["ᾃ"] = "a",
+    ["ᾆ"] = "a",
+    ["ᾇ"] = "a",
+    ["β"] = "b",
+    ["γ"] = "g",
+    ["δ"] = "d",
+    ["ε"] = "e",
+    ["έ"] = "e",
+    ["ὲ"] = "e",
+    ["ἐ"] = "e",
+    ["ἔ"] = "e",
+    ["ἒ"] = "e",
+    ["ἑ"] = "e",
+    ["ἕ"] = "e",
+    ["ἓ"] = "e",
+    ["ζ"] = "z",
+    ["η"] = "ē",
+    ["η"] = "ē",
+    ["ή"] = "ē",
+    ["ὴ"] = "ē",
+    ["ῆ"] = "ē",
+    ["ῃ"] = "ē",
+    ["ἠ"] = "ē",
+    ["ἤ"] = "ē",
+    ["ἢ"] = "ē",
+    ["ἦ"] = "ē",
+    ["ᾐ"] = "ē",
+    ["ἡ"] = "ē",
+    ["ἥ"] = "ē",
+    ["ἣ"] = "ē",
+    ["ἧ"] = "ē",
+    ["ᾑ"] = "ē",
+    ["ῄ"] = "ē",
+    ["ῂ"] = "ē",
+    ["ῇ"] = "ē",
+    ["ᾔ"] = "ē",
+    ["ᾒ"] = "ē",
+    ["ᾕ"] = "ē",
+    ["ᾓ"] = "ē",
+    ["ᾖ"] = "ē",
+    ["ᾗ"] = "ē",
+    ["θ"] = "th",
+    ["ι"] = "i",
+    ["ί"] = "i",
+    ["ὶ"] = "i",
+    ["ῖ"] = "i",
+    ["ἰ"] = "i",
+    ["ἴ"] = "i",
+    ["ἲ"] = "i",
+    ["ἶ"] = "i",
+    ["ἱ"] = "i",
+    ["ἵ"] = "i",
+    ["ἳ"] = "i",
+    ["ἷ"] = "i",
+    ["ϊ"] = "i",
+    ["ΐ"] = "i",
+    ["ῒ"] = "i",
+    ["ῗ"] = "i",
+    ["κ"] = "k",
+    ["λ"] = "l",
+    ["μ"] = "m",
+    ["ν"] = "n",
+ ["ξ"] = "x", + ["ο"] = "o", + ["ό"] = "o", + ["ὸ"] = "o", + ["ὀ"] = "o", + ["ὄ"] = "o", + ["ὂ"] = "o", + ["ὁ"] = "o", + ["ὅ"] = "o", + ["ὃ"] = "o", + ["π"] = "p", + ["ρ"] = "r", + ["ῤ"] = "r", + ["ῥ"] = "rh", + ["σ"] = "s", + ["ς"] = "s", + ["τ"] = "t", + ["υ"] = "y", + ["ύ"] = "y", + ["ὺ"] = "y", + ["ῦ"] = "y", + ["ὐ"] = "y", + ["ὔ"] = "y", + ["ὒ"] = "y", + ["ὖ"] = "y", + ["ὑ"] = "y", + ["ὕ"] = "y", + ["ὓ"] = "y", + ["ὗ"] = "y", + ["ϋ"] = "y", + ["ΰ"] = "y", + ["ῢ"] = "y", + ["ῧ"] = "y", + ["φ"] = "ph", + ["χ"] = "ch", + ["ψ"] = "ps", + ["ω"] = "ō", + ["ώ"] = "ō", + ["ὼ"] = "ō", + ["ῶ"] = "ō", + ["ῳ"] = "ō", + ["ὠ"] = "ō", + ["ὤ"] = "ō", + ["ὢ"] = "ō", + ["ὦ"] = "ō", + ["ᾠ"] = "ō", + ["ὡ"] = "ō", + ["ὥ"] = "ō", + ["ὣ"] = "ō", + ["ὧ"] = "ō", + ["ᾡ"] = "ō", + ["ῴ"] = "ō", + ["ῲ"] = "ō", + ["ῷ"] = "ō", + ["ᾤ"] = "ō", + ["ᾢ"] = "ō", + ["ᾥ"] = "ō", + ["ᾣ"] = "ō", + ["ᾦ"] = "ō", + ["ᾧ"] = "ō", +} + +translit.tables["Greek transliteration lowercase"] = translit.gr_low + +-------------------------------------- +-- Uppercase Greek Transliteration -- +-------------------------------------- + +translit.gr_upp = { + ["Α"] = "A", + ["Ά"] = "A", + ["Ὰ"] = "A", +--["ᾶ"] = "A", + ["ᾼ"] = "A", + ["Ἀ"] = "A", + ["Ἁ"] = "A", + ["Ἄ"] = "A", + ["Ἂ"] = "A", + ["Ἆ"] = "A", + ["Ἁ"] = "A", + ["Ἅ"] = "A", + ["Ἃ"] = "A", + ["Ἇ"] = "A", + ["ᾉ"] = "A", +--["ᾴ"] = "A", -- I’d be very happy if anybody could explain to me +--["ᾲ"] = "A", -- why there's Ά, ᾌ and ᾼ but no “A + iota subscript +--["ᾷ"] = "A", -- + acute” …, same for Η, Υ and Ω + diacritica. 
+ ["ᾌ"] = "A", + ["ᾊ"] = "A", + ["ᾍ"] = "A", + ["ᾋ"] = "A", + ["ᾎ"] = "A", + ["ᾏ"] = "A", + ["Β"] = "B", + ["Γ"] = "G", + ["Δ"] = "D", + ["Ε"] = "E", + ["Έ"] = "E", + ["Ὲ"] = "E", + ["Ἐ"] = "E", + ["Ἔ"] = "E", + ["Ἒ"] = "E", + ["Ἑ"] = "E", + ["Ἕ"] = "E", + ["Ἓ"] = "E", + ["Ζ"] = "Z", + ["Η"] = "Ē", + ["Η"] = "Ē", + ["Ή"] = "Ē", + ["Ὴ"] = "Ē", +--["ῆ"] = "Ē", + ["ῌ"] = "Ē", + ["Ἠ"] = "Ē", + ["Ἤ"] = "Ē", + ["Ἢ"] = "Ē", + ["Ἦ"] = "Ē", + ["ᾘ"] = "Ē", + ["Ἡ"] = "Ē", + ["Ἥ"] = "Ē", + ["Ἣ"] = "Ē", + ["Ἧ"] = "Ē", + ["ᾙ"] = "Ē", +--["ῄ"] = "Ē", +--["ῂ"] = "Ē", +--["ῇ"] = "Ē", + ["ᾜ"] = "Ē", + ["ᾚ"] = "Ē", + ["ᾝ"] = "Ē", + ["ᾛ"] = "Ē", + ["ᾞ"] = "Ē", + ["ᾟ"] = "Ē", + ["Θ"] = "Th", + ["Ι"] = "I", + ["Ί"] = "I", + ["Ὶ"] = "I", +--["ῖ"] = "I", + ["Ἰ"] = "I", + ["Ἴ"] = "I", + ["Ἲ"] = "I", + ["Ἶ"] = "I", + ["Ἱ"] = "I", + ["Ἵ"] = "I", + ["Ἳ"] = "I", + ["Ἷ"] = "I", + ["Ϊ"] = "I", +--["ΐ"] = "I", +--["ῒ"] = "I", +--["ῗ"] = "I", + ["Κ"] = "K", + ["Λ"] = "L", + ["Μ"] = "M", + ["Ν"] = "N", + ["Ξ"] = "X", + ["Ο"] = "O", + ["Ό"] = "O", + ["Ὸ"] = "O", + ["Ὀ"] = "O", + ["Ὄ"] = "O", + ["Ὂ"] = "O", + ["Ὁ"] = "O", + ["Ὅ"] = "O", + ["Ὃ"] = "O", + ["Π"] = "P", + ["Ρ"] = "R", +--["ῤ"] = "R", + ["Ῥ"] = "Rh", + ["Σ"] = "S", + ["Σ"] = "S", + ["Τ"] = "T", + ["Υ"] = "Y", + ["Ύ"] = "Y", + ["Ὺ"] = "Y", +--["ῦ"] = "Y", +--["ὐ"] = "Y", +--["ὔ"] = "Y", +--["ὒ"] = "Y", +--["ὖ"] = "Y", + ["Ὑ"] = "Y", + ["Ὕ"] = "Y", + ["Ὓ"] = "Y", + ["Ὗ"] = "Y", + ["Ϋ"] = "Y", +--["ΰ"] = "Y", +--["ῢ"] = "Y", +--["ῧ"] = "Y", + ["Φ"] = "Ph", + ["Χ"] = "Ch", + ["Ψ"] = "Ps", + ["Ω"] = "Ō", + ["Ώ"] = "Ō", + ["Ὼ"] = "Ō", +--["ῶ"] = "Ō", + ["ῼ"] = "Ō", + ["Ὠ"] = "Ō", + ["Ὤ"] = "Ō", + ["Ὢ"] = "Ō", + ["Ὦ"] = "Ō", + ["ᾨ"] = "Ō", + ["Ὡ"] = "Ō", + ["Ὥ"] = "Ō", + ["Ὣ"] = "Ō", + ["Ὧ"] = "Ō", + ["ᾩ"] = "Ō", +--["ῴ"] = "Ō", +--["ῲ"] = "Ō", +--["ῷ"] = "Ō", + ["ᾬ"] = "Ō", + ["ᾪ"] = "Ō", + ["ᾭ"] = "Ō", + ["ᾫ"] = "Ō", + ["ᾮ"] = "Ō", + ["ᾯ"] = "Ō", +} + +translit.tables["Greek transliteration uppercase"] = translit.gr_upp + +------------ +-- 
Varia --
+------------
+
+translit.gr_other = {
+    ["ϝ"] = "w",
+    ["Ϝ"] = "W",
+    ["ϙ"] = "q",
+    ["Ϙ"] = "Q",
+    ["ϡ"] = "ss",
+    ["Ϡ"] = "Ss",
+}
+
+translit.tables["Greek transliteration archaic characters"] = translit.gr_other
+
+--===========================================================================--
+--                              End Of Tables                                --
+--===========================================================================--
+
+--- Transliterate Greek text into Latin script.
+-- Merges the lower- and uppercase tables with translit.add_table (defined
+-- elsewhere), turns the keys of each merged table into one LPeg alternation
+-- and rewrites `text` in a single pass. At every position the alternatives
+-- are tried in this order: space-prefixed rough-breathing diphthongs,
+-- space-prefixed rough-breathing vowels, diphthongs, single letters, and
+-- finally any other UTF-8 character, which is copied through unchanged.
+-- @param mode "gr" for the plain mapping; "gr_n" additionally merges
+--             translit.gr_nrule (γγ, γκ, γξ, γχ) into the diphthong table.
+-- @param text UTF-8 encoded input string.
+-- @return the transliterated string; for any other mode `text` is returned
+--         as is (the function used to fall through and return nil).
+function translit.dogreek (mode, text)
+    local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs
+
+    -- Only the two Greek modes are handled here.
+    if mode ~= "gr" and mode ~= "gr_n" then
+        return text
+    end
+
+    -- One well-formed UTF-8 character, after
+    -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html
+    -- Lead bytes: 0xC2-0xDF (2 bytes), 0xE0-0xEF (3 bytes), 0xF0-0xF4 (4 bytes).
+    -- The three-byte range used to end at 0xF0, so a four-byte character with
+    -- that lead byte was consumed as three bytes; its last byte then matched
+    -- nothing and everything from there on was dropped from the result.
+    local cont    = R("\128\191")
+    local utfchar = R("\000\127")
+                  + R("\194\223") * cont
+                  + R("\224\239") * cont * cont
+                  + R("\240\244") * cont * cont * cont
+
+    -- Add keys of a dictionary to a ruleset.
+    -- Declared local: it used to be (re)defined as a global on every call.
+    local function addrules (dict, rules)
+        for key in pairs(dict) do
+            if rules == nil then rules = P(key)
+            else rules = rules + P(key)
+            end
+        end
+        return rules
+    end
+
+    local gr_di_in = translit.add_table( {}, translit.gr_di_in_low, translit.gr_di_in_upp )
+    local gr_in    = translit.add_table( {}, translit.gr_in_low,    translit.gr_in_upp )
+    local gr_di    = translit.add_table( {}, translit.gr_di_low,    translit.gr_di_upp )
+    local gr       = translit.add_table( {}, translit.gr_low, translit.gr_upp, translit.gr_other )
+
+    -- Optional nasalization rules ride along with the diphthongs.
+    if mode == "gr_n" then
+        gr_di = translit.add_table( gr_di, translit.gr_nrule )
+    end
+
+    -- Each rule matches any key of its table and replaces the match with the
+    -- value stored under that key.
+    local init_diph = Cs(addrules( gr_di_in ) / gr_di_in)
+    local init      = Cs(addrules( gr_in )    / gr_in)
+    local diph      = Cs(addrules( gr_di )    / gr_di)
+    local other     = Cs(addrules( gr )       / gr)
+
+    local g = Cs((init_diph + init + diph + other + utfchar)^0)
+
+    return g:match(text)
+end
+
diff --git a/tex/context/third/transliterator/trans_tables_iso9.lua
b/tex/context/third/transliterator/trans_tables_iso9.lua new file mode 100644 index 0000000..f85ed35 --- /dev/null +++ b/tex/context/third/transliterator/trans_tables_iso9.lua @@ -0,0 +1,288 @@ + +--===========================================================================-- +-- ISO 9.1995(E) standardized transliteration for cyrillic -- +--===========================================================================-- + +----------------------------------------- +-- Lowercase russian cyrillic alphabet -- +----------------------------------------- +translit.ru_low = { + ["а"] = "a", -- U+0430 -> U+0061 + ["б"] = "b", -- U+0431 -> U+0062 + ["в"] = "v", -- U+0432 -> U+0076 + ["г"] = "g", -- U+0433 -> U+0067 + ["д"] = "d", -- U+0434 -> U+0064 + ["е"] = "e", -- U+0435 -> U+0065 + ["ё"] = "ë", -- U+0451 -> U+00eb + ["ж"] = "ž", -- U+0436 -> U+017e + ["з"] = "z", -- U+0437 -> U+007a + ["и"] = "i", -- U+0438 -> U+0069 + ["й"] = "j", -- U+0439 -> U+006a + ["к"] = "k", -- U+043a -> U+006b + ["л"] = "l", -- U+043b -> U+006c + ["м"] = "m", -- U+043c -> U+006d + ["н"] = "n", -- U+043d -> U+006e + ["о"] = "o", -- U+043e -> U+006f + ["п"] = "p", -- U+043f -> U+0070 + ["р"] = "r", -- U+0440 -> U+0072 + ["с"] = "s", -- U+0441 -> U+0073 + ["т"] = "t", -- U+0442 -> U+0074 + ["у"] = "u", -- U+0443 -> U+0075 + ["ф"] = "f", -- U+0444 -> U+0066 + ["х"] = "h", -- U+0445 -> U+0068 + ["ц"] = "c", -- U+0446 -> U+0063 + ["ч"] = "č", -- U+0447 -> U+010d + ["ш"] = "š", -- U+0448 -> U+0161 + ["щ"] = "ŝ", -- U+0449 -> U+015d + ["ъ"] = "ʺ", -- U+044a -> U+02ba <- That's somewhat ambiguous as 0x2ba is + ["ы"] = "y", -- U+044b -> U+0079 used for uppercase, too. + ["ь"] = "ʹ", -- U+044c -> U+02b9 <- Same here with 0x2b9. 
+ ["э"] = "è", -- U+044d -> U+00e8 + ["ю"] = "û", -- U+044e -> U+00fb + ["я"] = "â" -- U+044f -> U+00e2 +} + +translit.tables["russian lowercase ISO~9"] = translit.ru_low + +----------------------------------------- +-- Uppercase russian cyrillic alphabet -- +----------------------------------------- + +translit.ru_upp = { + ["А"] = "A", -- U+0410 -> U+0041 + ["Б"] = "B", -- U+0411 -> U+0042 + ["В"] = "V", -- U+0412 -> U+0056 + ["Г"] = "G", -- U+0413 -> U+0047 + ["Д"] = "D", -- U+0414 -> U+0044 + ["Е"] = "E", -- U+0415 -> U+0045 + ["Ё"] = "Ë", -- U+0401 -> U+00cb + ["Ж"] = "Ž", -- U+0416 -> U+017d + ["З"] = "Z", -- U+0417 -> U+005a + ["И"] = "I", -- U+0418 -> U+0049 + ["Й"] = "J", -- U+0419 -> U+004a + ["К"] = "K", -- U+041a -> U+004b + ["Л"] = "L", -- U+041b -> U+004c + ["М"] = "M", -- U+041c -> U+004d + ["Н"] = "N", -- U+041d -> U+004e + ["О"] = "O", -- U+041e -> U+004f + ["П"] = "P", -- U+041f -> U+0050 + ["Р"] = "R", -- U+0420 -> U+0052 + ["С"] = "S", -- U+0421 -> U+0053 + ["Т"] = "T", -- U+0422 -> U+0054 + ["У"] = "U", -- U+0423 -> U+0055 + ["Ф"] = "F", -- U+0424 -> U+0046 + ["Х"] = "H", -- U+0425 -> U+0048 + ["Ц"] = "C", -- U+0426 -> U+0043 + ["Ч"] = "Č", -- U+0427 -> U+010c + ["Ш"] = "Š", -- U+0428 -> U+0160 + ["Щ"] = "Ŝ", -- U+0429 -> U+015c + ["Ъ"] = "ʺ", -- U+042a -> U+02ba + ["Ы"] = "Y", -- U+042b -> U+0059 + ["Ь"] = "ʹ", -- U+042c -> U+02b9 + ["Э"] = "È", -- U+042d -> U+00c8 + ["Ю"] = "Û", -- U+042e -> U+00db + ["Я"] = "Â" -- U+042f -> U+00c2 +} + +translit.tables["russian uppercase ISO~9"] = translit.ru_upp + +---------------------------------------------------------- +-- Lowercase pre-1918 russian cyrillic additional chars -- +---------------------------------------------------------- +-- cf. 
http://www.russportal.ru/index.php?id=oldorth.decret1917 + +translit.ru_old_low = { + ["ѣ"] = "ě", -- U+048d -> U+011b -- 2-byte + ["і"] = "ì", -- U+0456 -> U+00ec -- 2-byte + ["ѳ"] = "f", -- U+0473 -> U+0066 -- 2-byte + ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 -- 3-byte +} + +translit.tables["russian pre-1918 lowercase ISO~9 2 byte"] = translit.ru_old_low + +translit.ru_old_upp = { + ["Ѣ"] = "Ě", -- U+048c -> U+011a -- 2-byte + ["І"] = "Ì", -- U+0406 -> U+00cc -- 2-byte + ["Ѳ"] = "F", -- U+0424 -> U+0046 -- 2-byte + ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2 -- 3-byte +} + +translit.tables["russian pre-1918 uppercase ISO~9 2 byte"] = translit.ru_old_upp + +--------------------------------------------------------- +-- Lowercase characters from other cyrillic alphabets -- +--------------------------------------------------------- + +translit.non_ru_low = { + ["ӑ"] = "ă", -- U+04d1 -> U+0103 + ["ӓ"] = "ä", -- U+04d3 -> U+00e4 + ["ә"] = "a̋", -- u+04d9 -> U+0061+030b + ["ґ"] = "g̀", -- u+0491 -> U+0067+0300 + ["ҕ"] = "ğ", -- U+0495 -> U+011f + ["ғ"] = "ġ", -- U+0493 -> U+0121 + ["ђ"] = "đ", -- U+0452 -> U+0111 + ["ѓ"] = "ǵ", -- U+0453 -> U+01f5 + ["ӗ"] = "ĕ", -- U+04d7 -> U+0115 + ["є"] = "ê", -- U+0454 -> U+00ea + ["ҽ"] = "c̆", -- U+04bd -> U+0063+0306 + ["ҿ"] = "ç̆", -- U+04bf -> U+00e7+0306 + ["ӂ"] = "z̆", -- U+04c2 -> U+007a+0306 + ["ӝ"] = "z̄", -- U+04dd -> U+007a+0304 + ["җ"] = "ž̧", -- U+0497 -> U+017e+0327 + ["ӟ"] = "z̈", -- U+04df -> U+007a+0308 + ["ѕ"] = "ẑ", -- U+0455 -> U+1e91 -- Mapped to dz in old cyrillic non-ISO. 
+ ["ӡ"] = "ź", -- U+04e1 -> U+017a + ["ӥ"] = "î", -- U+04e5 -> U+00ee + ["і"] = "ì", -- U+0456 -> U+00ec + ["ї"] = "ï", -- U+0457 -> U+00ef + ["ј"] = "ǰ", -- U+0458 -> U+01f0 + ["қ"] = "ķ", -- U+049b -> U+0137 + ["ҟ"] = "k̄", -- U+049f -> U+006b+0304 + ["љ"] = "l̂", -- U+0459 -> U+006c+0302 + ["њ"] = "n̂", -- U+045a -> U+006e+0302 + ["ҥ"] = "ṅ", -- U+04a5 -> U+1e45 + ["ң"] = "ṇ", -- U+04a3 -> U+1e47 + ["ӧ"] = "ö", -- U+04e7 -> U+00f6 + ["ө"] = "ô", -- U+04e9 -> U+00f4 + ["ҧ"] = "ṕ", -- U+04a7 -> U+1e55 + ["ҫ"] = "ç", -- U+04ab -> U+00e7 + ["ҭ"] = "ţ", -- U+04ad -> U+0163 + ["ћ"] = "ć", -- U+045b -> U+0107 + ["ќ"] = "ḱ", -- U+045c -> U+1e31 + ["у́"] = "ú", -- U+0443+ -> U+00fA + ["ў"] = "ŭ", -- U+045e -> U+016d + ["ӱ"] = "ü", -- U+04f1 -> U+00fc + ["ӳ"] = "ű", -- U+04f3 -> U+0171 + ["ү"] = "ù", -- U+04af -> U+00f9 + ["ҳ"] = "ḩ", -- U+04b3 -> U+1e29 + ["һ"] = "ḥ", -- U+04bb -> U+1e25 + ["ҵ"] = "c̄", -- U+04b5 -> U+0063+0304 + ["ӵ"] = "c̈", -- U+04f5 -> U+0063+0308 + ["ҷ"] = "ç", -- U+04cc -> U+00e7 + ["џ"] = "d̂", -- U+045f -> U+0064+0302 + ["ӹ"] = "ÿ", -- U+04f9 -> U+00ff + ["ѣ"] = "ě", -- U+048d -> U+011b + ["ѫ"] = "ǎ", -- U+046b -> U+01ce -- Mapped to ǫ in non-ISO old cyrillic. + ["ѳ"] = "f̀", -- U+0473 -> U+0066+0300 -- This is mapped to ‘f’ in ru_old. 
+ ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 + ["ҩ"] = "ò", -- U+04a9 -> U+00f2 + ["Ӏ"] = "‡" -- U+04cf -> U+2021 +} + +translit.tables["cyrillic other lowercase ISO~9"] = translit.non_ru_low + +--------------------------------------------------------- +-- Uppercase characters from other cyrillic alphabets -- +--------------------------------------------------------- + +translit.non_ru_upp = { + ["Ӑ"] = "Ă", -- U+04d0 -> U+0102 + ["Ӓ"] = "Ä", -- U+04d2 -> U+00c4 + ["Ә"] = "A̋", -- U+04d8 -> U+0041+030b + ["Ґ"] = "G̀", -- U+0490 -> U+0047+0300 + ["Ҕ"] = "Ğ", -- U+0494 -> U+011e + ["Ғ"] = "Ġ", -- U+0492 -> U+0120 + ["Ђ"] = "Đ", -- U+0402 -> U+0110 + ["Ѓ"] = "Ǵ", -- U+0403 -> U+01f4 + ["Ӗ"] = "Ĕ", -- U+04d6 -> U+0114 + ["Є"] = "Ê", -- U+0404 -> U+00ca + ["Ҽ"] = "C̆", -- U+04bc -> U+0043+0306 + ["Ҿ"] = "Ç̆", -- U+04be -> U+00c7+0306 + ["Ӂ"] = "Z̆", -- U+04c1 -> U+005a+0306 + ["Ӝ"] = "Z̄", -- U+04dc -> U+005a+0304 + ["Җ"] = "Ž̦", -- U+0496 -> U+017d+0326 + ["Ӟ"] = "Z̈", -- U+04de -> U+005a+0308 + ["Ѕ"] = "Ẑ", -- U+0405 -> U+1e90 + ["Ӡ"] = "Ź", -- U+04e0 -> U+0179 + ["Ӥ"] = "Î", -- U+04e4 -> U+00ce + ["І"] = "Ì", -- U+0406 -> U+00cc + ["Ї"] = "Ï", -- U+0407 -> U+00cf + ["Ј"] = "J̌", -- U+0408 -> U+004a+030c + ["Қ"] = "Ķ", -- U+049a -> U+0136 + ["Ҟ"] = "K̄", -- U+049e -> U+004b+0304 + ["Љ"] = "L̂", -- U+0409 -> U+004c+0302 + ["Њ"] = "N̂", -- U+040a -> U+004e+0302 + ["Ҥ"] = "Ṅ", -- U+04a4 -> U+1e44 + ["Ң"] = "Ṇ", -- U+04a2 -> U+1e46 + ["Ӧ"] = "Ö", -- U+04e6 -> U+00d6 + ["Ө"] = "Ô", -- U+04e8 -> U+00d4 + ["Ҧ"] = "Ṕ", -- U+04a6 -> U+1e54 + ["Ҫ"] = "Ç", -- U+04aa -> U+00c7 + ["Ҭ"] = "Ţ", -- U+04ac -> U+0162 + ["Ћ"] = "Ć", -- U+040b -> U+0106 + ["Ќ"] = "Ḱ", -- U+040c -> U+1e30 + ["У́"] = "Ú", -- U+0423 -> U+00da + ["Ў"] = "Ŭ", -- U+040e -> U+016c + ["Ӱ"] = "Ü", -- U+04f0 -> U+00dc + ["Ӳ"] = "Ű", -- U+04f2 -> U+0170 + ["Ү"] = "Ù", -- U+04ae -> U+00d9 + ["Ҳ"] = "Ḩ", -- U+04b2 -> U+1e28 + ["Һ"] = "Ḥ", -- U+04ba -> U+1e24 + ["Ҵ"] = "C̄", -- U+04b4 -> U+0043+0304 + ["Ӵ"] = "C̈", -- U+04f4 -> 
U+0043+0308
+    ["Ҷ"] = "Ç", -- U+04cb -> U+00c7
+    ["Џ"] = "D̂", -- U+040f -> U+0044+0302
+    ["Ӹ"] = "Ÿ", -- U+04f8 -> U+0178
+    ["Ѣ"] = "Ě", -- U+048c -> U+011a
+    ["Ѫ"] = "Ǎ", -- U+046a -> U+01cd
+    ["Ѳ"] = "F̀", -- U+0472 -> U+0046+0300
+    ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2
+    ["Ҩ"] = "Ò", -- U+04a8 -> U+00d2
+    ["’"] = "‵", -- U+2035 -> U+2019
+    ["Ӏ"] = "‡" -- U+04c0 -> U+2021
+}
+
+--===========================================================================--
+--                              End Of Tables                                --
+--===========================================================================--
+
+translit.tables["cyrillic other uppercase ISO~9"] = translit.non_ru_upp
+
+--- Transliterate Cyrillic text according to the ISO 9 tables above.
+-- The modern Russian tables are always used. Mode "ru_old" adds the pre-1918
+-- letters, and mode "all" adds both the pre-1918 letters and the characters
+-- from other Cyrillic alphabets. Characters without a table entry are copied
+-- through unchanged.
+-- @param mode "ru_old", "all", or anything else for modern Russian only.
+-- @param text UTF-8 encoded input string.
+-- @return the transliterated string.
+function translit.iso9 (mode, text)
+    local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs
+
+    -- One well-formed UTF-8 character, after
+    -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html
+    -- Lead bytes: 0xC2-0xDF (2 bytes), 0xE0-0xEF (3 bytes), 0xF0-0xF4 (4 bytes).
+    -- The three-byte range used to end at 0xF0, so a four-byte character with
+    -- that lead byte was consumed as three bytes; its last byte then matched
+    -- nothing and everything from there on was dropped from the result.
+    local cont    = R("\128\191")
+    local utfchar = R("\000\127")
+                  + R("\194\223") * cont
+                  + R("\224\239") * cont * cont
+                  + R("\240\244") * cont * cont * cont
+
+    -- Add keys of a dictionary to a ruleset.
+    -- Declared local (it used to leak as a global). Keys are added longest
+    -- first: LPeg's choice is ordered, and the traversal order of pairs() is
+    -- unspecified, so a key that is a prefix of a longer key (у versus у plus
+    -- combining acute in the non-Russian tables) could otherwise win and keep
+    -- the longer key from ever matching.
+    local function addrules (dict, rules)
+        local keys = {}
+        for key in pairs(dict) do
+            keys[#keys + 1] = key
+        end
+        table.sort(keys, function (a, b)
+            if #a ~= #b then return #a > #b end
+            return a < b
+        end)
+        for _, key in ipairs(keys) do
+            if rules == nil then rules = P(key)
+            else rules = rules + P(key)
+            end
+        end
+        return rules
+    end
+
+    local iso9 = translit.add_table( {},
+        translit.ru_upp,
+        translit.ru_low
+    )
+    if mode == "ru_old" or mode == "all" then
+        iso9 = translit.add_table(iso9,
+            translit.ru_old_upp,
+            translit.ru_old_low
+        )
+        if mode == "all" then
+            iso9 = translit.add_table(iso9,
+                translit.non_ru_upp,
+                translit.non_ru_low
+            )
+        end
+    end
+
+    -- Match any key of the merged table and replace it with its value.
+    local p_cyr = Cs(addrules (iso9)) / iso9
+
+    local iso9_parser = Cs((p_cyr + utfchar)^0)
+
+    return iso9_parser:match(text)
+end
diff --git a/tex/context/third/transliterator/trans_tables_scntfc.lua b/tex/context/third/transliterator/trans_tables_scntfc.lua
new file mode 100644
index 0000000..bf9a510
--- /dev/null
+++ b/tex/context/third/transliterator/trans_tables_scntfc.lua
@@ -0,0 +1,256 @@
+
+--===========================================================================--
+--                         Other transliterations                            --
+--===========================================================================--
+
+
+-- The following are needed because ISO 9 does not cover old Slavonic
+-- characters that became obsolete before the advent of гражданский шрифт.
+
+-- Please note that these mappings are not bijective so don't expect the result
+-- to be easily revertible (by machines).
+
+-- Source p.
77 of +-- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf + +----------------------------------------------------------------------- +-- Lowercase and uppercase letter Uk -- “scientific transliteration” -- +----------------------------------------------------------------------- + +translit.ocs_uk = { + ["oу"] = "u", + ["оу"] = "u", + ["Оу"] = "U", +} +----------------------------------------------------------------------------- +-- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” -- +----------------------------------------------------------------------------- + +translit.ocs_low = { + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["є"] = "e", + ["ж"] = "ž", + ["ꙃ"] = "ʒ", -- U+0292, alternative: dz U+01f3 + ["ѕ"] = "ʒ", + ["ꙁ"] = "z", + ["з"] = "z", + ["и"] = "i", + ["і"] = "i", + ["ї"] = "i", + ["ћ"] = "g’", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["о"] = "o", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ѹ"] = "u", + ["ꙋ"] = "u", + ["ф"] = "f", + ["х"] = "x", + ["ѡ"] = "o", --"ō", + ["ѿ"] = "ot", -- U+047f + ["ѽ"] = "o!", -- U+047d + ["ꙍ"] = "o!", -- U+064D + ["ц"] = "c", + ["ч"] = "č", + ["ш"] = "š", + ["щ"] = "št", + ["ъ"] = "ъ", + ["ы"] = "y", + ["ꙑ"] = "y", -- Old jery (U+a651) as used e.g. by the OCS Wikipedia. 
+    ["ь"] = "ь",
+    ["ѣ"] = "ě",
+    ["ю"] = "ju",
+    ["ꙗ"] = "ja",
+    ["ѥ"] = "je",
+    ["ѧ"] = "ę",
+    ["ѩ"] = "ję",
+    ["ѫ"] = "ǫ",
+    ["ѭ"] = "jǫ",
+    ["ѯ"] = "ks",
+    ["ѱ"] = "ps",
+    ["ѳ"] = "th",
+    ["ѵ"] = "ü",
+}
+
+translit.tables["OCS \\quotation{scientific} transliteration lowercase"] = translit.ocs_low
+
+-----------------------------------------------------------------------------
+-- Uppercase pre-Peter cyrillic characters -- “scientific transliteration” --
+-----------------------------------------------------------------------------
+
+-- Uppercase counterpart of translit.ocs_low. translit.scientific merges both
+-- tables into one dictionary, so every key here has to be an uppercase
+-- character; a lowercase key would collide with the entry in ocs_low.
+translit.ocs_upp = {
+    ["А"] = "A",
+    ["Б"] = "B",
+    ["В"] = "V",
+    ["Г"] = "G",
+    ["Д"] = "D",
+    ["Є"] = "E",
+    ["Ж"] = "Ž",
+    ["Ꙃ"] = "Ʒ", -- U+01b7, alternative: Dz U+01f2
+    ["Ѕ"] = "Ʒ",
+    ["Ꙁ"] = "Z",
+    ["З"] = "Z",
+    ["И"] = "I",
+    ["І"] = "I",
+    ["Ї"] = "I",
+    ["Ћ"] = "G’",
+    ["К"] = "K",
+    ["Л"] = "L",
+    ["М"] = "M",
+    ["Н"] = "N",
+    ["О"] = "O",
+    ["П"] = "P",
+    ["Р"] = "R",
+    ["С"] = "S",
+    ["Т"] = "T",
+    ["У"] = "U", -- used to map to lowercase "u"
+    ["Ѹ"] = "U",
+    ["Ꙋ"] = "U", -- U+a64a; the key used to be the lowercase monograph uk
+    ["Ф"] = "F",
+    ["Х"] = "X",
+    ["Ѡ"] = "Ō",
+    ["Ѿ"] = "Ot", -- U+047e
+    ["Ѽ"] = "O!", -- U+047c
+    ["Ꙍ"] = "O!", -- U+a64c
+    ["Ц"] = "C",
+    ["Ч"] = "Č",
+    ["Ш"] = "Š",
+    ["Щ"] = "Št",
+    ["Ъ"] = "Ŭ",
+    ["Ы"] = "Y",
+    ["Ꙑ"] = "Y", -- U+a650
+    ["Ь"] = "Ĭ",
+    ["Ѣ"] = "Ě",
+    ["Ю"] = "Ju",
+    ["Ꙗ"] = "Ja",
+    ["Ѥ"] = "Je",
+    ["Ѧ"] = "Ę",
+    ["Ѩ"] = "Ję",
+    ["Ѫ"] = "Ǫ",
+    ["Ѭ"] = "Jǫ",
+    ["Ѯ"] = "Ks",
+    ["Ѱ"] = "Ps",
+    ["Ѳ"] = "Th",
+    ["Ѵ"] = "Ü",
+}
+
+translit.tables["OCS \\quotation{scientific} transliteration uppercase"] = translit.ocs_upp
+
+-- Note on the additional tables: these cover characters that are not defined
+-- in ISO 9 but have a “scientific” transliteration. You may use them as
+-- complementary mapping to ISO 9, trading off homogeneity for completeness.
+ +---------------------------------------------------------------------------------------- +-- Lowercase additional pre-Peter cyrillic characters -- “scientific transliteration” -- +---------------------------------------------------------------------------------------- + +translit.ocs_add_low = { + ["ѕ"] = "dz", -- Mapped to ẑ in ISO 9 (Macedonian …) + ["ѯ"] = "ks", + ["ѱ"] = "ps", + ["ѡ"] = "ô", + ["ѿ"] = "ot", -- U+047f + ["ѫ"] = "ǫ", -- Mapped to ǎ in ISO 9. + ["ѧ"] = "ę", + ["ѭ"] = "jǫ", + ["ѩ"] = "ję", + ["ѥ"] = "je", + ["ѹ"] = "u", -- Digraph uk. + ["ꙋ"] = "u", -- Monograph uk, U+a64b. (No glyph yet in the "fixed" font in February 2010 …) + ["ꙑ"] = "y", -- U+a651 +} + +translit.tables["OCS \\quotation{scientific} transliteration additional lowercase"] = translit.ocs_add_low + +---------------------------------------------------------------------------------------- +-- Uppercase additional pre-Peter cyrillic characters -- “scientific transliteration” -- +---------------------------------------------------------------------------------------- + +translit.ocs_add_upp = { + ["Ѕ"] = "Dz", + ["Ѯ"] = "Ks", + ["Ѱ"] = "Ps", + ["Ѡ"] = "Ô", + ["Ѿ"] = "ot", + ["Ѫ"] = "Ǫ", + ["Ѧ"] = "Ę", + ["Ѭ"] = "Jǫ", + ["Ѩ"] = "Ję", + ["Ѥ"] = "Je", + ["Ѹ"] = "U", -- Digraph uk. + ["Ꙋ"] = "U", -- Monograph Uk, U+a64a. 
+    ["Ꙑ"] = "Y", -- U+a650
+}
+
+translit.tables["OCS \\quotation{scientific} transliteration additional uppercase"] = translit.ocs_add_upp
+
+--===========================================================================--
+--                              End Of Tables                                --
+--===========================================================================--
+
+--- “Scientific” transliteration of (Old) Church Slavonic text.
+-- @param mode "iso9_ocs": ISO 9 tables plus the additional OCS tables;
+--             "ocs": the OCS tables, with the digraph uk replaced first;
+--             "ocs_gla": the Glagolitic tables.
+-- @param text UTF-8 encoded input string.
+-- @return the transliterated string; for any other mode `text` is returned
+--         as is (this used to raise an error by indexing a nil parser).
+function translit.scientific (mode, text)
+    local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs
+
+    -- One well-formed UTF-8 character, after
+    -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html
+    -- Lead bytes: 0xC2-0xDF (2 bytes), 0xE0-0xEF (3 bytes), 0xF0-0xF4 (4 bytes).
+    -- The three-byte range used to end at 0xF0, so a four-byte character with
+    -- that lead byte was consumed as three bytes; its last byte then matched
+    -- nothing and everything from there on was dropped from the result.
+    local cont    = R("\128\191")
+    local utfchar = R("\000\127")
+                  + R("\194\223") * cont
+                  + R("\224\239") * cont * cont
+                  + R("\240\244") * cont * cont * cont
+
+    local cyr = {}
+    local scientific_parser
+
+    if mode == "iso9_ocs" then
+
+        -- NOTE(review): the file name is given without a path, so this
+        -- presumably depends on the current directory -- confirm.
+        dofile "trans_tables_iso9.lua"
+        -- Keep the return value, as translit.dogreek and translit.iso9 do.
+        cyr = translit.add_table( cyr,
+            translit.ru_upp,
+            translit.ru_low,
+            translit.ru_old_upp,
+            translit.ru_old_low,
+            translit.non_ru_upp,
+            translit.non_ru_low,
+            translit.ocs_add_low,
+            translit.ocs_add_upp
+        )
+
+        -- Look every single character up in the merged table.
+        local p_cyr = Cs(utfchar) / cyr
+        scientific_parser = Cs((p_cyr + utfchar)^0)
+
+    elseif mode == "ocs" then
+
+        -- Alternation over the keys of the digraph table; it has to be tried
+        -- before the single-character lookup.
+        local cyruk
+        for key in pairs(translit.ocs_uk) do
+            if cyruk == nil then cyruk = P(key)
+            else cyruk = cyruk + P(key)
+            end
+        end
+        cyr = translit.add_table( cyr, translit.ocs_low, translit.ocs_upp )
+
+        local p_cyruk = Cs(P(cyruk)) / translit.ocs_uk
+        local p_cyr   = Cs(utfchar) / cyr
+
+        scientific_parser = Cs((p_cyruk + p_cyr + utfchar)^0)
+
+    elseif mode == "ocs_gla" then
+
+        -- NOTE(review): same path caveat as for the ISO 9 tables above.
+        dofile "trans_tables_glag.lua"
+        cyr = translit.add_table( cyr, translit.ocs_gla_low, translit.ocs_gla_upp )
+        local p_cyr = Cs(utfchar) / cyr
+        scientific_parser = Cs((p_cyr + utfchar)^0)
+
+    else
+        -- Unknown mode: nothing to apply.
+        return text
+    end
+
+    return scientific_parser:match(text)
+end
+
diff --git a/tex/context/third/transliterator/trans_tables_trsc.lua b/tex/context/third/transliterator/trans_tables_trsc.lua
new file mode 100644
index 0000000..b3061c0
--- /dev/null
+++ b/tex/context/third/transliterator/trans_tables_trsc.lua
@@ -0,0 +1,704 @@
+--===========================================================================--
+--                    Legacy national transliterations                       --
+--===========================================================================--
+---------------------------------
+-- German simple transcription --
+---------------------------------
+-- Reference: „DUDEN. Rechtschreibung der deutschen Sprache“; 20. Aufl.,
+-- Mannheim et. al. 1991.
+
+--------------------------------------------------------
+-- Lowercase German simple transcription---first pass --
+--------------------------------------------------------
+
+translit.ru_trsc_low_first = {
+    [" е"] = " je",
+    ["ъе"] = "je",
+    ["ье"] = "je",
+    [" ё"] = " jo",
+    ["ъё"] = "jo",
+    ["ьё"] = "jo",
+    ["жё"] = "scho",
+    ["чё"] = "tscho",
+    ["шё"] = "scho",
+    ["щё"] = "schtscho",
+    ["ье"] = "je",
+    ["ьи"] = "ji",
+    ["ьо"] = "jo",
+    ["ий"] = "i",
+    ["ый"] = "y",
+    ["кс"] = "x" -- Extraordinarily stupid one.
+}
+
+translit.tables["German transcription first pass lowercase"] = translit.ru_trsc_low_first
+
+--------------------------------------------------------
+-- Uppercase German simple transcription---first pass --
+--------------------------------------------------------
+
+-- Uppercase counterpart of translit.ru_trsc_low_first.
+translit.ru_trsc_upp_first = {
+    [" Е"] = " Je",
+    ["Ъе"] = "Je", -- Pedantic, isn't it? (Key now uses the Cyrillic е.)
+    ["Ье"] = "Je",
+    [" Ё"] = " Jo", -- The key contains the space, so the result must keep it.
+    ["Ъё"] = "Jo",
+    ["Ьё"] = "Jo",
+    ["Жё"] = "Scho",
+    ["Чё"] = "Tscho",
+    ["Шё"] = "Scho",
+    ["Щё"] = "Schtscho",
+    ["Кс"] = "X" -- Capitalised like the key; used to yield lowercase "ks".
+}
+
+translit.tables["German transcription first pass uppercase"] = translit.ru_trsc_upp_first
+
+-------------------------------------------
+-- Lowercase German simple transcription --
+-------------------------------------------
+
+translit.ru_trsc_low = {
+    ["а"] = "a",
+    ["б"] = "b",
+    ["в"] = "w",
+    ["г"] = "g",
+    ["д"] = "d",
+    ["е"] = "e",
+    ["ё"] = "jo",
+    ["ж"] = "sch",
+    ["з"] = "s",
+    ["и"] = "i",
+    ["й"] = "i",
+    ["к"] = "k",
+    ["л"] = "l",
+    ["м"] = "m",
+    ["н"] = "n",
+    ["о"] = "o",
+    ["п"] = "p",
+    ["р"] = "r",
+    ["с"] = "s",
+    ["т"] = "t",
+    ["у"] = "u",
+    ["ф"] = "f",
+    ["х"] = "ch",
+    ["ц"] = "z",
+    ["ч"] = "tsch",
+    ["ш"] = "sch",
+    ["щ"] = "schtsch",
+    ["ъ"] = "",
+    ["ы"] = "y",
+    ["ь"] = "",
+    ["э"] = "e",
+    ["ю"] = "ju",
+    ["я"] = "ja"
+}
+
+translit.tables["German transcription second pass lowercase"] = translit.ru_trsc_low
+
+-------------------------------------------
+-- Uppercase German simple transcription --
+-------------------------------------------
+
+translit.ru_trsc_upp = {
+    ["А"] = "A",
+    ["Б"] = "B",
+    ["В"] = "W",
+    ["Г"] = "G",
+    ["Д"] = "D",
+    ["Е"] = "E",
+    ["Ё"] = "Jo",
+    ["Ж"] = "Sch",
+    ["З"] = "S",
+    ["И"] = "I",
+    ["Й"] = "J",
+    ["К"] = "K",
+    ["Л"] = "L",
+    ["М"] = "M",
+    ["Н"] = "N",
+    ["О"] = "O",
+    ["П"] = "P",
+    ["Р"] = "R",
+    ["С"] = "S",
+    ["Т"] = "T",
+    ["У"] = "U",
+    ["Ф"] = "F",
+    ["Х"] = "Ch",
+    ["Ц"] = "Z",
+    ["Ч"] = "Tsch",
+    ["Ш"] = "Sch",
+    ["Щ"] = "Schtsch",
+    ["Ъ"] = "",
+    ["Ы"] =
"Y",
  ["Ь"] = "",
  ["Э"] = "E",
  ["Ю"] = "Ju",
  ["Я"] = "Ja"
}

translit.tables["German transcription second pass uppercase"] = translit.ru_trsc_upp

-- Both cases of и and ы; the second й-rule ([иы]йC -> [иы]jC) is built from
-- this list.
translit.ru_trsc_iy = {"и", "ы", "И", "Ы"}

--- Generate the context dependent rule tables of the German transcription.
-- Each rule is a plain substitution dictionary that enumerates every
-- combination (including nonsense galore) of the vowels in
-- translit.ru_vowels and the consonants in translit.ru_consonants around the
-- character the rule is about.  Every dictionary is stored in a field of
-- translit and registered in translit.tables.  The function takes no
-- arguments and returns nothing.
function translit.gen_rules_de()
  local vowels     = translit.ru_vowels
  local consonants = translit.ru_consonants

  -- Dictionary mapping V..ante -> V..post for every vowel V.
  local function after_vowel (ante, post)
    local rule = {}
    for _, vow in ipairs(vowels) do
      rule[vow .. ante] = vow .. post
    end
    return rule
  end

  -- The й-rule, VйC -> ViC
  local irule = {}
  for _, vow in ipairs(vowels) do
    for _, cons in ipairs(consonants) do
      irule[vow .. "й" .. cons] = vow .. "i" .. cons
    end
  end
  translit.ru_trsc_irule = irule
  translit.tables["German transcription i-rule"] = irule

  -- The second й-rule, йV -> jV && [иы]йC -> [иы]jC
  local jrule = {}
  for _, vow in ipairs(vowels) do
    jrule["й" .. vow] = "j" .. vow
  end
  for _, cons in ipairs(consonants) do
    for _, iy in ipairs(translit.ru_trsc_iy) do
      jrule[iy .. "й" .. cons] = iy .. "j" .. cons
    end
  end
  translit.ru_trsc_jrule = jrule
  translit.tables["German transcription j-rule"] = jrule

  -- The с-rule, VсV -> VssV
  local srule = {}
  for _, vow_1 in ipairs(vowels) do
    for _, vow_2 in ipairs(vowels) do
      srule[vow_1 .. "с" .. vow_2] = vow_1 .. "ss" .. vow_2
    end
  end
  translit.ru_trsc_srule = srule
  translit.tables["German transcription s-rule"] = srule

  -- The sharp-s-rule, Vсх -> Vßх (the х stays Cyrillic here; it is left to
  -- a later substitution pass).
  translit.ru_trsc_sharpsrule = after_vowel("сх", "ßх")
  translit.tables["German transcription sharp-s-rule"] = translit.ru_trsc_sharpsrule

  -- The е-rule, Vе -> Vje
  translit.ru_trsc_jerule = after_vowel("е", "je")
  translit.tables["German transcription je-rule"] = translit.ru_trsc_jerule

  -- The ё-rule, Vё -> Vjo
  -- This should be redundant as [жцчшщ]ё -> o, else ё -> jo .
  -- Somebody should teach those DUDEN-guys parsimony.
  translit.ru_trsc_jorule = after_vowel("ё", "jo")
  translit.tables["German transcription (redundant) jo-rule"] = translit.ru_trsc_jorule
end

---------------------------------------------------------
-- Lowercase English simple transcription---first pass --
---------------------------------------------------------

translit.ru_trsc_en_low_first = {
  [" е"] = " ye",
  ["ъе"] = "ye",
  ["ье"] = "ye",
  ["ье"] = "ye", -- NOTE(review): looks like a repetition of the key above;
                 -- harmless, but possibly another digraph was intended.
  ["ьи"] = "yi",
}

translit.tables["English transcription lowercase first pass"] = translit.ru_trsc_en_low_first

---------------------------------------------------------
-- Uppercase English simple transcription---first pass --
---------------------------------------------------------

translit.ru_trsc_en_upp_first = {
  [" Е"] = " Ye",
  ["Ъe"] = "Ye", -- NOTE(review): this key appears to end in a Latin "e" and
                 -- so cannot match Cyrillic input; kept for compatibility.
  ["Ъе"] = "Ye", -- all-Cyrillic spelling of the digraph
  ["Ье"] = "Ye",
}

translit.tables["English transcription uppercase first pass"] = translit.ru_trsc_en_upp_first

--------------------------------------------
-- Lowercase English simple transcription --
--------------------------------------------

translit.ru_trsc_en_low = {
  ["а"] = "a",
  ["б"] = "b",
  ["в"] = "v",
  ["г"] = "g",
  ["д"] = "d",
  ["е"] = "e",
  ["ё"] = "e",
  ["ж"] = "zh",
  ["з"] = "z",
  ["и"] = "i",
  ["й"] = "y",
  ["к"] = "k",
  ["л"] = "l",
  ["м"] = "m",
  ["н"] = "n",
  ["о"] = "o",
  ["п"] = "p",
  ["р"] = "r",
  ["с"] = "s",
  ["т"] = "t",
  ["у"] = "u",
  ["ф"] = "f",
  ["х"] = "kh",
  ["ц"] = "ts",
  ["ч"] = "ch",
  ["ш"] = "sh",
  ["щ"] = "shsh",
  ["ъ"] = "",
  ["ы"] = "y",
  ["ь"] = "",
  ["э"] = "e",
  ["ю"] = "yu",
  ["я"] = "ya"
}

translit.tables["English transcription lowercase second pass"] = translit.ru_trsc_en_low

--------------------------------------------
-- Uppercase English simple transcription --
--------------------------------------------

translit.ru_trsc_en_upp = {
  ["А"] = "A",
  ["Б"] = "B",
  ["В"] = "V",
  ["Г"] = "G",
  ["Д"] = "D",
  ["Е"] = "E",
  ["Ё"] = "E",
  ["Ж"] = "Zh",
  ["З"] = "Z",
  ["И"] = "I",
  ["Й"] = "Y",
  ["К"] = "K",
  ["Л"] = "L",
  ["М"] = "M",
  ["Н"] = "N",
  ["О"] = "O",
  ["П"] = "P",
  ["Р"] = "R",
  ["С"] = "S",
  ["Т"] = "T",
  ["У"] = "U",
  ["Ф"] = "F",
  ["Х"] = "Kh",
  ["Ц"] = "Ts",
  ["Ч"] = "Ch",
  ["Ш"] = "Sh",
  ["Щ"] = "Shsh",
  ["Ъ"] = "",
  ["Ы"] = "Y",
  ["Ь"] = "",
  ["Э"] = "E",
  ["Ю"] = "Yu",
  ["Я"] = "Ya"
}

translit.tables["English transcription uppercase second pass"] = translit.ru_trsc_en_upp


--- Generate the context dependent rule table of the English transcription.
-- Builds the е-rule dictionary from translit.ru_vowels, stores it in
-- translit.ru_trsc_en_jerule and registers it in translit.tables.  Takes no
-- arguments and returns nothing.
function translit.gen_rules_en ()
  -- The english е-rule, Vе -> Vye
  local jerule = {}
  for _, vow in ipairs(translit.ru_vowels) do
    jerule[vow .. "е"] = vow .. "ye"
  end
  translit.ru_trsc_en_jerule = jerule

  translit.tables["English transcription ye-rule"] = jerule
end


-----------------------------------
-- Lowercase Czech transcription --
-----------------------------------

-- Maybe we should do things like ню -> ňu and тя -> ťa for ю and я, but that
-- would complicate things a bit and linguists might not agree.
translit.ru_trsc_cz_low = {
  ["а"] = "a",
  ["б"] = "b",
  ["в"] = "v",
  ["г"] = "g",
  ["д"] = "d",
  ["е"] = "e",
  ["ё"] = "ë",
  ["ж"] = "ž",
  ["з"] = "z",
  ["и"] = "i",
  ["й"] = "j",
  ["к"] = "k",
  ["л"] = "l",
  ["м"] = "m",
  ["н"] = "n",
  ["о"] = "o",
  ["п"] = "p",
  ["р"] = "r",
  ["с"] = "s",
  ["т"] = "t",
  ["у"] = "u",
  ["ф"] = "f",
  ["х"] = "ch",
  ["ц"] = "c",
  ["ч"] = "č",
  ["ш"] = "š",
  ["щ"] = "šč",
  ["ъ"] = "ъ",
  ["ы"] = "y",
  ["ь"] = "ь",
  ["э"] = "è",
  ["ю"] = "ju",
  ["я"] = "ja"
}

translit.tables["Czech transcription lowercase"] = translit.ru_trsc_cz_low

do
  -- Store a dictionary in a field of translit and list it under its
  -- descriptive title in translit.tables.
  local function publish (field, title, dict)
    translit[field] = dict
    translit.tables[title] = dict
  end

  -----------------------------------
  -- Uppercase Czech transcription --
  -----------------------------------

  publish("ru_trsc_cz_upp", "Czech transcription uppercase", {
    ["А"] = "A",
    ["Б"] = "B",
    ["В"] = "V",
    ["Г"] = "G",
    ["Д"] = "D",
    ["Е"] = "E",
    ["Ё"] = "Ë",
    ["Ж"] = "Ž",
    ["З"] = "Z",
    ["И"] = "I",
    ["Й"] = "J",
    ["К"] = "K",
    ["Л"] = "L",
    ["М"] = "M",
    ["Н"] = "N",
    ["О"] = "O",
    ["П"] = "P",
    ["Р"] = "R",
    ["С"] = "S",
    ["Т"] = "T",
    ["У"] = "U",
    ["Ф"] = "F",
    ["Х"] = "Ch",
    ["Ц"] = "C",
    ["Ч"] = "Č",
    ["Ш"] = "Š",
    ["Щ"] = "Šč",
    ["Ъ"] = "Ъ",
    ["Ы"] = "Y",
    ["Ь"] = "Ь",
    ["Э"] = "È",
    ["Ю"] = "Ju",
    ["Я"] = "Ja"
  })

  ----------------------------------------------
  -- Lowercase Additional Czech Transcription --
  ----------------------------------------------

  publish("ru_trsc_cz_add_low", "Czech transcription for OCS and pre-1918 lowercase", {
    ["ѕ"] = "dz",
    ["з"] = "z",
    ["ꙁ"] = "z",
    ["і"] = "ï",
    ["ѹ"] = "u",
    ["ѡ"] = "ō",
    ["ѣ"] = "ě",
    ["ѥ"] = "je",
    ["ѧ"] = "ę",
    ["ѩ"] = "ję",
    ["ѫ"] = "ǫ",
    ["ѭ"] = "jǫ",
    ["ѯ"] = "ks",
    ["ѱ"] = "ps",
    ["ѳ"] = "th",
    ["ѵ"] = "ÿ",
  })

  ----------------------------------------------
  -- Uppercase Additional Czech Transcription --
  ----------------------------------------------

  publish("ru_trsc_cz_add_upp", "Czech transcription for OCS and pre-1918 uppercase", {
    ["Ѕ"] = "Dz",
    ["З"] = "Z",
    ["Ꙁ"] = "Z",
    ["І"] = "Ï",
    ["Ѹ"] = "U",
    ["Ѡ"] = "Ō",
    ["Ѣ"] = "Ě",
    ["Ѥ"] = "Je",
    ["Ѧ"] = "Ę",
    ["Ѩ"] = "Ję",
    ["Ѫ"] = "Ǫ",
    ["Ѭ"] = "Jǫ",
    ["Ѯ"] = "Ks",
    ["Ѱ"] = "Ps",
    ["Ѳ"] = "Th",
    ["Ѵ"] = "Ÿ",
  })
end

--===========================================================================--
--                               End Of Tables                               --
--===========================================================================--

--- Transcribe Cyrillic text according to the scheme selected by `mode`.
--
-- Recognized modes:
--   "ru_transcript_de_exp"  German transcription, one LPeg grammar
--   "ru_transcript_de"      German transcription, successive table passes
--   "ru_transcript_en_exp"  English transcription, one LPeg grammar
--   "ru_transcript_en"      English transcription, successive table passes
--   "ru_cz"                 Czech transcription
--   "ocs_cz"                Czech transcription plus the additional OCS and
--                           pre-1918 characters
--
-- @param mode  string, one of the names above
-- @param text  string, the UTF-8 encoded input
-- @return the transcribed string, or nil if `mode` is not one of the names
--         above (no branch matches, so nothing is returned)
function translit.transcript (mode, text)
  local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs

  -- One UTF-8 encoded character, after
  -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html
  -- Three byte sequences start with 0xE0-0xEF (224-239), four byte sequences
  -- with 0xF0-0xF4 (240-244).  Putting 240 into the three byte class splits
  -- such a character after its third byte and the leftover continuation byte
  -- then stops the match, truncating the result.
  local utfchar = R("\000\127")
                + R("\194\223") * R("\128\191")
                + R("\224\239") * R("\128\191") * R("\128\191")
                + R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191")

  -- Last resort of every top level loop: a byte that is not part of a well
  -- formed sequence is copied through instead of ending the match early.
  local anychar = utfchar + P(1)

  -- Add keys of a dictionary to a ruleset (an ordered choice of literals).
  -- `rules` may be nil, in which case a new ruleset is started.  Returns nil
  -- only if `rules` was nil and `dict` is empty.
  local function addrules (dict, rules)
    for key in pairs(dict) do
      rules = rules and rules + P(key) or P(key)
    end
    return rules
  end

  -- Ordered choice over a list of strings.  This is needed because lpeg.S
  -- works on bytes and thus cannot express sets of UTF-8 characters.
  local function choice (list)
    local patt
    for _, str in ipairs(list) do
      patt = patt and patt + P(str) or P(str)
    end
    return patt
  end

  -- Merge the given dictionaries and replace every occurrence of one of
  -- their keys in `s` by the corresponding value.  The varargs are collected
  -- with {...}; the implicit `arg` table is not available from Lua 5.2 on.
  local function tab_subst (s, ...)
    local dict = {}
    for _, tab in ipairs({...}) do
      translit.add_table(dict, tab)
    end
    local keys = addrules(dict)
    if keys == nil then return s end -- nothing to substitute
    local fp = Cs((Cs(keys / dict) + anychar)^0)
    return fp:match(s)
  end

  if mode == "ru_transcript_de_exp" then

    local vow = choice(translit.ru_vowels)
    local con = choice(translit.ru_consonants)
    local iy  = choice(translit.ru_trsc_iy)

    local de_low_upp = translit.add_table({}, translit.ru_trsc_upp, translit.ru_trsc_low)

    local twochar = addrules(translit.ru_trsc_low_first)
    twochar       = addrules(translit.ru_trsc_upp_first, twochar)
    local tworepl = translit.add_table({}, translit.ru_trsc_low_first, translit.ru_trsc_upp_first)

    -- Transcription of the n-th character of a match; a character without
    -- an entry is passed through instead of raising a concatenation error.
    local function tr (s, n)
      local ch = utf.sub(s, n, n)
      return de_low_upp[ch] or ch
    end

    -- The й-rule, VйC -> ViC
    local function V_i_C (s)  return tr(s, 1) .. "i" .. tr(s, 3) end

    -- The second й-rule, йV -> jV && [иы]йC -> [иы]jC
    local function iy_j_C (s) return tr(s, 1) .. "j" .. tr(s, 3) end
    local function j_V (s)    return "j" .. tr(s, 2) end

    -- The с-rule, VсV -> VssV
    local function V_ss_V (s) return tr(s, 1) .. "ss" .. tr(s, 3) end

    -- The sharp-s-rule, Vсх -> Vßch (the х is transcribed right away here)
    local function V_sz_ch (s) return tr(s, 1) .. "ßch" end

    -- The е-rule, Vе -> Vje
    local function V_je (s)   return tr(s, 1) .. "je" end

    -- Vее -> Vjeje.  Reapplying V_je on its result + next char would make
    -- this rule obsolete.
    local function V_jeje (s) return tr(s, 1) .. "jeje" end

    -- The ё-rule, Vё -> Vjo
    -- This should be redundant as [жцчшщ]ё -> o, else ё -> jo .
    -- Somebody should teach those DUDEN guys parsimony.
    local function V_jo (s)   return tr(s, 1) .. "jo" end

    local iyrule = Cs((iy  * "й" * con) / iy_j_C)
    local jrule  = Cs(("й" * vow)       / j_V)
    local irule  = Cs((vow * "й" * con) / V_i_C)

    local ssrule = Cs((vow * "с" * vow) / V_ss_V)
    local szrule = Cs((vow * "сх")      / V_sz_ch)

    local jjrule = Cs((vow * "ее")      / V_jeje)
    local jerule = Cs((vow * "е")       / V_je)
    local jorule = Cs((vow * "ё")       / V_jo)

    local dvoje  = Cs(twochar / tworepl)
    local other  = Cs(utfchar / de_low_upp)

    local izhe   = iyrule + jrule  + irule
    local slovo  = ssrule + szrule
    local jest   = jjrule + jerule + jorule

    local g = Cs((izhe + slovo + jest + dvoje + other + anychar)^0)

    return g:match(text)

  elseif mode == "ru_transcript_de" then

    translit.gen_rules_de()

    -- This is possibly slower than using string:gsub.

    text = tab_subst(text, translit.ru_trsc_jrule)
    text = tab_subst(text, translit.ru_trsc_irule)
    text = tab_subst(text, translit.ru_trsc_jerule)
    text = tab_subst(text, translit.ru_trsc_srule)
    text = tab_subst(text, translit.ru_trsc_sharpsrule)
    text = tab_subst(text, translit.ru_trsc_jorule)
    text = tab_subst(text, translit.ru_trsc_upp_first, translit.ru_trsc_low_first)
    text = tab_subst(text, translit.ru_trsc_upp, translit.ru_trsc_low)

    return text

  elseif mode == "ru_transcript_en_exp" then

    local vow = choice(translit.ru_vowels)

    local en_low_upp = translit.add_table({}, translit.ru_trsc_en_low, translit.ru_trsc_en_upp)

    local twochar = addrules(translit.ru_trsc_en_low_first)
    twochar       = addrules(translit.ru_trsc_en_upp_first, twochar)
    local tworepl = translit.add_table({}, translit.ru_trsc_en_low_first, translit.ru_trsc_en_upp_first)

    -- The е-rule, Vе -> Vye
    local function V_je (s)
      local ante = utf.sub(s, 1, 1)
      return (en_low_upp[ante] or ante) .. "ye"
    end

    local jerule = Cs((vow * "е") / V_je)

    local dvoje  = Cs(twochar / tworepl)
    local other  = Cs(utfchar / en_low_upp)

    local g = Cs((dvoje + jerule + other + anychar)^0)

    return g:match(text)

  elseif mode == "ru_transcript_en" then

    translit.gen_rules_en()

    text = tab_subst(text, translit.ru_trsc_en_jerule)
    text = tab_subst(text, translit.ru_trsc_en_low_first, translit.ru_trsc_en_upp_first)
    text = tab_subst(text, translit.ru_trsc_en_low, translit.ru_trsc_en_upp)

    return text

  elseif mode == "ru_cz" or mode == "ocs_cz" then

    text = tab_subst(text, translit.ru_trsc_cz_low, translit.ru_trsc_cz_upp)
    if mode == "ocs_cz" then
      text = tab_subst(text, translit.ru_trsc_cz_add_low, translit.ru_trsc_cz_add_upp)
    end

    return text
  end

end