%D \enableregime[utf]
%D \module
%D   [     file=t-transliterator,
%D      version=2010.03.07,
%D        title=\CONTEXT\ User Module,
%D     subtitle=The Transliterator,
%D       author=Philipp Gesang,
%D         date=\currentdate,
%D    copyright=Philipp Gesang,
%D      license=2-clause BSD,
%D        email={pgesang at ix dot urz dot uni-heidelberg dot de}]

%D This module is licensed under the conditions of the BSD license with
%D two clauses: http://www.freebsd.org/copyright/freebsd-license.html.
%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/.

\writestatus{loading}{Transliteration from non-Latin scripts}

\startmodule[transliterator]

\unprotect

%D Use the Transliterator by adding \type{\usemodule[transliterator]} somewhere
%D before \type{\starttext}. Adjust the Transliterator through the
%D \type{\setuptransliterate} command. As its first argument it accepts a set
%D of key-value options; at present you may configure \type{mode} and
%D \type{hyphenate}. (A \type{debug} key is initialised along with the
%D defaults below as well.)
%D
%D The options are handed to \type{\getparameters} with the prefix
%D \type{TRL}.

% emendation by Wolfgang Schuster
\def\setuptransliterate{\dodoubleargument\getparameters[TRL]}

%D At first we'll set some defaults:
\setuptransliterate[mode=ru_old,hyphenate=cz,debug=false]

%D Possible values for \type{mode} are at the time of this writing:
%D \type{ru}, \type{ru_transcript_de}, \type{ru_transcript_en}, \type{ru_old},
%D \type{all}, \type{iso9_ocs}, \type{ocs}, \type{ocs_gla}, \type{ru_cz},
%D \type{ocs_cz}, \type{gr} and \type{gr_n}.

%D Possible values for \type{hyphenate} are all valid \CONTEXT\ language codes;
%D for an overview see \type{http://wiki.contextgarden.net/Language_Codes}.
%D In practice you may want to choose either Czech (the default) or Slovak
%D (\type{sk}) for most transliterations from Cyrillic scripts. I've not yet
%D made up my mind concerning Greek transliteration, any suggestions are
%D welcome.

%D For clarity's sake we'll stuff everything Lua into one table.
\startluacode
-- Keep an already existing namespace table; create it only when missing.
if not translit then
  translit = {}
end
translit.debug_count = 0
\stopluacode

%D Every substitution table created below is also registered in a separate
%D dictionary, keyed by a description string.

\startluacode
translit.tables = {}
\stopluacode

%D Next come the lists of vowels and consonants of the Russian alphabet.
%D They are needed later on, when the substitution tables for some
%D idiosyncratic transcriptions are generated.

\startluacode
-- If you haven't heard of cyrillic scripts until now you might want to read
-- at least the first 15 pages of
-- http://www.uni-giessen.de/partosch/eurotex99/berdnikov2.pdf
-- before you continue reading this file.

-- Lowercase first, uppercase second.
translit.ru_vowels = {
  "а", "е", "ё", "и", "й", "о", "у", "ы", "э", "ю", "я",
  "А", "Е", "Ё", "И", "Й", "О", "У", "Ы", "Э", "Ю", "Я"
}

translit.ru_consonants = {
  "б", "в", "г", "д", "ж", "з", "к", "л", "м", "н",
  "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш", "щ",
  "Б", "В", "Г", "Д", "Ж", "З", "К", "Л", "М", "Н",
  "П", "Р", "С", "Т", "Ф", "Х", "Ц", "Ч", "Ш", "Щ"
}
\stopluacode

%D Substitution tables are the very heart of the Transliterator. Due to the
%D nature of languages and scripts exhaustive substitution is the simplest
%D method for transliterations and transcriptions unless they are one-to-one
%D mappings like those defined in ISO~9.
%D
%D To achieve better reusability we split the tables into segments, the most
%D obvious being the \type{*_low} and \type{*_upp} variants for sets of
%D lowercase and uppercase characters. Another set is constituted by e.~g. the
%D \type{ru_old*} tables that allow adding transcription of historical
%D characters if needed; by the way those are included in the default
%D transliteration mode \type{ru_old}.
%-===========================================================================-- %- ISO 9.1995(E) standardized transliteration for cyrillic -- %-===========================================================================-- \startluacode ----------------------------------------- -- Lowercase russian cyrillic alphabet -- ----------------------------------------- translit.ru_low = { ["а"] = "a", -- U+0430 -> U+0061 ["б"] = "b", -- U+0431 -> U+0062 ["в"] = "v", -- U+0432 -> U+0076 ["г"] = "g", -- U+0433 -> U+0067 ["д"] = "d", -- U+0434 -> U+0064 ["е"] = "e", -- U+0435 -> U+0065 ["ё"] = "ë", -- U+0451 -> U+00eb ["ж"] = "ž", -- U+0436 -> U+017e ["з"] = "z", -- U+0437 -> U+007a ["и"] = "i", -- U+0438 -> U+0069 ["й"] = "j", -- U+0439 -> U+006a ["к"] = "k", -- U+043a -> U+006b ["л"] = "l", -- U+043b -> U+006c ["м"] = "m", -- U+043c -> U+006d ["н"] = "n", -- U+043d -> U+006e ["о"] = "o", -- U+043e -> U+006f ["п"] = "p", -- U+043f -> U+0070 ["р"] = "r", -- U+0440 -> U+0072 ["с"] = "s", -- U+0441 -> U+0073 ["т"] = "t", -- U+0442 -> U+0074 ["у"] = "u", -- U+0443 -> U+0075 ["ф"] = "f", -- U+0444 -> U+0066 ["х"] = "h", -- U+0445 -> U+0068 ["ц"] = "c", -- U+0446 -> U+0063 ["ч"] = "č", -- U+0447 -> U+010d ["ш"] = "š", -- U+0448 -> U+0161 ["щ"] = "ŝ", -- U+0449 -> U+015d ["ъ"] = "ʺ", -- U+044a -> U+02ba <- That's somewhat ambiguous as 0x2ba is ["ы"] = "y", -- U+044b -> U+0079 used for uppercase, too. ["ь"] = "ʹ", -- U+044c -> U+02b9 <- Same here with 0x2b9. 
["э"] = "è", -- U+044d -> U+00e8 ["ю"] = "û", -- U+044e -> U+00fb ["я"] = "â" -- U+044f -> U+00e2 } translit.tables["russian lowercase ISO~9"] = translit.ru_low ----------------------------------------- -- Uppercase russian cyrillic alphabet -- ----------------------------------------- translit.ru_upp = { ["А"] = "A", -- U+0410 -> U+0041 ["Б"] = "B", -- U+0411 -> U+0042 ["В"] = "V", -- U+0412 -> U+0056 ["Г"] = "G", -- U+0413 -> U+0047 ["Д"] = "D", -- U+0414 -> U+0044 ["Е"] = "E", -- U+0415 -> U+0045 ["Ё"] = "Ë", -- U+0401 -> U+00cb ["Ж"] = "Ž", -- U+0416 -> U+017d ["З"] = "Z", -- U+0417 -> U+005a ["И"] = "I", -- U+0418 -> U+0049 ["Й"] = "J", -- U+0419 -> U+004a ["К"] = "K", -- U+041a -> U+004b ["Л"] = "L", -- U+041b -> U+004c ["М"] = "M", -- U+041c -> U+004d ["Н"] = "N", -- U+041d -> U+004e ["О"] = "O", -- U+041e -> U+004f ["П"] = "P", -- U+041f -> U+0050 ["Р"] = "R", -- U+0420 -> U+0052 ["С"] = "S", -- U+0421 -> U+0053 ["Т"] = "T", -- U+0422 -> U+0054 ["У"] = "U", -- U+0423 -> U+0055 ["Ф"] = "F", -- U+0424 -> U+0046 ["Х"] = "H", -- U+0425 -> U+0048 ["Ц"] = "C", -- U+0426 -> U+0043 ["Ч"] = "Č", -- U+0427 -> U+010c ["Ш"] = "Š", -- U+0428 -> U+0160 ["Щ"] = "Ŝ", -- U+0429 -> U+015c ["Ъ"] = "ʺ", -- U+042a -> U+02ba ["Ы"] = "Y", -- U+042b -> U+0059 ["Ь"] = "ʹ", -- U+042c -> U+02b9 ["Э"] = "È", -- U+042d -> U+00c8 ["Ю"] = "Û", -- U+042e -> U+00db ["Я"] = "Â" -- U+042f -> U+00c2 } translit.tables["russian uppercase ISO~9"] = translit.ru_upp ---------------------------------------------------------- -- Lowercase pre-1918 russian cyrillic additional chars -- ---------------------------------------------------------- -- cf. 
-- http://www.russportal.ru/index.php?id=oldorth.decret1917
-- Letters dropped by the 1917/18 orthography reform, lowercase.
translit.ru_old_low = {
  ["ѣ"] = "ě", -- U+0463 -> U+011b
  ["і"] = "ì", -- U+0456 -> U+00ec
  ["ѳ"] = "f", -- U+0473 -> U+0066
  ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3
}
-- Register the pre-1918 table itself (not the modern ru_low table) under
-- its description.
translit.tables["russian pre-1918 lowercase ISO~9"] = translit.ru_old_low

-- Letters dropped by the 1917/18 orthography reform, uppercase.
translit.ru_old_upp = {
  ["Ѣ"] = "Ě", -- U+0462 -> U+011a
  ["І"] = "Ì", -- U+0406 -> U+00cc
  ["Ѳ"] = "F", -- U+0472 -> U+0046
  ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2
}
-- Register the pre-1918 table itself (not the modern ru_upp table) under
-- its description.
translit.tables["russian pre-1918 uppercase ISO~9"] = translit.ru_old_upp

---------------------------------------------------------
-- Lowercase characters from other cyrillic alphabets --
---------------------------------------------------------
translit.non_ru_low = {
  ["ӑ"] = "ă", -- U+04d1 -> U+0103
  ["ӓ"] = "ä", -- U+04d3 -> U+00e4
  ["ә"] = "a̋", -- U+04d9 -> U+0061+030b
  ["ґ"] = "g̀", -- U+0491 -> U+0067+0300
  ["ҕ"] = "ğ", -- U+0495 -> U+011f
  ["ғ"] = "ġ", -- U+0493 -> U+0121
  ["ђ"] = "đ", -- U+0452 -> U+0111
  ["ѓ"] = "ǵ", -- U+0453 -> U+01f5
  ["ӗ"] = "ĕ", -- U+04d7 -> U+0115
  ["є"] = "ê", -- U+0454 -> U+00ea
  ["ҽ"] = "c̆", -- U+04bd -> U+0063+0306
  ["ҿ"] = "ç̆", -- U+04bf -> U+00e7+0306
  ["ӂ"] = "z̆", -- U+04c2 -> U+007a+0306
  ["ӝ"] = "z̄", -- U+04dd -> U+007a+0304
  ["җ"] = "ž̧", -- U+0497 -> U+017e+0327
  ["ӟ"] = "z̈", -- U+04df -> U+007a+0308
  ["ѕ"] = "ẑ", -- U+0455 -> U+1e91 -- Mapped to dz in old cyrillic non-ISO.
["ӡ"] = "ź", -- U+04e1 -> U+017a ["ӥ"] = "î", -- U+04e5 -> U+00ee ["і"] = "ì", -- U+0456 -> U+00ec ["ї"] = "ï", -- U+0457 -> U+00ef ["ј"] = "ǰ", -- U+0458 -> U+01f0 ["қ"] = "ķ", -- U+049b -> U+0137 ["ҟ"] = "k̄", -- U+049f -> U+006b+0304 ["љ"] = "l̂", -- U+0459 -> U+006c+0302 ["њ"] = "n̂", -- U+045a -> U+006e+0302 ["ҥ"] = "ṅ", -- U+04a5 -> U+1e45 ["ң"] = "ṇ", -- U+04a3 -> U+1e47 ["ӧ"] = "ö", -- U+04e7 -> U+00f6 ["ө"] = "ô", -- U+04e9 -> U+00f4 ["ҧ"] = "ṕ", -- U+04a7 -> U+1e55 ["ҫ"] = "ç", -- U+04ab -> U+00e7 ["ҭ"] = "ţ", -- U+04ad -> U+0163 ["ћ"] = "ć", -- U+045b -> U+0107 ["ќ"] = "ḱ", -- U+045c -> U+1e31 ["у́"] = "ú", -- U+0443+ -> U+00fA ["ў"] = "ŭ", -- U+045e -> U+016d ["ӱ"] = "ü", -- U+04f1 -> U+00fc ["ӳ"] = "ű", -- U+04f3 -> U+0171 ["ү"] = "ù", -- U+04af -> U+00f9 ["ҳ"] = "ḩ", -- U+04b3 -> U+1e29 ["һ"] = "ḥ", -- U+04bb -> U+1e25 ["ҵ"] = "c̄", -- U+04b5 -> U+0063+0304 ["ӵ"] = "c̈", -- U+04f5 -> U+0063+0308 ["ҷ"] = "ç", -- U+04cc -> U+00e7 ["џ"] = "d̂", -- U+045f -> U+0064+0302 ["ӹ"] = "ÿ", -- U+04f9 -> U+00ff ["ѣ"] = "ě", -- U+048d -> U+011b ["ѫ"] = "ǎ", -- U+046b -> U+01ce -- Mapped to ǫ in non-ISO old cyrillic. ["ѳ"] = "f̀", -- U+0473 -> U+0066+0300 -- This is mapped to ‘f’ in ru_old. 
["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 ["ҩ"] = "ò", -- U+04a9 -> U+00f2 ["Ӏ"] = "‡" -- U+04cf -> U+2021 } translit.tables["cyrillic other lowercase ISO~9"] = translit.non_ru_low --------------------------------------------------------- -- Uppercase characters from other cyrillic alphabets -- --------------------------------------------------------- translit.non_ru_upp = { ["Ӑ"] = "Ă", -- U+04d0 -> U+0102 ["Ӓ"] = "Ä", -- U+04d2 -> U+00c4 ["Ә"] = "A̋", -- U+04d8 -> U+0041+030b ["Ґ"] = "G̀", -- U+0490 -> U+0047+0300 ["Ҕ"] = "Ğ", -- U+0494 -> U+011e ["Ғ"] = "Ġ", -- U+0492 -> U+0120 ["Ђ"] = "Đ", -- U+0402 -> U+0110 ["Ѓ"] = "Ǵ", -- U+0403 -> U+01f4 ["Ӗ"] = "Ĕ", -- U+04d6 -> U+0114 ["Є"] = "Ê", -- U+0404 -> U+00ca ["Ҽ"] = "C̆", -- U+04bc -> U+0043+0306 ["Ҿ"] = "Ç̆", -- U+04be -> U+00c7+0306 ["Ӂ"] = "Z̆", -- U+04c1 -> U+005a+0306 ["Ӝ"] = "Z̄", -- U+04dc -> U+005a+0304 ["Җ"] = "Ž̦", -- U+0496 -> U+017d+0326 ["Ӟ"] = "Z̈", -- U+04de -> U+005a+0308 ["Ѕ"] = "Ẑ", -- U+0405 -> U+1e90 ["Ӡ"] = "Ź", -- U+04e0 -> U+0179 ["Ӥ"] = "Î", -- U+04e4 -> U+00ce ["І"] = "Ì", -- U+0406 -> U+00cc ["Ї"] = "Ï", -- U+0407 -> U+00cf ["Ј"] = "J̌", -- U+0408 -> U+004a+030c ["Қ"] = "Ķ", -- U+049a -> U+0136 ["Ҟ"] = "K̄", -- U+049e -> U+004b+0304 ["Љ"] = "L̂", -- U+0409 -> U+004c+0302 ["Њ"] = "N̂", -- U+040a -> U+004e+0302 ["Ҥ"] = "Ṅ", -- U+04a4 -> U+1e44 ["Ң"] = "Ṇ", -- U+04a2 -> U+1e46 ["Ӧ"] = "Ö", -- U+04e6 -> U+00d6 ["Ө"] = "Ô", -- U+04e8 -> U+00d4 ["Ҧ"] = "Ṕ", -- U+04a6 -> U+1e54 ["Ҫ"] = "Ç", -- U+04aa -> U+00c7 ["Ҭ"] = "Ţ", -- U+04ac -> U+0162 ["Ћ"] = "Ć", -- U+040b -> U+0106 ["Ќ"] = "Ḱ", -- U+040c -> U+1e30 ["У́"] = "Ú", -- U+0423 -> U+00da ["Ў"] = "Ŭ", -- U+040e -> U+016c ["Ӱ"] = "Ü", -- U+04f0 -> U+00dc ["Ӳ"] = "Ű", -- U+04f2 -> U+0170 ["Ү"] = "Ù", -- U+04ae -> U+00d9 ["Ҳ"] = "Ḩ", -- U+04b2 -> U+1e28 ["Һ"] = "Ḥ", -- U+04ba -> U+1e24 ["Ҵ"] = "C̄", -- U+04b4 -> U+0043+0304 ["Ӵ"] = "C̈", -- U+04f4 -> U+0043+0308 ["Ҷ"] = "Ç", -- U+04cb -> U+00c7 ["Џ"] = "D̂", -- U+040f -> U+0044+0302 ["Ӹ"] = "Ÿ", -- 
-- U+04f8 -> U+0178
  ["Ѣ"] = "Ě", -- U+0462 -> U+011a
  ["Ѫ"] = "Ǎ", -- U+046a -> U+01cd
  ["Ѳ"] = "F̀", -- U+0472 -> U+0046+0300
  ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2
  ["Ҩ"] = "Ò", -- U+04a8 -> U+00d2
  ["’"] = "‵", -- U+2019 -> U+2035
  ["Ӏ"] = "‡" -- U+04c0 -> U+2021
}
translit.tables["cyrillic other uppercase ISO~9"] = translit.non_ru_upp
\stopluacode

\startluacode
--===========================================================================--
--                    Legacy national transliterations                       --
--===========================================================================--
-- Note:
-- Use these only as a last resort. ‘Vulgar’ transcription is ugly and
-- chauvinistic.

---------------------------------
-- German simple transcription --
---------------------------------
-- Reference: „DUDEN. Rechtschreibung der deutschen Sprache“; 20. Aufl.,
-- Mannheim et al. 1991.
--
-- The transcription is split into a first pass of multi-character,
-- context dependent substitutions and a second pass of plain
-- single-character substitutions.

--------------------------------------------------------
-- Lowercase German simple transcription---first pass --
--------------------------------------------------------
-- Keys that begin with a space carry that space over into the replacement,
-- so a leading space in the key must always reappear in the value.
translit.ru_trsc_low_first = {
  [" е"] = " je",
  ["ъе"] = "je",
  ["ье"] = "je",
  [" ё"] = " jo",
  ["ъё"] = "jo",
  ["ьё"] = "jo",
  ["жё"] = "scho",
  ["чё"] = "tscho",
  ["шё"] = "scho",
  ["щё"] = "schtscho",
  ["ьи"] = "ji",
  ["ьо"] = "jo",
  ["ий"] = "i",
  ["ый"] = "y",
  ["кс"] = "x" -- Extraordinarily stupid one.
}
translit.tables["German transcription first pass lowercase"] = translit.ru_trsc_low_first

--------------------------------------------------------
-- Uppercase German simple transcription---first pass --
--------------------------------------------------------
translit.ru_trsc_upp_first = {
  [" Е"] = " Je",
  ["Ъе"] = "Je", -- Pedantic, isn't it? (The key is all-Cyrillic.)
  ["Ье"] = "Je",
  [" Ё"] = " Jo", -- Keeps the leading space, like " Е" above.
  ["Ъё"] = "Jo",
  ["Ьё"] = "Jo",
  ["Жё"] = "Scho",
  ["Чё"] = "Tscho",
  ["Шё"] = "Scho",
  ["Щё"] = "Schtscho",
  ["Кс"] = "X" -- Uppercase counterpart of "кс" -> "x".
}
translit.tables["German transcription first pass uppercase"] = translit.ru_trsc_upp_first

-------------------------------------------
-- Lowercase German simple transcription --
-------------------------------------------
-- Second pass: one Cyrillic letter at a time. The hard and the soft sign
-- are dropped (mapped to the empty string).
translit.ru_trsc_low = {
  ["а"] = "a",    ["б"] = "b",       ["в"] = "w",   ["г"] = "g",
  ["д"] = "d",    ["е"] = "e",       ["ё"] = "jo",  ["ж"] = "sch",
  ["з"] = "s",    ["и"] = "i",       ["й"] = "i",   ["к"] = "k",
  ["л"] = "l",    ["м"] = "m",       ["н"] = "n",   ["о"] = "o",
  ["п"] = "p",    ["р"] = "r",       ["с"] = "s",   ["т"] = "t",
  ["у"] = "u",    ["ф"] = "f",       ["х"] = "ch",  ["ц"] = "z",
  ["ч"] = "tsch", ["ш"] = "sch",     ["щ"] = "schtsch",
  ["ъ"] = "",     ["ы"] = "y",       ["ь"] = "",    ["э"] = "e",
  ["ю"] = "ju",   ["я"] = "ja"
}
translit.tables["German transcription second pass lowercase"] = translit.ru_trsc_low

-------------------------------------------
-- Uppercase German simple transcription --
-------------------------------------------
translit.ru_trsc_upp = {
  ["А"] = "A",    ["Б"] = "B",       ["В"] = "W",   ["Г"] = "G",
  ["Д"] = "D",    ["Е"] = "E",       ["Ё"] = "Jo",  ["Ж"] = "Sch",
  ["З"] = "S",    ["И"] = "I",       ["Й"] = "J",   ["К"] = "K",
  ["Л"] = "L",    ["М"] = "M",       ["Н"] = "N",   ["О"] = "O",
  ["П"] = "P",    ["Р"] = "R",       ["С"] = "S",   ["Т"] = "T",
  ["У"] = "U",    ["Ф"] = "F",       ["Х"] = "Ch",  ["Ц"] = "Z",
  ["Ч"] = "Tsch", ["Ш"] = "Sch",     ["Щ"] = "Schtsch",
  ["Ъ"] = "",     ["Ы"] = "Y",       ["Ь"] = "",    ["Э"] = "E",
  ["Ю"] = "Ju",   ["Я"] = "Ja"
}
translit.tables["German transcription second pass uppercase"] = translit.ru_trsc_upp
\stopluacode

%D The following are more interesting than the previous tables because they
%D implement various rules.
%D For instance the table
%D \type{translit.ru_trsc_irule} holds a substitution dictionary for all
%D possible combinations (including nonsense galore) of a vowel preceding an
%D “й” (Russian short i) preceding a consonant; here we access the sets of
%D Russian vowels as well as consonants that were defined earlier.

\startluacode
-- All rule tables below map a short Cyrillic context to the same context
-- with only the affected letter replaced; the surrounding Cyrillic letters
-- are left in place in the replacement.

-- The й-rule, VйC -> ViC
translit.ru_trsc_irule = {}
for _, vow in ipairs(translit.ru_vowels) do
  for _, cons in ipairs(translit.ru_consonants) do
    translit.ru_trsc_irule[vow .. "й" .. cons] = vow .. "i" .. cons
  end
end
translit.tables["German transcription i-rule"] = translit.ru_trsc_irule

-- The second й-rule, йV -> jV && [иы]йC -> [иы]jC
translit.ru_trsc_jrule = {}
for _, vow in ipairs(translit.ru_vowels) do
  translit.ru_trsc_jrule["й" .. vow] = "j" .. vow
end
translit.ru_trsc_iy = {"и", "ы", "И", "Ы"}
for _, cons in ipairs(translit.ru_consonants) do
  for _, iy in ipairs(translit.ru_trsc_iy) do
    translit.ru_trsc_jrule[iy .. "й" .. cons] = iy .. "j" .. cons
  end
end
translit.tables["German transcription j-rule"] = translit.ru_trsc_jrule

-- The с-rule, VсV -> VssV
translit.ru_trsc_srule = {}
for _, vow_1 in ipairs(translit.ru_vowels) do
  for _, vow_2 in ipairs(translit.ru_vowels) do
    translit.ru_trsc_srule[vow_1 .. "с" .. vow_2] = vow_1 .. "ss" .. vow_2
  end
end
translit.tables["German transcription s-rule"] = translit.ru_trsc_srule

-- The sharp-s-rule, Vсх -> Vßх
translit.ru_trsc_sharpsrule = {}
for _, vow in ipairs(translit.ru_vowels) do
  translit.ru_trsc_sharpsrule[vow .. "сх"] = vow .. "ßх"
end
translit.tables["German transcription sharp-s-rule"] = translit.ru_trsc_sharpsrule

-- The е-rule, Vе -> Vje
translit.ru_trsc_jerule = {}
for _, vow in ipairs(translit.ru_vowels) do
  translit.ru_trsc_jerule[vow .. "е"] = vow .. "je"
end
translit.tables["German transcription je-rule"] = translit.ru_trsc_jerule

-- The ё-rule, Vё -> Vjo
-- This should be redundant as [жцчшщ]ё -> o, else ё -> jo .
-- Somebody should teach those DUDEN-guys parsimony.
translit.ru_trsc_jorule = {}
for _, vow in ipairs(translit.ru_vowels) do
  translit.ru_trsc_jorule[vow .. "ё"] = vow .. "jo"
end
translit.tables["German transcription (redundant) jo-rule"] = translit.ru_trsc_jorule
\stopluacode

\startluacode
---------------------------------------------------------
-- Lowercase English simple transcription---first pass --
---------------------------------------------------------
-- Keys that begin with a space carry that space over into the replacement.
translit.ru_trsc_en_low_first = {
  [" е"] = " ye",
  ["ъе"] = "ye",
  ["ье"] = "ye",
  ["ьи"] = "yi",
}
translit.tables["English transcription lowercase first pass"] = translit.ru_trsc_en_low_first

---------------------------------------------------------
-- Uppercase English simple transcription---first pass --
---------------------------------------------------------
translit.ru_trsc_en_upp_first = {
  [" Е"] = " Ye",
  ["Ъе"] = "Ye", -- The key is all-Cyrillic.
  ["Ье"] = "Ye",
}
translit.tables["English transcription uppercase first pass"] = translit.ru_trsc_en_upp_first

--------------------------------------------
-- Lowercase English simple transcription --
--------------------------------------------
-- Second pass: one Cyrillic letter at a time. The hard and the soft sign
-- are dropped (mapped to the empty string).
translit.ru_trsc_en_low = {
  ["а"] = "a",  ["б"] = "b",  ["в"] = "v",  ["г"] = "g",
  ["д"] = "d",  ["е"] = "e",  ["ё"] = "e",  ["ж"] = "zh",
  ["з"] = "z",  ["и"] = "i",  ["й"] = "y",  ["к"] = "k",
  ["л"] = "l",  ["м"] = "m",  ["н"] = "n",  ["о"] = "o",
  ["п"] = "p",  ["р"] = "r",  ["с"] = "s",  ["т"] = "t",
  ["у"] = "u",  ["ф"] = "f",  ["х"] = "kh", ["ц"] = "ts",
  ["ч"] = "ch", ["ш"] = "sh", ["щ"] = "shsh",
  ["ъ"] = "",   ["ы"] = "y",  ["ь"] = "",   ["э"] = "e",
  ["ю"] = "yu", ["я"] = "ya"
}
translit.tables["English transcription lowercase second pass"] = translit.ru_trsc_en_low

--------------------------------------------
-- Uppercase English simple transcription --
--------------------------------------------
translit.ru_trsc_en_upp = {
  ["А"] = "A",  ["Б"] = "B",  ["В"] = "V",  ["Г"] = "G",
  ["Д"] = "D",  ["Е"] = "E",  ["Ё"] = "E",  ["Ж"] = "Zh",
  ["З"] = "Z",  ["И"] = "I",  ["Й"] = "Y",  ["К"] = "K",
  ["Л"] = "L",  ["М"] = "M",  ["Н"] = "N",  ["О"] = "O",
  ["П"] = "P",  ["Р"] = "R",  ["С"] = "S",  ["Т"] = "T",
  ["У"] = "U",  ["Ф"] = "F",  ["Х"] = "Kh", ["Ц"] = "Ts",
  ["Ч"] = "Ch", ["Ш"] = "Sh", ["Щ"] = "Shsh",
  ["Ъ"] = "",   ["Ы"] = "Y",  ["Ь"] = "",   ["Э"] = "E",
  ["Ю"] = "Yu", ["Я"] = "Ya"
}
translit.tables["English transcription uppercase second pass"] = translit.ru_trsc_en_upp

-- The english е-rule, Vе -> Vye
translit.ru_trsc_en_jerule = {}
for _, vow in ipairs(translit.ru_vowels) do
  translit.ru_trsc_en_jerule[vow .. "е"] = vow .. "ye"
end
translit.tables["English transcription ye-rule"] = translit.ru_trsc_en_jerule
\stopluacode

\startluacode
-----------------------------------
-- Lowercase Czech transcription --
-----------------------------------
-- The hard and the soft sign are mapped onto themselves here, i.e. they
-- are kept in the output.
translit.ru_trsc_cz_low = {
  ["а"] = "a",  ["б"] = "b",  ["в"] = "v",  ["г"] = "g",
  ["д"] = "d",  ["е"] = "e",  ["ё"] = "ë",  ["ж"] = "ž",
  ["з"] = "z",  ["и"] = "i",  ["й"] = "j",  ["к"] = "k",
  ["л"] = "l",  ["м"] = "m",  ["н"] = "n",  ["о"] = "o",
  ["п"] = "p",  ["р"] = "r",  ["с"] = "s",  ["т"] = "t",
  ["у"] = "u",  ["ф"] = "f",  ["х"] = "ch", ["ц"] = "c",
  ["ч"] = "č",  ["ш"] = "š",  ["щ"] = "šč",
  ["ъ"] = "ъ",  ["ы"] = "y",  ["ь"] = "ь",  ["э"] = "è",
  ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and тя -> ťa, but
  ["я"] = "ja"  -- that would complicate things a bit and linguists might not
}               -- agree.
translit.tables["Czech transcription lowercase"] = translit.ru_trsc_cz_low ----------------------------------- -- Uppercase Czech transcription -- ----------------------------------- translit.ru_trsc_cz_upp = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Ë", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "Ch", ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Щ"] = "Šč", ["Ъ"] = "Ъ", ["Ы"] = "Y", ["Ь"] = "Ь", ["Э"] = "È", ["Ю"] = "Ju", ["Я"] = "Ja" } translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp ---------------------------------------------- -- Lowercase Additional Czech Transcription -- ---------------------------------------------- translit.ru_trsc_cz_add_low = { ["ѕ"] = "dz", ["з"] = "z", ["ꙁ"] = "z", ["і"] = "ï", ["ѹ"] = "u", ["ѡ"] = "ō", ["ѣ"] = "ě", ["ѥ"] = "je", ["ѧ"] = "ę", ["ѩ"] = "ję", ["ѫ"] = "ǫ", ["ѭ"] = "jǫ", ["ѯ"] = "ks", ["ѱ"] = "ps", ["ѳ"] = "th", ["ѵ"] = "ÿ", } translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit.ru_trsc_cz_add_low ---------------------------------------------- -- Uppercase Additional Czech Transcription -- ---------------------------------------------- translit.ru_trsc_cz_add_upp = { ["Ѕ"] = "Dz", ["З"] = "Z", ["Ꙁ"] = "Z", ["І"] = "Ï", ["Ѹ"] = "U", ["Ѡ"] = "Ō", ["Ѣ"] = "Ě", ["Ѥ"] = "Je", ["Ѧ"] = "Ę", ["Ѩ"] = "Ję", ["Ѫ"] = "Ǫ", ["Ѭ"] = "Jǫ", ["Ѯ"] = "Ks", ["Ѱ"] = "Ps", ["Ѳ"] = "Th", ["Ѵ"] = "Ÿ", } translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit.ru_trsc_cz_add_upp \stopluacode %-===========================================================================-- %- Other transliterations -- %-===========================================================================-- \startluacode -- The following are needed because ISO 9 does not cover old Slavonic -- characters 
that became obsolete before the advent of гражданский шрифт. -- Please note that these mappings are not bijective so don't expect the result -- to be easily revertible (by machines). -- Source p. 77 of -- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf ----------------------------------------------------------------------- -- Lowercase and uppercase letter Uk -- “scientific transliteration” -- ----------------------------------------------------------------------- translit.ocs_uk = { ["oу"] = "u", ["оу"] = "u", ["Оу"] = "U", } ----------------------------------------------------------------------------- -- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” -- ----------------------------------------------------------------------------- translit.ocs_low = { ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["є"] = "e", ["ж"] = "ž", ["ꙃ"] = "ʒ", -- U+0292, alternative: dz U+01f3 ["ѕ"] = "ʒ", ["ꙁ"] = "z", ["з"] = "z", ["и"] = "i", ["і"] = "i", ["ї"] = "i", ["ћ"] = "g’", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ѹ"] = "u", ["ꙋ"] = "u", ["ф"] = "f", ["х"] = "x", ["ѡ"] = "o", --"ō", ["ѿ"] = "ot", -- U+047f ["ѽ"] = "o!", -- U+047d ["ꙍ"] = "o!", -- U+064D ["ц"] = "c", ["ч"] = "č", ["ш"] = "š", ["щ"] = "št", ["ъ"] = "ъ", ["ы"] = "y", ["ꙑ"] = "y", -- Old jery (U+a651) as used e.g. by the OCS Wikipedia. 
["ь"] = "ь", ["ѣ"] = "ě", ["ю"] = "ju", ["ꙗ"] = "ja", ["ѥ"] = "je", ["ѧ"] = "ę", ["ѩ"] = "ję", ["ѫ"] = "ǫ", ["ѭ"] = "jǫ", ["ѯ"] = "ks", ["ѱ"] = "ps", ["ѳ"] = "th", ["ѵ"] = "ü", } translit.tables["OCS \\quotation{scientific} transliteration lowercase"] = translit.ocs_low ----------------------------------------------------------------------------- -- Uppercase pre-Peter cyrillic characters -- “scientific transliteration” -- ----------------------------------------------------------------------------- translit.ocs_upp = { ["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Є"] = "E", ["Ж"] = "Ž", ["Ꙃ"] = "Ʒ", -- U+01b7, alternative: Dz U+01f2 ["Ѕ"] = "Ʒ", ["Ꙁ"] = "Z", ["З"] = "Z", ["И"] = "I", ["І"] = "I", ["Ї"] = "I", ["Ћ"] = "G’", ["К"] = "K", ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", ["У"] = "u", ["Ѹ"] = "U", ["ꙋ"] = "U", ["Ф"] = "F", ["Х"] = "X", ["Ѡ"] = "Ō", ["Ѿ"] = "Ot", -- U+047c ["Ѽ"] = "O!", -- U+047e ["Ꙍ"] = "O!", -- U+064C ["Ц"] = "C", ["Ч"] = "Č", ["Ш"] = "Š", ["Щ"] = "Št", ["Ъ"] = "Ŭ", ["Ы"] = "Y", ["Ꙑ"] = "Y", -- U+a650 ["Ь"] = "Ĭ", ["Ѣ"] = "Ě", ["Ю"] = "Ju", ["Ꙗ"] = "Ja", ["Ѥ"] = "Je", ["Ѧ"] = "Ę", ["Ѩ"] = "Ję", ["Ѫ"] = "Ǫ", ["Ѭ"] = "Jǫ", ["Ѯ"] = "Ks", ["Ѱ"] = "Ps", ["Ѳ"] = "Th", ["Ѵ"] = "Ü", } translit.tables["OCS \\quotation{scientific} transliteration uppercase"] = translit.ocs_upp -- Note on the additional tables: these cover characters that are not defined -- in ISO 9 but have a “scientific” transliteration. You may use them as -- complementary mapping to ISO 9, trading off homogenity for completeness. 
----------------------------------------------------------------------------------------
-- Lowercase additional pre-Peter cyrillic characters -- “scientific transliteration” --
----------------------------------------------------------------------------------------
translit.ocs_add_low = {
  ["ѕ"] = "dz", -- Mapped to ẑ in ISO 9 (Macedonian …)
  ["ѯ"] = "ks",
  ["ѱ"] = "ps",
  ["ѡ"] = "ô",
  ["ѿ"] = "ot", -- U+047f
  ["ѫ"] = "ǫ",  -- Mapped to ǎ in ISO 9.
  ["ѧ"] = "ę",
  ["ѭ"] = "jǫ",
  ["ѩ"] = "ję",
  ["ѥ"] = "je",
  ["ѹ"] = "u",  -- Digraph uk.
  ["ꙋ"] = "u",  -- Monograph uk, U+a64b. (No glyph yet in the "fixed" font in February 2010 …)
  ["ꙑ"] = "y",  -- U+a651
}
translit.tables["OCS \\quotation{scientific} transliteration additional lowercase"] = translit.ocs_add_low

----------------------------------------------------------------------------------------
-- Uppercase additional pre-Peter cyrillic characters -- “scientific transliteration” --
----------------------------------------------------------------------------------------
-- Uppercase counterparts of ocs_add_low; every replacement starts with an
-- uppercase Latin letter.
translit.ocs_add_upp = {
  ["Ѕ"] = "Dz",
  ["Ѯ"] = "Ks",
  ["Ѱ"] = "Ps",
  ["Ѡ"] = "Ô",
  ["Ѿ"] = "Ot", -- U+047e
  ["Ѫ"] = "Ǫ",
  ["Ѧ"] = "Ę",
  ["Ѭ"] = "Jǫ",
  ["Ѩ"] = "Ję",
  ["Ѥ"] = "Je",
  ["Ѹ"] = "U", -- Digraph uk.
  ["Ꙋ"] = "U", -- Monograph Uk, U+a64a.
["Ꙑ"] = "Y", -- U+a650 } translit.tables["OCS \\quotation{scientific} transliteration additional uppercase"] = translit.ocs_add_upp \stopluacode %-===========================================================================-- %- Glagolica -- %-===========================================================================-- \startluacode ------------------------------------------- -- Lowercase Glagolitic Transliteration -- ------------------------------------------- translit.ocs_gla_low = { ["ⰰ"] = "a", -- GLAGOLITIC SMALL LETTER AZU ["ⰱ"] = "b", -- GLAGOLITIC SMALL LETTER BUKY ["ⰲ"] = "v", -- GLAGOLITIC SMALL LETTER VEDE ["ⰳ"] = "g", -- GLAGOLITIC SMALL LETTER GLAGOLI ["ⰴ"] = "d", -- GLAGOLITIC SMALL LETTER DOBRO ["ⰵ"] = "e", -- GLAGOLITIC SMALL LETTER YESTU ["ⰶ"] = "ž", -- GLAGOLITIC SMALL LETTER ZHIVETE ["ⰷ"] = "ʒ", -- GLAGOLITIC SMALL LETTER DZELO ["ⰸ"] = "z", -- GLAGOLITIC SMALL LETTER ZEMLJA ["ⰹ"] = "i", -- GLAGOLITIC SMALL LETTER IZHE ["ⰺ"] = "i", -- GLAGOLITIC SMALL LETTER INITIAL IZHE ["ⰻ"] = "i", -- GLAGOLITIC SMALL LETTER I ["ⰼ"] = "g’", -- GLAGOLITIC SMALL LETTER DJERVI ["ⰽ"] = "k", -- GLAGOLITIC SMALL LETTER KAKO ["ⰾ"] = "l", -- GLAGOLITIC SMALL LETTER LJUDIJE ["ⰿ"] = "m", -- GLAGOLITIC SMALL LETTER MYSLITE ["ⱀ"] = "n", -- GLAGOLITIC SMALL LETTER NASHI ["ⱁ"] = "o", -- GLAGOLITIC SMALL LETTER ONU ["ⱂ"] = "p", -- GLAGOLITIC SMALL LETTER POKOJI ["ⱃ"] = "r", -- GLAGOLITIC SMALL LETTER RITSI ["ⱄ"] = "s", -- GLAGOLITIC SMALL LETTER SLOVO ["ⱅ"] = "t", -- GLAGOLITIC SMALL LETTER TVRIDO ["ⱆ"] = "u", -- GLAGOLITIC SMALL LETTER UKU ["ⱇ"] = "f", -- GLAGOLITIC SMALL LETTER FRITU ["ⱈ"] = "x", -- GLAGOLITIC SMALL LETTER HERU ["ⱉ"] = "o", -- GLAGOLITIC SMALL LETTER OTU ["ⱊ"] = "?", -- GLAGOLITIC SMALL LETTER PE ["ⱋ"] = "št", -- GLAGOLITIC SMALL LETTER SHTA ["ⱌ"] = "c", -- GLAGOLITIC SMALL LETTER TSI ["ⱍ"] = "č", -- GLAGOLITIC SMALL LETTER CHRIVI ["ⱎ"] = "š", -- GLAGOLITIC SMALL LETTER SHA ["ⱏ"] = "ъ", -- GLAGOLITIC SMALL LETTER YERU ["ⱐ"] = "ь", -- GLAGOLITIC SMALL LETTER 
YERI ["ⱑ"] = "ě", -- GLAGOLITIC SMALL LETTER YATI ["ⱒ"] = "x", -- GLAGOLITIC SMALL LETTER SPIDERY HA ["ⱓ"] = "ju", -- GLAGOLITIC SMALL LETTER YU ["ⱔ"] = "ę", -- GLAGOLITIC SMALL LETTER SMALL YUS ["ⱕ"] = "y̨", -- GLAGOLITIC SMALL LETTER SMALL YUS WITH TAIL ["ⱖ"] = "??", -- GLAGOLITIC SMALL LETTER YO ["ⱗ"] = "ję", -- GLAGOLITIC SMALL LETTER IOTATED SMALL YU ["ⱘ"] = "ǫ", -- GLAGOLITIC SMALL LETTER BIG YUS ["ⱙ"] = "jǫ", -- GLAGOLITIC SMALL LETTER IOTATED BIG YUS ["ⱚ"] = "th", -- GLAGOLITIC SMALL LETTER FITA ["ⱛ"] = "ü", -- GLAGOLITIC SMALL LETTER IZHITSA ["ⱜ"] = "??", -- GLAGOLITIC SMALL LETTER SHTAPIC ["ⱝ"] = "??", -- GLAGOLITIC SMALL LETTER TROKUTASTI A ["ⱞ"] = "m", -- GLAGOLITIC SMALL LETTER LATINATE MYSLITE } translit.tables["Glagolica transliteration for OCS lowercase"] = translit.ocs_gla_low ------------------------------------------------ -- Uppercase (?!) Glagolitic Transliteration -- ------------------------------------------------ translit.ocs_gla_upp = { ["Ⰰ"] = "A", -- GLAGOLITIC CAPITAL LETTER AZU ["Ⰱ"] = "B", -- GLAGOLITIC CAPITAL LETTER BUKY ["Ⰲ"] = "V", -- GLAGOLITIC CAPITAL LETTER VEDE ["Ⰳ"] = "G", -- GLAGOLITIC CAPITAL LETTER GLAGOLI ["Ⰴ"] = "D", -- GLAGOLITIC CAPITAL LETTER DOBRO ["Ⰵ"] = "E", -- GLAGOLITIC CAPITAL LETTER YESTU ["Ⰶ"] = "Ž", -- GLAGOLITIC CAPITAL LETTER ZHIVETE ["Ⰷ"] = "Ʒ", -- GLAGOLITIC CAPITAL LETTER DZELO ["Ⰸ"] = "Z", -- GLAGOLITIC CAPITAL LETTER ZEMLJA ["Ⰹ"] = "I", -- GLAGOLITIC CAPITAL LETTER IZHE ["Ⰺ"] = "I", -- GLAGOLITIC CAPITAL LETTER INITIAL IZHE ["Ⰻ"] = "I", -- GLAGOLITIC CAPITAL LETTER I ["Ⰼ"] = "G’", -- GLAGOLITIC CAPITAL LETTER DJERVI ["Ⰽ"] = "K", -- GLAGOLITIC CAPITAL LETTER KAKO ["Ⰾ"] = "L", -- GLAGOLITIC CAPITAL LETTER LJUDIJE ["Ⰿ"] = "M", -- GLAGOLITIC CAPITAL LETTER MYSLITE ["Ⱀ"] = "N", -- GLAGOLITIC CAPITAL LETTER NASHI ["Ⱁ"] = "O", -- GLAGOLITIC CAPITAL LETTER ONU ["Ⱂ"] = "P", -- GLAGOLITIC CAPITAL LETTER POKOJI ["Ⱃ"] = "R", -- GLAGOLITIC CAPITAL LETTER RITSI ["Ⱄ"] = "S", -- GLAGOLITIC CAPITAL LETTER SLOVO ["Ⱅ"] = 
"T", -- GLAGOLITIC CAPITAL LETTER TVRIDO ["Ⱆ"] = "U", -- GLAGOLITIC CAPITAL LETTER UKU ["Ⱇ"] = "F", -- GLAGOLITIC CAPITAL LETTER FRITU ["Ⱈ"] = "X", -- GLAGOLITIC CAPITAL LETTER HERU ["Ⱉ"] = "O", -- GLAGOLITIC CAPITAL LETTER OTU ["Ⱊ"] = "?", -- GLAGOLITIC CAPITAL LETTER PE ["Ⱋ"] = "Št", -- GLAGOLITIC CAPITAL LETTER SHTA ["Ⱌ"] = "C", -- GLAGOLITIC CAPITAL LETTER TSI ["Ⱍ"] = "Č", -- GLAGOLITIC CAPITAL LETTER CHRIVI ["Ⱎ"] = "Š", -- GLAGOLITIC CAPITAL LETTER SHA ["Ⱏ"] = "Ъ", -- GLAGOLITIC CAPITAL LETTER YERU ["Ⱐ"] = "Ь", -- GLAGOLITIC CAPITAL LETTER YERI ["Ⱑ"] = "Ě", -- GLAGOLITIC CAPITAL LETTER YATI ["Ⱒ"] = "X", -- GLAGOLITIC CAPITAL LETTER SPIDERY HA ["Ⱓ"] = "Ju", -- GLAGOLITIC CAPITAL LETTER YU ["Ⱔ"] = "Ę", -- GLAGOLITIC CAPITAL LETTER SMALL YUS ["Ⱕ"] = "Y̨", -- GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL ["Ⱖ"] = "??", -- GLAGOLITIC CAPITAL LETTER YO ["Ⱗ"] = "Ję", -- GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS ["Ⱘ"] = "Ǫ", -- GLAGOLITIC CAPITAL LETTER BIG YUS ["Ⱙ"] = "Jǫ", -- GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS ["Ⱚ"] = "Th", -- GLAGOLITIC CAPITAL LETTER FITA ["Ⱛ"] = "Ü", -- GLAGOLITIC CAPITAL LETTER IZHITSA ["Ⱜ"] = "??", -- GLAGOLITIC CAPITAL LETTER SHTAPIC ["Ⱝ"] = "??", -- GLAGOLITIC CAPITAL LETTER TROKUTASTI A ["Ⱞ"] = "M", -- GLAGOLITIC CAPITAL LETTER LATINATE MYSLIT } translit.tables["Glagolica transliteration for OCS uppercase"] = translit.ocs_gla_upp \stopluacode %-===========================================================================-- %- Greek -- %-===========================================================================-- \startluacode -- Note that the Greek transliteration mapping isn't bijective so transliterated -- texts won't be reversible. (Shouldn't be impossible to make one up using -- diacritics on latin characters to represent all possible combinations of -- Greek breathings + accents.) 
-- Good reading on composed / precombined unicode:
-- http://www.tlg.uci.edu/~opoudjis/unicode/unicode_gaps.html#precomposed

-------------------------------------------------
-- Lowercase Greek Initial Position Diphthongs --
-------------------------------------------------

-- Diphthongs whose second vowel carries a rough breathing. Every key starts
-- with a space and every value with " h", so only occurrences directly after
-- a space are matched and the "h" ends up in front of the diphthong.
translit.gr_di_in_low = {
  [" αὑ"] = " hau", [" αὕ"] = " hau", [" αὓ"] = " hau", [" αὗ"] = " hau",
  [" εὑ"] = " heu", [" εὕ"] = " heu", [" εὓ"] = " heu", [" εὗ"] = " heu",
  [" ηὑ"] = " hēu", [" ηὕ"] = " hēu", [" ηὓ"] = " hēu", [" ηὗ"] = " hēu",
  [" οὑ"] = " hu",  [" οὕ"] = " hu",  [" οὓ"] = " hu",  [" οὗ"] = " hu",
  [" ωὑ"] = " hōu", [" ωὕ"] = " hōu", [" ωὓ"] = " hōu", [" ωὗ"] = " hōu"
}

translit.tables["Greek transliteration initial breathing diphthongs lowercase"] = translit.gr_di_in_low

-------------------------------------------------
-- Uppercase Greek Initial Position Diphthongs --
-------------------------------------------------

-- Capitalised counterpart of gr_di_in_low (same leading-space convention).
translit.gr_di_in_upp = {
  [" Αὑ"] = " Hau", [" Αὕ"] = " Hau", [" Αὓ"] = " Hau", [" Αὗ"] = " Hau",
  [" Εὑ"] = " Heu", [" Εὕ"] = " Heu", [" Εὓ"] = " Heu", [" Εὗ"] = " Heu",
  [" Ηὑ"] = " Hēu", [" Ηὕ"] = " Hēu", [" Ηὓ"] = " Hēu", [" Ηὗ"] = " Hēu",
  [" Οὑ"] = " Hu",  [" Οὕ"] = " Hu",  [" Οὓ"] = " Hu",  [" Οὗ"] = " Hu",
  [" Ωὑ"] = " Hōu", [" Ωὕ"] = " Hōu", [" Ωὓ"] = " Hōu", [" Ωὗ"] = " Hōu"
}

translit.tables["Greek transliteration initial breathing diphthongs uppercase"] = translit.gr_di_in_upp

---------------------------------------
-- Lowercase Greek Initial Position  --
---------------------------------------

-- Single vowels with rough breathing directly after a space: " <vowel>" is
-- replaced by " h<vowel>".
translit.gr_in_low = {
  [" ἁ"] = " ha", [" ἅ"] = " ha", [" ἃ"] = " ha", [" ἇ"] = " ha",
  [" ᾁ"] = " ha", [" ᾅ"] = " ha", [" ᾃ"] = " ha", [" ᾇ"] = " ha",
  [" ἑ"] = " he", [" ἕ"] = " he", [" ἓ"] = " he",
  [" ἡ"] = " hē", [" ἥ"] = " hē", [" ἣ"] = " hē", [" ἧ"] = " hē",
  [" ᾑ"] = " hē", [" ᾕ"] = " hē", [" ᾓ"] = " hē", [" ᾗ"] = " hē",
  [" ἱ"] = " hi", [" ἵ"] = " hi", [" ἳ"] = " hi", [" ἷ"] = " hi",
  [" ὁ"] = " ho", [" ὅ"] = " ho", [" ὃ"] = " ho",
  [" ὑ"] = " hy", [" ὕ"] = " hy", [" ὓ"] = " hy", [" ὗ"] = " hy",
  [" ὡ"] = " hō", [" ὥ"] = " hō", [" ὣ"] = " hō", [" ὧ"] = " hō",
  [" ᾡ"] = " hō", [" ᾥ"] = " hō", [" ᾣ"] = " hō", [" ᾧ"] = " hō",
}

translit.tables["Greek transliteration initial breathing lowercase"] = translit.gr_in_low

---------------------------------------
-- Uppercase Greek Initial Position  --
---------------------------------------

-- Capitalised counterpart of gr_in_low.
translit.gr_in_upp = {
  [" Ἁ"] = " Ha", [" Ἅ"] = " Ha", [" Ἃ"] = " Ha", [" Ἇ"] = " Ha",
  [" ᾉ"] = " Ha", [" ᾍ"] = " Ha", [" ᾋ"] = " Ha", [" ᾏ"] = " Ha",
  [" Ἑ"] = " He", [" Ἕ"] = " He", [" Ἓ"] = " He",
  [" Ἡ"] = " Hē", [" Ἥ"] = " Hē", [" Ἣ"] = " Hē", [" Ἧ"] = " Hē",
  [" ᾙ"] = " Hē", [" ᾝ"] = " Hē", [" ᾛ"] = " Hē", [" ᾟ"] = " Hē",
  [" Ἱ"] = " Hi", [" Ἵ"] = " Hi", [" Ἳ"] = " Hi", [" Ἷ"] = " Hi",
  [" Ὁ"] = " Ho", [" Ὅ"] = " Ho", [" Ὃ"] = " Ho",
  [" Ὑ"] = " Hy", [" Ὕ"] = " Hy", [" Ὓ"] = " Hy", [" Ὗ"] = " Hy",
  [" Ὡ"] = " Hō", [" Ὥ"] = " Hō", [" Ὣ"] = " Hō", [" Ὧ"] = " Hō",
  [" ᾩ"] = " Hō", [" ᾭ"] = " Hō", [" ᾫ"] = " Hō", [" ᾯ"] = " Hō",
}

translit.tables["Greek transliteration initial breathing uppercase"] = translit.gr_in_upp

---------------------------------
-- Lowercase Greek Diphthongs  --
---------------------------------

-- Diphthongs in any position; breathings and accents on the second vowel are
-- dropped. The last entry handles the double rho.
-- NOTE(review): the key "ου" is listed three times as it appears here; the
-- entries may originally have been distinct code point sequences -- confirm.
translit.gr_di_low = {
  ["αυ"] = "au", ["αύ"] = "au", ["αὺ"] = "au", ["αῦ"] = "au",
  ["αὐ"] = "au", ["αὔ"] = "au", ["αὒ"] = "au", ["αὖ"] = "au",
  ["αὑ"] = "au", ["αὕ"] = "au", ["αὓ"] = "au", ["αὗ"] = "au",
  ["ευ"] = "eu", ["εύ"] = "eu", ["εὺ"] = "eu", ["εῦ"] = "eu",
  ["εὐ"] = "eu", ["εὔ"] = "eu", ["εὒ"] = "eu", ["εὖ"] = "eu",
  ["εὑ"] = "eu", ["εὕ"] = "eu", ["εὓ"] = "eu", ["εὗ"] = "eu",
  ["ηυ"] = "ēu", ["ηύ"] = "ēu", ["ηὺ"] = "ēu", ["ηῦ"] = "ēu",
  ["ηὐ"] = "ēu", ["ηὔ"] = "ēu", ["ηὒ"] = "ēu", ["ηὖ"] = "ēu",
  ["ηὑ"] = "ēu", ["ηὕ"] = "ēu", ["ηὓ"] = "ēu", ["ηὗ"] = "ēu",
  ["ου"] = "u", ["ου"] = "u", ["ου"] = "u", ["ού"] = "u",
  ["οὺ"] = "u", ["οῦ"] = "u", ["οὐ"] = "u", ["οὔ"] = "u",
  ["οὒ"] = "u", ["οὖ"] = "u", ["οὑ"] = "u", ["οὕ"] = "u",
  ["οὓ"] = "u", ["οὗ"] = "u",
  ["ωυ"] = "ōu", ["ωύ"] = "ōu", ["ωὺ"] = "ōu", ["ωῦ"] = "ōu",
  ["ωὐ"] = "ōu", ["ωὔ"] = "ōu", ["ωὒ"] = "ōu", ["ωὖ"] = "ōu",
  ["ωὑ"] = "ōu", ["ωὕ"] = "ōu", ["ωὓ"] = "ōu", ["ωὗ"] = "ōu",
  ["ῤῥ"] = "rrh",
}

-- Register the diphthong table itself. This used to register gr_in_low, so
-- translit.show_all_tabs() printed the wrong table under this heading.
translit.tables["Greek transliteration diphthongs lowercase"] = translit.gr_di_low

---------------------------------
-- Uppercase Greek Diphthongs  --
---------------------------------

-- Capitalised counterpart of gr_di_low (first letter uppercase only).
-- NOTE(review): "Ου" is listed three times as it appears here -- see above.
translit.gr_di_upp = {
  ["Αυ"] = "Au", ["Αύ"] = "Au", ["Αὺ"] = "Au", ["Αῦ"] = "Au",
  ["Αὐ"] = "Au", ["Αὔ"] = "Au", ["Αὒ"] = "Au", ["Αὖ"] = "Au",
  ["Αὑ"] = "Au", ["Αὕ"] = "Au", ["Αὓ"] = "Au", ["Αὗ"] = "Au",
  ["Ευ"] = "Eu", ["Εύ"] = "Eu", ["Εὺ"] = "Eu", ["Εῦ"] = "Eu",
  ["Εὐ"] = "Eu", ["Εὔ"] = "Eu", ["Εὒ"] = "Eu", ["Εὖ"] = "Eu",
  ["Εὑ"] = "Eu", ["Εὕ"] = "Eu", ["Εὓ"] = "Eu", ["Εὗ"] = "Eu",
  ["Ηυ"] = "Ēu", ["Ηύ"] = "Ēu", ["Ηὺ"] = "Ēu", ["Ηῦ"] = "Ēu",
  ["Ηὐ"] = "Ēu", ["Ηὔ"] = "Ēu", ["Ηὒ"] = "Ēu", ["Ηὖ"] = "Ēu",
  ["Ηὑ"] = "Ēu", ["Ηὕ"] = "Ēu", ["Ηὓ"] = "Ēu", ["Ηὗ"] = "Ēu",
  ["Ου"] = "U", ["Ου"] = "U", ["Ου"] = "U", ["Ού"] = "U",
  ["Οὺ"] = "U", ["Οῦ"] = "U", ["Οὐ"] = "U", ["Οὔ"] = "U",
  ["Οὒ"] = "U", ["Οὖ"] = "U", ["Οὑ"] = "U", ["Οὕ"] = "U",
  ["Οὓ"] = "U", ["Οὗ"] = "U",
  ["Ωυ"] = "Ōu", ["Ωύ"] = "Ōu", ["Ωὺ"] = "Ōu", ["Ωῦ"] = "Ōu",
  ["Ωὐ"] = "Ōu", ["Ωὔ"] = "Ōu", ["Ωὒ"] = "Ōu", ["Ωὖ"] = "Ōu",
  ["Ωὑ"] = "Ōu", ["Ωὕ"] = "Ōu", ["Ωὓ"] = "Ōu", ["Ωὗ"] = "Ōu",
}

-- Same fix as for the lowercase table: this used to register gr_in_upp.
translit.tables["Greek transliteration diphthongs uppercase"] = translit.gr_di_upp

-- The following will be used in an option that ensures transcription of
-- nasalization, e.g.
-- Ἁγχίσης -> “Anchises” (instead of “Agchises”)

-- Gamma before another velar (γ, κ, ξ, χ) is written as "n".
translit.gr_nrule = {
  ["γγ"] = "ng",
  ["γκ"] = "nk",
  ["γξ"] = "nx",
  ["γχ"] = "nch",
}

translit.tables["Greek transliteration optional nasalization"] = translit.gr_nrule
\stopluacode

\startluacode
--------------------------------------
-- Lowercase Greek Transliteration  --
--------------------------------------

-- Single lowercase letters. Every accented / breathed / iota-subscript form
-- listed here maps to the same Latin string as its base letter.
-- NOTE(review): some keys ("ἁ", "η") are listed twice as they appear here;
-- they may originally have been distinct code point sequences -- confirm.
translit.gr_low = {
  ["α"] = "a", ["ά"] = "a", ["ὰ"] = "a", ["ᾶ"] = "a", ["ᾳ"] = "a",
  ["ἀ"] = "a", ["ἁ"] = "a", ["ἄ"] = "a", ["ἂ"] = "a", ["ἆ"] = "a",
  ["ἁ"] = "a", ["ἅ"] = "a", ["ἃ"] = "a", ["ἇ"] = "a", ["ᾁ"] = "a",
  ["ᾴ"] = "a", ["ᾲ"] = "a", ["ᾷ"] = "a", ["ᾄ"] = "a", ["ᾂ"] = "a",
  ["ᾅ"] = "a", ["ᾃ"] = "a", ["ᾆ"] = "a", ["ᾇ"] = "a",
  ["β"] = "b",
  ["γ"] = "g",
  ["δ"] = "d",
  ["ε"] = "e", ["έ"] = "e", ["ὲ"] = "e", ["ἐ"] = "e", ["ἔ"] = "e",
  ["ἒ"] = "e", ["ἑ"] = "e", ["ἕ"] = "e", ["ἓ"] = "e",
  ["ζ"] = "z",
  ["η"] = "ē", ["η"] = "ē", ["ή"] = "ē", ["ὴ"] = "ē", ["ῆ"] = "ē",
  ["ῃ"] = "ē", ["ἠ"] = "ē", ["ἤ"] = "ē", ["ἢ"] = "ē", ["ἦ"] = "ē",
  ["ᾐ"] = "ē", ["ἡ"] = "ē", ["ἥ"] = "ē", ["ἣ"] = "ē", ["ἧ"] = "ē",
  ["ᾑ"] = "ē", ["ῄ"] = "ē", ["ῂ"] = "ē", ["ῇ"] = "ē", ["ᾔ"] = "ē",
  ["ᾒ"] = "ē", ["ᾕ"] = "ē", ["ᾓ"] = "ē", ["ᾖ"] = "ē", ["ᾗ"] = "ē",
  ["θ"] = "th",
  ["ι"] = "i", ["ί"] = "i", ["ὶ"] = "i", ["ῖ"] = "i", ["ἰ"] = "i",
  ["ἴ"] = "i", ["ἲ"] = "i", ["ἶ"] = "i", ["ἱ"] = "i", ["ἵ"] = "i",
  ["ἳ"] = "i", ["ἷ"] = "i", ["ϊ"] = "i", ["ΐ"] = "i", ["ῒ"] = "i",
  ["ῗ"] = "i",
  ["κ"] = "k",
  ["λ"] = "l",
  ["μ"] = "m",
  ["ν"] = "n",
  ["ξ"] = "x",
  ["ο"] = "o", ["ό"] = "o", ["ὸ"] = "o", ["ὀ"] = "o", ["ὄ"] = "o",
  ["ὂ"] = "o", ["ὁ"] = "o", ["ὅ"] = "o", ["ὃ"] = "o",
  ["π"] = "p",
  ["ρ"] = "r", ["ῤ"] = "r", ["ῥ"] = "rh",
  ["σ"] = "s", ["ς"] = "s",
  ["τ"] = "t",
  ["υ"] = "y", ["ύ"] = "y", ["ὺ"] = "y", ["ῦ"] = "y", ["ὐ"] = "y",
  ["ὔ"] = "y", ["ὒ"] = "y", ["ὖ"] = "y", ["ὑ"] = "y", ["ὕ"] = "y",
  ["ὓ"] = "y", ["ὗ"] = "y", ["ϋ"] = "y", ["ΰ"] = "y", ["ῢ"] = "y",
  ["ῧ"] = "y",
  ["φ"] = "ph",
  ["χ"] = "ch",
  ["ψ"] = "ps",
  ["ω"] = "ō", ["ώ"] = "ō", ["ὼ"] = "ō", ["ῶ"] = "ō", ["ῳ"] = "ō",
  ["ὠ"] = "ō", ["ὤ"] = "ō", ["ὢ"] = "ō", ["ὦ"] = "ō", ["ᾠ"] = "ō",
  ["ὡ"] = "ō", ["ὥ"] = "ō", ["ὣ"] = "ō", ["ὧ"] = "ō", ["ᾡ"] = "ō",
  ["ῴ"] = "ō", ["ῲ"] = "ō", ["ῷ"] = "ō", ["ᾤ"] = "ō", ["ᾢ"] = "ō",
  ["ᾥ"] = "ō", ["ᾣ"] = "ō", ["ᾦ"] = "ō", ["ᾧ"] = "ō",
}

translit.tables["Greek transliteration lowercase"] = translit.gr_low

--------------------------------------
-- Uppercase Greek Transliteration  --
--------------------------------------

-- Single uppercase letters. The commented-out entries are lowercase forms
-- kept as markers for the slots that have no uppercase entry (see the
-- author's remark next to the alpha entries below).
translit.gr_upp = {
  ["Α"] = "A", ["Ά"] = "A", ["Ὰ"] = "A",
  --["ᾶ"] = "A",
  ["ᾼ"] = "A", ["Ἀ"] = "A", ["Ἁ"] = "A", ["Ἄ"] = "A", ["Ἂ"] = "A",
  ["Ἆ"] = "A", ["Ἁ"] = "A", ["Ἅ"] = "A", ["Ἃ"] = "A", ["Ἇ"] = "A",
  ["ᾉ"] = "A",
  --["ᾴ"] = "A", -- I’d be very happy if anybody could explain to me
  --["ᾲ"] = "A", -- why there's Ά, ᾌ and ᾼ but no “A + iota subscript
  --["ᾷ"] = "A", -- + acute” …, same for Η, Υ and Ω + diacritica.
  ["ᾌ"] = "A", ["ᾊ"] = "A", ["ᾍ"] = "A", ["ᾋ"] = "A", ["ᾎ"] = "A",
  ["ᾏ"] = "A",
  ["Β"] = "B",
  ["Γ"] = "G",
  ["Δ"] = "D",
  ["Ε"] = "E", ["Έ"] = "E", ["Ὲ"] = "E", ["Ἐ"] = "E", ["Ἔ"] = "E",
  ["Ἒ"] = "E", ["Ἑ"] = "E", ["Ἕ"] = "E", ["Ἓ"] = "E",
  ["Ζ"] = "Z",
  ["Η"] = "Ē", ["Η"] = "Ē", ["Ή"] = "Ē", ["Ὴ"] = "Ē",
  --["ῆ"] = "Ē",
  ["ῌ"] = "Ē", ["Ἠ"] = "Ē", ["Ἤ"] = "Ē", ["Ἢ"] = "Ē", ["Ἦ"] = "Ē",
  ["ᾘ"] = "Ē", ["Ἡ"] = "Ē", ["Ἥ"] = "Ē", ["Ἣ"] = "Ē", ["Ἧ"] = "Ē",
  ["ᾙ"] = "Ē",
  --["ῄ"] = "Ē",
  --["ῂ"] = "Ē",
  --["ῇ"] = "Ē",
  ["ᾜ"] = "Ē", ["ᾚ"] = "Ē", ["ᾝ"] = "Ē", ["ᾛ"] = "Ē", ["ᾞ"] = "Ē",
  ["ᾟ"] = "Ē",
  ["Θ"] = "Th",
  ["Ι"] = "I", ["Ί"] = "I", ["Ὶ"] = "I",
  --["ῖ"] = "I",
  ["Ἰ"] = "I", ["Ἴ"] = "I", ["Ἲ"] = "I", ["Ἶ"] = "I", ["Ἱ"] = "I",
  ["Ἵ"] = "I", ["Ἳ"] = "I", ["Ἷ"] = "I", ["Ϊ"] = "I",
  --["ΐ"] = "I",
  --["ῒ"] = "I",
  --["ῗ"] = "I",
  ["Κ"] = "K",
  ["Λ"] = "L",
  ["Μ"] = "M",
  ["Ν"] = "N",
  ["Ξ"] = "X",
  ["Ο"] = "O", ["Ό"] = "O", ["Ὸ"] = "O", ["Ὀ"] = "O", ["Ὄ"] = "O",
  ["Ὂ"] = "O", ["Ὁ"] = "O", ["Ὅ"] = "O", ["Ὃ"] = "O",
  ["Π"] = "P",
  ["Ρ"] = "R",
  --["ῤ"] = "R",
  ["Ῥ"] = "Rh",
  ["Σ"] = "S", ["Σ"] = "S",
  ["Τ"] = "T",
  ["Υ"] = "Y", ["Ύ"] = "Y", ["Ὺ"] = "Y",
  --["ῦ"] = "Y",
  --["ὐ"] = "Y",
--["ὔ"] = "Y", --["ὒ"] = "Y", --["ὖ"] = "Y", ["Ὑ"] = "Y", ["Ὕ"] = "Y", ["Ὓ"] = "Y", ["Ὗ"] = "Y", ["Ϋ"] = "Y", --["ΰ"] = "Y", --["ῢ"] = "Y", --["ῧ"] = "Y", ["Φ"] = "Ph", ["Χ"] = "Ch", ["Ψ"] = "Ps", ["Ω"] = "Ō", ["Ώ"] = "Ō", ["Ὼ"] = "Ō", --["ῶ"] = "Ō", ["ῼ"] = "Ō", ["Ὠ"] = "Ō", ["Ὤ"] = "Ō", ["Ὢ"] = "Ō", ["Ὦ"] = "Ō", ["ᾨ"] = "Ō", ["Ὡ"] = "Ō", ["Ὥ"] = "Ō", ["Ὣ"] = "Ō", ["Ὧ"] = "Ō", ["ᾩ"] = "Ō", --["ῴ"] = "Ō", --["ῲ"] = "Ō", --["ῷ"] = "Ō", ["ᾬ"] = "Ō", ["ᾪ"] = "Ō", ["ᾭ"] = "Ō", ["ᾫ"] = "Ō", ["ᾮ"] = "Ō", ["ᾯ"] = "Ō", } translit.tables["Greek transliteration uppercase"] = translit.gr_upp ------------ -- Varia -- ------------ translit.gr_other = { ["ϝ"] = "w", ["Ϝ"] = "W", ["ϙ"] = "q", ["Ϙ"] = "Q", ["ϡ"] = "ss", ["Ϡ"] = "Ss", } translit.tables["Greek transliteration archaic characters"] = translit.gr_other \stopluacode %-===========================================================================-- %- End Of Tables -- %-===========================================================================-- %D The function \type{translit.subst(s, t)} is used to replace any key of %D \type{t} that occurs in \type{s} with the according value of \type{t}. \startluacode function translit.subst (text, tab) for symbol, replacement in pairs(tab) do -- using ordinary gsub as suggested by Taco text = string.gsub(text, symbol, replacement) end return text end \stopluacode %D \type{translit.add_table(t, ta)} is used to build the final substitution tables %D from those we defined earlier; any keys in the previous table \type{t} are %D overwritten if they exist in the added table \type{ta}, too. \startluacode function translit.add_table (t, t_add) for key, value in pairs (t_add) do t[key] = value end return t end \stopluacode %D We might want to have all the table data nicely formatted by \CONTEXT\ %D itself, here's how we'll do it. \type{translit.show_tab(t)} handles a %D single table \type{t}, builds a Natural TABLE out of its content and %D hands it down to the machine for typesetting. 
%D For debugging purposes it
%D does not only print the replacement pairs but shows their code points as
%D well.

\startluacode
function translit.show_tab (tab)
  -- Typeset one substitution table as a Natural TABLE: a running number, the
  -- key, its length, the replacement, its length, and the utf.byte values of
  -- both strings. Rows come in pairs() order, i.e. unsorted.
  local titles = { "number", "letters", "n", "replacement", "n", "bytes", "repl. bytes" }

  -- List the utf.byte value of every position of s, each followed by a space.
  -- Some characters might not be replaced but removed, so an empty string is
  -- reported as the text "nil".
  local function codepoints (s)
    if #s == 0 then
      return "nil"
    end
    local listing, pos = "", 1
    -- walk until utf.byte runs off the end of the string
    while utf.byte(s, pos) ~= nil do
      listing = listing .. utf.byte(s, pos) .. " "
      pos = pos + 1
    end
    return listing
  end

  -- Emit one row holding the column titles; kind is "TH" (head) or "TC" (foot).
  local function title_row (kind)
    context.bTR()
    for idx = 1, #titles do
      context["b" .. kind]()
      context(titles[idx])
      context["e" .. kind]()
    end
    context.eTR()
  end

  -- Emit one ordinary table cell.
  local function cell (content)
    context.bTC()
    context(content)
    context.eTC()
  end

  local row = 0

  context.setupTABLE({"r"}, {"each"},  {style="\\tfx", align="center"})
  context.setupTABLE({"c"}, {"each"},  {frame="off"})
  context.setupTABLE({"r"}, {"each"},  {frame="off"})
  context.setupTABLE({"c"}, {"first"}, {style="italic"})
  context.setupTABLE({"r"}, {"first"}, {style="bold", topframe="on", bottomframe="on"})
  context.setupTABLE({"r"}, {"last"},  {style="bold", topframe="on", bottomframe="on"})

  context.bTABLE({split="yes", option="stretch"})

  context.bTABLEhead()
  title_row("TH")
  context.eTABLEhead()

  context.bTABLEbody()
  for letters, replacement in pairs(tab) do
    row = row + 1
    context.bTR()
    cell(row)
    cell(letters)
    cell(utf.len(letters))
    cell(replacement)
    cell(utf.len(replacement))
    cell(codepoints(letters))
    cell(codepoints(replacement))
    context.eTR()
  end
  context.eTABLEbody()

  context.bTABLEfoot()
  title_row("TC")
  context.eTABLEfoot()

  context.eTABLE()
end
\stopluacode

%D Having to pick out single tables for printing can be tedious, therefore we
%D let Lua do the job in our stead. \type{translit.show_all_tabs()} calls
%D \type{translit.show_tab} on every table that is registered with
%D \type{translit.tables} -- and uses its registered key as table heading.

\startluacode
function translit.show_all_tabs ()
  -- One chapter for everything, one section per registered table; the
  -- sections appear in pairs() order, so this will be quite unordered.
  context ("\\chapter{Transliterator Showing All Tables}")
  for heading, registered in pairs(translit.tables) do
    context ("\\section{" .. heading .. "}")
    translit.show_tab (registered)
  end
end
\stopluacode

%D Now to the function \type{translit.transliterate(m, t)}: it constitutes the
%D metafunction that is called by the \type{\transliterate} command and itself
%D chooses the substitution tables according to the \type{m} parameter and
%D applies them in a given order on the string \type{t}.
%D (The correct order in multi-pass substitution has to be enforced because the
%D tables are in fact unordered dictionaries.)
\startluacode
function translit.transliterate (method, text)
  -- Pick the substitution tables for `method`, run the intermediate passes
  -- (where a method needs several) and print the result back to TeX.
  -- An unknown method leaves the text untouched.

  -- Build a fresh table holding the pairs of all arguments, added left to
  -- right so that later arguments win on equal keys.
  local function merged (...)
    local union = {}
    for idx = 1, select("#", ...) do
      translit.add_table(union, (select(idx, ...)))
    end
    return union
  end

  -- table for the last pass, applied after the method-specific part
  local final = {}

  if method == "ru" then
    final = merged(translit.ru_upp, translit.ru_low)

  elseif method == "ru_old" then
    final = merged(translit.ru_upp,     translit.ru_low,
                   translit.ru_old_upp, translit.ru_old_low)

  elseif method == "all" then
    final = merged(translit.ru_upp,     translit.ru_low,
                   translit.ru_old_upp, translit.ru_old_low,
                   translit.non_ru_upp, translit.non_ru_low)

  elseif method == "ru_transcript_de" then
    -- the context rules are applied one after another, straight from their
    -- own tables
    text = translit.subst (text, translit.ru_trsc_jrule)
    text = translit.subst (text, translit.ru_trsc_irule)
    text = translit.subst (text, translit.ru_trsc_srule)
    text = translit.subst (text, translit.ru_trsc_sharpsrule)
    text = translit.subst (text, translit.ru_trsc_jerule)
    -- text = translit.subst (text, translit.ru_trsc_jorule)
    text = translit.subst (text, merged(translit.ru_trsc_upp_first,
                                        translit.ru_trsc_low_first))
    final = merged(translit.ru_trsc_upp, translit.ru_trsc_low)

  elseif method == "ru_transcript_en" then
    text = translit.subst (text, translit.ru_trsc_en_jerule)
    -- NOTE(review): unlike the German variant the *_first tables are merged
    -- into the final pass instead of getting a pass of their own -- confirm
    -- that this is intended.
    final = merged(translit.ru_trsc_en_upp_first, translit.ru_trsc_en_low_first,
                   translit.ru_trsc_en_upp,       translit.ru_trsc_en_low)

  elseif method == "iso9_ocs" then
    final = merged(translit.ru_upp,      translit.ru_low,
                   translit.ru_old_upp,  translit.ru_old_low,
                   translit.non_ru_upp,  translit.non_ru_low,
                   translit.ocs_add_upp, translit.ocs_add_low)

  elseif method == "ocs" then
    text  = translit.subst (text, merged(translit.ocs_uk))
    final = merged(translit.ocs_low, translit.ocs_upp)

  elseif method == "ocs_gla" then
    final = merged(translit.ocs_gla_low, translit.ocs_gla_upp)

  elseif method == "ru_cz" then
    final = merged(translit.ru_trsc_cz_low, translit.ru_trsc_cz_upp)

  elseif method == "ocs_cz" then
    final = merged(translit.ru_trsc_cz_low,     translit.ru_trsc_cz_upp,
                   translit.ru_trsc_cz_add_low, translit.ru_trsc_cz_add_upp)

  elseif method == "gr" or method == "gr_n" then
    -- pass 1: rough breathing after a space
    text = translit.subst (text, merged(translit.gr_di_in_low, translit.gr_di_in_upp,
                                        translit.gr_in_low,    translit.gr_in_upp))
    -- pass 2: diphthongs; "gr_n" adds the nasalization rule to this pass
    if method == "gr_n" then
      text = translit.subst (text, merged(translit.gr_di_low, translit.gr_di_upp,
                                          translit.gr_nrule))
    else
      text = translit.subst (text, merged(translit.gr_di_low, translit.gr_di_upp))
    end
    -- pass 3: the remaining single letters
    final = merged(translit.gr_low, translit.gr_upp, translit.gr_other)
  end

  text = translit.subst (text, final)
  tex.print (text)
end
\stopluacode

%D The following will help debugging and reviewing tables. Make sure your
%D typescript can handle the characters, in general it's no use with Latin
%D Modern which unfortunately provides only a restricted set of the unicode
%D range.
%D
%D The user-level command to output a single substitution table is
%D \type{\showOneTranslitTab{#1}}; its argument is passed on verbatim as the
%D Lua expression naming the table.

\define[1]\showOneTranslitTab{%
  \ctxlua{translit.show_tab(#1)}%
}

%D The user-level command to output all defined tables is
%D \type{\showTranslitTabs}; it takes no argument.

\define\showTranslitTabs{%
  \ctxlua{translit.show_all_tabs()}%
}

\startluacode
function translit.debug_next ()
  -- Advance the running debug message counter and print its label.
  translit.debug_count = translit.debug_count + 1
  tex.print("\\tfxx{\\bf translit debug msg. nr.~" .. translit.debug_count .. "}")
end
\stopluacode

\def\translitDebug#1{%
  \doif{\TRLdebug}{true}{%
    %\inmargin{\ctxlua{translit.debug_next()} #1}% Unreadable with too many debug messages.
    {\ss\inmargin{\ctxlua{translit.debug_next()}} #1}%
  }%
}

%D The user-level command \type{\transliterate[#1]{#2}} does the job of
%D switching to a given language (for hyphenation) and adjusting the
%D substitution method locally. It takes an optional list \type{[#1]} of
%D key-value arguments to allow ad-hoc specification of either of the two
%D settings where they deviate from the defaults set initially by means of
%D \type{\setuptransliterate}.
%D
%D Internally, \type{\dotransliterate} is called according to the \CONTEXT\
%D coding style and in case the user provides \type{hyphenate=} or
%D \type{mode=} those will be used instead of the globals. Note that this
%D leaves the latter unchanged. Thus, in order to permanently switch to
%D another transliteration style the user would have to set it by calling
%D \type{\setuptransliterate} again.

% thanks again, Wolfgang!
%D \type{\dotransliterate[#1]{#2}}: worker behind \type{\transliterate}. Inside
%D a group it reads the optional settings \type{#1} (if given) into the
%D \type{TRL} namespace, switches to the language named by \type{\TRLhyphenate}
%D and hands the Lua-escaped text \type{#2} together with \type{\TRLmode} to
%D \type{translit.transliterate}.
\def\dotransliterate[#1]#2{%
  \bgroup%
  \iffirstargument
    \getparameters[TRL][#1]%
  \fi
  \language[\TRLhyphenate]%
  \ctxlua{translit.transliterate("\TRLmode","\luaescapestring{#2}")}%
  \egroup%
}

%D Earlier, buffer based attempt at an environment; kept disabled.
%\definestartstop[transliterate][%
  %before={\startbuffer},
  %after={\stopbuffer\transliterate{\getbuffer}}
%]

%D User command: \type{\transliterate[settings]{text}}, the bracket argument
%D being optional.
\def\transliterate{\dosingleempty\dotransliterate}

%\def\starttransliterate {%
  %\bgroup\dostarttransliterate%
%}

%\def\stoptransliterate {%
  %\egroup
  %\@EA\transliterate{%
    %\getbuffer[trl]%
  %}%
%}

%\def\dostarttransliterate{%
  %\dostartbuffer[trl][starttransliterate][stoptransliterate]%
%}

%D Environment form: \type{\starttransliterate[settings] ... \stoptransliterate}.
%D The group opened here is closed at the end of \type{\dostarttransliterate}.
\def\starttransliterate{%
  \bgroup%
  \dosingleempty\dostarttransliterate
}

%D Grabs everything up to \type{\stoptransliterate} as \type{#2} and processes
%D it like \type{\dotransliterate} does. The optional settings are applied with
%D \type{\setuptransliterate} here, inside the group opened by
%D \type{\starttransliterate}.
\long\def\dostarttransliterate[#1]#2\stoptransliterate{%
  \iffirstargument
    \setuptransliterate[#1]%
  \fi
  \language[\TRLhyphenate]%
  \ctxlua{translit.transliterate("\TRLmode","\luaescapestring{#2}")}%
  \egroup%
}

%D NOTE(review): no \type{\stopmodule} matching the \type{\startmodule} at the
%D top of the file is visible in this part of the file -- confirm.
\protect \endinput

% vim:ft=context