diff options
author | Philipp Gesang <gesang@stud.uni-heidelberg.de> | 2013-02-20 15:05:55 +0100 |
---|---|---|
committer | Philipp Gesang <gesang@stud.uni-heidelberg.de> | 2013-02-20 15:05:55 +0100 |
commit | 41c4f194a139f769a5c0290106756c67877de8cf (patch) | |
tree | 31ac266baf7b50986943ed4d32b8eaed19470d85 | |
parent | 10589ad4c4acc186e582d2a7afcc45f4ef1c2515 (diff) | |
download | transliterator-41c4f194a139f769a5c0290106756c67877de8cf.tar.gz |
update transliteration functions for lpeg v0.10 and lua 5.2
-rw-r--r-- | tex/context/third/transliterator/trans_tables_trsc.lua | 535 | ||||
-rw-r--r-- | tex/context/third/transliterator/transliterator.lua | 2 |
2 files changed, 304 insertions, 233 deletions
diff --git a/tex/context/third/transliterator/trans_tables_trsc.lua b/tex/context/third/transliterator/trans_tables_trsc.lua index ce907bc..561d8d0 100644 --- a/tex/context/third/transliterator/trans_tables_trsc.lua +++ b/tex/context/third/transliterator/trans_tables_trsc.lua @@ -1,13 +1,19 @@ ---===========================================================================-- --- Legacy national transliterations -- ---===========================================================================-- +--===================================================================-- +-- Legacy national transliterations -- +--===================================================================-- local translit = thirddata.translit +local addrules = translit.addrules +local utfchar = translit.utfchar + +local lpegmatch = lpeg.match +local tablepack = table.pack -- lua 5.2 precaution --------------------------------- -- German simple transcription -- --------------------------------- --- Reference: „DUDEN. Rechtschreibung der deutschen Sprache“; 20. Aufl., +-- Reference: „DUDEN. Rechtschreibung der deutschen Sprache“; +-- 20. Aufl., -- Mannheim et. al. 1991. if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then @@ -35,7 +41,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then ["кс"] = "x" -- Extraordinarily stupid one. } - translit.tables["German transcription first pass lowercase"] = translit.ru_trsc_low_first + translit.tables["German transcription first pass lowercase"] + = translit.ru_trsc_low_first -------------------------------------------------------- -- Uppercase German simple transcription---first pass -- @@ -55,7 +62,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then ["Кс"] = "ks" } - translit.tables["German transcription first pass uppercase"] = translit.ru_trsc_upp_first + translit.tables["German transcription first pass uppercase"] + = translit.ru_trsc_upp_first ------------------------------------------- -- Lowercase German simple transcription -- @@ -97,7 +105,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then ["я"] = "ja" } - translit.tables["German transcription second pass lowercase"] = translit.ru_trsc_low + translit.tables["German transcription second pass lowercase"] + = translit.ru_trsc_low ------------------------------------------- -- Uppercase German simple transcription -- @@ -139,17 +148,19 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then ["Я"] = "Ja" } - translit.tables["German transcription second pass uppercase"] = translit.ru_trsc_upp + translit.tables["German transcription second pass uppercase"] + = translit.ru_trsc_upp translit.ru_trsc_iy = {"и", "ы", "И", "Ы"} function translit.gen_rules_de() - -- The following are more interesting than the previous tables because they - -- implement various rules. For instance the table - -- \type{translit.ru_trsc_irule} holds a substitution dictionary for all - -- possible combinations (including nonsense galore) of a vowel preceding an - -- “й” (Russian short i) preceding a consonant; here we access the sets of - -- Russian vowels as well consonants that were defined earlier. + -- The following are more interesting than the previous tables + -- because they implement various rules. For instance the + -- table \type{translit.ru_trsc_irule} holds a substitution + -- dictionary for all possible combinations (including nonsense + -- galore) of a vowel preceding an “й” (Russian short i) + -- preceding a consonant; here we access the sets of Russian + -- vowels as well consonants that were defined earlier. -- The й-rule, VйC -> ViC translit.ru_trsc_irule = translit.make_add_dict{} @@ -161,7 +172,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then end end - translit.tables["German transcription i-rule"] = translit.ru_trsc_irule + translit.tables["German transcription i-rule"] + = translit.ru_trsc_irule -- The second й-rule, йV -> jV && [иы]йC -> [иы]jC translit.ru_trsc_jrule = {} @@ -179,7 +191,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then end end - translit.tables["German transcription j-rule"] = translit.ru_trsc_jrule + translit.tables["German transcription j-rule"] + = translit.ru_trsc_jrule -- The с-rule, VсV -> VssV translit.ru_trsc_srule = translit.make_add_dict{} @@ -191,7 +204,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then end end - translit.tables["German transcription s-rule"] = translit.ru_trsc_srule + translit.tables["German transcription s-rule"] + = translit.ru_trsc_srule -- The sharp-s-rule, Vсх -> Vßх translit.ru_trsc_sharpsrule = translit.make_add_dict{} @@ -201,7 +215,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then translit.ru_trsc_sharpsrule[new_ante] = new_post end - translit.tables["German transcription sharp-s-rule"] = translit.ru_trsc_sharpsrule + translit.tables["German transcription sharp-s-rule"] + = translit.ru_trsc_sharpsrule -- The е-rule, Vе -> Vje translit.ru_trsc_jerule = translit.make_add_dict{} @@ -211,7 +226,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then translit.ru_trsc_jerule[new_ante] = new_post end - translit.tables["German transcription je-rule"] = translit.ru_trsc_jerule + translit.tables["German transcription je-rule"] + = translit.ru_trsc_jerule -- The ё-rule, Vё -> Vjo -- This should be redundant as [жцчшщ]ё -> o, else ё -> jo . @@ -223,7 +239,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then translit.ru_trsc_jorule[new_ante] = new_post end - translit.tables["German transcription (redundant) jo-rule"] = translit.ru_trsc_jorule + translit.tables["German transcription (redundant) jo-rule"] + = translit.ru_trsc_jorule end @@ -233,8 +250,8 @@ end if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then - -- This is about *eight* times as fast as the old pattern. Just waiting for - -- v0.10 to make it into luatex. + -- This is about *eight* times as fast as the old pattern. Just + -- waiting for v0.10 to make it into luatex. local de_tables = { } @@ -243,51 +260,64 @@ if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then -------------------------------------------------------- de_tables[1] = { -- lowercase initial - [" е"] = " je", ["ъе"] = "je", ["ье"] = "je", [" ё"] = " jo", ["ъё"] = "jo", - ["ьё"] = "jo", ["жё"] = "scho", ["цё"] = "scho", ["чё"] = "zo", ["шё"] = "scho", - ["щё"] = "schtscho", ["ье"] = "je", ["ьи"] = "ji", ["ьо"] = "jo", ["ий"] = "i", - ["ый"] = "y", ["кс"] = "x" -- Extraordinarily stupid one. + [" е"] = " je", ["ъе"] = "je", ["ье"] = "je", + [" ё"] = " jo", ["ъё"] = "jo", ["ьё"] = "jo", + ["жё"] = "scho", ["цё"] = "scho", ["чё"] = "zo", + ["шё"] = "scho", ["щё"] = "schtscho", ["ье"] = "je", + ["ьи"] = "ji", ["ьо"] = "jo", ["ий"] = "i", + ["ый"] = "y", ["кс"] = "x" -- Extraordinarily stupid one. } - translit.tables["German transcription first pass lowercase"] = de_tables[1] + translit.tables["German transcription first pass lowercase"] + = de_tables[1] -------------------------------------------------------- -- Uppercase German simple transcription---first pass -- -------------------------------------------------------- de_tables[2] = { -- uppercase initial - [" Е"] = " Je", ["Ъe"] = "Je", ["Ье"] = "Je", [" Ё"] = "Jo", ["Ъё"] = "Jo", - ["Ьё"] = "Jo", ["Жё"] = "Scho", ["Чё"] = "Tscho", ["Шё"] = "Scho", ["Щё"] = "Schtscho", - ["Кс"] = "ks" + [" Е"] = " Je", ["Ъe"] = "Je", ["Ье"] = "Je", + [" Ё"] = "Jo", ["Ъё"] = "Jo", ["Ьё"] = "Jo", + ["Жё"] = "Scho", ["Чё"] = "Tscho", ["Шё"] = "Scho", + ["Щё"] = "Schtscho", ["Кс"] = "ks" } - translit.tables["German transcription first pass uppercase"] = de_tables[2] + translit.tables["German transcription first pass uppercase"] + = de_tables[2] ------------------------------------------- -- Lowercase German simple transcription -- ------------------------------------------- de_tables[3] = { -- lowercase - ["а"] = "a", ["б"] = "b", ["в"] = "w", ["г"] = "g", ["д"] = "d", ["е"] = "e", - ["ё"] = "jo", ["ж"] = "sch", ["з"] = "s", ["и"] = "i", ["й"] = "i", ["к"] = "k", - ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", - ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "ch", ["ц"] = "z", - ["ч"] = "tsch", ["ш"] = "sch", ["щ"] = "schtsch", ["ъ"] = "", ["ы"] = "y", ["ь"] = "", - ["э"] = "e", ["ю"] = "ju", ["я"] = "ja" + ["а"] = "a", ["б"] = "b", ["в"] = "w", ["г"] = "g", + ["д"] = "d", ["е"] = "e", ["ё"] = "jo", ["ж"] = "sch", + ["з"] = "s", ["и"] = "i", ["й"] = "i", ["к"] = "k", + ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", + ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", + ["у"] = "u", ["ф"] = "f", ["х"] = "ch", ["ц"] = "z", + ["ч"] = "tsch", ["ш"] = "sch", ["щ"] = "schtsch", + ["ъ"] = "", ["ы"] = "y", ["ь"] = "", ["э"] = "e", + ["ю"] = "ju", ["я"] = "ja" } - translit.tables["German transcription second pass lowercase"] = de_tables[3] + translit.tables["German transcription second pass lowercase"] + = de_tables[3] ------------------------------------------- -- Uppercase German simple transcription -- ------------------------------------------- de_tables[4] = { -- uppercase - ["А"] = "A", ["Б"] = "B", ["В"] = "W", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E", - ["Ё"] = "Jo", ["Ж"] = "Sch", ["З"] = "S", ["И"] = "I", ["Й"] = "J", ["К"] = "K", - ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R", - ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "Ch", ["Ц"] = "Z", - ["Ч"] = "Tsch", ["Ш"] = "Sch", ["Щ"] = "Schtsch", ["Ъ"] = "", ["Ы"] = "Y", ["Ь"] = "", - ["Э"] = "E", ["Ю"] = "Ju", ["Я"] = "Ja" + ["А"] = "A", ["Б"] = "B", ["В"] = "W", ["Г"] = "G", + ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Jo", ["Ж"] = "Sch", + ["З"] = "S", ["И"] = "I", ["Й"] = "J", ["К"] = "K", + ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", + ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", + ["У"] = "U", ["Ф"] = "F", ["Х"] = "Ch", ["Ц"] = "Z", + ["Ч"] = "Tsch", ["Ш"] = "Sch", ["Щ"] = "Schtsch",["Ъ"] = "", + ["Ы"] = "Y", ["Ь"] = "", ["Э"] = "E", ["Ю"] = "Ju", + ["Я"] = "Ja" } - translit.tables["German transcription second pass uppercase"] = de_tables[4] + translit.tables["German transcription second pass uppercase"] + = de_tables[4] local B, P, Cs = lpeg.B, lpeg.P, lpeg.Cs @@ -302,8 +332,9 @@ if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then } local Vo = P{ - P"а" + "е" + "ё" + "и" + "й" + "о" + "у" + "ы" + "э" + "я" + "ю" + - "А" + "Е" + "Ё" + "И" + "Й" + "О" + "У" + "Ы" + "Э" + "Я" + "Ю" + P"а" + "е" + "ё" + "и" + "й" + "о" + "у" + "ы" + "э" + "я" + + "ю" + "А" + "Е" + "Ё" + "И" + "Й" + "О" + "У" + "Ы" + "Э" + + "Я" + "Ю" } local iy = P"и" + P"ы" + P"И" + P"Ы" @@ -312,7 +343,7 @@ if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then -- Pattern generation. ------------------------------------------- - local p_transcript + local p_transcript for _, set in next, de_tables do for str, rep in next, set do @@ -332,7 +363,11 @@ if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then local jerule = B(Vo,2) * Cs(P"е") / "je" local jorule = B(Vo,2) * Cs(P"ё") / "jo" - translit.future_ru_transcript_de = Cs((iyrule + jrule + irule + jerule + srule + ssrule + jorule + p_transcript + 1)^0) + translit.future_ru_transcript_de + = Cs((iyrule + jrule + irule + + jerule + srule + ssrule + + jorule + p_transcript + 1)^0 + ) end if not translit.done_ru_trsc_en then @@ -349,7 +384,8 @@ if not translit.done_ru_trsc_en then ["ьи"] = "yi", } - translit.tables["English transcription lowercase first pass"] = translit.ru_trsc_en_low_first + translit.tables["English transcription lowercase first pass"] + = translit.ru_trsc_en_low_first --------------------------------------------------------- -- Uppercase English simple transcription---first pass -- @@ -361,7 +397,8 @@ if not translit.done_ru_trsc_en then ["Ье"] = "Ye", } - translit.tables["English transcription uppercase first pass"] = translit.ru_trsc_en_upp_first + translit.tables["English transcription uppercase first pass"] + = translit.ru_trsc_en_upp_first -------------------------------------------- -- Lowercase English simple transcription -- @@ -403,51 +440,52 @@ if not translit.done_ru_trsc_en then ["я"] = "ya" } - translit.tables["English transcription lowercase second pass"] = translit.ru_trsc_en_low + translit.tables["English transcription lowercase second pass"] + = translit.ru_trsc_en_low -------------------------------------------- -- Uppercase English simple transcription -- -------------------------------------------- translit.ru_trsc_en_upp = translit.make_add_dict{ - ["А"] = "A", - ["Б"] = "B", - ["В"] = "V", - ["Г"] = "G", - ["Д"] = "D", - ["Е"] = "E", - ["Ё"] = "E", - ["Ж"] = "Zh", - ["З"] = "Z", - ["И"] = "I", - ["Й"] = "Y", - ["К"] = "K", - ["Л"] = "L", - ["М"] = "M", - ["Н"] = "N", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["У"] = "U", - ["Ф"] = "F", - ["Х"] = "Kh", - ["Ц"] = "Ts", - ["Ч"] = "Ch", - ["Ш"] = "Sh", - ["Щ"] = "Shsh", - ["Ъ"] = "", - ["Ы"] = "Y", - ["Ь"] = "", - ["Э"] = "E", - ["Ю"] = "Yu", - ["Я"] = "Ya" + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "E", + ["Ж"] = "Zh", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "Y", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["О"] = "O", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ф"] = "F", + ["Х"] = "Kh", + ["Ц"] = "Ts", + ["Ч"] = "Ch", + ["Ш"] = "Sh", + ["Щ"] = "Shsh", + ["Ъ"] = "", + ["Ы"] = "Y", + ["Ь"] = "", + ["Э"] = "E", + ["Ю"] = "Yu", + ["Я"] = "Ya" } - translit.tables["English transcription uppercase second pass"] = translit.ru_trsc_en_upp + translit.tables["English transcription uppercase second pass"] + = translit.ru_trsc_en_upp - function translit.gen_rules_en () -- The english е-rule, Vе -> Vye translit.ru_trsc_en_jerule = translit.make_add_dict{} @@ -457,7 +495,8 @@ if not translit.done_ru_trsc_en then translit.ru_trsc_en_jerule[new_ante] = new_post end - translit.tables["English transcription ye-rule"] = translit.ru_trsc_en_jerule + translit.tables["English transcription ye-rule"] + = translit.ru_trsc_en_jerule end translit.gen_rules_en() @@ -471,84 +510,86 @@ if not translit.done_ru_trsc_cz then ----------------------------------- translit.ru_trsc_cz_low = translit.make_add_dict{ - ["а"] = "a", - ["б"] = "b", - ["в"] = "v", - ["г"] = "g", - ["д"] = "d", - ["е"] = "e", - ["ё"] = "ë", - ["ж"] = "ž", - ["з"] = "z", - ["и"] = "i", - ["й"] = "j", - ["к"] = "k", - ["л"] = "l", - ["м"] = "m", - ["н"] = "n", - ["о"] = "o", - ["п"] = "p", - ["р"] = "r", - ["с"] = "s", - ["т"] = "t", - ["у"] = "u", - ["ф"] = "f", - ["х"] = "ch", - ["ц"] = "c", - ["ч"] = "č", - ["ш"] = "š", - ["щ"] = "šč", - ["ъ"] = "ъ", - ["ы"] = "y", - ["ь"] = "ь", - ["э"] = "è", - ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and тя -> ťa, but - ["я"] = "ja" -- that would complicate things a bit and linguists might not - } -- agree. - - translit.tables["Czech transcription lowercase"] = translit.ru_trsc_cz_low + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "ë", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["й"] = "j", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["о"] = "o", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ф"] = "f", + ["х"] = "ch", + ["ц"] = "c", + ["ч"] = "č", + ["ш"] = "š", + ["щ"] = "šč", + ["ъ"] = "ъ", + ["ы"] = "y", + ["ь"] = "ь", + ["э"] = "è", + ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and + } -- тя -> ťa, but ["я"] = "ja" that would complicate things a + -- bit and linguists might not agree. + + translit.tables["Czech transcription lowercase"] + = translit.ru_trsc_cz_low ----------------------------------- -- Uppercase Czech transcription -- ----------------------------------- translit.ru_trsc_cz_upp = translit.make_add_dict{ - ["А"] = "A", - ["Б"] = "B", - ["В"] = "V", - ["Г"] = "G", - ["Д"] = "D", - ["Е"] = "E", - ["Ё"] = "Ë", - ["Ж"] = "Ž", - ["З"] = "Z", - ["И"] = "I", - ["Й"] = "J", - ["К"] = "K", - ["Л"] = "L", - ["М"] = "M", - ["Н"] = "N", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["У"] = "U", - ["Ф"] = "F", - ["Х"] = "Ch", - ["Ц"] = "C", - ["Ч"] = "Č", - ["Ш"] = "Š", - ["Щ"] = "Šč", - ["Ъ"] = "Ъ", - ["Ы"] = "Y", - ["Ь"] = "Ь", - ["Э"] = "È", - ["Ю"] = "Ju", - ["Я"] = "Ja" + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Ë", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["О"] = "O", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ф"] = "F", + ["Х"] = "Ch", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Ш"] = "Š", + ["Щ"] = "Šč", + ["Ъ"] = "Ъ", + ["Ы"] = "Y", + ["Ь"] = "Ь", + ["Э"] = "È", + ["Ю"] = "Ju", + ["Я"] = "Ja" } - translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp + translit.tables["Czech transcription uppercase"] + = translit.ru_trsc_cz_upp ---------------------------------------------- -- Lowercase Additional Czech Transcription -- @@ -573,7 +614,9 @@ if not translit.done_ru_trsc_cz then ["ѵ"] = "ÿ", } - translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit.ru_trsc_cz_add_low + translit.tables[ + "Czech transcription for OCS and pre-1918 lowercase"] + = translit.ru_trsc_cz_add_low ---------------------------------------------- @@ -599,38 +642,95 @@ if not translit.done_ru_trsc_cz then ["Ѵ"] = "Ÿ", } - translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit.ru_trsc_cz_add_upp + translit.tables[ + "Czech transcription for OCS and pre-1918 uppercase"] + = translit.ru_trsc_cz_add_upp translit.done_ru_trsc_cz = true end ---===========================================================================-- --- End Of Tables -- ---===========================================================================-- +--===================================================================-- +-- End Of Tables -- +--===================================================================-- local function transcript (mode, text) local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs - local addrules = translit.addrules - local utfchar = translit.utfchar local trsc_parser, p_rules, capt, p_de local function tab_subst (s, ...) + local sets = { ... } local p_tmp, tmp = nil, translit.make_add_dict{} - for _,tab in ipairs(arg) do - tmp = tmp + tab + for n=1, #sets do + local set = sets[n] + tmp = tmp + set end p_tmp = addrules(tmp, p_tmp) local fp = Cs((Cs(P(p_tmp) / tmp) + utfchar)^0) - return fp:match(s) + return lpegmatch(fp, s) end - local vow, con, iy - vow = addrules(translit.ru_vowels, vow) - con = addrules(translit.ru_consonants, con) - iy = addrules(translit.ru_trsc_iy, iy ) + if mode == "ru_transcript_en" then + + text = tab_subst(text, translit.ru_trsc_en_jerule) + text = tab_subst(text, + translit.ru_trsc_en_low_first, + translit.ru_trsc_en_upp_first) + text = tab_subst(text, + translit.ru_trsc_en_low, + translit.ru_trsc_en_upp) + + return text + + elseif mode == "ru_transcript_en_exp" then + + local en_low_upp = translit.make_add_dict{} + en_low_upp = translit.ru_trsc_en_low + translit.ru_trsc_en_upp + + local twochar + local tworepl = translit.make_add_dict{} + + twochar = addrules( translit.ru_trsc_en_low_first, twochar) + twochar = addrules( translit.ru_trsc_en_upp_first, twochar) + + tworepl = translit.ru_trsc_en_low_first + + translit.ru_trsc_en_upp_first + + -- The е-rule, Vе -> Vye + local function V_je (s) + local ante = utf.sub(s, 1, 1) + return en_low_upp[ante] .. "ye" + end + + local jerule = Cs((vow * "е") / V_je) + + local dvoje = Cs(twochar / tworepl) + local other = Cs((utfchar) / en_low_upp) + + local g = Cs((dvoje + jerule + other + utfchar)^0) + + text = g:match(text) + + return text + + elseif mode == "ru_cz" or mode == "ocs_cz" then + text = tab_subst(text, + translit.ru_trsc_cz_low, + translit.ru_trsc_cz_upp) + if mode == "ocs_cz" then + text = tab_subst(text, + translit.ru_trsc_cz_add_low, + translit.ru_trsc_cz_add_upp) + end + return text + end if mode == "ru_transcript_de_exp" then + local vow, con, iy + vow = addrules(translit.ru_vowels, vow) + con = addrules(translit.ru_consonants, con) + iy = addrules(translit.ru_trsc_iy, iy ) + local de_low_upp = translit.make_add_dict{} de_low_upp = translit.ru_trsc_upp + translit.ru_trsc_low @@ -640,7 +740,8 @@ local function transcript (mode, text) twochar = addrules( translit.ru_trsc_low_first, twochar ) twochar = addrules( translit.ru_trsc_upp_first, twochar ) - tworepl = translit.ru_trsc_low_first + translit.ru_trsc_upp_first + tworepl = translit.ru_trsc_low_first + + translit.ru_trsc_upp_first -- The й-rule, VйC -> ViC local function V_i_C (s) @@ -671,25 +772,25 @@ local function transcript (mode, text) -- The sharp-s-rule, Vсх -> Vßх local function V_sz_ch (s) local ante = utf.sub(s, 1, 1) - return de_low_upp[ante] .. "ßch" + return de_low_upp[ante] .. "ßch" end -- The е-rule, Vе -> Vje local function V_je (s) local ante = utf.sub(s, 1, 1) - return de_low_upp[ante] .. "je" + return de_low_upp[ante] .. "je" end - -- Reapplying V_je on its result + next char would make the following - -- two rules obsolete. + -- Reapplying V_je on its result + next char would make the + -- following two rules obsolete. local function V_jeje (s) local ante = utf.sub(s, 1, 1) - return de_low_upp[ante] .. "jeje" + return de_low_upp[ante] .. "jeje" end local function V___je (s) local ante = utf.sub(s, 1, 1) - return de_low_upp[ante] .. "jeje" + return de_low_upp[ante] .. "jeje" end -- The ё-rule, Vё -> Vjo @@ -697,7 +798,7 @@ local function transcript (mode, text) -- Somebody should teach those DUDEN guys parsimony. local function V_jo (s) local ante = utf.sub(s, 1, 1) - return de_low_upp[ante] .. "jo" + return de_low_upp[ante] .. "jo" end local iyrule = Cs((iy * "й" * con) / iy_j_C) @@ -725,6 +826,7 @@ local function transcript (mode, text) return text elseif mode == "ru_transcript_de" then + if lpeg.version() == "0.9" then text = tab_subst(text, translit.ru_trsc_jrule) @@ -733,66 +835,33 @@ local function transcript (mode, text) text = tab_subst(text, translit.ru_trsc_srule) text = tab_subst(text, translit.ru_trsc_sharpsrule) text = tab_subst(text, translit.ru_trsc_jorule) - text = tab_subst(text, translit.ru_trsc_upp_first, translit.ru_trsc_low_first) - text = tab_subst(text, translit.ru_trsc_upp, translit.ru_trsc_low) + text = tab_subst(text, + translit.ru_trsc_upp_first, + translit.ru_trsc_low_first) + text = tab_subst(text, + translit.ru_trsc_upp, + translit.ru_trsc_low) return text elseif lpeg.version() == "0.10" then return translit.future_ru_transcript_de:match(text) end - elseif mode == "ru_transcript_en_exp" then - - local en_low_upp = translit.make_add_dict{} - en_low_upp = translit.ru_trsc_en_low + translit.ru_trsc_en_upp - - local twochar - local tworepl = translit.make_add_dict{} - - twochar = addrules( translit.ru_trsc_en_low_first, twochar) - twochar = addrules( translit.ru_trsc_en_upp_first, twochar) - - tworepl = translit.ru_trsc_en_low_first + translit.ru_trsc_en_upp_first - - -- The е-rule, Vе -> Vye - local function V_je (s) - local ante = utf.sub(s, 1, 1) - return en_low_upp[ante] .. "ye" - end - - local jerule = Cs((vow * "е") / V_je) - - local dvoje = Cs(twochar / tworepl) - local other = Cs((utfchar) / en_low_upp) - - local g = Cs((dvoje + jerule + other + utfchar)^0) - - text = g:match(text) - - return text - - elseif mode == "ru_transcript_en" then - - text = tab_subst(text, translit.ru_trsc_en_jerule) - text = tab_subst(text, translit.ru_trsc_en_low_first, translit.ru_trsc_en_upp_first) - text = tab_subst(text, translit.ru_trsc_en_low, translit.ru_trsc_en_upp) - - return text - - elseif mode == "ru_cz" or mode == "ocs_cz" then - text = tab_subst(text, translit.ru_trsc_cz_low, translit.ru_trsc_cz_upp) - if mode == "ocs_cz" then - text = tab_subst(text, translit.ru_trsc_cz_add_low, translit.ru_trsc_cz_add_upp) - end - - return text end end -translit.methods ["ru_transcript_de"] = function (text) return transcript("ru_transcript_de" , text) end -translit.methods ["ru_transcript_de_exp"] = function (text) return transcript("ru_transcript_de_exp", text) end -translit.methods ["ru_transcript_en"] = function (text) return transcript("ru_transcript_en" , text) end -translit.methods ["ru_transcript_en_exp"] = function (text) return transcript("ru_transcript_en_exp", text) end -translit.methods ["ru_cz"] = function (text) return transcript("ru_cz" , text) end -translit.methods ["ocs_cz"] = function (text) return transcript("ocs_cz" , text) end +translit.methods ["ru_transcript_de"] + = function (text) return transcript("ru_transcript_de" , text) end +translit.methods ["ru_transcript_de_exp"] + = function (text) return transcript("ru_transcript_de_exp", text) end +translit.methods ["ru_transcript_en"] + = function (text) return transcript("ru_transcript_en" , text) end +translit.methods ["ru_transcript_en_exp"] + = function (text) return transcript("ru_transcript_en_exp", text) end +translit.methods ["ru_cz"] + = function (text) return transcript("ru_cz" , text) end +translit.methods ["ocs_cz"] + = function (text) return transcript("ocs_cz" , text) end + +-- vim:sw=4:ts=4:expandtab:ft=lua diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua index 4ca7ea0..873e6d6 100644 --- a/tex/context/third/transliterator/transliterator.lua +++ b/tex/context/third/transliterator/transliterator.lua @@ -276,3 +276,5 @@ function translit.transliterate (method, text) end context ( methods[method](text) ) end + +-- vim:sw=4:ts=4:expandtab:ft=lua |