update transliteration functions for lpeg v0.10 and lua 5.2

author: Philipp Gesang <gesang@stud.uni-heidelberg.de> 2013-02-20 15:05:55 +0100
committer: Philipp Gesang <gesang@stud.uni-heidelberg.de> 2013-02-20 15:05:55 +0100
commit: 41c4f194a139f769a5c0290106756c67877de8cf (patch)
tree: 31ac266baf7b50986943ed4d32b8eaed19470d85
parent: 10589ad4c4acc186e582d2a7afcc45f4ef1c2515 (diff)
download: transliterator-41c4f194a139f769a5c0290106756c67877de8cf.tar.gz
2 files changed, 304 insertions, 233 deletions
diff --git a/tex/context/third/transliterator/trans_tables_trsc.lua b/tex/context/third/transliterator/trans_tables_trsc.lua
index ce907bc..561d8d0 100644
--- a/tex/context/third/transliterator/trans_tables_trsc.lua
+++ b/tex/context/third/transliterator/trans_tables_trsc.lua
@@ -1,13 +1,19 @@
---===========================================================================--
---                      Legacy national transliterations                     --
---===========================================================================--
+--===================================================================--
+--                 Legacy national transliterations                  --
+--===================================================================--
 
 local translit = thirddata.translit
+local addrules = translit.addrules
+local utfchar  = translit.utfchar
+
+local lpegmatch = lpeg.match
+local tablepack = table.pack -- lua 5.2 precaution
 
 ---------------------------------
 -- German simple transcription --
 ---------------------------------
--- Reference:   „DUDEN. Rechtschreibung der deutschen Sprache“; 20. Aufl.,
+-- Reference:   „DUDEN. Rechtschreibung der deutschen Sprache“;
+--              20. Aufl.,
 --              Mannheim et. al. 1991.
 
 if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
@@ -35,7 +41,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
     ["кс"] = "x" -- Extraordinarily stupid one.
     }
 
-    translit.tables["German transcription first pass lowercase"] = translit.ru_trsc_low_first
+    translit.tables["German transcription first pass lowercase"]
+      = translit.ru_trsc_low_first
 
     --------------------------------------------------------
     -- Uppercase German simple transcription---first pass --
@@ -55,7 +62,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
     ["Кс"] = "ks"
     }
 
-    translit.tables["German transcription first pass uppercase"] = translit.ru_trsc_upp_first
+    translit.tables["German transcription first pass uppercase"]
+      = translit.ru_trsc_upp_first
 
     -------------------------------------------
     -- Lowercase German simple transcription --
@@ -97,7 +105,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
     ["я"] = "ja" 
     }
 
-    translit.tables["German transcription second pass lowercase"] = translit.ru_trsc_low
+    translit.tables["German transcription second pass lowercase"]
+      = translit.ru_trsc_low
 
     -------------------------------------------
     -- Uppercase German simple transcription --
@@ -139,17 +148,19 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
     ["Я"] = "Ja" 
     }
 
-    translit.tables["German transcription second pass uppercase"] = translit.ru_trsc_upp
+    translit.tables["German transcription second pass uppercase"]
+      = translit.ru_trsc_upp
 
     translit.ru_trsc_iy = {"и", "ы", "И", "Ы"}
 
     function translit.gen_rules_de()
-        -- The following are more interesting than the previous tables because they
-        -- implement various rules.  For instance the table
-        -- \type{translit.ru_trsc_irule} holds a substitution dictionary for all
-        -- possible combinations (including nonsense galore) of a vowel preceding an
-        -- “й” (Russian short i) preceding a consonant; here we access the sets of
-        -- Russian vowels as well consonants that were defined earlier.
+        -- The following are more interesting than the previous tables
+        -- because they implement various rules.  For instance the
+        -- table \type{translit.ru_trsc_irule} holds a substitution
+        -- dictionary for all possible combinations (including nonsense
+        -- galore) of a vowel preceding an “й” (Russian short i)
+        -- preceding a consonant; here we access the sets of Russian
+        -- vowels as well as consonants that were defined earlier.
 
         -- The й-rule, VйC -> ViC
         translit.ru_trsc_irule = translit.make_add_dict{}
@@ -161,7 +172,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
         end
         end
 
-        translit.tables["German transcription i-rule"] = translit.ru_trsc_irule
+        translit.tables["German transcription i-rule"]
+          = translit.ru_trsc_irule
 
         -- The second й-rule, йV -> jV && [иы]йC -> [иы]jC
         translit.ru_trsc_jrule = {}
@@ -179,7 +191,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
         end
         end
 
-        translit.tables["German transcription j-rule"] = translit.ru_trsc_jrule
+        translit.tables["German transcription j-rule"]
+          = translit.ru_trsc_jrule
 
         -- The с-rule, VсV -> VssV
         translit.ru_trsc_srule = translit.make_add_dict{}
@@ -191,7 +204,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
         end
         end
 
-        translit.tables["German transcription s-rule"] = translit.ru_trsc_srule
+        translit.tables["German transcription s-rule"]
+          = translit.ru_trsc_srule
 
         -- The sharp-s-rule, Vсх -> Vßх
         translit.ru_trsc_sharpsrule = translit.make_add_dict{}
@@ -201,7 +215,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
         translit.ru_trsc_sharpsrule[new_ante] = new_post
         end
 
-        translit.tables["German transcription sharp-s-rule"] = translit.ru_trsc_sharpsrule
+        translit.tables["German transcription sharp-s-rule"]
+          = translit.ru_trsc_sharpsrule
 
         -- The е-rule, Vе -> Vje
         translit.ru_trsc_jerule = translit.make_add_dict{}
@@ -211,7 +226,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
         translit.ru_trsc_jerule[new_ante] = new_post
         end
 
-        translit.tables["German transcription je-rule"] = translit.ru_trsc_jerule
+        translit.tables["German transcription je-rule"]
+          = translit.ru_trsc_jerule
 
         -- The ё-rule, Vё -> Vjo
         -- This should be redundant as [жцчшщ]ё -> o, else ё -> jo .
@@ -223,7 +239,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
         translit.ru_trsc_jorule[new_ante] = new_post
         end
 
-        translit.tables["German transcription (redundant) jo-rule"] = translit.ru_trsc_jorule
+        translit.tables["German transcription (redundant) jo-rule"]
+          = translit.ru_trsc_jorule
 
     end
 
@@ -233,8 +250,8 @@ end
 
 if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then
 
-    -- This is about *eight* times as fast as the old pattern. Just waiting for
-    -- v0.10 to make it into luatex.
+    -- This is about *eight* times as fast as the old pattern. Just
+    -- waiting for v0.10 to make it into luatex.
 
     local de_tables = { }
 
@@ -243,51 +260,64 @@ if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then
     --------------------------------------------------------
 
     de_tables[1] = { -- lowercase initial
-        [" е"] = " je",      ["ъе"] = "je",   ["ье"] = "je",    [" ё"] = " jo",  ["ъё"] = "jo",
-        ["ьё"] = "jo",       ["жё"] = "scho", ["цё"] = "scho",  ["чё"] = "zo",   ["шё"] = "scho",
-        ["щё"] = "schtscho", ["ье"] = "je",   ["ьи"] = "ji",    ["ьо"] = "jo",   ["ий"] = "i",
-        ["ый"] = "y",        ["кс"] = "x" -- Extraordinarily stupid one.
+        [" е"] = " je",  ["ъе"] = "je",       ["ье"] = "je",
+        [" ё"] = " jo",  ["ъё"] = "jo",       ["ьё"] = "jo",
+        ["жё"] = "scho", ["цё"] = "scho",     ["чё"] = "zo",
+        ["шё"] = "scho", ["щё"] = "schtscho", ["ье"] = "je",
+        ["ьи"] = "ji",   ["ьо"] = "jo",       ["ий"] = "i",
+        ["ый"] = "y",    ["кс"] = "x" -- Extraordinarily stupid one.
     }
-    translit.tables["German transcription first pass lowercase"] = de_tables[1]
+    translit.tables["German transcription first pass lowercase"]
+      = de_tables[1]
 
     --------------------------------------------------------
     -- Uppercase German simple transcription---first pass --
     --------------------------------------------------------
 
     de_tables[2] = { -- uppercase initial
-        [" Е"] = " Je", ["Ъe"] = "Je",   ["Ье"] = "Je",    [" Ё"]  = "Jo",  ["Ъё"] = "Jo",
-        ["Ьё"] = "Jo",  ["Жё"] = "Scho", ["Чё"] = "Tscho", ["Шё"] = "Scho", ["Щё"] = "Schtscho",
-        ["Кс"] = "ks"
+        [" Е"] = " Je",      ["Ъe"] = "Je",    ["Ье"] = "Je",
+        [" Ё"] = " Jo",      ["Ъё"] = "Jo",    ["Ьё"] = "Jo",
+        ["Жё"] = "Scho",     ["Чё"] = "Tscho", ["Шё"] = "Scho",
+        ["Щё"] = "Schtscho", ["Кс"] = "ks"
     }
-    translit.tables["German transcription first pass uppercase"] = de_tables[2]
+    translit.tables["German transcription first pass uppercase"]
+      = de_tables[2]
 
     -------------------------------------------
     -- Lowercase German simple transcription --
     -------------------------------------------
 
     de_tables[3] = { -- lowercase
-        ["а"] = "a",    ["б"] = "b",   ["в"] = "w",       ["г"] = "g", ["д"] = "d",  ["е"] = "e",
-        ["ё"] = "jo",   ["ж"] = "sch", ["з"] = "s",       ["и"] = "i", ["й"] = "i",  ["к"] = "k",
-        ["л"] = "l",    ["м"] = "m",   ["н"] = "n",       ["о"] = "o", ["п"] = "p",  ["р"] = "r",
-        ["с"] = "s",    ["т"] = "t",   ["у"] = "u",       ["ф"] = "f", ["х"] = "ch", ["ц"] = "z",
-        ["ч"] = "tsch", ["ш"] = "sch", ["щ"] = "schtsch", ["ъ"] = "",  ["ы"] = "y",  ["ь"] = "",
-        ["э"] = "e",    ["ю"] = "ju",  ["я"] = "ja" 
+        ["а"] = "a",    ["б"] = "b",   ["в"] = "w",  ["г"] = "g",
+        ["д"] = "d",    ["е"] = "e",   ["ё"] = "jo", ["ж"] = "sch",
+        ["з"] = "s",    ["и"] = "i",   ["й"] = "i",  ["к"] = "k",
+        ["л"] = "l",    ["м"] = "m",   ["н"] = "n",  ["о"] = "o",
+        ["п"] = "p",    ["р"] = "r",   ["с"] = "s",  ["т"] = "t",
+        ["у"] = "u",    ["ф"] = "f",   ["х"] = "ch", ["ц"] = "z",
+        ["ч"] = "tsch", ["ш"] = "sch", ["щ"] = "schtsch",
+        ["ъ"] = "",     ["ы"] = "y",   ["ь"] = "",   ["э"] = "e",
+        ["ю"] = "ju",   ["я"] = "ja" 
     }
-    translit.tables["German transcription second pass lowercase"] = de_tables[3]
+    translit.tables["German transcription second pass lowercase"]
+      = de_tables[3]
 
     -------------------------------------------
     -- Uppercase German simple transcription --
     -------------------------------------------
 
     de_tables[4] = { -- uppercase
-        ["А"] = "A",    ["Б"] = "B",   ["В"] = "W",       ["Г"] = "G", ["Д"] = "D",  ["Е"] = "E",
-        ["Ё"] = "Jo",   ["Ж"] = "Sch", ["З"] = "S",       ["И"] = "I", ["Й"] = "J",  ["К"] = "K",
-        ["Л"] = "L",    ["М"] = "M",   ["Н"] = "N",       ["О"] = "O", ["П"] = "P",  ["Р"] = "R",
-        ["С"] = "S",    ["Т"] = "T",   ["У"] = "U",       ["Ф"] = "F", ["Х"] = "Ch", ["Ц"] = "Z",
-        ["Ч"] = "Tsch", ["Ш"] = "Sch", ["Щ"] = "Schtsch", ["Ъ"] = "",  ["Ы"] = "Y",  ["Ь"] = "",
-        ["Э"] = "E",    ["Ю"] = "Ju",  ["Я"] = "Ja" 
+        ["А"] = "A",    ["Б"] = "B",   ["В"] = "W",      ["Г"] = "G",
+        ["Д"] = "D",    ["Е"] = "E",   ["Ё"] = "Jo",     ["Ж"] = "Sch",
+        ["З"] = "S",    ["И"] = "I",   ["Й"] = "J",      ["К"] = "K",
+        ["Л"] = "L",    ["М"] = "M",   ["Н"] = "N",      ["О"] = "O",
+        ["П"] = "P",    ["Р"] = "R",   ["С"] = "S",      ["Т"] = "T",
+        ["У"] = "U",    ["Ф"] = "F",   ["Х"] = "Ch",     ["Ц"] = "Z",
+        ["Ч"] = "Tsch", ["Ш"] = "Sch", ["Щ"] = "Schtsch",["Ъ"] = "",
+        ["Ы"] = "Y",    ["Ь"] = "",    ["Э"] = "E",      ["Ю"] = "Ju",
+        ["Я"] = "Ja"
     }
-    translit.tables["German transcription second pass uppercase"] = de_tables[4]
+    translit.tables["German transcription second pass uppercase"]
+      = de_tables[4]
 
     local B, P, Cs = lpeg.B, lpeg.P, lpeg.Cs
 
@@ -302,8 +332,9 @@ if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then
     }
 
     local Vo = P{
-       P"а" + "е" + "ё" + "и" + "й" + "о" + "у" + "ы" + "э" + "я" + "ю" +
-        "А" + "Е" + "Ё" + "И" + "Й" + "О" + "У" + "Ы" + "Э" + "Я" + "Ю"
+       P"а" + "е" + "ё" + "и" + "й" + "о" + "у" + "ы" + "э" + "я" +
+        "ю" + "А" + "Е" + "Ё" + "И" + "Й" + "О" + "У" + "Ы" + "Э" +
+        "Я" + "Ю"
     }
 
     local iy = P"и" + P"ы" + P"И" + P"Ы"
@@ -312,7 +343,7 @@ if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then
     -- Pattern generation.
     -------------------------------------------
 
-    local p_transcript 
+    local p_transcript
 
     for _, set in next, de_tables do
         for str, rep in next, set do
@@ -332,7 +363,11 @@ if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then
     local jerule = B(Vo,2) * Cs(P"е")         / "je"
     local jorule = B(Vo,2) * Cs(P"ё")         / "jo"
 
-    translit.future_ru_transcript_de = Cs((iyrule + jrule + irule + jerule + srule + ssrule + jorule + p_transcript + 1)^0)
+    translit.future_ru_transcript_de
+      = Cs((iyrule + jrule + irule
+          + jerule + srule + ssrule
+          + jorule + p_transcript + 1)^0
+        )
 end
 
 if not translit.done_ru_trsc_en then
@@ -349,7 +384,8 @@ if not translit.done_ru_trsc_en then
     ["ьи"] = "yi",
     }
 
-    translit.tables["English transcription lowercase first pass"] = translit.ru_trsc_en_low_first
+    translit.tables["English transcription lowercase first pass"]
+      = translit.ru_trsc_en_low_first
 
     ---------------------------------------------------------
     -- Uppercase English simple transcription---first pass --
@@ -361,7 +397,8 @@ if not translit.done_ru_trsc_en then
     ["Ье"] = "Ye",
     }
 
-    translit.tables["English transcription uppercase first pass"] = translit.ru_trsc_en_upp_first
+    translit.tables["English transcription uppercase first pass"]
+      = translit.ru_trsc_en_upp_first
 
     --------------------------------------------
     -- Lowercase English simple transcription --
@@ -403,51 +440,52 @@ if not translit.done_ru_trsc_en then
     ["я"] = "ya" 
     }
 
-    translit.tables["English transcription lowercase second pass"] = translit.ru_trsc_en_low
+    translit.tables["English transcription lowercase second pass"]
+      = translit.ru_trsc_en_low
 
     --------------------------------------------
     -- Uppercase English simple transcription --
     --------------------------------------------
 
     translit.ru_trsc_en_upp = translit.make_add_dict{
-    ["А"] = "A",
-    ["Б"] = "B",
-    ["В"] = "V",
-    ["Г"] = "G",
-    ["Д"] = "D",
-    ["Е"] = "E",
-    ["Ё"] = "E",
-    ["Ж"] = "Zh",
-    ["З"] = "Z",
-    ["И"] = "I",
-    ["Й"] = "Y",
-    ["К"] = "K",
-    ["Л"] = "L",
-    ["М"] = "M",
-    ["Н"] = "N",
-    ["О"] = "O",
-    ["П"] = "P",
-    ["Р"] = "R",
-    ["С"] = "S",
-    ["Т"] = "T",
-    ["У"] = "U",
-    ["Ф"] = "F",
-    ["Х"] = "Kh",
-    ["Ц"] = "Ts",
-    ["Ч"] = "Ch",
-    ["Ш"] = "Sh",
-    ["Щ"] = "Shsh",
-    ["Ъ"] = "",
-    ["Ы"] = "Y",
-    ["Ь"] = "",
-    ["Э"] = "E",
-    ["Ю"] = "Yu",
-    ["Я"] = "Ya" 
+      ["А"] = "A",
+      ["Б"] = "B",
+      ["В"] = "V",
+      ["Г"] = "G",
+      ["Д"] = "D",
+      ["Е"] = "E",
+      ["Ё"] = "E",
+      ["Ж"] = "Zh",
+      ["З"] = "Z",
+      ["И"] = "I",
+      ["Й"] = "Y",
+      ["К"] = "K",
+      ["Л"] = "L",
+      ["М"] = "M",
+      ["Н"] = "N",
+      ["О"] = "O",
+      ["П"] = "P",
+      ["Р"] = "R",
+      ["С"] = "S",
+      ["Т"] = "T",
+      ["У"] = "U",
+      ["Ф"] = "F",
+      ["Х"] = "Kh",
+      ["Ц"] = "Ts",
+      ["Ч"] = "Ch",
+      ["Ш"] = "Sh",
+      ["Щ"] = "Shsh",
+      ["Ъ"] = "",
+      ["Ы"] = "Y",
+      ["Ь"] = "",
+      ["Э"] = "E",
+      ["Ю"] = "Yu",
+      ["Я"] = "Ya"
     }
 
-    translit.tables["English transcription uppercase second pass"] = translit.ru_trsc_en_upp
+    translit.tables["English transcription uppercase second pass"]
+      = translit.ru_trsc_en_upp
 
-            
     function translit.gen_rules_en ()
         -- The english е-rule, Vе -> Vye
         translit.ru_trsc_en_jerule = translit.make_add_dict{}
@@ -457,7 +495,8 @@ if not translit.done_ru_trsc_en then
             translit.ru_trsc_en_jerule[new_ante] = new_post
         end
 
-        translit.tables["English transcription ye-rule"] = translit.ru_trsc_en_jerule
+        translit.tables["English transcription ye-rule"]
+          = translit.ru_trsc_en_jerule
     end
 
     translit.gen_rules_en()
@@ -471,84 +510,86 @@ if not translit.done_ru_trsc_cz then
     -----------------------------------
 
     translit.ru_trsc_cz_low = translit.make_add_dict{
-    ["а"] = "a",
-    ["б"] = "b",
-    ["в"] = "v",
-    ["г"] = "g",
-    ["д"] = "d",
-    ["е"] = "e",
-    ["ё"] = "ë",
-    ["ж"] = "ž",
-    ["з"] = "z",
-    ["и"] = "i",
-    ["й"] = "j",
-    ["к"] = "k",
-    ["л"] = "l",
-    ["м"] = "m",
-    ["н"] = "n",
-    ["о"] = "o",
-    ["п"] = "p",
-    ["р"] = "r",
-    ["с"] = "s",
-    ["т"] = "t",
-    ["у"] = "u",
-    ["ф"] = "f",
-    ["х"] = "ch",
-    ["ц"] = "c",
-    ["ч"] = "č",
-    ["ш"] = "š",
-    ["щ"] = "šč",
-    ["ъ"] = "ъ",
-    ["ы"] = "y",
-    ["ь"] = "ь",
-    ["э"] = "è",
-    ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and тя -> ťa, but
-    ["я"] = "ja"  -- that would complicate things a bit and linguists might not
-    }               -- agree.
-
-    translit.tables["Czech transcription lowercase"] = translit.ru_trsc_cz_low
+      ["а"] = "a",
+      ["б"] = "b",
+      ["в"] = "v",
+      ["г"] = "g",
+      ["д"] = "d",
+      ["е"] = "e",
+      ["ё"] = "ë",
+      ["ж"] = "ž",
+      ["з"] = "z",
+      ["и"] = "i",
+      ["й"] = "j",
+      ["к"] = "k",
+      ["л"] = "l",
+      ["м"] = "m",
+      ["н"] = "n",
+      ["о"] = "o",
+      ["п"] = "p",
+      ["р"] = "r",
+      ["с"] = "s",
+      ["т"] = "t",
+      ["у"] = "u",
+      ["ф"] = "f",
+      ["х"] = "ch",
+      ["ц"] = "c",
+      ["ч"] = "č",
+      ["ш"] = "š",
+      ["щ"] = "šč",
+      ["ъ"] = "ъ",
+      ["ы"] = "y",
+      ["ь"] = "ь",
+      ["э"] = "è",
+      ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and
+      ["я"] = "ja"  -- тя -> ťa, but that would complicate things a
+    }               -- bit and linguists might not agree.
+
+    translit.tables["Czech transcription lowercase"]
+      = translit.ru_trsc_cz_low
 
     -----------------------------------
     -- Uppercase Czech transcription --
     -----------------------------------
 
     translit.ru_trsc_cz_upp = translit.make_add_dict{
-    ["А"] = "A",
-    ["Б"] = "B",
-    ["В"] = "V",
-    ["Г"] = "G",
-    ["Д"] = "D",
-    ["Е"] = "E",
-    ["Ё"] = "Ë",
-    ["Ж"] = "Ž",
-    ["З"] = "Z",
-    ["И"] = "I",
-    ["Й"] = "J",
-    ["К"] = "K",
-    ["Л"] = "L",
-    ["М"] = "M",
-    ["Н"] = "N",
-    ["О"] = "O",
-    ["П"] = "P",
-    ["Р"] = "R",
-    ["С"] = "S",
-    ["Т"] = "T",
-    ["У"] = "U",
-    ["Ф"] = "F",
-    ["Х"] = "Ch",
-    ["Ц"] = "C",
-    ["Ч"] = "Č",
-    ["Ш"] = "Š",
-    ["Щ"] = "Šč",
-    ["Ъ"] = "Ъ",
-    ["Ы"] = "Y",
-    ["Ь"] = "Ь",
-    ["Э"] = "È",
-    ["Ю"] = "Ju",
-    ["Я"] = "Ja" 
+      ["А"] = "A",
+      ["Б"] = "B",
+      ["В"] = "V",
+      ["Г"] = "G",
+      ["Д"] = "D",
+      ["Е"] = "E",
+      ["Ё"] = "Ë",
+      ["Ж"] = "Ž",
+      ["З"] = "Z",
+      ["И"] = "I",
+      ["Й"] = "J",
+      ["К"] = "K",
+      ["Л"] = "L",
+      ["М"] = "M",
+      ["Н"] = "N",
+      ["О"] = "O",
+      ["П"] = "P",
+      ["Р"] = "R",
+      ["С"] = "S",
+      ["Т"] = "T",
+      ["У"] = "U",
+      ["Ф"] = "F",
+      ["Х"] = "Ch",
+      ["Ц"] = "C",
+      ["Ч"] = "Č",
+      ["Ш"] = "Š",
+      ["Щ"] = "Šč",
+      ["Ъ"] = "Ъ",
+      ["Ы"] = "Y",
+      ["Ь"] = "Ь",
+      ["Э"] = "È",
+      ["Ю"] = "Ju",
+      ["Я"] = "Ja" 
     }
 
-    translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp
+    translit.tables["Czech transcription uppercase"]
+      = translit.ru_trsc_cz_upp
 
     ----------------------------------------------
     -- Lowercase Additional Czech Transcription --
@@ -573,7 +614,9 @@ if not translit.done_ru_trsc_cz then
     ["ѵ"] = "ÿ",
     }
 
-    translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit.ru_trsc_cz_add_low
+    translit.tables[
+      "Czech transcription for OCS and pre-1918 lowercase"]
+      = translit.ru_trsc_cz_add_low
 
 
     ----------------------------------------------
@@ -599,38 +642,95 @@ if not translit.done_ru_trsc_cz then
     ["Ѵ"] = "Ÿ",
     }
 
-    translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit.ru_trsc_cz_add_upp
+    translit.tables[
+      "Czech transcription for OCS and pre-1918 uppercase"]
+      = translit.ru_trsc_cz_add_upp
     translit.done_ru_trsc_cz = true
 end
 
---===========================================================================--
---                              End Of Tables                                --
---===========================================================================--
+--===================================================================--
+--                           End Of Tables                           --
+--===================================================================--
 
 local function transcript (mode, text)
     local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs
-    local addrules       = translit.addrules
-    local utfchar        = translit.utfchar
 
     local trsc_parser, p_rules, capt, p_de
 
     local function tab_subst (s, ...)
+        local sets = { ... }
         local p_tmp, tmp = nil, translit.make_add_dict{}
-        for _,tab in ipairs(arg) do
-            tmp = tmp + tab
+        for n=1, #sets do
+            local set = sets[n]
+            tmp = tmp + set
         end
         p_tmp = addrules(tmp, p_tmp)
         local fp = Cs((Cs(P(p_tmp) / tmp) + utfchar)^0)
-        return fp:match(s)
+        return lpegmatch(fp, s)
     end
 
-    local vow, con, iy
-    vow = addrules(translit.ru_vowels,     vow)
-    con = addrules(translit.ru_consonants, con)
-    iy  = addrules(translit.ru_trsc_iy,    iy )
+    if mode == "ru_transcript_en" then
+
+        text = tab_subst(text, translit.ru_trsc_en_jerule)
+        text = tab_subst(text,
+                  translit.ru_trsc_en_low_first,
+                  translit.ru_trsc_en_upp_first)
+        text = tab_subst(text,
+                  translit.ru_trsc_en_low,
+                  translit.ru_trsc_en_upp)
+
+        return text
+
+    elseif mode == "ru_transcript_en_exp" then
+
+        local en_low_upp = translit.make_add_dict{}
+        en_low_upp = translit.ru_trsc_en_low + translit.ru_trsc_en_upp
+
+        local twochar
+        local tworepl = translit.make_add_dict{}
+
+        twochar = addrules( translit.ru_trsc_en_low_first, twochar)
+        twochar = addrules( translit.ru_trsc_en_upp_first, twochar)
+
+        tworepl = translit.ru_trsc_en_low_first
+                + translit.ru_trsc_en_upp_first
+
+        -- The е-rule, Vе -> Vye (needs its own vowel pattern)
+        local vow = addrules(translit.ru_vowels, nil)
+        local function V_je (s)
+            local ante = utf.sub(s, 1, 1)
+            return en_low_upp[ante] .. "ye"
+        end
+        local jerule    = Cs((vow * "е")        / V_je)
+
+        local dvoje     = Cs(twochar            / tworepl)
+        local other     = Cs((utfchar)          / en_low_upp)
+
+        local g = Cs((dvoje + jerule + other + utfchar)^0)
+
+        text = g:match(text)
+
+        return text
+
+    elseif mode == "ru_cz" or mode ==  "ocs_cz" then
+        text = tab_subst(text,
+                         translit.ru_trsc_cz_low,
+                         translit.ru_trsc_cz_upp)
+        if mode == "ocs_cz" then
+            text = tab_subst(text,
+                      translit.ru_trsc_cz_add_low,
+                      translit.ru_trsc_cz_add_upp)
+        end
+        return text
+    end
 
     if mode == "ru_transcript_de_exp" then
 
+        local vow, con, iy
+        vow = addrules(translit.ru_vowels,     vow)
+        con = addrules(translit.ru_consonants, con)
+        iy  = addrules(translit.ru_trsc_iy,    iy )
+
         local de_low_upp = translit.make_add_dict{}
         de_low_upp = translit.ru_trsc_upp + translit.ru_trsc_low
 
@@ -640,7 +740,8 @@ local function transcript (mode, text)
         twochar = addrules( translit.ru_trsc_low_first, twochar )
         twochar = addrules( translit.ru_trsc_upp_first, twochar )
 
-        tworepl = translit.ru_trsc_low_first + translit.ru_trsc_upp_first
+        tworepl = translit.ru_trsc_low_first
+                + translit.ru_trsc_upp_first
 
         -- The й-rule, VйC -> ViC
         local function V_i_C (s)
@@ -671,25 +772,25 @@ local function transcript (mode, text)
         -- The sharp-s-rule, Vсх -> Vßх
         local function V_sz_ch (s)
             local ante = utf.sub(s, 1, 1)
-            return de_low_upp[ante] .. "ßch" 
+            return de_low_upp[ante] .. "ßch"
         end
 
         -- The е-rule, Vе -> Vje
         local function V_je (s)
             local ante = utf.sub(s, 1, 1)
-            return de_low_upp[ante] .. "je" 
+            return de_low_upp[ante] .. "je"
         end
 
-        -- Reapplying V_je on its result + next char would make the following
-        -- two rules obsolete.
+        -- Reapplying V_je on its result + next char would make the
+        -- following two rules obsolete.
         local function V_jeje (s)
             local ante = utf.sub(s, 1, 1)
-            return de_low_upp[ante] .. "jeje" 
+            return de_low_upp[ante] .. "jeje"
         end
 
         local function V___je (s)
             local ante = utf.sub(s, 1, 1)
-            return de_low_upp[ante] .. "jeje" 
+            return de_low_upp[ante] .. "jeje"
         end
 
         -- The ё-rule, Vё -> Vjo
@@ -697,7 +798,7 @@ local function transcript (mode, text)
         -- Somebody should teach those DUDEN guys parsimony.
         local function V_jo (s)
             local ante = utf.sub(s, 1, 1)
-            return de_low_upp[ante] .. "jo" 
+            return de_low_upp[ante] .. "jo"
         end
 
         local iyrule    = Cs((iy * "й" * con)   / iy_j_C)
@@ -725,6 +826,7 @@ local function transcript (mode, text)
         return text
 
     elseif mode == "ru_transcript_de" then
+
         if lpeg.version() == "0.9" then
 
             text = tab_subst(text, translit.ru_trsc_jrule)
@@ -733,66 +835,33 @@ local function transcript (mode, text)
             text = tab_subst(text, translit.ru_trsc_srule)
             text = tab_subst(text, translit.ru_trsc_sharpsrule)
             text = tab_subst(text, translit.ru_trsc_jorule)
-            text = tab_subst(text, translit.ru_trsc_upp_first, translit.ru_trsc_low_first)
-            text = tab_subst(text, translit.ru_trsc_upp, translit.ru_trsc_low)
+            text = tab_subst(text,
+                      translit.ru_trsc_upp_first,
+                      translit.ru_trsc_low_first)
+            text = tab_subst(text,
+                      translit.ru_trsc_upp,
+                      translit.ru_trsc_low)
 
             return text
         elseif lpeg.version() == "0.10" then
             return translit.future_ru_transcript_de:match(text)
         end
 
-    elseif mode == "ru_transcript_en_exp" then
-
-        local en_low_upp = translit.make_add_dict{}
-        en_low_upp = translit.ru_trsc_en_low + translit.ru_trsc_en_upp
-
-        local twochar
-        local tworepl = translit.make_add_dict{}
-
-        twochar = addrules( translit.ru_trsc_en_low_first, twochar)
-        twochar = addrules( translit.ru_trsc_en_upp_first, twochar)
-
-        tworepl = translit.ru_trsc_en_low_first + translit.ru_trsc_en_upp_first
-
-        -- The е-rule, Vе -> Vye
-        local function V_je (s)
-            local ante = utf.sub(s, 1, 1)
-            return en_low_upp[ante] .. "ye" 
-        end
-
-        local jerule    = Cs((vow * "е")        / V_je)
-
-        local dvoje     = Cs(twochar            / tworepl)
-        local other     = Cs((utfchar)          / en_low_upp)
-
-        local g = Cs((dvoje + jerule + other + utfchar)^0)
-
-        text = g:match(text)
-
-        return text
-
-    elseif mode == "ru_transcript_en" then 
-
-        text = tab_subst(text, translit.ru_trsc_en_jerule)
-        text = tab_subst(text, translit.ru_trsc_en_low_first,   translit.ru_trsc_en_upp_first)
-        text = tab_subst(text, translit.ru_trsc_en_low,         translit.ru_trsc_en_upp)
-
-        return text
-
-    elseif mode == "ru_cz" or mode ==  "ocs_cz" then 
-        text = tab_subst(text, translit.ru_trsc_cz_low, translit.ru_trsc_cz_upp)
-        if mode == "ocs_cz" then
-            text = tab_subst(text, translit.ru_trsc_cz_add_low, translit.ru_trsc_cz_add_upp)
-        end
-        
-        return text
     end
 
 end
 
-translit.methods ["ru_transcript_de"]     = function (text) return transcript("ru_transcript_de"    , text) end
-translit.methods ["ru_transcript_de_exp"] = function (text) return transcript("ru_transcript_de_exp", text) end
-translit.methods ["ru_transcript_en"]     = function (text) return transcript("ru_transcript_en"    , text) end
-translit.methods ["ru_transcript_en_exp"] = function (text) return transcript("ru_transcript_en_exp", text) end
-translit.methods ["ru_cz"]                = function (text) return transcript("ru_cz"               , text) end
-translit.methods ["ocs_cz"]               = function (text) return transcript("ocs_cz"              , text) end
+translit.methods ["ru_transcript_de"]
+  = function (text) return transcript("ru_transcript_de"    , text) end
+translit.methods ["ru_transcript_de_exp"]
+  = function (text) return transcript("ru_transcript_de_exp", text) end
+translit.methods ["ru_transcript_en"]
+  = function (text) return transcript("ru_transcript_en"    , text) end
+translit.methods ["ru_transcript_en_exp"]
+  = function (text) return transcript("ru_transcript_en_exp", text) end
+translit.methods ["ru_cz"]
+  = function (text) return transcript("ru_cz"               , text) end
+translit.methods ["ocs_cz"]
+  = function (text) return transcript("ocs_cz"              , text) end
+
+-- vim:sw=4:ts=4:expandtab:ft=lua
diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua
index 4ca7ea0..873e6d6 100644
--- a/tex/context/third/transliterator/transliterator.lua
+++ b/tex/context/third/transliterator/transliterator.lua
@@ -276,3 +276,5 @@ function translit.transliterate (method, text)
     end
     context ( methods[method](text) )
 end
+
+-- vim:sw=4:ts=4:expandtab:ft=lua
author	Philipp Gesang <gesang@stud.uni-heidelberg.de>	2013-02-20 15:05:55 +0100
committer	Philipp Gesang <gesang@stud.uni-heidelberg.de>	2013-02-20 15:05:55 +0100
commit	41c4f194a139f769a5c0290106756c67877de8cf (patch)
tree	31ac266baf7b50986943ed4d32b8eaed19470d85
parent	10589ad4c4acc186e582d2a7afcc45f4ef1c2515 (diff)
download	transliterator-41c4f194a139f769a5c0290106756c67877de8cf.tar.gz