diff options
| author | Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> | 2010-12-29 19:30:30 +0100 | 
|---|---|---|
| committer | Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> | 2010-12-29 19:30:30 +0100 | 
| commit | 5413b9fd0f31e10410095d924e16483235bc530d (patch) | |
| tree | b6e1560bc48408a3f7f18095a9fbc0934ff4ae01 /tex/context/third | |
| parent | c648ccf7728650440b6d7046211128a58ddbbc5f (diff) | |
| download | transliterator-5413b9fd0f31e10410095d924e16483235bc530d.tar.gz | |
Transcription code for lpeg v.0.10.
Diffstat (limited to 'tex/context/third')
| -rw-r--r-- | tex/context/third/transliterator/trans_tables_trsc.lua | 150 | 
1 files changed, 129 insertions, 21 deletions
diff --git a/tex/context/third/transliterator/trans_tables_trsc.lua b/tex/context/third/transliterator/trans_tables_trsc.lua index 6ba07a2..a5c53cb 100644 --- a/tex/context/third/transliterator/trans_tables_trsc.lua +++ b/tex/context/third/transliterator/trans_tables_trsc.lua @@ -7,11 +7,12 @@  -- Reference:   „DUDEN. Rechtschreibung der deutschen Sprache“; 20. Aufl.,  --              Mannheim et. al. 1991. --------------------------------------------------------- --- Lowercase German simple transcription---first pass -- --------------------------------------------------------- +if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then + +    -------------------------------------------------------- +    -- Lowercase German simple transcription---first pass -- +    -------------------------------------------------------- -if not translit.done_ru_trsc_de then      translit.ru_trsc_low_first = translit.make_add_dict{      [" е"] = " je",      ["ъе"] = "je", @@ -222,10 +223,117 @@ if not translit.done_ru_trsc_de then          translit.tables["German transcription (redundant) jo-rule"] = translit.ru_trsc_jorule      end + +    translit.gen_rules_de()      translit.done_ru_trsc_de = true  end +if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then + +    -- This is four times as fast as the old pattern. Just waiting for v0.10 to +    -- make it into luatex. + +    local de_tables = { } + +    -------------------------------------------------------- +    -- Lowercase German simple transcription---first pass -- +    -------------------------------------------------------- + +    de_tables[1] = { -- lowercase initial +        [" е"] = " je",      ["ъе"] = "je",   ["ье"] = "je",    [" ё"] = " jo",  ["ъё"] = "jo", +        ["ьё"] = "jo",       ["жё"] = "scho", ["цё"] = "scho",  ["чё"] = "zo",   ["шё"] = "scho", +        ["щё"] = "schtscho", ["ье"] = "je",   ["ьи"] = "ji",    ["ьо"] = "jo",   ["ий"] = "i", +        ["ый"] = "y",        ["кс"] = "x" -- Extraordinarily stupid one. +    } +    translit.tables["German transcription first pass lowercase"] = de_tables[1] + +    -------------------------------------------------------- +    -- Uppercase German simple transcription---first pass -- +    -------------------------------------------------------- + +    de_tables[2] = { -- uppercase initial +        [" Е"] = " Je", ["Ъe"] = "Je",   ["Ье"] = "Je",    [" Ё"]  = "Jo",  ["Ъё"] = "Jo", +        ["Ьё"] = "Jo",  ["Жё"] = "Scho", ["Чё"] = "Tscho", ["Шё"] = "Scho", ["Щё"] = "Schtscho", +        ["Кс"] = "ks" +    } +    translit.tables["German transcription first pass uppercase"] = de_tables[2] + +    ------------------------------------------- +    -- Lowercase German simple transcription -- +    ------------------------------------------- + +    de_tables[3] = { -- lowercase +        ["а"] = "a",    ["б"] = "b",   ["в"] = "w",       ["г"] = "g", ["д"] = "d",  ["е"] = "e", +        ["ё"] = "jo",   ["ж"] = "sch", ["з"] = "s",       ["и"] = "i", ["й"] = "i",  ["к"] = "k", +        ["л"] = "l",    ["м"] = "m",   ["н"] = "n",       ["о"] = "o", ["п"] = "p",  ["р"] = "r", +        ["с"] = "s",    ["т"] = "t",   ["у"] = "u",       ["ф"] = "f", ["х"] = "ch", ["ц"] = "z", +        ["ч"] = "tsch", ["ш"] = "sch", ["щ"] = "schtsch", ["ъ"] = "",  ["ы"] = "y",  ["ь"] = "", +        ["э"] = "e",    ["ю"] = "ju",  ["я"] = "ja"  +    } +    translit.tables["German transcription second pass lowercase"] = de_tables[3] + +    ------------------------------------------- +    -- Uppercase German simple transcription -- +    ------------------------------------------- + +    de_tables[4] = { -- uppercase +        ["А"] = "A",    ["Б"] = "B",   ["В"] = "W",       ["Г"] = "G", ["Д"] = "D",  ["Е"] = "E", +        ["Ё"] = "Jo",   ["Ж"] = "Sch", ["З"] = "S",       ["И"] = "I", ["Й"] = "J",  ["К"] = "K", +        ["Л"] = "L",    ["М"] = "M",   ["Н"] = "N",       ["О"] = "O", ["П"] = "P",  ["Р"] = "R", +        ["С"] = "S",    ["Т"] = "T",   ["У"] = "U",       ["Ф"] = "F", ["Х"] = "Ch", ["Ц"] = "Z", +        ["Ч"] = "Tsch", ["Ш"] = "Sch", ["Щ"] = "Schtsch", ["Ъ"] = "",  ["Ы"] = "Y",  ["Ь"] = "", +        ["Э"] = "E",    ["Ю"] = "Ju",  ["Я"] = "Ja"  +    } +    translit.tables["German transcription second pass uppercase"] = de_tables[4] + +    local B, P, Cs = lpeg.B, lpeg.P, lpeg.Cs + +    -- All chars are 2-byte. +    local Co = P{ +       P"б" + "в" + "г" + "д" + "ж" + "з" + "к" + "л" + "м" + "н" + +        "п" + "р" + "с" + "т" + "ф" + "х" + "ц" + "ч" + "ш" + "щ" + +        "ъ" + "ь" + +        "Б" + "В" + "Г" + "Д" + "Ж" + "З" + "К" + "Л" + "М" + "Н" + +        "П" + "Р" + "С" + "Т" + "Ф" + "Х" + "Ц" + "Ч" + "Ш" + "Щ" + +        "Ъ" + "Ь" +    } + +    local Vo = P{ +       P"а" + "е" + "ё" + "и" + "й" + "о" + "у" + "ы" + "э" + "я" + "ю" + +        "А" + "Е" + "Ё" + "И" + "Й" + "О" + "У" + "Ы" + "Э" + "Я" + "Ю" +    } + +    local iy = P"и" + P"ы" + P"И" + P"Ы" + +    ------------------------------------------- +    -- Pattern generation. +    ------------------------------------------- + +    local p_transcript  + +    for _, set in next, de_tables do +        for str, rep in next, set do +            if not p_transcript then -- it’ll be empty initially +                p_transcript = P(str) / rep +            else +                p_transcript = p_transcript + (P(str) / rep) +            end +        end +    end + +    local irule  = B(Vo,2) * Cs(P"й") * #Co   / "i" +    local iyrule = B(iy,2) * Cs(P"й") * #Co   / "j" +    local jrule  =           Cs(P"й") * #Vo   / "j" +    local srule  = B(Vo,2) * Cs(P"с") * #Vo   / "ss" +    local ssrule = B(Vo,2) * Cs(P"с") * #P"х" / "ß" +    local jerule = B(Vo,2) * Cs(P"е")         / "je" +    local jorule = B(Vo,2) * Cs(P"ё")         / "jo" + +    translit.future_ru_transcript_de = Cs((iyrule + jrule + irule + jerule + srule + ssrule + jorule + p_transcript + 1)^0) +end +  if not translit.done_ru_trsc_en then +      ---------------------------------------------------------      -- Lowercase English simple transcription---first pass --      --------------------------------------------------------- @@ -348,6 +456,8 @@ if not translit.done_ru_trsc_en then          translit.tables["English transcription ye-rule"] = translit.ru_trsc_en_jerule      end + +    translit.gen_rules_en()      translit.done_ru_trsc_en = true  end @@ -612,21 +722,21 @@ local function transcript (mode, text)          return text      elseif mode == "ru_transcript_de" then - -        translit.gen_rules_de() - -        -- This is possibly slower than using string:gsub. - -        text = tab_subst(text, translit.ru_trsc_jrule) -        text = tab_subst(text, translit.ru_trsc_irule) -        text = tab_subst(text, translit.ru_trsc_jerule) -        text = tab_subst(text, translit.ru_trsc_srule) -        text = tab_subst(text, translit.ru_trsc_sharpsrule) -        text = tab_subst(text, translit.ru_trsc_jorule) -        text = tab_subst(text, translit.ru_trsc_upp_first, translit.ru_trsc_low_first) -        text = tab_subst(text, translit.ru_trsc_upp, translit.ru_trsc_low) - -        return text +        if lpeg.version() == "0.9" then + +            text = tab_subst(text, translit.ru_trsc_jrule) +            text = tab_subst(text, translit.ru_trsc_irule) +            text = tab_subst(text, translit.ru_trsc_jerule) +            text = tab_subst(text, translit.ru_trsc_srule) +            text = tab_subst(text, translit.ru_trsc_sharpsrule) +            text = tab_subst(text, translit.ru_trsc_jorule) +            text = tab_subst(text, translit.ru_trsc_upp_first, translit.ru_trsc_low_first) +            text = tab_subst(text, translit.ru_trsc_upp, translit.ru_trsc_low) + +            return text +        elseif lpeg.version() == "0.10" then +            return translit.future_ru_transcript_de:match(text) +        end      elseif mode == "ru_transcript_en_exp" then @@ -660,8 +770,6 @@ local function transcript (mode, text)      elseif mode == "ru_transcript_en" then  -        translit.gen_rules_en() -          text = tab_subst(text, translit.ru_trsc_en_jerule)          text = tab_subst(text, translit.ru_trsc_en_low_first,   translit.ru_trsc_en_upp_first)          text = tab_subst(text, translit.ru_trsc_en_low,         translit.ru_trsc_en_upp)  | 
