--===========================================================================--
--                      Legacy national transliterations                     --
--===========================================================================--
---------------------------------
-- German simple transcription --
---------------------------------
-- Reference:   „DUDEN. Rechtschreibung der deutschen Sprache“; 20. Aufl.,
--              Mannheim et al. 1991.

--------------------------------------------------------
-- Lowercase German simple transcription---first pass --
--------------------------------------------------------

-- Multi-character lowercase sequences that have to be replaced before the
-- plain one-letter table is applied.  Keys starting with a space stand for a
-- letter following a space; the space is kept in the replacement.
translit.ru_trsc_low_first = translit.make_add_dict{
  [" е"] = " je",
  ["ъе"] = "je",
  ["ье"] = "je",
  [" ё"] = " jo",
  ["ъё"] = "jo",
  ["ьё"] = "jo",
  ["жё"] = "scho",
  ["чё"] = "tscho",
  ["шё"] = "scho",
  ["щё"] = "schtscho",
  ["ьи"] = "ji",
  ["ьо"] = "jo",
  ["ий"] = "i",
  ["ый"] = "y",
  ["кс"] = "x" -- Extraordinarily stupid one.
}

translit.tables["German transcription first pass lowercase"] = translit.ru_trsc_low_first

--------------------------------------------------------
-- Uppercase German simple transcription---first pass --
--------------------------------------------------------

-- Multi-character sequences with a leading capital that have to be replaced
-- before the plain one-letter table is applied.  Keys starting with a space
-- stand for a letter following a space; the space is kept in the replacement
-- so that word boundaries survive.
translit.ru_trsc_upp_first = translit.make_add_dict{
  [" Е"] = " Je",
  ["Ъе"] = "Je",  -- Pedantic, isn't it?  (Key is Cyrillic throughout.)
  ["Ье"] = "Je",
  [" Ё"] = " Jo",
  ["Ъё"] = "Jo",
  ["Ьё"] = "Jo",
  ["Жё"] = "Scho",
  ["Чё"] = "Tscho",
  ["Шё"] = "Scho",
  ["Щё"] = "Schtscho",
  ["Кс"] = "X"    -- Capitalised counterpart of lowercase “кс” -> “x”.
}

translit.tables["German transcription first pass uppercase"] = translit.ru_trsc_upp_first

-------------------------------------------
-- Lowercase German simple transcription --
-------------------------------------------

-- Second-pass map for the German transcription: one lowercase Cyrillic
-- letter per key.  The hard and soft signs map to the empty string.
translit.ru_trsc_low = translit.make_add_dict({
  ["а"] = "a",    ["б"] = "b",    ["в"] = "w",    ["г"] = "g",
  ["д"] = "d",    ["е"] = "e",    ["ё"] = "jo",   ["ж"] = "sch",
  ["з"] = "s",    ["и"] = "i",    ["й"] = "i",    ["к"] = "k",
  ["л"] = "l",    ["м"] = "m",    ["н"] = "n",    ["о"] = "o",
  ["п"] = "p",    ["р"] = "r",    ["с"] = "s",    ["т"] = "t",
  ["у"] = "u",    ["ф"] = "f",    ["х"] = "ch",   ["ц"] = "z",
  ["ч"] = "tsch", ["ш"] = "sch",  ["щ"] = "schtsch",
  ["ъ"] = "",     ["ы"] = "y",    ["ь"] = "",     ["э"] = "e",
  ["ю"] = "ju",   ["я"] = "ja",
})

translit.tables["German transcription second pass lowercase"] = translit.ru_trsc_low

-------------------------------------------
-- Uppercase German simple transcription --
-------------------------------------------

-- Second-pass map for the German transcription: one uppercase Cyrillic
-- letter per key.  The hard and soft signs map to the empty string.
translit.ru_trsc_upp = translit.make_add_dict({
  ["А"] = "A",    ["Б"] = "B",    ["В"] = "W",    ["Г"] = "G",
  ["Д"] = "D",    ["Е"] = "E",    ["Ё"] = "Jo",   ["Ж"] = "Sch",
  ["З"] = "S",    ["И"] = "I",    ["Й"] = "J",    ["К"] = "K",
  ["Л"] = "L",    ["М"] = "M",    ["Н"] = "N",    ["О"] = "O",
  ["П"] = "P",    ["Р"] = "R",    ["С"] = "S",    ["Т"] = "T",
  ["У"] = "U",    ["Ф"] = "F",    ["Х"] = "Ch",   ["Ц"] = "Z",
  ["Ч"] = "Tsch", ["Ш"] = "Sch",  ["Щ"] = "Schtsch",
  ["Ъ"] = "",     ["Ы"] = "Y",    ["Ь"] = "",     ["Э"] = "E",
  ["Ю"] = "Ju",   ["Я"] = "Ja",
})

translit.tables["German transcription second pass uppercase"] = translit.ru_trsc_upp

-- The letters и and ы in both cases; used by the second й-rule
-- ([иы]йC -> [иы]jC) of the German transcription.
translit.ru_trsc_iy = {
  "и", "ы",
  "И", "Ы",
}

--- Generate the context-dependent rule tables of the German transcription.
-- Every rule is a substitution dictionary enumerating all combinations
-- (including nonsense galore) of the letters involved; the sets of Russian
-- vowels and consonants (translit.ru_vowels, translit.ru_consonants) are
-- expected to have been defined earlier.  Each dictionary is stored both as
-- a field of `translit` and under a descriptive name in `translit.tables`.
-- Only the Cyrillic context is rewritten here; the remaining Cyrillic letters
-- of each replacement are left for later passes.
function translit.gen_rules_de()
    local vowels     = translit.ru_vowels
    local consonants = translit.ru_consonants

    -- The й-rule, VйC -> ViC
    local irule = translit.make_add_dict{}
    for _, vow in ipairs(vowels) do
        for _, cons in ipairs(consonants) do
            irule[vow .. "й" .. cons] = vow .. "i" .. cons
        end
    end
    translit.ru_trsc_irule = irule
    translit.tables["German transcription i-rule"] = irule

    -- The second й-rule, йV -> jV && [иы]йC -> [иы]jC
    -- Built with make_add_dict like every other rule table so that all of
    -- them can be combined in the same way.
    local jrule = translit.make_add_dict{}
    for _, vow in ipairs(vowels) do
        jrule["й" .. vow] = "j" .. vow
    end
    for _, cons in ipairs(consonants) do
        for _, iy in ipairs(translit.ru_trsc_iy) do
            jrule[iy .. "й" .. cons] = iy .. "j" .. cons
        end
    end
    translit.ru_trsc_jrule = jrule
    translit.tables["German transcription j-rule"] = jrule

    -- The с-rule, VсV -> VssV
    local srule = translit.make_add_dict{}
    for _, vow_1 in ipairs(vowels) do
        for _, vow_2 in ipairs(vowels) do
            srule[vow_1 .. "с" .. vow_2] = vow_1 .. "ss" .. vow_2
        end
    end
    translit.ru_trsc_srule = srule
    translit.tables["German transcription s-rule"] = srule

    -- The sharp-s-rule, Vсх -> Vßх
    local sharpsrule = translit.make_add_dict{}
    for _, vow in ipairs(vowels) do
        sharpsrule[vow .. "сх"] = vow .. "ßх"
    end
    translit.ru_trsc_sharpsrule = sharpsrule
    translit.tables["German transcription sharp-s-rule"] = sharpsrule

    -- The е-rule, Vе -> Vje
    local jerule = translit.make_add_dict{}
    for _, vow in ipairs(vowels) do
        jerule[vow .. "е"] = vow .. "je"
    end
    translit.ru_trsc_jerule = jerule
    translit.tables["German transcription je-rule"] = jerule

    -- The ё-rule, Vё -> Vjo
    -- This should be redundant as [жцчшщ]ё -> o, else ё -> jo .
    -- Somebody should teach those DUDEN-guys parsimony.
    local jorule = translit.make_add_dict{}
    for _, vow in ipairs(vowels) do
        jorule[vow .. "ё"] = vow .. "jo"
    end
    translit.ru_trsc_jorule = jorule
    translit.tables["German transcription (redundant) jo-rule"] = jorule

end

---------------------------------------------------------
-- Lowercase English simple transcription---first pass --
---------------------------------------------------------

-- Multi-character lowercase sequences of the English transcription that have
-- to be replaced before the plain one-letter table is applied.  The key
-- starting with a space stands for “е” following a space; the space is kept.
translit.ru_trsc_en_low_first = translit.make_add_dict{
  [" е"] = " ye",
  ["ъе"] = "ye",
  ["ье"] = "ye",
  ["ьи"] = "yi",
}

translit.tables["English transcription lowercase first pass"] = translit.ru_trsc_en_low_first

---------------------------------------------------------
-- Uppercase English simple transcription---first pass --
---------------------------------------------------------

-- Multi-character sequences with a leading capital for the English
-- transcription, replaced before the plain one-letter table is applied.
-- All keys consist of Cyrillic letters only (plus the leading space of the
-- first entry, which is kept in the replacement).
translit.ru_trsc_en_upp_first = translit.make_add_dict{
  [" Е"] = " Ye",
  ["Ъе"] = "Ye",
  ["Ье"] = "Ye",
}

translit.tables["English transcription uppercase first pass"] = translit.ru_trsc_en_upp_first

--------------------------------------------
-- Lowercase English simple transcription --
--------------------------------------------

-- Second-pass map for the English transcription: one lowercase Cyrillic
-- letter per key.  The hard and soft signs map to the empty string.
translit.ru_trsc_en_low = translit.make_add_dict({
  ["а"] = "a",    ["б"] = "b",    ["в"] = "v",    ["г"] = "g",
  ["д"] = "d",    ["е"] = "e",    ["ё"] = "e",    ["ж"] = "zh",
  ["з"] = "z",    ["и"] = "i",    ["й"] = "y",    ["к"] = "k",
  ["л"] = "l",    ["м"] = "m",    ["н"] = "n",    ["о"] = "o",
  ["п"] = "p",    ["р"] = "r",    ["с"] = "s",    ["т"] = "t",
  ["у"] = "u",    ["ф"] = "f",    ["х"] = "kh",   ["ц"] = "ts",
  ["ч"] = "ch",   ["ш"] = "sh",   ["щ"] = "shsh",
  ["ъ"] = "",     ["ы"] = "y",    ["ь"] = "",     ["э"] = "e",
  ["ю"] = "yu",   ["я"] = "ya",
})

translit.tables["English transcription lowercase second pass"] = translit.ru_trsc_en_low

--------------------------------------------
-- Uppercase English simple transcription --
--------------------------------------------

-- Second-pass map for the English transcription: one uppercase Cyrillic
-- letter per key.  The hard and soft signs map to the empty string.
translit.ru_trsc_en_upp = translit.make_add_dict({
  ["А"] = "A",    ["Б"] = "B",    ["В"] = "V",    ["Г"] = "G",
  ["Д"] = "D",    ["Е"] = "E",    ["Ё"] = "E",    ["Ж"] = "Zh",
  ["З"] = "Z",    ["И"] = "I",    ["Й"] = "Y",    ["К"] = "K",
  ["Л"] = "L",    ["М"] = "M",    ["Н"] = "N",    ["О"] = "O",
  ["П"] = "P",    ["Р"] = "R",    ["С"] = "S",    ["Т"] = "T",
  ["У"] = "U",    ["Ф"] = "F",    ["Х"] = "Kh",   ["Ц"] = "Ts",
  ["Ч"] = "Ch",   ["Ш"] = "Sh",   ["Щ"] = "Shsh",
  ["Ъ"] = "",     ["Ы"] = "Y",    ["Ь"] = "",     ["Э"] = "E",
  ["Ю"] = "Yu",   ["Я"] = "Ya",
})

translit.tables["English transcription uppercase second pass"] = translit.ru_trsc_en_upp

        
--- Generate the context-dependent rule table of the English transcription.
-- Stores the result as translit.ru_trsc_en_jerule and registers it in
-- translit.tables.
function translit.gen_rules_en ()
    -- The English е-rule, Vе -> Vye: one entry per vowel found in
    -- translit.ru_vowels.
    local ye_rule = translit.make_add_dict{}
    translit.ru_trsc_en_jerule = ye_rule

    for _, vowel in ipairs(translit.ru_vowels) do
        ye_rule[vowel .. "е"] = vowel .. "ye"
    end

    translit.tables["English transcription ye-rule"] = translit.ru_trsc_en_jerule
end


-----------------------------------
-- Lowercase Czech transcription --
-----------------------------------

-- Czech transcription of the lowercase Russian alphabet, one letter per key.
-- The hard and soft signs are mapped onto themselves.
--
-- Maybe we should do things like ню -> ňu and тя -> ťa, but that would
-- complicate things a bit and linguists might not agree.
translit.ru_trsc_cz_low = translit.make_add_dict({
  ["а"] = "a",    ["б"] = "b",    ["в"] = "v",    ["г"] = "g",
  ["д"] = "d",    ["е"] = "e",    ["ё"] = "ë",    ["ж"] = "ž",
  ["з"] = "z",    ["и"] = "i",    ["й"] = "j",    ["к"] = "k",
  ["л"] = "l",    ["м"] = "m",    ["н"] = "n",    ["о"] = "o",
  ["п"] = "p",    ["р"] = "r",    ["с"] = "s",    ["т"] = "t",
  ["у"] = "u",    ["ф"] = "f",    ["х"] = "ch",   ["ц"] = "c",
  ["ч"] = "č",    ["ш"] = "š",    ["щ"] = "šč",
  ["ъ"] = "ъ",    ["ы"] = "y",    ["ь"] = "ь",    ["э"] = "è",
  ["ю"] = "ju",   ["я"] = "ja",
})

translit.tables["Czech transcription lowercase"] = translit.ru_trsc_cz_low

-----------------------------------
-- Uppercase Czech transcription --
-----------------------------------

-- Czech transcription of the uppercase Russian alphabet, one letter per key.
-- The hard and soft signs are mapped onto themselves.
translit.ru_trsc_cz_upp = translit.make_add_dict({
  ["А"] = "A",    ["Б"] = "B",    ["В"] = "V",    ["Г"] = "G",
  ["Д"] = "D",    ["Е"] = "E",    ["Ё"] = "Ë",    ["Ж"] = "Ž",
  ["З"] = "Z",    ["И"] = "I",    ["Й"] = "J",    ["К"] = "K",
  ["Л"] = "L",    ["М"] = "M",    ["Н"] = "N",    ["О"] = "O",
  ["П"] = "P",    ["Р"] = "R",    ["С"] = "S",    ["Т"] = "T",
  ["У"] = "U",    ["Ф"] = "F",    ["Х"] = "Ch",   ["Ц"] = "C",
  ["Ч"] = "Č",    ["Ш"] = "Š",    ["Щ"] = "Šč",
  ["Ъ"] = "Ъ",    ["Ы"] = "Y",    ["Ь"] = "Ь",    ["Э"] = "È",
  ["Ю"] = "Ju",   ["Я"] = "Ja",
})

translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp

----------------------------------------------
-- Lowercase Additional Czech Transcription --
----------------------------------------------

-- Supplementary lowercase letters for the Czech transcription; registered
-- below as the table for OCS and pre-1918 texts.
translit.ru_trsc_cz_add_low = translit.make_add_dict({
  ["ѕ"] = "dz",   ["з"] = "z",    ["ꙁ"] = "z",    ["і"] = "ï",
  ["ѹ"] = "u",    ["ѡ"] = "ō",    ["ѣ"] = "ě",    ["ѥ"] = "je",
  ["ѧ"] = "ę",    ["ѩ"] = "ję",   ["ѫ"] = "ǫ",    ["ѭ"] = "jǫ",
  ["ѯ"] = "ks",   ["ѱ"] = "ps",   ["ѳ"] = "th",   ["ѵ"] = "ÿ",
})

translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit.ru_trsc_cz_add_low


----------------------------------------------
-- Uppercase Additional Czech Transcription --
----------------------------------------------

-- Supplementary uppercase letters for the Czech transcription; registered
-- below as the table for OCS and pre-1918 texts.
translit.ru_trsc_cz_add_upp = translit.make_add_dict({
  ["Ѕ"] = "Dz",   ["З"] = "Z",    ["Ꙁ"] = "Z",    ["І"] = "Ï",
  ["Ѹ"] = "U",    ["Ѡ"] = "Ō",    ["Ѣ"] = "Ě",    ["Ѥ"] = "Je",
  ["Ѧ"] = "Ę",    ["Ѩ"] = "Ję",   ["Ѫ"] = "Ǫ",    ["Ѭ"] = "Jǫ",
  ["Ѯ"] = "Ks",   ["Ѱ"] = "Ps",   ["Ѳ"] = "Th",   ["Ѵ"] = "Ÿ",
})

translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit.ru_trsc_cz_add_upp

--===========================================================================--
--                              End Of Tables                                --
--===========================================================================--

--- Transcribe Cyrillic text according to one of the legacy national schemes.
-- Recognised modes:
--   "ru_transcript_de"      German, successive table substitutions
--   "ru_transcript_de_exp"  German, single-pass LPeg grammar
--   "ru_transcript_en"      English, successive table substitutions
--   "ru_transcript_en_exp"  English, single-pass LPeg grammar
--   "ru_cz"                 Czech
--   "ocs_cz"                Czech plus the additional OCS / pre-1918 letters
-- @param mode  string selecting the scheme (see above)
-- @param text  UTF-8 encoded string to transcribe
-- @return the transcribed string; nil if `mode` is none of the above
function translit.transcript (mode, text)
    local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs

    -- One UTF-8 encoded character.  Three-byte sequences start with
    -- \224-\239 and four-byte sequences with \240-\244; treating \240 as a
    -- three-byte lead would leave a stray continuation byte behind and stop
    -- the match at that point.
    -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html
    local utfchar = R("\000\127") +
        R("\194\223") * R("\128\191") +
        R("\224\239") * R("\128\191") * R("\128\191") +
        R("\240\244") * R("\128\191") * R("\128\191") * R("\128\191")

    -- Add keys of a dictionary to a ruleset (an ordered choice of literals).
    -- `rules` may be nil, in which case a new ruleset is started; nil is
    -- returned if there was nothing to add to an empty ruleset.
    local function addrules (dict, rules)
        for key, _ in pairs(dict) do
            if rules == nil then rules = P(key)
            else rules = rules + P(key)
            end
        end
        return rules
    end

    -- Replace every occurrence of a key of the given dictionaries in `s` by
    -- its value; all other characters are copied unchanged.
    local function tab_subst (s, ...)
        local merged = translit.make_add_dict{}
        for _, tab in ipairs({...}) do
            merged = merged + tab
        end
        local keys = addrules(merged, nil)
        if keys == nil then return s end -- Nothing to substitute.
        local fp = Cs((Cs(keys / merged) + utfchar)^0)
        return fp:match(s)
    end

    -- The following is needed because lpeg.S doesn't work with utf:
    -- build an ordered choice out of a list of (multi-byte) characters.
    local function any_of (list)
        local alt
        for _, item in ipairs(list) do
            if alt == nil then alt = P(item)
            else alt = alt + P(item)
            end
        end
        return alt
    end

    local vow = any_of(translit.ru_vowels)
    local con = any_of(translit.ru_consonants)
    local iy  = any_of(translit.ru_trsc_iy)

    if mode == "ru_transcript_de_exp" then

        local de_low_upp = translit.ru_trsc_upp + translit.ru_trsc_low

        local twochar
        twochar = addrules( translit.ru_trsc_low_first, twochar )
        twochar = addrules( translit.ru_trsc_upp_first, twochar )

        local tworepl = translit.ru_trsc_low_first + translit.ru_trsc_upp_first

        -- The й-rule, VйC -> ViC
        local function V_i_C (s)
            local ante = utf.sub(s, 1, 1)
            local post = utf.sub(s, 3, 3)
            return de_low_upp[ante] .. "i" .. de_low_upp[post]
        end

        -- The second й-rule, йV -> jV && [иы]йC -> [иы]jC
        local function iy_j_C (s)
            local ante = utf.sub(s, 1, 1)
            local post = utf.sub(s, 3, 3)
            return de_low_upp[ante] .. "j" .. de_low_upp[post]
        end

        local function j_V (s)
            local post = utf.sub(s, 2, 2)
            return "j" .. de_low_upp[post]
        end

        -- The с-rule, VсV -> VssV
        local function V_ss_V (s)
            local ante = utf.sub(s, 1, 1)
            local post = utf.sub(s, 3, 3)
            return de_low_upp[ante] .. "ss" .. de_low_upp[post]
        end

        -- The sharp-s-rule, Vсх -> Vßх
        local function V_sz_ch (s)
            local ante = utf.sub(s, 1, 1)
            return de_low_upp[ante] .. "ßch"
        end

        -- The е-rule, Vе -> Vje
        local function V_je (s)
            local ante = utf.sub(s, 1, 1)
            return de_low_upp[ante] .. "je"
        end

        -- Reapplying V_je on its result + next char would make the following
        -- rule obsolete.
        local function V_jeje (s)
            local ante = utf.sub(s, 1, 1)
            return de_low_upp[ante] .. "jeje"
        end

        -- The ё-rule, Vё -> Vjo
        -- This should be redundant as [жцчшщ]ё -> o, else ё -> jo .
        -- Somebody should teach those DUDEN guys parsimony.
        local function V_jo (s)
            local ante = utf.sub(s, 1, 1)
            return de_low_upp[ante] .. "jo"
        end

        local iyrule    = Cs((iy * "й" * con)   / iy_j_C)
        local jrule     = Cs(("й" * vow)        / j_V)
        local irule     = Cs((vow * "й" * con)  / V_i_C)

        local ssrule    = Cs((vow * "с" * vow)  / V_ss_V)
        local szrule    = Cs((vow * "сх")       / V_sz_ch)

        -- The double-е rule has to be tried before the single one.
        local jjrule    = Cs((vow * "ее")       / V_jeje)
        local jerule    = Cs((vow * "е")        / V_je)
        local jorule    = Cs((vow * "ё")        / V_jo)

        local dvoje     = Cs(twochar            / tworepl)
        local other     = Cs((utfchar)          / de_low_upp)

        local izhe      = iyrule + jrule + irule
        local slovo     = ssrule + szrule
        local jest      = jjrule + jerule + jorule

        local g = Cs((izhe + slovo + jest + dvoje + other + utfchar)^0)

        return g:match(text)

    elseif mode == "ru_transcript_de" then

        translit.gen_rules_de()

        -- This is possibly slower than using string:gsub.
        -- The order of the passes matters: the context rules have to see the
        -- Cyrillic text before the plain letter tables replace it.

        text = tab_subst(text, translit.ru_trsc_jrule)
        text = tab_subst(text, translit.ru_trsc_irule)
        text = tab_subst(text, translit.ru_trsc_jerule)
        text = tab_subst(text, translit.ru_trsc_srule)
        text = tab_subst(text, translit.ru_trsc_sharpsrule)
        text = tab_subst(text, translit.ru_trsc_jorule)
        text = tab_subst(text, translit.ru_trsc_upp_first, translit.ru_trsc_low_first)
        text = tab_subst(text, translit.ru_trsc_upp, translit.ru_trsc_low)

        return text

    elseif mode == "ru_transcript_en_exp" then

        local en_low_upp = translit.ru_trsc_en_low + translit.ru_trsc_en_upp

        local twochar
        twochar = addrules( translit.ru_trsc_en_low_first, twochar)
        twochar = addrules( translit.ru_trsc_en_upp_first, twochar)

        local tworepl = translit.ru_trsc_en_low_first + translit.ru_trsc_en_upp_first

        -- The е-rule, Vе -> Vye
        local function V_je (s)
            local ante = utf.sub(s, 1, 1)
            return en_low_upp[ante] .. "ye"
        end

        local jerule    = Cs((vow * "е")        / V_je)

        local dvoje     = Cs(twochar            / tworepl)
        local other     = Cs((utfchar)          / en_low_upp)

        local g = Cs((dvoje + jerule + other + utfchar)^0)

        return g:match(text)

    elseif mode == "ru_transcript_en" then

        translit.gen_rules_en()

        text = tab_subst(text, translit.ru_trsc_en_jerule)
        text = tab_subst(text, translit.ru_trsc_en_low_first,   translit.ru_trsc_en_upp_first)
        text = tab_subst(text, translit.ru_trsc_en_low,         translit.ru_trsc_en_upp)

        return text

    elseif mode == "ru_cz" or mode ==  "ocs_cz" then
        text = tab_subst(text, translit.ru_trsc_cz_low, translit.ru_trsc_cz_upp)
        if mode == "ocs_cz" then
            text = tab_subst(text, translit.ru_trsc_cz_add_low, translit.ru_trsc_cz_add_upp)
        end

        return text
    end

    -- Any other mode falls through and yields nil.
end