From f3a4c4e7b2cfa0a12bf465a7753681aff2de39bc Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Tue, 10 May 2011 21:33:04 +0200 Subject: ambiguity hinting --- .../third/transliterator/trans_tables_sr.lua | 90 ++++++++++++---------- 1 file changed, 48 insertions(+), 42 deletions(-) diff --git a/tex/context/third/transliterator/trans_tables_sr.lua b/tex/context/third/transliterator/trans_tables_sr.lua index 3a3fa4c..953d934 100644 --- a/tex/context/third/transliterator/trans_tables_sr.lua +++ b/tex/context/third/transliterator/trans_tables_sr.lua @@ -9,45 +9,6 @@ if not translit.done_serbian then -- Lowercase Serbian (Cyrillic -> Latin) -- -------------------------------------------- translit.sr_tolt_lower = translit.make_add_dict{ - ["А"] = "A", - ["Б"] = "B", - ["В"] = "V", - ["Г"] = "G", - ["Д"] = "D", - ["Ђ"] = "Đ", - ["Е"] = "E", - ["Ж"] = "Ž", - ["З"] = "Z", - ["И"] = "I", - ["Ј"] = "J", - ["К"] = "K", - ["Л"] = "L", - ["Љ"] = "Lj", - ["М"] = "M", - ["Н"] = "N", - ["Њ"] = "Nj", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["Ћ"] = "Ć", - ["У"] = "U", - ["Ф"] = "F", - ["Х"] = "H", - ["Ц"] = "C", - ["Ч"] = "Č", - ["Џ"] = "Dž", - ["Ш"] = "Š", - } - - translit.tables["Serbian Cyr->Lat Transliteration lowercase"] = translit.sr_tolt_lower - - -------------------------------------------- - -- Uppercase Serbian (Cyrillic -> Latin) -- - -------------------------------------------- - - translit.sr_tolt_upper = translit.make_add_dict{ ["а"] = "a", ["б"] = "b", ["в"] = "v", @@ -80,6 +41,45 @@ if not translit.done_serbian then ["ш"] = "š", } + translit.tables["Serbian Cyr->Lat Transliteration lowercase"] = translit.sr_tolt_lower + + -------------------------------------------- + -- Uppercase Serbian (Cyrillic -> Latin) -- + -------------------------------------------- + + translit.sr_tolt_upper = translit.make_add_dict{ + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Ђ"] = "Đ", + ["Е"] = "E", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["Ј"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["Љ"] = "Lj", + ["М"] = "M", + ["Н"] = "N", + ["Њ"] = "Nj", + ["О"] = "O", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["Ћ"] = "Ć", + ["У"] = "U", + ["Ф"] = "F", + ["Х"] = "H", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Џ"] = "Dž", + ["Ш"] = "Š", + } + translit.tables["Serbian Cyr->Lat Transliteration uppercase"] = translit.sr_tolt_upper local function __inverse_tab (t) @@ -95,8 +95,6 @@ if not translit.done_serbian then --- Good reading up front: --- - local hintchar = "|" - local except = { ["nadživ"] = "надћив", -- nadživeti and derivatives } @@ -131,6 +129,13 @@ if not translit.done_serbian then end translit.serbian_exceptions = { } + + local _p_hintchar = P"|" / "" + local hintme = P"dln" + for left in hintme:utfcharacters() do + local right = translit.sr_tocy_lower[left] + local LEFT, RIGHT = upper(left), upper(right) + end translit.serbian_exceptions.p_tocy = p_tocy translit.serbian_exceptions.p_tolt = p_tolt translit.serbian_exceptions.p_tocy_init = p_i_tocy @@ -156,7 +161,8 @@ local function sr (mode, text) -- transliteration from latin script requires macro handling … local _p_macro = P[[\]] * R("az", "AZ")^1 - local _p_sr = translit.addrules (trl_sr, _p_sr) + local _p_sr = translit.addrules (trl_sr) + local p_sr = Cs(_p_sre_i^-1 * (_p_macro + _p_sre + (_p_sr / trl_sr) + utfchar)^0) return p_sr:match(text) -- cgit v1.2.3