summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Philipp Gesang <megas.kapaneus@gmail.com> 2011-05-10 21:33:04 +0200
committer Philipp Gesang <megas.kapaneus@gmail.com> 2011-05-10 21:33:04 +0200
commit f3a4c4e7b2cfa0a12bf465a7753681aff2de39bc (patch)
tree f6a0abc1c2738dfa81794d8455cb2a8c58d4417d
parent 5203b25743770b62410cc6bc9dc3127bcfb03f22 (diff)
download transliterator-f3a4c4e7b2cfa0a12bf465a7753681aff2de39bc.tar.gz
ambiguity hinting
-rw-r--r-- tex/context/third/transliterator/trans_tables_sr.lua 90
1 file changed, 48 insertions, 42 deletions
diff --git a/tex/context/third/transliterator/trans_tables_sr.lua b/tex/context/third/transliterator/trans_tables_sr.lua
index 3a3fa4c..953d934 100644
--- a/tex/context/third/transliterator/trans_tables_sr.lua
+++ b/tex/context/third/transliterator/trans_tables_sr.lua
@@ -9,45 +9,6 @@ if not translit.done_serbian then
-- Lowercase Serbian (Cyrillic -> Latin) --
--------------------------------------------
translit.sr_tolt_lower = translit.make_add_dict{
- ["А"] = "A",
- ["Б"] = "B",
- ["В"] = "V",
- ["Г"] = "G",
- ["Д"] = "D",
- ["Ђ"] = "Đ",
- ["Е"] = "E",
- ["Ж"] = "Ž",
- ["З"] = "Z",
- ["И"] = "I",
- ["Ј"] = "J",
- ["К"] = "K",
- ["Л"] = "L",
- ["Љ"] = "Lj",
- ["М"] = "M",
- ["Н"] = "N",
- ["Њ"] = "Nj",
- ["О"] = "O",
- ["П"] = "P",
- ["Р"] = "R",
- ["С"] = "S",
- ["Т"] = "T",
- ["Ћ"] = "Ć",
- ["У"] = "U",
- ["Ф"] = "F",
- ["Х"] = "H",
- ["Ц"] = "C",
- ["Ч"] = "Č",
- ["Џ"] = "Dž",
- ["Ш"] = "Š",
- }
-
- translit.tables["Serbian Cyr->Lat Transliteration lowercase"] = translit.sr_tolt_lower
-
- --------------------------------------------
- -- Uppercase Serbian (Cyrillic -> Latin) --
- --------------------------------------------
-
- translit.sr_tolt_upper = translit.make_add_dict{
["а"] = "a",
["б"] = "b",
["в"] = "v",
@@ -80,6 +41,45 @@ if not translit.done_serbian then
["ш"] = "š",
}
+ translit.tables["Serbian Cyr->Lat Transliteration lowercase"] = translit.sr_tolt_lower
+
+ --------------------------------------------
+ -- Uppercase Serbian (Cyrillic -> Latin) --
+ --------------------------------------------
+
+ translit.sr_tolt_upper = translit.make_add_dict{
+ ["А"] = "A",
+ ["Б"] = "B",
+ ["В"] = "V",
+ ["Г"] = "G",
+ ["Д"] = "D",
+ ["Ђ"] = "Đ",
+ ["Е"] = "E",
+ ["Ж"] = "Ž",
+ ["З"] = "Z",
+ ["И"] = "I",
+ ["Ј"] = "J",
+ ["К"] = "K",
+ ["Л"] = "L",
+ ["Љ"] = "Lj",
+ ["М"] = "M",
+ ["Н"] = "N",
+ ["Њ"] = "Nj",
+ ["О"] = "O",
+ ["П"] = "P",
+ ["Р"] = "R",
+ ["С"] = "S",
+ ["Т"] = "T",
+ ["Ћ"] = "Ć",
+ ["У"] = "U",
+ ["Ф"] = "F",
+ ["Х"] = "H",
+ ["Ц"] = "C",
+ ["Ч"] = "Č",
+ ["Џ"] = "Dž",
+ ["Ш"] = "Š",
+ }
+
translit.tables["Serbian Cyr->Lat Transliteration uppercase"] = translit.sr_tolt_upper
local function __inverse_tab (t)
@@ -95,8 +95,6 @@ if not translit.done_serbian then
--- Good reading up front:
--- <http://en.wikipedia.org/wiki/User:Aleksandar_Šušnjar/Serbian_Wikipedia's_Challenges#Real-time_transliteration_for_display>
- local hintchar = "|"
-
local except = {
["nadživ"] = "надћив", -- nadživeti and derivatives
}
@@ -131,6 +129,13 @@ if not translit.done_serbian then
end
translit.serbian_exceptions = { }
+
+ local _p_hintchar = P"|" / ""
+ local hintme = P"dln"
+ for left in hintme:utfcharacters() do
+ local right = translit.sr_tocy_lower[left]
+ local LEFT, RIGHT = upper(left), upper(right)
+ end
translit.serbian_exceptions.p_tocy = p_tocy
translit.serbian_exceptions.p_tolt = p_tolt
translit.serbian_exceptions.p_tocy_init = p_i_tocy
@@ -156,7 +161,8 @@ local function sr (mode, text)
-- transliteration from latin script requires macro handling …
local _p_macro = P[[\]] * R("az", "AZ")^1
- local _p_sr = translit.addrules (trl_sr, _p_sr)
+ local _p_sr = translit.addrules (trl_sr)
+
local p_sr = Cs(_p_sre_i^-1 * (_p_macro + _p_sre + (_p_sr / trl_sr) + utfchar)^0)
return p_sr:match(text)