summaryrefslogtreecommitdiff
path: root/src/trans_tables_sr.lua
diff options
context:
space:
mode:
authorPhilipp Gesang <phg@phi-gamma.net>2021-11-21 19:22:18 +0100
committerPhilipp Gesang <phg@phi-gamma.net>2021-11-21 19:29:47 +0100
commit814b93d12bc9a0792b150527495ece0847a343fc (patch)
treed7ce166f7f642956e12db8104493075f9d71d4f6 /src/trans_tables_sr.lua
parent798c814949998d48b06d37b55d7f26d72477bf82 (diff)
downloadtransliterator-814b93d12bc9a0792b150527495ece0847a343fc.tar.gz
reorganize source tree
Diffstat (limited to 'src/trans_tables_sr.lua')
-rw-r--r--src/trans_tables_sr.lua241
1 files changed, 241 insertions, 0 deletions
diff --git a/src/trans_tables_sr.lua b/src/trans_tables_sr.lua
new file mode 100644
index 0000000..4f549c5
--- /dev/null
+++ b/src/trans_tables_sr.lua
@@ -0,0 +1,241 @@
+
+--===========================================================================--
+-- Serbian --
+--===========================================================================--
+
+local translit = thirddata.translit
+local pcache = translit.parser_cache
+local lpegmatch = lpeg.match
+
+
+-- Special thanks to Mojca Miklavec and Arthur Reutenauer for their
+-- assistance in creating these transliteration routines.
+
+if not translit.done_serbian then
+ --------------------------------------------
+ -- Lowercase Serbian (Cyrillic -> Latin) --
+ --------------------------------------------
+ translit.sr_tolt_lower = translit.make_add_dict{
+ ["а"] = "a",
+ ["б"] = "b",
+ ["в"] = "v",
+ ["г"] = "g",
+ ["д"] = "d",
+ ["ђ"] = "đ",
+ ["е"] = "e",
+ ["ж"] = "ž",
+ ["з"] = "z",
+ ["и"] = "i",
+ ["ј"] = "j",
+ ["к"] = "k",
+ ["л"] = "l",
+ ["љ"] = "lj",
+ ["м"] = "m",
+ ["н"] = "n",
+ ["њ"] = "nj",
+ ["о"] = "o",
+ ["п"] = "p",
+ ["р"] = "r",
+ ["с"] = "s",
+ ["т"] = "t",
+ ["ћ"] = "ć",
+ ["у"] = "u",
+ ["ф"] = "f",
+ ["х"] = "h",
+ ["ц"] = "c",
+ ["ч"] = "č",
+ ["џ"] = "dž",
+ ["ш"] = "š",
+ }
+
+ translit.tables["Serbian Cyr->Lat Transliteration lowercase"] = translit.sr_tolt_lower
+
+ --------------------------------------------
+ -- Uppercase Serbian (Cyrillic -> Latin) --
+ --------------------------------------------
+
+ translit.sr_tolt_upper = translit.make_add_dict{
+ ["А"] = "A",
+ ["Б"] = "B",
+ ["В"] = "V",
+ ["Г"] = "G",
+ ["Д"] = "D",
+ ["Ђ"] = "Đ",
+ ["Е"] = "E",
+ ["Ж"] = "Ž",
+ ["З"] = "Z",
+ ["И"] = "I",
+ ["Ј"] = "J",
+ ["К"] = "K",
+ ["Л"] = "L",
+ ["Љ"] = "Lj",
+ ["М"] = "M",
+ ["Н"] = "N",
+ ["Њ"] = "Nj",
+ ["О"] = "O",
+ ["П"] = "P",
+ ["Р"] = "R",
+ ["С"] = "S",
+ ["Т"] = "T",
+ ["Ћ"] = "Ć",
+ ["У"] = "U",
+ ["Ф"] = "F",
+ ["Х"] = "H",
+ ["Ц"] = "C",
+ ["Ч"] = "Č",
+ ["Џ"] = "Dž",
+ ["Ш"] = "Š",
+ }
+
+ translit.tables["Serbian Cyr->Lat Transliteration uppercase"] = translit.sr_tolt_upper
+
+ local function __inverse_tab (t)
+ local result = { }
+ for k,v in next,t do result[v] = k end
+ return result
+ end
+
+ translit.sr_tocy_lower = translit.make_add_dict(__inverse_tab(translit.sr_tolt_lower))
+ translit.sr_tocy_upper = translit.make_add_dict(__inverse_tab(translit.sr_tolt_upper))
+
+
+ --- Good reading up front:
+ --- <http://en.wikipedia.org/wiki/User:Aleksandar_Šušnjar/Serbian_Wikipedia's_Challenges#Real-time_transliteration_for_display>
+ --- <http://www.vokabular.org/forum/index.php?topic=3817.15>
+
+ local except = {
+ ["konjug"] = "конјуг",
+ ["konjunk"] = "конјунк",
+ ["injekc"] = "инјекц",
+ ["injunkt"] = "инјункт",
+ ["panjelin"] = "панјелин",
+ ["tanjug"] = "танјуг",
+ ["vanjezič"] = "ванјезич",
+ ["vanjadransk"] = "ванјадранск",
+
+ ["nadžanj"] = "наджањ",
+ ["nadždrel"] = "надждрел",
+ ["nadžet"] = "наджет",
+ ["nadživ"] = "наджив",
+ ["nadžnj"] = "наджњ",
+ ["nadžup"] = "наджуп",
+ ["odžal"] = "оджал",
+ ["odžar"] = "оджар",
+ ["odživ"] = "оджив",
+ ["odžubor"] = "оджубор",
+ ["odžur"] = "оджур",
+ ["odžvak"] = "оджвак",
+ ["podžanr"] = "поджанр",
+ ["podže"] = "подже", -- “поджећи”
+ }
+
+ local P = lpeg.P
+ local sub, upper = unicode.utf8.sub, unicode.utf8.upper
+
+ local p_tocy, p_i_tocy, p_tolt, p_i_tolt
+
+ for left, right in next, except do -- generating exception patterns for both sides
+ local Left = upper(sub(left, 1, 1)) .. sub(left, 2)
+ local Right = upper(sub(right, 1, 1)) .. sub(right, 2)
+ local LEFT, RIGHT = upper(left), upper(right)
+
+ local p_i_left = P(left) / right + P(Left) / Right + P(LEFT) / RIGHT
+ local p_i_right = P(right) / left + P(Right) / Left + P(RIGHT) / LEFT
+
+ local p_left = P" " * p_i_left
+ local p_right = P" " * p_i_right
+
+ if not p_tocy then
+ p_tocy = p_left
+ p_i_tocy = p_i_left
+ p_tolt = p_right
+ p_i_tolt = p_i_right
+ else
+ p_tocy = p_tocy + p_left
+ p_i_tocy = p_i_tocy + p_i_left
+ p_tolt = p_tolt + p_right
+ p_i_tolt = p_i_tolt + p_i_right
+ end
+ end
+
+ local _p_hintchar = P"*" / ""
+ local hintme = "dln"
+ local _p_tocy_hint, _p_tolt_hint
+
+ for left in hintme:utfcharacters() do
+ local right = translit.sr_tocy_lower[left]
+ local LEFT, RIGHT = upper(left), upper(right)
+ if not _p_tocy_hint then
+ _p_tocy_hint = P(left) / right + P(LEFT) / RIGHT
+ _p_tolt_hint = P(right) / left + P(RIGHT) / LEFT
+ else
+ _p_tocy_hint = _p_tocy_hint + P(left) / right + P(LEFT) / RIGHT
+ _p_tolt_hint = _p_tolt_hint + P(right) / left + P(RIGHT) / LEFT
+ end
+ end
+
+ translit.serbian_exceptions = { }
+ translit.serbian_exceptions.p_tocy = p_tocy
+ translit.serbian_exceptions.p_tolt = p_tolt
+ translit.serbian_exceptions.p_tocy_init = p_i_tocy
+ translit.serbian_exceptions.p_tolt_init = p_i_tolt
+ translit.serbian_exceptions.p_tocy_hint = _p_tocy_hint * _p_hintchar
+ translit.serbian_exceptions.p_tolt_hint = _p_tolt_hint * _p_hintchar
+
+ translit.done_serbian = true
+end
+
+--===========================================================================--
+-- End Of Tables --
+--===========================================================================--
+
+
+local t = translit
+local function sr (mode)
+ local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs
+ local utfchar = translit.utfchar
+ local modestr = "p_" .. mode:match("to..$")
+ local _p_sre = t.serbian_exceptions[modestr]
+ local _p_sre_i = t.serbian_exceptions[modestr .. "_init"]
+
+ local trl_sr = translit.make_add_dict{}
+ trl_sr = t[mode.."_upper"] + t[mode.."_lower"]
+
+ -- transliteration from latin script requires macro handling …
+ local _p_macro = P[[\]] * R("az", "AZ")^1 -- assuming standard catcodes
+ local _p_sr = translit.addrules (trl_sr, _p_sr) / trl_sr
+ if translit.hinting then
+ _p_sr = t.serbian_exceptions[modestr .. "_hint"] + _p_sr
+ end
+
+ local p_sr
+ if translit.sr_except then
+ p_sr = Cs(_p_sre_i^-1 * (_p_macro + _p_sre + _p_sr + utfchar)^0)
+ else
+ p_sr = Cs((_p_macro + _p_sr + utfchar)^0)
+ end
+
+ return p_sr
+end
+
+translit.methods["sr_tolt"] = function (text)
+ local pname = "sr_tolt" .. tostring(translit.hinting) .. tostring(translit.sr_except)
+ local p = pcache[pname]
+ if not p then
+ p = sr("sr_tolt")
+ pcache[pname] = p
+ end
+ return lpegmatch(p, text)
+end
+
+translit.methods["sr_tocy"] = function (text)
+ local pname = "sr_tocy" .. tostring(translit.hinting) .. tostring(translit.sr_except)
+ local p = pcache[pname]
+ if not p then
+ p = sr("sr_tocy")
+ pcache[pname] = p
+ end
+ return lpegmatch(p, text)
+end
+
+-- vim:ft=lua:sw=4:ts=4