From 025223cd801e62eeae6085dcd1a516975bd88260 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Tue, 10 May 2011 15:09:40 +0200 Subject: yet better exception handling --- .../third/transliterator/transliterator.tex | 2 ++ .../third/transliterator/trans_tables_sr.lua | 24 ++++++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/doc/context/third/transliterator/transliterator.tex b/doc/context/third/transliterator/transliterator.tex index b11cb3b..a758dbb 100644 --- a/doc/context/third/transliterator/transliterator.tex +++ b/doc/context/third/transliterator/transliterator.tex @@ -730,6 +730,8 @@ alphabets are \type{sr_tolt} and \type{sr_tocy}. \trlex{sr_tocy}{hr}{sr}{computer-modern-unicode}{% Transliteration latinica \rightarrow\ ćirilica.% }{% + nadživeti, Nadživeti, NADŽIVETI + Srpski jezik je jedan od slovenskih jezika iz porodice indoevropskih jezika. Prvi pisani spomenici u srpskoj redakciji staroslovenskog jezika potiču iz XI i XII veka. diff --git a/tex/context/third/transliterator/trans_tables_sr.lua b/tex/context/third/transliterator/trans_tables_sr.lua index 0f9eae4..8e27168 100644 --- a/tex/context/third/transliterator/trans_tables_sr.lua +++ b/tex/context/third/transliterator/trans_tables_sr.lua @@ -92,23 +92,30 @@ if not translit.done_serbian then translit.sr_tocy_upper = translit.make_add_dict(__inverse_tab(translit.sr_tolt_upper)) local except = { - ["nadživeti"] = "надживети", + ["nadživ"] = "наджив", -- nadživeti and derivatives } local P = lpeg.P local sub, upper = unicode.utf8.sub, unicode.utf8.upper - local sre, p_sre = {} + local sre, srei, p_sre, p_sre_i = { }, { } for lat, cyr in next, except do local Lat = upper(sub(lat, 1, 1)) .. sub(lat, 2) local Cyr = upper(sub(cyr, 1, 1)) .. sub(cyr, 2) local LAT, CYR = upper(lat), upper(cyr) - p_sre = p_sre and p_sre + P(lat) + Lat + LAT or P(lat) + Lat + LAT - sre[lat], sre[Lat], sre[LAT] = cyr, Cyr, CYR + local p_lat = P" " * (P(lat) + Lat + LAT) + + p_sre_i = p_sre_i and p_sre_i + P(lat) + Lat + LAT or P(lat) + Lat + LAT + p_sre = p_sre and p_sre + p_lat or p_lat + + srei[lat], srei[Lat], srei[LAT] = cyr, Cyr, CYR + sre[" "..lat], sre[" "..Lat], sre[" "..LAT] = " "..cyr, " "..Cyr, " "..CYR end - translit.serbian_exceptions = sre - translit.p_serbian_exceptions = p_sre + translit.serbian_exceptions = sre + translit.serbian_exceptions_init = srei + translit.p_serbian_exceptions = p_sre + translit.p_serbian_exceptions_init = p_sre_i translit.done_serbian = true end @@ -122,7 +129,8 @@ local t = translit local function sr (mode, text) local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs local utfchar = translit.utfchar - local _p_sre = translit.p_serbian_exceptions / translit.serbian_exceptions + local _p_sre = translit.p_serbian_exceptions / translit.serbian_exceptions + local _p_sre_i = translit.p_serbian_exceptions_init / translit.serbian_exceptions_init local trl_sr = translit.make_add_dict{} trl_sr = t[mode.."_upper"] + t[mode.."_lower"] @@ -130,7 +138,7 @@ local function sr (mode, text) -- transliteration from latin script requires macro handling … local _p_macro = P[[\]] * R("az", "AZ")^1 local _p_sr = translit.addrules (trl_sr, _p_sr) - local p_sr = Cs((_p_macro + _p_sre + (_p_sr / trl_sr) + utfchar)^0) + local p_sr = Cs(_p_sre_i^-1 * (_p_macro + _p_sre + (_p_sr / trl_sr) + utfchar)^0) return p_sr:match(text) end -- cgit v1.2.3