summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> 2011-05-10 15:09:40 +0200
committer Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> 2011-05-10 15:09:40 +0200
commit 025223cd801e62eeae6085dcd1a516975bd88260 (patch)
tree d4b9461355c480819f7533d6de02c9b636de045e
parent 6565f523e31cc7befbd7b473a2e9bc006559ad6c (diff)
download transliterator-025223cd801e62eeae6085dcd1a516975bd88260.tar.gz
yet better exception handling
-rw-r--r-- doc/context/third/transliterator/transliterator.tex 2
-rw-r--r-- tex/context/third/transliterator/trans_tables_sr.lua 24
2 files changed, 18 insertions, 8 deletions
diff --git a/doc/context/third/transliterator/transliterator.tex b/doc/context/third/transliterator/transliterator.tex
index b11cb3b..a758dbb 100644
--- a/doc/context/third/transliterator/transliterator.tex
+++ b/doc/context/third/transliterator/transliterator.tex
@@ -730,6 +730,8 @@ alphabets are \type{sr_tolt} and \type{sr_tocy}.
\trlex{sr_tocy}{hr}{sr}{computer-modern-unicode}{%
Transliteration latinica \rightarrow\ ćirilica.%
}{%
+ nadživeti, Nadživeti, NADŽIVETI
+
Srpski jezik je jedan od slovenskih jezika iz porodice
indoevropskih jezika. Prvi pisani spomenici u srpskoj
redakciji staroslovenskog jezika potiču iz XI i XII veka.
diff --git a/tex/context/third/transliterator/trans_tables_sr.lua b/tex/context/third/transliterator/trans_tables_sr.lua
index 0f9eae4..8e27168 100644
--- a/tex/context/third/transliterator/trans_tables_sr.lua
+++ b/tex/context/third/transliterator/trans_tables_sr.lua
@@ -92,23 +92,30 @@ if not translit.done_serbian then
translit.sr_tocy_upper = translit.make_add_dict(__inverse_tab(translit.sr_tolt_upper))
local except = {
- ["nadživeti"] = "надживети",
+ ["nadživ"] = "наджив", -- nadživeti and derivatives
}
local P = lpeg.P
local sub, upper = unicode.utf8.sub, unicode.utf8.upper
- local sre, p_sre = {}
+ local sre, srei, p_sre, p_sre_i = { }, { }
for lat, cyr in next, except do
local Lat = upper(sub(lat, 1, 1)) .. sub(lat, 2)
local Cyr = upper(sub(cyr, 1, 1)) .. sub(cyr, 2)
local LAT, CYR = upper(lat), upper(cyr)
- p_sre = p_sre and p_sre + P(lat) + Lat + LAT or P(lat) + Lat + LAT
- sre[lat], sre[Lat], sre[LAT] = cyr, Cyr, CYR
+ local p_lat = P" " * (P(lat) + Lat + LAT)
+
+ p_sre_i = p_sre_i and p_sre_i + P(lat) + Lat + LAT or P(lat) + Lat + LAT
+ p_sre = p_sre and p_sre + p_lat or p_lat
+
+ srei[lat], srei[Lat], srei[LAT] = cyr, Cyr, CYR
+ sre[" "..lat], sre[" "..Lat], sre[" "..LAT] = " "..cyr, " "..Cyr, " "..CYR
end
- translit.serbian_exceptions = sre
- translit.p_serbian_exceptions = p_sre
+ translit.serbian_exceptions = sre
+ translit.serbian_exceptions_init = srei
+ translit.p_serbian_exceptions = p_sre
+ translit.p_serbian_exceptions_init = p_sre_i
translit.done_serbian = true
end
@@ -122,7 +129,8 @@ local t = translit
local function sr (mode, text)
local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs
local utfchar = translit.utfchar
- local _p_sre = translit.p_serbian_exceptions / translit.serbian_exceptions
+ local _p_sre = translit.p_serbian_exceptions / translit.serbian_exceptions
+ local _p_sre_i = translit.p_serbian_exceptions_init / translit.serbian_exceptions_init
local trl_sr = translit.make_add_dict{}
trl_sr = t[mode.."_upper"] + t[mode.."_lower"]
@@ -130,7 +138,7 @@ local function sr (mode, text)
-- transliteration from latin script requires macro handling …
local _p_macro = P[[\]] * R("az", "AZ")^1
local _p_sr = translit.addrules (trl_sr, _p_sr)
- local p_sr = Cs((_p_macro + _p_sre + (_p_sr / trl_sr) + utfchar)^0)
+ local p_sr = Cs(_p_sre_i^-1 * (_p_macro + _p_sre + (_p_sr / trl_sr) + utfchar)^0)
return p_sr:match(text)
end