From 5203b25743770b62410cc6bc9dc3127bcfb03f22 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Tue, 10 May 2011 19:31:50 +0200 Subject: fixed application order of multi-char rules --- tex/context/third/transliterator/trans_tables_sr.lua | 7 ++++++- tex/context/third/transliterator/transliterator.lua | 19 ++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tex/context/third/transliterator/trans_tables_sr.lua b/tex/context/third/transliterator/trans_tables_sr.lua index 8a4744a..3a3fa4c 100644 --- a/tex/context/third/transliterator/trans_tables_sr.lua +++ b/tex/context/third/transliterator/trans_tables_sr.lua @@ -91,6 +91,12 @@ if not translit.done_serbian then translit.sr_tocy_lower = translit.make_add_dict(__inverse_tab(translit.sr_tolt_lower)) translit.sr_tocy_upper = translit.make_add_dict(__inverse_tab(translit.sr_tolt_upper)) + + --- Good reading up front: + --- + + local hintchar = "|" + local except = { ["nadživ"] = "надћив", -- nadživeti and derivatives } @@ -98,7 +104,6 @@ if not translit.done_serbian then local P = lpeg.P local sub, upper = unicode.utf8.sub, unicode.utf8.upper - --local e_tocy, e_i_tocy, e_tolt, e_i_tolt = { }, { }, { }, { } local p_tocy, p_i_tocy, p_tolt, p_i_tolt for left, right in next, except do -- generating exception patterns for both sides diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua index d794c05..f1d34ae 100644 --- a/tex/context/third/transliterator/transliterator.lua +++ b/tex/context/third/transliterator/transliterator.lua @@ -84,11 +84,24 @@ end -- Generate a rule pattern from hash table. do local P, R, V = lpeg.P, lpeg.R, lpeg.V + local len = unicode.utf8.len + -- multi-char rules first function translit.addrules (dict, rules) - for i, _ in pairs(dict) do - if rules == nil then rules = P(i) - else rules = rules + P(i) + local by_length, occurring_lengths = { }, { } + for chr, _ in next, dict do + local l = len(chr) + if not by_length[l] then + by_length[l] = { } + occurring_lengths[#occurring_lengths+1] = l + end + by_length[l][#by_length[l]+1] = chr + end + table.sort(occurring_lengths) + for i=#occurring_lengths, 1, -1 do + local l = occurring_lengths[i] + for _, chr in next, by_length[l] do + rules = rules and rules + P(chr) or P(chr) end end return rules -- cgit v1.2.3