diff options
author | Philipp Gesang <megas.kapaneus@gmail.com> | 2011-05-10 19:31:50 +0200 |
---|---|---|
committer | Philipp Gesang <megas.kapaneus@gmail.com> | 2011-05-10 19:31:50 +0200 |
commit | 5203b25743770b62410cc6bc9dc3127bcfb03f22 (patch) | |
tree | 0a2cdb7515a4bfaa9c0515d6367e6d14209ce0ac | |
parent | 3125c85ca062459ddd50eb9c1d80b35d358deacb (diff) | |
download | transliterator-5203b25743770b62410cc6bc9dc3127bcfb03f22.tar.gz |
fixed application order of multi-char rules
-rw-r--r-- | tex/context/third/transliterator/trans_tables_sr.lua | 7 | ||||
-rw-r--r-- | tex/context/third/transliterator/transliterator.lua | 19 |
2 files changed, 22 insertions, 4 deletions
diff --git a/tex/context/third/transliterator/trans_tables_sr.lua b/tex/context/third/transliterator/trans_tables_sr.lua index 8a4744a..3a3fa4c 100644 --- a/tex/context/third/transliterator/trans_tables_sr.lua +++ b/tex/context/third/transliterator/trans_tables_sr.lua @@ -91,6 +91,12 @@ if not translit.done_serbian then translit.sr_tocy_lower = translit.make_add_dict(__inverse_tab(translit.sr_tolt_lower)) translit.sr_tocy_upper = translit.make_add_dict(__inverse_tab(translit.sr_tolt_upper)) + + --- Good reading up front: + --- <http://en.wikipedia.org/wiki/User:Aleksandar_Šušnjar/Serbian_Wikipedia's_Challenges#Real-time_transliteration_for_display> + + local hintchar = "|" + local except = { ["nadživ"] = "надћив", -- nadživeti and derivatives } @@ -98,7 +104,6 @@ if not translit.done_serbian then local P = lpeg.P local sub, upper = unicode.utf8.sub, unicode.utf8.upper - --local e_tocy, e_i_tocy, e_tolt, e_i_tolt = { }, { }, { }, { } local p_tocy, p_i_tocy, p_tolt, p_i_tolt for left, right in next, except do -- generating exception patterns for both sides diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua index d794c05..f1d34ae 100644 --- a/tex/context/third/transliterator/transliterator.lua +++ b/tex/context/third/transliterator/transliterator.lua @@ -84,11 +84,24 @@ end -- Generate a rule pattern from hash table. do local P, R, V = lpeg.P, lpeg.R, lpeg.V + local len = unicode.utf8.len + -- multi-char rules first function translit.addrules (dict, rules) - for i, _ in pairs(dict) do - if rules == nil then rules = P(i) - else rules = rules + P(i) + local by_length, occurring_lengths = { }, { } + for chr, _ in next, dict do + local l = len(chr) + if not by_length[l] then + by_length[l] = { } + occurring_lengths[#occurring_lengths+1] = l + end + by_length[l][#by_length[l]+1] = chr + end + table.sort(occurring_lengths) + for i=#occurring_lengths, 1, -1 do + local l = occurring_lengths[i] + for _, chr in next, by_length[l] do + rules = rules and rules + P(chr) or P(chr) end end return rules |