diff options
| author | Philipp Gesang <megas.kapaneus@gmail.com> | 2011-05-10 19:31:50 +0200 | 
|---|---|---|
| committer | Philipp Gesang <megas.kapaneus@gmail.com> | 2011-05-10 19:31:50 +0200 | 
| commit | 5203b25743770b62410cc6bc9dc3127bcfb03f22 (patch) | |
| tree | 0a2cdb7515a4bfaa9c0515d6367e6d14209ce0ac /tex/context | |
| parent | 3125c85ca062459ddd50eb9c1d80b35d358deacb (diff) | |
| download | transliterator-5203b25743770b62410cc6bc9dc3127bcfb03f22.tar.gz | |
fixed application order of multi-char rules
Diffstat (limited to 'tex/context')
| -rw-r--r-- | tex/context/third/transliterator/trans_tables_sr.lua | 7 | ||||
| -rw-r--r-- | tex/context/third/transliterator/transliterator.lua | 19 | 
2 files changed, 22 insertions, 4 deletions
diff --git a/tex/context/third/transliterator/trans_tables_sr.lua b/tex/context/third/transliterator/trans_tables_sr.lua index 8a4744a..3a3fa4c 100644 --- a/tex/context/third/transliterator/trans_tables_sr.lua +++ b/tex/context/third/transliterator/trans_tables_sr.lua @@ -91,6 +91,12 @@ if not translit.done_serbian then      translit.sr_tocy_lower = translit.make_add_dict(__inverse_tab(translit.sr_tolt_lower))      translit.sr_tocy_upper = translit.make_add_dict(__inverse_tab(translit.sr_tolt_upper)) + +    --- Good reading up front: +    --- <http://en.wikipedia.org/wiki/User:Aleksandar_Šušnjar/Serbian_Wikipedia's_Challenges#Real-time_transliteration_for_display> + +    local hintchar = "|" +      local except = {          ["nadživ"] = "надћив", -- nadživeti and derivatives      } @@ -98,7 +104,6 @@ if not translit.done_serbian then      local P = lpeg.P      local sub, upper = unicode.utf8.sub, unicode.utf8.upper -    --local e_tocy, e_i_tocy, e_tolt, e_i_tolt = { }, { }, { }, { }      local p_tocy, p_i_tocy, p_tolt, p_i_tolt      for left, right in next, except do -- generating exception patterns for both sides diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua index d794c05..f1d34ae 100644 --- a/tex/context/third/transliterator/transliterator.lua +++ b/tex/context/third/transliterator/transliterator.lua @@ -84,11 +84,24 @@ end  -- Generate a rule pattern from hash table.  do      local P, R, V = lpeg.P, lpeg.R, lpeg.V +    local len     = unicode.utf8.len +    -- multi-char rules first      function translit.addrules (dict, rules) -        for i, _ in pairs(dict) do -            if rules == nil then rules = P(i) -            else rules = rules + P(i) +        local by_length, occurring_lengths = { }, { } +        for chr, _ in next, dict do +            local l = len(chr) +            if not by_length[l] then +                by_length[l] = { } +                occurring_lengths[#occurring_lengths+1] = l +            end +            by_length[l][#by_length[l]+1] = chr +        end +        table.sort(occurring_lengths) +        for i=#occurring_lengths, 1, -1 do +            local l = occurring_lengths[i] +            for _, chr in next, by_length[l] do +                rules = rules and rules + P(chr) or P(chr)              end          end          return rules  | 
