diff options
Diffstat (limited to 'tex/context/third')
5 files changed, 70 insertions, 73 deletions
diff --git a/tex/context/third/transliterator/trans_tables_gr.lua b/tex/context/third/transliterator/trans_tables_gr.lua index 084478d..a2167ee 100644 --- a/tex/context/third/transliterator/trans_tables_gr.lua +++ b/tex/context/third/transliterator/trans_tables_gr.lua @@ -643,34 +643,42 @@ translit.tables["Greek transliteration archaic characters"] = translit.gr_other  --===========================================================================--  function translit.dogreek (mode, text) -    local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs +    local P, V, Cs = lpeg.P, lpeg.V, lpeg.Cs      local addrules = translit.addrules -    local utfchar = lpeg.patterns.utf8char +    local utfchar = translit.utfchar      if mode == "gr" or mode == "gr_n" then          local gr_di_in, gr_in, gr_di, gr = translit.make_add_dict{}, translit.make_add_dict{}, translit.make_add_dict{}, translit.make_add_dict{} -        gr_di_in = gr_di_in +   translit.gr_di_in_low + translit.gr_di_in_upp -        gr_in    = gr_in +      translit.gr_in_low +    translit.gr_in_upp -        gr_di    = gr_di +      translit.gr_di_low +    translit.gr_di_upp -        gr       = gr +         translit.gr_low +       translit.gr_upp +         translit.gr_other +        gr_di_in = gr_di_in + translit.gr_di_in_low + translit.gr_di_in_upp +        gr_in    = gr_in    + translit.gr_in_low    + translit.gr_in_upp +        gr_di    = gr_di    + translit.gr_di_low    + translit.gr_di_upp +        gr       = gr       + translit.gr_low       + translit.gr_upp       + translit.gr_other -        if mode == "gr_n" then gr_di = gr_di +          translit.gr_nrule         end +        if mode == "gr_n" then gr_di = gr_di + translit.gr_nrule end          local p_di_in, p_in, p_di, p -        p_di_in = addrules( gr_di_in,  p_di_in ) -        p_in    = addrules( gr_in,     p_in    ) -        p_di    = addrules( gr_di,     p_di    ) -        p       = addrules( gr,        p       ) - -        local init_diph = Cs(p_di_in            / gr_di_in  ) -        local init      = Cs(p_in               / gr_in     ) -        local diph      = Cs(p_di               / gr_di     ) -        local other     = Cs(p                  / gr        ) - -        local g = Cs((init_diph + init + diph + other + utfchar)^0) - +        p_di_in = addrules( gr_di_in, p_di_in ) +        p_in    = addrules( gr_in,    p_in    ) +        p_di    = addrules( gr_di,    p_di    ) +        p       = addrules( gr,       p       ) + +        local g = P{ -- 2959 rules +            Cs((V"init_diph" +              + V"init" +              + V"diph" +              + V"other" +              + utfchar +            )^0), + +            init_diph = Cs(p_di_in / gr_di_in  ), +            init      = Cs(p_in    / gr_in     ), +            diph      = Cs(p_di    / gr_di     ), +            other     = Cs(p       / gr        ), +        } + +        --g:print()          text = g:match(text)          return text      end diff --git a/tex/context/third/transliterator/trans_tables_iso9.lua b/tex/context/third/transliterator/trans_tables_iso9.lua index 4057a0e..ad99e23 100644 --- a/tex/context/third/transliterator/trans_tables_iso9.lua +++ b/tex/context/third/transliterator/trans_tables_iso9.lua @@ -252,9 +252,7 @@ translit.tables["cyrillic other uppercase ISO~9"] = translit.non_ru_upp  function translit.iso9 (mode, text)      local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs      local addrules = translit.addrules -    local utfchar = lpeg.patterns.utf8char - -    -- Add keys of a dictionary to a ruleset. +    local utfchar = translit.utfchar      local iso9 = translit.make_add_dict{}      iso9 = translit.ru_upp + translit.ru_low @@ -268,22 +266,15 @@ function translit.iso9 (mode, text)                   + translit.non_ru_upp                   + translit.non_ru_low          end -    end - -    if mode == "ru_old_jer_hack" then +    elseif mode == "ru_old_jer_hack" then          iso9 = iso9               + translit.ru_old_upp               + translit.ru_old_low               + translit.ru_jer_hack      end -    local p_iso9 -    p_iso9 = addrules (iso9, p_iso9) - -    local p_cyr = Cs(p_iso9) / iso9 - -    local iso9_parser = Cs((p_cyr + utfchar)^0) -    text = iso9_parser:match(text) +    local p_iso9 = addrules (iso9, p_iso9) +    local iso9_parser = Cs((p_iso9 / iso9 + utfchar)^0) -    return text +    return iso9_parser:match(text)  end diff --git a/tex/context/third/transliterator/trans_tables_scntfc.lua b/tex/context/third/transliterator/trans_tables_scntfc.lua index f54eb8e..9f92cf5 100644 --- a/tex/context/third/transliterator/trans_tables_scntfc.lua +++ b/tex/context/third/transliterator/trans_tables_scntfc.lua @@ -200,12 +200,13 @@ translit.tables["OCS \\quotation{scientific} transliteration additional uppercas  --===========================================================================--  function translit.scientific (mode, text) -    local P, R, S, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cs -    local utfchar = lpeg.patterns.utf8char +    local P, Cs = lpeg.P, lpeg.Cs +    local utfchar = translit.utfchar +    local addrules = translit.addrules      local cyr = translit.make_add_dict{} -    local cyruk, p_cyruk, p_cyr -    local scientific_parser +    local cyruk, p_cyruk, p_cyr, scientific_parser +      if mode == "iso9_ocs" or mode == "iso9_ocs_hack" then          environment.loadluafile("trans_tables_iso9") @@ -222,33 +223,29 @@ function translit.scientific (mode, text)              cyr = cyr + translit.ru_jer_hack          end -        p_cyr               = Cs(utfchar) / cyr -        scientific_parser   = Cs((p_cyr + utfchar)^0) +        p_cyr = addrules(cyr, p_cyr) -    elseif mode == ("ocs") then +        scientific_parser = Cs((p_cyr / cyr + utfchar)^0) -        for i,_ in pairs(translit.ocs_uk) do  -            if cyruk == nil then cyruk = P(i)       -- is this The Right Way build  patterns from a table? -            else cyruk = cyruk + P(i) -            end -        end +    elseif mode == ("ocs") then          cyr = translit.ocs_low + translit.ocs_upp -        p_cyruk     = Cs(P(cyruk))  / translit.ocs_uk -        p_cyr       = Cs(utfchar)   / cyr +        p_cyruk = addrules(translit.ocs_uk, cyruk) +        p_cyr   = addrules(cyr,             p_cyr) -        scientific_parser = Cs((p_cyruk + p_cyr + utfchar)^0) +        scientific_parser = Cs((p_cyruk / translit.ocs_uk +                              + p_cyr   / cyr +                              + utfchar)^0)      elseif mode == ("ocs_gla") then          environment.loadluafile( "trans_tables_glag")          cyr = translit.ocs_gla_low + translit.ocs_gla_upp -        p_cyr = Cs(utfchar) / cyr -        scientific_parser = Cs((p_cyr + utfchar)^0) -    end -    text = scientific_parser:match(text) +        p_cyr = addrules(cyr, p_cyr) +        scientific_parser = Cs((p_cyr / cyr + utfchar)^0) +    end -    return text +    return scientific_parser:match(text)  end diff --git a/tex/context/third/transliterator/trans_tables_trsc.lua b/tex/context/third/transliterator/trans_tables_trsc.lua index 0458539..bdeaf89 100644 --- a/tex/context/third/transliterator/trans_tables_trsc.lua +++ b/tex/context/third/transliterator/trans_tables_trsc.lua @@ -488,7 +488,7 @@ translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit  function translit.transcript (mode, text)      local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs      local addrules = translit.addrules -    local utfchar = lpeg.patterns.utf8char +    local utfchar = translit.utfchar      local trsc_parser, p_rules, capt, p_de @@ -502,25 +502,10 @@ function translit.transcript (mode, text)          return fp:match(s)      end -    -- The following is needed becaus lpeg.S doesn't work with utf.      local vow, con, iy -    for _,v in ipairs (translit.ru_vowels) do -        if vow == nil then vow = P(v) -        else vow = vow + P(v) -        end -    end - -    for _,c in ipairs (translit.ru_consonants) do -        if con == nil then con = P(c) -        else con = con + P(c) -        end -    end - -    for _,i in ipairs (translit.ru_trsc_iy) do -        if iy == nil then iy = P(i) -        else iy = iy + P(i) -        end -    end +    vow = addrules(translit.ru_vowels,     vow) +    con = addrules(translit.ru_consonants, con) +    iy  = addrules(translit.ru_trsc_iy,    iy )      if mode == "ru_transcript_de_exp" then diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua index 110d862..36eb804 100644 --- a/tex/context/third/transliterator/transliterator.lua +++ b/tex/context/third/transliterator/transliterator.lua @@ -93,6 +93,22 @@ do      end  end +-- Modified version of Hans’s utf pattern (l-lpeg.lua). + +do +    local P, R, V = lpeg.P, lpeg.R, lpeg.V + +    translit.utfchar = P{ +        V"utf8one" + V"utf8two" + V"utf8three" + V"utf8four", + +        utf8next  = R("\128\191"), +        utf8one   = R("\000\127"), +        utf8two   = R("\194\223") * V"utf8next", +        utf8three = R("\224\239") * V"utf8next" * V"utf8next", +        utf8four  = R("\240\244") * V"utf8next" * V"utf8next" * V"utf8next", +    } +end +  -- We might want to have all the table data nicely formatted by \CONTEXT\   -- itself, here's how we'll do it.  \type{translit.show_tab(t)} handles a  -- single table \type{t}, builds a Natural TABLE out of its content and @@ -220,7 +236,7 @@ function translit.transliterate (method, text)      elseif  method == "ru_transcript_de"      or              method == "ru_transcript_de_exp"  or -- experimental lpeg              method == "ru_transcript_en"      or -            method == "ru_transcript_en_sub"  or -- old multiple substitution +            method == "ru_transcript_en_exp"  or              method == "ru_cz"                 or              method == "ocs_cz"              then  | 
