From 26ae25dbea9b066eae665f8aefb2b046ac67d431 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 24 Dec 2010 14:48:09 +0100 Subject: =?UTF-8?q?using=20context=E2=80=99s=20builtin=20pattern=20for=20u?= =?UTF-8?q?tf8=20characters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../third/transliterator/trans_tables_gr.lua | 19 ++---------------- .../third/transliterator/trans_tables_iso9.lua | 17 ++-------------- .../third/transliterator/trans_tables_scntfc.lua | 8 +------- .../third/transliterator/trans_tables_trsc.lua | 23 +++------------------- .../third/transliterator/transliterator.lua | 13 ++++++++++++ 5 files changed, 21 insertions(+), 59 deletions(-) diff --git a/tex/context/third/transliterator/trans_tables_gr.lua b/tex/context/third/transliterator/trans_tables_gr.lua index bed775a..084478d 100644 --- a/tex/context/third/transliterator/trans_tables_gr.lua +++ b/tex/context/third/transliterator/trans_tables_gr.lua @@ -644,23 +644,8 @@ translit.tables["Greek transliteration archaic characters"] = translit.gr_other function translit.dogreek (mode, text) local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs - - - -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html - local utfchar = R("\000\127") + - R("\194\223") * R("\128\191") + - R("\224\240") * R("\128\191") * R("\128\191") + - R("\241\244") * R("\128\191") * R("\128\191") * R("\128\191") - - -- Add keys of a dictionary to a ruleset. - function addrules (dict, rules) - for i, _ in pairs(dict) do - if rules == nil then rules = P(i) - else rules = rules + P(i) - end - end - return rules - end + local addrules = translit.addrules + local utfchar = lpeg.patterns.utf8char if mode == "gr" or mode == "gr_n" then diff --git a/tex/context/third/transliterator/trans_tables_iso9.lua b/tex/context/third/transliterator/trans_tables_iso9.lua index e5fa980..4057a0e 100644 --- a/tex/context/third/transliterator/trans_tables_iso9.lua +++ b/tex/context/third/transliterator/trans_tables_iso9.lua @@ -251,23 +251,10 @@ translit.tables["cyrillic other uppercase ISO~9"] = translit.non_ru_upp function translit.iso9 (mode, text) local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs - local loc = lpeg.locale () - - -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html - local utfchar = R("\000\127") + - R("\194\223") * R("\128\191") + - R("\224\240") * R("\128\191") * R("\128\191") + - R("\241\244") * R("\128\191") * R("\128\191") * R("\128\191") + local addrules = translit.addrules + local utfchar = lpeg.patterns.utf8char -- Add keys of a dictionary to a ruleset. - function addrules (dict, rules) - for i, _ in pairs(dict) do - if rules == nil then rules = P(i) - else rules = rules + P(i) - end - end - return rules - end local iso9 = translit.make_add_dict{} iso9 = translit.ru_upp + translit.ru_low diff --git a/tex/context/third/transliterator/trans_tables_scntfc.lua b/tex/context/third/transliterator/trans_tables_scntfc.lua index 7132fc8..f54eb8e 100644 --- a/tex/context/third/transliterator/trans_tables_scntfc.lua +++ b/tex/context/third/transliterator/trans_tables_scntfc.lua @@ -201,13 +201,7 @@ translit.tables["OCS \\quotation{scientific} transliteration additional uppercas function translit.scientific (mode, text) local P, R, S, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cs - local loc = lpeg.locale () - - -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html - local utfchar = R("\000\127") + - R("\194\223") * R("\128\191") + - R("\224\240") * R("\128\191") * R("\128\191") + - R("\241\244") * R("\128\191") * R("\128\191") * R("\128\191") + local utfchar = lpeg.patterns.utf8char local cyr = translit.make_add_dict{} local cyruk, p_cyruk, p_cyr diff --git a/tex/context/third/transliterator/trans_tables_trsc.lua b/tex/context/third/transliterator/trans_tables_trsc.lua index cad3567..0458539 100644 --- a/tex/context/third/transliterator/trans_tables_trsc.lua +++ b/tex/context/third/transliterator/trans_tables_trsc.lua @@ -487,17 +487,10 @@ translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit function translit.transcript (mode, text) local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs - local loc = lpeg.locale () + local addrules = translit.addrules + local utfchar = lpeg.patterns.utf8char - -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html - local utfchar = R("\000\127") + - R("\194\223") * R("\128\191") + - R("\224\240") * R("\128\191") * R("\128\191") + - R("\241\244") * R("\128\191") * R("\128\191") * R("\128\191") - - local trsc_parser, p_rules, capt - - local p_de + local trsc_parser, p_rules, capt, p_de function tab_subst (s, ...) local p_tmp, tmp = nil, translit.make_add_dict{} @@ -509,16 +502,6 @@ function translit.transcript (mode, text) return fp:match(s) end - -- Add keys of a dictionary to a ruleset. - function addrules (dict, rules) - for i, _ in pairs(dict) do - if rules == nil then rules = P(i) - else rules = rules + P(i) - end - end - return rules - end - -- The following is needed becaus lpeg.S doesn't work with utf. local vow, con, iy for _,v in ipairs (translit.ru_vowels) do diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua index 4b64d82..110d862 100644 --- a/tex/context/third/transliterator/transliterator.lua +++ b/tex/context/third/transliterator/transliterator.lua @@ -80,6 +80,19 @@ end -- Auxiliary Functions -------------------------------------------------------------------------------- +-- Generate a rule pattern from hash table. +do + local P = lpeg.P + function translit.addrules (dict, rules) + for i, _ in pairs(dict) do + if rules == nil then rules = P(i) + else rules = rules + P(i) + end + end + return rules + end +end + -- We might want to have all the table data nicely formatted by \CONTEXT\ -- itself, here's how we'll do it. \type{translit.show_tab(t)} handles a -- single table \type{t}, builds a Natural TABLE out of its content and -- cgit v1.2.3