From 8f57b0ace4826104e586e289d0977b55570f0c8b Mon Sep 17 00:00:00 2001
From: Philipp Gesang
Date: Fri, 24 Dec 2010 16:05:30 +0100
Subject: utf character handling; optimized iso9 parser code

---
 tex/context/third/transliterator/transliterator.lua | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'tex/context/third/transliterator/transliterator.lua')

diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua
index 110d862..36eb804 100644
--- a/tex/context/third/transliterator/transliterator.lua
+++ b/tex/context/third/transliterator/transliterator.lua
@@ -93,6 +93,22 @@ do
   end
 end
 
+-- Modified version of Hans’s utf pattern (l-lpeg.lua).
+
+do
+  local P, R, V = lpeg.P, lpeg.R, lpeg.V
+
+  translit.utfchar = P{
+    V"utf8one" + V"utf8two" + V"utf8three" + V"utf8four",
+
+    utf8next = R("\128\191"),
+    utf8one = R("\000\127"),
+    utf8two = R("\194\223") * V"utf8next",
+    utf8three = R("\224\239") * V"utf8next" * V"utf8next",
+    utf8four = R("\240\244") * V"utf8next" * V"utf8next" * V"utf8next",
+  }
+end
+
 -- We might want to have all the table data nicely formatted by \CONTEXT\
 -- itself, here's how we'll do it. \type{translit.show_tab(t)} handles a
 -- single table \type{t}, builds a Natural TABLE out of its content and
@@ -220,7 +236,7 @@ function translit.transliterate (method, text)
   elseif method == "ru_transcript_de" or
          method == "ru_transcript_de_exp" or -- experimental lpeg
          method == "ru_transcript_en" or
-         method == "ru_transcript_en_sub" or -- old multiple substitution
+         method == "ru_transcript_en_exp" or
          method == "ru_cz" or
          method == "ocs_cz" then
 
-- 
cgit v1.2.3