summaryrefslogtreecommitdiff
path: root/tex/context/third/transliterator/transliterator.lua
diff options
context:
space:
mode:
author: Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> 2010-12-24 16:05:30 +0100
committer: Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> 2010-12-24 16:05:30 +0100
commit: 8f57b0ace4826104e586e289d0977b55570f0c8b (patch)
tree: 030fbdb090f5efd4c1b626c6c7114508b53e8108 /tex/context/third/transliterator/transliterator.lua
parent: 26ae25dbea9b066eae665f8aefb2b046ac67d431 (diff)
download: transliterator-8f57b0ace4826104e586e289d0977b55570f0c8b.tar.gz
utf character handling; optimized iso9 parser code
Diffstat (limited to 'tex/context/third/transliterator/transliterator.lua')
-rw-r--r-- tex/context/third/transliterator/transliterator.lua 18
1 files changed, 17 insertions, 1 deletions
diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua
index 110d862..36eb804 100644
--- a/tex/context/third/transliterator/transliterator.lua
+++ b/tex/context/third/transliterator/transliterator.lua
@@ -93,6 +93,22 @@ do
end
end
+-- Modified version of Hans’s utf pattern (l-lpeg.lua).
+
+do
+ local P, R, V = lpeg.P, lpeg.R, lpeg.V
+
+ translit.utfchar = P{
+ V"utf8one" + V"utf8two" + V"utf8three" + V"utf8four",
+
+ utf8next = R("\128\191"),
+ utf8one = R("\000\127"),
+ utf8two = R("\194\223") * V"utf8next",
+ utf8three = R("\224\239") * V"utf8next" * V"utf8next",
+ utf8four = R("\240\244") * V"utf8next" * V"utf8next" * V"utf8next",
+ }
+end
+
-- We might want to have all the table data nicely formatted by \CONTEXT\
-- itself, here's how we'll do it. \type{translit.show_tab(t)} handles a
-- single table \type{t}, builds a Natural TABLE out of its content and
@@ -220,7 +236,7 @@ function translit.transliterate (method, text)
elseif method == "ru_transcript_de" or
method == "ru_transcript_de_exp" or -- experimental lpeg
method == "ru_transcript_en" or
- method == "ru_transcript_en_sub" or -- old multiple substitution
+ method == "ru_transcript_en_exp" or
method == "ru_cz" or
method == "ocs_cz"
then