summaryrefslogtreecommitdiff
path: root/tex
diff options
context:
space:
mode:
author Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> 2010-12-29 19:30:30 +0100
committer Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> 2010-12-29 19:30:30 +0100
commit 5413b9fd0f31e10410095d924e16483235bc530d (patch)
tree b6e1560bc48408a3f7f18095a9fbc0934ff4ae01 /tex
parent c648ccf7728650440b6d7046211128a58ddbbc5f (diff)
download transliterator-5413b9fd0f31e10410095d924e16483235bc530d.tar.gz
Transcription code for lpeg v.0.10.
Diffstat (limited to 'tex')
-rw-r--r-- tex/context/third/transliterator/trans_tables_trsc.lua | 150
1 files changed, 129 insertions, 21 deletions
diff --git a/tex/context/third/transliterator/trans_tables_trsc.lua b/tex/context/third/transliterator/trans_tables_trsc.lua
index 6ba07a2..a5c53cb 100644
--- a/tex/context/third/transliterator/trans_tables_trsc.lua
+++ b/tex/context/third/transliterator/trans_tables_trsc.lua
@@ -7,11 +7,12 @@
-- Reference: „DUDEN. Rechtschreibung der deutschen Sprache“; 20. Aufl.,
-- Mannheim et. al. 1991.
---------------------------------------------------------
--- Lowercase German simple transcription---first pass --
---------------------------------------------------------
+if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
+
+ --------------------------------------------------------
+ -- Lowercase German simple transcription---first pass --
+ --------------------------------------------------------
-if not translit.done_ru_trsc_de then
translit.ru_trsc_low_first = translit.make_add_dict{
[" е"] = " je",
["ъе"] = "je",
@@ -222,10 +223,117 @@ if not translit.done_ru_trsc_de then
translit.tables["German transcription (redundant) jo-rule"] = translit.ru_trsc_jorule
end
+
+ translit.gen_rules_de()
translit.done_ru_trsc_de = true
end
+if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then
+
+  -- This is four times as fast as the old pattern.  It only takes effect
+  -- once lpeg v0.10 has made it into luatex.
+
+  local de_tables = { } -- [1], [2]: first-pass digraphs; [3], [4]: letters
+
+  --------------------------------------------------------
+  -- Lowercase German simple transcription---first pass --
+  --------------------------------------------------------
+
+  de_tables[1] = { -- lowercase initial
+    [" е"] = " je", ["ъе"] = "je", ["ье"] = "je", [" ё"] = " jo", ["ъё"] = "jo",
+    ["ьё"] = "jo", ["жё"] = "scho", ["цё"] = "zo", ["чё"] = "tscho", ["шё"] = "scho",
+    ["щё"] = "schtscho", ["ьи"] = "ji", ["ьо"] = "jo", ["ий"] = "i",
+    ["ый"] = "y", ["кс"] = "x" -- Extraordinarily stupid one.
+  }
+  translit.tables["German transcription first pass lowercase"] = de_tables[1]
+
+  --------------------------------------------------------
+  -- Uppercase German simple transcription---first pass --
+  --------------------------------------------------------
+
+  de_tables[2] = { -- uppercase initial
+    [" Е"] = " Je", ["Ъе"] = "Je", ["Ье"] = "Je", [" Ё"] = " Jo", ["Ъё"] = "Jo",
+    ["Ьё"] = "Jo", ["Жё"] = "Scho", ["Чё"] = "Tscho", ["Шё"] = "Scho", ["Щё"] = "Schtscho",
+    ["Кс"] = "X"
+  }
+  translit.tables["German transcription first pass uppercase"] = de_tables[2]
+
+  -------------------------------------------
+  -- Lowercase German simple transcription --
+  -------------------------------------------
+
+  de_tables[3] = { -- lowercase
+    ["а"] = "a", ["б"] = "b", ["в"] = "w", ["г"] = "g", ["д"] = "d", ["е"] = "e",
+    ["ё"] = "jo", ["ж"] = "sch", ["з"] = "s", ["и"] = "i", ["й"] = "i", ["к"] = "k",
+    ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r",
+    ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "ch", ["ц"] = "z",
+    ["ч"] = "tsch", ["ш"] = "sch", ["щ"] = "schtsch", ["ъ"] = "", ["ы"] = "y", ["ь"] = "",
+    ["э"] = "e", ["ю"] = "ju", ["я"] = "ja"
+  }
+  translit.tables["German transcription second pass lowercase"] = de_tables[3]
+
+  -------------------------------------------
+  -- Uppercase German simple transcription --
+  -------------------------------------------
+
+  de_tables[4] = { -- uppercase
+    ["А"] = "A", ["Б"] = "B", ["В"] = "W", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E",
+    ["Ё"] = "Jo", ["Ж"] = "Sch", ["З"] = "S", ["И"] = "I", ["Й"] = "J", ["К"] = "K",
+    ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R",
+    ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "Ch", ["Ц"] = "Z",
+    ["Ч"] = "Tsch", ["Ш"] = "Sch", ["Щ"] = "Schtsch", ["Ъ"] = "", ["Ы"] = "Y", ["Ь"] = "",
+    ["Э"] = "E", ["Ю"] = "Ju", ["Я"] = "Ja"
+  }
+  translit.tables["German transcription second pass uppercase"] = de_tables[4]
+
+  local B, P, Cs = lpeg.B, lpeg.P, lpeg.Cs
+
+  -- All chars are 2-byte, hence the lookbehind distance of 2 used below.
+  local Co = P{
+    P"б" + "в" + "г" + "д" + "ж" + "з" + "к" + "л" + "м" + "н" +
+    "п" + "р" + "с" + "т" + "ф" + "х" + "ц" + "ч" + "ш" + "щ" +
+    "ъ" + "ь" +
+    "Б" + "В" + "Г" + "Д" + "Ж" + "З" + "К" + "Л" + "М" + "Н" +
+    "П" + "Р" + "С" + "Т" + "Ф" + "Х" + "Ц" + "Ч" + "Ш" + "Щ" +
+    "Ъ" + "Ь"
+  }
+
+  local Vo = P{
+    P"а" + "е" + "ё" + "и" + "й" + "о" + "у" + "ы" + "э" + "я" + "ю" +
+    "А" + "Е" + "Ё" + "И" + "Й" + "О" + "У" + "Ы" + "Э" + "Я" + "Ю"
+  }
+
+  local iy = P"и" + P"ы" + P"И" + P"Ы"
+
+  -------------------------------------------
+  -- Pattern generation.
+  -------------------------------------------
+
+  local p_transcript
+  -- Walk the tables in index order: the choice is ordered, so the first-pass
+  -- digraphs must be tried before the single letters they start with.
+  for i = 1, #de_tables do
+    for str, rep in next, de_tables[i] do
+      local p = P(str) / rep
+      p_transcript = p_transcript and (p_transcript + p) or p
+    end
+  end
+
+  -- Context rules; B(patt, 2) tests patt on the 2-byte char just before.
+  local irule = B(Vo,2) * Cs(P"й") * #Co / "i"
+  local iyrule = B(iy,2) * Cs(P"й") * #Co / "j"
+  local jrule = Cs(P"й") * #Vo / "j"
+  local srule = B(Vo,2) * Cs(P"с") * #Vo / "ss"
+  local ssrule = B(Vo,2) * Cs(P"с") * #P"х" / "ß"
+  local jerule = B(Vo,2) * Cs(P"е") / "je"
+  local jorule = B(Vo,2) * Cs(P"ё") / "jo"
+
+  translit.future_ru_transcript_de = Cs((iyrule + jrule + irule + jerule + srule + ssrule + jorule + p_transcript + 1)^0)
+  translit.done_ru_trsc_de = true -- as in the 0.9 branch; makes the guard above effective
+end
+
if not translit.done_ru_trsc_en then
+
---------------------------------------------------------
-- Lowercase English simple transcription---first pass --
---------------------------------------------------------
@@ -348,6 +456,8 @@ if not translit.done_ru_trsc_en then
translit.tables["English transcription ye-rule"] = translit.ru_trsc_en_jerule
end
+
+ translit.gen_rules_en()
translit.done_ru_trsc_en = true
end
@@ -612,21 +722,21 @@ local function transcript (mode, text)
return text
elseif mode == "ru_transcript_de" then
-
- translit.gen_rules_de()
-
- -- This is possibly slower than using string:gsub.
-
- text = tab_subst(text, translit.ru_trsc_jrule)
- text = tab_subst(text, translit.ru_trsc_irule)
- text = tab_subst(text, translit.ru_trsc_jerule)
- text = tab_subst(text, translit.ru_trsc_srule)
- text = tab_subst(text, translit.ru_trsc_sharpsrule)
- text = tab_subst(text, translit.ru_trsc_jorule)
- text = tab_subst(text, translit.ru_trsc_upp_first, translit.ru_trsc_low_first)
- text = tab_subst(text, translit.ru_trsc_upp, translit.ru_trsc_low)
-
- return text
+ if lpeg.version() == "0.9" then -- legacy path: sequential tab_subst passes
+
+ text = tab_subst(text, translit.ru_trsc_jrule)
+ text = tab_subst(text, translit.ru_trsc_irule)
+ text = tab_subst(text, translit.ru_trsc_jerule)
+ text = tab_subst(text, translit.ru_trsc_srule)
+ text = tab_subst(text, translit.ru_trsc_sharpsrule)
+ text = tab_subst(text, translit.ru_trsc_jorule)
+ text = tab_subst(text, translit.ru_trsc_upp_first, translit.ru_trsc_low_first)
+ text = tab_subst(text, translit.ru_trsc_upp, translit.ru_trsc_low)
+
+ return text
+ elseif lpeg.version() == "0.10" then -- one match against the combined pattern
+ return translit.future_ru_transcript_de:match(text)
+ end -- NOTE(review): other lpeg versions match neither branch; confirm the fallthrough is intended
elseif mode == "ru_transcript_en_exp" then
@@ -660,8 +770,6 @@ local function transcript (mode, text)
elseif mode == "ru_transcript_en" then
- translit.gen_rules_en()
-
text = tab_subst(text, translit.ru_trsc_en_jerule)
text = tab_subst(text, translit.ru_trsc_en_low_first, translit.ru_trsc_en_upp_first)
text = tab_subst(text, translit.ru_trsc_en_low, translit.ru_trsc_en_upp)