summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilipp Gesang <gesang@stud.uni-heidelberg.de>2013-02-20 15:05:55 +0100
committerPhilipp Gesang <gesang@stud.uni-heidelberg.de>2013-02-20 15:05:55 +0100
commit41c4f194a139f769a5c0290106756c67877de8cf (patch)
tree31ac266baf7b50986943ed4d32b8eaed19470d85
parent10589ad4c4acc186e582d2a7afcc45f4ef1c2515 (diff)
downloadtransliterator-41c4f194a139f769a5c0290106756c67877de8cf.tar.gz
update transliteration functions for lpeg v0.10 and lua 5.2
-rw-r--r--tex/context/third/transliterator/trans_tables_trsc.lua535
-rw-r--r--tex/context/third/transliterator/transliterator.lua2
2 files changed, 304 insertions, 233 deletions
diff --git a/tex/context/third/transliterator/trans_tables_trsc.lua b/tex/context/third/transliterator/trans_tables_trsc.lua
index ce907bc..561d8d0 100644
--- a/tex/context/third/transliterator/trans_tables_trsc.lua
+++ b/tex/context/third/transliterator/trans_tables_trsc.lua
@@ -1,13 +1,19 @@
---===========================================================================--
--- Legacy national transliterations --
---===========================================================================--
+--===================================================================--
+-- Legacy national transliterations --
+--===================================================================--
local translit = thirddata.translit
+local addrules = translit.addrules
+local utfchar = translit.utfchar
+
+local lpegmatch = lpeg.match
+local tablepack = table.pack -- lua 5.2 precaution
---------------------------------
-- German simple transcription --
---------------------------------
--- Reference: „DUDEN. Rechtschreibung der deutschen Sprache“; 20. Aufl.,
+-- Reference: „DUDEN. Rechtschreibung der deutschen Sprache“;
+-- 20. Aufl.,
-- Mannheim et. al. 1991.
if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
@@ -35,7 +41,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
["кс"] = "x" -- Extraordinarily stupid one.
}
- translit.tables["German transcription first pass lowercase"] = translit.ru_trsc_low_first
+ translit.tables["German transcription first pass lowercase"]
+ = translit.ru_trsc_low_first
--------------------------------------------------------
-- Uppercase German simple transcription---first pass --
@@ -55,7 +62,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
["Кс"] = "ks"
}
- translit.tables["German transcription first pass uppercase"] = translit.ru_trsc_upp_first
+ translit.tables["German transcription first pass uppercase"]
+ = translit.ru_trsc_upp_first
-------------------------------------------
-- Lowercase German simple transcription --
@@ -97,7 +105,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
["я"] = "ja"
}
- translit.tables["German transcription second pass lowercase"] = translit.ru_trsc_low
+ translit.tables["German transcription second pass lowercase"]
+ = translit.ru_trsc_low
-------------------------------------------
-- Uppercase German simple transcription --
@@ -139,17 +148,19 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
["Я"] = "Ja"
}
- translit.tables["German transcription second pass uppercase"] = translit.ru_trsc_upp
+ translit.tables["German transcription second pass uppercase"]
+ = translit.ru_trsc_upp
translit.ru_trsc_iy = {"и", "ы", "И", "Ы"}
function translit.gen_rules_de()
- -- The following are more interesting than the previous tables because they
- -- implement various rules. For instance the table
- -- \type{translit.ru_trsc_irule} holds a substitution dictionary for all
- -- possible combinations (including nonsense galore) of a vowel preceding an
- -- “й” (Russian short i) preceding a consonant; here we access the sets of
- -- Russian vowels as well consonants that were defined earlier.
+ -- The following are more interesting than the previous tables
+ -- because they implement various rules. For instance the
+ -- table \type{translit.ru_trsc_irule} holds a substitution
+ -- dictionary for all possible combinations (including nonsense
+ -- galore) of a vowel preceding an “й” (Russian short i)
+ -- preceding a consonant; here we access the sets of Russian
+ -- vowels as well as consonants that were defined earlier.
-- The й-rule, VйC -> ViC
translit.ru_trsc_irule = translit.make_add_dict{}
@@ -161,7 +172,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
end
end
- translit.tables["German transcription i-rule"] = translit.ru_trsc_irule
+ translit.tables["German transcription i-rule"]
+ = translit.ru_trsc_irule
-- The second й-rule, йV -> jV && [иы]йC -> [иы]jC
translit.ru_trsc_jrule = {}
@@ -179,7 +191,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
end
end
- translit.tables["German transcription j-rule"] = translit.ru_trsc_jrule
+ translit.tables["German transcription j-rule"]
+ = translit.ru_trsc_jrule
-- The с-rule, VсV -> VssV
translit.ru_trsc_srule = translit.make_add_dict{}
@@ -191,7 +204,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
end
end
- translit.tables["German transcription s-rule"] = translit.ru_trsc_srule
+ translit.tables["German transcription s-rule"]
+ = translit.ru_trsc_srule
-- The sharp-s-rule, Vсх -> Vßх
translit.ru_trsc_sharpsrule = translit.make_add_dict{}
@@ -201,7 +215,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
translit.ru_trsc_sharpsrule[new_ante] = new_post
end
- translit.tables["German transcription sharp-s-rule"] = translit.ru_trsc_sharpsrule
+ translit.tables["German transcription sharp-s-rule"]
+ = translit.ru_trsc_sharpsrule
-- The е-rule, Vе -> Vje
translit.ru_trsc_jerule = translit.make_add_dict{}
@@ -211,7 +226,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
translit.ru_trsc_jerule[new_ante] = new_post
end
- translit.tables["German transcription je-rule"] = translit.ru_trsc_jerule
+ translit.tables["German transcription je-rule"]
+ = translit.ru_trsc_jerule
-- The ё-rule, Vё -> Vjo
-- This should be redundant as [жцчшщ]ё -> o, else ё -> jo .
@@ -223,7 +239,8 @@ if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then
translit.ru_trsc_jorule[new_ante] = new_post
end
- translit.tables["German transcription (redundant) jo-rule"] = translit.ru_trsc_jorule
+ translit.tables["German transcription (redundant) jo-rule"]
+ = translit.ru_trsc_jorule
end
@@ -233,8 +250,8 @@ end
if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then
- -- This is about *eight* times as fast as the old pattern. Just waiting for
- -- v0.10 to make it into luatex.
+ -- This is about *eight* times as fast as the old pattern. Just
+ -- waiting for v0.10 to make it into luatex.
local de_tables = { }
@@ -243,51 +260,64 @@ if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then
--------------------------------------------------------
de_tables[1] = { -- lowercase initial
- [" е"] = " je", ["ъе"] = "je", ["ье"] = "je", [" ё"] = " jo", ["ъё"] = "jo",
- ["ьё"] = "jo", ["жё"] = "scho", ["цё"] = "scho", ["чё"] = "zo", ["шё"] = "scho",
- ["щё"] = "schtscho", ["ье"] = "je", ["ьи"] = "ji", ["ьо"] = "jo", ["ий"] = "i",
- ["ый"] = "y", ["кс"] = "x" -- Extraordinarily stupid one.
+ [" е"] = " je", ["ъе"] = "je", ["ье"] = "je",
+ [" ё"] = " jo", ["ъё"] = "jo", ["ьё"] = "jo",
+ ["жё"] = "scho", ["цё"] = "zo", ["чё"] = "tscho",
+ ["шё"] = "scho", ["щё"] = "schtscho", ["ье"] = "je",
+ ["ьи"] = "ji", ["ьо"] = "jo", ["ий"] = "i",
+ ["ый"] = "y", ["кс"] = "x" -- Extraordinarily stupid one.
}
- translit.tables["German transcription first pass lowercase"] = de_tables[1]
+ translit.tables["German transcription first pass lowercase"]
+ = de_tables[1]
--------------------------------------------------------
-- Uppercase German simple transcription---first pass --
--------------------------------------------------------
de_tables[2] = { -- uppercase initial
- [" Е"] = " Je", ["Ъe"] = "Je", ["Ье"] = "Je", [" Ё"] = "Jo", ["Ъё"] = "Jo",
- ["Ьё"] = "Jo", ["Жё"] = "Scho", ["Чё"] = "Tscho", ["Шё"] = "Scho", ["Щё"] = "Schtscho",
- ["Кс"] = "ks"
+ [" Е"] = " Je", ["Ъe"] = "Je", ["Ье"] = "Je",
+ [" Ё"] = " Jo", ["Ъё"] = "Jo", ["Ьё"] = "Jo",
+ ["Жё"] = "Scho", ["Чё"] = "Tscho", ["Шё"] = "Scho",
+ ["Щё"] = "Schtscho", ["Кс"] = "ks"
}
- translit.tables["German transcription first pass uppercase"] = de_tables[2]
+ translit.tables["German transcription first pass uppercase"]
+ = de_tables[2]
-------------------------------------------
-- Lowercase German simple transcription --
-------------------------------------------
de_tables[3] = { -- lowercase
- ["а"] = "a", ["б"] = "b", ["в"] = "w", ["г"] = "g", ["д"] = "d", ["е"] = "e",
- ["ё"] = "jo", ["ж"] = "sch", ["з"] = "s", ["и"] = "i", ["й"] = "i", ["к"] = "k",
- ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r",
- ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "ch", ["ц"] = "z",
- ["ч"] = "tsch", ["ш"] = "sch", ["щ"] = "schtsch", ["ъ"] = "", ["ы"] = "y", ["ь"] = "",
- ["э"] = "e", ["ю"] = "ju", ["я"] = "ja"
+ ["а"] = "a", ["б"] = "b", ["в"] = "w", ["г"] = "g",
+ ["д"] = "d", ["е"] = "e", ["ё"] = "jo", ["ж"] = "sch",
+ ["з"] = "s", ["и"] = "i", ["й"] = "i", ["к"] = "k",
+ ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o",
+ ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t",
+ ["у"] = "u", ["ф"] = "f", ["х"] = "ch", ["ц"] = "z",
+ ["ч"] = "tsch", ["ш"] = "sch", ["щ"] = "schtsch",
+ ["ъ"] = "", ["ы"] = "y", ["ь"] = "", ["э"] = "e",
+ ["ю"] = "ju", ["я"] = "ja"
}
- translit.tables["German transcription second pass lowercase"] = de_tables[3]
+ translit.tables["German transcription second pass lowercase"]
+ = de_tables[3]
-------------------------------------------
-- Uppercase German simple transcription --
-------------------------------------------
de_tables[4] = { -- uppercase
- ["А"] = "A", ["Б"] = "B", ["В"] = "W", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E",
- ["Ё"] = "Jo", ["Ж"] = "Sch", ["З"] = "S", ["И"] = "I", ["Й"] = "J", ["К"] = "K",
- ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R",
- ["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "Ch", ["Ц"] = "Z",
- ["Ч"] = "Tsch", ["Ш"] = "Sch", ["Щ"] = "Schtsch", ["Ъ"] = "", ["Ы"] = "Y", ["Ь"] = "",
- ["Э"] = "E", ["Ю"] = "Ju", ["Я"] = "Ja"
+ ["А"] = "A", ["Б"] = "B", ["В"] = "W", ["Г"] = "G",
+ ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Jo", ["Ж"] = "Sch",
+ ["З"] = "S", ["И"] = "I", ["Й"] = "J", ["К"] = "K",
+ ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O",
+ ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T",
+ ["У"] = "U", ["Ф"] = "F", ["Х"] = "Ch", ["Ц"] = "Z",
+ ["Ч"] = "Tsch", ["Ш"] = "Sch", ["Щ"] = "Schtsch",["Ъ"] = "",
+ ["Ы"] = "Y", ["Ь"] = "", ["Э"] = "E", ["Ю"] = "Ju",
+ ["Я"] = "Ja"
}
- translit.tables["German transcription second pass uppercase"] = de_tables[4]
+ translit.tables["German transcription second pass uppercase"]
+ = de_tables[4]
local B, P, Cs = lpeg.B, lpeg.P, lpeg.Cs
@@ -302,8 +332,9 @@ if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then
}
local Vo = P{
- P"а" + "е" + "ё" + "и" + "й" + "о" + "у" + "ы" + "э" + "я" + "ю" +
- "А" + "Е" + "Ё" + "И" + "Й" + "О" + "У" + "Ы" + "Э" + "Я" + "Ю"
+ P"а" + "е" + "ё" + "и" + "й" + "о" + "у" + "ы" + "э" + "я" +
+ "ю" + "А" + "Е" + "Ё" + "И" + "Й" + "О" + "У" + "Ы" + "Э" +
+ "Я" + "Ю"
}
local iy = P"и" + P"ы" + P"И" + P"Ы"
@@ -312,7 +343,7 @@ if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then
-- Pattern generation.
-------------------------------------------
- local p_transcript
+ local p_transcript
for _, set in next, de_tables do
for str, rep in next, set do
@@ -332,7 +363,11 @@ if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then
local jerule = B(Vo,2) * Cs(P"е") / "je"
local jorule = B(Vo,2) * Cs(P"ё") / "jo"
- translit.future_ru_transcript_de = Cs((iyrule + jrule + irule + jerule + srule + ssrule + jorule + p_transcript + 1)^0)
+ translit.future_ru_transcript_de
+ = Cs((iyrule + jrule + irule
+ + jerule + srule + ssrule
+ + jorule + p_transcript + 1)^0
+ )
end
if not translit.done_ru_trsc_en then
@@ -349,7 +384,8 @@ if not translit.done_ru_trsc_en then
["ьи"] = "yi",
}
- translit.tables["English transcription lowercase first pass"] = translit.ru_trsc_en_low_first
+ translit.tables["English transcription lowercase first pass"]
+ = translit.ru_trsc_en_low_first
---------------------------------------------------------
-- Uppercase English simple transcription---first pass --
@@ -361,7 +397,8 @@ if not translit.done_ru_trsc_en then
["Ье"] = "Ye",
}
- translit.tables["English transcription uppercase first pass"] = translit.ru_trsc_en_upp_first
+ translit.tables["English transcription uppercase first pass"]
+ = translit.ru_trsc_en_upp_first
--------------------------------------------
-- Lowercase English simple transcription --
@@ -403,51 +440,52 @@ if not translit.done_ru_trsc_en then
["я"] = "ya"
}
- translit.tables["English transcription lowercase second pass"] = translit.ru_trsc_en_low
+ translit.tables["English transcription lowercase second pass"]
+ = translit.ru_trsc_en_low
--------------------------------------------
-- Uppercase English simple transcription --
--------------------------------------------
translit.ru_trsc_en_upp = translit.make_add_dict{
- ["А"] = "A",
- ["Б"] = "B",
- ["В"] = "V",
- ["Г"] = "G",
- ["Д"] = "D",
- ["Е"] = "E",
- ["Ё"] = "E",
- ["Ж"] = "Zh",
- ["З"] = "Z",
- ["И"] = "I",
- ["Й"] = "Y",
- ["К"] = "K",
- ["Л"] = "L",
- ["М"] = "M",
- ["Н"] = "N",
- ["О"] = "O",
- ["П"] = "P",
- ["Р"] = "R",
- ["С"] = "S",
- ["Т"] = "T",
- ["У"] = "U",
- ["Ф"] = "F",
- ["Х"] = "Kh",
- ["Ц"] = "Ts",
- ["Ч"] = "Ch",
- ["Ш"] = "Sh",
- ["Щ"] = "Shsh",
- ["Ъ"] = "",
- ["Ы"] = "Y",
- ["Ь"] = "",
- ["Э"] = "E",
- ["Ю"] = "Yu",
- ["Я"] = "Ya"
+ ["А"] = "A",
+ ["Б"] = "B",
+ ["В"] = "V",
+ ["Г"] = "G",
+ ["Д"] = "D",
+ ["Е"] = "E",
+ ["Ё"] = "E",
+ ["Ж"] = "Zh",
+ ["З"] = "Z",
+ ["И"] = "I",
+ ["Й"] = "Y",
+ ["К"] = "K",
+ ["Л"] = "L",
+ ["М"] = "M",
+ ["Н"] = "N",
+ ["О"] = "O",
+ ["П"] = "P",
+ ["Р"] = "R",
+ ["С"] = "S",
+ ["Т"] = "T",
+ ["У"] = "U",
+ ["Ф"] = "F",
+ ["Х"] = "Kh",
+ ["Ц"] = "Ts",
+ ["Ч"] = "Ch",
+ ["Ш"] = "Sh",
+ ["Щ"] = "Shsh",
+ ["Ъ"] = "",
+ ["Ы"] = "Y",
+ ["Ь"] = "",
+ ["Э"] = "E",
+ ["Ю"] = "Yu",
+ ["Я"] = "Ya"
}
- translit.tables["English transcription uppercase second pass"] = translit.ru_trsc_en_upp
+ translit.tables["English transcription uppercase second pass"]
+ = translit.ru_trsc_en_upp
-
function translit.gen_rules_en ()
-- The english е-rule, Vе -> Vye
translit.ru_trsc_en_jerule = translit.make_add_dict{}
@@ -457,7 +495,8 @@ if not translit.done_ru_trsc_en then
translit.ru_trsc_en_jerule[new_ante] = new_post
end
- translit.tables["English transcription ye-rule"] = translit.ru_trsc_en_jerule
+ translit.tables["English transcription ye-rule"]
+ = translit.ru_trsc_en_jerule
end
translit.gen_rules_en()
@@ -471,84 +510,86 @@ if not translit.done_ru_trsc_cz then
-----------------------------------
translit.ru_trsc_cz_low = translit.make_add_dict{
- ["а"] = "a",
- ["б"] = "b",
- ["в"] = "v",
- ["г"] = "g",
- ["д"] = "d",
- ["е"] = "e",
- ["ё"] = "ë",
- ["ж"] = "ž",
- ["з"] = "z",
- ["и"] = "i",
- ["й"] = "j",
- ["к"] = "k",
- ["л"] = "l",
- ["м"] = "m",
- ["н"] = "n",
- ["о"] = "o",
- ["п"] = "p",
- ["р"] = "r",
- ["с"] = "s",
- ["т"] = "t",
- ["у"] = "u",
- ["ф"] = "f",
- ["х"] = "ch",
- ["ц"] = "c",
- ["ч"] = "č",
- ["ш"] = "š",
- ["щ"] = "šč",
- ["ъ"] = "ъ",
- ["ы"] = "y",
- ["ь"] = "ь",
- ["э"] = "è",
- ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and тя -> ťa, but
- ["я"] = "ja" -- that would complicate things a bit and linguists might not
- } -- agree.
-
- translit.tables["Czech transcription lowercase"] = translit.ru_trsc_cz_low
+ ["а"] = "a",
+ ["б"] = "b",
+ ["в"] = "v",
+ ["г"] = "g",
+ ["д"] = "d",
+ ["е"] = "e",
+ ["ё"] = "ë",
+ ["ж"] = "ž",
+ ["з"] = "z",
+ ["и"] = "i",
+ ["й"] = "j",
+ ["к"] = "k",
+ ["л"] = "l",
+ ["м"] = "m",
+ ["н"] = "n",
+ ["о"] = "o",
+ ["п"] = "p",
+ ["р"] = "r",
+ ["с"] = "s",
+ ["т"] = "t",
+ ["у"] = "u",
+ ["ф"] = "f",
+ ["х"] = "ch",
+ ["ц"] = "c",
+ ["ч"] = "č",
+ ["ш"] = "š",
+ ["щ"] = "šč",
+ ["ъ"] = "ъ",
+ ["ы"] = "y",
+ ["ь"] = "ь",
+ ["э"] = "è",
+ ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and
+ ["я"] = "ja" -- тя -> ťa, but that would complicate things a
+ } -- bit and linguists might not agree.
+
+ translit.tables["Czech transcription lowercase"]
+ = translit.ru_trsc_cz_low
-----------------------------------
-- Uppercase Czech transcription --
-----------------------------------
translit.ru_trsc_cz_upp = translit.make_add_dict{
- ["А"] = "A",
- ["Б"] = "B",
- ["В"] = "V",
- ["Г"] = "G",
- ["Д"] = "D",
- ["Е"] = "E",
- ["Ё"] = "Ë",
- ["Ж"] = "Ž",
- ["З"] = "Z",
- ["И"] = "I",
- ["Й"] = "J",
- ["К"] = "K",
- ["Л"] = "L",
- ["М"] = "M",
- ["Н"] = "N",
- ["О"] = "O",
- ["П"] = "P",
- ["Р"] = "R",
- ["С"] = "S",
- ["Т"] = "T",
- ["У"] = "U",
- ["Ф"] = "F",
- ["Х"] = "Ch",
- ["Ц"] = "C",
- ["Ч"] = "Č",
- ["Ш"] = "Š",
- ["Щ"] = "Šč",
- ["Ъ"] = "Ъ",
- ["Ы"] = "Y",
- ["Ь"] = "Ь",
- ["Э"] = "È",
- ["Ю"] = "Ju",
- ["Я"] = "Ja"
+ ["А"] = "A",
+ ["Б"] = "B",
+ ["В"] = "V",
+ ["Г"] = "G",
+ ["Д"] = "D",
+ ["Е"] = "E",
+ ["Ё"] = "Ë",
+ ["Ж"] = "Ž",
+ ["З"] = "Z",
+ ["И"] = "I",
+ ["Й"] = "J",
+ ["К"] = "K",
+ ["Л"] = "L",
+ ["М"] = "M",
+ ["Н"] = "N",
+ ["О"] = "O",
+ ["П"] = "P",
+ ["Р"] = "R",
+ ["С"] = "S",
+ ["Т"] = "T",
+ ["У"] = "U",
+ ["Ф"] = "F",
+ ["Х"] = "Ch",
+ ["Ц"] = "C",
+ ["Ч"] = "Č",
+ ["Ш"] = "Š",
+ ["Щ"] = "Šč",
+ ["Ъ"] = "Ъ",
+ ["Ы"] = "Y",
+ ["Ь"] = "Ь",
+ ["Э"] = "È",
+ ["Ю"] = "Ju",
+ ["Я"] = "Ja"
}
- translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp
+ translit.tables["Czech transcription uppercase"]
+ = translit.ru_trsc_cz_upp
----------------------------------------------
-- Lowercase Additional Czech Transcription --
@@ -573,7 +614,9 @@ if not translit.done_ru_trsc_cz then
["ѵ"] = "ÿ",
}
- translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit.ru_trsc_cz_add_low
+ translit.tables[
+ "Czech transcription for OCS and pre-1918 lowercase"]
+ = translit.ru_trsc_cz_add_low
----------------------------------------------
@@ -599,38 +642,95 @@ if not translit.done_ru_trsc_cz then
["Ѵ"] = "Ÿ",
}
- translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit.ru_trsc_cz_add_upp
+ translit.tables[
+ "Czech transcription for OCS and pre-1918 uppercase"]
+ = translit.ru_trsc_cz_add_upp
translit.done_ru_trsc_cz = true
end
---===========================================================================--
--- End Of Tables --
---===========================================================================--
+--===================================================================--
+-- End Of Tables --
+--===================================================================--
local function transcript (mode, text)
local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs
- local addrules = translit.addrules
- local utfchar = translit.utfchar
local trsc_parser, p_rules, capt, p_de
local function tab_subst (s, ...)
+ local sets = { ... }
local p_tmp, tmp = nil, translit.make_add_dict{}
- for _,tab in ipairs(arg) do
- tmp = tmp + tab
+ for n=1, #sets do
+ local set = sets[n]
+ tmp = tmp + set
end
p_tmp = addrules(tmp, p_tmp)
local fp = Cs((Cs(P(p_tmp) / tmp) + utfchar)^0)
- return fp:match(s)
+ return lpegmatch(fp, s)
end
- local vow, con, iy
- vow = addrules(translit.ru_vowels, vow)
- con = addrules(translit.ru_consonants, con)
- iy = addrules(translit.ru_trsc_iy, iy )
+ if mode == "ru_transcript_en" then
+
+ text = tab_subst(text, translit.ru_trsc_en_jerule)
+ text = tab_subst(text,
+ translit.ru_trsc_en_low_first,
+ translit.ru_trsc_en_upp_first)
+ text = tab_subst(text,
+ translit.ru_trsc_en_low,
+ translit.ru_trsc_en_upp)
+
+ return text
+
+ elseif mode == "ru_transcript_en_exp" then
+
+ local en_low_upp = translit.make_add_dict{}
+ en_low_upp = translit.ru_trsc_en_low + translit.ru_trsc_en_upp
+
+ local twochar
+ local tworepl = translit.make_add_dict{}
+
+ twochar = addrules( translit.ru_trsc_en_low_first, twochar)
+ twochar = addrules( translit.ru_trsc_en_upp_first, twochar)
+
+ tworepl = translit.ru_trsc_en_low_first
+ + translit.ru_trsc_en_upp_first
+
+ -- The е-rule, Vе -> Vye
+ local function V_je (s)
+ local ante = utf.sub(s, 1, 1)
+ return en_low_upp[ante] .. "ye"
+ end
+ local vow = addrules(translit.ru_vowels, nil) -- vowel pattern; no outer-scope "vow" exists in this branch
+ local jerule = Cs((vow * "е") / V_je)
+
+ local dvoje = Cs(twochar / tworepl)
+ local other = Cs((utfchar) / en_low_upp)
+
+ local g = Cs((dvoje + jerule + other + utfchar)^0)
+
+ text = g:match(text)
+
+ return text
+
+ elseif mode == "ru_cz" or mode == "ocs_cz" then
+ text = tab_subst(text,
+ translit.ru_trsc_cz_low,
+ translit.ru_trsc_cz_upp)
+ if mode == "ocs_cz" then
+ text = tab_subst(text,
+ translit.ru_trsc_cz_add_low,
+ translit.ru_trsc_cz_add_upp)
+ end
+ return text
+ end
if mode == "ru_transcript_de_exp" then
+ local vow, con, iy
+ vow = addrules(translit.ru_vowels, vow)
+ con = addrules(translit.ru_consonants, con)
+ iy = addrules(translit.ru_trsc_iy, iy )
+
local de_low_upp = translit.make_add_dict{}
de_low_upp = translit.ru_trsc_upp + translit.ru_trsc_low
@@ -640,7 +740,8 @@ local function transcript (mode, text)
twochar = addrules( translit.ru_trsc_low_first, twochar )
twochar = addrules( translit.ru_trsc_upp_first, twochar )
- tworepl = translit.ru_trsc_low_first + translit.ru_trsc_upp_first
+ tworepl = translit.ru_trsc_low_first
+ + translit.ru_trsc_upp_first
-- The й-rule, VйC -> ViC
local function V_i_C (s)
@@ -671,25 +772,25 @@ local function transcript (mode, text)
-- The sharp-s-rule, Vсх -> Vßх
local function V_sz_ch (s)
local ante = utf.sub(s, 1, 1)
- return de_low_upp[ante] .. "ßch"
+ return de_low_upp[ante] .. "ßch"
end
-- The е-rule, Vе -> Vje
local function V_je (s)
local ante = utf.sub(s, 1, 1)
- return de_low_upp[ante] .. "je"
+ return de_low_upp[ante] .. "je"
end
- -- Reapplying V_je on its result + next char would make the following
- -- two rules obsolete.
+ -- Reapplying V_je on its result + next char would make the
+ -- following two rules obsolete.
local function V_jeje (s)
local ante = utf.sub(s, 1, 1)
- return de_low_upp[ante] .. "jeje"
+ return de_low_upp[ante] .. "jeje"
end
local function V___je (s)
local ante = utf.sub(s, 1, 1)
- return de_low_upp[ante] .. "jeje"
+ return de_low_upp[ante] .. "jeje"
end
-- The ё-rule, Vё -> Vjo
@@ -697,7 +798,7 @@ local function transcript (mode, text)
-- Somebody should teach those DUDEN guys parsimony.
local function V_jo (s)
local ante = utf.sub(s, 1, 1)
- return de_low_upp[ante] .. "jo"
+ return de_low_upp[ante] .. "jo"
end
local iyrule = Cs((iy * "й" * con) / iy_j_C)
@@ -725,6 +826,7 @@ local function transcript (mode, text)
return text
elseif mode == "ru_transcript_de" then
+
if lpeg.version() == "0.9" then
text = tab_subst(text, translit.ru_trsc_jrule)
@@ -733,66 +835,33 @@ local function transcript (mode, text)
text = tab_subst(text, translit.ru_trsc_srule)
text = tab_subst(text, translit.ru_trsc_sharpsrule)
text = tab_subst(text, translit.ru_trsc_jorule)
- text = tab_subst(text, translit.ru_trsc_upp_first, translit.ru_trsc_low_first)
- text = tab_subst(text, translit.ru_trsc_upp, translit.ru_trsc_low)
+ text = tab_subst(text,
+ translit.ru_trsc_upp_first,
+ translit.ru_trsc_low_first)
+ text = tab_subst(text,
+ translit.ru_trsc_upp,
+ translit.ru_trsc_low)
return text
elseif lpeg.version() == "0.10" then
return translit.future_ru_transcript_de:match(text)
end
- elseif mode == "ru_transcript_en_exp" then
-
- local en_low_upp = translit.make_add_dict{}
- en_low_upp = translit.ru_trsc_en_low + translit.ru_trsc_en_upp
-
- local twochar
- local tworepl = translit.make_add_dict{}
-
- twochar = addrules( translit.ru_trsc_en_low_first, twochar)
- twochar = addrules( translit.ru_trsc_en_upp_first, twochar)
-
- tworepl = translit.ru_trsc_en_low_first + translit.ru_trsc_en_upp_first
-
- -- The е-rule, Vе -> Vye
- local function V_je (s)
- local ante = utf.sub(s, 1, 1)
- return en_low_upp[ante] .. "ye"
- end
-
- local jerule = Cs((vow * "е") / V_je)
-
- local dvoje = Cs(twochar / tworepl)
- local other = Cs((utfchar) / en_low_upp)
-
- local g = Cs((dvoje + jerule + other + utfchar)^0)
-
- text = g:match(text)
-
- return text
-
- elseif mode == "ru_transcript_en" then
-
- text = tab_subst(text, translit.ru_trsc_en_jerule)
- text = tab_subst(text, translit.ru_trsc_en_low_first, translit.ru_trsc_en_upp_first)
- text = tab_subst(text, translit.ru_trsc_en_low, translit.ru_trsc_en_upp)
-
- return text
-
- elseif mode == "ru_cz" or mode == "ocs_cz" then
- text = tab_subst(text, translit.ru_trsc_cz_low, translit.ru_trsc_cz_upp)
- if mode == "ocs_cz" then
- text = tab_subst(text, translit.ru_trsc_cz_add_low, translit.ru_trsc_cz_add_upp)
- end
-
- return text
end
end
-translit.methods ["ru_transcript_de"] = function (text) return transcript("ru_transcript_de" , text) end
-translit.methods ["ru_transcript_de_exp"] = function (text) return transcript("ru_transcript_de_exp", text) end
-translit.methods ["ru_transcript_en"] = function (text) return transcript("ru_transcript_en" , text) end
-translit.methods ["ru_transcript_en_exp"] = function (text) return transcript("ru_transcript_en_exp", text) end
-translit.methods ["ru_cz"] = function (text) return transcript("ru_cz" , text) end
-translit.methods ["ocs_cz"] = function (text) return transcript("ocs_cz" , text) end
+translit.methods ["ru_transcript_de"]
+ = function (text) return transcript("ru_transcript_de" , text) end
+translit.methods ["ru_transcript_de_exp"]
+ = function (text) return transcript("ru_transcript_de_exp", text) end
+translit.methods ["ru_transcript_en"]
+ = function (text) return transcript("ru_transcript_en" , text) end
+translit.methods ["ru_transcript_en_exp"]
+ = function (text) return transcript("ru_transcript_en_exp", text) end
+translit.methods ["ru_cz"]
+ = function (text) return transcript("ru_cz" , text) end
+translit.methods ["ocs_cz"]
+ = function (text) return transcript("ocs_cz" , text) end
+
+-- vim:sw=4:ts=4:expandtab:ft=lua
diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua
index 4ca7ea0..873e6d6 100644
--- a/tex/context/third/transliterator/transliterator.lua
+++ b/tex/context/third/transliterator/transliterator.lua
@@ -276,3 +276,5 @@ function translit.transliterate (method, text)
end
context ( methods[method](text) )
end
+
+-- vim:sw=4:ts=4:expandtab:ft=lua