summaryrefslogtreecommitdiff
path: root/tex/context/third/transliterator/trans_tables_scntfc.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/third/transliterator/trans_tables_scntfc.lua')
-rw-r--r--tex/context/third/transliterator/trans_tables_scntfc.lua256
1 files changed, 256 insertions, 0 deletions
diff --git a/tex/context/third/transliterator/trans_tables_scntfc.lua b/tex/context/third/transliterator/trans_tables_scntfc.lua
new file mode 100644
index 0000000..bf9a510
--- /dev/null
+++ b/tex/context/third/transliterator/trans_tables_scntfc.lua
@@ -0,0 +1,256 @@
+
+--===========================================================================--
+-- Other transliterations --
+--===========================================================================--
+
+
+-- The following are needed because ISO 9 does not cover old Slavonic
+-- characters that became obsolete before the advent of гражданский шрифт.
+
+-- Please note that these mappings are not bijective so don't expect the result
+-- to be easily revertible (by machines).
+
+-- Source p. 77 of
+-- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf
+
+-----------------------------------------------------------------------
+-- Lowercase and uppercase letter Uk -- “scientific transliteration” --
+-----------------------------------------------------------------------
+
+translit.ocs_uk = {
+ ["oу"] = "u",
+ ["оу"] = "u",
+ ["Оу"] = "U",
+}
+-----------------------------------------------------------------------------
+-- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” --
+-----------------------------------------------------------------------------
+
+translit.ocs_low = {
+ ["а"] = "a",
+ ["б"] = "b",
+ ["в"] = "v",
+ ["г"] = "g",
+ ["д"] = "d",
+ ["є"] = "e",
+ ["ж"] = "ž",
+ ["ꙃ"] = "ʒ", -- U+0292, alternative: dz U+01f3
+ ["ѕ"] = "ʒ",
+ ["ꙁ"] = "z",
+ ["з"] = "z",
+ ["и"] = "i",
+ ["і"] = "i",
+ ["ї"] = "i",
+ ["ћ"] = "g’",
+ ["к"] = "k",
+ ["л"] = "l",
+ ["м"] = "m",
+ ["н"] = "n",
+ ["о"] = "o",
+ ["п"] = "p",
+ ["р"] = "r",
+ ["с"] = "s",
+ ["т"] = "t",
+ ["у"] = "u",
+ ["ѹ"] = "u",
+ ["ꙋ"] = "u",
+ ["ф"] = "f",
+ ["х"] = "x",
+ ["ѡ"] = "o", --"ō",
+ ["ѿ"] = "ot", -- U+047f
+ ["ѽ"] = "o!", -- U+047d
+ ["ꙍ"] = "o!", -- U+064D
+ ["ц"] = "c",
+ ["ч"] = "č",
+ ["ш"] = "š",
+ ["щ"] = "št",
+ ["ъ"] = "ъ",
+ ["ы"] = "y",
+ ["ꙑ"] = "y", -- Old jery (U+a651) as used e.g. by the OCS Wikipedia.
+ ["ь"] = "ь",
+ ["ѣ"] = "ě",
+ ["ю"] = "ju",
+ ["ꙗ"] = "ja",
+ ["ѥ"] = "je",
+ ["ѧ"] = "ę",
+ ["ѩ"] = "ję",
+ ["ѫ"] = "ǫ",
+ ["ѭ"] = "jǫ",
+ ["ѯ"] = "ks",
+ ["ѱ"] = "ps",
+ ["ѳ"] = "th",
+ ["ѵ"] = "ü",
+}
+
+translit.tables["OCS \\quotation{scientific} transliteration lowercase"] = translit.ocs_low
+
+-----------------------------------------------------------------------------
+-- Uppercase pre-Peter cyrillic characters -- “scientific transliteration” --
+-----------------------------------------------------------------------------
+
+translit.ocs_upp = {
+ ["А"] = "A",
+ ["Б"] = "B",
+ ["В"] = "V",
+ ["Г"] = "G",
+ ["Д"] = "D",
+ ["Є"] = "E",
+ ["Ж"] = "Ž",
+ ["Ꙃ"] = "Ʒ", -- U+01b7, alternative: Dz U+01f2
+ ["Ѕ"] = "Ʒ",
+ ["Ꙁ"] = "Z",
+ ["З"] = "Z",
+ ["И"] = "I",
+ ["І"] = "I",
+ ["Ї"] = "I",
+ ["Ћ"] = "G’",
+ ["К"] = "K",
+ ["Л"] = "L",
+ ["М"] = "M",
+ ["Н"] = "N",
+ ["О"] = "O",
+ ["П"] = "P",
+ ["Р"] = "R",
+ ["С"] = "S",
+ ["Т"] = "T",
+ ["У"] = "u",
+ ["Ѹ"] = "U",
+ ["ꙋ"] = "U",
+ ["Ф"] = "F",
+ ["Х"] = "X",
+ ["Ѡ"] = "Ō",
+ ["Ѿ"] = "Ot", -- U+047c
+ ["Ѽ"] = "O!", -- U+047e
+ ["Ꙍ"] = "O!", -- U+064C
+ ["Ц"] = "C",
+ ["Ч"] = "Č",
+ ["Ш"] = "Š",
+ ["Щ"] = "Št",
+ ["Ъ"] = "Ŭ",
+ ["Ы"] = "Y",
+ ["Ꙑ"] = "Y", -- U+a650
+ ["Ь"] = "Ĭ",
+ ["Ѣ"] = "Ě",
+ ["Ю"] = "Ju",
+ ["Ꙗ"] = "Ja",
+ ["Ѥ"] = "Je",
+ ["Ѧ"] = "Ę",
+ ["Ѩ"] = "Ję",
+ ["Ѫ"] = "Ǫ",
+ ["Ѭ"] = "Jǫ",
+ ["Ѯ"] = "Ks",
+ ["Ѱ"] = "Ps",
+ ["Ѳ"] = "Th",
+ ["Ѵ"] = "Ü",
+}
+
+translit.tables["OCS \\quotation{scientific} transliteration uppercase"] = translit.ocs_upp
+
+-- Note on the additional tables: these cover characters that are not defined
+-- in ISO 9 but have a “scientific” transliteration. You may use them as
+-- complementary mapping to ISO 9, trading off homogenity for completeness.
+
+----------------------------------------------------------------------------------------
+-- Lowercase additional pre-Peter cyrillic characters -- “scientific transliteration” --
+----------------------------------------------------------------------------------------
+
+translit.ocs_add_low = {
+ ["ѕ"] = "dz", -- Mapped to ẑ in ISO 9 (Macedonian …)
+ ["ѯ"] = "ks",
+ ["ѱ"] = "ps",
+ ["ѡ"] = "ô",
+ ["ѿ"] = "ot", -- U+047f
+ ["ѫ"] = "ǫ", -- Mapped to ǎ in ISO 9.
+ ["ѧ"] = "ę",
+ ["ѭ"] = "jǫ",
+ ["ѩ"] = "ję",
+ ["ѥ"] = "je",
+ ["ѹ"] = "u", -- Digraph uk.
+ ["ꙋ"] = "u", -- Monograph uk, U+a64b. (No glyph yet in the "fixed" font in February 2010 …)
+ ["ꙑ"] = "y", -- U+a651
+}
+
+translit.tables["OCS \\quotation{scientific} transliteration additional lowercase"] = translit.ocs_add_low
+
+----------------------------------------------------------------------------------------
+-- Uppercase additional pre-Peter cyrillic characters -- “scientific transliteration” --
+----------------------------------------------------------------------------------------
+
+translit.ocs_add_upp = {
+ ["Ѕ"] = "Dz",
+ ["Ѯ"] = "Ks",
+ ["Ѱ"] = "Ps",
+ ["Ѡ"] = "Ô",
+ ["Ѿ"] = "ot",
+ ["Ѫ"] = "Ǫ",
+ ["Ѧ"] = "Ę",
+ ["Ѭ"] = "Jǫ",
+ ["Ѩ"] = "Ję",
+ ["Ѥ"] = "Je",
+ ["Ѹ"] = "U", -- Digraph uk.
+ ["Ꙋ"] = "U", -- Monograph Uk, U+a64a.
+ ["Ꙑ"] = "Y", -- U+a650
+}
+
+translit.tables["OCS \\quotation{scientific} transliteration additional uppercase"] = translit.ocs_add_upp
+
+--===========================================================================--
+-- End Of Tables --
+--===========================================================================--
+
+function translit.scientific (mode, text)
+ local P, R, S, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cs
+ local loc = lpeg.locale ()
+
+ -- http://lua-users.org/lists/lua-l/2009-06/msg00343.html
+ local utfchar = R("\000\127") +
+ R("\194\223") * R("\128\191") +
+ R("\224\240") * R("\128\191") * R("\128\191") +
+ R("\241\244") * R("\128\191") * R("\128\191") * R("\128\191")
+
+ local cyr = {}
+ local cyruk, p_cyruk, p_cyr
+ local scientific_parser
+ if mode == ("iso9_ocs") then
+
+ dofile "trans_tables_iso9.lua"
+ translit.add_table( cyr,
+ translit.ru_upp,
+ translit.ru_low,
+ translit.ru_old_upp,
+ translit.ru_old_low,
+ translit.non_ru_upp,
+ translit.non_ru_low,
+ translit.ocs_add_low,
+ translit.ocs_add_upp
+ )
+
+ p_cyr = Cs(utfchar) / cyr
+ scientific_parser = Cs((p_cyr + utfchar)^0)
+
+ elseif mode == ("ocs") then
+
+ for i,_ in pairs(translit.ocs_uk) do
+ if cyruk == nil then cyruk = P(i) -- is this The Right Way build patterns from a table?
+ else cyruk = cyruk + P(i)
+ end
+ end
+ translit.add_table( cyr, translit.ocs_low, translit.ocs_upp )
+
+ p_cyruk = Cs(P(cyruk)) / translit.ocs_uk
+ p_cyr = Cs(utfchar) / cyr
+
+ scientific_parser = Cs((p_cyruk + p_cyr + utfchar)^0)
+
+ elseif mode == ("ocs_gla") then
+ dofile "trans_tables_glag.lua"
+ translit.add_table( cyr, translit.ocs_gla_low, translit.ocs_gla_upp )
+ p_cyr = Cs(utfchar) / cyr
+ scientific_parser = Cs((p_cyr + utfchar)^0)
+ end
+
+ text = scientific_parser:match(text)
+
+ return text
+end
+