diff options
| -rw-r--r-- | .hgignore | 35 | ||||
| -rw-r--r-- | doc/context/third/transliterator/transliterator.tex | 88 | ||||
| -rw-r--r-- | tex/context/interface/third/t-transliterator.xml | 46 | ||||
| -rw-r--r-- | tex/context/third/transliterator/t-transliterator.mkiv | 2057 | ||||
| -rw-r--r-- | tex/context/third/transliterator/t-transliterator.tex | 1 | 
5 files changed, 2227 insertions, 0 deletions
diff --git a/.hgignore b/.hgignore new file mode 100644 index 0000000..ffcf81a --- /dev/null +++ b/.hgignore @@ -0,0 +1,35 @@ +syntax:glob +*.swp +*.pdf +*.aux +*.bbl +*.log +*.url +*.toc +*.ind +*.out +*.dvi +*.blg +*.4ct +*.idv +*.html +*.css +*.4tc +*.lg +*.xref +*.idx +*.tmp +*.djvu +*.ps +*.make +*.d +*.fls +*-blx.bib +*.temp +*.latexmain +*.snm +*.nav +*.vrb +*.top +*.tuc +*.swo diff --git a/doc/context/third/transliterator/transliterator.tex b/doc/context/third/transliterator/transliterator.tex new file mode 100644 index 0000000..47b1dc3 --- /dev/null +++ b/doc/context/third/transliterator/transliterator.tex @@ -0,0 +1,88 @@ +\setupinteraction [state=start] +\setupcombinedlist[content][interaction=all,focus=standard] +\usemodule[transliterator] + +% == REFERENCES =============================================================== +\startpublication[ +  k=duden, +  t=book, +  a={Drosdowski/Müller/Scholze-Stubenrecht/Wermke}, +  y=1952, +  n=1, +  s={DUDEN}, +] +\editor[]{Günther}[]{}{Drosdowski} +\editor[]{Wolfgang}[]{}{Müller} +\editor[]{Werner}[]{}{Schulze-Stubenrecht} +\editor[]{Matthias}[]{}{Wermke} +\pubyear{1991} +\title{DUDEN Rechtschreibung der deutschen Sprache} +\city{Mannheim et. 
al.} +\edition{20} +\stoppublication + +\startpublication[ +  k=bornemann, +  t=book, +  a={Bornemann/Risch}, +  y=1978, +  n=1, +  s={Grammatik}, +] +\author[]{Eduard}[]{}{Bornemann} +\author[]{Ernst}[]{}{Risch} +\pubyear{1978} +\title{Griechische Grammatik} +\city{Frankfurt am Main} +\edition{2} +\stoppublication + +\startpublication[ +  k=kirschbaum, +  t=book, +  a={Kirschbaum}, +  y=2001, +  n=1, +  s={Grammatik}, +] +\author[]{Ernst Georg}[]{}{Kirschbaum} +\pubyear{2001} +\title{Grammatik der russischen Sprache} +\city{Berlin} +\edition{1} +\stoppublication + +\startpublication[ +  k=aksstudien, +  t=book, +  a={Birnbaum/Schaeken}, +  y=1999, +  n=1, +  s={Studien}, +] +\author[]{Henrik}[]{}{Birnbaum} +\author[]{Jos}[]{}{Schaeken} +\pubyear{1999} +\title{Altkirchenslavische Studien} +\volume{2} +\city{München} +\edition{1} +\url{http://www.schaeken.nl/lu/research/online/publications/akslstud/index.htm} +\stoppublication + +%============================================================================== + +\usemodule[int-load] +\loadsetups[t-transliterator.xml]  + +\starttext +\chapter{Usage and Functionality} +\setup{setupTranslit} + + +\chapter{Introduction} + +\chapter{References} + +\stoptext +%   vim:ft=context diff --git a/tex/context/interface/third/t-transliterator.xml b/tex/context/interface/third/t-transliterator.xml new file mode 100644 index 0000000..2ffb449 --- /dev/null +++ b/tex/context/interface/third/t-transliterator.xml @@ -0,0 +1,46 @@ +<?xml version="1.0" standalone="yes"?> + +<cd:interface xmlns:cd="http://www.pragma-ade.com/commands"  +  name="context" language="en"  +  version="2010.02.27"> + +  <cd:command name="setupTranslit" file="t-transliterator.mkiv"> +    <cd:sequence> +      <cd:string value="setupTranslit"/> +    </cd:sequence> +    <cd:arguments> +      <cd:assignments list="yes" optional="no"> +        <cd:parameter name="mode"> +          <cd:constant type="ru_old" default="yes"/> +          <cd:constant type="ru"/> +          
<cd:constant type="ru_transcript_de"/> +          <cd:constant type="ru_transcript_en"/> +          <cd:constant type="all"/> +          <cd:constant type="iso9_ocs"/> +          <cd:constant type="ocs"/> +          <cd:constant type="ocs_gla"/> +          <cd:constant type="ru_cz"/> +          <cd:constant type="ocs_cz"/> +          <cd:constant type="gr"/> +          <cd:constant type="gr_n"/> +        </cd:parameter> +        <cd:parameter name="hyphenate"> +          <cd:constant type="cz" default="yes"/> +          <cd:constant type="sk"/> +        </cd:parameter> +      </cd:assignments> +    </cd:arguments> +  </cd:command> + +  <cd:command name="transliterate" file="t-transliterator.mkiv"> +    <cd:sequence> +      <cd:string value="transliterate"/> +    </cd:sequence> +    <cd:arguments> +      <cd:assignments list="yes" optional="yes"> +        <cd:inherit name="setupTranslit"/> +      </cd:assignments> +    </cd:arguments> +  </cd:command> + +</cd:interface> diff --git a/tex/context/third/transliterator/t-transliterator.mkiv b/tex/context/third/transliterator/t-transliterator.mkiv new file mode 100644 index 0000000..fc07c1d --- /dev/null +++ b/tex/context/third/transliterator/t-transliterator.mkiv @@ -0,0 +1,2057 @@ +%D \enableregime[utf] +%D \module                                                                                                                  +%D   [      file=t-degrade, +%D        version=2010.02.26, +%D          title=\CONTEXT\ User Module, +%D       subtitle=The Transliterator, +%D         author=Philipp Gesang, +%D           date=\currentdate, +%D      copyright=Philipp Gesang, +%D        license=2-clause BSD, +%D          email={pgesang at ix dot urz dot uni-heidelberg dot de}] +%D This module is licensed under the conditions of the BSD license with  +%D two clauses: http://www.freebsd.org/copyright/freebsd-license.html. 
+%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/.\newpage + +\writestatus{loading}{Transliteration from non-Latin scripts} + +\startmodule[transliterator] + +\unprotect + +%D Use the Transliterator by adding \type{\usemodule[transliterator]} somewhere +%D before \type{\starttext}.  Adjust the Transliterator through the +%D \type{\setupTranslit} command.  As a first argument it accepts a set of +%D key-value options; at present you may configure \type{mode} and +%D \type{hyphenate} (the defaults below additionally set a \type{debug} key). +\def\setupTranslit[#1]{\getparameters[TRL][#1]} + +%D At first we'll set some defaults: +\setupTranslit[mode=ru_old,hyphenate=cz,debug=false] +%D Possible values for \type{mode} are, at the time of this writing: +%D \type{ru}, \type{ru_transcript_de}, \type{ru_transcript_en}, \type{ru_old}, +%D \type{all}, \type{iso9_ocs}, \type{ocs}, \type{ocs_gla}, \type{ru_cz}, +%D \type{ocs_cz}, \type{gr} and \type{gr_n}. +%D Possible values for \type{hyphenate} are all valid \CONTEXT\ language codes; for an +%D overview see \type{http://wiki.contextgarden.net/Language_Codes}. +%D In praxi you may want to choose either Czech (the default) or Slovak +%D (\type{sk}) for most transliterations from Cyrillic scripts.  I've not yet +%D made up my mind concerning Greek transliteration; any suggestions are +%D welcome. + +%D For clarity's sake we'll stuff everything Lua into one table. +\startluacode +translit = translit or {} +translit.debug_count = 0 +\stopluacode + +%D We want to keep track of all the tables we'll create so we put them into +%D a separate dictionary, keyed by a description string. +\startluacode +translit.tables = {} +\stopluacode + + +%D Next we define respective lists of vowels and consonants as used in the +%D Russian alphabet.  They are needed later when substitution tables for some +%D idiosyncratic transcriptions are generated. 
+\startluacode +-- If you haven't heard of cyrillic scripts until now you might want to read +-- at least the first 15 pages of  +-- http://www.uni-giessen.de/partosch/eurotex99/berdnikov2.pdf +-- before you continue reading this file. + +translit.ru_vowels = {"а", "е", "ё", "и", "й", "о", "у", "ы", "э", "ю", "я", +                      "А", "Е", "Ё", "И", "Й", "О", "У", "Ы", "Э", "Ю", "Я"} +translit.ru_consonants = {"б", "в", "г", "д", "ж", "з", "к", "л", "м", "н",  +                          "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш", "щ", +                          "Б", "В", "Г", "Д", "Ж", "З", "К", "Л", "М", "Н",  +                          "П", "Р", "С", "Т", "Ф", "Х", "Ц", "Ч", "Ш", "Щ"} +\stopluacode + +%D Substitution tables are the very heart of the Transliterator.  Due to the +%D nature of languages and scripts exhaustive substitution is the simplest +%D method for transliterations and transcriptions unless they are one-to-one +%D mappings like those defined in ISO~9. +%D +%D To achieve better reusability we split the tables into segments, the most +%D obvious being the \type{*_low} and \type{*_upp} variants for sets of lowercase +%D and uppercase characters.  Another set is constituted by e.~g. the +%D \type{ru_old*} tables that allow adding transcription of historical +%D characters if needed; by the way those are included in the default +%D transliteration mode \type{ru_old}. 
+ +%-===========================================================================-- +%-           ISO 9.1995(E) standardized transliteration for cyrillic         -- +%-===========================================================================-- + +\startluacode +----------------------------------------- +-- Lowercase russian cyrillic alphabet -- +----------------------------------------- +translit.ru_low = { +  ["а"] = "a", -- U+0430 -> U+0061 +  ["б"] = "b", -- U+0431 -> U+0062 +  ["в"] = "v", -- U+0432 -> U+0076 +  ["г"] = "g", -- U+0433 -> U+0067 +  ["д"] = "d", -- U+0434 -> U+0064 +  ["е"] = "e", -- U+0435 -> U+0065 +  ["ё"] = "ë", -- U+0451 -> U+00eb +  ["ж"] = "ž", -- U+0436 -> U+017e +  ["з"] = "z", -- U+0437 -> U+007a +  ["и"] = "i", -- U+0438 -> U+0069 +  ["й"] = "j", -- U+0439 -> U+006a +  ["к"] = "k", -- U+043a -> U+006b +  ["л"] = "l", -- U+043b -> U+006c +  ["м"] = "m", -- U+043c -> U+006d +  ["н"] = "n", -- U+043d -> U+006e +  ["о"] = "o", -- U+043e -> U+006f +  ["п"] = "p", -- U+043f -> U+0070 +  ["р"] = "r", -- U+0440 -> U+0072 +  ["с"] = "s", -- U+0441 -> U+0073 +  ["т"] = "t", -- U+0442 -> U+0074 +  ["у"] = "u", -- U+0443 -> U+0075 +  ["ф"] = "f", -- U+0444 -> U+0066 +  ["х"] = "h", -- U+0445 -> U+0068 +  ["ц"] = "c", -- U+0446 -> U+0063 +  ["ч"] = "č", -- U+0447 -> U+010d +  ["ш"] = "š", -- U+0448 -> U+0161 +  ["щ"] = "ŝ", -- U+0449 -> U+015d +  ["ъ"] = "ʺ", -- U+044a -> U+02ba <- That's somewhat ambiguous as 0x2ba is +  ["ы"] = "y", -- U+044b -> U+0079    used for uppercase, too. +  ["ь"] = "ʹ", -- U+044c -> U+02b9 <- Same here with 0x2b9. 
+  ["э"] = "è", -- U+044d -> U+00e8 +  ["ю"] = "û", -- U+044e -> U+00fb +  ["я"] = "â"  -- U+044f -> U+00e2 +} + +translit.tables["russian lowercase ISO~9"] = translit.ru_low + +----------------------------------------- +-- Uppercase russian cyrillic alphabet -- +----------------------------------------- + +translit.ru_upp = { +  ["А"] = "A", -- U+0410 -> U+0041 +  ["Б"] = "B", -- U+0411 -> U+0042 +  ["В"] = "V", -- U+0412 -> U+0056 +  ["Г"] = "G", -- U+0413 -> U+0047 +  ["Д"] = "D", -- U+0414 -> U+0044 +  ["Е"] = "E", -- U+0415 -> U+0045 +  ["Ё"] = "Ë", -- U+0401 -> U+00cb +  ["Ж"] = "Ž", -- U+0416 -> U+017d +  ["З"] = "Z", -- U+0417 -> U+005a +  ["И"] = "I", -- U+0418 -> U+0049 +  ["Й"] = "J", -- U+0419 -> U+004a +  ["К"] = "K", -- U+041a -> U+004b +  ["Л"] = "L", -- U+041b -> U+004c +  ["М"] = "M", -- U+041c -> U+004d +  ["Н"] = "N", -- U+041d -> U+004e +  ["О"] = "O", -- U+041e -> U+004f +  ["П"] = "P", -- U+041f -> U+0050 +  ["Р"] = "R", -- U+0420 -> U+0052 +  ["С"] = "S", -- U+0421 -> U+0053 +  ["Т"] = "T", -- U+0422 -> U+0054 +  ["У"] = "U", -- U+0423 -> U+0055 +  ["Ф"] = "F", -- U+0424 -> U+0046 +  ["Х"] = "H", -- U+0425 -> U+0048 +  ["Ц"] = "C", -- U+0426 -> U+0043 +  ["Ч"] = "Č", -- U+0427 -> U+010c +  ["Ш"] = "Š", -- U+0428 -> U+0160 +  ["Щ"] = "Ŝ", -- U+0429 -> U+015c +  ["Ъ"] = "ʺ", -- U+042a -> U+02ba +  ["Ы"] = "Y", -- U+042b -> U+0059 +  ["Ь"] = "ʹ", -- U+042c -> U+02b9 +  ["Э"] = "È", -- U+042d -> U+00c8 +  ["Ю"] = "Û", -- U+042e -> U+00db +  ["Я"] = "Â"  -- U+042f -> U+00c2 +} + +translit.tables["russian uppercase ISO~9"] = translit.ru_upp + +---------------------------------------------------------- +-- Lowercase pre-1918 russian cyrillic additional chars -- +---------------------------------------------------------- +-- cf. 
http://www.russportal.ru/index.php?id=oldorth.decret1917 + +translit.ru_old_low = { +  ["ѣ"] = "ě", -- U+0463 -> U+011b +  ["і"] = "ì", -- U+0456 -> U+00ec +  ["ѳ"] = "f", -- U+0473 -> U+0066 +  ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 +} + +translit.tables["russian pre-1918 lowercase ISO~9"] = translit.ru_old_low + +translit.ru_old_upp = { +  ["Ѣ"] = "Ě", -- U+0462 -> U+011a +  ["І"] = "Ì", -- U+0406 -> U+00cc +  ["Ѳ"] = "F", -- U+0472 -> U+0046 +  ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2 +} + +translit.tables["russian pre-1918 uppercase ISO~9"] = translit.ru_old_upp + +--------------------------------------------------------- +-- Lowercase characters from other cyrillic alphabets  -- +--------------------------------------------------------- + +translit.non_ru_low = { +  ["ӑ"] = "ă", -- U+04d1 -> U+0103 +  ["ӓ"] = "ä", -- U+04d3 -> U+00e4 +  ["ә"] = "a̋", -- u+04d9 -> U+0061+030b +  ["ґ"] = "g̀", -- u+0491 -> U+0067+0300 +  ["ҕ"] = "ğ", -- U+0495 -> U+011f +  ["ғ"] = "ġ", -- U+0493 -> U+0121 +  ["ђ"] = "đ", -- U+0452 -> U+0111 +  ["ѓ"] = "ǵ", -- U+0453 -> U+01f5 +  ["ӗ"] = "ĕ", -- U+04d7 -> U+0115 +  ["є"] = "ê", -- U+0454 -> U+00ea +  ["ҽ"] = "c̆", -- U+04bd -> U+0063+0306 +  ["ҿ"] = "ç̆", -- U+04bf -> U+00e7+0306 +  ["ӂ"] = "z̆", -- U+04c2 -> U+007a+0306 +  ["ӝ"] = "z̄", -- U+04dd -> U+007a+0304 +  ["җ"] = "ž̧", -- U+0497 -> U+017e+0327 +  ["ӟ"] = "z̈", -- U+04df -> U+007a+0308 +  ["ѕ"] = "ẑ", -- U+0455 -> U+1e91          -- Mapped to dz in old cyrillic non-ISO. 
+  ["ӡ"] = "ź", -- U+04e1 -> U+017a +  ["ӥ"] = "î", -- U+04e5 -> U+00ee +  ["і"] = "ì", -- U+0456 -> U+00ec +  ["ї"] = "ï", -- U+0457 -> U+00ef +  ["ј"] = "ǰ", -- U+0458 -> U+01f0 +  ["қ"] = "ķ", -- U+049b -> U+0137 +  ["ҟ"] = "k̄", -- U+049f -> U+006b+0304 +  ["љ"] = "l̂", -- U+0459 -> U+006c+0302 +  ["њ"] = "n̂", -- U+045a -> U+006e+0302 +  ["ҥ"] = "ṅ", -- U+04a5 -> U+1e45 +  ["ң"] = "ṇ", -- U+04a3 -> U+1e47 +  ["ӧ"] = "ö", -- U+04e7 -> U+00f6 +  ["ө"] = "ô", -- U+04e9 -> U+00f4 +  ["ҧ"] = "ṕ", -- U+04a7 -> U+1e55 +  ["ҫ"] = "ç", -- U+04ab -> U+00e7 +  ["ҭ"] = "ţ", -- U+04ad -> U+0163 +  ["ћ"] = "ć", -- U+045b -> U+0107 +  ["ќ"] = "ḱ", -- U+045c -> U+1e31 +  ["у́"] = "ú", -- U+0443+ -> U+00fA +  ["ў"] = "ŭ", -- U+045e -> U+016d +  ["ӱ"] = "ü", -- U+04f1 -> U+00fc +  ["ӳ"] = "ű", -- U+04f3 -> U+0171 +  ["ү"] = "ù", -- U+04af -> U+00f9 +  ["ҳ"] = "ḩ", -- U+04b3 -> U+1e29 +  ["һ"] = "ḥ", -- U+04bb -> U+1e25 +  ["ҵ"] = "c̄", -- U+04b5 -> U+0063+0304 +  ["ӵ"] = "c̈", -- U+04f5 -> U+0063+0308 +  ["ҷ"] = "ç", -- U+04cc -> U+00e7 +  ["џ"] = "d̂", -- U+045f -> U+0064+0302 +  ["ӹ"] = "ÿ", -- U+04f9 -> U+00ff +  ["ѣ"] = "ě", -- U+048d -> U+011b +  ["ѫ"] = "ǎ", -- U+046b -> U+01ce      -- Mapped to ǫ in non-ISO old cyrillic. +  ["ѳ"] = "f̀", -- U+0473 -> U+0066+0300 -- This is mapped to ‘f’ in ru_old. 
+  ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 +  ["ҩ"] = "ò", -- U+04a9 -> U+00f2 +  ["Ӏ"] = "‡"  -- U+04cf -> U+2021 +} + +translit.tables["cyrillic other lowercase ISO~9"] = translit.non_ru_low + +--------------------------------------------------------- +-- Uppercase characters from other cyrillic alphabets  -- +--------------------------------------------------------- + +translit.non_ru_upp = { +  ["Ӑ"] = "Ă", -- U+04d0 -> U+0102 +  ["Ӓ"] = "Ä", -- U+04d2 -> U+00c4 +  ["Ә"] = "A̋", -- U+04d8 -> U+0041+030b +  ["Ґ"] = "G̀", -- U+0490 -> U+0047+0300 +  ["Ҕ"] = "Ğ", -- U+0494 -> U+011e +  ["Ғ"] = "Ġ", -- U+0492 -> U+0120 +  ["Ђ"] = "Đ", -- U+0402 -> U+0110 +  ["Ѓ"] = "Ǵ", -- U+0403 -> U+01f4 +  ["Ӗ"] = "Ĕ", -- U+04d6 -> U+0114 +  ["Є"] = "Ê", -- U+0404 -> U+00ca +  ["Ҽ"] = "C̆", -- U+04bc -> U+0043+0306 +  ["Ҿ"] = "Ç̆", -- U+04be -> U+00c7+0306 +  ["Ӂ"] = "Z̆", -- U+04c1 -> U+005a+0306 +  ["Ӝ"] = "Z̄", -- U+04dc -> U+005a+0304 +  ["Җ"] = "Ž̦", -- U+0496 -> U+017d+0326 +  ["Ӟ"] = "Z̈", -- U+04de -> U+005a+0308 +  ["Ѕ"] = "Ẑ", -- U+0405 -> U+1e90 +  ["Ӡ"] = "Ź", -- U+04e0 -> U+0179 +  ["Ӥ"] = "Î", -- U+04e4 -> U+00ce +  ["І"] = "Ì", -- U+0406 -> U+00cc +  ["Ї"] = "Ï", -- U+0407 -> U+00cf +  ["Ј"] = "J̌", -- U+0408 -> U+004a+030c +  ["Қ"] = "Ķ", -- U+049a -> U+0136 +  ["Ҟ"] = "K̄", -- U+049e -> U+004b+0304 +  ["Љ"] = "L̂", -- U+0409 -> U+004c+0302 +  ["Њ"] = "N̂", -- U+040a -> U+004e+0302 +  ["Ҥ"] = "Ṅ", -- U+04a4 -> U+1e44 +  ["Ң"] = "Ṇ", -- U+04a2 -> U+1e46 +  ["Ӧ"] = "Ö", -- U+04e6 -> U+00d6 +  ["Ө"] = "Ô", -- U+04e8 -> U+00d4 +  ["Ҧ"] = "Ṕ", -- U+04a6 -> U+1e54 +  ["Ҫ"] = "Ç", -- U+04aa -> U+00c7 +  ["Ҭ"] = "Ţ", -- U+04ac -> U+0162 +  ["Ћ"] = "Ć", -- U+040b -> U+0106 +  ["Ќ"] = "Ḱ", -- U+040c -> U+1e30 +  ["У́"] = "Ú", -- U+0423 -> U+00da +  ["Ў"] = "Ŭ", -- U+040e -> U+016c +  ["Ӱ"] = "Ü", -- U+04f0 -> U+00dc +  ["Ӳ"] = "Ű", -- U+04f2 -> U+0170 +  ["Ү"] = "Ù", -- U+04ae -> U+00d9 +  ["Ҳ"] = "Ḩ", -- U+04b2 -> U+1e28 +  ["Һ"] = "Ḥ", -- U+04ba -> U+1e24 +  ["Ҵ"] = "C̄", -- 
U+04b4 -> U+0043+0304 +  ["Ӵ"] = "C̈", -- U+04f4 -> U+0043+0308 +  ["Ҷ"] = "Ç", -- U+04cb -> U+00c7 +  ["Џ"] = "D̂", -- U+040f -> U+0044+0302 +  ["Ӹ"] = "Ÿ", -- U+04f8 -> U+0178 +  ["Ѣ"] = "Ě", -- U+048c -> U+011a +  ["Ѫ"] = "Ǎ", -- U+046a -> U+01cd +  ["Ѳ"] = "F̀", -- U+0472 -> U+0046+0300 +  ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2 +  ["Ҩ"] = "Ò", -- U+04a8 -> U+00d2 +  ["’"] = "‵", -- U+2035 -> U+2019 +  ["Ӏ"] = "‡"  -- U+04c0 -> U+2021 +} + +translit.tables["cyrillic other uppercase ISO~9"] = translit.non_ru_upp + +\stopluacode + +\startluacode + +--===========================================================================-- +--                      Legacy national transliterations                     -- +--===========================================================================-- +-- Note: +-- Use these only as a last resort.  ‘Vulgar’ transcription is ugly and +-- chauvinistic. + +--------------------------------- +-- German simple transcription -- +--------------------------------- +-- Reference:   „DUDEN. Rechtschreibung der deutschen Sprache“; 20. Aufl., +--              Mannheim et. al. 1991. + +-------------------------------------------------------- +-- Lowercase German simple transcription---first pass -- +-------------------------------------------------------- + +translit.ru_trsc_low_first = { +  [" е"] = " je", +  ["ъе"] = "je", +  ["ье"] = "je", +  [" ё"] = " jo", +  ["ъё"] = "jo", +  ["ьё"] = "jo", +  ["жё"] = "scho", +  ["чё"] = "tscho", +  ["шё"] = "scho", +  ["щё"] = "schtscho", +  ["ье"] = "je", +  ["ьи"] = "ji", +  ["ьо"] = "jo", +  ["ий"] = "i", +  ["ый"] = "y", +  ["кс"] = "x" -- Extraordinarily stupid one. 
+} + +translit.tables["German transcription first pass lowercase"] = translit.ru_trsc_low_first + +-------------------------------------------------------- +-- Uppercase German simple transcription---first pass -- +-------------------------------------------------------- + +translit.ru_trsc_upp_first = { +  [" Е"] = " Je", +  ["Ъе"] = "Je",  -- Pedantic, isn't it? +  ["Ье"] = "Je", +  [" Ё"] = " Jo", -- Keep the leading space, like the " Е" and " ё" rules. +  ["Ъё"] = "Jo", +  ["Ьё"] = "Jo", +  ["Жё"] = "Scho", +  ["Чё"] = "Tscho", +  ["Шё"] = "Scho", +  ["Щё"] = "Schtscho", +  ["Кс"] = "ks" +} + +translit.tables["German transcription first pass uppercase"] = translit.ru_trsc_upp_first + +------------------------------------------- +-- Lowercase German simple transcription -- +------------------------------------------- + +translit.ru_trsc_low = { +  ["а"] = "a", +  ["б"] = "b", +  ["в"] = "w", +  ["г"] = "g", +  ["д"] = "d", +  ["е"] = "e", +  ["ё"] = "jo", +  ["ж"] = "sch", +  ["з"] = "s", +  ["и"] = "i", +  ["й"] = "i", +  ["к"] = "k", +  ["л"] = "l", +  ["м"] = "m", +  ["н"] = "n", +  ["о"] = "o", +  ["п"] = "p", +  ["р"] = "r", +  ["с"] = "s", +  ["т"] = "t", +  ["у"] = "u", +  ["ф"] = "f", +  ["х"] = "ch", +  ["ц"] = "z", +  ["ч"] = "tsch", +  ["ш"] = "sch", +  ["щ"] = "schtsch", +  ["ъ"] = "", +  ["ы"] = "y", +  ["ь"] = "", +  ["э"] = "e", +  ["ю"] = "ju", +  ["я"] = "ja"  +} + +translit.tables["German transcription second pass lowercase"] = translit.ru_trsc_low + +------------------------------------------- +-- Uppercase German simple transcription -- +------------------------------------------- + +translit.ru_trsc_upp = { +  ["А"] = "A", +  ["Б"] = "B", +  ["В"] = "W", +  ["Г"] = "G", +  ["Д"] = "D", +  ["Е"] = "E", +  ["Ё"] = "Jo", +  ["Ж"] = "Sch", +  ["З"] = "S", +  ["И"] = "I", +  ["Й"] = "J", +  ["К"] = "K", +  ["Л"] = "L", +  ["М"] = "M", +  ["Н"] = "N", +  ["О"] = "O", +  ["П"] = "P", +  ["Р"] = "R", +  ["С"] = "S", +  ["Т"] = "T", +  ["У"] = "U", +  ["Ф"] = "F", +  ["Х"] = "Ch", +  ["Ц"] = "Z", +  ["Ч"] 
= "Tsch", +  ["Ш"] = "Sch", +  ["Щ"] = "Schtsch", +  ["Ъ"] = "", +  ["Ы"] = "Y", +  ["Ь"] = "", +  ["Э"] = "E", +  ["Ю"] = "Ju", +  ["Я"] = "Ja"  +} + +translit.tables["German transcription second pass uppercase"] = translit.ru_trsc_upp + +\stopluacode + +%D The following are more interesting than the previous tables because they +%D implement various rules.  For instance the table +%D \type{translit.ru_trsc_irule} holds a substitution dictionary for all +%D possible combinations (including nonsense galore) of a vowel preceding an +%D “й” (Russian short i) preceding a consonant; here we access the sets of +%D Russian vowels as well consonants that were defined earlier. + +\startluacode +-- The й-rule, VйC -> ViC +translit.ru_trsc_irule = {} +for i, vow in ipairs(translit.ru_vowels) do +  for j, cons in ipairs(translit.ru_consonants) do +    local new_ante = vow .. "й" .. cons +    local new_post = vow .. "i" .. cons +    translit.ru_trsc_irule[new_ante] = new_post +  end +end + +translit.tables["German transcription i-rule"] = translit.ru_trsc_irule + +-- The second й-rule, йV -> jV && [иы]йC -> [иы]jC +translit.ru_trsc_jrule = {} +for i, vow in ipairs(translit.ru_vowels) do +  local new_ante = "й" .. vow +  local new_post = "j" .. vow +  translit.ru_trsc_jrule[new_ante] = new_post +end + +translit.ru_trsc_iy = {"и", "ы", "И", "Ы"} +for i, cons in ipairs(translit.ru_consonants) do +  for j, iy in ipairs(translit.ru_trsc_iy) do +    local new_ante = iy .. "й" .. cons +    local new_post = iy .. "j" .. cons +    translit.ru_trsc_jrule[new_ante] = new_post +  end +end + +translit.tables["German transcription j-rule"] = translit.ru_trsc_jrule + +-- The с-rule, VсV -> VssV +translit.ru_trsc_srule = {} +for i, vow_1 in ipairs(translit.ru_vowels) do +for j, vow_2 in ipairs(translit.ru_vowels) do +  local new_ante = vow_1 .. "с" .. vow_2 +  local new_post = vow_1 .. "ss" .. 
vow_2 +    translit.ru_trsc_srule[new_ante] = new_post +  end +end + +translit.tables["German transcription s-rule"] = translit.ru_trsc_srule + +-- The sharp-s-rule, Vсх -> Vßх +translit.ru_trsc_sharpsrule = {} +for i, vow in ipairs(translit.ru_vowels) do +  local new_ante = vow .. "сх" +  local new_post = vow .. "ßх" +  translit.ru_trsc_sharpsrule[new_ante] = new_post +end + +translit.tables["German transcription sharp-s-rule"] = translit.ru_trsc_sharpsrule + +-- The е-rule, Vе -> Vje +translit.ru_trsc_jerule = {} +for i, vow in ipairs(translit.ru_vowels) do +  local new_ante = vow .. "е" +  local new_post = vow .. "je" +  translit.ru_trsc_jerule[new_ante] = new_post +end + +translit.tables["German transcription je-rule"] = translit.ru_trsc_jerule + +-- The ё-rule, Vё -> Vjo +-- This should be redundant as [жцчшщ]ё -> o, else ё -> jo . +-- Somebody should teach those DUDEN-guys parsimony. +translit.ru_trsc_jorule = {} +for i, vow in ipairs(translit.ru_vowels) do +  local new_ante = vow .. "ё" +  local new_post = vow .. 
"jo" +  translit.ru_trsc_jorule[new_ante] = new_post +end + +translit.tables["German transcription (redundant) jo-rule"] = translit.ru_trsc_jorule + +\stopluacode + +\startluacode + +--------------------------------------------------------- +-- Lowercase English simple transcription---first pass -- +--------------------------------------------------------- + +translit.ru_trsc_en_low_first = { +  [" е"] = " ye", +  ["ъе"] = "ye", +  ["ье"] = "ye", +  ["ье"] = "ye", +  ["ьи"] = "yi", +} + +translit.tables["English transcription lowercase first pass"] = translit.ru_trsc_en_low_first + +--------------------------------------------------------- +-- Uppercase English simple transcription---first pass -- +--------------------------------------------------------- + +translit.ru_trsc_en_upp_first = { +  [" Е"] = " Ye", +  ["Ъe"] = "Ye", +  ["Ье"] = "Ye", +} + +translit.tables["English transcription uppercase first pass"] = translit.ru_trsc_en_upp_first + +-------------------------------------------- +-- Lowercase English simple transcription -- +-------------------------------------------- + +translit.ru_trsc_en_low = { +  ["а"] = "a", +  ["б"] = "b", +  ["в"] = "v", +  ["г"] = "g", +  ["д"] = "d", +  ["е"] = "e", +  ["ё"] = "e", +  ["ж"] = "zh", +  ["з"] = "z", +  ["и"] = "i", +  ["й"] = "y", +  ["к"] = "k", +  ["л"] = "l", +  ["м"] = "m", +  ["н"] = "n", +  ["о"] = "o", +  ["п"] = "p", +  ["р"] = "r", +  ["с"] = "s", +  ["т"] = "t", +  ["у"] = "u", +  ["ф"] = "f", +  ["х"] = "kh", +  ["ц"] = "ts", +  ["ч"] = "ch", +  ["ш"] = "sh", +  ["щ"] = "shsh", +  ["ъ"] = "", +  ["ы"] = "y", +  ["ь"] = "", +  ["э"] = "e", +  ["ю"] = "yu", +  ["я"] = "ya"  +} + +translit.tables["English transcription lowercase second pass"] = translit.ru_trsc_en_low + +-------------------------------------------- +-- Uppercase English simple transcription -- +-------------------------------------------- + +translit.ru_trsc_en_upp = { +  ["А"] = "A", +  ["Б"] = "B", +  ["В"] = "V", +  ["Г"] = "G", +  
["Д"] = "D", +  ["Е"] = "E", +  ["Ё"] = "E", +  ["Ж"] = "Zh", +  ["З"] = "Z", +  ["И"] = "I", +  ["Й"] = "Y", +  ["К"] = "K", +  ["Л"] = "L", +  ["М"] = "M", +  ["Н"] = "N", +  ["О"] = "O", +  ["П"] = "P", +  ["Р"] = "R", +  ["С"] = "S", +  ["Т"] = "T", +  ["У"] = "U", +  ["Ф"] = "F", +  ["Х"] = "Kh", +  ["Ц"] = "Ts", +  ["Ч"] = "Ch", +  ["Ш"] = "Sh", +  ["Щ"] = "Shsh", +  ["Ъ"] = "", +  ["Ы"] = "Y", +  ["Ь"] = "", +  ["Э"] = "E", +  ["Ю"] = "Yu", +  ["Я"] = "Ya"  +} + +translit.tables["English transcription uppercase second pass"] = translit.ru_trsc_en_upp + +-- The english е-rule, Vе -> Vye +translit.ru_trsc_en_jerule = {} +for i, vow in ipairs(translit.ru_vowels) do +  local new_ante = vow .. "е" +  local new_post = vow .. "ye" +  translit.ru_trsc_en_jerule[new_ante] = new_post +end + +translit.tables["English transcription ye-rule"] = translit.ru_trsc_en_jerule + +\stopluacode + +\startluacode + +----------------------------------- +-- Lowercase Czech transcription -- +----------------------------------- + +translit.ru_trsc_cz_low = { +  ["а"] = "a", +  ["б"] = "b", +  ["в"] = "v", +  ["г"] = "g", +  ["д"] = "d", +  ["е"] = "e", +  ["ё"] = "ë", +  ["ж"] = "ž", +  ["з"] = "z", +  ["и"] = "i", +  ["й"] = "j", +  ["к"] = "k", +  ["л"] = "l", +  ["м"] = "m", +  ["н"] = "n", +  ["о"] = "o", +  ["п"] = "p", +  ["р"] = "r", +  ["с"] = "s", +  ["т"] = "t", +  ["у"] = "u", +  ["ф"] = "f", +  ["х"] = "ch", +  ["ц"] = "c", +  ["ч"] = "č", +  ["ш"] = "š", +  ["щ"] = "šč", +  ["ъ"] = "ъ", +  ["ы"] = "y", +  ["ь"] = "ь", +  ["э"] = "è", +  ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and тя -> ťa, but +  ["я"] = "ja"  -- that would complicate things a bit and linguists might not +}               -- agree. 
+ +translit.tables["Czech transcription lowercase"] = translit.ru_trsc_cz_low + +----------------------------------- +-- Uppercase Czech transcription -- +----------------------------------- + +translit.ru_trsc_cz_upp = { +  ["А"] = "A", +  ["Б"] = "B", +  ["В"] = "V", +  ["Г"] = "G", +  ["Д"] = "D", +  ["Е"] = "E", +  ["Ё"] = "Ë", +  ["Ж"] = "Ž", +  ["З"] = "Z", +  ["И"] = "I", +  ["Й"] = "J", +  ["К"] = "K", +  ["Л"] = "L", +  ["М"] = "M", +  ["Н"] = "N", +  ["О"] = "O", +  ["П"] = "P", +  ["Р"] = "R", +  ["С"] = "S", +  ["Т"] = "T", +  ["У"] = "U", +  ["Ф"] = "F", +  ["Х"] = "Ch", +  ["Ц"] = "C", +  ["Ч"] = "Č", +  ["Ш"] = "Š", +  ["Щ"] = "Šč", +  ["Ъ"] = "Ъ", +  ["Ы"] = "Y", +  ["Ь"] = "Ь", +  ["Э"] = "È", +  ["Ю"] = "Ju", +  ["Я"] = "Ja"  +} + +translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp + +---------------------------------------------- +-- Lowercase Additional Czech Transcription -- +---------------------------------------------- + +translit.ru_trsc_cz_add_low = { +  ["ѕ"] = "dz", +  ["і"] = "ï", +  ["ѹ"] = "u", +  ["ѡ"] = "ō", +  ["ѣ"] = "ě", +  ["ѥ"] = "je", +  ["ѧ"] = "ę", +  ["ѩ"] = "ję", +  ["ѫ"] = "ǫ", +  ["ѭ"] = "jǫ", +  ["ѯ"] = "ks", +  ["ѱ"] = "ps", +  ["ѳ"] = "th", +  ["ѵ"] = "ÿ", +} + +translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit.ru_trsc_cz_add_low + + +---------------------------------------------- +-- Uppercase Additional Czech Transcription -- +---------------------------------------------- + +translit.ru_trsc_cz_add_upp = { +  ["Ѕ"] = "Dz", +  ["І"] = "Ï", +  ["Ѹ"] = "U", +  ["Ѡ"] = "Ō", +  ["Ѣ"] = "Ě", +  ["Ѥ"] = "Je", +  ["Ѧ"] = "Ę", +  ["Ѩ"] = "Ję", +  ["Ѫ"] = "Ǫ", +  ["Ѭ"] = "Jǫ", +  ["Ѯ"] = "Ks", +  ["Ѱ"] = "Ps", +  ["Ѳ"] = "Th", +  ["Ѵ"] = "Ÿ", +} + +translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit.ru_trsc_cz_add_upp + +\stopluacode + +%-===========================================================================-- +%-                      
Other transliterations                               -- +%-===========================================================================-- + +\startluacode + +-- The following are needed because ISO 9 does not cover old Slavonic +-- characters that became obsolete before the advent of гражданский шрифт. + +-- Please note that these mappings are not bijective so don't expect the result  +-- to be easily revertible (by machines). + +-- Source p. 77 of +-- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf + +----------------------------------------------------------------------------- +-- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” -- +----------------------------------------------------------------------------- + +translit.ocs_low = { +  ["а"] = "a", +  ["б"] = "b", +  ["в"] = "v", +  ["г"] = "g", +  ["д"] = "d", +  ["є"] = "e", +  ["ж"] = "ž", +  ["ꙃ"] = "ʒ",      -- U+0292, alternative: dz U+01f3 +  ["ѕ"] = "ʒ", +  ["ꙁ"] = "z", +  ["з"] = "z", +  ["и"] = "i", +  ["і"] = "i", +  ["ї"] = "i", +  ["ћ"] = "g’", +  ["к"] = "k", +  ["л"] = "l", +  ["м"] = "m", +  ["н"] = "n", +  ["о"] = "o", +  ["п"] = "p", +  ["р"] = "r", +  ["с"] = "s", +  ["т"] = "t", +  ["оу"] = "u", +  ["ѹ"] = "u", +  ["ꙋ"] = "u", +  ["ф"] = "f", +  ["х"] = "x", +  ["ѡ"] = "o", --"ō", +  ["ѿ"] = "ot",     -- U+047f +  ["ѽ"] = "o!",     -- U+047d +  ["ꙍ"] = "o!",     -- U+064D +  ["ц"] = "c", +  ["ч"] = "č", +  ["ш"] = "š", +  ["щ"] = "št", +  ["ъ"] = "ъ", +  ["ы"] = "y", +  ["ꙑ"] = "y",      -- Old jery (U+a651) as used e.g. by the OCS Wikipedia. 
+  ["ь"] = "ь", +  ["ѣ"] = "ě", +  ["ю"] = "ju", +  ["ꙗ"] = "ja", +  ["ѥ"] = "je", +  ["ѧ"] = "ę", +  ["ѩ"] = "ję", +  ["ѫ"] = "ǫ", +  ["ѭ"] = "jǫ", +  ["ѯ"] = "ks", +  ["ѱ"] = "ps", +  ["ѳ"] = "th", +  ["ѵ"] = "ü", +} + +translit.tables["OCS \\quotation{scientific} transliteration lowercase"] = translit.ocs_low + +----------------------------------------------------------------------------- +-- Uppercase pre-Peter cyrillic characters -- “scientific transliteration” -- +----------------------------------------------------------------------------- + +translit.ocs_upp = { +  ["А"] = "A", +  ["Б"] = "B", +  ["В"] = "V", +  ["Г"] = "G", +  ["Д"] = "D", +  ["Є"] = "E", +  ["Ж"] = "Ž", +  ["Ꙃ"] = "Ʒ",      -- U+01b7, alternative: Dz U+01f2 +  ["Ѕ"] = "Ʒ", +  ["Ꙁ"] = "Z", +  ["З"] = "Z", +  ["И"] = "I", +  ["І"] = "I", +  ["Ї"] = "I", +  ["Ћ"] = "G’", +  ["К"] = "K", +  ["Л"] = "L", +  ["М"] = "M", +  ["Н"] = "N", +  ["О"] = "O", +  ["П"] = "P", +  ["Р"] = "R", +  ["С"] = "S", +  ["Т"] = "T", +  ["Оу"] = "U", +  ["Ѹ"] = "U", +  ["Ꙋ"] = "U",      -- U+a64a; the capital monograph uk belongs here, not the small one. +  ["Ф"] = "F", +  ["Х"] = "X", +  ["Ѡ"] = "Ō", +  ["Ѿ"] = "Ot",     -- U+047e +  ["Ѽ"] = "O!",     -- U+047c +  ["Ꙍ"] = "O!",     -- U+a64c +  ["Ц"] = "C", +  ["Ч"] = "Č", +  ["Ш"] = "Š", +  ["Щ"] = "Št", +  ["Ъ"] = "Ŭ", +  ["Ы"] = "Y", +  ["Ꙑ"] = "Y",  -- U+a650 +  ["Ь"] = "Ĭ", +  ["Ѣ"] = "Ě", +  ["Ю"] = "Ju", +  ["Ꙗ"] = "Ja", +  ["Ѥ"] = "Je", +  ["Ѧ"] = "Ę", +  ["Ѩ"] = "Ję", +  ["Ѫ"] = "Ǫ", +  ["Ѭ"] = "Jǫ", +  ["Ѯ"] = "Ks", +  ["Ѱ"] = "Ps", +  ["Ѳ"] = "Th", +  ["Ѵ"] = "Ü", +} + +translit.tables["OCS \\quotation{scientific} transliteration uppercase"] = translit.ocs_upp + +-- Note on the additional tables: these cover characters that are not defined +-- in ISO 9 but have a “scientific” transliteration.  You may use them as +-- complementary mapping to ISO 9, trading off homogeneity for completeness. 
+ +---------------------------------------------------------------------------------------- +-- Lowercase additional pre-Peter cyrillic characters -- “scientific transliteration” -- +---------------------------------------------------------------------------------------- + +translit.ocs_add_low = { +  ["ѕ"] = "dz",         -- Mapped to ẑ in ISO 9 (Macedonian …) +  ["ѯ"] = "ks", +  ["ѱ"] = "ps", +  ["ѡ"] = "ô", +  ["ѫ"] = "ǫ",          -- Mapped to ǎ in ISO 9. +  ["ѧ"] = "ę", +  ["ѭ"] = "jǫ", +  ["ѩ"] = "ję", +  ["ѥ"] = "je", +  ["ѹ"] = "u",          -- Digraph uk. +  ["ꙋ"] = "u",          -- Monograph uk, U+a64b.  (No glyph yet in the "fixed" font in February 2010 …) +  ["ꙑ"] = "y",          -- U+a651 +} + +translit.tables["OCS \\quotation{scientific} transliteration additional lowercase"] = translit.ocs_add_low + +---------------------------------------------------------------------------------------- +-- Uppercase additional pre-Peter cyrillic characters -- “scientific transliteration” -- +---------------------------------------------------------------------------------------- + +translit.ocs_add_upp = { +  ["Ѕ"] = "Dz", +  ["Ѯ"] = "Ks", +  ["Ѱ"] = "Ps", +  ["Ѡ"] = "Ô", +  ["Ѫ"] = "Ǫ", +  ["Ѧ"] = "Ę", +  ["Ѭ"] = "Jǫ", +  ["Ѩ"] = "Ję", +  ["Ѥ"] = "Je", +  ["Ѹ"] = "U",          -- Digraph uk. +  ["Ꙋ"] = "U",          -- Monograph Uk, U+a64a. 
+  ["Ꙑ"] = "Y",  -- U+a650 +} + +translit.tables["OCS \\quotation{scientific} transliteration additional uppercase"] = translit.ocs_add_upp + + +\stopluacode + +%-===========================================================================-- +%-                              Glagolica                                    -- +%-===========================================================================-- + +\startluacode + +------------------------------------------- +-- Lowercase Glagolitic Transliteration  -- +------------------------------------------- + +translit.ocs_gla_low = { +  ["ⰰ"] = "a",  -- GLAGOLITIC SMALL LETTER AZU +  ["ⰱ"] = "b",  -- GLAGOLITIC SMALL LETTER BUKY +  ["ⰲ"] = "v",  -- GLAGOLITIC SMALL LETTER VEDE +  ["ⰳ"] = "g",  -- GLAGOLITIC SMALL LETTER GLAGOLI +  ["ⰴ"] = "d",  -- GLAGOLITIC SMALL LETTER DOBRO +  ["ⰵ"] = "e",  -- GLAGOLITIC SMALL LETTER YESTU +  ["ⰶ"] = "ž",  -- GLAGOLITIC SMALL LETTER ZHIVETE +  ["ⰷ"] = "ʒ",  -- GLAGOLITIC SMALL LETTER DZELO +  ["ⰸ"] = "z",  -- GLAGOLITIC SMALL LETTER ZEMLJA +  ["ⰹ"] = "i",  -- GLAGOLITIC SMALL LETTER IZHE +  ["ⰺ"] = "i",  -- GLAGOLITIC SMALL LETTER INITIAL IZHE +  ["ⰻ"] = "i",  -- GLAGOLITIC SMALL LETTER I +  ["ⰼ"] = "g’", -- GLAGOLITIC SMALL LETTER DJERVI +  ["ⰽ"] = "k",  -- GLAGOLITIC SMALL LETTER KAKO +  ["ⰾ"] = "l",  -- GLAGOLITIC SMALL LETTER LJUDIJE +  ["ⰿ"] = "m",  -- GLAGOLITIC SMALL LETTER MYSLITE +  ["ⱀ"] = "n",  -- GLAGOLITIC SMALL LETTER NASHI +  ["ⱁ"] = "o",  -- GLAGOLITIC SMALL LETTER ONU +  ["ⱂ"] = "p",  -- GLAGOLITIC SMALL LETTER POKOJI +  ["ⱃ"] = "r",  -- GLAGOLITIC SMALL LETTER RITSI +  ["ⱄ"] = "s",  -- GLAGOLITIC SMALL LETTER SLOVO +  ["ⱅ"] = "t",  -- GLAGOLITIC SMALL LETTER TVRIDO +  ["ⱆ"] = "u",  -- GLAGOLITIC SMALL LETTER UKU +  ["ⱇ"] = "f",  -- GLAGOLITIC SMALL LETTER FRITU +  ["ⱈ"] = "x",  -- GLAGOLITIC SMALL LETTER HERU +  ["ⱉ"] = "o",  -- GLAGOLITIC SMALL LETTER OTU +  ["ⱊ"] = "?",  -- GLAGOLITIC SMALL LETTER PE +  ["ⱋ"] = "št", -- GLAGOLITIC SMALL LETTER SHTA +  ["ⱌ"] = "c",  
-- GLAGOLITIC SMALL LETTER TSI +  ["ⱍ"] = "č",  -- GLAGOLITIC SMALL LETTER CHRIVI +  ["ⱎ"] = "š",  -- GLAGOLITIC SMALL LETTER SHA +  ["ⱏ"] = "ъ",  -- GLAGOLITIC SMALL LETTER YERU +  ["ⱐ"] = "ь",  -- GLAGOLITIC SMALL LETTER YERI +  ["ⱑ"] = "ě",  -- GLAGOLITIC SMALL LETTER YATI +  ["ⱒ"] = "x",  -- GLAGOLITIC SMALL LETTER SPIDERY HA +  ["ⱓ"] = "ju", -- GLAGOLITIC SMALL LETTER YU +  ["ⱔ"] = "ę",  -- GLAGOLITIC SMALL LETTER SMALL YUS +  ["ⱕ"] = "y̨",  -- GLAGOLITIC SMALL LETTER SMALL YUS WITH TAIL  +  ["ⱖ"] = "??", -- GLAGOLITIC SMALL LETTER YO +  ["ⱗ"] = "ję", -- GLAGOLITIC SMALL LETTER IOTATED SMALL YU +  ["ⱘ"] = "ǫ",  -- GLAGOLITIC SMALL LETTER BIG YUS +  ["ⱙ"] = "jǫ", -- GLAGOLITIC SMALL LETTER IOTATED BIG YUS +  ["ⱚ"] = "th", -- GLAGOLITIC SMALL LETTER FITA +  ["ⱛ"] = "ü",  -- GLAGOLITIC SMALL LETTER IZHITSA +  ["ⱜ"] = "??", -- GLAGOLITIC SMALL LETTER SHTAPIC +  ["ⱝ"] = "??", -- GLAGOLITIC SMALL LETTER TROKUTASTI A +  ["ⱞ"] = "m",  -- GLAGOLITIC SMALL LETTER LATINATE MYSLITE +} + +translit.tables["Glagolica transliteration for OCS lowercase"] = translit.ocs_gla_low + +------------------------------------------------ +-- Uppercase (?!) 
Glagolitic Transliteration  -- +------------------------------------------------ + +translit.ocs_gla_upp = { +  ["Ⰰ"] = "A",  -- GLAGOLITIC CAPITAL LETTER AZU +  ["Ⰱ"] = "B",  -- GLAGOLITIC CAPITAL LETTER BUKY +  ["Ⰲ"] = "V",  -- GLAGOLITIC CAPITAL LETTER VEDE +  ["Ⰳ"] = "G",  -- GLAGOLITIC CAPITAL LETTER GLAGOLI +  ["Ⰴ"] = "D",  -- GLAGOLITIC CAPITAL LETTER DOBRO +  ["Ⰵ"] = "E",  -- GLAGOLITIC CAPITAL LETTER YESTU +  ["Ⰶ"] = "Ž",  -- GLAGOLITIC CAPITAL LETTER ZHIVETE +  ["Ⰷ"] = "Ʒ",  -- GLAGOLITIC CAPITAL LETTER DZELO +  ["Ⰸ"] = "Z",  -- GLAGOLITIC CAPITAL LETTER ZEMLJA +  ["Ⰹ"] = "I",  -- GLAGOLITIC CAPITAL LETTER IZHE +  ["Ⰺ"] = "I",  -- GLAGOLITIC CAPITAL LETTER INITIAL IZHE +  ["Ⰻ"] = "I",  -- GLAGOLITIC CAPITAL LETTER I +  ["Ⰼ"] = "G’", -- GLAGOLITIC CAPITAL LETTER DJERVI +  ["Ⰽ"] = "K",  -- GLAGOLITIC CAPITAL LETTER KAKO +  ["Ⰾ"] = "L",  -- GLAGOLITIC CAPITAL LETTER LJUDIJE +  ["Ⰿ"] = "M",  -- GLAGOLITIC CAPITAL LETTER MYSLITE +  ["Ⱀ"] = "N",  -- GLAGOLITIC CAPITAL LETTER NASHI +  ["Ⱁ"] = "O",  -- GLAGOLITIC CAPITAL LETTER ONU +  ["Ⱂ"] = "P",  -- GLAGOLITIC CAPITAL LETTER POKOJI +  ["Ⱃ"] = "R",  -- GLAGOLITIC CAPITAL LETTER RITSI +  ["Ⱄ"] = "S",  -- GLAGOLITIC CAPITAL LETTER SLOVO +  ["Ⱅ"] = "T",  -- GLAGOLITIC CAPITAL LETTER TVRIDO +  ["Ⱆ"] = "U",  -- GLAGOLITIC CAPITAL LETTER UKU +  ["Ⱇ"] = "F",  -- GLAGOLITIC CAPITAL LETTER FRITU +  ["Ⱈ"] = "X",  -- GLAGOLITIC CAPITAL LETTER HERU +  ["Ⱉ"] = "O",  -- GLAGOLITIC CAPITAL LETTER OTU +  ["Ⱊ"] = "?",  -- GLAGOLITIC CAPITAL LETTER PE +  ["Ⱋ"] = "Št", -- GLAGOLITIC CAPITAL LETTER SHTA +  ["Ⱌ"] = "C",  -- GLAGOLITIC CAPITAL LETTER TSI +  ["Ⱍ"] = "Č",  -- GLAGOLITIC CAPITAL LETTER CHRIVI +  ["Ⱎ"] = "Š",  -- GLAGOLITIC CAPITAL LETTER SHA +  ["Ⱏ"] = "Ъ",  -- GLAGOLITIC CAPITAL LETTER YERU +  ["Ⱐ"] = "Ь",  -- GLAGOLITIC CAPITAL LETTER YERI +  ["Ⱑ"] = "Ě",  -- GLAGOLITIC CAPITAL LETTER YATI +  ["Ⱒ"] = "X",  -- GLAGOLITIC CAPITAL LETTER SPIDERY HA +  ["Ⱓ"] = "Ju", -- GLAGOLITIC CAPITAL LETTER YU +  ["Ⱔ"] = "Ę",  -- 
GLAGOLITIC CAPITAL LETTER SMALL YUS +  ["Ⱕ"] = "Y̨",  -- GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL +  ["Ⱖ"] = "??", -- GLAGOLITIC CAPITAL LETTER YO +  ["Ⱗ"] = "Ję", -- GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS +  ["Ⱘ"] = "Ǫ",  -- GLAGOLITIC CAPITAL LETTER BIG YUS +  ["Ⱙ"] = "Jǫ", -- GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS +  ["Ⱚ"] = "Th", -- GLAGOLITIC CAPITAL LETTER FITA +  ["Ⱛ"] = "Ü",  -- GLAGOLITIC CAPITAL LETTER IZHITSA +  ["Ⱜ"] = "??", -- GLAGOLITIC CAPITAL LETTER SHTAPIC +  ["Ⱝ"] = "??", -- GLAGOLITIC CAPITAL LETTER TROKUTASTI A +  ["Ⱞ"] = "M",  -- GLAGOLITIC CAPITAL LETTER LATINATE MYSLIT +} + +translit.tables["Glagolica transliteration for OCS uppercase"] = translit.ocs_gla_upp + +\stopluacode + +%-===========================================================================-- +%-                              Greek                                        -- +%-===========================================================================-- + +\startluacode + +-- Note that the Greek transliteration mapping isn't bijective so transliterated +-- texts won't be reversible.  (Shouldn't be impossible to make one up using +-- diacritics on latin characters to represent all possible combinations of +-- Greek breathings + accents.)  
+ +-- Good reading on composed / precombined unicode: +--  http://www.tlg.uci.edu/~opoudjis/unicode/unicode_gaps.html#precomposed + +------------------------------------------------- +-- Lowercase Greek Initial Position Diphthongs -- +------------------------------------------------- + +translit.gr_di_in_low = { +  [" αὑ"] = " hau", +  [" αὕ"] = " hau", +  [" αὓ"] = " hau", +  [" αὗ"] = " hau", +  [" εὑ"] = " heu", +  [" εὕ"] = " heu", +  [" εὓ"] = " heu", +  [" εὗ"] = " heu", +  [" ηὑ"] = " hēu", +  [" ηὕ"] = " hēu", +  [" ηὓ"] = " hēu", +  [" ηὗ"] = " hēu", +  [" οὑ"] = " hu", +  [" οὕ"] = " hu", +  [" οὓ"] = " hu", +  [" οὗ"] = " hu", +  [" ωὑ"] = " hōu", +  [" ωὕ"] = " hōu", +  [" ωὓ"] = " hōu", +  [" ωὗ"] = " hōu" +} + +translit.tables["Greek transliteration initial breathing diphthongs lowercase"] = translit.gr_di_in_low + +------------------------------------------------- +-- Uppercase Greek Initial Position Diphthongs -- +------------------------------------------------- + +translit.gr_di_in_upp = { +  [" Αὑ"] = " Hau", +  [" Αὕ"] = " Hau", +  [" Αὓ"] = " Hau", +  [" Αὗ"] = " Hau", +  [" Εὑ"] = " Heu", +  [" Εὕ"] = " Heu", +  [" Εὓ"] = " Heu", +  [" Εὗ"] = " Heu", +  [" Ηὑ"] = " Hēu", +  [" Ηὕ"] = " Hēu", +  [" Ηὓ"] = " Hēu", +  [" Ηὗ"] = " Hēu", +  [" Οὑ"] = " Hu", +  [" Οὕ"] = " Hu", +  [" Οὓ"] = " Hu", +  [" Οὗ"] = " Hu", +  [" Ωὑ"] = " Hōu", +  [" Ωὕ"] = " Hōu", +  [" Ωὓ"] = " Hōu", +  [" Ωὗ"] = " Hōu" +} + +translit.tables["Greek transliteration initial breathing diphthongs uppercase"] = translit.gr_di_in_upp + +--------------------------------------- +-- Lowercase Greek Initial Position  -- +--------------------------------------- + +translit.gr_in_low = { +  [" ἁ"] = " ha", +  [" ἅ"] = " ha", +  [" ἃ"] = " ha", +  [" ἇ"] = " ha", +  [" ᾁ"] = " ha", +  [" ᾅ"] = " ha", +  [" ᾃ"] = " ha", +  [" ᾇ"] = " ha", +  [" ἑ"] = " he", +  [" ἕ"] = " he", +  [" ἓ"] = " he", +  [" ἡ"] = " hē", +  [" ἥ"] = " hē", +  [" ἣ"] = " hē", +  [" ἧ"] = " hē", +  [" ᾑ"] = " 
hē", +  [" ᾕ"] = " hē", +  [" ᾓ"] = " hē", +  [" ᾗ"] = " hē", +  [" ἱ"] = " hi", +  [" ἵ"] = " hi", +  [" ἳ"] = " hi", +  [" ἷ"] = " hi", +  [" ὁ"] = " ho", +  [" ὅ"] = " ho", +  [" ὃ"] = " ho", +  [" ὑ"] = " hy", +  [" ὕ"] = " hy", +  [" ὓ"] = " hy", +  [" ὗ"] = " hy", +  [" ὡ"] = " hō", +  [" ὥ"] = " hō", +  [" ὣ"] = " hō", +  [" ὧ"] = " hō", +  [" ᾡ"] = " hō", +  [" ᾥ"] = " hō", +  [" ᾣ"] = " hō", +  [" ᾧ"] = " hō", +} + +translit.tables["Greek transliteration initial breathing lowercase"] = translit.gr_in_low + +--------------------------------------- +-- Uppercase Greek Initial Position  -- +--------------------------------------- + +translit.gr_in_upp = { +  [" Ἁ"] = " Ha", +  [" Ἅ"] = " Ha", +  [" Ἃ"] = " Ha", +  [" Ἇ"] = " Ha", +  [" ᾉ"] = " Ha", +  [" ᾍ"] = " Ha", +  [" ᾋ"] = " Ha", +  [" ᾏ"] = " Ha", +  [" Ἑ"] = " He", +  [" Ἕ"] = " He", +  [" Ἓ"] = " He", +  [" Ἡ"] = " Hē", +  [" Ἥ"] = " Hē", +  [" Ἣ"] = " Hē", +  [" Ἧ"] = " Hē", +  [" ᾙ"] = " Hē", +  [" ᾝ"] = " Hē", +  [" ᾛ"] = " Hē", +  [" ᾟ"] = " Hē", +  [" Ἱ"] = " Hi", +  [" Ἵ"] = " Hi", +  [" Ἳ"] = " Hi", +  [" Ἷ"] = " Hi", +  [" Ὁ"] = " Ho", +  [" Ὅ"] = " Ho", +  [" Ὃ"] = " Ho", +  [" Ὑ"] = " Hy", +  [" Ὕ"] = " Hy", +  [" Ὓ"] = " Hy", +  [" Ὗ"] = " Hy", +  [" Ὡ"] = " Hō", +  [" Ὥ"] = " Hō", +  [" Ὣ"] = " Hō", +  [" Ὧ"] = " Hō", +  [" ᾩ"] = " Hō", +  [" ᾭ"] = " Hō", +  [" ᾫ"] = " Hō", +  [" ᾯ"] = " Hō", +} + +translit.tables["Greek transliteration initial breathing uppercase"] = translit.gr_in_upp + +--------------------------------- +-- Lowercase Greek Diphthongs  -- +--------------------------------- + +translit.gr_di_low = { +  ["αυ"] = "au", +  ["αύ"] = "au", +  ["αὺ"] = "au", +  ["αῦ"] = "au", +  ["αὐ"] = "au", +  ["αὔ"] = "au", +  ["αὒ"] = "au", +  ["αὖ"] = "au", +  ["αὑ"] = "au", +  ["αὕ"] = "au", +  ["αὓ"] = "au", +  ["αὗ"] = "au", +  ["ευ"] = "eu", +  ["εύ"] = "eu", +  ["εὺ"] = "eu", +  ["εῦ"] = "eu", +  ["εὐ"] = "eu", +  ["εὔ"] = "eu", +  ["εὒ"] = "eu", +  ["εὖ"] = "eu", +  ["εὑ"] = "eu", 
-- (translit.gr_di_low continued: remaining ε-diphthongs, then η, ο, ω.)
  ["εὕ"] = "eu",
  ["εὓ"] = "eu",
  ["εὗ"] = "eu",
  ["ηυ"] = "ēu",
  ["ηύ"] = "ēu",
  ["ηὺ"] = "ēu",
  ["ηῦ"] = "ēu",
  ["ηὐ"] = "ēu",
  ["ηὔ"] = "ēu",
  ["ηὒ"] = "ēu",
  ["ηὖ"] = "ēu",
  ["ηὑ"] = "ēu",
  ["ηὕ"] = "ēu",
  ["ηὓ"] = "ēu",
  ["ηὗ"] = "ēu",
  -- NOTE(review): the next three keys look identical; presumably they were
  -- meant to be different encodings of the same diphthong -- verify.
  ["ου"] = "u",
  ["ου"] = "u",
  ["ου"] = "u",
  ["ού"] = "u",
  ["οὺ"] = "u",
  ["οῦ"] = "u",
  ["οὐ"] = "u",
  ["οὔ"] = "u",
  ["οὒ"] = "u",
  ["οὖ"] = "u",
  ["οὑ"] = "u",
  ["οὕ"] = "u",
  ["οὓ"] = "u",
  ["οὗ"] = "u",
  ["ωυ"] = "ōu",
  ["ωύ"] = "ōu",
  ["ωὺ"] = "ōu",
  ["ωῦ"] = "ōu",
  ["ωὐ"] = "ōu",
  ["ωὔ"] = "ōu",
  ["ωὒ"] = "ōu",
  ["ωὖ"] = "ōu",
  ["ωὑ"] = "ōu",
  ["ωὕ"] = "ōu",
  ["ωὓ"] = "ōu",
  ["ωὗ"] = "ōu",
  ["ῤῥ"] = "rrh",
}

-- Register the diphthong table itself (this used to register gr_in_low a
-- second time, so the diphthongs never showed up in the table overview).
translit.tables["Greek transliteration diphthongs lowercase"] = translit.gr_di_low

---------------------------------
-- Uppercase Greek Diphthongs  --
---------------------------------

-- Capitalised diphthongs (capital vowel followed by any variant of υ).
-- The "gr" and "gr_n" modes apply this table before the single-letter tables.
translit.gr_di_upp = {
  ["Αυ"] = "Au",
  ["Αύ"] = "Au",
  ["Αὺ"] = "Au",
  ["Αῦ"] = "Au",
  ["Αὐ"] = "Au",
  ["Αὔ"] = "Au",
  ["Αὒ"] = "Au",
  ["Αὖ"] = "Au",
  ["Αὑ"] = "Au",
  ["Αὕ"] = "Au",
  ["Αὓ"] = "Au",
  ["Αὗ"] = "Au",
  ["Ευ"] = "Eu",
  ["Εύ"] = "Eu",
  ["Εὺ"] = "Eu",
  ["Εῦ"] = "Eu",
  ["Εὐ"] = "Eu",
  ["Εὔ"] = "Eu",
  ["Εὒ"] = "Eu",
  ["Εὖ"] = "Eu",
  ["Εὑ"] = "Eu",
  ["Εὕ"] = "Eu",
  ["Εὓ"] = "Eu",
  ["Εὗ"] = "Eu",
  ["Ηυ"] = "Ēu",
  ["Ηύ"] = "Ēu",
  ["Ηὺ"] = "Ēu",
  ["Ηῦ"] = "Ēu",
  ["Ηὐ"] = "Ēu",
  ["Ηὔ"] = "Ēu",
  ["Ηὒ"] = "Ēu",
  ["Ηὖ"] = "Ēu",
  ["Ηὑ"] = "Ēu",
  ["Ηὕ"] = "Ēu",
  ["Ηὓ"] = "Ēu",
  ["Ηὗ"] = "Ēu",
  -- NOTE(review): three identical-looking keys again, see the lowercase table.
  ["Ου"] = "U",
  ["Ου"] = "U",
  ["Ου"] = "U",
  ["Ού"] = "U",
  ["Οὺ"] = "U",
  ["Οῦ"] = "U",
  ["Οὐ"] = "U",
  ["Οὔ"] = "U",
  ["Οὒ"] = "U",
  ["Οὖ"] = "U",
  ["Οὑ"] = "U",
  ["Οὕ"] = "U",
  ["Οὓ"] = "U",
  ["Οὗ"] = "U",
  ["Ωυ"] = "Ōu",
  ["Ωύ"] = "Ōu",
  ["Ωὺ"] = "Ōu",
  ["Ωῦ"] = "Ōu",
  ["Ωὐ"] = "Ōu",
  ["Ωὔ"] = "Ōu",
  ["Ωὒ"] = "Ōu",
  ["Ωὖ"] = "Ōu",
  ["Ωὑ"] = "Ōu",
  ["Ωὕ"] = "Ōu",
  ["Ωὓ"] = "Ōu",
  ["Ωὗ"] = "Ōu",
}

-- Same correction as for the lowercase table: register gr_di_upp, not gr_in_upp.
translit.tables["Greek transliteration diphthongs uppercase"] = translit.gr_di_upp

-- The following will be used in an option that ensures transcription of
-- nasalization, e.g. Ἁγχίσης -> “Anchises” (instead of “Agchises”)
translit.gr_nrule = {
  ["γγ"] = "ng",
  ["γκ"] = "nk",
  ["γξ"] = "nx",
  ["γχ"] = "nch",
}

translit.tables["Greek transliteration optional nasalization"] = translit.gr_nrule

\stopluacode

\startluacode

--------------------------------------
-- Lowercase Greek Transliteration  --
--------------------------------------

-- Single lowercase letters with all their precomposed accent and breathing
-- variants.  Accents and breathings are dropped here; initial rough
-- breathing is handled by the separate initial-position tables.
translit.gr_low = {
  ["α"] = "a",
  ["ά"] = "a",
  ["ὰ"] = "a",
  ["ᾶ"] = "a",
  ["ᾳ"] = "a",
  ["ἀ"] = "a",
  ["ἁ"] = "a",
  ["ἄ"] = "a",
  ["ἂ"] = "a",
  ["ἆ"] = "a",
  ["ᾀ"] = "a",      -- was missing; the parallel ᾐ and ᾠ are present below
  ["ἁ"] = "a",
  ["ἅ"] = "a",
  ["ἃ"] = "a",
  ["ἇ"] = "a",
  ["ᾁ"] = "a",
  ["ᾴ"] = "a",
  ["ᾲ"] = "a",
  ["ᾷ"] = "a",
  ["ᾄ"] = "a",
  ["ᾂ"] = "a",
  ["ᾅ"] = "a",
  ["ᾃ"] = "a",
  ["ᾆ"] = "a",
  ["ᾇ"] = "a",
  ["β"] = "b",
  ["γ"] = "g",
  ["δ"] = "d",
  ["ε"] = "e",
  ["έ"] = "e",
  ["ὲ"] = "e",
  ["ἐ"] = "e",
  ["ἔ"] = "e",
  ["ἒ"] = "e",
  ["ἑ"] = "e",
  ["ἕ"] = "e",
  ["ἓ"] = "e",
  ["ζ"] = "z",
  ["η"] = "ē",
  ["η"] = "ē",
  ["ή"] = "ē",
  ["ὴ"] = "ē",
  ["ῆ"] = "ē",
  ["ῃ"] = "ē",
  ["ἠ"] = "ē",
  ["ἤ"] = "ē",
  ["ἢ"] = "ē",
  ["ἦ"] = "ē",
  ["ᾐ"] = "ē",
  ["ἡ"] = "ē",
  ["ἥ"] = "ē",
  ["ἣ"] = "ē",
  ["ἧ"] = "ē",
  ["ᾑ"] = "ē",
  ["ῄ"] = "ē",
  ["ῂ"] = "ē",
  ["ῇ"] = "ē",
  ["ᾔ"] = "ē",
  ["ᾒ"] = "ē",
  ["ᾕ"] = "ē",
  ["ᾓ"] = "ē",
  ["ᾖ"] = "ē",
  ["ᾗ"] = "ē",
  ["θ"] = "th",
  ["ι"] = "i",
  ["ί"] = "i",
  ["ὶ"] = "i",
  ["ῖ"] = "i",
  ["ἰ"] = "i",
  ["ἴ"] = "i",
  ["ἲ"] = "i",
  ["ἶ"] = "i",
  ["ἱ"] = "i",
  ["ἵ"] = "i",
  ["ἳ"] = "i",
  ["ἷ"] = "i",
  ["ϊ"] = "i",
  ["ΐ"] = "i",
  ["ῒ"] = "i",
  ["ῗ"] = "i",
  ["κ"] = "k",
  ["λ"] = "l",
  ["μ"] = "m",
  ["ν"] = "n",
-- (translit.gr_low continued: ξ through ω.)
  ["ξ"] = "x",
  ["ο"] = "o",
  ["ό"] = "o",
  ["ὸ"] = "o",
  ["ὀ"] = "o",
  ["ὄ"] = "o",
  ["ὂ"] = "o",
  ["ὁ"] = "o",
  ["ὅ"] = "o",
  ["ὃ"] = "o",
  ["π"] = "p",
  ["ρ"] = "r",
  ["ῤ"] = "r",
  ["ῥ"] = "rh",
  ["σ"] = "s",
  ["ς"] = "s",
  ["τ"] = "t",
  ["υ"] = "y",
  ["ύ"] = "y",
  ["ὺ"] = "y",
  ["ῦ"] = "y",
  ["ὐ"] = "y",
  ["ὔ"] = "y",
  ["ὒ"] = "y",
  ["ὖ"] = "y",
  ["ὑ"] = "y",
  ["ὕ"] = "y",
  ["ὓ"] = "y",
  ["ὗ"] = "y",
  ["ϋ"] = "y",
  ["ΰ"] = "y",
  ["ῢ"] = "y",
  ["ῧ"] = "y",
  ["φ"] = "ph",
  ["χ"] = "ch",
  ["ψ"] = "ps",
  ["ω"] = "ō",
  ["ώ"] = "ō",
  ["ὼ"] = "ō",
  ["ῶ"] = "ō",
  ["ῳ"] = "ō",
  ["ὠ"] = "ō",
  ["ὤ"] = "ō",
  ["ὢ"] = "ō",
  ["ὦ"] = "ō",
  ["ᾠ"] = "ō",
  ["ὡ"] = "ō",
  ["ὥ"] = "ō",
  ["ὣ"] = "ō",
  ["ὧ"] = "ō",
  ["ᾡ"] = "ō",
  ["ῴ"] = "ō",
  ["ῲ"] = "ō",
  ["ῷ"] = "ō",
  ["ᾤ"] = "ō",
  ["ᾢ"] = "ō",
  ["ᾥ"] = "ō",
  ["ᾣ"] = "ō",
  ["ᾦ"] = "ō",
  ["ᾧ"] = "ō",
}

translit.tables["Greek transliteration lowercase"] = translit.gr_low

--------------------------------------
-- Uppercase Greek Transliteration  --
--------------------------------------

-- Single capital letters with their precomposed variants.  Entries that are
-- commented out have no precomposed capital form; they are kept so that the
-- table stays line-by-line parallel to the lowercase one.
translit.gr_upp = {
  ["Α"] = "A",
  ["Ά"] = "A",
  ["Ὰ"] = "A",
--["ᾶ"] = "A",
  ["ᾼ"] = "A",
  ["Ἀ"] = "A",
  ["Ἁ"] = "A",
  ["Ἄ"] = "A",
  ["Ἂ"] = "A",
  ["Ἆ"] = "A",
  ["ᾈ"] = "A",      -- was missing; the parallel ᾘ and ᾨ are present below
  ["Ἁ"] = "A",
  ["Ἅ"] = "A",
  ["Ἃ"] = "A",
  ["Ἇ"] = "A",
  ["ᾉ"] = "A",
--["ᾴ"] = "A", -- I’d be very happy if anybody could explain to me
--["ᾲ"] = "A", -- why there's Ά, ᾌ and ᾼ but no “A + iota subscript
--["ᾷ"] = "A", -- + acute” …, same for Η, Υ and Ω + diacritica.
  ["ᾌ"] = "A",
  ["ᾊ"] = "A",
  ["ᾍ"] = "A",
  ["ᾋ"] = "A",
  ["ᾎ"] = "A",
  ["ᾏ"] = "A",
  ["Β"] = "B",
  ["Γ"] = "G",
  ["Δ"] = "D",
  ["Ε"] = "E",
  ["Έ"] = "E",
  ["Ὲ"] = "E",
  ["Ἐ"] = "E",
  ["Ἔ"] = "E",
  ["Ἒ"] = "E",
  ["Ἑ"] = "E",
  ["Ἕ"] = "E",
  ["Ἓ"] = "E",
  ["Ζ"] = "Z",
  ["Η"] = "Ē",
  ["Η"] = "Ē",
  ["Ή"] = "Ē",
  ["Ὴ"] = "Ē",
--["ῆ"] = "Ē",
  ["ῌ"] = "Ē",
  ["Ἠ"] = "Ē",
  ["Ἤ"] = "Ē",
  ["Ἢ"] = "Ē",
  ["Ἦ"] = "Ē",
  ["ᾘ"] = "Ē",
  ["Ἡ"] = "Ē",
  ["Ἥ"] = "Ē",
  ["Ἣ"] = "Ē",
  ["Ἧ"] = "Ē",
  ["ᾙ"] = "Ē",
--["ῄ"] = "Ē",
--["ῂ"] = "Ē",
--["ῇ"] = "Ē",
  ["ᾜ"] = "Ē",
  ["ᾚ"] = "Ē",
  ["ᾝ"] = "Ē",
  ["ᾛ"] = "Ē",
  ["ᾞ"] = "Ē",
  ["ᾟ"] = "Ē",
  ["Θ"] = "Th",
  ["Ι"] = "I",
  ["Ί"] = "I",
  ["Ὶ"] = "I",
--["ῖ"] = "I",
  ["Ἰ"] = "I",
  ["Ἴ"] = "I",
  ["Ἲ"] = "I",
  ["Ἶ"] = "I",
  ["Ἱ"] = "I",
  ["Ἵ"] = "I",
  ["Ἳ"] = "I",
  ["Ἷ"] = "I",
  ["Ϊ"] = "I",
--["ΐ"] = "I",
--["ῒ"] = "I",
--["ῗ"] = "I",
  ["Κ"] = "K",
  ["Λ"] = "L",
  ["Μ"] = "M",
  ["Ν"] = "N",
  ["Ξ"] = "X",
  ["Ο"] = "O",
  ["Ό"] = "O",
  ["Ὸ"] = "O",
  ["Ὀ"] = "O",
  ["Ὄ"] = "O",
  ["Ὂ"] = "O",
  ["Ὁ"] = "O",
  ["Ὅ"] = "O",
  ["Ὃ"] = "O",
  ["Π"] = "P",
  ["Ρ"] = "R",
--["ῤ"] = "R",
  ["Ῥ"] = "Rh",
  ["Σ"] = "S",
  ["Σ"] = "S",
  ["Τ"] = "T",
  ["Υ"] = "Y",
  ["Ύ"] = "Y",
  ["Ὺ"] = "Y",
--["ῦ"] = "Y",
--["ὐ"] = "Y",
--["ὔ"] = "Y",
--["ὒ"] = "Y",
--["ὖ"] = "Y",
  ["Ὑ"] = "Y",
  ["Ὕ"] = "Y",
  ["Ὓ"] = "Y",
  ["Ὗ"] = "Y",
  ["Ϋ"] = "Y",
--["ΰ"] = "Y",
--["ῢ"] = "Y",
--["ῧ"] = "Y",
  ["Φ"] = "Ph",
  ["Χ"] = "Ch",
  ["Ψ"] = "Ps",
  ["Ω"] = "Ō",
  ["Ώ"] = "Ō",
  ["Ὼ"] = "Ō",
--["ῶ"] = "Ō",
  ["ῼ"] = "Ō",
  ["Ὠ"] = "Ō",
  ["Ὤ"] = "Ō",
  ["Ὢ"] = "Ō",
  ["Ὦ"] = "Ō",
  ["ᾨ"] = "Ō",
  ["Ὡ"] = "Ō",
  ["Ὥ"] = "Ō",
  ["Ὣ"] = "Ō",
  ["Ὧ"] = "Ō",
  ["ᾩ"] = "Ō",
--["ῴ"] = "Ō",
--["ῲ"] = "Ō",
--["ῷ"] = "Ō",
  ["ᾬ"] = "Ō",
  ["ᾪ"] = "Ō",
  ["ᾭ"] = "Ō",
  ["ᾫ"] = "Ō",
  ["ᾮ"] = "Ō",
  ["ᾯ"] = "Ō",
}

translit.tables["Greek transliteration uppercase"] = translit.gr_upp

------------
-- Varia  --
------------

-- Archaic letters: digamma, koppa and sampi.
translit.gr_other = {
  ["ϝ"] = "w",
  ["Ϝ"] = "W",
  ["ϙ"] = "q",
  ["Ϙ"] = "Q",
  ["ϡ"] = "ss",
  ["Ϡ"] = "Ss",
}

translit.tables["Greek transliteration archaic characters"] = translit.gr_other

\stopluacode

%-===========================================================================--
%-                              End Of Tables                                --
%-===========================================================================--

%D The function \type{translit.subst(s, t)} is used to replace any key of
%D \type{t} that occurs in \type{s} with the according value of \type{t}.
%D Keys are applied longest first (ties in plain string order), so that a
%D multi-character key such as a digraph is never pre-empted by a shorter key
%D that matches its first letter, and the result no longer depends on the
%D iteration order of the table.

\startluacode

-- text: string to work on
-- tab:  table mapping search strings to replacement strings; the keys are
--       handed to utf.gsub unchanged, i.e. they are treated as patterns
-- Returns the string with all substitutions applied.
function translit.subst (text, tab)
  local symbols = {}
  for symbol in pairs(tab) do
    symbols[#symbols + 1] = symbol
  end
  -- Longest key first; equal lengths in plain string order (deterministic).
  table.sort(symbols, function (a, b)
    local len_a, len_b = utf.len(a), utf.len(b)
    if len_a ~= len_b then
      return len_a > len_b
    end
    return a < b
  end)
  for _, symbol in ipairs(symbols) do
    text = utf.gsub(text, symbol, tab[symbol])
  end
  return text
end

\stopluacode
%D \type{translit.add_table(t, ta)} is used to build the final substitution tables
%D from those we defined earlier; any keys in the previous table \type{t} are
%D overwritten if they exist in the added table \type{ta}, too.
\startluacode

-- Copies every pair of t_add into t (modifying t in place) and returns t.
function translit.add_table (t, t_add)
  for key, value in pairs (t_add) do
    t[key] = value
  end
  return t
end

\stopluacode
%D We might want to have all the table data nicely formatted by \CONTEXT\ 
%D itself, here's how we'll do it.  \type{translit.show_tab(t)} handles a
%D single table \type{t}, builds a Natural TABLE out of its content and
%D hands it down to the machine for typesetting.  For debugging purposes it
%D does not only print the replacement pairs but shows their code points as
%D well.  Rows are sorted by key so that the output is the same on every run.
\startluacode

function translit.show_tab (tab)
  -- Output a transliteration table, nicely formatted with natural tables.

  -- Space-separated list of the values utf.byte yields for each character
  -- of s, or the word "nil" for an empty string.  (Some characters might
  -- not be replaced but removed, others might be multi-char sequences.)
  local function codes (s)
    if #s == 0 then
      return "nil"
    end
    local result, i = "", 1
    while utf.byte(s, i) ~= nil do
      result = result .. utf.byte(s, i) .. " "
      i = i + 1
    end
    return result
  end

  -- Collect and sort the keys: pairs() gives no order guarantee.
  local keys = {}
  for key in pairs(tab) do
    keys[#keys + 1] = key
  end
  table.sort(keys)

  context ("\\setupTABLE[r][each]   [style=\\tfx,align=center] ")
  context ("\\setupTABLE[c][each]   [frame=off]")
  context ("\\setupTABLE[r][each]   [frame=off]")
  context ("\\setupTABLE[c][first]  [style=italic]")
  context ("\\setupTABLE[r][first]  [style=bold,topframe=on,bottomframe=on]")
  context ("\\setupTABLE[r][last]   [style=bold,topframe=on,bottomframe=on]")
  context ("\\bTABLE [split=yes,option=stretch]")
  context ("\\bTABLEhead\\bTR"..
    "\\bTH Number\\eTH" ..
    "\\bTH letters\\eTH" ..
    "\\bTH n\\eTH" ..
    "\\bTH replacement\\eTH"..
    "\\bTH n\\eTH" ..
    "\\bTH bytes\\eTH"..
    "\\bTH repl. bytes"..
    "\\eTH\\eTR\\eTABLEhead ")
  context("\\bTABLEbody")
  for cnt, key in ipairs(keys) do
    local val = tab[key]
    context ("\\bTR\\bTC " .. cnt .. "\\eTC")
    context ("\\bTC " ..
      key .. "\\eTC\\bTC " ..
      utf.len(key) .. "\\eTC\\bTC " ..
      val .. "\\eTC\\bTC " ..
      utf.len(val) .. "\\eTC\\bTC " ..
      codes(key) .. "\\eTC\\bTC " ..
      codes(val) .. "\\eTC")
    context ("\\eTR ")
  end
  context("\\eTABLEbody")
  context ("\\bTABLEfoot\\bTR"..
    "\\bTC Number\\eTC" ..
    "\\bTC letters\\eTC" ..
    "\\bTC n\\eTC" ..
    "\\bTC replacement\\eTC"..
    "\\bTC n\\eTC" ..
    "\\bTC bytes\\eTC"..
    "\\bTC repl. bytes"..
    "\\eTC\\eTR\\eTABLEfoot ")
  context ("\\eTABLE ")
end

\stopluacode
%D Having to pick out single tables for printing can be tedious, therefore we
%D let Lua do the job in our stead.  \type{translit.show_all_tabs()} calls
%D \type{translit.show_tab} on every table that is registered with
%D \type{translit.tables} -- and uses its registered key as table heading.
\startluacode

function translit.show_all_tabs ()
  -- Output all translation tables that are registered within translit.tables,
  -- one section per table, sorted by the registered heading.
  local names = {}
  for name in pairs(translit.tables) do
    names[#names + 1] = name
  end
  table.sort(names)
  context ("\\chapter{Transliterator Showing All Tables}")
  for _, name in ipairs(names) do
    context ("\\section{" .. name .. "}")
    translit.show_tab (translit.tables[name])
  end
end

\stopluacode
%D Now to the function \type{translit.transliterate(m, t)}: it constitutes the
%D metafunction that is called by the \type{\transliterate} command and itself
%D chooses the substitution tables according to the \type{m} parameter and
%D applies them in a given order on the string \type{t}.
%D (The correct order in multi-pass substitution has to be enforced because the
%D tables are in fact unordered dictionaries.)
\startluacode

-- Merge the given tables, left to right, into one fresh table.  Later
-- tables win on duplicate keys (this is what translit.add_table does).
local function merged (...)
  local result = {}
  for i = 1, select("#", ...) do
    translit.add_table(result, (select(i, ...)))
  end
  return result
end

-- method: name of the transliteration mode
-- text:   string to transliterate
-- Runs the substitution passes that belong to the mode and prints the
-- result to TeX.  A mode that matches no branch leaves the text unchanged.
function translit.transliterate (method, text)
  local t = translit
  local repl_tab = {}   -- table for the final pass, applied after the branch
  if method == "ru" then
    repl_tab = merged(t.ru_upp, t.ru_low)
  elseif method == "ru_transcript_de" then
    -- Rule tables first, one pass each, in this exact order.
    -- (The rule table "ru_trsc_jorule" is deliberately not applied.)
    for _, rule in ipairs { "jrule", "irule", "srule", "sharpsrule", "jerule" } do
      text = t.subst(text, t["ru_trsc_" .. rule])
    end
    -- Then the "first" tables in a pass of their own ...
    text = t.subst(text, merged(t.ru_trsc_upp_first, t.ru_trsc_low_first))
    -- ... and the general tables in the final pass.
    repl_tab = merged(t.ru_trsc_upp, t.ru_trsc_low)
  elseif method == "ru_transcript_en" then
    text = t.subst(text, t.ru_trsc_en_jerule)
    -- NOTE(review): unlike the German variant, the "first" tables share a
    -- pass with the general tables here -- confirm that this is intended.
    repl_tab = merged(t.ru_trsc_en_upp_first, t.ru_trsc_en_low_first,
                      t.ru_trsc_en_upp, t.ru_trsc_en_low)
  elseif method == "ru_old" then
    repl_tab = merged(t.ru_upp, t.ru_low, t.ru_old_upp, t.ru_old_low)
  elseif method == "all" then
    repl_tab = merged(t.ru_upp, t.ru_low, t.ru_old_upp, t.ru_old_low,
                      t.non_ru_upp, t.non_ru_low)
  elseif method == "iso9_ocs" then
    repl_tab = merged(t.ru_upp, t.ru_low, t.non_ru_upp, t.non_ru_low,
                      t.ocs_add_upp, t.ocs_add_low)
  elseif method == "ocs" then
    repl_tab = merged(t.ocs_low, t.ocs_upp)
  elseif method == "ocs_gla" then
    repl_tab = merged(t.ocs_gla_low, t.ocs_gla_upp)
  elseif method == "ru_cz" then
    repl_tab = merged(t.ru_trsc_cz_low, t.ru_trsc_cz_upp)
  elseif method == "ocs_cz" then
    repl_tab = merged(t.ru_trsc_cz_low, t.ru_trsc_cz_upp,
                      t.ru_trsc_cz_add_low, t.ru_trsc_cz_add_upp)
  elseif method == "gr" or method == "gr_n" then
    -- Pass 1: word-initial rough breathing (diphthongs and single vowels).
    text = t.subst(text, merged(t.gr_di_in_low, t.gr_di_in_upp,
                                t.gr_in_low, t.gr_in_upp))
    -- Pass 2: remaining diphthongs; "gr_n" adds the nasalization rule.
    if method == "gr_n" then
      text = t.subst(text, merged(t.gr_di_low, t.gr_di_upp, t.gr_nrule))
    else
      text = t.subst(text, merged(t.gr_di_low, t.gr_di_upp))
    end
    -- Final pass: single letters and archaic characters.
    repl_tab = merged(t.gr_low, t.gr_upp, t.gr_other)
  end
  text = t.subst(text, repl_tab)
  tex.print (text)
end
\stopluacode

%D The following will help debugging and reviewing tables.  Make sure your
%D typescript can handle the characters, in general it's no use with Latin
%D Modern which unfortunately provides only a restricted set of the unicode
%D range.
%D
%D The user-level command to output a single substitution table is
%D \type{\showOneTranslitTab{#1}}; its argument is passed to Lua as is.
\define[1]\showOneTranslitTab{%
  \ctxlua{translit.show_tab(#1)}%
}

%D The user-level command to output all defined tables is
%D \type{\showTranslitTabs}; it takes no argument.
\define\showTranslitTabs{%
  \ctxlua{translit.show_all_tabs()}%
}

\startluacode
-- Increments the debug message counter and prints a numbered label.
-- Expects translit.debug_count to hold a number already.
function translit.debug_next ()
  local count = translit.debug_count + 1
  translit.debug_count = count
  tex.print("\\tfxx{\\bf translit debug msg. nr.~" .. count ..  "}")
end
\stopluacode

%D \type{\translitDebug{#1}} typesets the message \type{#1} together with a
%D numbered margin label, but only if \type{\TRLdebug} expands to
%D \type{true}.
\def\translitDebug#1{%
  \doif{\TRLdebug}{true}{%
    %\inmargin{\ctxlua{translit.debug_next()} #1}% Unreadable with too many debug messages.
    {\ss\inmargin{\ctxlua{translit.debug_next()}} #1}%
  }%
}


%D The user-level command \type{\transliterate[#1]{#2}} does the job of
%D switching to a given language (for hyphenation) and adjusting the
%D substitution method locally.  It takes an optional list \type{[#1]} of
%D key-value arguments to allow ad-hoc specification of either of the two
%D that deviate from the defaults set initially by means of
%D \type{\setupTranslit}.
%D
%D Internally, \type{\dotransliterate} is called according to the \CONTEXT\ 
%D coding style and in case the user provides \type{hyphenate=} or
%D \type{mode=} those will be used instead of the globals.
 Note that this
%D leaves the latter unchanged.  Thus, in order to permanently switch to
%D another transliteration style the user would have to set it by calling
%D \type{\setupTranslit} again.

% \dotransliterate[#1]#2 is the worker behind \transliterate.
%   #1  optional key=value list, read into the TRLlocal namespace
%   #2  the text; it is placed between double quotes in the Lua call
% NOTE(review): because #2 ends up inside a double-quoted Lua string, a
% double quote or backslash in the text presumably breaks the call -- confirm.
\def\dotransliterate[#1]#2{%
  \iffirstargument% 
    % Reset both local settings first, so that the \ifcsname tests below
    % only succeed for keys that are set by the \getparameters call.
    \let\TRLlocalhyphenate\undefined%
    \let\TRLlocalmode\undefined%
    \getparameters[TRLlocal][#1]%
    % Hyphenation language: local value if given, global default otherwise.
    \ifcsname TRLlocalhyphenate\endcsname%
      \translitDebug{\type{\TRLlocalhyphenate} expands to \TRLlocalhyphenate\ (global: \TRLhyphenate). }%
      \language[\TRLlocalhyphenate]%
    \else%
      \translitDebug{\type{\TRLlocalhyphenate} is undefined.  Using \TRLhyphenate instead. }%
      \language[\TRLhyphenate]%
    \fi%
    % Transliteration mode: local value if given, global default otherwise.
    \ifcsname TRLlocalmode\endcsname%
      %\translitDebug{{\type{\TRLlocalmode} expands to \TRLlocalmode. }}%
      \translitDebug{\type{\TRLlocalmode} is defined. }%
      \ctxlua{translit.transliterate ("\TRLlocalmode", "#2")}%
    \else%
      \translitDebug{\type{\TRLlocalmode} is undefined.  Using global \type{\TRLmode}. }%
      \ctxlua{translit.transliterate ("\TRLmode", "#2")}%
    \fi%
  \else%
    % No optional argument at all: use the global settings.
    \language[\TRLhyphenate]%
    \translitDebug{No translit options given.  Using defaults. }%
    \ctxlua{translit.transliterate ("\TRLmode", "#2")}%
  \fi%
}

% User-level entry point; \dosingleempty supplies an empty [#1] when the
% optional argument is left out.
\def\transliterate{\dosingleempty\dotransliterate}

\protect

%   vim:ft=context diff --git a/tex/context/third/transliterator/t-transliterator.tex b/tex/context/third/transliterator/t-transliterator.tex new file mode 100644 index 0000000..c08e822 --- /dev/null +++ b/tex/context/third/transliterator/t-transliterator.tex @@ -0,0 +1 @@ +\loadmarkfile[t-transliterator]  | 
