From 814b93d12bc9a0792b150527495ece0847a343fc Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 21 Nov 2021 19:22:18 +0100 Subject: reorganize source tree --- COPYING | 23 +- doc/context/third/transliterator/COPYING | 22 - .../third/transliterator/transliterator.tex | 897 --------------------- doc/transliterator.tex | 897 +++++++++++++++++++++ .../third/transliterator/mtx-t-transliterate.lua | 64 -- scripts/mtx-t-transliterate.lua | 64 ++ src/t-transliterator.mkii | 3 + src/t-transliterator.mkiv | 188 +++++ src/t-transliterator.tex | 1 + src/t-transliterator.xml | 63 ++ src/trans_tables_bg.lua | 114 +++ src/trans_tables_glag.lua | 128 +++ src/trans_tables_gr.lua | 709 ++++++++++++++++ src/trans_tables_iso9.lua | 310 +++++++ src/trans_tables_scntfc.lua | 287 +++++++ src/trans_tables_sr.lua | 241 ++++++ src/trans_tables_trsc.lua | 867 ++++++++++++++++++++ src/transliterator.lua | 283 +++++++ tex/context/interface/third/t-transliterator.xml | 63 -- .../third/transliterator/t-transliterator.mkii | 3 - .../third/transliterator/t-transliterator.mkiv | 188 ----- .../third/transliterator/t-transliterator.tex | 1 - .../third/transliterator/trans_tables_bg.lua | 114 --- .../third/transliterator/trans_tables_glag.lua | 128 --- .../third/transliterator/trans_tables_gr.lua | 709 ---------------- .../third/transliterator/trans_tables_iso9.lua | 310 ------- .../third/transliterator/trans_tables_scntfc.lua | 287 ------- .../third/transliterator/trans_tables_sr.lua | 241 ------ .../third/transliterator/trans_tables_trsc.lua | 867 -------------------- .../third/transliterator/transliterator.lua | 283 ------- 30 files changed, 4177 insertions(+), 4178 deletions(-) mode change 120000 => 100644 COPYING delete mode 100644 doc/context/third/transliterator/COPYING delete mode 100644 doc/context/third/transliterator/transliterator.tex create mode 100644 doc/transliterator.tex delete mode 100644 scripts/context/lua/third/transliterator/mtx-t-transliterate.lua create mode 100644 
scripts/mtx-t-transliterate.lua create mode 100644 src/t-transliterator.mkii create mode 100644 src/t-transliterator.mkiv create mode 100644 src/t-transliterator.tex create mode 100644 src/t-transliterator.xml create mode 100644 src/trans_tables_bg.lua create mode 100644 src/trans_tables_glag.lua create mode 100644 src/trans_tables_gr.lua create mode 100644 src/trans_tables_iso9.lua create mode 100644 src/trans_tables_scntfc.lua create mode 100644 src/trans_tables_sr.lua create mode 100644 src/trans_tables_trsc.lua create mode 100644 src/transliterator.lua delete mode 100644 tex/context/interface/third/t-transliterator.xml delete mode 100644 tex/context/third/transliterator/t-transliterator.mkii delete mode 100644 tex/context/third/transliterator/t-transliterator.mkiv delete mode 100644 tex/context/third/transliterator/t-transliterator.tex delete mode 100644 tex/context/third/transliterator/trans_tables_bg.lua delete mode 100644 tex/context/third/transliterator/trans_tables_glag.lua delete mode 100644 tex/context/third/transliterator/trans_tables_gr.lua delete mode 100644 tex/context/third/transliterator/trans_tables_iso9.lua delete mode 100644 tex/context/third/transliterator/trans_tables_scntfc.lua delete mode 100644 tex/context/third/transliterator/trans_tables_sr.lua delete mode 100644 tex/context/third/transliterator/trans_tables_trsc.lua delete mode 100644 tex/context/third/transliterator/transliterator.lua diff --git a/COPYING b/COPYING deleted file mode 120000 index c3630c6..0000000 --- a/COPYING +++ /dev/null @@ -1 +0,0 @@ -./doc/context/third/transliterator/COPYING \ No newline at end of file diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..ac0eb7c --- /dev/null +++ b/COPYING @@ -0,0 +1,22 @@ +Copyright 2010-2013 Philipp Gesang. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/doc/context/third/transliterator/COPYING b/doc/context/third/transliterator/COPYING deleted file mode 100644 index ac0eb7c..0000000 --- a/doc/context/third/transliterator/COPYING +++ /dev/null @@ -1,22 +0,0 @@ -Copyright 2010-2013 Philipp Gesang. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/doc/context/third/transliterator/transliterator.tex b/doc/context/third/transliterator/transliterator.tex deleted file mode 100644 index d8e392c..0000000 --- a/doc/context/third/transliterator/transliterator.tex +++ /dev/null @@ -1,897 +0,0 @@ -\setuppapersize [A5] [A5] - -\definecolor [gutenred] [x=bf221f] % rubrication from digitized_Göttingen Gutenberg bible - -\setupinteraction [ - state=start, - color=gutenred, % rubricate, don’t viridificate - contrastcolor=gutenred, -] - -\setupcombinedlist[content][interaction=text,focus=standard] - -\setupindenting[yes,next,medium] - -%\showgrid -\setuphead[chapter][ - align=middle, - number=no, - style={\rm\tfa\setcharacterkerning[capitals]\WORD}, - before={\blank[5*line]}, - after={\blank[2*line,force]} -] - -\setuphead[section][ - align=middle, - number=no, - style={\rm\setcharacterkerning[capitals]\WORD}, - before={\blank[line,force]}, - after={\blank[line]} -] - -\setuphead[subsection][ - align=middle, - number=no, - style={\tf\sc\word}, - before={\blank[line,force]}, - after={\blank[line]} -] - -\setuplist[chapter][ - alternative=c, - interaction=text, - style={\word\sc}, -] -\setuplist[section,subsection][ - alternative=a, - style=\tfx\italic, - interaction=text, 
- margin=2em, - numberstyle=, - textstyle=, - numberstyle=\tfx, -] - -\setuplist[subsection][ - margin=4em, -] - -\setuplistalternative - -\definecharacterkerning [capitals] [factor=.05] - -\definefontfeature [default][default][ - protrusion=quality, - expansion=quality, - %mode=node, - script=latn, - onum=yes, - %dlig=yes, - liga=yes, -] - -\definefontfeature [smallcaps] [default] [smcp=yes] -\def\sc{\addff{smallcaps}\setcharacterkerning[capitals]} - -\setupbodyfontenvironment [default] [em=italic] - -\starttypescript [serif] [bukyvede] - \setups [font:fallback:serif] - \definefontsynonym [Serif] [name:Bukyvede] [features=default] - \definefontsynonym [SerifItalic] [name:Bukyvede-Italic] [features=default] -\stoptypescript -\usetypescript [bukyvede] -\definetypeface [hlaholice] [rm] [serif] [bukyvede] [default] [encoding=ec] -\definetypeface [cyrilice] [rm] [serif] [bukyvede] [default] [encoding=ec] -\definetypeface [lmstd] [rm] [serif] [latin-modern] [default] [encoding=texnansi] - -\usetypescriptfile[type-cmu] -\usetypescript[computer-modern-unicode] -\setupbodyfont[computer-modern-unicode,9pt] - -\usetypescript [serif] [hz] [highquality] -\setupalign [hanging,hz] - -\usemodule[bib] -\usemodule[transliterator] - -\setupcite[authoryear][compress=no] - -\setuppublications[% - alternative=apa,% - refcommand=authoryear,% - sorttype=bbl,% - numbering=yes,% - autohang=yes% -]% - -\setuppublicationlist[% - artauthor=\invertedauthor% -] - -% == REFERENCES =============================================================== - -\startpublication[ - k=aks, - t=book, - a={{Birnbaum/Schaeken}}, - y=1999, - n=4, - u=http://www.schaeken.nl/lu/research/online/publications/akslstud/index.htm, - s={Studien}, -] -\author[]{Henrik}[H.]{}{Birnbaum} -\author[]{Jos}[J.]{}{Schaeken} -\pubyear{1999} -\title{Altkirchenslavische Studien} -\volume{2} -\city{München} -\stoppublication - -\startpublication[ - k=bornemann, - t=book, - a={{Bornemann/Risch}}, - y=1978, - n=2, - s={Grammatik}, -] 
-\author[]{Eduard}[]{}{Bornemann} -\author[]{Ernst}[]{}{Risch} -\pubyear{1978} -\title{Griechische Grammatik} -\city{Frankfurt am Main} -\edition{2.} -\stoppublication - -\startpublication[ - k=bh, - t=book, - a={{Bringhurst}}, - y=2008, - n=4, - s={Bringhurst}, -] -\author[]{Robert}[R]{}{Bringhurst} -\pubyear{2008} -\title{The Elements of Typographic Style} -\edition{3.2} -\city{Point Roberts WA, Vancouver} -\stoppublication - -\startpublication[ - k=dintb, - t=book, - a={{DIN}}, - y=2001, - n=5, - s={DIN}, -] -\editor[]{}[]{}{DIN Deutsches Institut für Normung e.~V.} -\pubyear{2001} -\title{Bibliotheks und Dokumentationswesen} -\city{Berlin/Wien/Zürich} -\stoppublication - -\startpublication[ - k=duden, - t=book, - a={{Drosdowski/Müller/Scholze-Stubenrecht/Wermke}}, - y=1952, - n=1, - s={DUDEN}, -] -\editor[]{Günther}[]{}{Drosdowski} -\editor[]{Wolfgang}[]{}{Müller} -\editor[]{Werner}[]{}{Schulze-Stubenrecht} -\editor[]{Matthias}[]{}{Wermke} -\pubyear{1991} -\title{DUDEN Rechtschreibung der deutschen Sprache} -\city{Mannheim et al} -\edition{20.} -\stoppublication - -\startpublication[ - k=kirschbaum, - t=book, - a={{Kirschbaum}}, - y=2001, - n=3, - s={Grammatik}, -] -\author[]{Ernst Georg}[]{}{Kirschbaum} -\pubyear{2001} -\title{Grammatik der russischen Sprache} -\city{Berlin} -\stoppublication - -\startpublication[ - k=iso, - t=inbook, - a={{ISO}}, - y=1995, - n=6, - s={ISO~9}, -] -\editor[]{}[]{}{{{\sc iso} International Organization for Standardization}} -\pubyear{1995} -\title{Information and documentation -- Transliteration of Cyrillic characters into Latin characters -- Slavic and non-Slavic languages} -\edition{2.} -\crossref{dintb} -\pages{230--245} -\stoppublication - -%============================================================================== - -\setupframed[ - frame=off, - align=normal, - location=top, -] - -\defineframed[displayouter][ - location=top, - align={normal,verytolerant}, - frame=off, - style=\tfx, -] 
-\defineframed[displayinner][displayouter][ - offset=1ex, - width=.47\textwidth, -] - -\definenumber[excnt] -\setnumber[excnt][1] - -% This should rather be done using key-value args but I'm too lazy now. -% 1: mode; 2: hyphenate original; 3: hyphenate transliteration; -% 4: font for original; 5: caption; 6: original text. -\def\trlex#1#2#3#4#5#6{% - \setuplocalinterlinespace[line=8pt]% - \startplacefigure [ - location=force, - title={\type{[mode=#1,hyphenate=#3]}\hskip 1em{\italic #5}} - ]% - \displayouter{% - \displayinner{% - \setupbodyfont[#4]% - \tfx - %\setuptolerance[verytolerant, stretch] - \setuptolerance[verytolerant] - \unskip\language[#2]#6\par - }% - \displayinner{% - \tfx - \transliterate[mode=#1,hyphenate=#3]{#6\par}% - } - } - \stopplacefigure - \incrementnumber[excnt]% -} - -\defineframedtext[CenteredText][width=fit,frame=off,align=middle] - -\usemodule[int-load] -\loadsetups[t-transliterator.xml] - - -\setupwhitespace[medium] -\language[en] - -\starttext - -\setuppagenumbering[state=stop] - -\blank[3cm,force] - - -%\showframe -\startstandardmakeup[location=middle] - -\setuplayout[width=middle] -\raggedcenter -\vfill - {\setupbodyfont[19pt] - {\em The} - \blank [2*big] - {\tfc\sc transliterator} - \blank [2*big] - {\em for \CONTEXT} - \blank [9*big] - {\tfa\sc manual} - } -\vfill -\stopstandardmakeup - -\startstandardmakeup -\vfill -\framed [frame=off,topframe=on] {% -\tfxx\ss\setupinterlinespace[small]% -\startlines -The {\em Transliterator} module and mini-manual, -by Philipp Gesang, Radebeul. 
-Mail any patches or suggestions to - -{\tt philipp -dot- gesang -at- alumni -dot- uni-heidelberg -dot- de} -\useurl[me][https://phi-gamma.net] -\from[me]% -\stoplines -} -\stopstandardmakeup - -\setuppagenumbering[% - location=middle, - state=start, - style=\tfc -] - -\setuppagenumber[number=1] -\completecontent -\chapter{Usage and Functionality} -\section{Overview} -The Transliterator provides two commands: \type{\setuptransliterator} -preferably goes into the preamble and allows for global configuration. -The Transliterator is invoked locally by \type{\transliterate} which does the -actual transliteration of text passages. - -\setup{setuptransliterator} - -\setup{transliterate} - -\section{Loading and Configuring the Module} -In order to use the Transliterator in a document we put the following somewhere before -\type{\starttext}. -\starttyping -\usemodule[transliterator] -\stoptyping -Although it has some defaults already set at this point they will most likely -not correspond to what is needed in the document. -To override the presets we use the command \type{\setuptransliterator[#1]}. -It takes a comma separated list of two key-value pairs: \type{mode} and -\type{hyphenate}. -Through {\em mode} we specify the transliteration method. -By the time of this writing this can be one of the following set: - -\startplacetable[location=top,title=Transliteration modes.] 
- \tfx - \starttabulate[|l|p|] - \HL - \NC mode \NC description \NC\NR - \HL - \NC \type{all} \NC {\sc iso}~9 complete \NC\NR - \NC \type{bg_de} \NC Bulgarian, German „scientific“ transliteration\NC\NR - \NC \type{gr} \NC transliteration for Greek \NC\NR - \NC \type{gr_n} \NC transliteration for Greek obeying nasalizations \NC\NR - \NC \type{iso9_ocs} \NC == \type{all} plus non-{\sc iso} additions for Old (Church) Slavonic \NC\NR - \NC \type{ocs} \NC “scientific” transliteration for Old (Church) Slavonic\NC\NR - \NC \type{ocs_cz} \NC Czech transcription for Old (Church) Slavonic\NC\NR - \NC \type{ocs_gla} \NC “scientific” transliteration for Old (Church) Slavonic / Glagolitic alphabet\NC\NR - \NC \type{ru} \NC {\sc iso}~9 Russian \NC\NR - \NC \type{ru_cz} \NC Czech transcription for Russian\NC\NR - \NC \type{ru_old} \NC {\sc iso}~9 Russian plus pre-1918 chars (the default)\NC\NR - \NC \type{ru_transcript_de} \NC German transcription for Russian \NC\NR - \NC \type{ru_transcript_en} \NC English transcription for Russian \NC\NR - \NC \type{sr_tocy} \NC Serbian, Latin to Cyrillic \NC\NR - \NC \type{sr_tolt} \NC Serbian, Cyrillic to Latin \NC\NR - \HL - \stoptabulate -\stopplacetable - - -{\em Nota bene}: The description at this point only serves as a placeholder as the -transliteration modes are discussed in detail later in this document. - -Through the \type{hyphenate} argument it is possible to adjust the language -that is used for hyphenation. -Specifying \type{\setuptransliterator[hyphenate=nl]} will let every transliterated -part of the document be processed according to dutch rules, leaving the overall -\type{\language[#1]} configuration unchanged for the rest of the content. - -Another argument, \type{deficient_font} can be used in -combination with the modes \type{all}, \type{ru_old} and -\type{iso9_ocs}. It lets you circumvent the deficiency that some -fonts show concerning the characters that {\sc iso}~9 assigns to -cyrillic “ь” and “ъ”. 
Set it to {\em true} to enable it. - -The actual transliteration is done using the macro -\type{\transliterate[#1]} \type{{#2}}. -The second argument takes the raw string in the original language that we want -to process, while the first, optional argument accepts local adjustments for -\type{mode} and \type{hyphenate}. -Thus, we would typeset one of Epicuros' sayings like this: -{\setuptolerance[verytolerant] -\starttyping -\transliterate[mode=gr]{κακὸν ἀνάγκη, ἀλλ' οὐδεμία ἀνάγκη ζῆν - μετὰ ἀνάγκης} -\stoptyping -\noindentation which yields \quotation{\transliterate[mode=gr]{κακὸν ἀνάγκη, ἀλλ' οὐδεμία ἀνάγκη ζῆν -μετὰ ἀνάγκης}} in the {\sc pdf} output. -} -Alternatively there is an environment, \type{\starttransliterate[#1]}, as well, -that takes the same arguments. - -There are two special switches for the {\em Serbian} patterns, -\type{hinting} and \type{sr_exceptions}, allowing for a little -more fine-tuning. -If activated, hinting provides the special character “\type{*}” as -a means to indicate positions, where the sequences “lj” and “nj” -are to be treated as separate consonants. -E.~g. \type{\transliterate[mode=sr_tocy]{in*jekcija}} is -correctly transliterated as \transliterate[mode=sr_tocy]{in*jekcija}, -and not \transliterate[mode=sr_tocy,sr_exceptions=no]{injekcija}. -Likewise, further exceptions that are internally represented as -a lookup table can be toggled off or on by the -\type{sr_exceptions} switch. -This pertains to words like “nadživeti” (result: \transliterate[mode=sr_tocy]{nadživeti}) -but may lead to accidental false positives in cases that the -module author didn’t foresee. -By default both hinting and lexical exceptions are set to -\type{yes}. - -For orientation purposes the Transliterator comes with two macros that allow -for closer inspection of the internal tables. 
-\type{\showOneTranslitTab{#1}} outputs, obviously, a single table; their -identifiers -can be found in the \type{trans_} -\type{tables_*.lua} files in the transliterator -directory. -The lazy alternative is \type{\showTranslitTabs} which prints all registered -tables in a row nicely formatted as indexable sections. -(Be warned, this may take some time.) - -\chapter{Introduction} - -\hfil\framed[width=\hsize,align=left]{% - \inframed[bottomframe=on]{\it What's all this, then?} - \blank[medium] - {\sc Graham Chapman} -} -\blank[2*big] - -\noindentation At first glance, {\em transliteration} -- the accurate representation of letters from one -alphabet in another -- seems obsolete after the advent of Unicode -which made its way even into \TeX\ lately. -Why not just go on and write down everything in the original script? -But still there are lots of situations where transliteration is desirable, -e.~g. some scholarly habits might prescribe it in the main text with citations in -footnotes left in the original alphabet; or transliteration might alleviate -comparison within one language that happens to be written in different scripts; -finally, including text in a foreign script might be impossible if there is no -appropriate font which fits the main text. -However, it is still most convenient for the writer to keep the -untransliterated original in the document source as this allows for reusing it in -another context where different transliteration rules might apply. -The Transliterator module is meant to provide both: have the original in the -source and a transliteration only in the final document. - -Another way of handling foreign languages is {\em transcription}. -It aims at producing some representation that does not rely on symbolisms -alien to the language and thus to be at least \quotation{pronounceable} -without further know\-ledge. 
-As transcription methods are language specific and highly idiosyncratic they -complicate the restoration of the original phrase because information may be lost. -The Transliterator provides means of transcription as well but in most cases -you should refrain from using them (\type{[mode=ru_transcript_en]}, -\type{[mode=ru_transcript_de]}). - -For Cyrillic scripts the best quality is achieved using the standardized -transliteration according to {\sc iso~9}.\footnote{\cite[authoryear][iso].} -This method not only covers all contemporary languages that are written in -a variety of Cyrillic but provides a bijective mapping on latin characters as -well. -Consequently, you can unambiguously revert the transliteration into -its original form which was impossible with previous versions of {\sc -iso}~9 because -they contained several exceptions depending on the original language. -Although fifteen years old it has not yet made its way into scholarly -publications at large so it might not immediately look familiar.\footnote{ - A hasty glance at the latest issues of around 20~journals in a local library - revealed that 2~of them actually are using {\sc iso}~9, these are {\em Przegląd - wschodni} as of Nr. X, 3 (2008) and {\em Kwartalnik historyczny} as of CXVI, - 3 (2009); the latter even contains a table on p.~218 showing a subset of the - {\sc iso}~9 transliteration rules. -} -The diacritics are not identical to the \quotation{scientific} -transliteration used in Slavic studies but as long as your editor does not -enforce its traditional method you should always prefer {\sc iso}~9 -(\type{[mode=ru]}, \type{[mode=ru_old]}, \type{[mode=all]}). - -But {\sc iso}~9, too, has its shortcomings. -It has no definitions for historical forms of the cyrillic script like -pre-XVIII-century Russian and Old (Church) Slavonic while those are covered by -the scholarly transliterations. 
-To amend the situation the Transliterator provides an extension to {\sc -iso}~9 for -Old Slavonic containing the glyphs -\startluacode -local translit = thirddata.translit -environment.loadluafile("trans_tables_scntfc") -local cnt, len = 0, 0 -for i,j in pairs(translit.ocs_add_low) do - len = len + 1 -end - -for k,v in pairs(translit.ocs_add_low) do - cnt = cnt + 1 - context.bgroup() - context.setupbodyfont({"cyrilice"}) - context(k) - context.egroup() - if cnt < len -1 then - context(", ") - elseif cnt < len then - context("\\ and ") - end -end -\stopluacode -\ taken from the scientific transliteration (\type{[mode=iso9_ocs]}). -If you prefer more coherency you might want to use pure \quotation{scientific} -transliteration (\type{[mode=ocs]}). -This method is complemented by \type{[mode=ocs_gla]}, the only option the -Transliterator offers for the Glagolitic alphabet; they can be used consistently -alongside each other as they were taken from the same -book.\footnote{\cite[authoryear][aks] p.~77 \cite[url][aks].} - -As far as I know there is no standardized transliteration for Greek so I had to -resort to the one that is used in scholarly literature. -Its main drawback is that it has no representation for diacritics apart from -(rough) breathing, but it respects specific rules for diphthongs and vowels in -initial positions (\type{[mode=gr]}). -There is one alternative mode for those who prefer their {\em γ} phonetically -resolved to /{\em n}/ before velars ({\em γ}, {\em κ}, {\em χ} and {\em ξ}; -\type{[mode=gr_n]}). - -Concerning the hyphenation within transliterated passages the default is set to -\type{[hyphenate=cs]} (Czech) which produces reasonable results when using -\type{all}, \type{iso9_ocs} or \type{ru_cz}. 
-For stuff like the English and German transcription use their respective native -hyphenation.\footnote{% - You'll have to specify this through \type{\setuptransliterator} - or locally because the default hyphenation is {\em not} the same as your - documents'. -} -However, as there is no hyphenation pattern I know of that closely resembles the -transliteration of Greek you might have to resort to putting \type{\discretionary} -hyphens when line breaking does not satisfy. - -The Transliterator as a whole is nothing more than a bunch of dictionaries -containing substitution rules for tokens that may occur in the text. -These tokens may be single characters or strings of more than one character. -As there is no simple way to impose order onto those dictionaries the rules for -one transliteration method are, if needed, distributed over more than one table -which will be applied successively to ensure that multi-character rules -are processed first. - - -\setupfloats[spacebefore=small,spaceafter=small] -\startplacetable[location=left,title={ - Processing time for corpus {\language[cs]Evgenij Onegin} according to - GNU time(1) and the \CONTEXT\ stats. -}] - \starttabulate[|l|cg(.)|cg(.)|] - \HL%····················································% - \NC mode \NC time(1) in $s$ \NC \CONTEXT \NC \NR - \NC \NC 8.98 \NC 8.82 \NC \NR - \NC \type{all} \NC 8.37 \NC 8.25 \NC \NR - \NC \type{ru_cz} \NC 8.61 \NC 8.48 \NC \NR - \NC \type{ru_transcript_en} \NC 9.26 \NC 9.10 \NC \NR - \NC \type{ru_transcript_de} \NC 14.83 \NC 14.71 \NC \NR - \HL%····················································% - \stoptabulate -\stopplacetable -\setuptolerance[tolerant] -Following suggestions from the mailing list, the Transliterator uses {\em LPeg} -when substituting. -This means a huge speed improvement for most substitution modes when compared -to the older mechanism that used \type{string.gsub} iteratively. 
-In ordinary use when transliterating single words or short phrases the -Transliterator should have little impact on document processing time at large, -with the exception of the German transcription mode, perhaps.\footnote{ - The problem lies within the rule set for the German transcription which - dictates different instructions depending on the environment of a character; - these may conflict, i.~e. it is impossible to substitute a character stream - in a single run as some rules may apply only to the result of a previous rule. - Let me know if there's a way to tell LPeg to backtrack to the last character - of a match and not to continue on the next. -} -Transliterating (and typesetting in MKIV) \transliterate{Александр Пушкин}'s verse novel -\transliterate{Евгений Онегин}, a corpus of about 27000 words, in -\type{[mode=all]} shows little to no delay at all. -In fact, typesetting Cyrillic letters with Russian hyphenation seems to slow -things down so much that transliteration may be faster and use slightly less -memory.\footnote{% - On an IBM T43: \tt 2.6.32-ARCH \#1 SMP PREEMPT Tue Feb 9 14:46:08 UTC 2010 - i686 Intel(R) Pentium(R) M processor 1.60GHz GenuineIntel GNU/Linux. 
-} - - - - -\chapter[ex]{Examples} -\section{Cyrillic scripts} -\subsection{{\sc iso}~9 and derivatives} - -Several transliteration rules are either strictly {\sc iso}~9 compliant -(\type{ru}, \type{ru_old}, \type{all}) or contain {\sc iso}~9 as a -subset (\type{iso9_ocs}).\footnote{% - Unfortunately \CONTEXT\ still lacks language files for some of them - so please excuse the inadequate hyphenation in these cases.% -} - -\trlex{ru}{ru}{cs}{computer-modern-unicode}{% - Transliteration rules for the contemporary Russian alphabet.% -}{% - В~ворота гостиницы губернского города NN въехала довольно красивая рессорная - небольшая бричка, в~какой ездят холостяки: отставные подполковники, - штабс-капитаны, помещики, имеющие около сотни душ крестьян, — словом, все те, - которых называют господами средней руки. - В~бричке сидел господин, не красавец, но и~не дурной наружности, ни слишком - толст, ни слишком тонок; нельзя сказать, чтобы стар, однако ж~и~не так чтобы - слишком молод. -} - -\trlex{ru_old}{ru}{cs}{computer-modern-unicode}{% - With additional characters for pre-1918 Russian orthography (100~per - cent {\sc iso}~9).% -}{% - А~сведется віра, убьютъ сотцкого в~селѣ, ино тебѣ взяти полтіна, а~не - сотцкого, - ино четырѣ гривны, а~намъ віръ не таити в~Новѣгородѣ; а~о~убіствѣ віръ нѣтъ. - А~что волости, честны король, новгородцкіе, ино тебѣ не держати своими мужи, - а~держати мужми новогородцкими. - А~что пошлина в~Торжку и~на Волоцѣ, тівунъ свои держати на своеи чясті, - а~Новугороду на своеи чясти посадника держаті. - А~се волости новогородцкіе: Волокъ со всѣми волостми, Торжокъ, Бѣжіці, - Городець - Палець, Шіпинъ, Мелеця, Егна, Заволочье, Тиръ, Пермь, Печера, Югра, Вологда - с~волостмі. 
-} - -\trlex{all}{ru}{cs}{computer-modern-unicode}{% - The complete cyrillic mapping from {\sc iso}~9; transliterating Belarusian.% -}{% - Беларуская мова, мова беларусаў, уваходзіць у~сям’ю індаеўрапейскіх моў, яе - славянскай групы і~ўсходнеславянскіх моваў падгрупы, на якой размаўляюць - у~Беларусі і~па ўсім свеце, галоўным чынам у~Расіі, Украіне, Польшчы. - Б.~м. падзяляе шмат граматычных і~лексічных уласцівасцяў з~іншымі - ўсходнеславянскімі мовамі (гл. таксама: Іншыя назвы беларускай мовы і~Узаемныя - ўплывы усходнеславянскіх моваў). -} - -\trlex{all}{uk}{cs}{computer-modern-unicode}{% - The complete cyrillic mapping from {\sc iso}~9; transliterating Ukrainian.% -}{% - Украї́нська мова (застарілі назви -- руська мова, проста мова […]) -- - слов'янська мова, державна в~Україні та одна з~трьох «офіційних мов на рівних - засадах» у~не\-ви\-зна\-ній Придністровській Молдавській Республіці. - За різними оцінками загалом у~світі українською мовою говорить від 41~млн. - до 45~млн. осіб, вона входить до третього десятка найпоширеніших мов - світу. -} - -\trlex{all}{ru}{cs}{computer-modern-unicode}{% - The complete cyrillic mapping from {\sc iso}~9; transliterating Serbian.% -}{% - Српски језик је један од словенских језика из породице индоевропских језика. - Први писани споменици у~српској редакцији старословенског језика потичу из XI - и~XII века. - Српски језик је стандардни језик у~службеној употреби у~Србији, Босни - и~Херцеговини и~Црној Гори, а~у~употреби је и~у другим земљама гдје живе - Срби, међу осталима и~у~Хрватској. 
-} - -\trlex{iso9_ocs}{ru}{cs}{cyrilice}{% - Transliteration rules according to {\sc iso}~9 with additions for Old (Church) - Slavonic.% -}{% - Что сѧ дѣѥтѣ по вѣремьнемь~: то ѿидето по вѣрьмьнемь~: приказано бѹдѣте - добрымъ людѣмъ~: а любо грамотою ѹтвѣрдѧть~: како то бѹдѣте всемъ вѣдомъ~: - или кто посль живыи ѡстанѣть сѧ~: того лѣт͠ коли алъбрахтъ~: влд͠ка ризкии - ѹмьрлъ~: ѹздѹмалъ кнѧзѣ смольнескыи~: мьстиславъ~: двд͠въ сн͠ъ~: прислалъ въ - ригѹ своѥго лѹчьшего попа~: ѥрьмея~: и съ нимь ѹмьна мѹжа пантелья~: - исвоѥго горда смольнеска~: та два была послъмь ѹ ризѣ~: из ригы ѥхали на - гочкыи берьго~: тамо твердити миръ~: -} - -\subsection{“Scientific” transliteration} -These transliterations are widely used among scholars, mainly linguists and, to -a lesser extent, historians. -They comprise large character sets in order to represent the original text -adequately and facilitate comparison of texts of the same language written in -different scripts; they are not, however, as easily reversible as {\sc -iso}~9. - -\trlex{ocs}{ru}{cs}{cyrilice}{% - Transliteration for Old Slavonic used in Slavic studies, taken from the - excellent book of \cite [authoryear][aks].\footnote{% - This one and both of the following Czech transliterations, although - elegantly dealing with hard and weak signs by taking characters from the - Cyrillic alphabet, are not unquestioned from a typographical point of - view: - \quotation{If contrasting faces are used for phonetic transcriptions and - main text, each entire phonetic word or passage, not just the individual - phonetic characters, should be set in the chosen phonetic face. Patchwork - typography, in which the letters of a single word come from different faces - and fonts, is a sign of typographic failure. 
[…] - Such mixtures are almost sure to fail unless all the fonts involved have - been designed as a single family.} - (\cite [authoryear][bh]) - From this follows that it is advisably to reconsider your font whether it indeed - provides the needed glyphs from Russian as well. - }% -}{% - Се начнемъ повѣсть сию. - По потопѣ . первиє снве Ноєви . раздѣлиша землю . Симъ . Хамъ . Афетъ . и~ꙗсѧ - въстокъ . Симови Персида . Ватрь . тоже и~до Индикиꙗ в~долготу и~в~ширину [и - до Нирокоуриа] ꙗкоже рещи ѿ въстока и~до полуденьꙗ . и~Суриꙗ . - и~Индиа по Єфратъ рѣку . Вавилонъ . Кордуна . Асурѧне . Мисопотамира . - Аравиꙗ . старѣишаꙗ . Єлмаисъ . Инди . Равиꙗ . на всѧ Д. -} - -\trlex{ru_cz}{ru}{cs}{computer-modern-unicode}{% - Czech phonetic transcription for contemporary Russian.% -}{% - Прошло семь лет после 12-го года. Взволнованное историческое море Европы - улеглось в свои берега. Оно казалось затихшим; но таинственные силы, - двигающие человечество (таинственные потому, что законы, определяющие их - движение, неизвестны нам), продолжали свое действие. - Несмотря на то, что поверхность исторического моря казалась неподвижною, так - же непрерывно, как движение времени, двигалось человечество. Слагались, - разлагались различные группы людских сцеплений; подготовлялись причины - образования и~разложения государств, перемещений народов.% -} - -\trlex{ocs_cz}{ru}{cs}{cyrilice}{% - Czech phonetic transcription for Old Slavonic (superset of the corresponding - Russian transcription). -}{% - Убьеть мужь мужа, то мьстить брату брата, или сынови отца, любо отцю сына, - или братучаду, любо сестрину сынови; аще не будеть кто мьстіѧ, то 40 гривенъ - ꙁа голову; аще будеть русинъ, любо гридинъ, любо купчина, любо іѧбетник, любо - мечникъ, аще иꙁъгои будеть, любо словенинъ, то 40 гривенъ положити ꙁа нь. -} - -\subsection{Serbian} -The tables for converting Serbian text between Cyrillic and Latin -alphabets are \type{sr_tolt} and \type{sr_tocy}. 
-\trlex{sr_tolt}{sr}{hr}{computer-modern-unicode}{% - Transliteration ћирилица \rightarrow\ латиница.% -}{% - Српски језик је један од словенских језика из породице - индоевропских језика. Први писани споменици у српској редакцији - старословенског језика потичу из XI и XII века. - - Српски језик је стандардни језик у службеној употреби у Србији, - Босни и Херцеговини и Црној Гори, а у употреби је и у другим - земљама где живе Срби, међу осталима и у Хрватској.% -} - -\trlex{sr_tocy}{hr}{sr}{computer-modern-unicode}{% - Transliteration latinica \rightarrow\ ćirilica.% -}{% - Srpski jezik je jedan od slovenskih jezika iz porodice - indoevropskih jezika. Prvi pisani spomenici u srpskoj - redakciji staroslovenskog jezika potiču iz XI i XII veka. - - Srpski jezik je standardni jezik u službenoj upotrebi u Srbiji, - Bosni i Hercegovini i Crnoj Gori, a u upotrebi je i u drugim - zemljama gde žive Srbi, među ostalima i u Hrvatskoj.% -} - -\subsection{Bulgarian} - -\trlex{bg_de}{bg}{cs}{computer-modern-unicode}{% - German scientific transliteration for Bulgarian (based on old {\sc - iso}~9 standard).% -}{% - Българският език е индоевропейски език от групата на - южнославянските езици. Той е официалният език на Република - България и един от 23-те официални езика на Европейския съюз. -} - -\subsection{Legacy national transcriptions} -At the moment there are tables for “old school” transcription into three -languages: English (via \type{ru_transcript_en}), German -(\type{ru_transcript_de}) and Czech (\type{ocs_cz}). -At least the German one is almost unreadable if used with -strings longer than two words. -As we have the bijective {\sc iso}~9 mapping at hand there should be no reason at all -to use any of them. - -\trlex{ru_transcript_en}{ru}{en}{computer-modern-unicode}{% - English transcription for contemporary Russian.% -}{% - Прошло семь лет после 12-го года. Взволнованное историческое море Европы - улеглось в свои берега. 
Оно казалось затихшим; но таинственные силы, - двигающие человечество (таинственные потому, что законы, определяющие их - движение, неизвестны нам), продолжали свое действие. - Несмотря на то, что поверхность исторического моря казалась неподвижною, так - же непрерывно, как движение времени, двигалось человечество. Слагались, - разлагались различные группы людских сцеплений; подготовлялись причины - образования и~разложения государств, перемещений народов.% -} - -\trlex{ru_transcript_de}{ru}{deo}{computer-modern-unicode}{% - German transcription for contemporary Russian.\footnote{% - Following \cite[authoryear][duden] p.~82; all the canonical rules are - implemented save one: {\em -его} and {\em -ого} should resolve to {\em - -ewo} and {\em -owo} respectively iff genitive endings. - As this is a grammatical rather than graphetical criterion writing a - substitution algorithm would amount to do natural language parsing. - To make things worse this rule is phonetically confused as it would not - take care of other contexts where {\em г} in those patterns is articulated - as /{\em v}/ like for instance in {\em сегодня} (which is a historical - genitive, though …). - So even if this could be implemented it would not be advisable to use such - a rule.% - }% -}{% - Прошло семь лет после 12-го года. Взволнованное историческое море Европы - улеглось в свои берега. Оно казалось затихшим; но таинственные силы, - двигающие человечество (таинственные потому, что законы, определяющие их - движение, неизвестны нам), продолжали свое действие. - Несмотря на то, что поверхность исторического моря казалась неподвижною, так - же непрерывно, как движение времени, двигалось человечество. 
Слагались, - разлагались различные группы людских сцеплений; подготовлялись причины - образования и~разложения государств, перемещений народов.% -} - -\section{Glagolitic} -\trlex{ocs_gla}{ru}{cs}{hlaholice}{% - “Scientific” transliteration for Old Slavonic written in the Glagolitic - alphabet as used in \cite[authoryear][aks].% -}{% - [ⰲⰾ] - ⰰⰴⱏⰻⰽⱁ ⱍⰽ҃ⱏ ⱄⰻ ⱈⱁⱋⰵⱅⱏ ⱃⰰⰸ[ⱁⱃⰻⱅ] - ⰻ ⰸⰰⰽⱁⱀⱏ ⰿⰰⱀⰰⱄⱅⱏⰻⱃⱏⱄⰽⰻ: [ⰻⰶⰵ] - ⱅⱏⰻ ⱆⱄⱅⰰⰲⰻ჻ Ⱃⰵⱍⰵ ⰶⰵ ⰻⰳⱆⰿ[ⱏ] [ⱀⱏ] - ⰽⰰⰽⱁ ⱈⱁⱋⰵⱅⱏ ⱃⰰⰸⱁⱃⰻⱅⰻ ⰸⰰⰽ[ⱁⱀⱏ] - [.] [ⰰ] ⰵⱄⱅⱏ· ⱍⱃⱏⰲⰻ⁖ ⰻ [ⰿ] [..........] - [..] ⰿⱏ ⱀⰵ ⰿⱁⰶⰵⰿⱏ ⱄⰵⰳⱁ ⱅⱃⱏⱂⱑⱅ[ⰻ] - [ⰴⰰ] ⰾⱆⰱⱁ ⱄⰵⰳⱁ ⰻⰿⱑⰻ ⱄⱏⰴⱑ჻ ⰰ ⰿⱏⰻ ⱁ - [ⱅⰻ]ⰴⰵⰿⱏ: ⰾⱆⰱⱁ ⱄⰵⰳⱁ ⱂⱆⱄⱅⰻ: ⰴⰰ ⱁⱅ - [ⰻⰴ]ⰵⱅⱏ ⰻⰶⰵ ⰵⱄⱅⱏ ⱂⱃⰻⱎⱏⰾⱏ: ⱄ[ⰵ] -} - -\section{Greek} -The Transliterator offers two modes for handling Greek: \type{gr} and -\type{gr_n}. -They differ only on one aspect. -\type{gr} transliterates the canonical Greek alphabet as well as the -special glyphs Digamma, Quoppa and Sampi. -\type{gr_n} behaves exactly the same way except that nasalization is observed -such that \type{γ+[γ|κ]} yields \type{n+[g|k]}. - -\trlex{gr}{agr}{de}{computer-modern-unicode}{% - Transliteration for Greek -- standard. -}{% - οἴνῳ δὲ κάρτα προσκέαται, καί σφι οὐκ ἐμέσαι ἔξεστι, οὐκὶ οὐρῆσαι ἀντίον - ἄλλου. - ταῦτα μέν νυν οὕτω φυλάσσεται, μεθυσκόμενοι δὲ ἐώθασι βουλεύεσθαι τὰ - σπουδαιέστατα τῶν πρηγμάτων: τὸ δ᾽ ἂν ἅδῃ σφι βουλευομένοισι, τοῦτο τῇ - ὑστεραίῃ νήφουσι προτιθεῖ ὁ στέγαρχος, ἐν τοῦ ἂν ἐόντες βουλεύωνται, καὶ ἢν - μὲν - ἅδῃ καὶ νήφουσι, χρέωνται αὐτῷ, ἢν δὲμὴ ἅδῃ, μετιεῖσι. τὰ δ᾽ ἂν νήφοντες - προβουλεύσωνται, μεθυσκόμενοι ἐπιδιαγινώσκουσι. -}% - -\trlex{gr_n}{agr}{de}{computer-modern-unicode}{% - Transliteration for Greek -- alternative respecting nasalization. -}{% - ταῦτα καὶ νεωτέρῳ καὶ πρεσβυτέρῳ ὅτῳ ἂν ἐντυγχάνω ποιήσω, καὶ ξένῳ καὶ ἀστῷ, - μᾶλλον δὲ τοῖς ἀστοῖς, ὅσῳ μου ἐγγυτέρω ἐστὲ γένει. 
-}% - - -\chapter{References} -%\cite[authoryear][iso] -\nocite[duden] -\nocite[bornemann] -\nocite[kirschbaum] -\nocite[iso] -\nocite[aks] -\nocite[dintb] -\placepublications [criterium=all] - -\stoptext -% vim:ft=context diff --git a/doc/transliterator.tex b/doc/transliterator.tex new file mode 100644 index 0000000..d8e392c --- /dev/null +++ b/doc/transliterator.tex @@ -0,0 +1,897 @@ +\setuppapersize [A5] [A5] + +\definecolor [gutenred] [x=bf221f] % rubrication from digitized_Göttingen Gutenberg bible + +\setupinteraction [ + state=start, + color=gutenred, % rubricate, don’t viridificate + contrastcolor=gutenred, +] + +\setupcombinedlist[content][interaction=text,focus=standard] + +\setupindenting[yes,next,medium] + +%\showgrid +\setuphead[chapter][ + align=middle, + number=no, + style={\rm\tfa\setcharacterkerning[capitals]\WORD}, + before={\blank[5*line]}, + after={\blank[2*line,force]} +] + +\setuphead[section][ + align=middle, + number=no, + style={\rm\setcharacterkerning[capitals]\WORD}, + before={\blank[line,force]}, + after={\blank[line]} +] + +\setuphead[subsection][ + align=middle, + number=no, + style={\tf\sc\word}, + before={\blank[line,force]}, + after={\blank[line]} +] + +\setuplist[chapter][ + alternative=c, + interaction=text, + style={\word\sc}, +] +\setuplist[section,subsection][ + alternative=a, + style=\tfx\italic, + interaction=text, + margin=2em, + numberstyle=, + textstyle=, + numberstyle=\tfx, +] + +\setuplist[subsection][ + margin=4em, +] + +\setuplistalternative + +\definecharacterkerning [capitals] [factor=.05] + +\definefontfeature [default][default][ + protrusion=quality, + expansion=quality, + %mode=node, + script=latn, + onum=yes, + %dlig=yes, + liga=yes, +] + +\definefontfeature [smallcaps] [default] [smcp=yes] +\def\sc{\addff{smallcaps}\setcharacterkerning[capitals]} + +\setupbodyfontenvironment [default] [em=italic] + +\starttypescript [serif] [bukyvede] + \setups [font:fallback:serif] + \definefontsynonym [Serif] [name:Bukyvede] 
[features=default] + \definefontsynonym [SerifItalic] [name:Bukyvede-Italic] [features=default] +\stoptypescript +\usetypescript [bukyvede] +\definetypeface [hlaholice] [rm] [serif] [bukyvede] [default] [encoding=ec] +\definetypeface [cyrilice] [rm] [serif] [bukyvede] [default] [encoding=ec] +\definetypeface [lmstd] [rm] [serif] [latin-modern] [default] [encoding=texnansi] + +\usetypescriptfile[type-cmu] +\usetypescript[computer-modern-unicode] +\setupbodyfont[computer-modern-unicode,9pt] + +\usetypescript [serif] [hz] [highquality] +\setupalign [hanging,hz] + +\usemodule[bib] +\usemodule[transliterator] + +\setupcite[authoryear][compress=no] + +\setuppublications[% + alternative=apa,% + refcommand=authoryear,% + sorttype=bbl,% + numbering=yes,% + autohang=yes% +]% + +\setuppublicationlist[% + artauthor=\invertedauthor% +] + +% == REFERENCES =============================================================== + +\startpublication[ + k=aks, + t=book, + a={{Birnbaum/Schaeken}}, + y=1999, + n=4, + u=http://www.schaeken.nl/lu/research/online/publications/akslstud/index.htm, + s={Studien}, +] +\author[]{Henrik}[H.]{}{Birnbaum} +\author[]{Jos}[J.]{}{Schaeken} +\pubyear{1999} +\title{Altkirchenslavische Studien} +\volume{2} +\city{München} +\stoppublication + +\startpublication[ + k=bornemann, + t=book, + a={{Bornemann/Risch}}, + y=1978, + n=2, + s={Grammatik}, +] +\author[]{Eduard}[]{}{Bornemann} +\author[]{Ernst}[]{}{Risch} +\pubyear{1978} +\title{Griechische Grammatik} +\city{Frankfurt am Main} +\edition{2.} +\stoppublication + +\startpublication[ + k=bh, + t=book, + a={{Bringhurst}}, + y=2008, + n=4, + s={Bringhurst}, +] +\author[]{Robert}[R]{}{Bringhurst} +\pubyear{2008} +\title{The Elements of Typographic Style} +\edition{3.2} +\city{Point Roberts WA, Vancouver} +\stoppublication + +\startpublication[ + k=dintb, + t=book, + a={{DIN}}, + y=2001, + n=5, + s={DIN}, +] +\editor[]{}[]{}{DIN Deutsches Institut für Normung e.~V.} +\pubyear{2001} +\title{Bibliotheks und 
Dokumentationswesen} +\city{Berlin/Wien/Zürich} +\stoppublication + +\startpublication[ + k=duden, + t=book, + a={{Drosdowski/Müller/Scholze-Stubenrecht/Wermke}}, + y=1991, + n=1, + s={DUDEN}, +] +\editor[]{Günther}[]{}{Drosdowski} +\editor[]{Wolfgang}[]{}{Müller} +\editor[]{Werner}[]{}{Scholze-Stubenrecht} +\editor[]{Matthias}[]{}{Wermke} +\pubyear{1991} +\title{DUDEN Rechtschreibung der deutschen Sprache} +\city{Mannheim et al} +\edition{20.} +\stoppublication + +\startpublication[ + k=kirschbaum, + t=book, + a={{Kirschbaum}}, + y=2001, + n=3, + s={Grammatik}, +] +\author[]{Ernst Georg}[]{}{Kirschbaum} +\pubyear{2001} +\title{Grammatik der russischen Sprache} +\city{Berlin} +\stoppublication + +\startpublication[ + k=iso, + t=inbook, + a={{ISO}}, + y=1995, + n=6, + s={ISO~9}, +] +\editor[]{}[]{}{{{\sc iso} International Organization for Standardization}} +\pubyear{1995} +\title{Information and documentation -- Transliteration of Cyrillic characters into Latin characters -- Slavic and non-Slavic languages} +\edition{2.} +\crossref{dintb} +\pages{230--245} +\stoppublication + +%============================================================================== + +\setupframed[ + frame=off, + align=normal, + location=top, +] + +\defineframed[displayouter][ + location=top, + align={normal,verytolerant}, + frame=off, + style=\tfx, +] +\defineframed[displayinner][displayouter][ + offset=1ex, + width=.47\textwidth, +] + +\definenumber[excnt] +\setnumber[excnt][1] + +% This should rather be done using key-value args but I'm too lazy now. +% 1: mode; 2: hyphenate original; 3: hyphenate transliteration; +% 4: font for original; 5: caption; 6: original text.
+\def\trlex#1#2#3#4#5#6{% + \setuplocalinterlinespace[line=8pt]% + \startplacefigure [ + location=force, + title={\type{[mode=#1,hyphenate=#3]}\hskip 1em{\italic #5}} + ]% + \displayouter{% + \displayinner{% + \setupbodyfont[#4]% + \tfx + %\setuptolerance[verytolerant, stretch] + \setuptolerance[verytolerant] + \unskip\language[#2]#6\par + }% + \displayinner{% + \tfx + \transliterate[mode=#1,hyphenate=#3]{#6\par}% + } + } + \stopplacefigure + \incrementnumber[excnt]% +} + +\defineframedtext[CenteredText][width=fit,frame=off,align=middle] + +\usemodule[int-load] +\loadsetups[t-transliterator.xml] + + +\setupwhitespace[medium] +\language[en] + +\starttext + +\setuppagenumbering[state=stop] + +\blank[3cm,force] + + +%\showframe +\startstandardmakeup[location=middle] + +\setuplayout[width=middle] +\raggedcenter +\vfill + {\setupbodyfont[19pt] + {\em The} + \blank [2*big] + {\tfc\sc transliterator} + \blank [2*big] + {\em for \CONTEXT} + \blank [9*big] + {\tfa\sc manual} + } +\vfill +\stopstandardmakeup + +\startstandardmakeup +\vfill +\framed [frame=off,topframe=on] {% +\tfxx\ss\setupinterlinespace[small]% +\startlines +The {\em Transliterator} module and mini-manual, +by Philipp Gesang, Radebeul. +Mail any patches or suggestions to + +{\tt philipp -dot- gesang -at- alumni -dot- uni-heidelberg -dot- de} +\useurl[me][https://phi-gamma.net] +\from[me]% +\stoplines +} +\stopstandardmakeup + +\setuppagenumbering[% + location=middle, + state=start, + style=\tfc +] + +\setuppagenumber[number=1] +\completecontent +\chapter{Usage and Functionality} +\section{Overview} +The Transliterator provides two commands: \type{\setuptransliterator} +preferably goes into the preamble and allows for global configuration. +The Transliterator is invoked locally by \type{\transliterate} which does the +actual transliteration of text passages. 
+ +\setup{setuptransliterator} + +\setup{transliterate} + +\section{Loading and Configuring the Module} +In order to use the Transliterator in a document we put the following somewhere before +\type{\starttext}. +\starttyping +\usemodule[transliterator] +\stoptyping +Although it has some defaults already set at this point they will most likely +not correspond to what is needed in the document. +To override the presets we use the command \type{\setuptransliterator[#1]}. +It takes a comma separated list of two key-value pairs: \type{mode} and +\type{hyphenate}. +Through {\em mode} we specify the transliteration method. +By the time of this writing this can be one of the following set: + +\startplacetable[location=top,title=Transliteration modes.] + \tfx + \starttabulate[|l|p|] + \HL + \NC mode \NC description \NC\NR + \HL + \NC \type{all} \NC {\sc iso}~9 complete \NC\NR + \NC \type{bg_de} \NC Bulgarian, German „scientific“ transliteration\NC\NR + \NC \type{gr} \NC transliteration for Greek \NC\NR + \NC \type{gr_n} \NC transliteration for Greek obeying nasalizations \NC\NR + \NC \type{iso9_ocs} \NC == \type{all} plus non-{\sc iso} additions for Old (Church) Slavonic \NC\NR + \NC \type{ocs} \NC “scientific” transliteration for Old (Church) Slavonic\NC\NR + \NC \type{ocs_cz} \NC Czech transcription for Old (Church) Slavonic\NC\NR + \NC \type{ocs_gla} \NC “scientific” transliteration for Old (Church) Slavonic / Glagolitic alphabet\NC\NR + \NC \type{ru} \NC {\sc iso}~9 Russian \NC\NR + \NC \type{ru_cz} \NC Czech transcription for Russian\NC\NR + \NC \type{ru_old} \NC {\sc iso}~9 Russian plus pre-1918 chars (the default)\NC\NR + \NC \type{ru_transcript_de} \NC German transcription for Russian \NC\NR + \NC \type{ru_transcript_en} \NC English transcription for Russian \NC\NR + \NC \type{sr_tocy} \NC Serbian, Latin to Cyrillic \NC\NR + \NC \type{sr_tolt} \NC Serbian, Cyrillic to Latin \NC\NR + \HL + \stoptabulate +\stopplacetable + + +{\em Nota bene}: The description at this 
point only serves as a placeholder as the +transliteration modes are discussed in detail later in this document. + +Through the \type{hyphenate} argument it is possible to adjust the language +that is used for hyphenation. +Specifying \type{\setuptransliterator[hyphenate=nl]} will let every transliterated +part of the document be processed according to Dutch rules, leaving the overall +\type{\language[#1]} configuration unchanged for the rest of the content. + +Another argument, \type{deficient_font}, can be used in +combination with the modes \type{all}, \type{ru_old} and +\type{iso9_ocs}. It lets you circumvent the deficiency that some +fonts show concerning the characters that {\sc iso}~9 assigns to +Cyrillic “ь” and “ъ”. Set it to {\em true} to enable it. + +The actual transliteration is done using the macro +\type{\transliterate[#1]} \type{{#2}}. +The second argument takes the raw string in the original language that we want +to process, while the first, optional argument accepts local adjustments for +\type{mode} and \type{hyphenate}. +Thus, we would typeset one of Epicuros' sayings like this: +{\setuptolerance[verytolerant] +\starttyping +\transliterate[mode=gr]{κακὸν ἀνάγκη, ἀλλ' οὐδεμία ἀνάγκη ζῆν + μετὰ ἀνάγκης} +\stoptyping +\noindentation which yields \quotation{\transliterate[mode=gr]{κακὸν ἀνάγκη, ἀλλ' οὐδεμία ἀνάγκη ζῆν +μετὰ ἀνάγκης}} in the {\sc pdf} output. +} +Alternatively there is an environment, \type{\starttransliterate[#1]}, as well, +that takes the same arguments. + +There are two special switches for the {\em Serbian} patterns, +\type{hinting} and \type{sr_exceptions}, allowing for a little +more fine-tuning. +If activated, hinting provides the special character “\type{*}” as +a means to indicate positions where the sequences “lj” and “nj” +are to be treated as separate consonants. +E.~g.
\type{\transliterate[mode=sr_tocy]{in*jekcija}} is +correctly transliterated as \transliterate[mode=sr_tocy]{in*jekcija}, +and not \transliterate[mode=sr_tocy,sr_exceptions=no]{injekcija}. +Likewise, further exceptions that are internally represented as +a lookup table can be toggled off or on by the +\type{sr_exceptions} switch. +This pertains to words like “nadživeti” (result: \transliterate[mode=sr_tocy]{nadživeti}) +but may lead to accidental false positives in cases that the +module author didn’t foresee. +By default both hinting and lexical exceptions are set to +\type{yes}. + +For orientation purposes the Transliterator comes with two macros that allow +for closer inspection of the internal tables. +\type{\showOneTranslitTab{#1}} outputs, obviously, a single table; their +identifiers +can be found in the \type{trans_} +\type{tables_*.lua} files in the transliterator +directory. +The lazy alternative is \type{\showTranslitTabs} which prints all registered +tables in a row nicely formatted as indexable sections. +(Be warned, this may take some time.) + +\chapter{Introduction} + +\hfil\framed[width=\hsize,align=left]{% + \inframed[bottomframe=on]{\it What's all this, then?} + \blank[medium] + {\sc Graham Chapman} +} +\blank[2*big] + +\noindentation At first glance, {\em transliteration} -- the accurate representation of letters from one +alphabet in another -- seems obsolete after the advent of Unicode +which made its way even into \TeX\ lately. +Why not just go on and write down everything in the original script? +But still there are lots of situations where transliteration is desirable, +e.~g. some scholarly habits might prescribe it in the main text with citations in +footnotes left in the original alphabet; or transliteration might facilitate +comparison within one language that happens to be written in different scripts; +finally, including text in a foreign script might be impossible if there is no +appropriate font which fits the main text.
+However, it is still most convenient for the writer to keep the +untransliterated original in the document source as this allows for reusing it in +another context where different transliteration rules might apply. +The Transliterator module is meant to provide both: have the original in the +source and a transliteration only in the final document. + +Another way of handling foreign languages is {\em transcription}. +It aims at producing some representation that does not rely on symbolisms +alien to the language and thus to be at least \quotation{pronounceable} +without further know\-ledge. +As transcription methods are language specific and highly idiosyncratic they +complicate the restoration of the original phrase because information may be lost. +The Transliterator provides means of transcription as well but in most cases +you should refrain from using them (\type{[mode=ru_transcript_en]}, +\type{[mode=ru_transcript_de]}). + +For Cyrillic scripts the best quality is achieved using the standardized +transliteration according to {\sc iso~9}.\footnote{\cite[authoryear][iso].} +This method not only covers all contemporary languages that are written in +a variety of Cyrillic but provides a bijective mapping onto Latin characters as +well. +Consequently, you can unambiguously revert the transliteration into +its original form which was impossible with previous versions of {\sc +iso}~9 because +they contained several exceptions depending on the original language. +Although fifteen years old it has not yet made its way into scholarly +publications at large so it might not immediately look familiar.\footnote{ + A hasty glance at the latest issues of around 20~journals in a local library + revealed that 2~of them actually are using {\sc iso}~9, these are {\em Przegląd + wschodni} as of Nr. X, 3 (2008) and {\em Kwartalnik historyczny} as of CXVI, + 3 (2009); the latter even contains a table on p.~218 showing a subset of the + {\sc iso}~9 transliteration rules.
+} +The diacritics are not identical to the \quotation{scientific} +transliteration used in Slavic studies but as long as your editor does not +enforce its traditional method you should always prefer {\sc iso}~9 +(\type{[mode=ru]}, \type{[mode=ru_old]}, \type{[mode=all]}). + +But {\sc iso}~9, too, has its shortcomings. +It has no definitions for historical forms of the cyrillic script like +pre-XVIII-century Russian and Old (Church) Slavonic while those are covered by +the scholarly transliterations. +To amend the situation the Transliterator provides an extension to {\sc +iso}~9 for +Old Slavonic containing the glyphs +\startluacode +local translit = thirddata.translit +environment.loadluafile("trans_tables_scntfc") +local cnt, len = 0, 0 +for i,j in pairs(translit.ocs_add_low) do + len = len + 1 +end + +for k,v in pairs(translit.ocs_add_low) do + cnt = cnt + 1 + context.bgroup() + context.setupbodyfont({"cyrilice"}) + context(k) + context.egroup() + if cnt < len -1 then + context(", ") + elseif cnt < len then + context("\\ and ") + end +end +\stopluacode +\ taken from the scientific transliteration (\type{[mode=iso9_ocs]}). +If you prefer more coherency you might want to use pure \quotation{scientific} +transliteration (\type{[mode=ocs]}). +This method is complemented by \type{[mode=ocs_gla]}, the only option the +Transliterator offers for the Glagolitic alphabet; they can be used consistently +along each other as they were taken from the same +book.\footnote{\cite[authoryear][aks] p.~77 \cite[url][aks].} + +As far as I know there is no standardized transliteration for Greek so I had to +resort to the one that is used in scholarly literature. +Its main drawback is that it has no representation for diacritics apart from +(rough) breathing, but it respects specific rules for diphthongs and vowels in +initial positions (\type{[mode=gr]}). 
+There is one alternative mode for those who prefer their {\em γ} phonetically +resolved to /{\em n}/ before velars ({\em γ}, {\em κ}, {\em χ} and {\em ξ}; +\type{[mode=gr_n]}). + +Concerning the hyphenation within transliterated passages the default is set to +\type{[hyphenate=cs]} (Czech) which produces reasonable results when using +\type{all}, \type{iso9_ocs} or \type{ru_cz}. +For stuff like the English and German transcription use their respective native +hyphenation.\footnote{% + You'll have to specify this through \type{\setuptransliterator} + or locally because the default hyphenation is {\em not} the same as your + document's. +} +However, as there is no hyphenation pattern I know of that closely resembles the +transliteration of Greek you might have to resort to putting \type{\discretionary} +hyphens when line breaking does not satisfy. + +The Transliterator as a whole is nothing more than a bunch of dictionaries +containing substitution rules for tokens that may occur in the text. +These tokens may be single characters or strings of more than one character. +As there is no simple way to impose order onto those dictionaries the rules for +one transliteration method are, if needed, distributed over more than one table +which will be applied successively to ensure that multi-character rules +are processed first. + + +\setupfloats[spacebefore=small,spaceafter=small] +\startplacetable[location=left,title={ + Processing time for corpus {\language[cs]Evgenij Onegin} according to + GNU time(1) and the \CONTEXT\ stats.
+}] + \starttabulate[|l|cg(.)|cg(.)|] + \HL%····················································% + \NC mode \NC time(1) in $s$ \NC \CONTEXT \NC \NR + \NC \NC 8.98 \NC 8.82 \NC \NR + \NC \type{all} \NC 8.37 \NC 8.25 \NC \NR + \NC \type{ru_cz} \NC 8.61 \NC 8.48 \NC \NR + \NC \type{ru_transcript_en} \NC 9.26 \NC 9.10 \NC \NR + \NC \type{ru_transcript_de} \NC 14.83 \NC 14.71 \NC \NR + \HL%····················································% + \stoptabulate +\stopplacetable +\setuptolerance[tolerant] +Following suggestions from the mailing list, the Transliterator uses {\em LPeg} +when substituting. +This means a huge speed improvement for most substitution modes when compared +to the older mechanism that used \type{string.gsub} iteratively. +In ordinary use when transliterating single words or short phrases the +Transliterator should have little impact on document processing time at large, +with the exception of the German transcription mode, perhaps.\footnote{ + The problem lies within the rule set for the German transcription which + dictates different instructions depending on the environment of a character; + these may conflict, i.~e. it is impossible to substitute a character stream + in a single run as some rules may apply only to the result of a previous rule. + Let me know if there's a way to tell LPeg to backtrack to the last character + of a match and not to continue on the next. +} +Transliterating (and typesetting in MKIV) \transliterate{Александр Пушкин}'s verse novel +\transliterate{Евгений Онегин}, a corpus of about 27000 words, in +\type{[mode=all]} shows little to no delay at all. +In fact, typesetting Cyrillic letters with Russian hyphenation seems to slow +things down so much that transliteration may be faster and use slightly less +memory.\footnote{% + On an IBM T43: \tt 2.6.32-ARCH \#1 SMP PREEMPT Tue Feb 9 14:46:08 UTC 2010 + i686 Intel(R) Pentium(R) M processor 1.60GHz GenuineIntel GNU/Linux.
+} + + + + +\chapter[ex]{Examples} +\section{Cyrillic scripts} +\subsection{{\sc iso}~9 and derivatives} + +Several transliteration rules are either strictly {\sc iso}~9 compliant +(\type{ru}, \type{ru_old}, \type{all}) or contain {\sc iso}~9 as a +subset (\type{iso9_ocs}).\footnote{% + Unfortunately \CONTEXT\ still lacks language files for some of them + so please excuse the inadequate hyphenation in these cases.% +} + +\trlex{ru}{ru}{cs}{computer-modern-unicode}{% + Transliteration rules for the contemporary Russian alphabet.% +}{% + В~ворота гостиницы губернского города NN въехала довольно красивая рессорная + небольшая бричка, в~какой ездят холостяки: отставные подполковники, + штабс-капитаны, помещики, имеющие около сотни душ крестьян, — словом, все те, + которых называют господами средней руки. + В~бричке сидел господин, не красавец, но и~не дурной наружности, ни слишком + толст, ни слишком тонок; нельзя сказать, чтобы стар, однако ж~и~не так чтобы + слишком молод. +} + +\trlex{ru_old}{ru}{cs}{computer-modern-unicode}{% + With additional characters for pre-1918 Russian orthography (100~per + cent {\sc iso}~9).% +}{% + А~сведется віра, убьютъ сотцкого в~селѣ, ино тебѣ взяти полтіна, а~не + сотцкого, + ино четырѣ гривны, а~намъ віръ не таити в~Новѣгородѣ; а~о~убіствѣ віръ нѣтъ. + А~что волости, честны король, новгородцкіе, ино тебѣ не держати своими мужи, + а~держати мужми новогородцкими. + А~что пошлина в~Торжку и~на Волоцѣ, тівунъ свои держати на своеи чясті, + а~Новугороду на своеи чясти посадника держаті. + А~се волости новогородцкіе: Волокъ со всѣми волостми, Торжокъ, Бѣжіці, + Городець + Палець, Шіпинъ, Мелеця, Егна, Заволочье, Тиръ, Пермь, Печера, Югра, Вологда + с~волостмі.
+} + +\trlex{all}{ru}{cs}{computer-modern-unicode}{% + The complete cyrillic mapping from {\sc iso}~9; transliterating Belarusian.% +}{% + Беларуская мова, мова беларусаў, уваходзіць у~сям’ю індаеўрапейскіх моў, яе + славянскай групы і~ўсходнеславянскіх моваў падгрупы, на якой размаўляюць + у~Беларусі і~па ўсім свеце, галоўным чынам у~Расіі, Украіне, Польшчы. + Б.~м. падзяляе шмат граматычных і~лексічных уласцівасцяў з~іншымі + ўсходнеславянскімі мовамі (гл. таксама: Іншыя назвы беларускай мовы і~Узаемныя + ўплывы усходнеславянскіх моваў). +} + +\trlex{all}{uk}{cs}{computer-modern-unicode}{% + The complete cyrillic mapping from {\sc iso}~9; transliterating Ukrainian.% +}{% + Украї́нська мова (застарілі назви -- руська мова, проста мова […]) -- + слов'янська мова, державна в~Україні та одна з~трьох «офіційних мов на рівних + засадах» у~не\-ви\-зна\-ній Придністровській Молдавській Республіці. + За різними оцінками загалом у~світі українською мовою говорить від 41~млн. + до 45~млн. осіб, вона входить до третього десятка найпоширеніших мов + світу. +} + +\trlex{all}{ru}{cs}{computer-modern-unicode}{% + The complete cyrillic mapping from {\sc iso}~9; transliterating Serbian.% +}{% + Српски језик је један од словенских језика из породице индоевропских језика. + Први писани споменици у~српској редакцији старословенског језика потичу из XI + и~XII века. + Српски језик је стандардни језик у~службеној употреби у~Србији, Босни + и~Херцеговини и~Црној Гори, а~у~употреби је и~у другим земљама гдје живе + Срби, међу осталима и~у~Хрватској. 
+} + +\trlex{iso9_ocs}{ru}{cs}{cyrilice}{% + Transliteration rules according to {\sc iso}~9 with additions for Old (Church) + Slavonic.% +}{% + Что сѧ дѣѥтѣ по вѣремьнемь~: то ѿидето по вѣрьмьнемь~: приказано бѹдѣте + добрымъ людѣмъ~: а любо грамотою ѹтвѣрдѧть~: како то бѹдѣте всемъ вѣдомъ~: + или кто посль живыи ѡстанѣть сѧ~: того лѣт͠ коли алъбрахтъ~: влд͠ка ризкии + ѹмьрлъ~: ѹздѹмалъ кнѧзѣ смольнескыи~: мьстиславъ~: двд͠въ сн͠ъ~: прислалъ въ + ригѹ своѥго лѹчьшего попа~: ѥрьмея~: и съ нимь ѹмьна мѹжа пантелья~: + исвоѥго горда смольнеска~: та два была послъмь ѹ ризѣ~: из ригы ѥхали на + гочкыи берьго~: тамо твердити миръ~: +} + +\subsection{“Scientific” transliteration} +These transliterations are widely used among scholars, mainly linguists and, to +a lesser extent, historians. +They comprise large character sets in order to represent the original text +adequately and facilitate comparison of texts of the same language written in +different scripts; they are not, however, as easily reversible as {\sc +iso}~9. + +\trlex{ocs}{ru}{cs}{cyrilice}{% + Transliteration for Old Slavonic used in Slavic studies, taken from the + excellent book of \cite [authoryear][aks].\footnote{% + This one and both of the following Czech transliterations, although + elegantly dealing with hard and weak signs by taking characters from the + Cyrillic alphabet, are not unquestioned from a typographical point of + view: + \quotation{If contrasting faces are used for phonetic transcriptions and + main text, each entire phonetic word or passage, not just the individual + phonetic characters, should be set in the chosen phonetic face. Patchwork + typography, in which the letters of a single word come from different faces + and fonts, is a sign of typographic failure. 
[…] + Such mixtures are almost sure to fail unless all the fonts involved have + been designed as a single family.} + (\cite [authoryear][bh]) + From this it follows that it is advisable to reconsider whether your font indeed + provides the needed glyphs from Russian as well. + }% +}{% + Се начнемъ повѣсть сию. + По потопѣ . первиє снве Ноєви . раздѣлиша землю . Симъ . Хамъ . Афетъ . и~ꙗсѧ + въстокъ . Симови Персида . Ватрь . тоже и~до Индикиꙗ в~долготу и~в~ширину [и + до Нирокоуриа] ꙗкоже рещи ѿ въстока и~до полуденьꙗ . и~Суриꙗ . + и~Индиа по Єфратъ рѣку . Вавилонъ . Кордуна . Асурѧне . Мисопотамира . + Аравиꙗ . старѣишаꙗ . Єлмаисъ . Инди . Равиꙗ . на всѧ Д. +} + +\trlex{ru_cz}{ru}{cs}{computer-modern-unicode}{% + Czech phonetic transcription for contemporary Russian.% +}{% + Прошло семь лет после 12-го года. Взволнованное историческое море Европы + улеглось в свои берега. Оно казалось затихшим; но таинственные силы, + двигающие человечество (таинственные потому, что законы, определяющие их + движение, неизвестны нам), продолжали свое действие. + Несмотря на то, что поверхность исторического моря казалась неподвижною, так + же непрерывно, как движение времени, двигалось человечество. Слагались, + разлагались различные группы людских сцеплений; подготовлялись причины + образования и~разложения государств, перемещений народов.% +} + +\trlex{ocs_cz}{ru}{cs}{cyrilice}{% + Czech phonetic transcription for Old Slavonic (superset of the corresponding + Russian transcription). +}{% + Убьеть мужь мужа, то мьстить брату брата, или сынови отца, любо отцю сына, + или братучаду, любо сестрину сынови; аще не будеть кто мьстіѧ, то 40 гривенъ + ꙁа голову; аще будеть русинъ, любо гридинъ, любо купчина, любо іѧбетник, любо + мечникъ, аще иꙁъгои будеть, любо словенинъ, то 40 гривенъ положити ꙁа нь. +} + +\subsection{Serbian} +The tables for converting Serbian text between Cyrillic and Latin +alphabets are \type{sr_tolt} and \type{sr_tocy}. 
+\trlex{sr_tolt}{sr}{hr}{computer-modern-unicode}{% + Transliteration ћирилица \rightarrow\ латиница.% +}{% + Српски језик је један од словенских језика из породице + индоевропских језика. Први писани споменици у српској редакцији + старословенског језика потичу из XI и XII века. + + Српски језик је стандардни језик у службеној употреби у Србији, + Босни и Херцеговини и Црној Гори, а у употреби је и у другим + земљама где живе Срби, међу осталима и у Хрватској.% +} + +\trlex{sr_tocy}{hr}{sr}{computer-modern-unicode}{% + Transliteration latinica \rightarrow\ ćirilica.% +}{% + Srpski jezik je jedan od slovenskih jezika iz porodice + indoevropskih jezika. Prvi pisani spomenici u srpskoj + redakciji staroslovenskog jezika potiču iz XI i XII veka. + + Srpski jezik je standardni jezik u službenoj upotrebi u Srbiji, + Bosni i Hercegovini i Crnoj Gori, a u upotrebi je i u drugim + zemljama gde žive Srbi, među ostalima i u Hrvatskoj.% +} + +\subsection{Bulgarian} + +\trlex{bg_de}{bg}{cs}{computer-modern-unicode}{% + German scientific transliteration for Bulgarian (based on old {\sc + iso}~9 standard).% +}{% + Българският език е индоевропейски език от групата на + южнославянските езици. Той е официалният език на Република + България и един от 23-те официални езика на Европейския съюз. +} + +\subsection{Legacy national transcriptions} +At the moment there are tables for “old school” transcription into three +languages: English (via \type{ru_transcript_en}), German +(\type{ru_transcript_de}) and Czech (\type{ocs_cz}). +At least the German one is almost unreadable if used with +strings longer than two words. +As we have the bijective {\sc iso}~9 mapping at hand there should be no reason at all +to use any of them. + +\trlex{ru_transcript_en}{ru}{en}{computer-modern-unicode}{% + English transcription for contemporary Russian.% +}{% + Прошло семь лет после 12-го года. Взволнованное историческое море Европы + улеглось в свои берега. 
Оно казалось затихшим; но таинственные силы, + двигающие человечество (таинственные потому, что законы, определяющие их + движение, неизвестны нам), продолжали свое действие. + Несмотря на то, что поверхность исторического моря казалась неподвижною, так + же непрерывно, как движение времени, двигалось человечество. Слагались, + разлагались различные группы людских сцеплений; подготовлялись причины + образования и~разложения государств, перемещений народов.% +} + +\trlex{ru_transcript_de}{ru}{deo}{computer-modern-unicode}{% + German transcription for contemporary Russian.\footnote{% + Following \cite[authoryear][duden] p.~82; all the canonical rules are + implemented save one: {\em -его} and {\em -ого} should resolve to {\em + -ewo} and {\em -owo} respectively iff genitive endings. + As this is a grammatical rather than graphetical criterion writing a + substitution algorithm would amount to do natural language parsing. + To make things worse this rule is phonetically confused as it would not + take care of other contexts where {\em г} in those patterns is articulated + as /{\em v}/ like for instance in {\em сегодня} (which is a historical + genitive, though …). + So even if this could be implemented it would not be advisable to use such + a rule.% + }% +}{% + Прошло семь лет после 12-го года. Взволнованное историческое море Европы + улеглось в свои берега. Оно казалось затихшим; но таинственные силы, + двигающие человечество (таинственные потому, что законы, определяющие их + движение, неизвестны нам), продолжали свое действие. + Несмотря на то, что поверхность исторического моря казалась неподвижною, так + же непрерывно, как движение времени, двигалось человечество. 
Слагались, + разлагались различные группы людских сцеплений; подготовлялись причины + образования и~разложения государств, перемещений народов.% +} + +\section{Glagolitic} +\trlex{ocs_gla}{ru}{cs}{hlaholice}{% + “Scientific” transliteration for Old Slavonic written in the Glagolitic + alphabet as used in \cite[authoryear][aks].% +}{% + [ⰲⰾ] + ⰰⰴⱏⰻⰽⱁ ⱍⰽ҃ⱏ ⱄⰻ ⱈⱁⱋⰵⱅⱏ ⱃⰰⰸ[ⱁⱃⰻⱅ] + ⰻ ⰸⰰⰽⱁⱀⱏ ⰿⰰⱀⰰⱄⱅⱏⰻⱃⱏⱄⰽⰻ: [ⰻⰶⰵ] + ⱅⱏⰻ ⱆⱄⱅⰰⰲⰻ჻ Ⱃⰵⱍⰵ ⰶⰵ ⰻⰳⱆⰿ[ⱏ] [ⱀⱏ] + ⰽⰰⰽⱁ ⱈⱁⱋⰵⱅⱏ ⱃⰰⰸⱁⱃⰻⱅⰻ ⰸⰰⰽ[ⱁⱀⱏ] + [.] [ⰰ] ⰵⱄⱅⱏ· ⱍⱃⱏⰲⰻ⁖ ⰻ [ⰿ] [..........] + [..] ⰿⱏ ⱀⰵ ⰿⱁⰶⰵⰿⱏ ⱄⰵⰳⱁ ⱅⱃⱏⱂⱑⱅ[ⰻ] + [ⰴⰰ] ⰾⱆⰱⱁ ⱄⰵⰳⱁ ⰻⰿⱑⰻ ⱄⱏⰴⱑ჻ ⰰ ⰿⱏⰻ ⱁ + [ⱅⰻ]ⰴⰵⰿⱏ: ⰾⱆⰱⱁ ⱄⰵⰳⱁ ⱂⱆⱄⱅⰻ: ⰴⰰ ⱁⱅ + [ⰻⰴ]ⰵⱅⱏ ⰻⰶⰵ ⰵⱄⱅⱏ ⱂⱃⰻⱎⱏⰾⱏ: ⱄ[ⰵ] +} + +\section{Greek} +The Transliterator offers two modes for handling Greek: \type{gr} and +\type{gr_n}. +They differ only on one aspect. +\type{gr} transliterates the canonical Greek alphabet as well as the +special glyphs Digamma, Quoppa and Sampi. +\type{gr_n} behaves exactly the same way except that nasalization is observed +such that \type{γ+[γ|κ]} yields \type{n+[g|k]}. + +\trlex{gr}{agr}{de}{computer-modern-unicode}{% + Transliteration for Greek -- standard. +}{% + οἴνῳ δὲ κάρτα προσκέαται, καί σφι οὐκ ἐμέσαι ἔξεστι, οὐκὶ οὐρῆσαι ἀντίον + ἄλλου. + ταῦτα μέν νυν οὕτω φυλάσσεται, μεθυσκόμενοι δὲ ἐώθασι βουλεύεσθαι τὰ + σπουδαιέστατα τῶν πρηγμάτων: τὸ δ᾽ ἂν ἅδῃ σφι βουλευομένοισι, τοῦτο τῇ + ὑστεραίῃ νήφουσι προτιθεῖ ὁ στέγαρχος, ἐν τοῦ ἂν ἐόντες βουλεύωνται, καὶ ἢν + μὲν + ἅδῃ καὶ νήφουσι, χρέωνται αὐτῷ, ἢν δὲμὴ ἅδῃ, μετιεῖσι. τὰ δ᾽ ἂν νήφοντες + προβουλεύσωνται, μεθυσκόμενοι ἐπιδιαγινώσκουσι. +}% + +\trlex{gr_n}{agr}{de}{computer-modern-unicode}{% + Transliteration for Greek -- alternative respecting nasalization. +}{% + ταῦτα καὶ νεωτέρῳ καὶ πρεσβυτέρῳ ὅτῳ ἂν ἐντυγχάνω ποιήσω, καὶ ξένῳ καὶ ἀστῷ, + μᾶλλον δὲ τοῖς ἀστοῖς, ὅσῳ μου ἐγγυτέρω ἐστὲ γένει. 
+}% + + +\chapter{References} +%\cite[authoryear][iso] +\nocite[duden] +\nocite[bornemann] +\nocite[kirschbaum] +\nocite[iso] +\nocite[aks] +\nocite[dintb] +\placepublications [criterium=all] + +\stoptext +% vim:ft=context diff --git a/scripts/context/lua/third/transliterator/mtx-t-transliterate.lua b/scripts/context/lua/third/transliterator/mtx-t-transliterate.lua deleted file mode 100644 index a6e3968..0000000 --- a/scripts/context/lua/third/transliterator/mtx-t-transliterate.lua +++ /dev/null @@ -1,64 +0,0 @@ --- --------------------------------------------------------------------------------- --- FILE: mtx-transliterate.lua --- USAGE: mtxrun --script transliterate [--mode=mode] --s="string" --- DESCRIPTION: context script interface for the Transliterator module --- REQUIREMENTS: latest ConTeXt MkIV --- AUTHOR: Philipp Gesang (Phg), --- CREATED: 2011-06-11T16:14:16+0200 --------------------------------------------------------------------------------- --- - -environment.loadluafile("transliterator") - -local translit = thirddata.translit - -translit.__script = true -scripts = scripts or { } -scripts.transliterate = { } -local ea = environment.argument - -local helpinfo = [[ -=============================================================== - The Transliterator module, command line interface. - © 2010--2011 Philipp Gesang. License: 2-clause BSD. - Home: -=============================================================== - -USAGE: - - mtxrun --script transliterate [--mode=mode] --s="target" - - Where “target” is the target string to be transliterated. - Optionally, a transliteration mode can be specified (see - the respective descriptions in transliterator.pdf). The - “mode” defaults to “ru_old”. 
- -=============================================================== -]] - -local application = logs.application { - name = "mtx-transliterate", - banner = "The Transliterator for ConTeXt, hg-rev 38+", - helpinfo = helpinfo, -} - -scripts.transliterate.input = ea("s") -scripts.transliterate.out = function (sin, sout) - if ea("silent") then - io.write(sout) - else - io.write(string.format("\n“%s” -> “%s”\n", sin, sout)) - end -end - -if scripts.transliterate.input then - local mode = ea("mode") or "ru_old" - scripts.transliterate.out( - scripts.transliterate.input, - translit.transliterate(mode, ea("s")) - ) -else - application.help() -end - diff --git a/scripts/mtx-t-transliterate.lua b/scripts/mtx-t-transliterate.lua new file mode 100644 index 0000000..a6e3968 --- /dev/null +++ b/scripts/mtx-t-transliterate.lua @@ -0,0 +1,64 @@ +-- +-------------------------------------------------------------------------------- +-- FILE: mtx-transliterate.lua +-- USAGE: mtxrun --script transliterate [--mode=mode] --s="string" +-- DESCRIPTION: context script interface for the Transliterator module +-- REQUIREMENTS: latest ConTeXt MkIV +-- AUTHOR: Philipp Gesang (Phg), +-- CREATED: 2011-06-11T16:14:16+0200 +-------------------------------------------------------------------------------- +-- + +environment.loadluafile("transliterator") + +local translit = thirddata.translit + +translit.__script = true +scripts = scripts or { } +scripts.transliterate = { } +local ea = environment.argument + +local helpinfo = [[ +=============================================================== + The Transliterator module, command line interface. + © 2010--2011 Philipp Gesang. License: 2-clause BSD. + Home: +=============================================================== + +USAGE: + + mtxrun --script transliterate [--mode=mode] --s="target" + + Where “target” is the target string to be transliterated. 
+ Optionally, a transliteration mode can be specified (see + the respective descriptions in transliterator.pdf). The + “mode” defaults to “ru_old”. + +=============================================================== +]] + +local application = logs.application { + name = "mtx-transliterate", + banner = "The Transliterator for ConTeXt, hg-rev 38+", + helpinfo = helpinfo, +} + +scripts.transliterate.input = ea("s") +scripts.transliterate.out = function (sin, sout) + if ea("silent") then + io.write(sout) + else + io.write(string.format("\n“%s” -> “%s”\n", sin, sout)) + end +end + +if scripts.transliterate.input then + local mode = ea("mode") or "ru_old" + scripts.transliterate.out( + scripts.transliterate.input, + translit.transliterate(mode, ea("s")) + ) +else + application.help() +end + diff --git a/src/t-transliterator.mkii b/src/t-transliterator.mkii new file mode 100644 index 0000000..2fcfb0b --- /dev/null +++ b/src/t-transliterator.mkii @@ -0,0 +1,3 @@ +% Suggested by Taco +\message{Module is unsupported under mkii} +\endinput diff --git a/src/t-transliterator.mkiv b/src/t-transliterator.mkiv new file mode 100644 index 0000000..ae08278 --- /dev/null +++ b/src/t-transliterator.mkiv @@ -0,0 +1,188 @@ +%D \module +%D [ file=t-transliterator, +%D version=2021-11-21 18:19:24+0100, +%D title=\CONTEXT\ User Module, +%D subtitle=The Transliterator, +%D author=Philipp Gesang, +%D date=\currentdate, +%D copyright=Philipp Gesang, +%D license=2-clause BSD, +%D email={pgesang at ix dot urz dot uni-heidelberg dot de}] +%D This module is licensed under the conditions of the BSD license with +%D two clauses, there is a copy of it in a file named "COPYING" in the +%D transliterator source tree. 
+ +\writestatus{loading}{Transliteration from non-Latin scripts} + +\unprotect + +\definenamespace [TRL] [ + name=transliterate, + type=module, + setup=list, + parent=TRL, + style=no, + version=2021, + comment=Transliteration from non-Latin scripts., +] + +\ctxlua{environment.loadluafile ("transliterator")} + +%D Use the Transliterator by adding \type{\usemodule[transliterator]} somewhere +%D before \type{\starttext}. Adjust the Transliterator through the +%D \type{\setuptransliterate} command. As a first argument it accepts a set of +%D key-value options; at present you may configure \type{mode} and +%D \type{hyphenate}. + +\def\set_serbian_exceptions{% + \doifelse{\transliterateparameter{sr_exceptions}}\v!yes + {\ctxlua{thirddata.translit.sr_except = true}} + {\ctxlua{thirddata.translit.sr_except = false}}% +} + +\def\set_hinting{% + \doifelse{\transliterateparameter{hinting}}\v!yes + {\ctxlua{thirddata.translit.hinting = true}} + {\ctxlua{thirddata.translit.hinting = false}}% +} + +\appendtoks \set_serbian_exceptions \to \everysetuptransliterate +\appendtoks \set_hinting \to \everysetuptransliterate + +%D At first we'll set some defaults: + +\setuptransliterate[% + debug=\v!false, + hinting=\v!yes, + hyphenate=cz, + mode=ru_old, + sr_exceptions=\v!yes, + deficient_font=\v!no, +] + +%D Possible values for \type{mode} are by the time of this writing: +%D \type{ru}, \type{ru_transcript_de}, \type{ru_transcript_en}, \type{ru_old}, +%D \type{all}, \type{iso9_ocs}, \type{ocs}, \type{ocs_gla}, \type{ru_cz}, +%D \type{ocs_cz}, \type{gr} and \type{gr_n}. +%D As not all fonts, even the expensive ones, support some of the most frequent +%D unicode signs used in ISO~9, there are fallbacks for the transliterations of +%D the weak and hard sign. +%D They work with the modes \type{iso9_ocs}, \type{all} and +%D \type{ru_old} only and can be triggered by setting the +%D variable \type{deficient_font} to the value {\em yes}. 
+%D This will transliterate {\em ь} and {\em ъ} (both upper and +%D lower case) to the more common, but non-ISO characters {\em ’} and {\em ”} +%D respectively. +%D Possible values for \type{hyphenate} are all valid \CONTEXT\ language code, for an +%D overview see \type{http://wiki.contextgarden.net/Language_Codes}. +%D In praxi you may want to choose either Czech (the default) or Slovak +%D (\type{sk}) for most transliterations from cyrillic scripts. I've not yet +%D made up my mind concerning Greek transliteration, any suggestions are +%D welcome. + +%D The following will help debugging and reviewing tables. Make sure your +%D typescript can handle the characters, in general it's no use with Latin +%D Modern which unfortunately provides only a restricted set of the unicode +%D range. +%D +%D The user-level command to output a single substitution table is +%D \type{\showOneTranslitTab{#1}}. + +\define[1]\showOneTranslitTab{% + \startluacode + environment.loadluafile ("trans_tables_iso9") + environment.loadluafile ("trans_tables_trsc") + environment.loadluafile ("trans_tables_scntfc") + environment.loadluafile ("trans_tables_trsc") + environment.loadluafile ("trans_tables_glag") + environment.loadluafile ("trans_tables_gr") + thirddata.translit.gen_rules_en() + thirddata.translit.gen_rules_de() + thirddata.translit.show_tab(translit["\luaescapestring{#1}"]) + \stopluacode +} + +%D The user-level command to output all defined tables is +%D \type{\showTranslitTabs}. + +\define\showTranslitTabs{% + \ctxlua{thirddata.translit.show_all_tabs()}% +} + +\def\translitDebug#1{% + \doif{\transliterateparameter{debug}}{yes}{% + {\ss\inmargin{\ctxlua{thirddata.translit.debug_next()}} #1}% + }% +} + + +%D The user-level command \type{\transliterate[#1]{#2}} does the job of +%D switching to a given language (for hyphenation) and adjusting the +%D substitution method locally. 
It takes an optional list \type{[#1]} of +%D key-value arguments to allow ad-hoc specification of either of the two, deviating +%D from the defaults set initially by means of \type{\setuptransliterate}. +%D +%D Internally, \type{\dotransliterate} is called according to the \CONTEXT\ +%D coding style and in case the user provides \type{hyphenate=} or +%D \type{mode=} those will be used instead of the globals. Note that this +%D leaves the latter unchanged. Thus, in order to permanently switch to +%D another transliteration style the user would have to set it by calling +%D \type{\setuptransliterate} again. +%D +% All credits for rewriting the TeX code go to Wolfgang as well. +% http://www.ntg.nl/pipermail/ntg-context/2010/047816.html + +\def\dotransliterate[#1]#2{% + \bgroup\iffirstargument + \setuptransliterate[#1]% + \fi + \language[\transliterateparameter{hyphenate}]% + \ctxlua{ + thirddata.translit.deficient_font = "\transliterateparameter{deficient_font}" + thirddata.translit.transliterate("\transliterateparameter{mode}","\luaescapestring{#2}") + }% + \egroup% +} + +\unexpanded\def\transliterate{\dosingleempty\dotransliterate} + +\def\expandabletransliterate#1{% expandable variant: no grouping, no local setup + %\bgroup + %\setuptransliterate[#1]% + %\language[\transliterateparameter{hyphenate}]% + \ctxlua{ + thirddata.translit.deficient_font = "\transliterateparameter{deficient_font}" + thirddata.translit.transliterate("\transliterateparameter{mode}","\luaescapestring{#1}") + }% + %\egroup% +} + +\unexpanded\def\starttransliterate{% + \bgroup% + \dosingleempty\dostarttransliterate% +} + +\let\stoptransliterate\relax + +\def\dostarttransliterate[#1]#2\stoptransliterate{% sync deficient_font to Lua, as \dotransliterate does + \iffirstargument + \setuptransliterate[#1]% + \fi + \language[\transliterateparameter{hyphenate}]% + \ctxlua{thirddata.translit.deficient_font = "\transliterateparameter{deficient_font}"; thirddata.translit.transliterate("\transliterateparameter{mode}","\luaescapestring{#2}")}% + \egroup% +} + +\newconditional\transliterate_useexpanded \setfalse\transliterate_useexpanded + +\def\transliterate_conditional[#1]#2{% + 
\ifconditional\transliterate_useexpanded + \transliterate[#1]{#2}% + \else + \expandabletransliterate{#2}% + \fi% +} + +\protect \endinput + +% vim:ft=context diff --git a/src/t-transliterator.tex b/src/t-transliterator.tex new file mode 100644 index 0000000..9d4e9f0 --- /dev/null +++ b/src/t-transliterator.tex @@ -0,0 +1 @@ +\loadmarkfile{t-transliterator} diff --git a/src/t-transliterator.xml b/src/t-transliterator.xml new file mode 100644 index 0000000..d45f9cf --- /dev/null +++ b/src/t-transliterator.xml @@ -0,0 +1,63 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/trans_tables_bg.lua b/src/trans_tables_bg.lua new file mode 100644 index 0000000..b319666 --- /dev/null +++ b/src/trans_tables_bg.lua @@ -0,0 +1,114 @@ +--===========================================================================-- +-- Bulgarian -- +--===========================================================================-- + +local translit = thirddata.translit +local pcache = translit.parser_cache +local lpegmatch = lpeg.match + +if not translit.done_bg then + --------------------------------------------------------------------------- + -- Uppercase Bulgarian -> „scientific“ transliteration -- + --------------------------------------------------------------------------- + + translit.bg_upp = translit.make_add_dict{ + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["О"] = "O", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ф"] = "F", + ["Х"] = "Ch", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Ш"] = "Š", + ["Щ"] = "Št", + ["Ъ"] = "Ă", + ["Ь"] = "′", + ["Ю"] = "Ju", + ["Я"] = "Ja", + } + translit.tables["Bulgarian \\quotation{scientific} transliteration uppercase"] = translit.bg_upp + + 
--------------------------------------------------------------------------- + -- Lowercase Bulgarian -> „scientific“ transliteration -- + --------------------------------------------------------------------------- + translit.bg_low = translit.make_add_dict{ + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["й"] = "j", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["о"] = "o", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ф"] = "f", + ["х"] = "ch", + ["ц"] = "c", + ["ч"] = "č", + ["ш"] = "š", + ["щ"] = "št", + ["ъ"] = "ă", + ["ь"] = "′", + ["ю"] = "ju", + ["я"] = "ja", + } + + translit.tables["Bulgarian \\quotation{scientific} transliteration lowercase"] = translit.bg_low + + translit.done_bg = true +end + +local P, Cs = lpeg.P, lpeg.Cs +local addrules = translit.addrules +local utfchar = translit.utfchar + +local function bulgarian (mode) + local bulgarian_parser + if mode == "de" then + local bg = translit.bg_upp + translit.bg_low + local p_bg = addrules(bg) + bulgarian_parser = Cs((p_bg / bg + utfchar)^0) + else + return nil + end + return bulgarian_parser +end + +translit.methods["bg_de"] = function (text) + local p = pcache["bg_de"] + if not p then + p = bulgarian("de") + pcache["bg_de"] = p + end + return p and lpegmatch(p, text) or "" +end + +-- vim:ft=lua:sw=4:ts=4 diff --git a/src/trans_tables_glag.lua b/src/trans_tables_glag.lua new file mode 100644 index 0000000..41974fd --- /dev/null +++ b/src/trans_tables_glag.lua @@ -0,0 +1,128 @@ + +--===========================================================================-- +-- Glagolica -- +--===========================================================================-- + +local translit = thirddata.translit + +------------------------------------------- +-- Lowercase Glagolitic Transliteration -- +------------------------------------------- + +if not translit.done_glagolica then 
+ translit.ocs_gla_low = translit.make_add_dict{ + ["ⰰ"] = "a", -- GLAGOLITIC SMALL LETTER AZU + ["ⰱ"] = "b", -- GLAGOLITIC SMALL LETTER BUKY + ["ⰲ"] = "v", -- GLAGOLITIC SMALL LETTER VEDE + ["ⰳ"] = "g", -- GLAGOLITIC SMALL LETTER GLAGOLI + ["ⰴ"] = "d", -- GLAGOLITIC SMALL LETTER DOBRO + ["ⰵ"] = "e", -- GLAGOLITIC SMALL LETTER YESTU + ["ⰶ"] = "ž", -- GLAGOLITIC SMALL LETTER ZHIVETE + ["ⰷ"] = "ʒ", -- GLAGOLITIC SMALL LETTER DZELO + ["ⰸ"] = "z", -- GLAGOLITIC SMALL LETTER ZEMLJA + ["ⰹ"] = "i", -- GLAGOLITIC SMALL LETTER IZHE + ["ⰺ"] = "i", -- GLAGOLITIC SMALL LETTER INITIAL IZHE + ["ⰻ"] = "i", -- GLAGOLITIC SMALL LETTER I + ["ⰼ"] = "g’", -- GLAGOLITIC SMALL LETTER DJERVI + ["ⰽ"] = "k", -- GLAGOLITIC SMALL LETTER KAKO + ["ⰾ"] = "l", -- GLAGOLITIC SMALL LETTER LJUDIJE + ["ⰿ"] = "m", -- GLAGOLITIC SMALL LETTER MYSLITE + ["ⱀ"] = "n", -- GLAGOLITIC SMALL LETTER NASHI + ["ⱁ"] = "o", -- GLAGOLITIC SMALL LETTER ONU + ["ⱂ"] = "p", -- GLAGOLITIC SMALL LETTER POKOJI + ["ⱃ"] = "r", -- GLAGOLITIC SMALL LETTER RITSI + ["ⱄ"] = "s", -- GLAGOLITIC SMALL LETTER SLOVO + ["ⱅ"] = "t", -- GLAGOLITIC SMALL LETTER TVRIDO + ["ⱆ"] = "u", -- GLAGOLITIC SMALL LETTER UKU + ["ⱇ"] = "f", -- GLAGOLITIC SMALL LETTER FRITU + ["ⱈ"] = "x", -- GLAGOLITIC SMALL LETTER HERU + ["ⱉ"] = "o", -- GLAGOLITIC SMALL LETTER OTU + ["ⱊ"] = "?", -- GLAGOLITIC SMALL LETTER PE + ["ⱋ"] = "št", -- GLAGOLITIC SMALL LETTER SHTA + ["ⱌ"] = "c", -- GLAGOLITIC SMALL LETTER TSI + ["ⱍ"] = "č", -- GLAGOLITIC SMALL LETTER CHRIVI + ["ⱎ"] = "š", -- GLAGOLITIC SMALL LETTER SHA + ["ⱏ"] = "ъ", -- GLAGOLITIC SMALL LETTER YERU + ["ⱐ"] = "ь", -- GLAGOLITIC SMALL LETTER YERI + ["ⱑ"] = "ě", -- GLAGOLITIC SMALL LETTER YATI + ["ⱒ"] = "x", -- GLAGOLITIC SMALL LETTER SPIDERY HA + ["ⱓ"] = "ju", -- GLAGOLITIC SMALL LETTER YU + ["ⱔ"] = "ę", -- GLAGOLITIC SMALL LETTER SMALL YUS + ["ⱕ"] = "y̨", -- GLAGOLITIC SMALL LETTER SMALL YUS WITH TAIL + ["ⱖ"] = "??", -- GLAGOLITIC SMALL LETTER YO + ["ⱗ"] = "ję", -- GLAGOLITIC SMALL LETTER IOTATED SMALL YU + 
["ⱘ"] = "ǫ", -- GLAGOLITIC SMALL LETTER BIG YUS + ["ⱙ"] = "jǫ", -- GLAGOLITIC SMALL LETTER IOTATED BIG YUS + ["ⱚ"] = "th", -- GLAGOLITIC SMALL LETTER FITA + ["ⱛ"] = "ü", -- GLAGOLITIC SMALL LETTER IZHITSA + ["ⱜ"] = "??", -- GLAGOLITIC SMALL LETTER SHTAPIC + ["ⱝ"] = "??", -- GLAGOLITIC SMALL LETTER TROKUTASTI A + ["ⱞ"] = "m", -- GLAGOLITIC SMALL LETTER LATINATE MYSLITE + } + + translit.tables["Glagolica transliteration for OCS lowercase"] = translit.ocs_gla_low + + ------------------------------------------------ + -- Uppercase (?!) Glagolitic Transliteration -- + ------------------------------------------------ + + translit.ocs_gla_upp = translit.make_add_dict{ + ["Ⰰ"] = "A", -- GLAGOLITIC CAPITAL LETTER AZU + ["Ⰱ"] = "B", -- GLAGOLITIC CAPITAL LETTER BUKY + ["Ⰲ"] = "V", -- GLAGOLITIC CAPITAL LETTER VEDE + ["Ⰳ"] = "G", -- GLAGOLITIC CAPITAL LETTER GLAGOLI + ["Ⰴ"] = "D", -- GLAGOLITIC CAPITAL LETTER DOBRO + ["Ⰵ"] = "E", -- GLAGOLITIC CAPITAL LETTER YESTU + ["Ⰶ"] = "Ž", -- GLAGOLITIC CAPITAL LETTER ZHIVETE + ["Ⰷ"] = "Ʒ", -- GLAGOLITIC CAPITAL LETTER DZELO + ["Ⰸ"] = "Z", -- GLAGOLITIC CAPITAL LETTER ZEMLJA + ["Ⰹ"] = "I", -- GLAGOLITIC CAPITAL LETTER IZHE + ["Ⰺ"] = "I", -- GLAGOLITIC CAPITAL LETTER INITIAL IZHE + ["Ⰻ"] = "I", -- GLAGOLITIC CAPITAL LETTER I + ["Ⰼ"] = "G’", -- GLAGOLITIC CAPITAL LETTER DJERVI + ["Ⰽ"] = "K", -- GLAGOLITIC CAPITAL LETTER KAKO + ["Ⰾ"] = "L", -- GLAGOLITIC CAPITAL LETTER LJUDIJE + ["Ⰿ"] = "M", -- GLAGOLITIC CAPITAL LETTER MYSLITE + ["Ⱀ"] = "N", -- GLAGOLITIC CAPITAL LETTER NASHI + ["Ⱁ"] = "O", -- GLAGOLITIC CAPITAL LETTER ONU + ["Ⱂ"] = "P", -- GLAGOLITIC CAPITAL LETTER POKOJI + ["Ⱃ"] = "R", -- GLAGOLITIC CAPITAL LETTER RITSI + ["Ⱄ"] = "S", -- GLAGOLITIC CAPITAL LETTER SLOVO + ["Ⱅ"] = "T", -- GLAGOLITIC CAPITAL LETTER TVRIDO + ["Ⱆ"] = "U", -- GLAGOLITIC CAPITAL LETTER UKU + ["Ⱇ"] = "F", -- GLAGOLITIC CAPITAL LETTER FRITU + ["Ⱈ"] = "X", -- GLAGOLITIC CAPITAL LETTER HERU + ["Ⱉ"] = "O", -- GLAGOLITIC CAPITAL LETTER OTU + ["Ⱊ"] = "?", -- 
GLAGOLITIC CAPITAL LETTER PE + ["Ⱋ"] = "Št", -- GLAGOLITIC CAPITAL LETTER SHTA + ["Ⱌ"] = "C", -- GLAGOLITIC CAPITAL LETTER TSI + ["Ⱍ"] = "Č", -- GLAGOLITIC CAPITAL LETTER CHRIVI + ["Ⱎ"] = "Š", -- GLAGOLITIC CAPITAL LETTER SHA + ["Ⱏ"] = "Ъ", -- GLAGOLITIC CAPITAL LETTER YERU + ["Ⱐ"] = "Ь", -- GLAGOLITIC CAPITAL LETTER YERI + ["Ⱑ"] = "Ě", -- GLAGOLITIC CAPITAL LETTER YATI + ["Ⱒ"] = "X", -- GLAGOLITIC CAPITAL LETTER SPIDERY HA + ["Ⱓ"] = "Ju", -- GLAGOLITIC CAPITAL LETTER YU + ["Ⱔ"] = "Ę", -- GLAGOLITIC CAPITAL LETTER SMALL YUS + ["Ⱕ"] = "Y̨", -- GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL + ["Ⱖ"] = "??", -- GLAGOLITIC CAPITAL LETTER YO + ["Ⱗ"] = "Ję", -- GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS + ["Ⱘ"] = "Ǫ", -- GLAGOLITIC CAPITAL LETTER BIG YUS + ["Ⱙ"] = "Jǫ", -- GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS + ["Ⱚ"] = "Th", -- GLAGOLITIC CAPITAL LETTER FITA + ["Ⱛ"] = "Ü", -- GLAGOLITIC CAPITAL LETTER IZHITSA + ["Ⱜ"] = "??", -- GLAGOLITIC CAPITAL LETTER SHTAPIC + ["Ⱝ"] = "??", -- GLAGOLITIC CAPITAL LETTER TROKUTASTI A + ["Ⱞ"] = "M", -- GLAGOLITIC CAPITAL LETTER LATINATE MYSLIT + } + + translit.tables["Glagolica transliteration for OCS uppercase"] = translit.ocs_gla_upp + + translit.done_glagolica = true +end + +--===========================================================================-- +-- End Of Tables -- +--===========================================================================-- + + diff --git a/src/trans_tables_gr.lua b/src/trans_tables_gr.lua new file mode 100644 index 0000000..b4c77e7 --- /dev/null +++ b/src/trans_tables_gr.lua @@ -0,0 +1,709 @@ +--===========================================================================-- +-- Greek -- +--===========================================================================-- + +local translit = thirddata.translit +local pcache = translit.parser_cache +local lpegmatch = lpeg.match + +-- Note that the Greek transliteration mapping isn't bijective so transliterated +-- texts won't be reversible. 
(Shouldn't be impossible to make one up using +-- diacritics on latin characters to represent all possible combinations of +-- Greek breathings + accents.) + +-- Good reading on composed / precombined unicode: +-- http://www.tlg.uci.edu/~opoudjis/unicode/unicode_gaps.html#precomposed + +------------------------------------------------- +-- Lowercase Greek Initial Position Diphthongs -- +------------------------------------------------- + +if not translit.done_greek then + translit.gr_di_in_low = translit.make_add_dict{ + [" αὑ"] = " hau", + [" αὕ"] = " hau", + [" αὓ"] = " hau", + [" αὗ"] = " hau", + [" εὑ"] = " heu", + [" εὕ"] = " heu", + [" εὓ"] = " heu", + [" εὗ"] = " heu", + [" ηὑ"] = " hēu", + [" ηὕ"] = " hēu", + [" ηὓ"] = " hēu", + [" ηὗ"] = " hēu", + [" οὑ"] = " hu", + [" οὕ"] = " hu", + [" οὓ"] = " hu", + [" οὗ"] = " hu", + [" ωὑ"] = " hōu", + [" ωὕ"] = " hōu", + [" ωὓ"] = " hōu", + [" ωὗ"] = " hōu" + } + + translit.tables["Greek transliteration initial breathing diphthongs lowercase"] = translit.gr_di_in_low + + ------------------------------------------------- + -- Uppercase Greek Initial Position Diphthongs -- + ------------------------------------------------- + + translit.gr_di_in_upp = translit.make_add_dict{ + [" Αὑ"] = " Hau", + [" Αὕ"] = " Hau", + [" Αὓ"] = " Hau", + [" Αὗ"] = " Hau", + [" Εὑ"] = " Heu", + [" Εὕ"] = " Heu", + [" Εὓ"] = " Heu", + [" Εὗ"] = " Heu", + [" Ηὑ"] = " Hēu", + [" Ηὕ"] = " Hēu", + [" Ηὓ"] = " Hēu", + [" Ηὗ"] = " Hēu", + [" Οὑ"] = " Hu", + [" Οὕ"] = " Hu", + [" Οὓ"] = " Hu", + [" Οὗ"] = " Hu", + [" Ωὑ"] = " Hōu", + [" Ωὕ"] = " Hōu", + [" Ωὓ"] = " Hōu", + [" Ωὗ"] = " Hōu" + } + + translit.tables["Greek transliteration initial breathing diphthongs uppercase"] = translit.gr_di_in_upp + + --------------------------------------- + -- Lowercase Greek Initial Position -- + --------------------------------------- + + translit.gr_in_low = translit.make_add_dict{ + [" ἁ"] = " ha", + [" ἅ"] = " ha", + [" ἃ"] = " ha", + [" ἇ"] = " ha", + 
[" ᾁ"] = " ha", + [" ᾅ"] = " ha", + [" ᾃ"] = " ha", + [" ᾇ"] = " ha", + [" ἑ"] = " he", + [" ἕ"] = " he", + [" ἓ"] = " he", + [" ἡ"] = " hē", + [" ἥ"] = " hē", + [" ἣ"] = " hē", + [" ἧ"] = " hē", + [" ᾑ"] = " hē", + [" ᾕ"] = " hē", + [" ᾓ"] = " hē", + [" ᾗ"] = " hē", + [" ἱ"] = " hi", + [" ἵ"] = " hi", + [" ἳ"] = " hi", + [" ἷ"] = " hi", + [" ὁ"] = " ho", + [" ὅ"] = " ho", + [" ὃ"] = " ho", + [" ὑ"] = " hy", + [" ὕ"] = " hy", + [" ὓ"] = " hy", + [" ὗ"] = " hy", + [" ὡ"] = " hō", + [" ὥ"] = " hō", + [" ὣ"] = " hō", + [" ὧ"] = " hō", + [" ᾡ"] = " hō", + [" ᾥ"] = " hō", + [" ᾣ"] = " hō", + [" ᾧ"] = " hō", + } + + translit.tables["Greek transliteration initial breathing lowercase"] = translit.gr_in_low + + --------------------------------------- + -- Uppercase Greek Initial Position -- + --------------------------------------- + + translit.gr_in_upp = translit.make_add_dict{ + [" Ἁ"] = " Ha", + [" Ἅ"] = " Ha", + [" Ἃ"] = " Ha", + [" Ἇ"] = " Ha", + [" ᾉ"] = " Ha", + [" ᾍ"] = " Ha", + [" ᾋ"] = " Ha", + [" ᾏ"] = " Ha", + [" Ἑ"] = " He", + [" Ἕ"] = " He", + [" Ἓ"] = " He", + [" Ἡ"] = " Hē", + [" Ἥ"] = " Hē", + [" Ἣ"] = " Hē", + [" Ἧ"] = " Hē", + [" ᾙ"] = " Hē", + [" ᾝ"] = " Hē", + [" ᾛ"] = " Hē", + [" ᾟ"] = " Hē", + [" Ἱ"] = " Hi", + [" Ἵ"] = " Hi", + [" Ἳ"] = " Hi", + [" Ἷ"] = " Hi", + [" Ὁ"] = " Ho", + [" Ὅ"] = " Ho", + [" Ὃ"] = " Ho", + [" Ὑ"] = " Hy", + [" Ὕ"] = " Hy", + [" Ὓ"] = " Hy", + [" Ὗ"] = " Hy", + [" Ὡ"] = " Hō", + [" Ὥ"] = " Hō", + [" Ὣ"] = " Hō", + [" Ὧ"] = " Hō", + [" ᾩ"] = " Hō", + [" ᾭ"] = " Hō", + [" ᾫ"] = " Hō", + [" ᾯ"] = " Hō", + } + + translit.tables["Greek transliteration initial breathing uppercase"] = translit.gr_in_upp + + --------------------------------- + -- Lowercase Greek Diphthongs -- + --------------------------------- + + translit.gr_di_low = translit.make_add_dict{ + ["αυ"] = "au", + ["αύ"] = "au", + ["αὺ"] = "au", + ["αῦ"] = "au", + ["αὐ"] = "au", + ["αὔ"] = "au", + ["αὒ"] = "au", + ["αὖ"] = "au", + ["αὑ"] = "au", + ["αὕ"] = "au", + 
["αὓ"] = "au",
+        ["αὗ"] = "au",
+        ["ευ"] = "eu",
+        ["εύ"] = "eu",
+        ["εὺ"] = "eu",
+        ["εῦ"] = "eu",
+        ["εὐ"] = "eu",
+        ["εὔ"] = "eu",
+        ["εὒ"] = "eu",
+        ["εὖ"] = "eu",
+        ["εὑ"] = "eu",
+        ["εὕ"] = "eu",
+        ["εὓ"] = "eu",
+        ["εὗ"] = "eu",
+        ["ηυ"] = "ēu",
+        ["ηύ"] = "ēu",
+        ["ηὺ"] = "ēu",
+        ["ηῦ"] = "ēu",
+        ["ηὐ"] = "ēu",
+        ["ηὔ"] = "ēu",
+        ["ηὒ"] = "ēu",
+        ["ηὖ"] = "ēu",
+        ["ηὑ"] = "ēu",
+        ["ηὕ"] = "ēu",
+        ["ηὓ"] = "ēu",
+        ["ηὗ"] = "ēu",
+        ["ου"] = "u",
+        ["ου"] = "u",
+        ["ου"] = "u",
+        ["ού"] = "u",
+        ["οὺ"] = "u",
+        ["οῦ"] = "u",
+        ["οὐ"] = "u",
+        ["οὔ"] = "u",
+        ["οὒ"] = "u",
+        ["οὖ"] = "u",
+        ["οὑ"] = "u",
+        ["οὕ"] = "u",
+        ["οὓ"] = "u",
+        ["οὗ"] = "u",
+        ["ωυ"] = "ōu",
+        ["ωύ"] = "ōu",
+        ["ωὺ"] = "ōu",
+        ["ωῦ"] = "ōu",
+        ["ωὐ"] = "ōu",
+        ["ωὔ"] = "ōu",
+        ["ωὒ"] = "ōu",
+        ["ωὖ"] = "ōu",
+        ["ωὑ"] = "ōu",
+        ["ωὕ"] = "ōu",
+        ["ωὓ"] = "ōu",
+        ["ωὗ"] = "ōu",
+        ["ῤῥ"] = "rrh",
+    }
+
+    translit.tables["Greek transliteration diphthongs lowercase"] = translit.gr_di_low -- register the diphthong table itself, not the initial-breathing table gr_in_low
+
+    ---------------------------------
+    -- Uppercase Greek Diphthongs --
+    ---------------------------------
+
+    translit.gr_di_upp = translit.make_add_dict{
+        ["Αυ"] = "Au",
+        ["Αύ"] = "Au",
+        ["Αὺ"] = "Au",
+        ["Αῦ"] = "Au",
+        ["Αὐ"] = "Au",
+        ["Αὔ"] = "Au",
+        ["Αὒ"] = "Au",
+        ["Αὖ"] = "Au",
+        ["Αὑ"] = "Au",
+        ["Αὕ"] = "Au",
+        ["Αὓ"] = "Au",
+        ["Αὗ"] = "Au",
+        ["Ευ"] = "Eu",
+        ["Εύ"] = "Eu",
+        ["Εὺ"] = "Eu",
+        ["Εῦ"] = "Eu",
+        ["Εὐ"] = "Eu",
+        ["Εὔ"] = "Eu",
+        ["Εὒ"] = "Eu",
+        ["Εὖ"] = "Eu",
+        ["Εὑ"] = "Eu",
+        ["Εὕ"] = "Eu",
+        ["Εὓ"] = "Eu",
+        ["Εὗ"] = "Eu",
+        ["Ηυ"] = "Ēu",
+        ["Ηύ"] = "Ēu",
+        ["Ηὺ"] = "Ēu",
+        ["Ηῦ"] = "Ēu",
+        ["Ηὐ"] = "Ēu",
+        ["Ηὔ"] = "Ēu",
+        ["Ηὒ"] = "Ēu",
+        ["Ηὖ"] = "Ēu",
+        ["Ηὑ"] = "Ēu",
+        ["Ηὕ"] = "Ēu",
+        ["Ηὓ"] = "Ēu",
+        ["Ηὗ"] = "Ēu",
+        ["Ου"] = "U",
+        ["Ου"] = "U",
+        ["Ου"] = "U",
+        ["Ού"] = "U",
+        ["Οὺ"] = "U",
+        ["Οῦ"] = "U",
+        ["Οὐ"] = "U",
+        ["Οὔ"] = "U",
+        ["Οὒ"] = "U",
+        ["Οὖ"] = "U",
+        ["Οὑ"] = "U",
+        ["Οὕ"] = "U",
+        ["Οὓ"] = "U",
+        ["Οὗ"] = "U",
+        ["Ωυ"] = "Ōu",
+        ["Ωύ"] =
"Ōu", + ["Ωὺ"] = "Ōu", + ["Ωῦ"] = "Ōu", + ["Ωὐ"] = "Ōu", + ["Ωὔ"] = "Ōu", + ["Ωὒ"] = "Ōu", + ["Ωὖ"] = "Ōu", + ["Ωὑ"] = "Ōu", + ["Ωὕ"] = "Ōu", + ["Ωὓ"] = "Ōu", + ["Ωὗ"] = "Ōu", + } + + translit.tables["Greek transliteration diphthongs uppercase"] = translit.gr_in_upp + + -- The following will be used in an option that ensures transcription of + -- nasalization, e.g. Ἁγχίσης -> “Anchises” (instead of “Agchises”) + translit.gr_nrule = translit.make_add_dict{ + ["γγ"] = "ng", + ["γκ"] = "nk", + ["γξ"] = "nx", + ["γχ"] = "nch", + } + + translit.tables["Greek transliteration optional nasalization"] = translit.gr_nrule + + + -------------------------------------- + -- Lowercase Greek Transliteration -- + -------------------------------------- + + translit.gr_low = translit.make_add_dict{ + ["α"] = "a", + ["ά"] = "a", + ["ὰ"] = "a", + ["ᾶ"] = "a", + ["ᾳ"] = "a", + ["ἀ"] = "a", + ["ἁ"] = "a", + ["ἄ"] = "a", + ["ἂ"] = "a", + ["ἆ"] = "a", + ["ἁ"] = "a", + ["ἅ"] = "a", + ["ἃ"] = "a", + ["ἇ"] = "a", + ["ᾁ"] = "a", + ["ᾴ"] = "a", + ["ᾲ"] = "a", + ["ᾷ"] = "a", + ["ᾄ"] = "a", + ["ᾂ"] = "a", + ["ᾅ"] = "a", + ["ᾃ"] = "a", + ["ᾆ"] = "a", + ["ᾇ"] = "a", + ["β"] = "b", + ["γ"] = "g", + ["δ"] = "d", + ["ε"] = "e", + ["έ"] = "e", + ["ὲ"] = "e", + ["ἐ"] = "e", + ["ἔ"] = "e", + ["ἒ"] = "e", + ["ἑ"] = "e", + ["ἕ"] = "e", + ["ἓ"] = "e", + ["ζ"] = "z", + ["η"] = "ē", + ["η"] = "ē", + ["ή"] = "ē", + ["ὴ"] = "ē", + ["ῆ"] = "ē", + ["ῃ"] = "ē", + ["ἠ"] = "ē", + ["ἤ"] = "ē", + ["ἢ"] = "ē", + ["ἦ"] = "ē", + ["ᾐ"] = "ē", + ["ἡ"] = "ē", + ["ἥ"] = "ē", + ["ἣ"] = "ē", + ["ἧ"] = "ē", + ["ᾑ"] = "ē", + ["ῄ"] = "ē", + ["ῂ"] = "ē", + ["ῇ"] = "ē", + ["ᾔ"] = "ē", + ["ᾒ"] = "ē", + ["ᾕ"] = "ē", + ["ᾓ"] = "ē", + ["ᾖ"] = "ē", + ["ᾗ"] = "ē", + ["θ"] = "th", + ["ι"] = "i", + ["ί"] = "i", + ["ὶ"] = "i", + ["ῖ"] = "i", + ["ἰ"] = "i", + ["ἴ"] = "i", + ["ἲ"] = "i", + ["ἶ"] = "i", + ["ἱ"] = "i", + ["ἵ"] = "i", + ["ἳ"] = "i", + ["ἷ"] = "i", + ["ϊ"] = "i", + ["ΐ"] = "i", + ["ῒ"] = "i", + ["ῗ"] = "i", + ["κ"] = "k", + 
["λ"] = "l", + ["μ"] = "m", + ["ν"] = "n", + ["ξ"] = "x", + ["ο"] = "o", + ["ό"] = "o", + ["ὸ"] = "o", + ["ὀ"] = "o", + ["ὄ"] = "o", + ["ὂ"] = "o", + ["ὁ"] = "o", + ["ὅ"] = "o", + ["ὃ"] = "o", + ["π"] = "p", + ["ρ"] = "r", + ["ῤ"] = "r", + ["ῥ"] = "rh", + ["σ"] = "s", + ["ς"] = "s", + ["τ"] = "t", + ["υ"] = "y", + ["ύ"] = "y", + ["ὺ"] = "y", + ["ῦ"] = "y", + ["ὐ"] = "y", + ["ὔ"] = "y", + ["ὒ"] = "y", + ["ὖ"] = "y", + ["ὑ"] = "y", + ["ὕ"] = "y", + ["ὓ"] = "y", + ["ὗ"] = "y", + ["ϋ"] = "y", + ["ΰ"] = "y", + ["ῢ"] = "y", + ["ῧ"] = "y", + ["φ"] = "ph", + ["χ"] = "ch", + ["ψ"] = "ps", + ["ω"] = "ō", + ["ώ"] = "ō", + ["ὼ"] = "ō", + ["ῶ"] = "ō", + ["ῳ"] = "ō", + ["ὠ"] = "ō", + ["ὤ"] = "ō", + ["ὢ"] = "ō", + ["ὦ"] = "ō", + ["ᾠ"] = "ō", + ["ὡ"] = "ō", + ["ὥ"] = "ō", + ["ὣ"] = "ō", + ["ὧ"] = "ō", + ["ᾡ"] = "ō", + ["ῴ"] = "ō", + ["ῲ"] = "ō", + ["ῷ"] = "ō", + ["ᾤ"] = "ō", + ["ᾢ"] = "ō", + ["ᾥ"] = "ō", + ["ᾣ"] = "ō", + ["ᾦ"] = "ō", + ["ᾧ"] = "ō", + } + + translit.tables["Greek transliteration lowercase"] = translit.gr_low + + -------------------------------------- + -- Uppercase Greek Transliteration -- + -------------------------------------- + + translit.gr_upp = translit.make_add_dict{ + ["Α"] = "A", + ["Ά"] = "A", + ["Ὰ"] = "A", + --["ᾶ"] = "A", + ["ᾼ"] = "A", + ["Ἀ"] = "A", + ["Ἁ"] = "A", + ["Ἄ"] = "A", + ["Ἂ"] = "A", + ["Ἆ"] = "A", + ["Ἁ"] = "A", + ["Ἅ"] = "A", + ["Ἃ"] = "A", + ["Ἇ"] = "A", + ["ᾉ"] = "A", + --["ᾴ"] = "A", -- I’d be very happy if anybody could explain to me + --["ᾲ"] = "A", -- why there's Ά, ᾌ and ᾼ but no “A + iota subscript + --["ᾷ"] = "A", -- + acute” …, same for Η, Υ and Ω + diacritica. 
+ ["ᾌ"] = "A", + ["ᾊ"] = "A", + ["ᾍ"] = "A", + ["ᾋ"] = "A", + ["ᾎ"] = "A", + ["ᾏ"] = "A", + ["Β"] = "B", + ["Γ"] = "G", + ["Δ"] = "D", + ["Ε"] = "E", + ["Έ"] = "E", + ["Ὲ"] = "E", + ["Ἐ"] = "E", + ["Ἔ"] = "E", + ["Ἒ"] = "E", + ["Ἑ"] = "E", + ["Ἕ"] = "E", + ["Ἓ"] = "E", + ["Ζ"] = "Z", + ["Η"] = "Ē", + ["Η"] = "Ē", + ["Ή"] = "Ē", + ["Ὴ"] = "Ē", + --["ῆ"] = "Ē", + ["ῌ"] = "Ē", + ["Ἠ"] = "Ē", + ["Ἤ"] = "Ē", + ["Ἢ"] = "Ē", + ["Ἦ"] = "Ē", + ["ᾘ"] = "Ē", + ["Ἡ"] = "Ē", + ["Ἥ"] = "Ē", + ["Ἣ"] = "Ē", + ["Ἧ"] = "Ē", + ["ᾙ"] = "Ē", + --["ῄ"] = "Ē", + --["ῂ"] = "Ē", + --["ῇ"] = "Ē", + ["ᾜ"] = "Ē", + ["ᾚ"] = "Ē", + ["ᾝ"] = "Ē", + ["ᾛ"] = "Ē", + ["ᾞ"] = "Ē", + ["ᾟ"] = "Ē", + ["Θ"] = "Th", + ["Ι"] = "I", + ["Ί"] = "I", + ["Ὶ"] = "I", + --["ῖ"] = "I", + ["Ἰ"] = "I", + ["Ἴ"] = "I", + ["Ἲ"] = "I", + ["Ἶ"] = "I", + ["Ἱ"] = "I", + ["Ἵ"] = "I", + ["Ἳ"] = "I", + ["Ἷ"] = "I", + ["Ϊ"] = "I", + --["ΐ"] = "I", + --["ῒ"] = "I", + --["ῗ"] = "I", + ["Κ"] = "K", + ["Λ"] = "L", + ["Μ"] = "M", + ["Ν"] = "N", + ["Ξ"] = "X", + ["Ο"] = "O", + ["Ό"] = "O", + ["Ὸ"] = "O", + ["Ὀ"] = "O", + ["Ὄ"] = "O", + ["Ὂ"] = "O", + ["Ὁ"] = "O", + ["Ὅ"] = "O", + ["Ὃ"] = "O", + ["Π"] = "P", + ["Ρ"] = "R", + --["ῤ"] = "R", + ["Ῥ"] = "Rh", + ["Σ"] = "S", + ["Σ"] = "S", + ["Τ"] = "T", + ["Υ"] = "Y", + ["Ύ"] = "Y", + ["Ὺ"] = "Y", + --["ῦ"] = "Y", + --["ὐ"] = "Y", + --["ὔ"] = "Y", + --["ὒ"] = "Y", + --["ὖ"] = "Y", + ["Ὑ"] = "Y", + ["Ὕ"] = "Y", + ["Ὓ"] = "Y", + ["Ὗ"] = "Y", + ["Ϋ"] = "Y", + --["ΰ"] = "Y", + --["ῢ"] = "Y", + --["ῧ"] = "Y", + ["Φ"] = "Ph", + ["Χ"] = "Ch", + ["Ψ"] = "Ps", + ["Ω"] = "Ō", + ["Ώ"] = "Ō", + ["Ὼ"] = "Ō", + --["ῶ"] = "Ō", + ["ῼ"] = "Ō", + ["Ὠ"] = "Ō", + ["Ὤ"] = "Ō", + ["Ὢ"] = "Ō", + ["Ὦ"] = "Ō", + ["ᾨ"] = "Ō", + ["Ὡ"] = "Ō", + ["Ὥ"] = "Ō", + ["Ὣ"] = "Ō", + ["Ὧ"] = "Ō", + ["ᾩ"] = "Ō", + --["ῴ"] = "Ō", + --["ῲ"] = "Ō", + --["ῷ"] = "Ō", + ["ᾬ"] = "Ō", + ["ᾪ"] = "Ō", + ["ᾭ"] = "Ō", + ["ᾫ"] = "Ō", + ["ᾮ"] = "Ō", + ["ᾯ"] = "Ō", + } + + translit.tables["Greek transliteration uppercase"] = translit.gr_upp 
+
+    ------------
+    -- Varia --
+    ------------
+
+    translit.gr_other = translit.make_add_dict{
+        ["ϝ"] = "w",
+        ["Ϝ"] = "W",
+        ["ϙ"] = "q",
+        ["Ϙ"] = "Q",
+        ["ϡ"] = "ss",
+        ["Ϡ"] = "Ss",
+    }
+
+    translit.tables["Greek transliteration archaic characters"] = translit.gr_other
+
+    translit.done_greek = true
+end
+
+--===========================================================================--
+-- End Of Tables --
+--===========================================================================--
+-- Build the LPeg parser for mode "gr" or "gr_n"; returns nil for any other mode. `text` is unused.
+local function greek (mode, text)
+    local P, V, Cs = lpeg.P, lpeg.V, lpeg.Cs
+    local addrules = translit.addrules
+    local utfchar = translit.utfchar
+
+    if mode == "gr" or mode == "gr_n" then
+
+        local gr_di_in, gr_in, gr_di, gr = translit.make_add_dict{}, translit.make_add_dict{}, translit.make_add_dict{}, translit.make_add_dict{}
+        gr_di_in = gr_di_in + translit.gr_di_in_low + translit.gr_di_in_upp
+        gr_in = gr_in + translit.gr_in_low + translit.gr_in_upp
+        gr_di = gr_di + translit.gr_di_low + translit.gr_di_upp
+        gr = gr + translit.gr_low + translit.gr_upp + translit.gr_other
+
+        if mode == "gr_n" then gr_di = gr_di + translit.gr_nrule end -- "gr_n" adds the nasalization digraphs to the diphthong rules
+
+        local p_di_in, p_in, p_di, p -- all nil here; handed to addrules as the initial pattern
+
+        p_di_in = addrules( gr_di_in, p_di_in )
+        p_in = addrules( gr_in, p_in )
+        p_di = addrules( gr_di, p_di )
+        p = addrules( gr, p )
+
+        local g = P{ -- 2959 rules
+            Cs((V"init_diph" -- alternatives are tried in this order; utfchar is the fallback
+            + V"init"
+            + V"diph"
+            + V"other"
+            + utfchar
+            )^0),
+
+            init_diph = Cs(p_di_in / gr_di_in ),
+            init = Cs(p_in / gr_in ),
+            diph = Cs(p_di / gr_di ),
+            other = Cs(p / gr ),
+        }
+
+        return g
+    end
+end
+-- Transliterate `text` with the "gr" parser, building and caching it in pcache on first use.
+translit.methods["gr"] = function (text)
+    local p = pcache["gr"] -- keep the parser local instead of leaking a global `p`
+    if not p then
+        p = greek("gr")
+        pcache["gr"] = p
+    end
+    return lpegmatch(p, text)
+end
+-- Same as "gr", but with the parser built for mode "gr_n" (optional nasalization rules).
+translit.methods["gr_n"] = function (text)
+    local p = pcache["gr_n"] -- keep the parser local instead of leaking a global `p`
+    if not p then
+        p = greek("gr_n")
+        pcache["gr_n"] = p
+    end
+    return lpegmatch(p, text)
+end
+
+-- vim:ft=lua:sw=4:ts=4
diff --git a/src/trans_tables_iso9.lua b/src/trans_tables_iso9.lua
new file mode
100644 index 0000000..256d994 --- /dev/null +++ b/src/trans_tables_iso9.lua @@ -0,0 +1,310 @@ +--===========================================================================-- +-- ISO 9.1995(E) standardized transliteration for cyrillic -- +--===========================================================================-- + +local translit = thirddata.translit +local pcache = translit.parser_cache +local lpegmatch = lpeg.match + +if not translit.done_iso9 then + ----------------------------------------- + -- Lowercase russian cyrillic alphabet -- + ----------------------------------------- + translit.ru_low = translit.make_add_dict({ + ["а"] = "a", -- U+0430 -> U+0061 + ["б"] = "b", -- U+0431 -> U+0062 + ["в"] = "v", -- U+0432 -> U+0076 + ["г"] = "g", -- U+0433 -> U+0067 + ["д"] = "d", -- U+0434 -> U+0064 + ["е"] = "e", -- U+0435 -> U+0065 + ["ё"] = "ë", -- U+0451 -> U+00eb + ["ж"] = "ž", -- U+0436 -> U+017e + ["з"] = "z", -- U+0437 -> U+007a + ["и"] = "i", -- U+0438 -> U+0069 + ["й"] = "j", -- U+0439 -> U+006a + ["к"] = "k", -- U+043a -> U+006b + ["л"] = "l", -- U+043b -> U+006c + ["м"] = "m", -- U+043c -> U+006d + ["н"] = "n", -- U+043d -> U+006e + ["о"] = "o", -- U+043e -> U+006f + ["п"] = "p", -- U+043f -> U+0070 + ["р"] = "r", -- U+0440 -> U+0072 + ["с"] = "s", -- U+0441 -> U+0073 + ["т"] = "t", -- U+0442 -> U+0074 + ["у"] = "u", -- U+0443 -> U+0075 + ["ф"] = "f", -- U+0444 -> U+0066 + ["х"] = "h", -- U+0445 -> U+0068 + ["ц"] = "c", -- U+0446 -> U+0063 + ["ч"] = "č", -- U+0447 -> U+010d + ["ш"] = "š", -- U+0448 -> U+0161 + ["щ"] = "ŝ", -- U+0449 -> U+015d + ["ъ"] = "ʺ", -- U+044a -> U+02ba <- That's somewhat ambiguous as 0x2ba is + ["ы"] = "y", -- U+044b -> U+0079 used for uppercase, too. + ["ь"] = "ʹ", -- U+044c -> U+02b9 <- Same here with 0x2b9. 
+ ["э"] = "è", -- U+044d -> U+00e8 + ["ю"] = "û", -- U+044e -> U+00fb + ["я"] = "â" -- U+044f -> U+00e2 + }) + + translit.tables["russian lowercase ISO~9"] = translit.ru_low + + ----------------------------------------- + -- Uppercase russian cyrillic alphabet -- + ----------------------------------------- + + translit.ru_upp = translit.make_add_dict({ + ["А"] = "A", -- U+0410 -> U+0041 + ["Б"] = "B", -- U+0411 -> U+0042 + ["В"] = "V", -- U+0412 -> U+0056 + ["Г"] = "G", -- U+0413 -> U+0047 + ["Д"] = "D", -- U+0414 -> U+0044 + ["Е"] = "E", -- U+0415 -> U+0045 + ["Ё"] = "Ë", -- U+0401 -> U+00cb + ["Ж"] = "Ž", -- U+0416 -> U+017d + ["З"] = "Z", -- U+0417 -> U+005a + ["И"] = "I", -- U+0418 -> U+0049 + ["Й"] = "J", -- U+0419 -> U+004a + ["К"] = "K", -- U+041a -> U+004b + ["Л"] = "L", -- U+041b -> U+004c + ["М"] = "M", -- U+041c -> U+004d + ["Н"] = "N", -- U+041d -> U+004e + ["О"] = "O", -- U+041e -> U+004f + ["П"] = "P", -- U+041f -> U+0050 + ["Р"] = "R", -- U+0420 -> U+0052 + ["С"] = "S", -- U+0421 -> U+0053 + ["Т"] = "T", -- U+0422 -> U+0054 + ["У"] = "U", -- U+0423 -> U+0055 + ["Ф"] = "F", -- U+0424 -> U+0046 + ["Х"] = "H", -- U+0425 -> U+0048 + ["Ц"] = "C", -- U+0426 -> U+0043 + ["Ч"] = "Č", -- U+0427 -> U+010c + ["Ш"] = "Š", -- U+0428 -> U+0160 + ["Щ"] = "Ŝ", -- U+0429 -> U+015c + ["Ъ"] = "ʺ", -- U+042a -> U+02ba + ["Ы"] = "Y", -- U+042b -> U+0059 + ["Ь"] = "ʹ", -- U+042c -> U+02b9 + ["Э"] = "È", -- U+042d -> U+00c8 + ["Ю"] = "Û", -- U+042e -> U+00db + ["Я"] = "Â" -- U+042f -> U+00c2 + }) + + translit.tables["russian uppercase ISO~9"] = translit.ru_upp + + ---------------------------------------------------------- + -- Lowercase pre-1918 russian cyrillic additional chars -- + ---------------------------------------------------------- + -- cf. 
http://www.russportal.ru/index.php?id=oldorth.decret1917 + + translit.ru_old_low = translit.make_add_dict{ + ["ѣ"] = "ě", -- U+048d -> U+011b -- 2-byte + ["і"] = "ì", -- U+0456 -> U+00ec -- 2-byte + ["ѳ"] = "f", -- U+0473 -> U+0066 -- 2-byte + ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 -- 3-byte + } + + translit.tables["russian pre-1918 lowercase ISO~9 2 byte"] = translit.ru_old_low + + translit.ru_old_upp = translit.make_add_dict{ + ["Ѣ"] = "Ě", -- U+048c -> U+011a -- 2-byte + ["І"] = "Ì", -- U+0406 -> U+00cc -- 2-byte + ["Ѳ"] = "F", -- U+0424 -> U+0046 -- 2-byte + ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2 -- 3-byte + } + + translit.ru_jer_hack = translit.make_add_dict{ + ["ь"] = "’", + ["Ь"] = "’", + ["ъ"] = "”", + ["Ъ"] = "”", + } + + translit.tables["russian magkij / tverdyj znak hack"] = translit.ru_jer_hack + + translit.tables["russian pre-1918 uppercase ISO~9 2 byte"] = translit.ru_old_upp + + --------------------------------------------------------- + -- Lowercase characters from other cyrillic alphabets -- + --------------------------------------------------------- + + translit.non_ru_low = translit.make_add_dict{ + ["ӑ"] = "ă", -- U+04d1 -> U+0103 + ["ӓ"] = "ä", -- U+04d3 -> U+00e4 + ["ә"] = "a̋", -- u+04d9 -> U+0061+030b + ["ґ"] = "g̀", -- u+0491 -> U+0067+0300 + ["ҕ"] = "ğ", -- U+0495 -> U+011f + ["ғ"] = "ġ", -- U+0493 -> U+0121 + ["ђ"] = "đ", -- U+0452 -> U+0111 + ["ѓ"] = "ǵ", -- U+0453 -> U+01f5 + ["ӗ"] = "ĕ", -- U+04d7 -> U+0115 + ["є"] = "ê", -- U+0454 -> U+00ea + ["ҽ"] = "c̆", -- U+04bd -> U+0063+0306 + ["ҿ"] = "ç̆", -- U+04bf -> U+00e7+0306 + ["ӂ"] = "z̆", -- U+04c2 -> U+007a+0306 + ["ӝ"] = "z̄", -- U+04dd -> U+007a+0304 + ["җ"] = "ž̧", -- U+0497 -> U+017e+0327 + ["ӟ"] = "z̈", -- U+04df -> U+007a+0308 + ["ѕ"] = "ẑ", -- U+0455 -> U+1e91 -- Mapped to dz in old cyrillic non-ISO. 
+ ["ӡ"] = "ź", -- U+04e1 -> U+017a + ["ӥ"] = "î", -- U+04e5 -> U+00ee + ["і"] = "ì", -- U+0456 -> U+00ec + ["ї"] = "ï", -- U+0457 -> U+00ef + ["ј"] = "ǰ", -- U+0458 -> U+01f0 + ["қ"] = "ķ", -- U+049b -> U+0137 + ["ҟ"] = "k̄", -- U+049f -> U+006b+0304 + ["љ"] = "l̂", -- U+0459 -> U+006c+0302 + ["њ"] = "n̂", -- U+045a -> U+006e+0302 + ["ҥ"] = "ṅ", -- U+04a5 -> U+1e45 + ["ң"] = "ṇ", -- U+04a3 -> U+1e47 + ["ӧ"] = "ö", -- U+04e7 -> U+00f6 + ["ө"] = "ô", -- U+04e9 -> U+00f4 + ["ҧ"] = "ṕ", -- U+04a7 -> U+1e55 + ["ҫ"] = "ç", -- U+04ab -> U+00e7 + ["ҭ"] = "ţ", -- U+04ad -> U+0163 + ["ћ"] = "ć", -- U+045b -> U+0107 + ["ќ"] = "ḱ", -- U+045c -> U+1e31 + ["у́"] = "ú", -- U+0443+ -> U+00fA + ["ў"] = "ŭ", -- U+045e -> U+016d + ["ӱ"] = "ü", -- U+04f1 -> U+00fc + ["ӳ"] = "ű", -- U+04f3 -> U+0171 + ["ү"] = "ù", -- U+04af -> U+00f9 + ["ҳ"] = "ḩ", -- U+04b3 -> U+1e29 + ["һ"] = "ḥ", -- U+04bb -> U+1e25 + ["ҵ"] = "c̄", -- U+04b5 -> U+0063+0304 + ["ӵ"] = "c̈", -- U+04f5 -> U+0063+0308 + ["ҷ"] = "ç", -- U+04cc -> U+00e7 + ["џ"] = "d̂", -- U+045f -> U+0064+0302 + ["ӹ"] = "ÿ", -- U+04f9 -> U+00ff + ["ѣ"] = "ě", -- U+048d -> U+011b + ["ѫ"] = "ǎ", -- U+046b -> U+01ce -- Mapped to ǫ in non-ISO old cyrillic. + ["ѳ"] = "f̀", -- U+0473 -> U+0066+0300 -- This is mapped to ‘f’ in ru_old. 
+ ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 + ["ҩ"] = "ò", -- U+04a9 -> U+00f2 + ["Ӏ"] = "‡" -- U+04cf -> U+2021 + } + + translit.tables["cyrillic other lowercase ISO~9"] = translit.non_ru_low + + --------------------------------------------------------- + -- Uppercase characters from other cyrillic alphabets -- + --------------------------------------------------------- + + translit.non_ru_upp = translit.make_add_dict{ + ["Ӑ"] = "Ă", -- U+04d0 -> U+0102 + ["Ӓ"] = "Ä", -- U+04d2 -> U+00c4 + ["Ә"] = "A̋", -- U+04d8 -> U+0041+030b + ["Ґ"] = "G̀", -- U+0490 -> U+0047+0300 + ["Ҕ"] = "Ğ", -- U+0494 -> U+011e + ["Ғ"] = "Ġ", -- U+0492 -> U+0120 + ["Ђ"] = "Đ", -- U+0402 -> U+0110 + ["Ѓ"] = "Ǵ", -- U+0403 -> U+01f4 + ["Ӗ"] = "Ĕ", -- U+04d6 -> U+0114 + ["Є"] = "Ê", -- U+0404 -> U+00ca + ["Ҽ"] = "C̆", -- U+04bc -> U+0043+0306 + ["Ҿ"] = "Ç̆", -- U+04be -> U+00c7+0306 + ["Ӂ"] = "Z̆", -- U+04c1 -> U+005a+0306 + ["Ӝ"] = "Z̄", -- U+04dc -> U+005a+0304 + ["Җ"] = "Ž̦", -- U+0496 -> U+017d+0326 + ["Ӟ"] = "Z̈", -- U+04de -> U+005a+0308 + ["Ѕ"] = "Ẑ", -- U+0405 -> U+1e90 + ["Ӡ"] = "Ź", -- U+04e0 -> U+0179 + ["Ӥ"] = "Î", -- U+04e4 -> U+00ce + ["І"] = "Ì", -- U+0406 -> U+00cc + ["Ї"] = "Ï", -- U+0407 -> U+00cf + ["Ј"] = "J̌", -- U+0408 -> U+004a+030c + ["Қ"] = "Ķ", -- U+049a -> U+0136 + ["Ҟ"] = "K̄", -- U+049e -> U+004b+0304 + ["Љ"] = "L̂", -- U+0409 -> U+004c+0302 + ["Њ"] = "N̂", -- U+040a -> U+004e+0302 + ["Ҥ"] = "Ṅ", -- U+04a4 -> U+1e44 + ["Ң"] = "Ṇ", -- U+04a2 -> U+1e46 + ["Ӧ"] = "Ö", -- U+04e6 -> U+00d6 + ["Ө"] = "Ô", -- U+04e8 -> U+00d4 + ["Ҧ"] = "Ṕ", -- U+04a6 -> U+1e54 + ["Ҫ"] = "Ç", -- U+04aa -> U+00c7 + ["Ҭ"] = "Ţ", -- U+04ac -> U+0162 + ["Ћ"] = "Ć", -- U+040b -> U+0106 + ["Ќ"] = "Ḱ", -- U+040c -> U+1e30 + ["У́"] = "Ú", -- U+0423 -> U+00da + ["Ў"] = "Ŭ", -- U+040e -> U+016c + ["Ӱ"] = "Ü", -- U+04f0 -> U+00dc + ["Ӳ"] = "Ű", -- U+04f2 -> U+0170 + ["Ү"] = "Ù", -- U+04ae -> U+00d9 + ["Ҳ"] = "Ḩ", -- U+04b2 -> U+1e28 + ["Һ"] = "Ḥ", -- U+04ba -> U+1e24 + ["Ҵ"] = "C̄", -- U+04b4 -> U+0043+0304 
+        ["Ӵ"] = "C̈", -- U+04f4 -> U+0043+0308
+        ["Ҷ"] = "Ç", -- U+04cb -> U+00c7
+        ["Џ"] = "D̂", -- U+040f -> U+0044+0302
+        ["Ӹ"] = "Ÿ", -- U+04f8 -> U+0178
+        ["Ѣ"] = "Ě", -- U+048c -> U+011a
+        ["Ѫ"] = "Ǎ", -- U+046a -> U+01cd
+        ["Ѳ"] = "F̀", -- U+0472 -> U+0046+0300
+        ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2
+        ["Ҩ"] = "Ò", -- U+04a8 -> U+00d2
+        ["’"] = "‵", -- U+2035 -> U+2019
+        ["Ӏ"] = "‡" -- U+04c0 -> U+2021
+    }
+
+    translit.tables["cyrillic other uppercase ISO~9"] = translit.non_ru_upp
+
+    translit.done_iso9 = true
+end
+
+--===========================================================================--
+-- End Of Tables --
+--===========================================================================--
+-- Build the ISO 9 parser: Russian tables always, pre-1918 letters for "ru_old" and "all", other alphabets for "all" only.
+local function iso9 (mode)
+    local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs
+    local addrules = translit.addrules
+    local utfchar = translit.utfchar
+
+    local iso9 = translit.make_add_dict{}
+    iso9 = translit.ru_upp + translit.ru_low
+
+    if mode == "ru_old" or mode == "all" then
+
+        iso9 = iso9 + translit.ru_old_upp + translit.ru_old_low
+
+        if mode == "all" then
+            iso9 = iso9
+                 + translit.non_ru_upp
+                 + translit.non_ru_low
+        end
+        if translit.deficient_font == "yes" then -- NOTE(review): presumably ru_jer_hack overrides the jer mappings here; confirm add_dict `+` precedence
+            iso9 = iso9
+                 + translit.ru_old_upp
+                 + translit.ru_old_low
+                 + translit.ru_jer_hack
+        end
+    end
+
+    local p_iso9 = addrules (iso9, p_iso9)
+    local iso9_parser = Cs((p_iso9 / iso9 + utfchar)^0)
+
+    return iso9_parser
+end
+-- Transliterate `text` with the "all" parser; the cache key includes translit.deficient_font.
+translit.methods["all"] = function (text)
+    local pname = "all" .. translit.deficient_font
+    local p = pcache[pname]
+    if not p then
+        p = iso9("all")
+        pcache[pname] = p
+    end
+    return lpegmatch(p, text)
+end
+
+translit.methods["ru"] = translit.methods["all"] -- NOTE(review): "ru" shares the "all" parser; confirm this is intended
+-- Transliterate `text` with the pre-1918 Russian parser (mode "ru_old").
+translit.methods["ru_old"] = function (text)
+    local pname = "ru_old" .. translit.deficient_font
+    local p = pcache[pname]
+    if not p then
+        p = iso9("ru_old") -- build the parser for the mode it is cached under, not "all"
+        pcache[pname] = p
+    end
+    return lpegmatch(p, text)
+end
+
+-- vim:ft=lua:sw=4:ts=4
diff --git a/src/trans_tables_scntfc.lua b/src/trans_tables_scntfc.lua
new file mode 100644
index 0000000..96002c4
--- /dev/null
+++ b/src/trans_tables_scntfc.lua
@@ -0,0 +1,287 @@
+--===========================================================================--
+-- Other transliterations --
+--===========================================================================--
+
+local translit = thirddata.translit
+local pcache = translit.parser_cache
+local lpegmatch = lpeg.match
+
+-- The following are needed because ISO 9 does not cover old Slavonic
+-- characters that became obsolete before the advent of гражданский шрифт.
+
+-- Please note that these mappings are not bijective so don't expect the result
+-- to be easily revertible (by machines).
+
+-- Source p. 77 of
+-- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf
+
+if not translit.done_ocs then
+    -----------------------------------------------------------------------
+    -- Lowercase and uppercase letter Uk -- “scientific transliteration” --
+    -----------------------------------------------------------------------
+
+    translit.ocs_uk = translit.make_add_dict{
+        ["oу"] = "u",
+        ["оу"] = "u",
+        ["Оу"] = "U",
+    }
+    -----------------------------------------------------------------------------
+    -- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” --
+    -----------------------------------------------------------------------------
+
+    translit.ocs_low = translit.make_add_dict{
+        ["а"] = "a",
+        ["б"] = "b",
+        ["в"] = "v",
+        ["г"] = "g",
+        ["д"] = "d",
+        ["є"] = "e",
+        ["ж"] = "ž",
+        ["ꙃ"] = "ʒ", -- U+0292, alternative: dz U+01f3
+        ["ѕ"] = "ʒ",
+        ["ꙁ"] = "z",
+        ["з"] = "z",
+        ["и"] = "i",
+        ["і"] = "i",
+        ["ї"] = "i",
+        ["ћ"] = "g’",
+        ["к"] = "k",
+        ["л"] = "l",
+        ["м"] = "m",
+        ["н"] = "n", +
["о"] = "o",
+        ["п"] = "p",
+        ["р"] = "r",
+        ["с"] = "s",
+        ["т"] = "t",
+        ["у"] = "u",
+        ["ѹ"] = "u",
+        ["ꙋ"] = "u",
+        ["ф"] = "f",
+        ["х"] = "x",
+        ["ѡ"] = "o", --"ō",
+        ["ѿ"] = "ot", -- U+047f
+        ["ѽ"] = "o!", -- U+047d
+        ["ꙍ"] = "o!", -- U+a64d
+        ["ц"] = "c",
+        ["ч"] = "č",
+        ["ш"] = "š",
+        ["щ"] = "št",
+        ["ъ"] = "ъ",
+        ["ы"] = "y",
+        ["ꙑ"] = "y", -- Old jery (U+a651) as used e.g. by the OCS Wikipedia.
+        ["ь"] = "ь",
+        ["ѣ"] = "ě",
+        ["ю"] = "ju",
+        ["ꙗ"] = "ja",
+        ["ѥ"] = "je",
+        ["ѧ"] = "ę",
+        ["ѩ"] = "ję",
+        ["ѫ"] = "ǫ",
+        ["ѭ"] = "jǫ",
+        ["ѯ"] = "ks",
+        ["ѱ"] = "ps",
+        ["ѳ"] = "th",
+        ["ѵ"] = "ü",
+    }
+
+    translit.tables["OCS \\quotation{scientific} transliteration lowercase"] = translit.ocs_low
+
+    -----------------------------------------------------------------------------
+    -- Uppercase pre-Peter cyrillic characters -- “scientific transliteration” --
+    -----------------------------------------------------------------------------
+
+    translit.ocs_upp = translit.make_add_dict{
+        ["А"] = "A",
+        ["Б"] = "B",
+        ["В"] = "V",
+        ["Г"] = "G",
+        ["Д"] = "D",
+        ["Є"] = "E",
+        ["Ж"] = "Ž",
+        ["Ꙃ"] = "Ʒ", -- U+01b7, alternative: Dz U+01f2
+        ["Ѕ"] = "Ʒ",
+        ["Ꙁ"] = "Z",
+        ["З"] = "Z",
+        ["И"] = "I",
+        ["І"] = "I",
+        ["Ї"] = "I",
+        ["Ћ"] = "G’",
+        ["К"] = "K",
+        ["Л"] = "L",
+        ["М"] = "M",
+        ["Н"] = "N",
+        ["О"] = "O",
+        ["П"] = "P",
+        ["Р"] = "R",
+        ["С"] = "S",
+        ["Т"] = "T",
+        ["У"] = "U", -- uppercase result, like every other entry of this table
+        ["Ѹ"] = "U",
+        --["ꙋ"] = "U",
+        ["Ф"] = "F",
+        ["Х"] = "X",
+        ["Ѡ"] = "Ō", -- NOTE(review): lowercase ѡ maps to plain "o"; confirm which form is intended
+        ["Ѿ"] = "Ot", -- U+047e
+        ["Ѽ"] = "O!", -- U+047c
+        ["Ꙍ"] = "O!", -- U+a64c
+        ["Ц"] = "C",
+        ["Ч"] = "Č",
+        ["Ш"] = "Š",
+        ["Щ"] = "Št",
+        ["Ъ"] = "Ŭ",
+        ["Ы"] = "Y",
+        ["Ꙑ"] = "Y", -- U+a650
+        ["Ь"] = "Ĭ",
+        ["Ѣ"] = "Ě",
+        ["Ю"] = "Ju",
+        ["Ꙗ"] = "Ja",
+        ["Ѥ"] = "Je",
+        ["Ѧ"] = "Ę",
+        ["Ѩ"] = "Ję",
+        ["Ѫ"] = "Ǫ",
+        ["Ѭ"] = "Jǫ",
+        ["Ѯ"] = "Ks",
+        ["Ѱ"] = "Ps",
+        ["Ѳ"] = "Th",
+        ["Ѵ"] = "Ü",
+    }
+
+    translit.tables["OCS \\quotation{scientific} transliteration uppercase"] = translit.ocs_upp
+
+    -- Note
on the additional tables: these cover characters that are not defined
+    -- in ISO 9 but have a “scientific” transliteration. You may use them as
+    -- complementary mapping to ISO 9, trading off homogeneity for completeness.
+
+    ----------------------------------------------------------------------------------------
+    -- Lowercase additional pre-Peter cyrillic characters -- “scientific transliteration” --
+    ----------------------------------------------------------------------------------------
+
+    translit.ocs_add_low = translit.make_add_dict{
+        ["ѕ"] = "dz", -- Mapped to ẑ in ISO 9 (Macedonian …)
+        ["ѯ"] = "ks",
+        ["ѱ"] = "ps",
+        ["ѡ"] = "ô",
+        ["ѿ"] = "ot", -- U+047f
+        ["ѫ"] = "ǫ", -- Mapped to ǎ in ISO 9.
+        ["ѧ"] = "ę",
+        ["ѭ"] = "jǫ",
+        ["ѩ"] = "ję",
+        ["ѥ"] = "je",
+        ["ѹ"] = "u", -- Digraph uk.
+        ["ꙋ"] = "u", -- Monograph uk, U+a64b. (No glyph yet in the "fixed" font in February 2010 …)
+        ["ꙑ"] = "y", -- U+a651
+    }
+
+    translit.tables["OCS \\quotation{scientific} transliteration additional lowercase"] = translit.ocs_add_low
+
+    ----------------------------------------------------------------------------------------
+    -- Uppercase additional pre-Peter cyrillic characters -- “scientific transliteration” --
+    ----------------------------------------------------------------------------------------
+
+    translit.ocs_add_upp = translit.make_add_dict{
+        ["Ѕ"] = "Dz",
+        ["Ѯ"] = "Ks",
+        ["Ѱ"] = "Ps",
+        ["Ѡ"] = "Ô",
+        ["Ѿ"] = "Ot", -- U+047e; capitalised like the other uppercase entries
+        ["Ѫ"] = "Ǫ",
+        ["Ѧ"] = "Ę",
+        ["Ѭ"] = "Jǫ",
+        ["Ѩ"] = "Ję",
+        ["Ѥ"] = "Je",
+        ["Ѹ"] = "U", -- Digraph uk.
+        --["Ꙋ"] = "U", -- Monograph Uk, U+a64a.
+ ["Ꙑ"] = "Y", -- U+a650 + } + + translit.tables["OCS \\quotation{scientific} transliteration additional uppercase"] = translit.ocs_add_upp + translit.done_ocs = true +end + +--===========================================================================-- +-- End Of Tables -- +--===========================================================================-- + +local function scientific (mode) + local P, Cs = lpeg.P, lpeg.Cs + local utfchar = translit.utfchar + local addrules = translit.addrules + + local cyr = translit.make_add_dict{} + local cyruk, p_cyruk, p_cyr, scientific_parser + + if mode == "iso9_ocs" or mode == "iso9_ocs_hack" then + + environment.loadluafile("trans_tables_iso9") + cyr = translit.ocs_add_low + + translit.ocs_add_upp + + translit.ocs_low + + translit.ru_upp + + translit.ru_low + + translit.ru_old_upp + + translit.ru_old_low + + translit.non_ru_upp + + translit.non_ru_low + + translit.ocs_upp + + if translit.deficient_font == "yes" then + cyr = cyr + translit.ru_jer_hack + end + + p_cyr = addrules(cyr, p_cyr) + + scientific_parser = Cs((p_cyr / cyr + utfchar)^0) + + elseif mode == ("ocs") then + + cyr = translit.ocs_low + translit.ocs_upp + + p_cyruk = addrules(translit.ocs_uk, cyruk) + p_cyr = addrules(cyr, p_cyr) + + scientific_parser = Cs((p_cyruk / translit.ocs_uk + + p_cyr / cyr + + utfchar)^0) + + elseif mode == ("ocs_gla") then + environment.loadluafile( "trans_tables_glag") + cyr = translit.ocs_gla_low + translit.ocs_gla_upp + + p_cyr = addrules(cyr, p_cyr) + scientific_parser = Cs((p_cyr / cyr + utfchar)^0) + end + + return scientific_parser +end + + +translit.methods["iso9_ocs"] = function (text) + local pname = "iso9_ocs" .. 
translit.deficient_font + local p = pcache[pname] + if not p then + p = scientific("iso9_ocs") + pcache[pname] = p + end + return lpegmatch(p, text) +end + +translit.methods["ocs"] = function (text) + local p = pcache["ocs"] + if not p then + p = scientific("ocs") + pcache["ocs"] = p + end + return lpegmatch(p, text) +end + +translit.methods["ocs_gla"] = function (text) + local p = pcache["ocs_gla"] + if not p then + p = scientific("ocs_gla") + pcache["ocs_gla"] = p + end + return lpegmatch(p, text) +end + +-- vim:ft=lua:ts=4:sw=4 diff --git a/src/trans_tables_sr.lua b/src/trans_tables_sr.lua new file mode 100644 index 0000000..4f549c5 --- /dev/null +++ b/src/trans_tables_sr.lua @@ -0,0 +1,241 @@ + +--===========================================================================-- +-- Serbian -- +--===========================================================================-- + +local translit = thirddata.translit +local pcache = translit.parser_cache +local lpegmatch = lpeg.match + + +-- Special thanks to Mojca Miklavec and Arthur Reutenauer for their +-- assistance in creating these transliteration routines. 
+ +if not translit.done_serbian then + -------------------------------------------- + -- Lowercase Serbian (Cyrillic -> Latin) -- + -------------------------------------------- + translit.sr_tolt_lower = translit.make_add_dict{ + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["ђ"] = "đ", + ["е"] = "e", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["ј"] = "j", + ["к"] = "k", + ["л"] = "l", + ["љ"] = "lj", + ["м"] = "m", + ["н"] = "n", + ["њ"] = "nj", + ["о"] = "o", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["ћ"] = "ć", + ["у"] = "u", + ["ф"] = "f", + ["х"] = "h", + ["ц"] = "c", + ["ч"] = "č", + ["џ"] = "dž", + ["ш"] = "š", + } + + translit.tables["Serbian Cyr->Lat Transliteration lowercase"] = translit.sr_tolt_lower + + -------------------------------------------- + -- Uppercase Serbian (Cyrillic -> Latin) -- + -------------------------------------------- + + translit.sr_tolt_upper = translit.make_add_dict{ + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Ђ"] = "Đ", + ["Е"] = "E", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["Ј"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["Љ"] = "Lj", + ["М"] = "M", + ["Н"] = "N", + ["Њ"] = "Nj", + ["О"] = "O", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["Ћ"] = "Ć", + ["У"] = "U", + ["Ф"] = "F", + ["Х"] = "H", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Џ"] = "Dž", + ["Ш"] = "Š", + } + + translit.tables["Serbian Cyr->Lat Transliteration uppercase"] = translit.sr_tolt_upper + + local function __inverse_tab (t) + local result = { } + for k,v in next,t do result[v] = k end + return result + end + + translit.sr_tocy_lower = translit.make_add_dict(__inverse_tab(translit.sr_tolt_lower)) + translit.sr_tocy_upper = translit.make_add_dict(__inverse_tab(translit.sr_tolt_upper)) + + + --- Good reading up front: + --- + --- + + local except = { + ["konjug"] = "конјуг", + ["konjunk"] = "конјунк", + ["injekc"] = "инјекц", + ["injunkt"] = "инјункт", + 
["panjelin"] = "панјелин", + ["tanjug"] = "танјуг", + ["vanjezič"] = "ванјезич", + ["vanjadransk"] = "ванјадранск", + + ["nadžanj"] = "наджањ", + ["nadždrel"] = "надждрел", + ["nadžet"] = "наджет", + ["nadživ"] = "наджив", + ["nadžnj"] = "наджњ", + ["nadžup"] = "наджуп", + ["odžal"] = "оджал", + ["odžar"] = "оджар", + ["odživ"] = "оджив", + ["odžubor"] = "оджубор", + ["odžur"] = "оджур", + ["odžvak"] = "оджвак", + ["podžanr"] = "поджанр", + ["podže"] = "подже", -- “поджећи” + } + + local P = lpeg.P + local sub, upper = unicode.utf8.sub, unicode.utf8.upper + + local p_tocy, p_i_tocy, p_tolt, p_i_tolt + + for left, right in next, except do -- generating exception patterns for both sides + local Left = upper(sub(left, 1, 1)) .. sub(left, 2) + local Right = upper(sub(right, 1, 1)) .. sub(right, 2) + local LEFT, RIGHT = upper(left), upper(right) + + local p_i_left = P(left) / right + P(Left) / Right + P(LEFT) / RIGHT + local p_i_right = P(right) / left + P(Right) / Left + P(RIGHT) / LEFT + + local p_left = P" " * p_i_left + local p_right = P" " * p_i_right + + if not p_tocy then + p_tocy = p_left + p_i_tocy = p_i_left + p_tolt = p_right + p_i_tolt = p_i_right + else + p_tocy = p_tocy + p_left + p_i_tocy = p_i_tocy + p_i_left + p_tolt = p_tolt + p_right + p_i_tolt = p_i_tolt + p_i_right + end + end + + local _p_hintchar = P"*" / "" + local hintme = "dln" + local _p_tocy_hint, _p_tolt_hint + + for left in hintme:utfcharacters() do + local right = translit.sr_tocy_lower[left] + local LEFT, RIGHT = upper(left), upper(right) + if not _p_tocy_hint then + _p_tocy_hint = P(left) / right + P(LEFT) / RIGHT + _p_tolt_hint = P(right) / left + P(RIGHT) / LEFT + else + _p_tocy_hint = _p_tocy_hint + P(left) / right + P(LEFT) / RIGHT + _p_tolt_hint = _p_tolt_hint + P(right) / left + P(RIGHT) / LEFT + end + end + + translit.serbian_exceptions = { } + translit.serbian_exceptions.p_tocy = p_tocy + translit.serbian_exceptions.p_tolt = p_tolt + translit.serbian_exceptions.p_tocy_init = 
p_i_tocy + translit.serbian_exceptions.p_tolt_init = p_i_tolt + translit.serbian_exceptions.p_tocy_hint = _p_tocy_hint * _p_hintchar + translit.serbian_exceptions.p_tolt_hint = _p_tolt_hint * _p_hintchar + + translit.done_serbian = true +end + +--===========================================================================-- +-- End Of Tables -- +--===========================================================================-- + + +local t = translit +local function sr (mode) + local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs + local utfchar = translit.utfchar + local modestr = "p_" .. mode:match("to..$") + local _p_sre = t.serbian_exceptions[modestr] + local _p_sre_i = t.serbian_exceptions[modestr .. "_init"] + + local trl_sr = translit.make_add_dict{} + trl_sr = t[mode.."_upper"] + t[mode.."_lower"] + + -- transliteration from latin script requires macro handling … + local _p_macro = P[[\]] * R("az", "AZ")^1 -- assuming standard catcodes + local _p_sr = translit.addrules (trl_sr, _p_sr) / trl_sr + if translit.hinting then + _p_sr = t.serbian_exceptions[modestr .. "_hint"] + _p_sr + end + + local p_sr + if translit.sr_except then + p_sr = Cs(_p_sre_i^-1 * (_p_macro + _p_sre + _p_sr + utfchar)^0) + else + p_sr = Cs((_p_macro + _p_sr + utfchar)^0) + end + + return p_sr +end + +translit.methods["sr_tolt"] = function (text) + local pname = "sr_tolt" .. tostring(translit.hinting) .. tostring(translit.sr_except) + local p = pcache[pname] + if not p then + p = sr("sr_tolt") + pcache[pname] = p + end + return lpegmatch(p, text) +end + +translit.methods["sr_tocy"] = function (text) + local pname = "sr_tocy" .. tostring(translit.hinting) .. 
tostring(translit.sr_except) + local p = pcache[pname] + if not p then + p = sr("sr_tocy") + pcache[pname] = p + end + return lpegmatch(p, text) +end + +-- vim:ft=lua:sw=4:ts=4 diff --git a/src/trans_tables_trsc.lua b/src/trans_tables_trsc.lua new file mode 100644 index 0000000..fbc35d1 --- /dev/null +++ b/src/trans_tables_trsc.lua @@ -0,0 +1,867 @@ +--===================================================================-- +-- Legacy national transliterations -- +--===================================================================-- + +local translit = thirddata.translit +local addrules = translit.addrules +local utfchar = translit.utfchar + +local lpegmatch = lpeg.match +local tablepack = table.pack -- lua 5.2 precaution + +--------------------------------- +-- German simple transcription -- +--------------------------------- +-- Reference: „DUDEN. Rechtschreibung der deutschen Sprache“; +-- 20. Aufl., +-- Mannheim et. al. 1991. + +if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then + + -------------------------------------------------------- + -- Lowercase German simple transcription---first pass -- + -------------------------------------------------------- + + translit.ru_trsc_low_first = translit.make_add_dict{ + [" е"] = " je", + ["ъе"] = "je", + ["ье"] = "je", + [" ё"] = " jo", + ["ъё"] = "jo", + ["ьё"] = "jo", + ["жё"] = "scho", + ["чё"] = "tscho", + ["шё"] = "scho", + ["щё"] = "schtscho", + ["ье"] = "je", + ["ьи"] = "ji", + ["ьо"] = "jo", + ["ий"] = "i", + ["ый"] = "y", + ["кс"] = "x" + } + + translit.tables["German transcription first pass lowercase"] + = translit.ru_trsc_low_first + + -------------------------------------------------------- + -- Uppercase German simple transcription---first pass -- + -------------------------------------------------------- + + translit.ru_trsc_upp_first = translit.make_add_dict{ + [" Е"] = " Je", + ["Ъe"] = "Je", -- Pedantic, isn't it? 
+ ["Ье"] = "Je", + [" Ё"] = "Jo", + ["Ъё"] = "Jo", + ["Ьё"] = "Jo", + ["Жё"] = "Scho", + ["Чё"] = "Tscho", + ["Шё"] = "Scho", + ["Щё"] = "Schtscho", + ["Кс"] = "ks" + } + + translit.tables["German transcription first pass uppercase"] + = translit.ru_trsc_upp_first + + ------------------------------------------- + -- Lowercase German simple transcription -- + ------------------------------------------- + + translit.ru_trsc_low = translit.make_add_dict{ + ["а"] = "a", + ["б"] = "b", + ["в"] = "w", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "jo", + ["ж"] = "sch", + ["з"] = "s", + ["и"] = "i", + ["й"] = "i", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["о"] = "o", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ф"] = "f", + ["х"] = "ch", + ["ц"] = "z", + ["ч"] = "tsch", + ["ш"] = "sch", + ["щ"] = "schtsch", + ["ъ"] = "", + ["ы"] = "y", + ["ь"] = "", + ["э"] = "e", + ["ю"] = "ju", + ["я"] = "ja" + } + + translit.tables["German transcription second pass lowercase"] + = translit.ru_trsc_low + + ------------------------------------------- + -- Uppercase German simple transcription -- + ------------------------------------------- + + translit.ru_trsc_upp = translit.make_add_dict{ + ["А"] = "A", + ["Б"] = "B", + ["В"] = "W", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Jo", + ["Ж"] = "Sch", + ["З"] = "S", + ["И"] = "I", + ["Й"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["О"] = "O", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ф"] = "F", + ["Х"] = "Ch", + ["Ц"] = "Z", + ["Ч"] = "Tsch", + ["Ш"] = "Sch", + ["Щ"] = "Schtsch", + ["Ъ"] = "", + ["Ы"] = "Y", + ["Ь"] = "", + ["Э"] = "E", + ["Ю"] = "Ju", + ["Я"] = "Ja" + } + + translit.tables["German transcription second pass uppercase"] + = translit.ru_trsc_upp + + translit.ru_trsc_iy = {"и", "ы", "И", "Ы"} + + function translit.gen_rules_de() + -- The following are more interesting than the previous 
tables + -- because they implement various rules. For instance the + -- table \type{translit.ru_trsc_irule} holds a substitution + -- dictionary for all possible combinations (including nonsense + -- galore) of a vowel preceding an “й” (Russian short i) + -- preceding a consonant; here we access the sets of Russian + -- vowels as well consonants that were defined earlier. + + -- The й-rule, VйC -> ViC + translit.ru_trsc_irule = translit.make_add_dict{} + for _, vow in ipairs(translit.ru_vowels) do + for _, cons in ipairs(translit.ru_consonants) do + local new_ante = vow .. "й" .. cons + local new_post = vow .. "i" .. cons + translit.ru_trsc_irule[new_ante] = new_post + end + end + + translit.tables["German transcription i-rule"] + = translit.ru_trsc_irule + + -- The second й-rule, йV -> jV && [иы]йC -> [иы]jC + translit.ru_trsc_jrule = {} + for _, vow in ipairs(translit.ru_vowels) do + local new_ante = "й" .. vow + local new_post = "j" .. vow + translit.ru_trsc_jrule[new_ante] = new_post + end + + for _, cons in ipairs(translit.ru_consonants) do + for _, iy in ipairs(translit.ru_trsc_iy) do + local new_ante = iy .. "й" .. cons + local new_post = iy .. "j" .. cons + translit.ru_trsc_jrule[new_ante] = new_post + end + end + + translit.tables["German transcription j-rule"] + = translit.ru_trsc_jrule + + -- The с-rule, VсV -> VssV + translit.ru_trsc_srule = translit.make_add_dict{} + for i, vow_1 in ipairs(translit.ru_vowels) do + for j, vow_2 in ipairs(translit.ru_vowels) do + local new_ante = vow_1 .. "с" .. vow_2 + local new_post = vow_1 .. "ss" .. vow_2 + translit.ru_trsc_srule[new_ante] = new_post + end + end + + translit.tables["German transcription s-rule"] + = translit.ru_trsc_srule + + -- The sharp-s-rule, Vсх -> Vßх + translit.ru_trsc_sharpsrule = translit.make_add_dict{} + for i, vow in ipairs(translit.ru_vowels) do + local new_ante = vow .. "сх" + local new_post = vow .. 
"ßх" + translit.ru_trsc_sharpsrule[new_ante] = new_post + end + + translit.tables["German transcription sharp-s-rule"] + = translit.ru_trsc_sharpsrule + + -- The е-rule, Vе -> Vje + translit.ru_trsc_jerule = translit.make_add_dict{} + for i, vow in ipairs(translit.ru_vowels) do + local new_ante = vow .. "е" + local new_post = vow .. "je" + translit.ru_trsc_jerule[new_ante] = new_post + end + + translit.tables["German transcription je-rule"] + = translit.ru_trsc_jerule + + -- The ё-rule, Vё -> Vjo + -- This should be redundant as [жцчшщ]ё -> o, else ё -> jo . + -- Somebody should teach those DUDEN-guys parsimony. + translit.ru_trsc_jorule = translit.make_add_dict{} + for i, vow in ipairs(translit.ru_vowels) do + local new_ante = vow .. "ё" + local new_post = vow .. "jo" + translit.ru_trsc_jorule[new_ante] = new_post + end + + translit.tables["German transcription (redundant) jo-rule"] + = translit.ru_trsc_jorule + + end + + translit.gen_rules_de() + translit.done_ru_trsc_de = true +end + +if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then + + -- This is about *eight* times as fast as the old pattern. Just + -- waiting for v0.10 to make it into luatex. + + local de_tables = { } + + -------------------------------------------------------- + -- Lowercase German simple transcription---first pass -- + -------------------------------------------------------- + + de_tables[1] = { -- lowercase initial + [" е"] = " je", ["ъе"] = "je", ["ье"] = "je", + [" ё"] = " jo", ["ъё"] = "jo", ["ьё"] = "jo", + ["жё"] = "scho", ["цё"] = "scho", ["чё"] = "zo", + ["шё"] = "scho", ["щё"] = "schtscho", ["ье"] = "je", + ["ьи"] = "ji", ["ьо"] = "jo", ["ий"] = "i", + ["ый"] = "y", ["кс"] = "x" -- Extraordinarily stupid one. 
+ } + translit.tables["German transcription first pass lowercase"] + = de_tables[1] + + -------------------------------------------------------- + -- Uppercase German simple transcription---first pass -- + -------------------------------------------------------- + + de_tables[2] = { -- uppercase initial + [" Е"] = " Je", ["Ъe"] = "Je", ["Ье"] = "Je", + [" Ё"] = "Jo", ["Ъё"] = "Jo", ["Ьё"] = "Jo", + ["Жё"] = "Scho", ["Чё"] = "Tscho", ["Шё"] = "Scho", + ["Щё"] = "Schtscho", ["Кс"] = "ks" + } + translit.tables["German transcription first pass uppercase"] + = de_tables[2] + + ------------------------------------------- + -- Lowercase German simple transcription -- + ------------------------------------------- + + de_tables[3] = { -- lowercase + ["а"] = "a", ["б"] = "b", ["в"] = "w", ["г"] = "g", + ["д"] = "d", ["е"] = "e", ["ё"] = "jo", ["ж"] = "sch", + ["з"] = "s", ["и"] = "i", ["й"] = "i", ["к"] = "k", + ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", + ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", + ["у"] = "u", ["ф"] = "f", ["х"] = "ch", ["ц"] = "z", + ["ч"] = "tsch", ["ш"] = "sch", ["щ"] = "schtsch", + ["ъ"] = "", ["ы"] = "y", ["ь"] = "", ["э"] = "e", + ["ю"] = "ju", ["я"] = "ja" + } + translit.tables["German transcription second pass lowercase"] + = de_tables[3] + + ------------------------------------------- + -- Uppercase German simple transcription -- + ------------------------------------------- + + de_tables[4] = { -- uppercase + ["А"] = "A", ["Б"] = "B", ["В"] = "W", ["Г"] = "G", + ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Jo", ["Ж"] = "Sch", + ["З"] = "S", ["И"] = "I", ["Й"] = "J", ["К"] = "K", + ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", + ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", + ["У"] = "U", ["Ф"] = "F", ["Х"] = "Ch", ["Ц"] = "Z", + ["Ч"] = "Tsch", ["Ш"] = "Sch", ["Щ"] = "Schtsch",["Ъ"] = "", + ["Ы"] = "Y", ["Ь"] = "", ["Э"] = "E", ["Ю"] = "Ju", + ["Я"] = "Ja" + } + translit.tables["German transcription second pass uppercase"] + 
= de_tables[4] + + local B, P, Cs = lpeg.B, lpeg.P, lpeg.Cs + + -- All chars are 2-byte. + local Co = P{ + P"б" + "в" + "г" + "д" + "ж" + "з" + "к" + "л" + "м" + "н" + + "п" + "р" + "с" + "т" + "ф" + "х" + "ц" + "ч" + "ш" + "щ" + + "ъ" + "ь" + + "Б" + "В" + "Г" + "Д" + "Ж" + "З" + "К" + "Л" + "М" + "Н" + + "П" + "Р" + "С" + "Т" + "Ф" + "Х" + "Ц" + "Ч" + "Ш" + "Щ" + + "Ъ" + "Ь" + } + + local Vo = P{ + P"а" + "е" + "ё" + "и" + "й" + "о" + "у" + "ы" + "э" + "я" + + "ю" + "А" + "Е" + "Ё" + "И" + "Й" + "О" + "У" + "Ы" + "Э" + + "Я" + "Ю" + } + + local iy = P"и" + P"ы" + P"И" + P"Ы" + + ------------------------------------------- + -- Pattern generation. + ------------------------------------------- + + local p_transcript + + for _, set in next, de_tables do + for str, rep in next, set do + if not p_transcript then -- it’ll be empty initially + p_transcript = P(str) / rep + else + p_transcript = p_transcript + (P(str) / rep) + end + end + end + + local irule = B(Vo,2) * Cs(P"й") * #Co / "i" + local iyrule = B(iy,2) * Cs(P"й") * #Co / "j" + local jrule = Cs(P"й") * #Vo / "j" + local srule = B(Vo,2) * Cs(P"с") * #Vo / "ss" + local ssrule = B(Vo,2) * Cs(P"с") * #P"х" / "ß" + local jerule = B(Vo,2) * Cs(P"е") / "je" + local jorule = B(Vo,2) * Cs(P"ё") / "jo" + + translit.future_ru_transcript_de + = Cs((iyrule + jrule + irule + + jerule + srule + ssrule + + jorule + p_transcript + 1)^0 + ) +end + +if not translit.done_ru_trsc_en then + + --------------------------------------------------------- + -- Lowercase English simple transcription---first pass -- + --------------------------------------------------------- + + translit.ru_trsc_en_low_first = translit.make_add_dict{ + [" е"] = " ye", + ["ъе"] = "ye", + ["ье"] = "ye", + ["ье"] = "ye", + ["ьи"] = "yi", + } + + translit.tables["English transcription lowercase first pass"] + = translit.ru_trsc_en_low_first + + --------------------------------------------------------- + -- Uppercase English simple transcription---first pass 
-- + --------------------------------------------------------- + + translit.ru_trsc_en_upp_first = translit.make_add_dict{ + [" Е"] = " Ye", + ["Ъe"] = "Ye", + ["Ье"] = "Ye", + } + + translit.tables["English transcription uppercase first pass"] + = translit.ru_trsc_en_upp_first + + -------------------------------------------- + -- Lowercase English simple transcription -- + -------------------------------------------- + + translit.ru_trsc_en_low = translit.make_add_dict{ + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "e", + ["ж"] = "zh", + ["з"] = "z", + ["и"] = "i", + ["й"] = "y", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["о"] = "o", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ф"] = "f", + ["х"] = "kh", + ["ц"] = "ts", + ["ч"] = "ch", + ["ш"] = "sh", + ["щ"] = "shsh", + ["ъ"] = "", + ["ы"] = "y", + ["ь"] = "", + ["э"] = "e", + ["ю"] = "yu", + ["я"] = "ya" + } + + translit.tables["English transcription lowercase second pass"] + = translit.ru_trsc_en_low + + -------------------------------------------- + -- Uppercase English simple transcription -- + -------------------------------------------- + + translit.ru_trsc_en_upp = translit.make_add_dict{ + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "E", + ["Ж"] = "Zh", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "Y", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["О"] = "O", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ф"] = "F", + ["Х"] = "Kh", + ["Ц"] = "Ts", + ["Ч"] = "Ch", + ["Ш"] = "Sh", + ["Щ"] = "Shsh", + ["Ъ"] = "", + ["Ы"] = "Y", + ["Ь"] = "", + ["Э"] = "E", + ["Ю"] = "Yu", + ["Я"] = "Ya" + } + + translit.tables["English transcription uppercase second pass"] + = translit.ru_trsc_en_upp + + function translit.gen_rules_en () + -- The english е-rule, Vе -> Vye + translit.ru_trsc_en_jerule = 
translit.make_add_dict{} + for i, vow in ipairs(translit.ru_vowels) do + local new_ante = vow .. "е" + local new_post = vow .. "ye" + translit.ru_trsc_en_jerule[new_ante] = new_post + end + + translit.tables["English transcription ye-rule"] + = translit.ru_trsc_en_jerule + end + + translit.gen_rules_en() + translit.done_ru_trsc_en = true +end + + +if not translit.done_ru_trsc_cz then + ----------------------------------- + -- Lowercase Czech transcription -- + ----------------------------------- + + translit.ru_trsc_cz_low = translit.make_add_dict{ + ["а"] = "a", + ["б"] = "b", + ["в"] = "v", + ["г"] = "g", + ["д"] = "d", + ["е"] = "e", + ["ё"] = "ë", + ["ж"] = "ž", + ["з"] = "z", + ["и"] = "i", + ["й"] = "j", + ["к"] = "k", + ["л"] = "l", + ["м"] = "m", + ["н"] = "n", + ["о"] = "o", + ["п"] = "p", + ["р"] = "r", + ["с"] = "s", + ["т"] = "t", + ["у"] = "u", + ["ф"] = "f", + ["х"] = "ch", + ["ц"] = "c", + ["ч"] = "č", + ["ш"] = "š", + ["щ"] = "šč", + ["ъ"] = "ъ", + ["ы"] = "y", + ["ь"] = "ь", + ["э"] = "è", + ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and + ["я"] = "ja", -- тя -> ťa, but that would complicate things a + } -- bit and linguists might not agree. 
+ + translit.tables["Czech transcription lowercase"] + = translit.ru_trsc_cz_low + + ----------------------------------- + -- Uppercase Czech transcription -- + ----------------------------------- + + translit.ru_trsc_cz_upp = translit.make_add_dict{ + ["А"] = "A", + ["Б"] = "B", + ["В"] = "V", + ["Г"] = "G", + ["Д"] = "D", + ["Е"] = "E", + ["Ё"] = "Ë", + ["Ж"] = "Ž", + ["З"] = "Z", + ["И"] = "I", + ["Й"] = "J", + ["К"] = "K", + ["Л"] = "L", + ["М"] = "M", + ["Н"] = "N", + ["О"] = "O", + ["П"] = "P", + ["Р"] = "R", + ["С"] = "S", + ["Т"] = "T", + ["У"] = "U", + ["Ф"] = "F", + ["Х"] = "Ch", + ["Ц"] = "C", + ["Ч"] = "Č", + ["Ш"] = "Š", + ["Щ"] = "Šč", + ["Ъ"] = "Ъ", + ["Ы"] = "Y", + ["Ь"] = "Ь", + ["Э"] = "È", + ["Ю"] = "Ju", + ["Я"] = "Ja" + } + + translit.tables["Czech transcription uppercase"] + = translit.ru_trsc_cz_upp + + ---------------------------------------------- + -- Lowercase Additional Czech Transcription -- + ---------------------------------------------- + + translit.ru_trsc_cz_add_low = translit.make_add_dict{ + ["ѕ"] = "dz", + ["з"] = "z", + ["ꙁ"] = "z", + ["і"] = "ï", + ["ѹ"] = "u", + ["ѡ"] = "ō", + ["ѣ"] = "ě", + ["ѥ"] = "je", + ["ѧ"] = "ę", + ["ѩ"] = "ję", + ["ѫ"] = "ǫ", + ["ѭ"] = "jǫ", + ["ѯ"] = "ks", + ["ѱ"] = "ps", + ["ѳ"] = "th", + ["ѵ"] = "ÿ", + } + + translit.tables[ + "Czech transcription for OCS and pre-1918 lowercase"] + = translit.ru_trsc_cz_add_low + + + ---------------------------------------------- + -- Uppercase Additional Czech Transcription -- + ---------------------------------------------- + + translit.ru_trsc_cz_add_upp = translit.make_add_dict{ + ["Ѕ"] = "Dz", + ["З"] = "Z", + ["Ꙁ"] = "Z", + ["І"] = "Ï", + ["Ѹ"] = "U", + ["Ѡ"] = "Ō", + ["Ѣ"] = "Ě", + ["Ѥ"] = "Je", + ["Ѧ"] = "Ę", + ["Ѩ"] = "Ję", + ["Ѫ"] = "Ǫ", + ["Ѭ"] = "Jǫ", + ["Ѯ"] = "Ks", + ["Ѱ"] = "Ps", + ["Ѳ"] = "Th", + ["Ѵ"] = "Ÿ", + } + + translit.tables[ + "Czech transcription for OCS and pre-1918 uppercase"] + = translit.ru_trsc_cz_add_upp + translit.done_ru_trsc_cz = 
true -- completes the `translit.done_ru_trsc_cz =` assignment begun on the preceding line
end

--===================================================================--
--                          End Of Tables                            --
--===================================================================--

-- Transcribe `text` according to `mode` and return the resulting string.
--
-- Recognised modes:
--   "ru_transcript_en"      table passes: ye-rule, two-char rules, single chars
--   "ru_transcript_en_exp"  the same rules folded into one lpeg pass
--   "ru_cz", "ocs_cz"       Czech tables ("ocs_cz" adds the historical set)
--   "ru_transcript_de_exp"  German rules folded into one lpeg pass
--   "ru_transcript_de"      German rules, implementation picked by lpeg version
-- Any other mode falls through and yields nil.
--
-- Relies on the file-level locals `addrules`, `utfchar` and `lpegmatch` and
-- on the tables registered in `translit` further up in this file.
local function transcript (mode, text)
    local P, Cs = lpeg.P, lpeg.Cs

    -- Ordered choice over the *values* of an array-style list of strings.
    -- `addrules` cannot be used for this: it builds its alternatives from a
    -- table's keys, which for lists such as `translit.ru_vowels` are the
    -- numeric indices (yielding P(1), P(2), ... instead of letter patterns).
    local function one_of (chars)
        local patt
        for i = 1, #chars do
            local alt = P(chars[i])
            patt = patt and patt + alt or alt
        end
        return patt
    end

    -- Merge all dictionaries passed after `s` (later ones win on equal keys)
    -- and replace every occurrence of a key in `s` by its value in one pass.
    local function tab_subst (s, ...)
        local sets = { ... }
        local merged = translit.make_add_dict{}
        for n = 1, #sets do
            merged = merged + sets[n]
        end
        local p_keys = addrules(merged)
        local fp = Cs((Cs(P(p_keys) / merged) + utfchar)^0)
        return lpegmatch(fp, s)
    end

    if mode == "ru_transcript_en" then

        text = tab_subst(text, translit.ru_trsc_en_jerule)
        text = tab_subst(text,
                         translit.ru_trsc_en_low_first,
                         translit.ru_trsc_en_upp_first)
        text = tab_subst(text,
                         translit.ru_trsc_en_low,
                         translit.ru_trsc_en_upp)

        return text

    elseif mode == "ru_transcript_en_exp" then

        local en_low_upp = translit.ru_trsc_en_low + translit.ru_trsc_en_upp

        local twochar = addrules(translit.ru_trsc_en_low_first)
        twochar = addrules(translit.ru_trsc_en_upp_first, twochar)

        local tworepl = translit.ru_trsc_en_low_first
                      + translit.ru_trsc_en_upp_first

        -- This branch used to reference an undefined `vow`; build the vowel
        -- set here so the ye-rule has something to match against.
        local vow = one_of(translit.ru_vowels)

        -- The е-rule, Vе -> Vye
        local function V_je (s)
            local ante = utf.sub(s, 1, 1)
            return en_low_upp[ante] .. "ye"
        end

        local jerule = Cs((vow * "е") / V_je)

        local dvoje = Cs(twochar / tworepl)
        local other = Cs(utfchar / en_low_upp)

        local g = Cs((dvoje + jerule + other + utfchar)^0)

        return g:match(text)

    elseif mode == "ru_cz" or mode == "ocs_cz" then

        text = tab_subst(text,
                         translit.ru_trsc_cz_low,
                         translit.ru_trsc_cz_upp)
        if mode == "ocs_cz" then
            text = tab_subst(text,
                             translit.ru_trsc_cz_add_low,
                             translit.ru_trsc_cz_add_upp)
        end
        return text

    elseif mode == "ru_transcript_de_exp" then

        -- Letter classes, built from the list values (see `one_of`).
        local vow = one_of(translit.ru_vowels)
        local con = one_of(translit.ru_consonants)
        local iy  = one_of(translit.ru_trsc_iy)

        local de_low_upp = translit.ru_trsc_upp + translit.ru_trsc_low

        local twochar = addrules(translit.ru_trsc_low_first)
        twochar = addrules(translit.ru_trsc_upp_first, twochar)

        local tworepl = translit.ru_trsc_low_first
                      + translit.ru_trsc_upp_first

        -- The й-rule, VйC -> ViC
        local function V_i_C (s)
            local ante = utf.sub(s, 1, 1)
            local post = utf.sub(s, 3, 3)
            return de_low_upp[ante] .. "i" .. de_low_upp[post]
        end

        -- The second й-rule, йV -> jV && [иы]йC -> [иы]jC
        local function iy_j_C (s)
            local ante = utf.sub(s, 1, 1)
            local post = utf.sub(s, 3, 3)
            return de_low_upp[ante] .. "j" .. de_low_upp[post]
        end

        local function j_V (s)
            local post = utf.sub(s, 2, 2)
            return "j" .. de_low_upp[post]
        end

        -- The с-rule, VсV -> VssV
        local function V_ss_V (s)
            local ante = utf.sub(s, 1, 1)
            local post = utf.sub(s, 3, 3)
            return de_low_upp[ante] .. "ss" .. de_low_upp[post]
        end

        -- The sharp-s-rule, Vсх -> Vßх
        local function V_sz_ch (s)
            local ante = utf.sub(s, 1, 1)
            return de_low_upp[ante] .. "ßch"
        end

        -- The е-rule, Vе -> Vje
        local function V_je (s)
            local ante = utf.sub(s, 1, 1)
            return de_low_upp[ante] .. "je"
        end

        -- Reapplying V_je on its result + next char would make this rule
        -- obsolete: Vее -> Vjeje.
        local function V_jeje (s)
            local ante = utf.sub(s, 1, 1)
            return de_low_upp[ante] .. "jeje"
        end

        -- The ё-rule, Vё -> Vjo
        -- This should be redundant as [жцчшщ]ё -> o, else ё -> jo .
        -- Somebody should teach those DUDEN guys parsimony.
        local function V_jo (s)
            local ante = utf.sub(s, 1, 1)
            return de_low_upp[ante] .. "jo"
        end

        local iyrule = Cs((iy * "й" * con) / iy_j_C)
        local jrule  = Cs(("й" * vow)      / j_V)
        local irule  = Cs((vow * "й" * con) / V_i_C)

        local ssrule = Cs((vow * "с" * vow) / V_ss_V)
        local szrule = Cs((vow * "сх")      / V_sz_ch)

        local jjrule = Cs((vow * "ее") / V_jeje)
        local jerule = Cs((vow * "е")  / V_je)
        local jorule = Cs((vow * "ё")  / V_jo)

        local dvoje = Cs(twochar / tworepl)
        local other = Cs(utfchar / de_low_upp)

        -- Rule groups, tried in this order before the plain tables.
        local izhe  = iyrule + jrule + irule
        local slovo = ssrule + szrule
        local jest  = jjrule + jerule + jorule

        local g = Cs((izhe + slovo + jest + dvoje + other + utfchar)^0)

        return g:match(text)

    elseif mode == "ru_transcript_de" then

        -- NOTE(review): both implementations are gated on an exact lpeg
        -- version string; with any other version this mode returns nil.
        if lpeg.version() == "0.9" then

            text = tab_subst(text, translit.ru_trsc_jrule)
            text = tab_subst(text, translit.ru_trsc_irule)
            text = tab_subst(text, translit.ru_trsc_jerule)
            text = tab_subst(text, translit.ru_trsc_srule)
            text = tab_subst(text, translit.ru_trsc_sharpsrule)
            text = tab_subst(text, translit.ru_trsc_jorule)
            text = tab_subst(text,
                             translit.ru_trsc_upp_first,
                             translit.ru_trsc_low_first)
            text = tab_subst(text,
                             translit.ru_trsc_upp,
                             translit.ru_trsc_low)

            return text
        elseif lpeg.version() == "0.10" then
            return translit.future_ru_transcript_de:match(text)
        end

    end

end

translit.methods ["ru_transcript_de"]
    = function (text) return
transcript("ru_transcript_de" , text) end +translit.methods ["ru_transcript_de_exp"] + = function (text) return transcript("ru_transcript_de_exp", text) end +translit.methods ["ru_transcript_en"] + = function (text) return transcript("ru_transcript_en" , text) end +translit.methods ["ru_transcript_en_exp"] + = function (text) return transcript("ru_transcript_en_exp", text) end +translit.methods ["ru_cz"] + = function (text) return transcript("ru_cz" , text) end +translit.methods ["ocs_cz"] + = function (text) return transcript("ocs_cz" , text) end + +-- vim:sw=4:ts=4:expandtab:ft=lua diff --git a/src/transliterator.lua b/src/transliterator.lua new file mode 100644 index 0000000..c101ec5 --- /dev/null +++ b/src/transliterator.lua @@ -0,0 +1,283 @@ +#!/usr/bin/env texlua +-------------------------------------------------------------------------------- +-- FILE: transliterator.lua +-- USAGE: to be called by t-transliterator.mkiv +-- DESCRIPTION: basic lua environment for the Transliterator module +-- REQUIREMENTS: latest ConTeXt MkIV +-- AUTHOR: Philipp Gesang (Phg), +-- CREATED: 2010-12-23 22:12:31+0100 +-------------------------------------------------------------------------------- +-- + +thirddata = thirddata or { } +thirddata.translit = thirddata.translit or { } +local translit = thirddata.translit +translit.tables = translit.tables or { } +translit.methods = translit.methods or { } +translit.deficient_font = "no" +translit.parser_cache = { } + +local utf8 = unicode and unicode.utf8 or utf8 +local utf8byte = utf8.byte +local utf8len = utf8.len + +-------------------------------------------------------------------------------- +-- Predefining vowel lists +-------------------------------------------------------------------------------- +-- If you haven't heard of cyrillic scripts until now you might want to read +-- at least the first 15 pages of +-- http://www.uni-giessen.de/partosch/eurotex99/berdnikov2.pdf +-- before you continue reading this file. 
translit.ru_vowels = {"а", "е", "ё", "и", "й", "о", "у", "ы", "э", "ю", "я",
                      "А", "Е", "Ё", "И", "Й", "О", "У", "Ы", "Э", "Ю", "Я"}
translit.ru_consonants = {"б", "в", "г", "д", "ж", "з", "к", "л", "м", "н",
                          "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш", "щ",
                          "Б", "В", "Г", "Д", "Ж", "З", "К", "Л", "М", "Н",
                          "П", "Р", "С", "Т", "Ф", "Х", "Ц", "Ч", "Ш", "Щ"}

-- Substitution tables are the very heart of the Transliterator. Due to the
-- nature of languages and scripts exhaustive substitution is the simplest
-- method for transliterations and transcriptions unless they are one-to-one
-- mappings like those defined in ISO~9.
--
-- To achieve better reusability we split the tables into segments, the most
-- obvious being the \type{*_low} and \type{*_upp} variants for sets of lowercase
-- and uppercase characters. Another set is constituted by e.~g. the
-- \type{ru_old*} tables that allow adding transcription of historical
-- characters if needed; by the way those are included in the default
-- transliteration mode \type{ru_old}.

-- Tables can be found in separate Lua files.
-- See {\tt
--        trans_tables_glag.lua
--        trans_tables_gr.lua
--        trans_tables_iso9.lua
--        trans_tables_scntfc.lua
--    and
--        trans_tables_trsc.lua.}

--------------------------------------------------------------------------------
-- Metatables allow for lazy concatenation.
--------------------------------------------------------------------------------

do
    -- Metatable shared by every "addable" dictionary. It is created empty
    -- and filled afterwards on purpose: inside `local Dict_add = { ... }`
    -- the name `Dict_add` is not yet in scope, so a reference to it from
    -- within the constructor would resolve to a global (nil) and the result
    -- of an addition would silently lose its metatable.
    local Dict_add = { }

    -- This returns the Union of both key sets for the “+” operator.
    -- The values of the first table will be updated (read: overridden) by
    -- those given in the second. Neither operand is modified; the result is
    -- a new table that is itself addable.
    Dict_add.__add = function (dict_a, dict_b)
        assert (type(dict_a) == "table" and type(dict_b) == "table")
        local dict_result = setmetatable({}, Dict_add)

        for key, val in pairs(dict_a) do
            dict_result[key] = val
        end

        for key, val in pairs(dict_b) do
            dict_result[key] = val
        end
        return dict_result
    end

    -- Turn `dict` into an addable dictionary (in place) and return it.
    translit.make_add_dict = function (dict)
        return setmetatable(dict, Dict_add)
    end
end

--------------------------------------------------------------------------------
-- Auxiliary Functions
--------------------------------------------------------------------------------

-- Generate a rule pattern from hash table.
do
    local P, R, V = lpeg.P, lpeg.R, lpeg.V

    -- Build an ordered choice matching any *key* of `dict` and append it to
    -- the optional pattern `rules` (pass nil to start a fresh pattern).
    -- Keys are grouped by their length as reported by `utf8len` and added
    -- longest first, so multi-char rules take precedence over their own
    -- prefixes. The order within one length group is whatever `next` yields.
    -- Returns `rules` unchanged when `dict` is empty.
    translit.addrules = function (dict, rules)
        local by_length, occurring_lengths = { }, { }
        for chr, _ in next, dict do
            local l = utf8len(chr)
            if not by_length[l] then
                by_length[l] = { }
                occurring_lengths[#occurring_lengths+1] = l
            end
            by_length[l][#by_length[l]+1] = chr
        end
        table.sort(occurring_lengths)
        -- walk the sorted lengths backwards: longest keys first
        for i=#occurring_lengths, 1, -1 do
            local l = occurring_lengths[i]
            for _, chr in next, by_length[l] do
                rules = rules and rules + P(chr) or P(chr)
            end
        end
        return rules
    end

    -- Modified version of Hans’s utf pattern (l-lpeg.lua).
    -- Matches one UTF-8 encoded character of one to four bytes.
    translit.utfchar = P{
        V"utf8one" + V"utf8two" + V"utf8three" + V"utf8four",

        utf8next  = R("\128\191"),
        utf8one   = R("\000\127"),
        utf8two   = R("\194\223") * V"utf8next",
        utf8three = R("\224\239") * V"utf8next" * V"utf8next",
        utf8four  = R("\240\244") * V"utf8next" * V"utf8next" * V"utf8next",
    }
end

-- We might want to have all the table data nicely formatted by \CONTEXT\
-- itself, here's how we'll do it. \type{translit.show_tab(t)} handles a
-- single table \type{t}, builds a Natural TABLE out of its content and
-- hands it down to the machine for typesetting.
-- For debugging purposes \type{translit.show_tab} does not only print the
-- replacement pairs but shows their code points as well.

-- Handle the input chars and replacement values: return the string "nil"
-- for an empty string, otherwise the values `utf8byte` reports for each
-- position of `s`, each followed by a single space (there can be more than
-- one with composed diacritics).
local strempty = function (s)
    if s == "" then
        return "nil"
    end
    local i = 1
    local r = ""
    repeat
        r = r .. utf8byte(s, i) .. " "
        i = i + 1
    until utf8byte(s, i) == nil
    return r
end

-- Output a transliteration table, nicely formatted with natural tables.
-- Each entry of `tab` becomes one row: running number, key, byte length of
-- the key, value, byte length of the value, and the `strempty` rendering of
-- both. Rows appear in `next` order, i.e. unsorted.
-- Lots of calls to context() but as it’s only a goodie this doesn’t
-- really matter.
function translit.show_tab (tab)
    local cnt = 0
    local captions = { "number", "letters", "n", "replacement", "n",
                       "bytes", "repl. bytes" }

    -- Typeset one row of column captions. `cell` selects the cell macros:
    -- "TH" for the table head, "TC" for the table foot.
    local function caption_row (cell)
        context.bTR()
        for i = 1, #captions do
            context["b" .. cell]() context(captions[i]) context["e" .. cell]()
        end
        context.eTR()
    end

    context.setupTABLE({"r"}, {"each"},  {style="\\tfx", align="center"})
    context.setupTABLE({"c"}, {"each"},  {frame="off"})
    context.setupTABLE({"r"}, {"each"},  {frame="off"})
    context.setupTABLE({"c"}, {"first"}, {style="italic"})
    context.setupTABLE({"r"}, {"first"}, {style="bold", topframe="on", bottomframe="on"})
    context.setupTABLE({"r"}, {"last"},  {style="bold", topframe="on", bottomframe="on"})
    context.bTABLE({split="yes", option="stretch"})
    context.bTABLEhead()
    caption_row("TH")
    context.eTABLEhead()
    context.bTABLEbody()

    for key, val in next, tab do
        cnt = cnt + 1
        context.bTR()
        context.bTC() context(cnt)             context.eTC()
        context.bTC() context(key)             context.eTC()
        context.bTC() context(string.len(key)) context.eTC()
        context.bTC() context(val)             context.eTC()
        context.bTC() context(string.len(val)) context.eTC()
        context.bTC() context(strempty(key))   context.eTC()
        context.bTC() context(strempty(val))   context.eTC()
        context.eTR()
    end

    context.eTABLEbody()
    context.bTABLEfoot()
    caption_row("TC")
    context.eTABLEfoot()
    context.eTABLE()
end

-- Having to pick out single tables for printing can be tedious, therefore we
-- let Lua do the job in our stead. \type{translit.show_all_tabs()} calls
-- \type{translit.show_tab} on every table that is registered with
-- \type{translit.tables} -- and uses its registered key as table heading.

function translit.show_all_tabs ()
    -- Each table file is loaded exactly once (the list used to name
    -- trans_tables_trsc twice).
    local table_files = {
        "trans_tables_iso9",
        "trans_tables_trsc",
        "trans_tables_scntfc",
        "trans_tables_sr",
        "trans_tables_glag",
        "trans_tables_gr",
    }
    for i = 1, #table_files do
        environment.loadluafile (table_files[i])
    end

    translit.gen_rules_en()
    -- gen_rules_de is only defined by trans_tables_trsc for one specific
    -- lpeg version; skip it instead of calling nil everywhere else.
    if translit.gen_rules_de then
        translit.gen_rules_de()
    end

    -- Output all translation tables that are registered within translit.tables.
    -- This will be quite unordered.
    context.chapter("Transliterator Showing All Tables")
    for key, val in pairs(translit.tables) do
        context.section(key)
        translit.show_tab (val)
    end
end

-- for internal use only

translit.debug_count = 0

-- Bump the debug counter and typeset a numbered marker.
function translit.debug_next ()
    translit.debug_count = translit.debug_count + 1
    context("\\tfxx{\\bf translit debug msg. nr.~" .. translit.debug_count .. "}")
end

--------------------------------------------------------------------------------
-- User-level Function
--------------------------------------------------------------------------------

-- \type{translit.transliterate(m, t)} constitutes the
-- metafunction that is called by the \type{\transliterate} command.
-- It loads the transliteration tables according to \type{method} and calls the
-- corresponding function.

-- Those supposedly are the most frequently used so it won’t hurt to preload
-- them. The rest will be loaded on request.
environment.loadluafile ("trans_tables_iso9")

-- Table file that registers a given method, for methods known by exact name.
-- Methods selected by prefix ("sr_", "bg_") are resolved in transliterate().
local table_file_for = {
    ru_transcript_de     = "trans_tables_trsc",
    ru_transcript_de_exp = "trans_tables_trsc", -- experimental lpeg
    ru_transcript_en     = "trans_tables_trsc",
    ru_transcript_en_exp = "trans_tables_trsc",
    ru_cz                = "trans_tables_trsc",
    ocs_cz               = "trans_tables_trsc",
    iso9_ocs             = "trans_tables_scntfc",
    iso9_ocs_hack        = "trans_tables_scntfc",
    ocs                  = "trans_tables_scntfc",
    ocs_gla              = "trans_tables_scntfc",
    gr                   = "trans_tables_gr",
    gr_n                 = "trans_tables_gr",
}

-- Transliterate `text` with `method`.
-- If the method is not registered yet, the table file that provides it is
-- loaded first. In script mode (`translit.__script` set) the result is
-- returned; otherwise it is handed to `context` and nothing is returned.
-- Raises an error naming the method if it is still unknown after loading.
function translit.transliterate (method, text)
    local methods = translit.methods
    if not methods[method] then -- register tables and method
        local file = table_file_for[method]
        if not file then
            if method:match("^sr_") then
                file = "trans_tables_sr"
            elseif method:match("^bg_") then -- only bg_de for now
                file = "trans_tables_bg"
            end
        end
        if file then
            environment.loadluafile (file)
        end
    end

    local handler = methods[method]
    if not handler then
        error(string.format("transliterator: unknown method %q",
                            tostring(method)), 2)
    end

    if translit.__script then
        return handler(text)
    end
    context ( handler(text) )
end

-- vim:sw=4:ts=4:expandtab:ft=lua
diff --git a/tex/context/interface/third/t-transliterator.xml b/tex/context/interface/third/t-transliterator.xml deleted file mode 100644 index d45f9cf..0000000 --- a/tex/context/interface/third/t-transliterator.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tex/context/third/transliterator/t-transliterator.mkii b/tex/context/third/transliterator/t-transliterator.mkii deleted file mode 100644 index 2fcfb0b..0000000 --- a/tex/context/third/transliterator/t-transliterator.mkii +++ /dev/null @@ -1,3 +0,0 @@ -% Suggested by Taco -\message{Module is unsupported under mkii} -\endinput diff --git a/tex/context/third/transliterator/t-transliterator.mkiv b/tex/context/third/transliterator/t-transliterator.mkiv deleted file mode 100644 index ae08278..0000000 --- a/tex/context/third/transliterator/t-transliterator.mkiv +++ /dev/null @@ -1,188 +0,0 @@ -%D \module -%D [ file=t-transliterator, -%D version=2021-11-21 18:19:24+0100, -%D title=\CONTEXT\ User Module, -%D subtitle=The Transliterator, -%D author=Philipp Gesang, -%D date=\currentdate, -%D copyright=Philipp Gesang, -%D license=2-clause BSD, -%D email={pgesang at ix dot urz dot uni-heidelberg dot de}] -%D This module is licensed under the conditions of the BSD license with -%D two clauses, there is a copy it in a file named "COPYING" in the -%D transliterator source tree. - -\writestatus{loading}{Transliteration from non-Latin scripts} - -\unprotect - -\definenamespace [TRL] [ - name=transliterate, - type=module, - setup=list, - parent=TRL, - style=no, - version=2021, - comment=Transliteration from non-Latin scripts., -] - -\ctxlua{environment.loadluafile ("transliterator")} - -%D Use the Transliterator by adding \type{\usemodule[transliterator]} somewhere -%D before \type{\starttext}. Adjust the Transliterator through the -%D \type{\setuptransliterate} command. 
As a first argument it accepts a set of -%D key-value options; at present you may configure \type{mode} and -%D \type{hyphenate}. - -\def\set_serbian_exceptions{% - \doifelse{\transliterateparameter{sr_exceptions}}\v!yes - {\ctxlua{thirddata.translit.sr_except = true}} - {\ctxlua{thirddata.translit.sr_except = false}}% -} - -\def\set_hinting{% - \doifelse{\transliterateparameter{hinting}}\v!yes - {\ctxlua{thirddata.translit.hinting = true}} - {\ctxlua{thirddata.translit.hinting = false}}% -} - -\appendtoks \set_serbian_exceptions \to \everysetuptransliterate -\appendtoks \set_hinting \to \everysetuptransliterate - -%D At first we'll set some defaults: - -\setuptransliterate[% - debug=\v!false, - hinting=\v!yes, - hyphenate=cz, - mode=ru_old, - sr_exceptions=\v!yes, - deficient_font=\v!no, -] - -%D Possible values for \type{mode} are by the time of this writing: -%D \type{ru}, \type{ru_transcript_de}, \type{ru_transcript_en}, \type{ru_old}, -%D \type{all}, \type{iso9_ocs}, \type{ocs}, \type{ocs_gla}, \type{ru_cz}, -%D \type{ocs_cz}, \type{gr} and \type{gr_n}. -%D As not all fonts, even the expensive ones, support some of the most frequent -%D unicode signs used in ISO~9, there are fallbacks for the transliterations of -%D the weak and hard sign. -%D They work with the modes \type{iso9_ocs}, \type{all} and -%D \type{ru_old} only and can be triggered by setting the -%D variable \type{deficient_font} to the value {\em yes}. -%D This will transliterate {\em ь} and {\em ъ} (both upper and -%D lower case) to the more common, but non-ISO characters {\em ’} and {\em ”} -%D respectively. -%D Possible values for \type{hyphenate} are all valid \CONTEXT\ language code, for an -%D overview see \type{http://wiki.contextgarden.net/Language_Codes}. -%D In praxi you may want to choose either Czech (the default) or Slovak -%D (\type{sk}) for most transliterations from cyrillic scripts. I've not yet -%D made up my mind concerning Greek transliteration, any suggestions are -%D welcome. 
- -%D The following will help debugging and reviewing tables. Make sure your -%D typescript can handle the characters, in general it's no use with Latin -%D Modern which unfortunately provides only a restricted set of the unicode -%D range. -%D -%D The user-level command to output a single substitution table is -%D \type{\showOneTranslitTab{#1}}. - -\define[1]\showOneTranslitTab{% - \startluacode - environment.loadluafile ("trans_tables_iso9") - environment.loadluafile ("trans_tables_trsc") - environment.loadluafile ("trans_tables_scntfc") - environment.loadluafile ("trans_tables_trsc") - environment.loadluafile ("trans_tables_glag") - environment.loadluafile ("trans_tables_gr") - thirddata.translit.gen_rules_en() - thirddata.translit.gen_rules_de() - thirddata.translit.show_tab(translit["\luaescapestring{#1}"]) - \stopluacode -} - -%D The user-level command to output all defined tables is -%D \type{\showTranslitTabs}. - -\define\showTranslitTabs{% - \ctxlua{thirddata.translit.show_all_tabs()}% -} - -\def\translitDebug#1{% - \doif{\transliterateparameter{debug}}{yes}{% - {\ss\inmargin{\ctxlua{thirddata.translit.debug_next()}} #1}% - }% -} - - -%D The user-level command \type{\transliterate[#1]{#2}} does the job of -%D switching to a given language (for hyphenation) and adjusting the -%D substitution method locally. It takes an optional list \type{[#1]} of -%D key-value arguments to allow ad-hoc specification of either two that deviate -%D from the defaults set initially by means of \type{\setuptransliterate}. -%D -%D Internally, \type{\dotransliterate} is called according to the \CONTEXT\ -%D coding style and in case the user provides \type{hyphenate=} or -%D \type{mode=} those will be used instead of the globals. Note that this -%D leaves the latter unchanged. Thus, in order to permanently switch to -%D another transliteration style the user would have to set it by calling -%D \type{\setuptransliterate} again. 
-%D -% All credits for rewriting the TeX code go to Wolfgang as well. -% http://www.ntg.nl/pipermail/ntg-context/2010/047816.html - -\def\dotransliterate[#1]#2{% - \bgroup\iffirstargument - \setuptransliterate[#1]% - \fi - \language[\transliterateparameter{hyphenate}]% - \ctxlua{ - thirddata.translit.deficient_font = "\transliterateparameter{deficient_font}" - thirddata.translit.transliterate("\transliterateparameter{mode}","\luaescapestring{#2}") - }% - \egroup% -} - -\unexpanded\def\transliterate{\dosingleempty\dotransliterate} - -\def\expandabletransliterate#1{% - %\bgroup - %\setuptransliterate[#1]% - %\language[\transliterateparameter{hyphenate}]% - \ctxlua{ - thirddata.translit.deficient_font = "\transliterateparameter{deficient_font}" - thirddata.translit.transliterate("\transliterateparameter{mode}","#1") - }% - %\egroup% -} - -\unexpanded\def\starttransliterate{% - \bgroup% - \dosingleempty\dostarttransliterate% -} - -\let\stoptransliterate\relax - -\def\dostarttransliterate[#1]#2\stoptransliterate{% - \iffirstargument - \setuptransliterate[#1]% - \fi - \language[\transliterateparameter{hyphenate}]% - \ctxlua{thirddata.translit.transliterate("\transliterateparameter{mode}","\luaescapestring{#2}")}% - \egroup% -} - -\newconditional\transliterate_useexpanded \setfalse\transliterate_useexpanded - -\def\transliterate_conditional[#1]#2{% - \ifconditional\transliterate_useexpanded - \transliterate[#1]{#2}% - \else - \expandabletransliterate{#2}% - \fi% -} - -\protect \endinput - -% vim:ft=context diff --git a/tex/context/third/transliterator/t-transliterator.tex b/tex/context/third/transliterator/t-transliterator.tex deleted file mode 100644 index 9d4e9f0..0000000 --- a/tex/context/third/transliterator/t-transliterator.tex +++ /dev/null @@ -1 +0,0 @@ -\loadmarkfile{t-transliterator} diff --git a/tex/context/third/transliterator/trans_tables_bg.lua b/tex/context/third/transliterator/trans_tables_bg.lua deleted file mode 100644 index b319666..0000000 --- 
a/tex/context/third/transliterator/trans_tables_bg.lua +++ /dev/null @@ -1,114 +0,0 @@ ---===========================================================================-- --- Bulgarian -- ---===========================================================================-- - -local translit = thirddata.translit -local pcache = translit.parser_cache -local lpegmatch = lpeg.match - -if not translit.done_bg then - --------------------------------------------------------------------------- - -- Uppercase Bulgarian -> „scientific“ transliteration -- - --------------------------------------------------------------------------- - - translit.bg_upp = translit.make_add_dict{ - ["А"] = "A", - ["Б"] = "B", - ["В"] = "V", - ["Г"] = "G", - ["Д"] = "D", - ["Е"] = "E", - ["Ж"] = "Ž", - ["З"] = "Z", - ["И"] = "I", - ["Й"] = "J", - ["К"] = "K", - ["Л"] = "L", - ["М"] = "M", - ["Н"] = "N", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["У"] = "U", - ["Ф"] = "F", - ["Х"] = "Ch", - ["Ц"] = "C", - ["Ч"] = "Č", - ["Ш"] = "Š", - ["Щ"] = "Št", - ["Ъ"] = "Ă", - ["Ь"] = "′", - ["Ю"] = "Ju", - ["Я"] = "Ja", - } - translit.tables["Bulgarian \\quotation{scientific} transliteration uppercase"] = translit.bg_upp - - --------------------------------------------------------------------------- - -- Lowercase Bulgarian -> „scientific“ transliteration -- - --------------------------------------------------------------------------- - translit.bg_low = translit.make_add_dict{ - ["а"] = "a", - ["б"] = "b", - ["в"] = "v", - ["г"] = "g", - ["д"] = "d", - ["е"] = "e", - ["ж"] = "ž", - ["з"] = "z", - ["и"] = "i", - ["й"] = "j", - ["к"] = "k", - ["л"] = "l", - ["м"] = "m", - ["н"] = "n", - ["о"] = "o", - ["п"] = "p", - ["р"] = "r", - ["с"] = "s", - ["т"] = "t", - ["у"] = "u", - ["ф"] = "f", - ["х"] = "ch", - ["ц"] = "c", - ["ч"] = "č", - ["ш"] = "š", - ["щ"] = "št", - ["ъ"] = "ă", - ["ь"] = "′", - ["ю"] = "ju", - ["я"] = "ja", - } - - translit.tables["Bulgarian \\quotation{scientific} 
transliteration lowercase"] = translit.bg_low - - translit.done_bg = true -end - -local P, Cs = lpeg.P, lpeg.Cs -local addrules = translit.addrules -local utfchar = translit.utfchar - -local function bulgarian (mode) - local bulgarian_parser - if mode == "de" then - local bg = translit.bg_upp + translit.bg_low - local p_bg = addrules(bg) - bulgarian_parser = Cs((p_bg / bg + utfchar)^0) - else - return nil - end - return bulgarian_parser -end - -translit.methods["bg_de"] = function (text) - local p = pcache["bg_de"] - if not p then - p = bulgarian("de") - pcache["bg_de"] = p - end - return p and lpegmatch(p, text) or "" -end - --- vim:ft=lua:sw=4:ts=4 diff --git a/tex/context/third/transliterator/trans_tables_glag.lua b/tex/context/third/transliterator/trans_tables_glag.lua deleted file mode 100644 index 41974fd..0000000 --- a/tex/context/third/transliterator/trans_tables_glag.lua +++ /dev/null @@ -1,128 +0,0 @@ - ---===========================================================================-- --- Glagolica -- ---===========================================================================-- - -local translit = thirddata.translit - -------------------------------------------- --- Lowercase Glagolitic Transliteration -- -------------------------------------------- - -if not translit.done_glagolica then - translit.ocs_gla_low = translit.make_add_dict{ - ["ⰰ"] = "a", -- GLAGOLITIC SMALL LETTER AZU - ["ⰱ"] = "b", -- GLAGOLITIC SMALL LETTER BUKY - ["ⰲ"] = "v", -- GLAGOLITIC SMALL LETTER VEDE - ["ⰳ"] = "g", -- GLAGOLITIC SMALL LETTER GLAGOLI - ["ⰴ"] = "d", -- GLAGOLITIC SMALL LETTER DOBRO - ["ⰵ"] = "e", -- GLAGOLITIC SMALL LETTER YESTU - ["ⰶ"] = "ž", -- GLAGOLITIC SMALL LETTER ZHIVETE - ["ⰷ"] = "ʒ", -- GLAGOLITIC SMALL LETTER DZELO - ["ⰸ"] = "z", -- GLAGOLITIC SMALL LETTER ZEMLJA - ["ⰹ"] = "i", -- GLAGOLITIC SMALL LETTER IZHE - ["ⰺ"] = "i", -- GLAGOLITIC SMALL LETTER INITIAL IZHE - ["ⰻ"] = "i", -- GLAGOLITIC SMALL LETTER I - ["ⰼ"] = "g’", -- GLAGOLITIC SMALL LETTER DJERVI - 
["ⰽ"] = "k", -- GLAGOLITIC SMALL LETTER KAKO - ["ⰾ"] = "l", -- GLAGOLITIC SMALL LETTER LJUDIJE - ["ⰿ"] = "m", -- GLAGOLITIC SMALL LETTER MYSLITE - ["ⱀ"] = "n", -- GLAGOLITIC SMALL LETTER NASHI - ["ⱁ"] = "o", -- GLAGOLITIC SMALL LETTER ONU - ["ⱂ"] = "p", -- GLAGOLITIC SMALL LETTER POKOJI - ["ⱃ"] = "r", -- GLAGOLITIC SMALL LETTER RITSI - ["ⱄ"] = "s", -- GLAGOLITIC SMALL LETTER SLOVO - ["ⱅ"] = "t", -- GLAGOLITIC SMALL LETTER TVRIDO - ["ⱆ"] = "u", -- GLAGOLITIC SMALL LETTER UKU - ["ⱇ"] = "f", -- GLAGOLITIC SMALL LETTER FRITU - ["ⱈ"] = "x", -- GLAGOLITIC SMALL LETTER HERU - ["ⱉ"] = "o", -- GLAGOLITIC SMALL LETTER OTU - ["ⱊ"] = "?", -- GLAGOLITIC SMALL LETTER PE - ["ⱋ"] = "št", -- GLAGOLITIC SMALL LETTER SHTA - ["ⱌ"] = "c", -- GLAGOLITIC SMALL LETTER TSI - ["ⱍ"] = "č", -- GLAGOLITIC SMALL LETTER CHRIVI - ["ⱎ"] = "š", -- GLAGOLITIC SMALL LETTER SHA - ["ⱏ"] = "ъ", -- GLAGOLITIC SMALL LETTER YERU - ["ⱐ"] = "ь", -- GLAGOLITIC SMALL LETTER YERI - ["ⱑ"] = "ě", -- GLAGOLITIC SMALL LETTER YATI - ["ⱒ"] = "x", -- GLAGOLITIC SMALL LETTER SPIDERY HA - ["ⱓ"] = "ju", -- GLAGOLITIC SMALL LETTER YU - ["ⱔ"] = "ę", -- GLAGOLITIC SMALL LETTER SMALL YUS - ["ⱕ"] = "y̨", -- GLAGOLITIC SMALL LETTER SMALL YUS WITH TAIL - ["ⱖ"] = "??", -- GLAGOLITIC SMALL LETTER YO - ["ⱗ"] = "ję", -- GLAGOLITIC SMALL LETTER IOTATED SMALL YU - ["ⱘ"] = "ǫ", -- GLAGOLITIC SMALL LETTER BIG YUS - ["ⱙ"] = "jǫ", -- GLAGOLITIC SMALL LETTER IOTATED BIG YUS - ["ⱚ"] = "th", -- GLAGOLITIC SMALL LETTER FITA - ["ⱛ"] = "ü", -- GLAGOLITIC SMALL LETTER IZHITSA - ["ⱜ"] = "??", -- GLAGOLITIC SMALL LETTER SHTAPIC - ["ⱝ"] = "??", -- GLAGOLITIC SMALL LETTER TROKUTASTI A - ["ⱞ"] = "m", -- GLAGOLITIC SMALL LETTER LATINATE MYSLITE - } - - translit.tables["Glagolica transliteration for OCS lowercase"] = translit.ocs_gla_low - - ------------------------------------------------ - -- Uppercase (?!) 
Glagolitic Transliteration -- - ------------------------------------------------ - - translit.ocs_gla_upp = translit.make_add_dict{ - ["Ⰰ"] = "A", -- GLAGOLITIC CAPITAL LETTER AZU - ["Ⰱ"] = "B", -- GLAGOLITIC CAPITAL LETTER BUKY - ["Ⰲ"] = "V", -- GLAGOLITIC CAPITAL LETTER VEDE - ["Ⰳ"] = "G", -- GLAGOLITIC CAPITAL LETTER GLAGOLI - ["Ⰴ"] = "D", -- GLAGOLITIC CAPITAL LETTER DOBRO - ["Ⰵ"] = "E", -- GLAGOLITIC CAPITAL LETTER YESTU - ["Ⰶ"] = "Ž", -- GLAGOLITIC CAPITAL LETTER ZHIVETE - ["Ⰷ"] = "Ʒ", -- GLAGOLITIC CAPITAL LETTER DZELO - ["Ⰸ"] = "Z", -- GLAGOLITIC CAPITAL LETTER ZEMLJA - ["Ⰹ"] = "I", -- GLAGOLITIC CAPITAL LETTER IZHE - ["Ⰺ"] = "I", -- GLAGOLITIC CAPITAL LETTER INITIAL IZHE - ["Ⰻ"] = "I", -- GLAGOLITIC CAPITAL LETTER I - ["Ⰼ"] = "G’", -- GLAGOLITIC CAPITAL LETTER DJERVI - ["Ⰽ"] = "K", -- GLAGOLITIC CAPITAL LETTER KAKO - ["Ⰾ"] = "L", -- GLAGOLITIC CAPITAL LETTER LJUDIJE - ["Ⰿ"] = "M", -- GLAGOLITIC CAPITAL LETTER MYSLITE - ["Ⱀ"] = "N", -- GLAGOLITIC CAPITAL LETTER NASHI - ["Ⱁ"] = "O", -- GLAGOLITIC CAPITAL LETTER ONU - ["Ⱂ"] = "P", -- GLAGOLITIC CAPITAL LETTER POKOJI - ["Ⱃ"] = "R", -- GLAGOLITIC CAPITAL LETTER RITSI - ["Ⱄ"] = "S", -- GLAGOLITIC CAPITAL LETTER SLOVO - ["Ⱅ"] = "T", -- GLAGOLITIC CAPITAL LETTER TVRIDO - ["Ⱆ"] = "U", -- GLAGOLITIC CAPITAL LETTER UKU - ["Ⱇ"] = "F", -- GLAGOLITIC CAPITAL LETTER FRITU - ["Ⱈ"] = "X", -- GLAGOLITIC CAPITAL LETTER HERU - ["Ⱉ"] = "O", -- GLAGOLITIC CAPITAL LETTER OTU - ["Ⱊ"] = "?", -- GLAGOLITIC CAPITAL LETTER PE - ["Ⱋ"] = "Št", -- GLAGOLITIC CAPITAL LETTER SHTA - ["Ⱌ"] = "C", -- GLAGOLITIC CAPITAL LETTER TSI - ["Ⱍ"] = "Č", -- GLAGOLITIC CAPITAL LETTER CHRIVI - ["Ⱎ"] = "Š", -- GLAGOLITIC CAPITAL LETTER SHA - ["Ⱏ"] = "Ъ", -- GLAGOLITIC CAPITAL LETTER YERU - ["Ⱐ"] = "Ь", -- GLAGOLITIC CAPITAL LETTER YERI - ["Ⱑ"] = "Ě", -- GLAGOLITIC CAPITAL LETTER YATI - ["Ⱒ"] = "X", -- GLAGOLITIC CAPITAL LETTER SPIDERY HA - ["Ⱓ"] = "Ju", -- GLAGOLITIC CAPITAL LETTER YU - ["Ⱔ"] = "Ę", -- GLAGOLITIC CAPITAL LETTER SMALL YUS - ["Ⱕ"] = "Y̨", 
-- GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL - ["Ⱖ"] = "??", -- GLAGOLITIC CAPITAL LETTER YO - ["Ⱗ"] = "Ję", -- GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS - ["Ⱘ"] = "Ǫ", -- GLAGOLITIC CAPITAL LETTER BIG YUS - ["Ⱙ"] = "Jǫ", -- GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS - ["Ⱚ"] = "Th", -- GLAGOLITIC CAPITAL LETTER FITA - ["Ⱛ"] = "Ü", -- GLAGOLITIC CAPITAL LETTER IZHITSA - ["Ⱜ"] = "??", -- GLAGOLITIC CAPITAL LETTER SHTAPIC - ["Ⱝ"] = "??", -- GLAGOLITIC CAPITAL LETTER TROKUTASTI A - ["Ⱞ"] = "M", -- GLAGOLITIC CAPITAL LETTER LATINATE MYSLIT - } - - translit.tables["Glagolica transliteration for OCS uppercase"] = translit.ocs_gla_upp - - translit.done_glagolica = true -end - ---===========================================================================-- --- End Of Tables -- ---===========================================================================-- - - diff --git a/tex/context/third/transliterator/trans_tables_gr.lua b/tex/context/third/transliterator/trans_tables_gr.lua deleted file mode 100644 index b4c77e7..0000000 --- a/tex/context/third/transliterator/trans_tables_gr.lua +++ /dev/null @@ -1,709 +0,0 @@ ---===========================================================================-- --- Greek -- ---===========================================================================-- - -local translit = thirddata.translit -local pcache = translit.parser_cache -local lpegmatch = lpeg.match - --- Note that the Greek transliteration mapping isn't bijective so transliterated --- texts won't be reversible. (Shouldn't be impossible to make one up using --- diacritics on latin characters to represent all possible combinations of --- Greek breathings + accents.) 
- --- Good reading on composed / precombined unicode: --- http://www.tlg.uci.edu/~opoudjis/unicode/unicode_gaps.html#precomposed - -------------------------------------------------- --- Lowercase Greek Initial Position Diphthongs -- -------------------------------------------------- - -if not translit.done_greek then - translit.gr_di_in_low = translit.make_add_dict{ - [" αὑ"] = " hau", - [" αὕ"] = " hau", - [" αὓ"] = " hau", - [" αὗ"] = " hau", - [" εὑ"] = " heu", - [" εὕ"] = " heu", - [" εὓ"] = " heu", - [" εὗ"] = " heu", - [" ηὑ"] = " hēu", - [" ηὕ"] = " hēu", - [" ηὓ"] = " hēu", - [" ηὗ"] = " hēu", - [" οὑ"] = " hu", - [" οὕ"] = " hu", - [" οὓ"] = " hu", - [" οὗ"] = " hu", - [" ωὑ"] = " hōu", - [" ωὕ"] = " hōu", - [" ωὓ"] = " hōu", - [" ωὗ"] = " hōu" - } - - translit.tables["Greek transliteration initial breathing diphthongs lowercase"] = translit.gr_di_in_low - - ------------------------------------------------- - -- Uppercase Greek Initial Position Diphthongs -- - ------------------------------------------------- - - translit.gr_di_in_upp = translit.make_add_dict{ - [" Αὑ"] = " Hau", - [" Αὕ"] = " Hau", - [" Αὓ"] = " Hau", - [" Αὗ"] = " Hau", - [" Εὑ"] = " Heu", - [" Εὕ"] = " Heu", - [" Εὓ"] = " Heu", - [" Εὗ"] = " Heu", - [" Ηὑ"] = " Hēu", - [" Ηὕ"] = " Hēu", - [" Ηὓ"] = " Hēu", - [" Ηὗ"] = " Hēu", - [" Οὑ"] = " Hu", - [" Οὕ"] = " Hu", - [" Οὓ"] = " Hu", - [" Οὗ"] = " Hu", - [" Ωὑ"] = " Hōu", - [" Ωὕ"] = " Hōu", - [" Ωὓ"] = " Hōu", - [" Ωὗ"] = " Hōu" - } - - translit.tables["Greek transliteration initial breathing diphthongs uppercase"] = translit.gr_di_in_upp - - --------------------------------------- - -- Lowercase Greek Initial Position -- - --------------------------------------- - - translit.gr_in_low = translit.make_add_dict{ - [" ἁ"] = " ha", - [" ἅ"] = " ha", - [" ἃ"] = " ha", - [" ἇ"] = " ha", - [" ᾁ"] = " ha", - [" ᾅ"] = " ha", - [" ᾃ"] = " ha", - [" ᾇ"] = " ha", - [" ἑ"] = " he", - [" ἕ"] = " he", - [" ἓ"] = " he", - [" ἡ"] = " hē", - [" ἥ"] = " 
hē", - [" ἣ"] = " hē", - [" ἧ"] = " hē", - [" ᾑ"] = " hē", - [" ᾕ"] = " hē", - [" ᾓ"] = " hē", - [" ᾗ"] = " hē", - [" ἱ"] = " hi", - [" ἵ"] = " hi", - [" ἳ"] = " hi", - [" ἷ"] = " hi", - [" ὁ"] = " ho", - [" ὅ"] = " ho", - [" ὃ"] = " ho", - [" ὑ"] = " hy", - [" ὕ"] = " hy", - [" ὓ"] = " hy", - [" ὗ"] = " hy", - [" ὡ"] = " hō", - [" ὥ"] = " hō", - [" ὣ"] = " hō", - [" ὧ"] = " hō", - [" ᾡ"] = " hō", - [" ᾥ"] = " hō", - [" ᾣ"] = " hō", - [" ᾧ"] = " hō", - } - - translit.tables["Greek transliteration initial breathing lowercase"] = translit.gr_in_low - - --------------------------------------- - -- Uppercase Greek Initial Position -- - --------------------------------------- - - translit.gr_in_upp = translit.make_add_dict{ - [" Ἁ"] = " Ha", - [" Ἅ"] = " Ha", - [" Ἃ"] = " Ha", - [" Ἇ"] = " Ha", - [" ᾉ"] = " Ha", - [" ᾍ"] = " Ha", - [" ᾋ"] = " Ha", - [" ᾏ"] = " Ha", - [" Ἑ"] = " He", - [" Ἕ"] = " He", - [" Ἓ"] = " He", - [" Ἡ"] = " Hē", - [" Ἥ"] = " Hē", - [" Ἣ"] = " Hē", - [" Ἧ"] = " Hē", - [" ᾙ"] = " Hē", - [" ᾝ"] = " Hē", - [" ᾛ"] = " Hē", - [" ᾟ"] = " Hē", - [" Ἱ"] = " Hi", - [" Ἵ"] = " Hi", - [" Ἳ"] = " Hi", - [" Ἷ"] = " Hi", - [" Ὁ"] = " Ho", - [" Ὅ"] = " Ho", - [" Ὃ"] = " Ho", - [" Ὑ"] = " Hy", - [" Ὕ"] = " Hy", - [" Ὓ"] = " Hy", - [" Ὗ"] = " Hy", - [" Ὡ"] = " Hō", - [" Ὥ"] = " Hō", - [" Ὣ"] = " Hō", - [" Ὧ"] = " Hō", - [" ᾩ"] = " Hō", - [" ᾭ"] = " Hō", - [" ᾫ"] = " Hō", - [" ᾯ"] = " Hō", - } - - translit.tables["Greek transliteration initial breathing uppercase"] = translit.gr_in_upp - - --------------------------------- - -- Lowercase Greek Diphthongs -- - --------------------------------- - - translit.gr_di_low = translit.make_add_dict{ - ["αυ"] = "au", - ["αύ"] = "au", - ["αὺ"] = "au", - ["αῦ"] = "au", - ["αὐ"] = "au", - ["αὔ"] = "au", - ["αὒ"] = "au", - ["αὖ"] = "au", - ["αὑ"] = "au", - ["αὕ"] = "au", - ["αὓ"] = "au", - ["αὗ"] = "au", - ["ευ"] = "eu", - ["εύ"] = "eu", - ["εὺ"] = "eu", - ["εῦ"] = "eu", - ["εὐ"] = "eu", - ["εὔ"] = "eu", - ["εὒ"] = "eu", - 
["εὖ"] = "eu", - ["εὑ"] = "eu", - ["εὕ"] = "eu", - ["εὓ"] = "eu", - ["εὗ"] = "eu", - ["ηυ"] = "ēu", - ["ηύ"] = "ēu", - ["ηὺ"] = "ēu", - ["ηῦ"] = "ēu", - ["ηὐ"] = "ēu", - ["ηὔ"] = "ēu", - ["ηὒ"] = "ēu", - ["ηὖ"] = "ēu", - ["ηὑ"] = "ēu", - ["ηὕ"] = "ēu", - ["ηὓ"] = "ēu", - ["ηὗ"] = "ēu", - ["ου"] = "u", - ["ου"] = "u", - ["ου"] = "u", - ["ού"] = "u", - ["οὺ"] = "u", - ["οῦ"] = "u", - ["οὐ"] = "u", - ["οὔ"] = "u", - ["οὒ"] = "u", - ["οὖ"] = "u", - ["οὑ"] = "u", - ["οὕ"] = "u", - ["οὓ"] = "u", - ["οὗ"] = "u", - ["ωυ"] = "ōu", - ["ωύ"] = "ōu", - ["ωὺ"] = "ōu", - ["ωῦ"] = "ōu", - ["ωὐ"] = "ōu", - ["ωὔ"] = "ōu", - ["ωὒ"] = "ōu", - ["ωὖ"] = "ōu", - ["ωὑ"] = "ōu", - ["ωὕ"] = "ōu", - ["ωὓ"] = "ōu", - ["ωὗ"] = "ōu", - ["ῤῥ"] = "rrh", - } - - translit.tables["Greek transliteration diphthongs lowercase"] = translit.gr_in_low - - --------------------------------- - -- Uppercase Greek Diphthongs -- - --------------------------------- - - translit.gr_di_upp = translit.make_add_dict{ - ["Αυ"] = "Au", - ["Αύ"] = "Au", - ["Αὺ"] = "Au", - ["Αῦ"] = "Au", - ["Αὐ"] = "Au", - ["Αὔ"] = "Au", - ["Αὒ"] = "Au", - ["Αὖ"] = "Au", - ["Αὑ"] = "Au", - ["Αὕ"] = "Au", - ["Αὓ"] = "Au", - ["Αὗ"] = "Au", - ["Ευ"] = "Eu", - ["Εύ"] = "Eu", - ["Εὺ"] = "Eu", - ["Εῦ"] = "Eu", - ["Εὐ"] = "Eu", - ["Εὔ"] = "Eu", - ["Εὒ"] = "Eu", - ["Εὖ"] = "Eu", - ["Εὑ"] = "Eu", - ["Εὕ"] = "Eu", - ["Εὓ"] = "Eu", - ["Εὗ"] = "Eu", - ["Ηυ"] = "Ēu", - ["Ηύ"] = "Ēu", - ["Ηὺ"] = "Ēu", - ["Ηῦ"] = "Ēu", - ["Ηὐ"] = "Ēu", - ["Ηὔ"] = "Ēu", - ["Ηὒ"] = "Ēu", - ["Ηὖ"] = "Ēu", - ["Ηὑ"] = "Ēu", - ["Ηὕ"] = "Ēu", - ["Ηὓ"] = "Ēu", - ["Ηὗ"] = "Ēu", - ["Ου"] = "U", - ["Ου"] = "U", - ["Ου"] = "U", - ["Ού"] = "U", - ["Οὺ"] = "U", - ["Οῦ"] = "U", - ["Οὐ"] = "U", - ["Οὔ"] = "U", - ["Οὒ"] = "U", - ["Οὖ"] = "U", - ["Οὑ"] = "U", - ["Οὕ"] = "U", - ["Οὓ"] = "U", - ["Οὗ"] = "U", - ["Ωυ"] = "Ōu", - ["Ωύ"] = "Ōu", - ["Ωὺ"] = "Ōu", - ["Ωῦ"] = "Ōu", - ["Ωὐ"] = "Ōu", - ["Ωὔ"] = "Ōu", - ["Ωὒ"] = "Ōu", - ["Ωὖ"] = "Ōu", - ["Ωὑ"] = "Ōu", - ["Ωὕ"] = "Ōu", - ["Ωὓ"] = 
"Ōu", - ["Ωὗ"] = "Ōu", - } - - translit.tables["Greek transliteration diphthongs uppercase"] = translit.gr_in_upp - - -- The following will be used in an option that ensures transcription of - -- nasalization, e.g. Ἁγχίσης -> “Anchises” (instead of “Agchises”) - translit.gr_nrule = translit.make_add_dict{ - ["γγ"] = "ng", - ["γκ"] = "nk", - ["γξ"] = "nx", - ["γχ"] = "nch", - } - - translit.tables["Greek transliteration optional nasalization"] = translit.gr_nrule - - - -------------------------------------- - -- Lowercase Greek Transliteration -- - -------------------------------------- - - translit.gr_low = translit.make_add_dict{ - ["α"] = "a", - ["ά"] = "a", - ["ὰ"] = "a", - ["ᾶ"] = "a", - ["ᾳ"] = "a", - ["ἀ"] = "a", - ["ἁ"] = "a", - ["ἄ"] = "a", - ["ἂ"] = "a", - ["ἆ"] = "a", - ["ἁ"] = "a", - ["ἅ"] = "a", - ["ἃ"] = "a", - ["ἇ"] = "a", - ["ᾁ"] = "a", - ["ᾴ"] = "a", - ["ᾲ"] = "a", - ["ᾷ"] = "a", - ["ᾄ"] = "a", - ["ᾂ"] = "a", - ["ᾅ"] = "a", - ["ᾃ"] = "a", - ["ᾆ"] = "a", - ["ᾇ"] = "a", - ["β"] = "b", - ["γ"] = "g", - ["δ"] = "d", - ["ε"] = "e", - ["έ"] = "e", - ["ὲ"] = "e", - ["ἐ"] = "e", - ["ἔ"] = "e", - ["ἒ"] = "e", - ["ἑ"] = "e", - ["ἕ"] = "e", - ["ἓ"] = "e", - ["ζ"] = "z", - ["η"] = "ē", - ["η"] = "ē", - ["ή"] = "ē", - ["ὴ"] = "ē", - ["ῆ"] = "ē", - ["ῃ"] = "ē", - ["ἠ"] = "ē", - ["ἤ"] = "ē", - ["ἢ"] = "ē", - ["ἦ"] = "ē", - ["ᾐ"] = "ē", - ["ἡ"] = "ē", - ["ἥ"] = "ē", - ["ἣ"] = "ē", - ["ἧ"] = "ē", - ["ᾑ"] = "ē", - ["ῄ"] = "ē", - ["ῂ"] = "ē", - ["ῇ"] = "ē", - ["ᾔ"] = "ē", - ["ᾒ"] = "ē", - ["ᾕ"] = "ē", - ["ᾓ"] = "ē", - ["ᾖ"] = "ē", - ["ᾗ"] = "ē", - ["θ"] = "th", - ["ι"] = "i", - ["ί"] = "i", - ["ὶ"] = "i", - ["ῖ"] = "i", - ["ἰ"] = "i", - ["ἴ"] = "i", - ["ἲ"] = "i", - ["ἶ"] = "i", - ["ἱ"] = "i", - ["ἵ"] = "i", - ["ἳ"] = "i", - ["ἷ"] = "i", - ["ϊ"] = "i", - ["ΐ"] = "i", - ["ῒ"] = "i", - ["ῗ"] = "i", - ["κ"] = "k", - ["λ"] = "l", - ["μ"] = "m", - ["ν"] = "n", - ["ξ"] = "x", - ["ο"] = "o", - ["ό"] = "o", - ["ὸ"] = "o", - ["ὀ"] = "o", - ["ὄ"] = "o", - ["ὂ"] = "o", - ["ὁ"] = 
"o", - ["ὅ"] = "o", - ["ὃ"] = "o", - ["π"] = "p", - ["ρ"] = "r", - ["ῤ"] = "r", - ["ῥ"] = "rh", - ["σ"] = "s", - ["ς"] = "s", - ["τ"] = "t", - ["υ"] = "y", - ["ύ"] = "y", - ["ὺ"] = "y", - ["ῦ"] = "y", - ["ὐ"] = "y", - ["ὔ"] = "y", - ["ὒ"] = "y", - ["ὖ"] = "y", - ["ὑ"] = "y", - ["ὕ"] = "y", - ["ὓ"] = "y", - ["ὗ"] = "y", - ["ϋ"] = "y", - ["ΰ"] = "y", - ["ῢ"] = "y", - ["ῧ"] = "y", - ["φ"] = "ph", - ["χ"] = "ch", - ["ψ"] = "ps", - ["ω"] = "ō", - ["ώ"] = "ō", - ["ὼ"] = "ō", - ["ῶ"] = "ō", - ["ῳ"] = "ō", - ["ὠ"] = "ō", - ["ὤ"] = "ō", - ["ὢ"] = "ō", - ["ὦ"] = "ō", - ["ᾠ"] = "ō", - ["ὡ"] = "ō", - ["ὥ"] = "ō", - ["ὣ"] = "ō", - ["ὧ"] = "ō", - ["ᾡ"] = "ō", - ["ῴ"] = "ō", - ["ῲ"] = "ō", - ["ῷ"] = "ō", - ["ᾤ"] = "ō", - ["ᾢ"] = "ō", - ["ᾥ"] = "ō", - ["ᾣ"] = "ō", - ["ᾦ"] = "ō", - ["ᾧ"] = "ō", - } - - translit.tables["Greek transliteration lowercase"] = translit.gr_low - - -------------------------------------- - -- Uppercase Greek Transliteration -- - -------------------------------------- - - translit.gr_upp = translit.make_add_dict{ - ["Α"] = "A", - ["Ά"] = "A", - ["Ὰ"] = "A", - --["ᾶ"] = "A", - ["ᾼ"] = "A", - ["Ἀ"] = "A", - ["Ἁ"] = "A", - ["Ἄ"] = "A", - ["Ἂ"] = "A", - ["Ἆ"] = "A", - ["Ἁ"] = "A", - ["Ἅ"] = "A", - ["Ἃ"] = "A", - ["Ἇ"] = "A", - ["ᾉ"] = "A", - --["ᾴ"] = "A", -- I’d be very happy if anybody could explain to me - --["ᾲ"] = "A", -- why there's Ά, ᾌ and ᾼ but no “A + iota subscript - --["ᾷ"] = "A", -- + acute” …, same for Η, Υ and Ω + diacritica. 
- ["ᾌ"] = "A", - ["ᾊ"] = "A", - ["ᾍ"] = "A", - ["ᾋ"] = "A", - ["ᾎ"] = "A", - ["ᾏ"] = "A", - ["Β"] = "B", - ["Γ"] = "G", - ["Δ"] = "D", - ["Ε"] = "E", - ["Έ"] = "E", - ["Ὲ"] = "E", - ["Ἐ"] = "E", - ["Ἔ"] = "E", - ["Ἒ"] = "E", - ["Ἑ"] = "E", - ["Ἕ"] = "E", - ["Ἓ"] = "E", - ["Ζ"] = "Z", - ["Η"] = "Ē", - ["Η"] = "Ē", - ["Ή"] = "Ē", - ["Ὴ"] = "Ē", - --["ῆ"] = "Ē", - ["ῌ"] = "Ē", - ["Ἠ"] = "Ē", - ["Ἤ"] = "Ē", - ["Ἢ"] = "Ē", - ["Ἦ"] = "Ē", - ["ᾘ"] = "Ē", - ["Ἡ"] = "Ē", - ["Ἥ"] = "Ē", - ["Ἣ"] = "Ē", - ["Ἧ"] = "Ē", - ["ᾙ"] = "Ē", - --["ῄ"] = "Ē", - --["ῂ"] = "Ē", - --["ῇ"] = "Ē", - ["ᾜ"] = "Ē", - ["ᾚ"] = "Ē", - ["ᾝ"] = "Ē", - ["ᾛ"] = "Ē", - ["ᾞ"] = "Ē", - ["ᾟ"] = "Ē", - ["Θ"] = "Th", - ["Ι"] = "I", - ["Ί"] = "I", - ["Ὶ"] = "I", - --["ῖ"] = "I", - ["Ἰ"] = "I", - ["Ἴ"] = "I", - ["Ἲ"] = "I", - ["Ἶ"] = "I", - ["Ἱ"] = "I", - ["Ἵ"] = "I", - ["Ἳ"] = "I", - ["Ἷ"] = "I", - ["Ϊ"] = "I", - --["ΐ"] = "I", - --["ῒ"] = "I", - --["ῗ"] = "I", - ["Κ"] = "K", - ["Λ"] = "L", - ["Μ"] = "M", - ["Ν"] = "N", - ["Ξ"] = "X", - ["Ο"] = "O", - ["Ό"] = "O", - ["Ὸ"] = "O", - ["Ὀ"] = "O", - ["Ὄ"] = "O", - ["Ὂ"] = "O", - ["Ὁ"] = "O", - ["Ὅ"] = "O", - ["Ὃ"] = "O", - ["Π"] = "P", - ["Ρ"] = "R", - --["ῤ"] = "R", - ["Ῥ"] = "Rh", - ["Σ"] = "S", - ["Σ"] = "S", - ["Τ"] = "T", - ["Υ"] = "Y", - ["Ύ"] = "Y", - ["Ὺ"] = "Y", - --["ῦ"] = "Y", - --["ὐ"] = "Y", - --["ὔ"] = "Y", - --["ὒ"] = "Y", - --["ὖ"] = "Y", - ["Ὑ"] = "Y", - ["Ὕ"] = "Y", - ["Ὓ"] = "Y", - ["Ὗ"] = "Y", - ["Ϋ"] = "Y", - --["ΰ"] = "Y", - --["ῢ"] = "Y", - --["ῧ"] = "Y", - ["Φ"] = "Ph", - ["Χ"] = "Ch", - ["Ψ"] = "Ps", - ["Ω"] = "Ō", - ["Ώ"] = "Ō", - ["Ὼ"] = "Ō", - --["ῶ"] = "Ō", - ["ῼ"] = "Ō", - ["Ὠ"] = "Ō", - ["Ὤ"] = "Ō", - ["Ὢ"] = "Ō", - ["Ὦ"] = "Ō", - ["ᾨ"] = "Ō", - ["Ὡ"] = "Ō", - ["Ὥ"] = "Ō", - ["Ὣ"] = "Ō", - ["Ὧ"] = "Ō", - ["ᾩ"] = "Ō", - --["ῴ"] = "Ō", - --["ῲ"] = "Ō", - --["ῷ"] = "Ō", - ["ᾬ"] = "Ō", - ["ᾪ"] = "Ō", - ["ᾭ"] = "Ō", - ["ᾫ"] = "Ō", - ["ᾮ"] = "Ō", - ["ᾯ"] = "Ō", - } - - translit.tables["Greek transliteration uppercase"] = translit.gr_upp 
- - ------------ - -- Varia -- - ------------ - - translit.gr_other = translit.make_add_dict{ - ["ϝ"] = "w", - ["Ϝ"] = "W", - ["ϙ"] = "q", - ["Ϙ"] = "Q", - ["ϡ"] = "ss", - ["Ϡ"] = "Ss", - } - - translit.tables["Greek transliteration archaic characters"] = translit.gr_other - - translit.done_greek = true -end - ---===========================================================================-- --- End Of Tables -- ---===========================================================================-- - -local function greek (mode, text) - local P, V, Cs = lpeg.P, lpeg.V, lpeg.Cs - local addrules = translit.addrules - local utfchar = translit.utfchar - - if mode == "gr" or mode == "gr_n" then - - local gr_di_in, gr_in, gr_di, gr = translit.make_add_dict{}, translit.make_add_dict{}, translit.make_add_dict{}, translit.make_add_dict{} - gr_di_in = gr_di_in + translit.gr_di_in_low + translit.gr_di_in_upp - gr_in = gr_in + translit.gr_in_low + translit.gr_in_upp - gr_di = gr_di + translit.gr_di_low + translit.gr_di_upp - gr = gr + translit.gr_low + translit.gr_upp + translit.gr_other - - if mode == "gr_n" then gr_di = gr_di + translit.gr_nrule end - - local p_di_in, p_in, p_di, p - - p_di_in = addrules( gr_di_in, p_di_in ) - p_in = addrules( gr_in, p_in ) - p_di = addrules( gr_di, p_di ) - p = addrules( gr, p ) - - local g = P{ -- 2959 rules - Cs((V"init_diph" - + V"init" - + V"diph" - + V"other" - + utfchar - )^0), - - init_diph = Cs(p_di_in / gr_di_in ), - init = Cs(p_in / gr_in ), - diph = Cs(p_di / gr_di ), - other = Cs(p / gr ), - } - - return g - end -end - -translit.methods["gr"] = function (text) - p = pcache["gr"] - if not p then - p = greek("gr") - pcache["gr"] = p - end - return lpegmatch(p, text) -end - -translit.methods["gr_n"] = function (text) - p = pcache["gr_n"] - if not p then - p = greek("gr_n") - pcache["gr_n"] = p - end - return lpegmatch(p, text) -end - --- vim:ft=lua:sw=4:ts=4 diff --git a/tex/context/third/transliterator/trans_tables_iso9.lua 
b/tex/context/third/transliterator/trans_tables_iso9.lua deleted file mode 100644 index 256d994..0000000 --- a/tex/context/third/transliterator/trans_tables_iso9.lua +++ /dev/null @@ -1,310 +0,0 @@ ---===========================================================================-- --- ISO 9.1995(E) standardized transliteration for cyrillic -- ---===========================================================================-- - -local translit = thirddata.translit -local pcache = translit.parser_cache -local lpegmatch = lpeg.match - -if not translit.done_iso9 then - ----------------------------------------- - -- Lowercase russian cyrillic alphabet -- - ----------------------------------------- - translit.ru_low = translit.make_add_dict({ - ["а"] = "a", -- U+0430 -> U+0061 - ["б"] = "b", -- U+0431 -> U+0062 - ["в"] = "v", -- U+0432 -> U+0076 - ["г"] = "g", -- U+0433 -> U+0067 - ["д"] = "d", -- U+0434 -> U+0064 - ["е"] = "e", -- U+0435 -> U+0065 - ["ё"] = "ë", -- U+0451 -> U+00eb - ["ж"] = "ž", -- U+0436 -> U+017e - ["з"] = "z", -- U+0437 -> U+007a - ["и"] = "i", -- U+0438 -> U+0069 - ["й"] = "j", -- U+0439 -> U+006a - ["к"] = "k", -- U+043a -> U+006b - ["л"] = "l", -- U+043b -> U+006c - ["м"] = "m", -- U+043c -> U+006d - ["н"] = "n", -- U+043d -> U+006e - ["о"] = "o", -- U+043e -> U+006f - ["п"] = "p", -- U+043f -> U+0070 - ["р"] = "r", -- U+0440 -> U+0072 - ["с"] = "s", -- U+0441 -> U+0073 - ["т"] = "t", -- U+0442 -> U+0074 - ["у"] = "u", -- U+0443 -> U+0075 - ["ф"] = "f", -- U+0444 -> U+0066 - ["х"] = "h", -- U+0445 -> U+0068 - ["ц"] = "c", -- U+0446 -> U+0063 - ["ч"] = "č", -- U+0447 -> U+010d - ["ш"] = "š", -- U+0448 -> U+0161 - ["щ"] = "ŝ", -- U+0449 -> U+015d - ["ъ"] = "ʺ", -- U+044a -> U+02ba <- That's somewhat ambiguous as 0x2ba is - ["ы"] = "y", -- U+044b -> U+0079 used for uppercase, too. - ["ь"] = "ʹ", -- U+044c -> U+02b9 <- Same here with 0x2b9. 
- ["э"] = "è", -- U+044d -> U+00e8 - ["ю"] = "û", -- U+044e -> U+00fb - ["я"] = "â" -- U+044f -> U+00e2 - }) - - translit.tables["russian lowercase ISO~9"] = translit.ru_low - - ----------------------------------------- - -- Uppercase russian cyrillic alphabet -- - ----------------------------------------- - - translit.ru_upp = translit.make_add_dict({ - ["А"] = "A", -- U+0410 -> U+0041 - ["Б"] = "B", -- U+0411 -> U+0042 - ["В"] = "V", -- U+0412 -> U+0056 - ["Г"] = "G", -- U+0413 -> U+0047 - ["Д"] = "D", -- U+0414 -> U+0044 - ["Е"] = "E", -- U+0415 -> U+0045 - ["Ё"] = "Ë", -- U+0401 -> U+00cb - ["Ж"] = "Ž", -- U+0416 -> U+017d - ["З"] = "Z", -- U+0417 -> U+005a - ["И"] = "I", -- U+0418 -> U+0049 - ["Й"] = "J", -- U+0419 -> U+004a - ["К"] = "K", -- U+041a -> U+004b - ["Л"] = "L", -- U+041b -> U+004c - ["М"] = "M", -- U+041c -> U+004d - ["Н"] = "N", -- U+041d -> U+004e - ["О"] = "O", -- U+041e -> U+004f - ["П"] = "P", -- U+041f -> U+0050 - ["Р"] = "R", -- U+0420 -> U+0052 - ["С"] = "S", -- U+0421 -> U+0053 - ["Т"] = "T", -- U+0422 -> U+0054 - ["У"] = "U", -- U+0423 -> U+0055 - ["Ф"] = "F", -- U+0424 -> U+0046 - ["Х"] = "H", -- U+0425 -> U+0048 - ["Ц"] = "C", -- U+0426 -> U+0043 - ["Ч"] = "Č", -- U+0427 -> U+010c - ["Ш"] = "Š", -- U+0428 -> U+0160 - ["Щ"] = "Ŝ", -- U+0429 -> U+015c - ["Ъ"] = "ʺ", -- U+042a -> U+02ba - ["Ы"] = "Y", -- U+042b -> U+0059 - ["Ь"] = "ʹ", -- U+042c -> U+02b9 - ["Э"] = "È", -- U+042d -> U+00c8 - ["Ю"] = "Û", -- U+042e -> U+00db - ["Я"] = "Â" -- U+042f -> U+00c2 - }) - - translit.tables["russian uppercase ISO~9"] = translit.ru_upp - - ---------------------------------------------------------- - -- Lowercase pre-1918 russian cyrillic additional chars -- - ---------------------------------------------------------- - -- cf. 
http://www.russportal.ru/index.php?id=oldorth.decret1917 - - translit.ru_old_low = translit.make_add_dict{ - ["ѣ"] = "ě", -- U+048d -> U+011b -- 2-byte - ["і"] = "ì", -- U+0456 -> U+00ec -- 2-byte - ["ѳ"] = "f", -- U+0473 -> U+0066 -- 2-byte - ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 -- 3-byte - } - - translit.tables["russian pre-1918 lowercase ISO~9 2 byte"] = translit.ru_old_low - - translit.ru_old_upp = translit.make_add_dict{ - ["Ѣ"] = "Ě", -- U+048c -> U+011a -- 2-byte - ["І"] = "Ì", -- U+0406 -> U+00cc -- 2-byte - ["Ѳ"] = "F", -- U+0424 -> U+0046 -- 2-byte - ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2 -- 3-byte - } - - translit.ru_jer_hack = translit.make_add_dict{ - ["ь"] = "’", - ["Ь"] = "’", - ["ъ"] = "”", - ["Ъ"] = "”", - } - - translit.tables["russian magkij / tverdyj znak hack"] = translit.ru_jer_hack - - translit.tables["russian pre-1918 uppercase ISO~9 2 byte"] = translit.ru_old_upp - - --------------------------------------------------------- - -- Lowercase characters from other cyrillic alphabets -- - --------------------------------------------------------- - - translit.non_ru_low = translit.make_add_dict{ - ["ӑ"] = "ă", -- U+04d1 -> U+0103 - ["ӓ"] = "ä", -- U+04d3 -> U+00e4 - ["ә"] = "a̋", -- u+04d9 -> U+0061+030b - ["ґ"] = "g̀", -- u+0491 -> U+0067+0300 - ["ҕ"] = "ğ", -- U+0495 -> U+011f - ["ғ"] = "ġ", -- U+0493 -> U+0121 - ["ђ"] = "đ", -- U+0452 -> U+0111 - ["ѓ"] = "ǵ", -- U+0453 -> U+01f5 - ["ӗ"] = "ĕ", -- U+04d7 -> U+0115 - ["є"] = "ê", -- U+0454 -> U+00ea - ["ҽ"] = "c̆", -- U+04bd -> U+0063+0306 - ["ҿ"] = "ç̆", -- U+04bf -> U+00e7+0306 - ["ӂ"] = "z̆", -- U+04c2 -> U+007a+0306 - ["ӝ"] = "z̄", -- U+04dd -> U+007a+0304 - ["җ"] = "ž̧", -- U+0497 -> U+017e+0327 - ["ӟ"] = "z̈", -- U+04df -> U+007a+0308 - ["ѕ"] = "ẑ", -- U+0455 -> U+1e91 -- Mapped to dz in old cyrillic non-ISO. 
- ["ӡ"] = "ź", -- U+04e1 -> U+017a - ["ӥ"] = "î", -- U+04e5 -> U+00ee - ["і"] = "ì", -- U+0456 -> U+00ec - ["ї"] = "ï", -- U+0457 -> U+00ef - ["ј"] = "ǰ", -- U+0458 -> U+01f0 - ["қ"] = "ķ", -- U+049b -> U+0137 - ["ҟ"] = "k̄", -- U+049f -> U+006b+0304 - ["љ"] = "l̂", -- U+0459 -> U+006c+0302 - ["њ"] = "n̂", -- U+045a -> U+006e+0302 - ["ҥ"] = "ṅ", -- U+04a5 -> U+1e45 - ["ң"] = "ṇ", -- U+04a3 -> U+1e47 - ["ӧ"] = "ö", -- U+04e7 -> U+00f6 - ["ө"] = "ô", -- U+04e9 -> U+00f4 - ["ҧ"] = "ṕ", -- U+04a7 -> U+1e55 - ["ҫ"] = "ç", -- U+04ab -> U+00e7 - ["ҭ"] = "ţ", -- U+04ad -> U+0163 - ["ћ"] = "ć", -- U+045b -> U+0107 - ["ќ"] = "ḱ", -- U+045c -> U+1e31 - ["у́"] = "ú", -- U+0443+ -> U+00fA - ["ў"] = "ŭ", -- U+045e -> U+016d - ["ӱ"] = "ü", -- U+04f1 -> U+00fc - ["ӳ"] = "ű", -- U+04f3 -> U+0171 - ["ү"] = "ù", -- U+04af -> U+00f9 - ["ҳ"] = "ḩ", -- U+04b3 -> U+1e29 - ["һ"] = "ḥ", -- U+04bb -> U+1e25 - ["ҵ"] = "c̄", -- U+04b5 -> U+0063+0304 - ["ӵ"] = "c̈", -- U+04f5 -> U+0063+0308 - ["ҷ"] = "ç", -- U+04cc -> U+00e7 - ["џ"] = "d̂", -- U+045f -> U+0064+0302 - ["ӹ"] = "ÿ", -- U+04f9 -> U+00ff - ["ѣ"] = "ě", -- U+048d -> U+011b - ["ѫ"] = "ǎ", -- U+046b -> U+01ce -- Mapped to ǫ in non-ISO old cyrillic. - ["ѳ"] = "f̀", -- U+0473 -> U+0066+0300 -- This is mapped to ‘f’ in ru_old. 
- ["ѵ"] = "ỳ", -- U+0475 -> U+1ef3 - ["ҩ"] = "ò", -- U+04a9 -> U+00f2 - ["Ӏ"] = "‡" -- U+04cf -> U+2021 - } - - translit.tables["cyrillic other lowercase ISO~9"] = translit.non_ru_low - - --------------------------------------------------------- - -- Uppercase characters from other cyrillic alphabets -- - --------------------------------------------------------- - - translit.non_ru_upp = translit.make_add_dict{ - ["Ӑ"] = "Ă", -- U+04d0 -> U+0102 - ["Ӓ"] = "Ä", -- U+04d2 -> U+00c4 - ["Ә"] = "A̋", -- U+04d8 -> U+0041+030b - ["Ґ"] = "G̀", -- U+0490 -> U+0047+0300 - ["Ҕ"] = "Ğ", -- U+0494 -> U+011e - ["Ғ"] = "Ġ", -- U+0492 -> U+0120 - ["Ђ"] = "Đ", -- U+0402 -> U+0110 - ["Ѓ"] = "Ǵ", -- U+0403 -> U+01f4 - ["Ӗ"] = "Ĕ", -- U+04d6 -> U+0114 - ["Є"] = "Ê", -- U+0404 -> U+00ca - ["Ҽ"] = "C̆", -- U+04bc -> U+0043+0306 - ["Ҿ"] = "Ç̆", -- U+04be -> U+00c7+0306 - ["Ӂ"] = "Z̆", -- U+04c1 -> U+005a+0306 - ["Ӝ"] = "Z̄", -- U+04dc -> U+005a+0304 - ["Җ"] = "Ž̦", -- U+0496 -> U+017d+0326 - ["Ӟ"] = "Z̈", -- U+04de -> U+005a+0308 - ["Ѕ"] = "Ẑ", -- U+0405 -> U+1e90 - ["Ӡ"] = "Ź", -- U+04e0 -> U+0179 - ["Ӥ"] = "Î", -- U+04e4 -> U+00ce - ["І"] = "Ì", -- U+0406 -> U+00cc - ["Ї"] = "Ï", -- U+0407 -> U+00cf - ["Ј"] = "J̌", -- U+0408 -> U+004a+030c - ["Қ"] = "Ķ", -- U+049a -> U+0136 - ["Ҟ"] = "K̄", -- U+049e -> U+004b+0304 - ["Љ"] = "L̂", -- U+0409 -> U+004c+0302 - ["Њ"] = "N̂", -- U+040a -> U+004e+0302 - ["Ҥ"] = "Ṅ", -- U+04a4 -> U+1e44 - ["Ң"] = "Ṇ", -- U+04a2 -> U+1e46 - ["Ӧ"] = "Ö", -- U+04e6 -> U+00d6 - ["Ө"] = "Ô", -- U+04e8 -> U+00d4 - ["Ҧ"] = "Ṕ", -- U+04a6 -> U+1e54 - ["Ҫ"] = "Ç", -- U+04aa -> U+00c7 - ["Ҭ"] = "Ţ", -- U+04ac -> U+0162 - ["Ћ"] = "Ć", -- U+040b -> U+0106 - ["Ќ"] = "Ḱ", -- U+040c -> U+1e30 - ["У́"] = "Ú", -- U+0423 -> U+00da - ["Ў"] = "Ŭ", -- U+040e -> U+016c - ["Ӱ"] = "Ü", -- U+04f0 -> U+00dc - ["Ӳ"] = "Ű", -- U+04f2 -> U+0170 - ["Ү"] = "Ù", -- U+04ae -> U+00d9 - ["Ҳ"] = "Ḩ", -- U+04b2 -> U+1e28 - ["Һ"] = "Ḥ", -- U+04ba -> U+1e24 - ["Ҵ"] = "C̄", -- U+04b4 -> U+0043+0304 
- ["Ӵ"] = "C̈", -- U+04f4 -> U+0043+0308 - ["Ҷ"] = "Ç", -- U+04cb -> U+00c7 - ["Џ"] = "D̂", -- U+040f -> U+0044+0302 - ["Ӹ"] = "Ÿ", -- U+04f8 -> U+0178 - ["Ѣ"] = "Ě", -- U+048c -> U+011a - ["Ѫ"] = "Ǎ", -- U+046a -> U+01cd - ["Ѳ"] = "F̀", -- U+0472 -> U+0046+0300 - ["Ѵ"] = "Ỳ", -- U+0474 -> U+1ef2 - ["Ҩ"] = "Ò", -- U+04a8 -> U+00d2 - ["’"] = "‵", -- U+2035 -> U+2019 - ["Ӏ"] = "‡" -- U+04c0 -> U+2021 - } - - translit.tables["cyrillic other uppercase ISO~9"] = translit.non_ru_upp - - translit.done_iso9 = true -end - ---===========================================================================-- --- End Of Tables -- ---===========================================================================-- - -local function iso9 (mode) - local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs - local addrules = translit.addrules - local utfchar = translit.utfchar - - local iso9 = translit.make_add_dict{} - iso9 = translit.ru_upp + translit.ru_low - - if mode == "ru_old" or mode == "all" then - - iso9 = iso9 + translit.ru_old_upp + translit.ru_old_low - - if mode == "all" then - iso9 = iso9 - + translit.non_ru_upp - + translit.non_ru_low - end - if translit.deficient_font == "yes" then - iso9 = iso9 - + translit.ru_old_upp - + translit.ru_old_low - + translit.ru_jer_hack - end - end - - local p_iso9 = addrules (iso9, p_iso9) - local iso9_parser = Cs((p_iso9 / iso9 + utfchar)^0) - - return iso9_parser -end - -translit.methods["all"] = function (text) - local pname = "all" .. translit.deficient_font - local p = pcache[pname] - if not p then - p = iso9("all") - pcache[pname] = p - end - return lpegmatch(p, text) -end - -translit.methods["ru"] = translit.methods["all"] - -translit.methods["ru_old"] = function (text) - local pname = "ru_old" .. 
translit.deficient_font - local p = pcache[pname] - if not p then - p = iso9("all") - pcache[pname] = p - end - return lpegmatch(p, text) -end - --- vim:ft=lua:sw=4:ts=4 diff --git a/tex/context/third/transliterator/trans_tables_scntfc.lua b/tex/context/third/transliterator/trans_tables_scntfc.lua deleted file mode 100644 index 96002c4..0000000 --- a/tex/context/third/transliterator/trans_tables_scntfc.lua +++ /dev/null @@ -1,287 +0,0 @@ ---===========================================================================-- --- Other transliterations -- ---===========================================================================-- - -local translit = thirddata.translit -local pcache = translit.parser_cache -local lpegmatch = lpeg.match - --- The following are needed because ISO 9 does not cover old Slavonic --- characters that became obsolete before the advent of гражданский шрифт. - --- Please note that these mappings are not bijective so don't expect the result --- to be easily revertible (by machines). - --- Source p. 
77 of --- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf - -if not translit.done_ocs then - ----------------------------------------------------------------------- - -- Lowercase and uppercase letter Uk -- “scientific transliteration” -- - ----------------------------------------------------------------------- - - translit.ocs_uk = translit.make_add_dict{ - ["oу"] = "u", - ["оу"] = "u", - ["Оу"] = "U", - } - ----------------------------------------------------------------------------- - -- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” -- - ----------------------------------------------------------------------------- - - translit.ocs_low = translit.make_add_dict{ - ["а"] = "a", - ["б"] = "b", - ["в"] = "v", - ["г"] = "g", - ["д"] = "d", - ["є"] = "e", - ["ж"] = "ž", - ["ꙃ"] = "ʒ", -- U+0292, alternative: dz U+01f3 - ["ѕ"] = "ʒ", - ["ꙁ"] = "z", - ["з"] = "z", - ["и"] = "i", - ["і"] = "i", - ["ї"] = "i", - ["ћ"] = "g’", - ["к"] = "k", - ["л"] = "l", - ["м"] = "m", - ["н"] = "n", - ["о"] = "o", - ["п"] = "p", - ["р"] = "r", - ["с"] = "s", - ["т"] = "t", - ["у"] = "u", - ["ѹ"] = "u", - ["ꙋ"] = "u", - ["ф"] = "f", - ["х"] = "x", - ["ѡ"] = "o", --"ō", - ["ѿ"] = "ot", -- U+047f - ["ѽ"] = "o!", -- U+047d - ["ꙍ"] = "o!", -- U+064D - ["ц"] = "c", - ["ч"] = "č", - ["ш"] = "š", - ["щ"] = "št", - ["ъ"] = "ъ", - ["ы"] = "y", - ["ꙑ"] = "y", -- Old jery (U+a651) as used e.g. by the OCS Wikipedia. 
- ["ь"] = "ь", - ["ѣ"] = "ě", - ["ю"] = "ju", - ["ꙗ"] = "ja", - ["ѥ"] = "je", - ["ѧ"] = "ę", - ["ѩ"] = "ję", - ["ѫ"] = "ǫ", - ["ѭ"] = "jǫ", - ["ѯ"] = "ks", - ["ѱ"] = "ps", - ["ѳ"] = "th", - ["ѵ"] = "ü", - } - - translit.tables["OCS \\quotation{scientific} transliteration lowercase"] = translit.ocs_low - - ----------------------------------------------------------------------------- - -- Uppercase pre-Peter cyrillic characters -- “scientific transliteration” -- - ----------------------------------------------------------------------------- - - translit.ocs_upp = translit.make_add_dict{ - ["А"] = "A", - ["Б"] = "B", - ["В"] = "V", - ["Г"] = "G", - ["Д"] = "D", - ["Є"] = "E", - ["Ж"] = "Ž", - ["Ꙃ"] = "Ʒ", -- U+01b7, alternative: Dz U+01f2 - ["Ѕ"] = "Ʒ", - ["Ꙁ"] = "Z", - ["З"] = "Z", - ["И"] = "I", - ["І"] = "I", - ["Ї"] = "I", - ["Ћ"] = "G’", - ["К"] = "K", - ["Л"] = "L", - ["М"] = "M", - ["Н"] = "N", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["У"] = "u", - ["Ѹ"] = "U", - --["ꙋ"] = "U", - ["Ф"] = "F", - ["Х"] = "X", - ["Ѡ"] = "Ō", - ["Ѿ"] = "Ot", -- U+047c - ["Ѽ"] = "O!", -- U+047e - ["Ꙍ"] = "O!", -- U+064C - ["Ц"] = "C", - ["Ч"] = "Č", - ["Ш"] = "Š", - ["Щ"] = "Št", - ["Ъ"] = "Ŭ", - ["Ы"] = "Y", - ["Ꙑ"] = "Y", -- U+a650 - ["Ь"] = "Ĭ", - ["Ѣ"] = "Ě", - ["Ю"] = "Ju", - ["Ꙗ"] = "Ja", - ["Ѥ"] = "Je", - ["Ѧ"] = "Ę", - ["Ѩ"] = "Ję", - ["Ѫ"] = "Ǫ", - ["Ѭ"] = "Jǫ", - ["Ѯ"] = "Ks", - ["Ѱ"] = "Ps", - ["Ѳ"] = "Th", - ["Ѵ"] = "Ü", - } - - translit.tables["OCS \\quotation{scientific} transliteration uppercase"] = translit.ocs_upp - - -- Note on the additional tables: these cover characters that are not defined - -- in ISO 9 but have a “scientific” transliteration. You may use them as - -- complementary mapping to ISO 9, trading off homogenity for completeness. 
- - ---------------------------------------------------------------------------------------- - -- Lowercase additional pre-Peter cyrillic characters -- “scientific transliteration” -- - ---------------------------------------------------------------------------------------- - - translit.ocs_add_low = translit.make_add_dict{ - ["ѕ"] = "dz", -- Mapped to ẑ in ISO 9 (Macedonian …) - ["ѯ"] = "ks", - ["ѱ"] = "ps", - ["ѡ"] = "ô", - ["ѿ"] = "ot", -- U+047f - ["ѫ"] = "ǫ", -- Mapped to ǎ in ISO 9. - ["ѧ"] = "ę", - ["ѭ"] = "jǫ", - ["ѩ"] = "ję", - ["ѥ"] = "je", - ["ѹ"] = "u", -- Digraph uk. - ["ꙋ"] = "u", -- Monograph uk, U+a64b. (No glyph yet in the "fixed" font in February 2010 …) - ["ꙑ"] = "y", -- U+a651 - } - - translit.tables["OCS \\quotation{scientific} transliteration additional lowercase"] = translit.ocs_add_low - - ---------------------------------------------------------------------------------------- - -- Uppercase additional pre-Peter cyrillic characters -- “scientific transliteration” -- - ---------------------------------------------------------------------------------------- - - translit.ocs_add_upp = translit.make_add_dict{ - ["Ѕ"] = "Dz", - ["Ѯ"] = "Ks", - ["Ѱ"] = "Ps", - ["Ѡ"] = "Ô", - ["Ѿ"] = "ot", - ["Ѫ"] = "Ǫ", - ["Ѧ"] = "Ę", - ["Ѭ"] = "Jǫ", - ["Ѩ"] = "Ję", - ["Ѥ"] = "Je", - ["Ѹ"] = "U", -- Digraph uk. - --["Ꙋ"] = "U", -- Monograph Uk, U+a64a. 
- ["Ꙑ"] = "Y", -- U+a650 - } - - translit.tables["OCS \\quotation{scientific} transliteration additional uppercase"] = translit.ocs_add_upp - translit.done_ocs = true -end - ---===========================================================================-- --- End Of Tables -- ---===========================================================================-- - -local function scientific (mode) - local P, Cs = lpeg.P, lpeg.Cs - local utfchar = translit.utfchar - local addrules = translit.addrules - - local cyr = translit.make_add_dict{} - local cyruk, p_cyruk, p_cyr, scientific_parser - - if mode == "iso9_ocs" or mode == "iso9_ocs_hack" then - - environment.loadluafile("trans_tables_iso9") - cyr = translit.ocs_add_low - + translit.ocs_add_upp - + translit.ocs_low - + translit.ru_upp - + translit.ru_low - + translit.ru_old_upp - + translit.ru_old_low - + translit.non_ru_upp - + translit.non_ru_low - + translit.ocs_upp - - if translit.deficient_font == "yes" then - cyr = cyr + translit.ru_jer_hack - end - - p_cyr = addrules(cyr, p_cyr) - - scientific_parser = Cs((p_cyr / cyr + utfchar)^0) - - elseif mode == ("ocs") then - - cyr = translit.ocs_low + translit.ocs_upp - - p_cyruk = addrules(translit.ocs_uk, cyruk) - p_cyr = addrules(cyr, p_cyr) - - scientific_parser = Cs((p_cyruk / translit.ocs_uk - + p_cyr / cyr - + utfchar)^0) - - elseif mode == ("ocs_gla") then - environment.loadluafile( "trans_tables_glag") - cyr = translit.ocs_gla_low + translit.ocs_gla_upp - - p_cyr = addrules(cyr, p_cyr) - scientific_parser = Cs((p_cyr / cyr + utfchar)^0) - end - - return scientific_parser -end - - -translit.methods["iso9_ocs"] = function (text) - local pname = "iso9_ocs" .. 
translit.deficient_font - local p = pcache[pname] - if not p then - p = scientific("iso9_ocs") - pcache[pname] = p - end - return lpegmatch(p, text) -end - -translit.methods["ocs"] = function (text) - local p = pcache["ocs"] - if not p then - p = scientific("ocs") - pcache["ocs"] = p - end - return lpegmatch(p, text) -end - -translit.methods["ocs_gla"] = function (text) - local p = pcache["ocs_gla"] - if not p then - p = scientific("ocs_gla") - pcache["ocs_gla"] = p - end - return lpegmatch(p, text) -end - --- vim:ft=lua:ts=4:sw=4 diff --git a/tex/context/third/transliterator/trans_tables_sr.lua b/tex/context/third/transliterator/trans_tables_sr.lua deleted file mode 100644 index 4f549c5..0000000 --- a/tex/context/third/transliterator/trans_tables_sr.lua +++ /dev/null @@ -1,241 +0,0 @@ - ---===========================================================================-- --- Serbian -- ---===========================================================================-- - -local translit = thirddata.translit -local pcache = translit.parser_cache -local lpegmatch = lpeg.match - - --- Special thanks to Mojca Miklavec and Arthur Reutenauer for their --- assistance in creating these transliteration routines. 
- -if not translit.done_serbian then - -------------------------------------------- - -- Lowercase Serbian (Cyrillic -> Latin) -- - -------------------------------------------- - translit.sr_tolt_lower = translit.make_add_dict{ - ["а"] = "a", - ["б"] = "b", - ["в"] = "v", - ["г"] = "g", - ["д"] = "d", - ["ђ"] = "đ", - ["е"] = "e", - ["ж"] = "ž", - ["з"] = "z", - ["и"] = "i", - ["ј"] = "j", - ["к"] = "k", - ["л"] = "l", - ["љ"] = "lj", - ["м"] = "m", - ["н"] = "n", - ["њ"] = "nj", - ["о"] = "o", - ["п"] = "p", - ["р"] = "r", - ["с"] = "s", - ["т"] = "t", - ["ћ"] = "ć", - ["у"] = "u", - ["ф"] = "f", - ["х"] = "h", - ["ц"] = "c", - ["ч"] = "č", - ["џ"] = "dž", - ["ш"] = "š", - } - - translit.tables["Serbian Cyr->Lat Transliteration lowercase"] = translit.sr_tolt_lower - - -------------------------------------------- - -- Uppercase Serbian (Cyrillic -> Latin) -- - -------------------------------------------- - - translit.sr_tolt_upper = translit.make_add_dict{ - ["А"] = "A", - ["Б"] = "B", - ["В"] = "V", - ["Г"] = "G", - ["Д"] = "D", - ["Ђ"] = "Đ", - ["Е"] = "E", - ["Ж"] = "Ž", - ["З"] = "Z", - ["И"] = "I", - ["Ј"] = "J", - ["К"] = "K", - ["Л"] = "L", - ["Љ"] = "Lj", - ["М"] = "M", - ["Н"] = "N", - ["Њ"] = "Nj", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["Ћ"] = "Ć", - ["У"] = "U", - ["Ф"] = "F", - ["Х"] = "H", - ["Ц"] = "C", - ["Ч"] = "Č", - ["Џ"] = "Dž", - ["Ш"] = "Š", - } - - translit.tables["Serbian Cyr->Lat Transliteration uppercase"] = translit.sr_tolt_upper - - local function __inverse_tab (t) - local result = { } - for k,v in next,t do result[v] = k end - return result - end - - translit.sr_tocy_lower = translit.make_add_dict(__inverse_tab(translit.sr_tolt_lower)) - translit.sr_tocy_upper = translit.make_add_dict(__inverse_tab(translit.sr_tolt_upper)) - - - --- Good reading up front: - --- - --- - - local except = { - ["konjug"] = "конјуг", - ["konjunk"] = "конјунк", - ["injekc"] = "инјекц", - ["injunkt"] = "инјункт", - 
["panjelin"] = "панјелин", - ["tanjug"] = "танјуг", - ["vanjezič"] = "ванјезич", - ["vanjadransk"] = "ванјадранск", - - ["nadžanj"] = "наджањ", - ["nadždrel"] = "надждрел", - ["nadžet"] = "наджет", - ["nadživ"] = "наджив", - ["nadžnj"] = "наджњ", - ["nadžup"] = "наджуп", - ["odžal"] = "оджал", - ["odžar"] = "оджар", - ["odživ"] = "оджив", - ["odžubor"] = "оджубор", - ["odžur"] = "оджур", - ["odžvak"] = "оджвак", - ["podžanr"] = "поджанр", - ["podže"] = "подже", -- “поджећи” - } - - local P = lpeg.P - local sub, upper = unicode.utf8.sub, unicode.utf8.upper - - local p_tocy, p_i_tocy, p_tolt, p_i_tolt - - for left, right in next, except do -- generating exception patterns for both sides - local Left = upper(sub(left, 1, 1)) .. sub(left, 2) - local Right = upper(sub(right, 1, 1)) .. sub(right, 2) - local LEFT, RIGHT = upper(left), upper(right) - - local p_i_left = P(left) / right + P(Left) / Right + P(LEFT) / RIGHT - local p_i_right = P(right) / left + P(Right) / Left + P(RIGHT) / LEFT - - local p_left = P" " * p_i_left - local p_right = P" " * p_i_right - - if not p_tocy then - p_tocy = p_left - p_i_tocy = p_i_left - p_tolt = p_right - p_i_tolt = p_i_right - else - p_tocy = p_tocy + p_left - p_i_tocy = p_i_tocy + p_i_left - p_tolt = p_tolt + p_right - p_i_tolt = p_i_tolt + p_i_right - end - end - - local _p_hintchar = P"*" / "" - local hintme = "dln" - local _p_tocy_hint, _p_tolt_hint - - for left in hintme:utfcharacters() do - local right = translit.sr_tocy_lower[left] - local LEFT, RIGHT = upper(left), upper(right) - if not _p_tocy_hint then - _p_tocy_hint = P(left) / right + P(LEFT) / RIGHT - _p_tolt_hint = P(right) / left + P(RIGHT) / LEFT - else - _p_tocy_hint = _p_tocy_hint + P(left) / right + P(LEFT) / RIGHT - _p_tolt_hint = _p_tolt_hint + P(right) / left + P(RIGHT) / LEFT - end - end - - translit.serbian_exceptions = { } - translit.serbian_exceptions.p_tocy = p_tocy - translit.serbian_exceptions.p_tolt = p_tolt - translit.serbian_exceptions.p_tocy_init = 
p_i_tocy - translit.serbian_exceptions.p_tolt_init = p_i_tolt - translit.serbian_exceptions.p_tocy_hint = _p_tocy_hint * _p_hintchar - translit.serbian_exceptions.p_tolt_hint = _p_tolt_hint * _p_hintchar - - translit.done_serbian = true -end - ---===========================================================================-- --- End Of Tables -- ---===========================================================================-- - - -local t = translit -local function sr (mode) - local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs - local utfchar = translit.utfchar - local modestr = "p_" .. mode:match("to..$") - local _p_sre = t.serbian_exceptions[modestr] - local _p_sre_i = t.serbian_exceptions[modestr .. "_init"] - - local trl_sr = translit.make_add_dict{} - trl_sr = t[mode.."_upper"] + t[mode.."_lower"] - - -- transliteration from latin script requires macro handling … - local _p_macro = P[[\]] * R("az", "AZ")^1 -- assuming standard catcodes - local _p_sr = translit.addrules (trl_sr, _p_sr) / trl_sr - if translit.hinting then - _p_sr = t.serbian_exceptions[modestr .. "_hint"] + _p_sr - end - - local p_sr - if translit.sr_except then - p_sr = Cs(_p_sre_i^-1 * (_p_macro + _p_sre + _p_sr + utfchar)^0) - else - p_sr = Cs((_p_macro + _p_sr + utfchar)^0) - end - - return p_sr -end - -translit.methods["sr_tolt"] = function (text) - local pname = "sr_tolt" .. tostring(translit.hinting) .. tostring(translit.sr_except) - local p = pcache[pname] - if not p then - p = sr("sr_tolt") - pcache[pname] = p - end - return lpegmatch(p, text) -end - -translit.methods["sr_tocy"] = function (text) - local pname = "sr_tocy" .. tostring(translit.hinting) .. 
tostring(translit.sr_except) - local p = pcache[pname] - if not p then - p = sr("sr_tocy") - pcache[pname] = p - end - return lpegmatch(p, text) -end - --- vim:ft=lua:sw=4:ts=4 diff --git a/tex/context/third/transliterator/trans_tables_trsc.lua b/tex/context/third/transliterator/trans_tables_trsc.lua deleted file mode 100644 index fbc35d1..0000000 --- a/tex/context/third/transliterator/trans_tables_trsc.lua +++ /dev/null @@ -1,867 +0,0 @@ ---===================================================================-- --- Legacy national transliterations -- ---===================================================================-- - -local translit = thirddata.translit -local addrules = translit.addrules -local utfchar = translit.utfchar - -local lpegmatch = lpeg.match -local tablepack = table.pack -- lua 5.2 precaution - ---------------------------------- --- German simple transcription -- ---------------------------------- --- Reference: „DUDEN. Rechtschreibung der deutschen Sprache“; --- 20. Aufl., --- Mannheim et. al. 1991. 
- -if lpeg.version() == "0.9" and not translit.done_ru_trsc_de then - - -------------------------------------------------------- - -- Lowercase German simple transcription---first pass -- - -------------------------------------------------------- - - translit.ru_trsc_low_first = translit.make_add_dict{ - [" е"] = " je", - ["ъе"] = "je", - ["ье"] = "je", - [" ё"] = " jo", - ["ъё"] = "jo", - ["ьё"] = "jo", - ["жё"] = "scho", - ["чё"] = "tscho", - ["шё"] = "scho", - ["щё"] = "schtscho", - ["ье"] = "je", - ["ьи"] = "ji", - ["ьо"] = "jo", - ["ий"] = "i", - ["ый"] = "y", - ["кс"] = "x" - } - - translit.tables["German transcription first pass lowercase"] - = translit.ru_trsc_low_first - - -------------------------------------------------------- - -- Uppercase German simple transcription---first pass -- - -------------------------------------------------------- - - translit.ru_trsc_upp_first = translit.make_add_dict{ - [" Е"] = " Je", - ["Ъe"] = "Je", -- Pedantic, isn't it? - ["Ье"] = "Je", - [" Ё"] = "Jo", - ["Ъё"] = "Jo", - ["Ьё"] = "Jo", - ["Жё"] = "Scho", - ["Чё"] = "Tscho", - ["Шё"] = "Scho", - ["Щё"] = "Schtscho", - ["Кс"] = "ks" - } - - translit.tables["German transcription first pass uppercase"] - = translit.ru_trsc_upp_first - - ------------------------------------------- - -- Lowercase German simple transcription -- - ------------------------------------------- - - translit.ru_trsc_low = translit.make_add_dict{ - ["а"] = "a", - ["б"] = "b", - ["в"] = "w", - ["г"] = "g", - ["д"] = "d", - ["е"] = "e", - ["ё"] = "jo", - ["ж"] = "sch", - ["з"] = "s", - ["и"] = "i", - ["й"] = "i", - ["к"] = "k", - ["л"] = "l", - ["м"] = "m", - ["н"] = "n", - ["о"] = "o", - ["п"] = "p", - ["р"] = "r", - ["с"] = "s", - ["т"] = "t", - ["у"] = "u", - ["ф"] = "f", - ["х"] = "ch", - ["ц"] = "z", - ["ч"] = "tsch", - ["ш"] = "sch", - ["щ"] = "schtsch", - ["ъ"] = "", - ["ы"] = "y", - ["ь"] = "", - ["э"] = "e", - ["ю"] = "ju", - ["я"] = "ja" - } - - translit.tables["German transcription second 
pass lowercase"] - = translit.ru_trsc_low - - ------------------------------------------- - -- Uppercase German simple transcription -- - ------------------------------------------- - - translit.ru_trsc_upp = translit.make_add_dict{ - ["А"] = "A", - ["Б"] = "B", - ["В"] = "W", - ["Г"] = "G", - ["Д"] = "D", - ["Е"] = "E", - ["Ё"] = "Jo", - ["Ж"] = "Sch", - ["З"] = "S", - ["И"] = "I", - ["Й"] = "J", - ["К"] = "K", - ["Л"] = "L", - ["М"] = "M", - ["Н"] = "N", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["У"] = "U", - ["Ф"] = "F", - ["Х"] = "Ch", - ["Ц"] = "Z", - ["Ч"] = "Tsch", - ["Ш"] = "Sch", - ["Щ"] = "Schtsch", - ["Ъ"] = "", - ["Ы"] = "Y", - ["Ь"] = "", - ["Э"] = "E", - ["Ю"] = "Ju", - ["Я"] = "Ja" - } - - translit.tables["German transcription second pass uppercase"] - = translit.ru_trsc_upp - - translit.ru_trsc_iy = {"и", "ы", "И", "Ы"} - - function translit.gen_rules_de() - -- The following are more interesting than the previous tables - -- because they implement various rules. For instance the - -- table \type{translit.ru_trsc_irule} holds a substitution - -- dictionary for all possible combinations (including nonsense - -- galore) of a vowel preceding an “й” (Russian short i) - -- preceding a consonant; here we access the sets of Russian - -- vowels as well consonants that were defined earlier. - - -- The й-rule, VйC -> ViC - translit.ru_trsc_irule = translit.make_add_dict{} - for _, vow in ipairs(translit.ru_vowels) do - for _, cons in ipairs(translit.ru_consonants) do - local new_ante = vow .. "й" .. cons - local new_post = vow .. "i" .. cons - translit.ru_trsc_irule[new_ante] = new_post - end - end - - translit.tables["German transcription i-rule"] - = translit.ru_trsc_irule - - -- The second й-rule, йV -> jV && [иы]йC -> [иы]jC - translit.ru_trsc_jrule = {} - for _, vow in ipairs(translit.ru_vowels) do - local new_ante = "й" .. vow - local new_post = "j" .. 
vow - translit.ru_trsc_jrule[new_ante] = new_post - end - - for _, cons in ipairs(translit.ru_consonants) do - for _, iy in ipairs(translit.ru_trsc_iy) do - local new_ante = iy .. "й" .. cons - local new_post = iy .. "j" .. cons - translit.ru_trsc_jrule[new_ante] = new_post - end - end - - translit.tables["German transcription j-rule"] - = translit.ru_trsc_jrule - - -- The с-rule, VсV -> VssV - translit.ru_trsc_srule = translit.make_add_dict{} - for i, vow_1 in ipairs(translit.ru_vowels) do - for j, vow_2 in ipairs(translit.ru_vowels) do - local new_ante = vow_1 .. "с" .. vow_2 - local new_post = vow_1 .. "ss" .. vow_2 - translit.ru_trsc_srule[new_ante] = new_post - end - end - - translit.tables["German transcription s-rule"] - = translit.ru_trsc_srule - - -- The sharp-s-rule, Vсх -> Vßх - translit.ru_trsc_sharpsrule = translit.make_add_dict{} - for i, vow in ipairs(translit.ru_vowels) do - local new_ante = vow .. "сх" - local new_post = vow .. "ßх" - translit.ru_trsc_sharpsrule[new_ante] = new_post - end - - translit.tables["German transcription sharp-s-rule"] - = translit.ru_trsc_sharpsrule - - -- The е-rule, Vе -> Vje - translit.ru_trsc_jerule = translit.make_add_dict{} - for i, vow in ipairs(translit.ru_vowels) do - local new_ante = vow .. "е" - local new_post = vow .. "je" - translit.ru_trsc_jerule[new_ante] = new_post - end - - translit.tables["German transcription je-rule"] - = translit.ru_trsc_jerule - - -- The ё-rule, Vё -> Vjo - -- This should be redundant as [жцчшщ]ё -> o, else ё -> jo . - -- Somebody should teach those DUDEN-guys parsimony. - translit.ru_trsc_jorule = translit.make_add_dict{} - for i, vow in ipairs(translit.ru_vowels) do - local new_ante = vow .. "ё" - local new_post = vow .. 
"jo" - translit.ru_trsc_jorule[new_ante] = new_post - end - - translit.tables["German transcription (redundant) jo-rule"] - = translit.ru_trsc_jorule - - end - - translit.gen_rules_de() - translit.done_ru_trsc_de = true -end - -if lpeg.version() == "0.10" and not translit.done_ru_trsc_de then - - -- This is about *eight* times as fast as the old pattern. Just - -- waiting for v0.10 to make it into luatex. - - local de_tables = { } - - -------------------------------------------------------- - -- Lowercase German simple transcription---first pass -- - -------------------------------------------------------- - - de_tables[1] = { -- lowercase initial - [" е"] = " je", ["ъе"] = "je", ["ье"] = "je", - [" ё"] = " jo", ["ъё"] = "jo", ["ьё"] = "jo", - ["жё"] = "scho", ["цё"] = "scho", ["чё"] = "zo", - ["шё"] = "scho", ["щё"] = "schtscho", ["ье"] = "je", - ["ьи"] = "ji", ["ьо"] = "jo", ["ий"] = "i", - ["ый"] = "y", ["кс"] = "x" -- Extraordinarily stupid one. - } - translit.tables["German transcription first pass lowercase"] - = de_tables[1] - - -------------------------------------------------------- - -- Uppercase German simple transcription---first pass -- - -------------------------------------------------------- - - de_tables[2] = { -- uppercase initial - [" Е"] = " Je", ["Ъe"] = "Je", ["Ье"] = "Je", - [" Ё"] = "Jo", ["Ъё"] = "Jo", ["Ьё"] = "Jo", - ["Жё"] = "Scho", ["Чё"] = "Tscho", ["Шё"] = "Scho", - ["Щё"] = "Schtscho", ["Кс"] = "ks" - } - translit.tables["German transcription first pass uppercase"] - = de_tables[2] - - ------------------------------------------- - -- Lowercase German simple transcription -- - ------------------------------------------- - - de_tables[3] = { -- lowercase - ["а"] = "a", ["б"] = "b", ["в"] = "w", ["г"] = "g", - ["д"] = "d", ["е"] = "e", ["ё"] = "jo", ["ж"] = "sch", - ["з"] = "s", ["и"] = "i", ["й"] = "i", ["к"] = "k", - ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", - ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", - ["у"] = 
"u", ["ф"] = "f", ["х"] = "ch", ["ц"] = "z", - ["ч"] = "tsch", ["ш"] = "sch", ["щ"] = "schtsch", - ["ъ"] = "", ["ы"] = "y", ["ь"] = "", ["э"] = "e", - ["ю"] = "ju", ["я"] = "ja" - } - translit.tables["German transcription second pass lowercase"] - = de_tables[3] - - ------------------------------------------- - -- Uppercase German simple transcription -- - ------------------------------------------- - - de_tables[4] = { -- uppercase - ["А"] = "A", ["Б"] = "B", ["В"] = "W", ["Г"] = "G", - ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Jo", ["Ж"] = "Sch", - ["З"] = "S", ["И"] = "I", ["Й"] = "J", ["К"] = "K", - ["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", - ["П"] = "P", ["Р"] = "R", ["С"] = "S", ["Т"] = "T", - ["У"] = "U", ["Ф"] = "F", ["Х"] = "Ch", ["Ц"] = "Z", - ["Ч"] = "Tsch", ["Ш"] = "Sch", ["Щ"] = "Schtsch",["Ъ"] = "", - ["Ы"] = "Y", ["Ь"] = "", ["Э"] = "E", ["Ю"] = "Ju", - ["Я"] = "Ja" - } - translit.tables["German transcription second pass uppercase"] - = de_tables[4] - - local B, P, Cs = lpeg.B, lpeg.P, lpeg.Cs - - -- All chars are 2-byte. - local Co = P{ - P"б" + "в" + "г" + "д" + "ж" + "з" + "к" + "л" + "м" + "н" + - "п" + "р" + "с" + "т" + "ф" + "х" + "ц" + "ч" + "ш" + "щ" + - "ъ" + "ь" + - "Б" + "В" + "Г" + "Д" + "Ж" + "З" + "К" + "Л" + "М" + "Н" + - "П" + "Р" + "С" + "Т" + "Ф" + "Х" + "Ц" + "Ч" + "Ш" + "Щ" + - "Ъ" + "Ь" - } - - local Vo = P{ - P"а" + "е" + "ё" + "и" + "й" + "о" + "у" + "ы" + "э" + "я" + - "ю" + "А" + "Е" + "Ё" + "И" + "Й" + "О" + "У" + "Ы" + "Э" + - "Я" + "Ю" - } - - local iy = P"и" + P"ы" + P"И" + P"Ы" - - ------------------------------------------- - -- Pattern generation. 
- ------------------------------------------- - - local p_transcript - - for _, set in next, de_tables do - for str, rep in next, set do - if not p_transcript then -- it’ll be empty initially - p_transcript = P(str) / rep - else - p_transcript = p_transcript + (P(str) / rep) - end - end - end - - local irule = B(Vo,2) * Cs(P"й") * #Co / "i" - local iyrule = B(iy,2) * Cs(P"й") * #Co / "j" - local jrule = Cs(P"й") * #Vo / "j" - local srule = B(Vo,2) * Cs(P"с") * #Vo / "ss" - local ssrule = B(Vo,2) * Cs(P"с") * #P"х" / "ß" - local jerule = B(Vo,2) * Cs(P"е") / "je" - local jorule = B(Vo,2) * Cs(P"ё") / "jo" - - translit.future_ru_transcript_de - = Cs((iyrule + jrule + irule - + jerule + srule + ssrule - + jorule + p_transcript + 1)^0 - ) -end - -if not translit.done_ru_trsc_en then - - --------------------------------------------------------- - -- Lowercase English simple transcription---first pass -- - --------------------------------------------------------- - - translit.ru_trsc_en_low_first = translit.make_add_dict{ - [" е"] = " ye", - ["ъе"] = "ye", - ["ье"] = "ye", - ["ье"] = "ye", - ["ьи"] = "yi", - } - - translit.tables["English transcription lowercase first pass"] - = translit.ru_trsc_en_low_first - - --------------------------------------------------------- - -- Uppercase English simple transcription---first pass -- - --------------------------------------------------------- - - translit.ru_trsc_en_upp_first = translit.make_add_dict{ - [" Е"] = " Ye", - ["Ъe"] = "Ye", - ["Ье"] = "Ye", - } - - translit.tables["English transcription uppercase first pass"] - = translit.ru_trsc_en_upp_first - - -------------------------------------------- - -- Lowercase English simple transcription -- - -------------------------------------------- - - translit.ru_trsc_en_low = translit.make_add_dict{ - ["а"] = "a", - ["б"] = "b", - ["в"] = "v", - ["г"] = "g", - ["д"] = "d", - ["е"] = "e", - ["ё"] = "e", - ["ж"] = "zh", - ["з"] = "z", - ["и"] = "i", - ["й"] = "y", - ["к"] = "k", - 
["л"] = "l", - ["м"] = "m", - ["н"] = "n", - ["о"] = "o", - ["п"] = "p", - ["р"] = "r", - ["с"] = "s", - ["т"] = "t", - ["у"] = "u", - ["ф"] = "f", - ["х"] = "kh", - ["ц"] = "ts", - ["ч"] = "ch", - ["ш"] = "sh", - ["щ"] = "shsh", - ["ъ"] = "", - ["ы"] = "y", - ["ь"] = "", - ["э"] = "e", - ["ю"] = "yu", - ["я"] = "ya" - } - - translit.tables["English transcription lowercase second pass"] - = translit.ru_trsc_en_low - - -------------------------------------------- - -- Uppercase English simple transcription -- - -------------------------------------------- - - translit.ru_trsc_en_upp = translit.make_add_dict{ - ["А"] = "A", - ["Б"] = "B", - ["В"] = "V", - ["Г"] = "G", - ["Д"] = "D", - ["Е"] = "E", - ["Ё"] = "E", - ["Ж"] = "Zh", - ["З"] = "Z", - ["И"] = "I", - ["Й"] = "Y", - ["К"] = "K", - ["Л"] = "L", - ["М"] = "M", - ["Н"] = "N", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["У"] = "U", - ["Ф"] = "F", - ["Х"] = "Kh", - ["Ц"] = "Ts", - ["Ч"] = "Ch", - ["Ш"] = "Sh", - ["Щ"] = "Shsh", - ["Ъ"] = "", - ["Ы"] = "Y", - ["Ь"] = "", - ["Э"] = "E", - ["Ю"] = "Yu", - ["Я"] = "Ya" - } - - translit.tables["English transcription uppercase second pass"] - = translit.ru_trsc_en_upp - - function translit.gen_rules_en () - -- The english е-rule, Vе -> Vye - translit.ru_trsc_en_jerule = translit.make_add_dict{} - for i, vow in ipairs(translit.ru_vowels) do - local new_ante = vow .. "е" - local new_post = vow .. 
"ye" - translit.ru_trsc_en_jerule[new_ante] = new_post - end - - translit.tables["English transcription ye-rule"] - = translit.ru_trsc_en_jerule - end - - translit.gen_rules_en() - translit.done_ru_trsc_en = true -end - - -if not translit.done_ru_trsc_cz then - ----------------------------------- - -- Lowercase Czech transcription -- - ----------------------------------- - - translit.ru_trsc_cz_low = translit.make_add_dict{ - ["а"] = "a", - ["б"] = "b", - ["в"] = "v", - ["г"] = "g", - ["д"] = "d", - ["е"] = "e", - ["ё"] = "ë", - ["ж"] = "ž", - ["з"] = "z", - ["и"] = "i", - ["й"] = "j", - ["к"] = "k", - ["л"] = "l", - ["м"] = "m", - ["н"] = "n", - ["о"] = "o", - ["п"] = "p", - ["р"] = "r", - ["с"] = "s", - ["т"] = "t", - ["у"] = "u", - ["ф"] = "f", - ["х"] = "ch", - ["ц"] = "c", - ["ч"] = "č", - ["ш"] = "š", - ["щ"] = "šč", - ["ъ"] = "ъ", - ["ы"] = "y", - ["ь"] = "ь", - ["э"] = "è", - ["ю"] = "ju", -- Maybe we should do things like ню -> ňu and - ["я"] = "ja", -- тя -> ťa, but that would complicate things a - } -- bit and linguists might not agree. 
- - translit.tables["Czech transcription lowercase"] - = translit.ru_trsc_cz_low - - ----------------------------------- - -- Uppercase Czech transcription -- - ----------------------------------- - - translit.ru_trsc_cz_upp = translit.make_add_dict{ - ["А"] = "A", - ["Б"] = "B", - ["В"] = "V", - ["Г"] = "G", - ["Д"] = "D", - ["Е"] = "E", - ["Ё"] = "Ë", - ["Ж"] = "Ž", - ["З"] = "Z", - ["И"] = "I", - ["Й"] = "J", - ["К"] = "K", - ["Л"] = "L", - ["М"] = "M", - ["Н"] = "N", - ["О"] = "O", - ["П"] = "P", - ["Р"] = "R", - ["С"] = "S", - ["Т"] = "T", - ["У"] = "U", - ["Ф"] = "F", - ["Х"] = "Ch", - ["Ц"] = "C", - ["Ч"] = "Č", - ["Ш"] = "Š", - ["Щ"] = "Šč", - ["Ъ"] = "Ъ", - ["Ы"] = "Y", - ["Ь"] = "Ь", - ["Э"] = "È", - ["Ю"] = "Ju", - ["Я"] = "Ja" - } - - translit.tables["Czech transcription uppercase"] - = translit.ru_trsc_cz_upp - - ---------------------------------------------- - -- Lowercase Additional Czech Transcription -- - ---------------------------------------------- - - translit.ru_trsc_cz_add_low = translit.make_add_dict{ - ["ѕ"] = "dz", - ["з"] = "z", - ["ꙁ"] = "z", - ["і"] = "ï", - ["ѹ"] = "u", - ["ѡ"] = "ō", - ["ѣ"] = "ě", - ["ѥ"] = "je", - ["ѧ"] = "ę", - ["ѩ"] = "ję", - ["ѫ"] = "ǫ", - ["ѭ"] = "jǫ", - ["ѯ"] = "ks", - ["ѱ"] = "ps", - ["ѳ"] = "th", - ["ѵ"] = "ÿ", - } - - translit.tables[ - "Czech transcription for OCS and pre-1918 lowercase"] - = translit.ru_trsc_cz_add_low - - - ---------------------------------------------- - -- Uppercase Additional Czech Transcription -- - ---------------------------------------------- - - translit.ru_trsc_cz_add_upp = translit.make_add_dict{ - ["Ѕ"] = "Dz", - ["З"] = "Z", - ["Ꙁ"] = "Z", - ["І"] = "Ï", - ["Ѹ"] = "U", - ["Ѡ"] = "Ō", - ["Ѣ"] = "Ě", - ["Ѥ"] = "Je", - ["Ѧ"] = "Ę", - ["Ѩ"] = "Ję", - ["Ѫ"] = "Ǫ", - ["Ѭ"] = "Jǫ", - ["Ѯ"] = "Ks", - ["Ѱ"] = "Ps", - ["Ѳ"] = "Th", - ["Ѵ"] = "Ÿ", - } - - translit.tables[ - "Czech transcription for OCS and pre-1918 uppercase"] - = translit.ru_trsc_cz_add_upp - translit.done_ru_trsc_cz = 
true -end - ---===================================================================-- --- End Of Tables -- ---===================================================================-- - -local function transcript (mode, text) - local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs - - local trsc_parser, p_rules, capt, p_de - - local function tab_subst (s, ...) - local sets = { ... } - local p_tmp, tmp = nil, translit.make_add_dict{} - for n=1, #sets do - local set = sets[n] - tmp = tmp + set - end - p_tmp = addrules(tmp, p_tmp) - local fp = Cs((Cs(P(p_tmp) / tmp) + utfchar)^0) - return lpegmatch(fp, s) - end - - if mode == "ru_transcript_en" then - - text = tab_subst(text, translit.ru_trsc_en_jerule) - text = tab_subst(text, - translit.ru_trsc_en_low_first, - translit.ru_trsc_en_upp_first) - text = tab_subst(text, - translit.ru_trsc_en_low, - translit.ru_trsc_en_upp) - - return text - - elseif mode == "ru_transcript_en_exp" then - - local en_low_upp = translit.make_add_dict{} - en_low_upp = translit.ru_trsc_en_low + translit.ru_trsc_en_upp - - local twochar - local tworepl = translit.make_add_dict{} - - twochar = addrules( translit.ru_trsc_en_low_first, twochar) - twochar = addrules( translit.ru_trsc_en_upp_first, twochar) - - tworepl = translit.ru_trsc_en_low_first - + translit.ru_trsc_en_upp_first - - -- The е-rule, Vе -> Vye - local function V_je (s) - local ante = utf.sub(s, 1, 1) - return en_low_upp[ante] .. 
"ye" - end - - local jerule = Cs((vow * "е") / V_je) - - local dvoje = Cs(twochar / tworepl) - local other = Cs((utfchar) / en_low_upp) - - local g = Cs((dvoje + jerule + other + utfchar)^0) - - text = g:match(text) - - return text - - elseif mode == "ru_cz" or mode == "ocs_cz" then - text = tab_subst(text, - translit.ru_trsc_cz_low, - translit.ru_trsc_cz_upp) - if mode == "ocs_cz" then - text = tab_subst(text, - translit.ru_trsc_cz_add_low, - translit.ru_trsc_cz_add_upp) - end - return text - end - - if mode == "ru_transcript_de_exp" then - - local vow, con, iy - vow = addrules(translit.ru_vowels, vow) - con = addrules(translit.ru_consonants, con) - iy = addrules(translit.ru_trsc_iy, iy ) - - local de_low_upp = translit.make_add_dict{} - de_low_upp = translit.ru_trsc_upp + translit.ru_trsc_low - - local twochar - local tworepl = translit.make_add_dict{} - - twochar = addrules( translit.ru_trsc_low_first, twochar ) - twochar = addrules( translit.ru_trsc_upp_first, twochar ) - - tworepl = translit.ru_trsc_low_first - + translit.ru_trsc_upp_first - - -- The й-rule, VйC -> ViC - local function V_i_C (s) - local ante = utf.sub(s, 1, 1) - local post = utf.sub(s, 3, 3) - return de_low_upp[ante] .. "i" .. de_low_upp[post] - end - - -- The second й-rule, йV -> jV && [иы]йC -> [иы]jC - local function iy_j_C (s) - local ante = utf.sub(s, 1, 1) - local post = utf.sub(s, 3, 3) - return de_low_upp[ante] .. "j" .. de_low_upp[post] - end - - local function j_V (s) - local post = utf.sub(s, 2, 2) - return "j" .. de_low_upp[post] - end - - -- The с-rule, VсV -> VssV - local function V_ss_V (s) - local ante = utf.sub(s, 1, 1) - local post = utf.sub(s, 3, 3) - return de_low_upp[ante] .. "ss" .. de_low_upp[post] - end - - -- The sharp-s-rule, Vсх -> Vßх - local function V_sz_ch (s) - local ante = utf.sub(s, 1, 1) - return de_low_upp[ante] .. "ßch" - end - - -- The е-rule, Vе -> Vje - local function V_je (s) - local ante = utf.sub(s, 1, 1) - return de_low_upp[ante] .. 
"je" - end - - -- Reapplying V_je on its result + next char would make the - -- following two rules obsolete. - local function V_jeje (s) - local ante = utf.sub(s, 1, 1) - return de_low_upp[ante] .. "jeje" - end - - local function V___je (s) - local ante = utf.sub(s, 1, 1) - return de_low_upp[ante] .. "jeje" - end - - -- The ё-rule, Vё -> Vjo - -- This should be redundant as [жцчшщ]ё -> o, else ё -> jo . - -- Somebody should teach those DUDEN guys parsimony. - local function V_jo (s) - local ante = utf.sub(s, 1, 1) - return de_low_upp[ante] .. "jo" - end - - local iyrule = Cs((iy * "й" * con) / iy_j_C) - local jrule = Cs(("й" * vow) / j_V) - local irule = Cs((vow * "й" * con) / V_i_C) - - local ssrule = Cs((vow * "с" * vow) / V_ss_V) - local szrule = Cs((vow * "сх") / V_sz_ch) - - --local _jrule = Cs((vow * "ее") / V___je) - local jjrule = Cs((vow * "ее") / V_jeje) - local jerule = Cs((vow * "е") / V_je) - local jorule = Cs((vow * "ё") / V_jo) - - local dvoje = Cs(twochar / tworepl) - local other = Cs((utfchar) / de_low_upp) - - local izhe = iyrule + jrule + irule - local slovo = ssrule + szrule - local jest = jjrule + jerule + jorule - - local g = Cs((izhe + slovo + jest + dvoje + other + utfchar)^0) - - text = g:match(text) - return text - - elseif mode == "ru_transcript_de" then - - if lpeg.version() == "0.9" then - - text = tab_subst(text, translit.ru_trsc_jrule) - text = tab_subst(text, translit.ru_trsc_irule) - text = tab_subst(text, translit.ru_trsc_jerule) - text = tab_subst(text, translit.ru_trsc_srule) - text = tab_subst(text, translit.ru_trsc_sharpsrule) - text = tab_subst(text, translit.ru_trsc_jorule) - text = tab_subst(text, - translit.ru_trsc_upp_first, - translit.ru_trsc_low_first) - text = tab_subst(text, - translit.ru_trsc_upp, - translit.ru_trsc_low) - - return text - elseif lpeg.version() == "0.10" then - return translit.future_ru_transcript_de:match(text) - end - - end - -end - -translit.methods ["ru_transcript_de"] - = function (text) return 
transcript("ru_transcript_de" , text) end -translit.methods ["ru_transcript_de_exp"] - = function (text) return transcript("ru_transcript_de_exp", text) end -translit.methods ["ru_transcript_en"] - = function (text) return transcript("ru_transcript_en" , text) end -translit.methods ["ru_transcript_en_exp"] - = function (text) return transcript("ru_transcript_en_exp", text) end -translit.methods ["ru_cz"] - = function (text) return transcript("ru_cz" , text) end -translit.methods ["ocs_cz"] - = function (text) return transcript("ocs_cz" , text) end - --- vim:sw=4:ts=4:expandtab:ft=lua diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua deleted file mode 100644 index c101ec5..0000000 --- a/tex/context/third/transliterator/transliterator.lua +++ /dev/null @@ -1,283 +0,0 @@ -#!/usr/bin/env texlua --------------------------------------------------------------------------------- --- FILE: transliterator.lua --- USAGE: to be called by t-transliterator.mkiv --- DESCRIPTION: basic lua environment for the Transliterator module --- REQUIREMENTS: latest ConTeXt MkIV --- AUTHOR: Philipp Gesang (Phg), --- CREATED: 2010-12-23 22:12:31+0100 --------------------------------------------------------------------------------- --- - -thirddata = thirddata or { } -thirddata.translit = thirddata.translit or { } -local translit = thirddata.translit -translit.tables = translit.tables or { } -translit.methods = translit.methods or { } -translit.deficient_font = "no" -translit.parser_cache = { } - -local utf8 = unicode and unicode.utf8 or utf8 -local utf8byte = utf8.byte -local utf8len = utf8.len - --------------------------------------------------------------------------------- --- Predefining vowel lists --------------------------------------------------------------------------------- --- If you haven't heard of cyrillic scripts until now you might want to read --- at least the first 15 pages of --- 
http://www.uni-giessen.de/partosch/eurotex99/berdnikov2.pdf --- before you continue reading this file. -translit.ru_vowels = {"а", "е", "ё", "и", "й", "о", "у", "ы", "э", "ю", "я", - "А", "Е", "Ё", "И", "Й", "О", "У", "Ы", "Э", "Ю", "Я"} -translit.ru_consonants = {"б", "в", "г", "д", "ж", "з", "к", "л", "м", "н", - "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш", "щ", - "Б", "В", "Г", "Д", "Ж", "З", "К", "Л", "М", "Н", - "П", "Р", "С", "Т", "Ф", "Х", "Ц", "Ч", "Ш", "Щ"} - --- Substitution tables are the very heart of the Transliterator. Due to the --- nature of languages and scripts exhaustive substitution is the simplest --- method for transliterations and transcriptions unless they are one-to-one --- mappings like those defined in ISO~9. --- --- To achieve better reusability we split the tables into segments, the most --- obvious being the \type{*_low} and \type{*_upp} variants for sets of lowercase --- and uppercase characters. Another set is constituted by e.~g. the --- \type{ru_old*} tables that allow adding transcription of historical --- characters if needed; by the way those are included in the default --- transliteration mode \type{ru_old}. - --- Tables can be found in separate Lua files. --- See {\tt --- trans_tables_glag.lua --- trans_tables_gr.lua --- trans_tables_iso9.lua --- trans_tables_scntfc.lua --- and --- trans_tables_trsc.lua.} - --------------------------------------------------------------------------------- --- Metatables allow for lazy concatenation. --------------------------------------------------------------------------------- - -do - -- This returns the Union of both key sets for the “+” operator. - -- The values of the first table will be updated (read: overridden) by - -- those given in the second. 
- local Dict_add = { - __add = function (dict_a, dict_b) - assert (type(dict_a) == "table" and type(dict_b) == "table") - local dict_result = setmetatable({}, Dict_add) - - for key, val in pairs(dict_a) do - dict_result[key] = val - end - - for key, val in pairs(dict_b) do - dict_result[key] = val - end - return dict_result - end - } - - translit.make_add_dict = function (dict) - return setmetatable(dict, Dict_add) - end -end - --------------------------------------------------------------------------------- --- Auxiliary Functions --------------------------------------------------------------------------------- - --- Generate a rule pattern from hash table. -do - local P, R, V = lpeg.P, lpeg.R, lpeg.V - - -- multi-char rules first - translit.addrules = function (dict, rules) - local by_length, occurring_lengths = { }, { } - for chr, _ in next, dict do - local l = utf8len(chr) - if not by_length[l] then - by_length[l] = { } - occurring_lengths[#occurring_lengths+1] = l - end - by_length[l][#by_length[l]+1] = chr - end - table.sort(occurring_lengths) - for i=#occurring_lengths, 1, -1 do - local l = occurring_lengths[i] - for _, chr in next, by_length[l] do - rules = rules and rules + P(chr) or P(chr) - end - end - return rules - end - --- Modified version of Hans’s utf pattern (l-lpeg.lua). - - - translit.utfchar = P{ - V"utf8one" + V"utf8two" + V"utf8three" + V"utf8four", - - utf8next = R("\128\191"), - utf8one = R("\000\127"), - utf8two = R("\194\223") * V"utf8next", - utf8three = R("\224\239") * V"utf8next" * V"utf8next", - utf8four = R("\240\244") * V"utf8next" * V"utf8next" * V"utf8next", - } -end - --- We might want to have all the table data nicely formatted by \CONTEXT\ --- itself, here's how we'll do it. \type{translit.show_tab(t)} handles a --- single table \type{t}, builds a Natural TABLE out of its content and --- hands it down to the machine for typesetting. 
For debugging purposes it --- does not only print the replacement pairs but shows their code points as --- well. - --- handle the input chars and replacement values -local strempty = function (s) - if s == "" then return "nil" - else - -- add the unicode positions of the replacements (can be more - -- than one with composed diacritics - local i = 1 - local r = "" - repeat - r = r .. utf8byte(s,i) .. " " - i = i + 1 - until utf8byte(s,i) == nil - return r - end -end - -function translit.show_tab (tab) - -- Output a transliteration table, nicely formatted with natural tables. - -- Lots of calls to context() but as it’s only a goodie this doesn’t - -- really matter. - local cnt = 0 - context.setupTABLE({"r"}, {"each"}, {style="\\tfx", align="center"}) - context.setupTABLE({"c"}, {"each"}, {frame="off"}) - context.setupTABLE({"r"}, {"each"}, {frame="off"}) - context.setupTABLE({"c"}, {"first"}, {style="italic"}) - context.setupTABLE({"r"}, {"first"}, {style="bold", topframe="on", bottomframe="on"}) - context.setupTABLE({"r"}, {"last"}, {style="bold", topframe="on", bottomframe="on"}) - context.bTABLE({split="yes", option="stretch"}) - context.bTABLEhead() - context.bTR() - context.bTH() context("number") context.eTH() - context.bTH() context("letters") context.eTH() - context.bTH() context("n") context.eTH() - context.bTH() context("replacement") context.eTH() - context.bTH() context("n") context.eTH() - context.bTH() context("bytes") context.eTH() - context.bTH() context("repl. 
bytes") context.eTH() - context.eTR() - context.eTABLEhead() - context.bTABLEbody() - - for key, val in next,tab do - cnt = cnt + 1 - context.bTR() - context.bTC() context(cnt) context.eTC() - context.bTC() context(key) context.eTC() - context.bTC() context(string.len(key)) context.eTC() - context.bTC() context(val) context.eTC() - context.bTC() context(string.len(val)) context.eTC() - context.bTC() context(strempty(key)) context.eTC() - context.bTC() context(strempty(val)) context.eTC() - context.eTR() - end - - context.eTABLEbody() - context.bTABLEfoot() context.bTR() - context.bTC() context("number") context.eTC() - context.bTC() context("letters") context.eTC() - context.bTC() context("n") context.eTC() - context.bTC() context("replacement") context.eTC() - context.bTC() context("n") context.eTC() - context.bTC() context("bytes") context.eTC() - context.bTC() context("repl. bytes") context.eTC() - context.eTR() - context.eTABLEfoot() - context.eTABLE() -end - --- Having to pick out single tables for printing can be tedious, therefore we --- let Lua do the job in our stead. \type{translit.show_all_tabs()} calls --- \type{translit.show_tab} on every table that is registered with --- \type{translit.table} -- and uses its registered key as table heading. - -function translit.show_all_tabs () - environment.loadluafile ("trans_tables_iso9") - environment.loadluafile ("trans_tables_trsc") - environment.loadluafile ("trans_tables_scntfc") - environment.loadluafile ("trans_tables_sr") - environment.loadluafile ("trans_tables_trsc") - environment.loadluafile ("trans_tables_glag") - environment.loadluafile ("trans_tables_gr") - translit.gen_rules_en() - translit.gen_rules_de() - -- Output all translation tables that are registered within translit.tables. - -- This will be quite unordered. 
- context.chapter("Transliterator Showing All Tables") - for key, val in pairs(translit.tables) do - context.section(key) - translit.show_tab (val) - end -end - --- for internal use only - -translit.debug_count = 0 - -function translit.debug_next () - translit.debug_count = translit.debug_count + 1 - context("\\tfxx{\\bf translit debug msg. nr.~" .. translit.debug_count .. "}") -end - --------------------------------------------------------------------------------- --- User-level Function --------------------------------------------------------------------------------- - --- \type{translit.transliterate(m, t)} constitutes the --- metafunction that is called by the \type{\transliterate} command. --- It loads the transliteration tables according to \type{method} and calls the --- corresponding function. - --- Those supposedly are the most frequently used so it won’t hurt to preload --- them. The rest will be loaded on request. -environment.loadluafile ("trans_tables_iso9") - -function translit.transliterate (method, text) - local methods = translit.methods - if not methods[method] then -- register tables and method - if method == "ru_transcript_de" or - method == "ru_transcript_de_exp" or -- experimental lpeg - method == "ru_transcript_en" or - method == "ru_transcript_en_exp" or - method == "ru_cz" or - method == "ocs_cz" then - environment.loadluafile ("trans_tables_trsc") - elseif method == "iso9_ocs" or - method == "iso9_ocs_hack" or - method == "ocs" or - method == "ocs_gla" then - environment.loadluafile ("trans_tables_scntfc") - elseif method:match("^sr_") then - environment.loadluafile ("trans_tables_sr") - elseif method:match("^bg_") then -- only bg_de for now - environment.loadluafile ("trans_tables_bg") - elseif method == "gr" or - method == "gr_n" then - environment.loadluafile ("trans_tables_gr") - end - end - - if translit.__script then - return methods[method](text) - end - context ( methods[method](text) ) -end - --- vim:sw=4:ts=4:expandtab:ft=lua -- 
cgit v1.2.3