diff options
| author | Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> | 2010-03-02 14:06:04 +0100 | 
|---|---|---|
| committer | Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> | 2010-03-02 14:06:04 +0100 | 
| commit | f0765800a77932422dcdb02766a06ddd6c7541d1 (patch) | |
| tree | d7b6b908284d6d65bc276eb8370c6e45031c6eaa | |
| parent | fea0bcccf81692ab4e7bcb2a2e60793a2247daff (diff) | |
| download | transliterator-f0765800a77932422dcdb02766a06ddd6c7541d1.tar.gz | |
Continued documentation
| -rw-r--r-- | .hgignore | 2 | ||||
| l--------- | doc/context/third/transliterator/t-transliterator.mkiv | 1 | ||||
| l--------- | doc/context/third/transliterator/t-transliterator.xml | 1 | ||||
| -rw-r--r-- | doc/context/third/transliterator/transliterator.tex | 557 | ||||
| -rw-r--r-- | tex/context/third/transliterator/t-transliterator.mkiv | 155 | 
5 files changed, 631 insertions, 85 deletions
@@ -33,3 +33,5 @@ syntax:glob  *.top  *.tuc  *.swo +*.tuo +*.ted diff --git a/doc/context/third/transliterator/t-transliterator.mkiv b/doc/context/third/transliterator/t-transliterator.mkiv new file mode 120000 index 0000000..fe8c6e5 --- /dev/null +++ b/doc/context/third/transliterator/t-transliterator.mkiv @@ -0,0 +1 @@ +../../../../tex/context/third/transliterator/t-transliterator.mkiv
\ No newline at end of file diff --git a/doc/context/third/transliterator/t-transliterator.xml b/doc/context/third/transliterator/t-transliterator.xml new file mode 120000 index 0000000..476ff63 --- /dev/null +++ b/doc/context/third/transliterator/t-transliterator.xml @@ -0,0 +1 @@ +../../../../tex/context/interface/third/t-transliterator.xml
\ No newline at end of file diff --git a/doc/context/third/transliterator/transliterator.tex b/doc/context/third/transliterator/transliterator.tex index 47b1dc3..1a04a5f 100644 --- a/doc/context/third/transliterator/transliterator.tex +++ b/doc/context/third/transliterator/transliterator.tex @@ -1,12 +1,61 @@ +\definepapersize [MYA5] [width=148mm,height=210mm] +\setuppapersize  [MYA5] [MYA5] +  \setupinteraction [state=start]  \setupcombinedlist[content][interaction=all,focus=standard] + +\setupindenting[yes,next,medium] + +\setuppagenumbering[location=left] + +\setuphead[chapter][style={\rm\bf\tfb},before={\blank[big,force]},after={\blank[2*big,force]}] +\setuphead[section][style={\rm\bf\tfa},before={\blank[big]},after={\blank[small]}] +\setuphead[subsection][style={\rm\bf},before={\blank[medium]},after={\blank[small]}] + +\usemodule[bib] +\newbibfield[nugut]  \usemodule[transliterator] +\definebodyfontenvironment[default][em=italic] + +\starttypescript [serif] [bukyvede] +  \setups [font:fallback:serif] +  \definefontsynonym [Serif]        [name:Bukyvede]         [features=default] +  \definefontsynonym [SerifItalic]  [name:Bukyvede-Italic]  [features=default] +\stoptypescript +\usetypescript [bukyvede] +\definetypeface [hlaholice] [rm] [serif] [bukyvede] [default] [encoding=ec] +\definetypeface [cyrilice] [rm] [serif] [bukyvede] [default] [encoding=ec] + +\usetypefile[cmu] +\usetypescript[computer-modern-unicode] +\setupbodyfont[computer-modern-unicode,9pt] + +\usetypefile[linuxlibertine] +\usetypescript[linuxlibertine] + + +\setupcite[authoryear][% +  compress=no% +] + +\setuppublications[% +  alternative=apa,% +  refcommand=authoryear,% +  sorttype=bbl,% +  numbering=yes,% +  autohang=yes% +]% + +\setuppublicationlist[% +  artauthor=\invertedauthor% +]  % == REFERENCES =============================================================== +  \startpublication[    k=duden,    t=book, -  a={Drosdowski/Müller/Scholze-Stubenrecht/Wermke}, +  
a={{Drosdowski/Müller/Scholze-Stubenrecht/Wermke}},    y=1952,    n=1,    s={DUDEN}, @@ -17,16 +66,16 @@  \editor[]{Matthias}[]{}{Wermke}  \pubyear{1991}  \title{DUDEN Rechtschreibung der deutschen Sprache} -\city{Mannheim et. al.} -\edition{20} +\city{Mannheim et al} +\edition{20.}  \stoppublication  \startpublication[    k=bornemann,    t=book, -  a={Bornemann/Risch}, +  a={{Bornemann/Risch}},    y=1978, -  n=1, +  n=2,    s={Grammatik},  ]  \author[]{Eduard}[]{}{Bornemann} @@ -34,55 +83,531 @@  \pubyear{1978}  \title{Griechische Grammatik}  \city{Frankfurt am Main} -\edition{2} +\edition{2.}  \stoppublication  \startpublication[    k=kirschbaum,    t=book, -  a={Kirschbaum}, +  a={{Kirschbaum}},    y=2001, -  n=1, +  n=3,    s={Grammatik},  ]  \author[]{Ernst Georg}[]{}{Kirschbaum}  \pubyear{2001}  \title{Grammatik der russischen Sprache}  \city{Berlin} -\edition{1}  \stoppublication  \startpublication[ -  k=aksstudien, +  k=aks,    t=book, -  a={Birnbaum/Schaeken}, +  a={{Birnbaum/Schaeken}},    y=1999, -  n=1, +  n=4, +  u=http://www.schaeken.nl/lu/research/online/publications/akslstud/index.htm,    s={Studien},  ] -\author[]{Henrik}[]{}{Birnbaum} -\author[]{Jos}[]{}{Schaeken} +\author[]{Henrik}[H.]{}{Birnbaum} +\author[]{Jos}[J.]{}{Schaeken}  \pubyear{1999}  \title{Altkirchenslavische Studien}  \volume{2}  \city{München} -\edition{1} -\url{http://www.schaeken.nl/lu/research/online/publications/akslstud/index.htm} +\stoppublication + +\startpublication[ +  k=dintb, +  t=book, +  a={{DIN}}, +  y=2001, +  n=5, +  s={DIN}, +] +\editor[]{}[]{}{DIN Deutsches Institut für Normung e.~V.} +\pubyear{2001} +\title{Bibliotheks und Dokumentationswesen} +\city{Berlin/Wien/Zürich} +\stoppublication + +\startpublication[ +  k=iso, +  t=inbook, +  a={{ISO}}, +  y=1995, +  n=6, +  s={ISO~9}, +] +\editor[]{}[]{}{{ISO International Organization for Standardization}} +\pubyear{1995} +\title{Information and documentation -- Transliteration of Cyrillic characters into Latin 
characters -- Slavic and non-Slavic languages} +\edition{2.} +\crossref{dintb} +\pages{230--245}  \stoppublication  %============================================================================== +\setupframed[% +  location=top,% +  align={normal,verytolerant},% +  frame=off,% +] + + +\definenumber[excnt] +\setnumber[excnt]{1} + +% This should rather be done using key-value args but I'm too lazy now. +% 1: mode; 2: hyphenate original; 3: hyphenate transliteration; +% 4: font for original; 5: caption; 6: original text. +\def\trlex#1#2#3#4#5#6{% +  \setupinterlinespace[line=8pt]% +  {\tfx% +    \placefigure [force] [#1] {% +      \type{[mode=#1,hyphenate=#3]}% +      \hskip 1em +      {\it #5}% +    } {% +      \framed{% +        \framed[% +          offset=1ex,% +          width=.47\textwidth,% +        ]{% +          \setupbodyfont[#4]% +          \unskip\language[#2]#6\par +        }% +        \framed[% +          offset=1ex,% +          width=.47\textwidth,% +        ]{% +          \transliterate[mode=#1,hyphenate=#3]{#6\par}% +        }% +      }% +    }% +    \incrementnumber[excnt]% +  } +}  \usemodule[int-load]  \loadsetups[t-transliterator.xml]  + +\setupwhitespace[big] +\language[en]  \starttext +\completecontent  \chapter{Usage and Functionality} +\section{Overview} +Basically the Transliterator provides two commands: \type{\setupTranslit} +preferably goes into the preamble and allows for global configuration. +The Transliterator is invoked locally by \type{\transliterate} which does the +actual transliteration of text passages. +  \setup{setupTranslit} +\setup{transliterate} + +\section{Module loading and configuration} +In order to use the Transliterator in a document we put the following somewhere before +\type{\starttext}. +\starttyping +\usemodule[transliterator] +\stoptyping +Although it has some defaults already set at this point they will most likely +not correspond to what is needed in the document. 
+To override the presets we use the command \type{\setupTranslit[#1]}. +It takes a comma separated list of two key-value pairs: \type{mode} and +\type{hyphenate}. +Through {\em mode} we specify the transliteration method. +By the time of this writing this can be one of the following set: +\setupTABLE[c][each]    [frame=off] +\setupTABLE[r][first]   [style=bold,topframe=on,bottomframe=on] +\setupTABLE[r][last]    [topframe=on,bottomframe=on] +\bTABLE[split=yes,stretch=yes] +  \bTABLEhead +    \bTR\bTH mode \eTH\bTH description \eTH\eTR +  \eTABLEhead +  \bTABLEbody +      \bTR\bTC \type{ru}               \eTC\bTC ISO~9 Russian \eTC\eTR +      \bTR\bTC \type{ru_old}           \eTC\bTC ISO~9 Russian plus pre-1918 chars (the default)\eTC\eTR +      \bTR\bTC \type{all}              \eTC\bTC ISO~9 complete \eTC\eTR +      \bTR\bTC \type{ru_transcript_de} \eTC\bTC German transcription for Russian \eTC\eTR +      \bTR\bTC \type{ru_transcript_en} \eTC\bTC English transcription for Russian \eTC\eTR +      \bTR\bTC \type{iso9_ocs}         \eTC\bTC == \type{all} plus non-ISO additions for Old (Church) Slavonic \eTC\eTR +      \bTR\bTC \type{ocs}              \eTC\bTC so-called “scientific” transliteration for Old (Church) Slavonic\eTC\eTR +      \bTR\bTC \type{ocs_gla}          \eTC\bTC so-called “scientific” transliteration for Old (Church) Slavonic / Glagolitic alphabet\eTC\eTR +      \bTR\bTC \type{ru_cz}            \eTC\bTC Czech transcription for Russian\eTC\eTR +      \bTR\bTC \type{ocs_cz}           \eTC\bTC Czech transcription for Old (Church) Slavonic\eTC\eTR +      \bTR\bTC \type{gr}               \eTC\bTC transliteration for Greek \eTC\eTR +      \bTR\bTC \type{gr_n}             \eTC\bTC transliteration for Greek obeying nasalizations \eTC\eTR +  \eTABLEbody +  \bTABLEfoot +    \bTR\bTH mode \eTH\bTH description \eTH\eTR +  \eTABLEfoot +\eTABLE + + +{\em Nota bene}: The description at this point only serves as a placeholder as the +transliteration modes are 
discussed in detail later in this document. + +Through the \type{hyphenate} argument it is possible to adjust the language +that is used for hyphenation. +Specifying \type{\setupTranslit[hyphenate=nl]} will let every transliterated +part of the document be processed according to Dutch rules, leaving the overall +\type{\language[#1]} configuration unchanged for the rest of the content. + +The actual transliteration is done using the macro +\type{\transliterate[#1]{#2}}. +The second argument takes the raw string in the original language that we want +to process, while the first, optional argument accepts local adjustments for +\type{mode} and \type{hyphenate}. +Thus, we would typeset one of Epicuros' sayings like this: +{\setuptolerance[verytolerant] +\starttyping +\transliterate[mode=gr]{κακὸν ἀνάγκη, ἀλλ' οὐδεμία ἀνάγκη ζῆν μετὰ ἀνάγκης} +\stoptyping +which yields \quotation{\transliterate[mode=gr]{κακὸν ἀνάγκη, ἀλλ' οὐδεμία ἀνάγκη ζῆν +μετὰ ἀνάγκης}} in the pdf output. +} +  \chapter{Introduction} +\hfil\framed[width=\hsize,align=left]{% +  \inframed[bottomframe=on]{\it What's all this, then?} +  \blank[medium] +  {\sc Graham Chapman} +} + +\noindentation  At the first glance, {\em transliteration} -- the accurate representation of letters from one +alphabet in another -- seems obsolete after the advent of Unicode +which made its way even into \TeX\ lately. +Why not just go on and write down everything in the original script? +But still there are lots of situations where transliteration is desirable, +e.~g. some scholarly habits might prescribe it in the main text with citations in +footnotes left in the original alphabet; or transliteration might alleviate +comparison within one language that happens to be written in different scripts; +finally, including text in a foreign script might be impossible if there is no +appropriate font that fits the main text. 
+However, it is still most convenient for the writer to keep the +untransliterated original in the document source as this allows for reusing it in +another context where different transliteration rules might apply. +The Transliterator module is meant to provide both: have the original in the +source and a transliteration only in the final document. + +Another way of handling foreign languages is {\em transcription}. +It aims at producing some representation that does not rely on symbolisms +alien to the language and thus to be at least \quotation{pronounceable} +without further knowledge. +As transcription methods are language specific and highly idiosyncratic they +complicate the restoration of the original phrase and information may be lost. +The Transliterator provides means of transcription as well but in most cases +you should refrain from using them (\type{[mode=ru_transcript_en]}, +\type{[mode=ru_transcript_de]}).  + +For Cyrillic scripts the best quality is achieved using the standardized +transliteration according to {\em ISO~9}.\footnote{\cite[authoryear][iso].} +This method not only covers all contemporary languages that are written in +a variety of Cyrillic but provides a bijective mapping onto Latin characters as +well. +Consequently, you can unambiguously revert the transliteration into +its original form which was impossible with previous versions of ISO~9 because +they contained several exceptions depending on the original language. +Although fifteen years old it has not yet made its way into scholarly +publications at large so it might not immediately look familiar. +The diacritics are not identical to the so-called \quotation{scientific} +transliteration used in Slavic studies but as long as your journal does not +enforce its traditional method you should always prefer ISO~9 +(\type{[mode=ru]}, \type{[mode=ru_old]}, \type{[mode=all]}). + +But ISO~9, too, has its shortcomings. 
+It has no definitions for historical forms of the Cyrillic script like  +pre-XVIII-century Russian and Old (Church) Slavonic while those are covered by +the scholarly transliterations. +To amend the situation the Transliterator provides an extension to ISO~9 for +Old Slavonic containing the glyphs  +\startluacode +local cnt, len = 0, 0 -- Wishing for a len() function that works on dictionaries as in python… +for i,j in pairs(translit.ocs_add_low) do +  len = len + 1 +end + +for k,v in pairs(translit.ocs_add_low) do +  cnt = cnt + 1 +  context.bgroup()  +    context.setupbodyfont({"cyrilice"}) +    context(k) +  context.egroup()  +  if cnt < len -1 then +    context(", ")  +  elseif cnt < len then +    context("\\ and ") +  end +end +\stopluacode +\ taken from the scientific transliteration (\type{[mode=iso9_ocs]}). +If you prefer more coherency you might want to use pure \quotation{scientific} +transliteration (\type{[mode=ocs]}). +This method is complemented by \type{[mode=ocs_gla]}, the only option the +Transliterator offers for the Glagolitic alphabet; they can be used consistently +alongside each other as they were taken from the same +book.\footnote{\cite[authoryear][aks] p.~77 \cite[url][aks].} + +As far as I know there is no standardized transliteration for Greek so I had to +resort to the one that is used in scholarly literature. +Its main drawback is that it has no representation for diacritics apart from +(rough) breathing, but it respects specific rules for diphthongs and vowels in +initial positions (\type{[mode=gr]}). +There is one alternative mode for those who prefer their {\em γ} phonetically +resolved to /{\em n}/ before velars ({\em γ}, {\em κ}, {\em χ} and {\em ξ}; +\type{[mode=gr_n]}). + +Concerning the hyphenation within transliterated passages the default is set +to \type{[hyphenate=cs]} (Czech) which produces reasonable results when using +\type{all}, \type{iso9_ocs} or \type{ru_cz}. 
+For stuff like the English and German transcription use their respective native +hyphenation.\footnote{% +  You'll have to specify this through \type{\setupTranslit} +  or locally because the default hyphenation is {\em not} the same as your +  document's. +} +However, as there is no hyphenation pattern I know of that closely resembles the +transliteration of Greek you might have to resort to putting \type{\discretionary} +hyphens when line breaking does not satisfy. + + +\chapter[ex]{Examples} +\section{Cyrillic scripts} +\subsection{ISO~9 and derivatives} +Several transliteration rules are either strictly ISO~9 compliant (\type{ru}, \type{ru_old}, \type{all})  +or contain ISO~9 as a subset (\type{iso9_ocs}).\footnote{% +  Unfortunately there are not yet any language files for some of them so please +  excuse the inadequate hyphenation in these cases.% +} + +\trlex{ru}{ru}{cz}{computer-modern-unicode}{% +  Transliteration rules for the contemporary Russian alphabet.% +}{% +  В~ворота гостиницы губернского города NN въехала довольно красивая рессорная +  небольшая бричка, в~какой ездят холостяки: отставные подполковники, +  штабс-капитаны, помещики, имеющие около сотни душ крестьян, — словом, все те,  +  которых называют господами средней руки.  +  В~бричке сидел господин, не красавец, но и~не дурной наружности, ни слишком +  толст, ни слишком тонок; нельзя сказать, чтобы стар, однако ж~и~не так чтобы +  слишком молод. +} + +\trlex{ru_old}{ru}{cz}{computer-modern-unicode}{% +  With additional characters for pre-1918 Russian orthography (100~per cent ISO~9).% +}{% +  А~сведется віра, убьютъ сотцкого в~селѣ, ино тебѣ взяти полтіна, а~не +  сотцкого, +  ино четырѣ гривны, а~намъ віръ не таити в~Новѣгородѣ; а~о~убіствѣ віръ нѣтъ. +  А~что волости, честны король, новгородцкіе, ино тебѣ не держати своими мужи, +  а~держати мужми новогородцкими. +  А~что пошлина в~Торжку и~на Волоцѣ, тівунъ свои держати на своеи чясті, +  а~Новугороду на своеи чясти посадника держаті. 
+  А~се волости новогородцкіе: Волокъ со всѣми волостми, Торжокъ, Бѣжіці, +  Городець +  Палець, Шіпинъ, Мелеця, Егна, Заволочье, Тиръ, Пермь, Печера, Югра, Вологда +  с~волостмі. +} + +\trlex{all}{uk}{cz}{computer-modern-unicode}{% +  The complete cyrillic mapping from ISO~9; transliterating Belarusian.% +}{% +  Беларуская мова, мова беларусаў, уваход\-зіць у~сям’ю індаеўрапейскіх моў, яе +  славянскай групы і~ўсходнеславянскіх моваў падгрупы, на якой размаўляюць +  у~Беларусі і~па ўсім свеце, галоўным чынам у~Расіі, Украіне, Польшчы. +  Б.~м. пад\-зяляе шмат граматычных і~лексічных уласцівасцяў з~іншымі +  ўсходнеславянскімі мовамі (гл. таксама: Іншыя назвы беларускай мовы і~Узаемныя +  ўплывы усходнеславянскіх моваў). +} + +\trlex{all}{uk}{cz}{computer-modern-unicode}{% +  The complete cyrillic mapping from ISO~9; transliterating Ukrainian.% +}{% +  Украї́нська мова (застарілі назви -- руська мова, проста мова […]) -- +  слов'янсь\-ка мова, державна в~Україні та одна з~трьох «офіційних мов на рівних +  засадах» у~неви\-знаній Придністровській Молдавсь\-кій Республіці. +  За різними оцінками загалом у~світі українською мовою гово\-рить від 41~млн. +  до 45~млн. осіб, вона входить до третього десятка найпоши\-ре\-ні\-ших мов +  світу. +} + +\trlex{all}{ru}{cz}{computer-modern-unicode}{% +  The complete cyrillic mapping from ISO~9; transliterating Serbian.% +}{% +  Српски језик је један од словенских језика из породице индоевропских језика. +  Први писани споменици у~српској редакцији старословенског језика потичу из XI +  и~XII века. +  Српски језик је стандардни језик у~службеној употреби у~Србији, Босни +  и~Херцеговини и~Црној Гори, а~у~употреби је и~у другим земљама гдје живе +  Срби, међу осталима и~у~Хрватској. 
+} + +\trlex{iso9_ocs}{ru}{cz}{cyrilice}{% +  Transliteration rules according to ISO~9 with additions for Old (Church) +  Slavonic.% +}{% +  Что сѧ дѣѥтѣ по вѣремьнемь~: то ѿидето по вѣрьмьнемь~: приказано бѹдѣте +  добрымъ людѣмъ~: а любо грамотою ѹтвѣрдѧть~: како то бѹдѣте всемъ вѣдомъ~: +  или кто посль живыи ѡстанѣть сѧ~: того лѣт͠ коли алъбрахтъ~: влд͠ка ризкии +  ѹмьрлъ~: ѹздѹмалъ кнѧзѣ смольнескыи~: мьстиславъ~: двд͠въ сн͠ъ~: прислалъ въ +  ригѹ своѥго лѹчьшего попа~: ѥрьмея~: и съ нимь ѹмьна мѹжа пантелья~: +  исвоѥго горда смольнеска~: та два была послъмь ѹ ризѣ~: из ригы ѥхали на +  гочкыи берьго~: тамо твердити миръ~: +} + +\subsection{“Scientific” transliteration} +These transliterations are widely used among scholars, mainly linguists and, to +a lesser extent, historians. +They comprise large character sets in order to represent the original text +adequately and facilitate comparison of texts of the same language written in +different scripts; they are not, however, as easily reversible as ISO~9. + +\trlex{ocs}{ru}{cs}{cyrilice}{% +  Transliteration for Old Slavonic used in Slavic studies, taken from the +  excellent book of \cite [authoryear][aks].% +}{% +  Се начнемъ повѣсть сию.  +  По потопѣ . первиє снве Ноєви . раздѣлиша землю . Симъ . Хамъ . Афетъ . и~ꙗсѧ +  въстокъ . Симови Персида . Ватрь . тоже  и~до Индикиꙗ в~долготу и~в~ширину [и +  до Нирокоуриа] ꙗкоже рещи ѿ въстока и~до полуденьꙗ . и~Суриꙗ . +  и~Индиа по Єфратъ рѣку . Вавилонъ . Кордуна . Асурѧне . Мисопотамира . +  Аравиꙗ . старѣишаꙗ . Єлмаисъ . Инди . Равиꙗ . на всѧ  Д. +} + +\trlex{ru_cz}{ru}{cs}{computer-modern-unicode}{% +  Czech phonetic transcription for contemporary Russian.% +}{% +  Прошло семь лет после 12-го года. Взволнованное историческое море Европы +  улеглось в свои берега. Оно казалось затихшим; но таинственные силы, +  двигающие человечество (таинственные потому, что законы, определяющие их +  движение, неизвестны нам), продолжали свое действие. 
+  Несмотря на то, что поверхность исторического моря казалась неподвижною, так +  же непрерывно, как движение времени, двигалось человечество. Слагались, +  разлагались различные группы людских сцеплений; подготовлялись причины +  образования и~разложения государств, перемещений народов.% +} + +\trlex{ocs_cz}{ru}{cs}{cyrilice}{% +  Czech phonetic transcription for Old Slavonic (superset of the corresponding +  Russian transcription). +}{% +  Убьеть мужь мужа, то мьстить брату брата, или сынови отца, любо отцю сына, +  или братучаду, любо сестрину сынови; аще не будеть кто мьстіѧ, то 40 гривенъ +  ꙁа голову; аще будеть русинъ, любо гридинъ, любо купчина, любо іѧбетник, любо +  мечникъ, аще иꙁъгои будеть, любо словенинъ, то 40 гривенъ положити ꙁа нь. +} + + +\subsection{Legacy national transcriptions} +At the moment there are tables for old school transcription into three +languages: English (via \type{ru_transcript_en}), German +(\type{ru_transcript_de}) and Czech (\type{ocs_cz}). +Only one of them (Czech) is recommendable as the others are to a~large extent +irreversible and lack efficiency; +at least the German one is almost unreadable if used with +strings longer than two words. +As we have the bijective ISO~9 mapping at hand there should be no reason at all to +use any of them unless threatened by ignorants. + +\trlex{ru_transcript_en}{ru}{en}{computer-modern-unicode}{% +  English transcription for contemporary Russian.% +}{% +  Прошло семь лет после 12-го года. Взволнованное историческое море Европы +  улеглось в свои берега. Оно казалось затихшим; но таинственные силы, +  двигающие человечество (таинственные потому, что законы, определяющие их +  движение, неизвестны нам), продолжали свое действие. +  Несмотря на то, что поверхность исторического моря казалась неподвижною, так +  же непрерывно, как движение времени, двигалось человечество. 
Слагались, +  разлагались различные группы людских сцеплений; подготовлялись причины +  образования и~разложения государств, перемещений народов.% +} + +\trlex{ru_transcript_de}{ru}{deo}{computer-modern-unicode}{% +  German transcription for contemporary Russian.\footnote{% +    Following \cite[authoryear][duden] p.~82; all the canonical rules are +    implemented save one: {\em -его} and {\em -ого} should resolve to {\em +    -ewo} and {\em -owo} respectively iff genitive endings. +    As this is a grammatical rather than graphetical criterion writing  a +    substitution algorithm would amount to do natural language parsing. +    To make things worse this rule is phonetically confused as it would not +    take care of other contexts where {\em г} in those patterns is articulated +    as /{\em v}/ like for instance in {\em сегодня} (which is a historical +    genitive, though …). +    So even if this could be implemented it would not be advisable to use such +    a rule.% +  }% +}{% +  Прошло семь лет после 12-го года. Взволнованное историческое море Европы +  улеглось в свои берега. Оно казалось затихшим; но таинственные силы, +  двигающие человечество (таинственные потому, что законы, определяющие их +  движение, неизвестны нам), продолжали свое действие. +  Несмотря на то, что поверхность исторического моря казалась неподвижною, так +  же непрерывно, как движение времени, двигалось человечество. Слагались, +  разлагались различные группы людских сцеплений; подготовлялись причины +  образования и~разложения государств, перемещений народов.% +} + +\section{Glagolitic} +\trlex{ocs_gla}{ru}{cs}{hlaholice}{% +  “Scientific” transliteration for Old Slavonic written in the Glagolitic +  alphabet as used in \cite[authoryear][aks].% +}{% +  [ⰲⰾ]  +  ⰰⰴⱏⰻⰽⱁ ⱍⰽ҃ⱏ ⱄⰻ ⱈⱁⱋⰵⱅⱏ ⱃⰰⰸ[ⱁⱃⰻⱅ]  +  ⰻ ⰸⰰⰽⱁⱀⱏ ⰿⰰⱀⰰⱄⱅⱏⰻⱃⱏⱄⰽⰻ: [ⰻⰶⰵ]  +  ⱅⱏⰻ ⱆⱄⱅⰰⰲⰻ჻ Ⱃⰵⱍⰵ ⰶⰵ ⰻⰳⱆⰿ[ⱏ] [ⱀⱏ]  +  ⰽⰰⰽⱁ ⱈⱁⱋⰵⱅⱏ ⱃⰰⰸⱁⱃⰻⱅⰻ ⰸⰰⰽ[ⱁⱀⱏ]  +  [.] [ⰰ] ⰵⱄⱅⱏ· ⱍⱃⱏⰲⰻ⁖ ⰻ [ⰿ] [..........]  +  [..] 
ⰿⱏ ⱀⰵ ⰿⱁⰶⰵⰿⱏ ⱄⰵⰳⱁ ⱅⱃⱏⱂⱑⱅ[ⰻ]  +  [ⰴⰰ] ⰾⱆⰱⱁ ⱄⰵⰳⱁ ⰻⰿⱑⰻ ⱄⱏⰴⱑ჻ ⰰ ⰿⱏⰻ ⱁ  +  [ⱅⰻ]ⰴⰵⰿⱏ: ⰾⱆⰱⱁ ⱄⰵⰳⱁ ⱂⱆⱄⱅⰻ: ⰴⰰ ⱁⱅ  +  [ⰻⰴ]ⰵⱅⱏ ⰻⰶⰵ ⰵⱄⱅⱏ ⱂⱃⰻⱎⱏⰾⱏ: ⱄ[ⰵ]  +} + +\section{Greek} +The Transliterator offers two modes for handling Greek: \type{gr} and +\type{gr_n}. +They differ only in one aspect. +\type{gr} basically transliterates the canonical Greek alphabet as well as the +special glyphs Digamma, Qoppa and Sampi. +\type{gr_n} behaves exactly the same way except that nasalization is observed +such that \type{γ+[γ|κ]} yields \type{n+[g|k]}. + +\trlex{gr}{agr}{de}{computer-modern-unicode}{% +  Transliteration for Greek -- standard. +}{% +  οἴνῳ δὲ κάρτα προσκέαται, καί σφι οὐκ ἐμέσαι ἔξεστι, οὐκὶ οὐρῆσαι ἀντίον +  ἄλλου. +  ταῦτα μέν νυν οὕτω φυλάσσεται, μεθυσκόμενοι δὲ ἐώθασι βουλεύεσθαι τὰ +  σπουδαιέστατα τῶν πρηγμάτων: τὸ δ᾽ ἂν ἅδῃ σφι βουλευομένοισι, τοῦτο τῇ +  ὑστεραίῃ νήφουσι προτιθεῖ ὁ στέγαρχος, ἐν τοῦ ἂν ἐόντες βουλεύωνται, καὶ ἢν +  μὲν +  ἅδῃ καὶ νήφουσι, χρέωνται αὐτῷ, ἢν δὲμὴ ἅδῃ, μετιεῖσι. τὰ δ᾽ ἂν νήφοντες +  προβουλεύσωνται, μεθυσκόμενοι ἐπιδιαγινώσκουσι. +}% + +\trlex{gr_n}{agr}{de}{computer-modern-unicode}{% +  Transliteration for Greek -- alternative respecting nasalization. +}{% +  ταῦτα καὶ νεωτέρῳ καὶ πρεσβυτέρῳ ὅτῳ ἂν ἐντυγχάνω ποιήσω, καὶ ξένῳ καὶ ἀστῷ, +  μᾶλλον δὲ τοῖς ἀστοῖς, ὅσῳ μου ἐγγυτέρω ἐστὲ γένει. 
+}% +    +  \chapter{References} +%\cite[authoryear][iso] +\nocite[duden] +\nocite[bornemann] +\nocite[kirschbaum] +\nocite[iso] +\nocite[aks] +\nocite[dintb] +\placepublications [criterium=all]  \stoptext  %   vim:ft=context diff --git a/tex/context/third/transliterator/t-transliterator.mkiv b/tex/context/third/transliterator/t-transliterator.mkiv index fc07c1d..8f8cadf 100644 --- a/tex/context/third/transliterator/t-transliterator.mkiv +++ b/tex/context/third/transliterator/t-transliterator.mkiv @@ -11,7 +11,7 @@  %D          email={pgesang at ix dot urz dot uni-heidelberg dot de}]  %D This module is licensed under the conditions of the BSD license with   %D two clauses: http://www.freebsd.org/copyright/freebsd-license.html. -%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/.\newpage +%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/.  \writestatus{loading}{Transliteration from non-Latin scripts} @@ -766,6 +766,8 @@ translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp  translit.ru_trsc_cz_add_low = {    ["ѕ"] = "dz", +  ["з"] = "z", +  ["ꙁ"] = "z",    ["і"] = "ï",    ["ѹ"] = "u",    ["ѡ"] = "ō", @@ -790,6 +792,8 @@ translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit  translit.ru_trsc_cz_add_upp = {    ["Ѕ"] = "Dz", +  ["З"] = "Z", +  ["Ꙁ"] = "Z",    ["І"] = "Ï",    ["Ѹ"] = "U",    ["Ѡ"] = "Ō", @@ -824,6 +828,15 @@ translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit  -- Source p. 
77 of  -- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf +----------------------------------------------------------------------- +-- Lowercase and uppercase letter Uk -- “scientific transliteration” -- +----------------------------------------------------------------------- + +translit.ocs_uk = { +  ["oу"] = "u", +  ["оу"] = "u", +  ["Оу"] = "U", +}  -----------------------------------------------------------------------------  -- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” --  ----------------------------------------------------------------------------- @@ -853,7 +866,6 @@ translit.ocs_low = {    ["р"] = "r",    ["с"] = "s",    ["т"] = "t", -  ["оу"] = "u",    ["ѹ"] = "u",    ["ꙋ"] = "u",    ["ф"] = "f", @@ -915,7 +927,6 @@ translit.ocs_upp = {    ["Р"] = "R",    ["С"] = "S",    ["Т"] = "T", -  ["Оу"] = "U",    ["Ѹ"] = "U",    ["ꙋ"] = "U",    ["Ф"] = "F", @@ -1803,61 +1814,64 @@ end  function translit.show_tab (tab)    -- Output a transliteration table, nicely formatted with natural tables.    local cnt = 0 -  context ("\\setupTABLE[r][each]   [style=\\tfx,align=center] ") -  context ("\\setupTABLE[c][each]   [frame=off]") -  context ("\\setupTABLE[r][each]   [frame=off]") -  context ("\\setupTABLE[c][first]  [style=italic]") -  context ("\\setupTABLE[r][first]  [style=bold,topframe=on,bottomframe=on]") -  context ("\\setupTABLE[r][last]   [style=bold,topframe=on,bottomframe=on]") -  context ("\\bTABLE [split=yes,option=stretch]") -  context ("\\bTABLEhead\\bTR".. -    "\\bTH Number\\eTH" .. -    "\\bTH letters\\eTH" ..  -    "\\bTH n\\eTH" ..  -    "\\bTH replacement\\eTH".. -    "\\bTH n\\eTH" ..  -    "\\bTH bytes\\eTH".. -    "\\bTH repl. bytes".. -    "\\eTH\\eTR\\eTABLEhead ") -  context("\\bTABLEbody") -  for key, val in pairs(tab) do -    local strempty = function (s)  -      -- Some characters might not be replaced but removed, others might be -      -- multi-char sequences. 
-      if #s == 0 then return "nil" -      else  -        local i = 0 -        local r = "" -        -- The following loop could be replaced by checking the string length with utf.len(s) … -        repeat -          i = i + 1 -          if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. " "  end -        until (false) -        return r +  context.setupTABLE({"r"}, {"each"},     {style="\\tfx", align="center"}) +  context.setupTABLE({"c"}, {"each"},     {frame="off"}) +  context.setupTABLE({"r"}, {"each"},     {frame="off"}) +  context.setupTABLE({"c"}, {"first"},    {style="italic"}) +  context.setupTABLE({"r"}, {"first"},    {style="bold", topframe="on", bottomframe="on"}) +  context.setupTABLE({"r"}, {"last"},     {style="bold", topframe="on", bottomframe="on"}) +  context.bTABLE({split="yes", option="stretch"}) +    context.bTABLEhead() +      context.bTR() +        context.bTH() context("number")         context.eTH() +        context.bTH() context("letters")        context.eTH() +        context.bTH() context("n")              context.eTH() +        context.bTH() context("replacement")    context.eTH() +        context.bTH() context("n")              context.eTH() +        context.bTH() context("bytes")          context.eTH() +        context.bTH() context("repl. bytes")    context.eTH() +      context.eTR() +    context.eTABLEhead() +    context.bTABLEbody() +      for key, val in pairs(tab) do +        local strempty = function (s)  +          -- Some characters might not be replaced but removed, others might be +          -- multi-char sequences. +          if #s == 0 then return "nil" +          else  +            local i = 0 +            local r = "" +            -- The following loop could be replaced by checking the string length with utf.len(s) … +            repeat +              i = i + 1 +              if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. 
" "  end +            until (false) +            return r +          end +        end +        cnt = cnt + 1 +        context.bTR() +          context.bTC() context(cnt)           context.eTC() +          context.bTC() context(key)           context.eTC() +          context.bTC() context(utf.len(key))  context.eTC() +          context.bTC() context(val)           context.eTC() +          context.bTC() context(utf.len(val))  context.eTC() +          context.bTC() context(strempty(key)) context.eTC() +          context.bTC() context(strempty(val)) context.eTC() +        context.eTR()        end -    end -    cnt = cnt + 1 -    context ("\\bTR\\bTC " .. cnt .. "\\eTC") -    context ("\\bTC " ..  -      key .. "\\eTC\\bTC " ..  -      utf.len(key) .. "\\eTC\\bTC " ..  -      val .. "\\eTC\\bTC " ..   -      utf.len(val) .. "\\eTC\\bTC " ..   -      strempty(key) .. "\\eTC\\bTC " ..   -      strempty(val) .. "\\eTC") -    context ("\\eTR ") -  end -  context("\\eTABLEbody") -  context ("\\bTABLEfoot\\bTR".. -    "\\bTC Number\\eTC" .. -    "\\bTC letters\\eTC" ..  -    "\\bTC n\\eTC" ..  -    "\\bTC replacement\\eTC".. -    "\\bTC n\\eTC" ..  -    "\\bTC bytes\\eTC".. -    "\\bTC repl. bytes".. -    "\\eTC\\eTR\\eTABLEfoot ") -  context ("\\eTABLE ") +    context.eTABLEbody() +    context.bTABLEfoot() context.bTR() +      context.bTC() context("number")       context.eTC() +      context.bTC() context("letters")      context.eTC() +      context.bTC() context("n")            context.eTC() +      context.bTC() context("replacement")  context.eTC() +      context.bTC() context("n")            context.eTC() +      context.bTC() context("bytes")        context.eTC() +      context.bTC() context("repl. 
bytes")  context.eTC() +      context.eTR() +    context.eTABLEfoot() +  context.eTABLE()  end  \stopluacode @@ -1891,6 +1905,18 @@ function translit.transliterate (method, text)    if method == "ru" then      translit.add_table(repl_tab, translit.ru_upp)      translit.add_table(repl_tab, translit.ru_low) +  elseif method == "ru_old" then +    translit.add_table(repl_tab, translit.ru_upp) +    translit.add_table(repl_tab, translit.ru_low)  +    translit.add_table(repl_tab, translit.ru_old_upp)  +    translit.add_table(repl_tab, translit.ru_old_low)  +  elseif method == "all" then +    translit.add_table(repl_tab, translit.ru_upp) +    translit.add_table(repl_tab, translit.ru_low)  +    translit.add_table(repl_tab, translit.ru_old_upp)  +    translit.add_table(repl_tab, translit.ru_old_low)  +    translit.add_table(repl_tab, translit.non_ru_upp) +    translit.add_table(repl_tab, translit.non_ru_low)     elseif method == "ru_transcript_de" then      text = translit.subst (text, translit.ru_trsc_jrule)      text = translit.subst (text, translit.ru_trsc_irule) @@ -1910,18 +1936,6 @@ function translit.transliterate (method, text)      translit.add_table(repl_tab, translit.ru_trsc_en_low_first)      translit.add_table(repl_tab, translit.ru_trsc_en_upp)      translit.add_table(repl_tab, translit.ru_trsc_en_low) -  elseif method == "ru_old" then -    translit.add_table(repl_tab, translit.ru_upp) -    translit.add_table(repl_tab, translit.ru_low)  -    translit.add_table(repl_tab, translit.ru_old_upp)  -    translit.add_table(repl_tab, translit.ru_old_low)  -  elseif method == "all" then -    translit.add_table(repl_tab, translit.ru_upp) -    translit.add_table(repl_tab, translit.ru_low)  -    translit.add_table(repl_tab, translit.ru_old_upp)  -    translit.add_table(repl_tab, translit.ru_old_low)  -    translit.add_table(repl_tab, translit.non_ru_upp) -    translit.add_table(repl_tab, translit.non_ru_low)     elseif method == "iso9_ocs" then      
translit.add_table(repl_tab, translit.ru_upp)      translit.add_table(repl_tab, translit.ru_low)  @@ -1930,6 +1944,9 @@ function translit.transliterate (method, text)      translit.add_table(repl_tab, translit.ocs_add_upp)      translit.add_table(repl_tab, translit.ocs_add_low)     elseif method == "ocs" then +    translit.add_table(repl_tab, translit.ocs_uk)  +    text = translit.subst (text, repl_tab) +    repl_tab = {}      translit.add_table(repl_tab, translit.ocs_low)       translit.add_table(repl_tab, translit.ocs_upp)     elseif method == "ocs_gla" then  | 
