Continued documentation

author: Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> 2010-03-02 14:06:04 +0100
committer: Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> 2010-03-02 14:06:04 +0100
commit: f0765800a77932422dcdb02766a06ddd6c7541d1 (patch)
tree: d7b6b908284d6d65bc276eb8370c6e45031c6eaa
parent: fea0bcccf81692ab4e7bcb2a2e60793a2247daff (diff)
download: transliterator-f0765800a77932422dcdb02766a06ddd6c7541d1.tar.gz
5 files changed, 631 insertions, 85 deletions
diff --git a/.hgignore b/.hgignore
index ffcf81a..c8c403c 100644
--- a/.hgignore
+++ b/.hgignore
@@ -33,3 +33,5 @@ syntax:glob
 *.top
 *.tuc
 *.swo
+*.tuo
+*.ted
diff --git a/doc/context/third/transliterator/t-transliterator.mkiv b/doc/context/third/transliterator/t-transliterator.mkiv
new file mode 120000
index 0000000..fe8c6e5
--- /dev/null
+++ b/doc/context/third/transliterator/t-transliterator.mkiv
@@ -0,0 +1 @@
+../../../../tex/context/third/transliterator/t-transliterator.mkiv
+\ No newline at end of file
diff --git a/doc/context/third/transliterator/t-transliterator.xml b/doc/context/third/transliterator/t-transliterator.xml
new file mode 120000
index 0000000..476ff63
--- /dev/null
+++ b/doc/context/third/transliterator/t-transliterator.xml
@@ -0,0 +1 @@
+../../../../tex/context/interface/third/t-transliterator.xml
+\ No newline at end of file
diff --git a/doc/context/third/transliterator/transliterator.tex b/doc/context/third/transliterator/transliterator.tex
index 47b1dc3..1a04a5f 100644
--- a/doc/context/third/transliterator/transliterator.tex
+++ b/doc/context/third/transliterator/transliterator.tex
@@ -1,12 +1,61 @@
+\definepapersize [MYA5] [width=148mm,height=210mm]
+\setuppapersize  [MYA5] [MYA5]
+
 \setupinteraction [state=start]
 \setupcombinedlist[content][interaction=all,focus=standard]
+
+\setupindenting[yes,next,medium]
+
+\setuppagenumbering[location=left]
+
+\setuphead[chapter][style={\rm\bf\tfb},before={\blank[big,force]},after={\blank[2*big,force]}]
+\setuphead[section][style={\rm\bf\tfa},before={\blank[big]},after={\blank[small]}]
+\setuphead[subsection][style={\rm\bf},before={\blank[medium]},after={\blank[small]}]
+
+\usemodule[bib]
+\newbibfield[nugut]
 \usemodule[transliterator]
+\definebodyfontenvironment[default][em=italic]
+
+\starttypescript [serif] [bukyvede]
+  \setups [font:fallback:serif]
+  \definefontsynonym [Serif]        [name:Bukyvede]         [features=default]
+  \definefontsynonym [SerifItalic]  [name:Bukyvede-Italic]  [features=default]
+\stoptypescript
+\usetypescript [bukyvede]
+\definetypeface [hlaholice] [rm] [serif] [bukyvede] [default] [encoding=ec]
+\definetypeface [cyrilice] [rm] [serif] [bukyvede] [default] [encoding=ec]
+
+\usetypefile[cmu]
+\usetypescript[computer-modern-unicode]
+\setupbodyfont[computer-modern-unicode,9pt]
+
+\usetypefile[linuxlibertine]
+\usetypescript[linuxlibertine]
+
+
+\setupcite[authoryear][%
+  compress=no%
+]
+
+\setuppublications[%
+  alternative=apa,%
+  refcommand=authoryear,%
+  sorttype=bbl,%
+  numbering=yes,%
+  autohang=yes%
+]%
+
+\setuppublicationlist[%
+  artauthor=\invertedauthor%
+]
 
 % == REFERENCES ===============================================================
+
 \startpublication[
   k=duden,
   t=book,
-  a={Drosdowski/Müller/Scholze-Stubenrecht/Wermke},
+  a={{Drosdowski/Müller/Scholze-Stubenrecht/Wermke}},
   y=1952,
   n=1,
   s={DUDEN},
@@ -17,16 +66,16 @@
 \editor[]{Matthias}[]{}{Wermke}
 \pubyear{1991}
 \title{DUDEN Rechtschreibung der deutschen Sprache}
-\city{Mannheim et. al.}
-\edition{20}
+\city{Mannheim et al}
+\edition{20.}
 \stoppublication
 
 \startpublication[
   k=bornemann,
   t=book,
-  a={Bornemann/Risch},
+  a={{Bornemann/Risch}},
   y=1978,
-  n=1,
+  n=2,
   s={Grammatik},
 ]
 \author[]{Eduard}[]{}{Bornemann}
@@ -34,55 +83,531 @@
 \pubyear{1978}
 \title{Griechische Grammatik}
 \city{Frankfurt am Main}
-\edition{2}
+\edition{2.}
 \stoppublication
 
 \startpublication[
   k=kirschbaum,
   t=book,
-  a={Kirschbaum},
+  a={{Kirschbaum}},
   y=2001,
-  n=1,
+  n=3,
   s={Grammatik},
 ]
 \author[]{Ernst Georg}[]{}{Kirschbaum}
 \pubyear{2001}
 \title{Grammatik der russischen Sprache}
 \city{Berlin}
-\edition{1}
 \stoppublication
 
 \startpublication[
-  k=aksstudien,
+  k=aks,
   t=book,
-  a={Birnbaum/Schaeken},
+  a={{Birnbaum/Schaeken}},
   y=1999,
-  n=1,
+  n=4,
+  u=http://www.schaeken.nl/lu/research/online/publications/akslstud/index.htm,
   s={Studien},
 ]
-\author[]{Henrik}[]{}{Birnbaum}
-\author[]{Jos}[]{}{Schaeken}
+\author[]{Henrik}[H.]{}{Birnbaum}
+\author[]{Jos}[J.]{}{Schaeken}
 \pubyear{1999}
 \title{Altkirchenslavische Studien}
 \volume{2}
 \city{München}
-\edition{1}
-\url{http://www.schaeken.nl/lu/research/online/publications/akslstud/index.htm}
+\stoppublication
+
+\startpublication[
+  k=dintb,
+  t=book,
+  a={{DIN}},
+  y=2001,
+  n=5,
+  s={DIN},
+]
+\editor[]{}[]{}{DIN Deutsches Institut für Normung e.~V.}
+\pubyear{2001}
+\title{Bibliotheks- und Dokumentationswesen}
+\city{Berlin/Wien/Zürich}
+\stoppublication
+
+\startpublication[
+  k=iso,
+  t=inbook,
+  a={{ISO}},
+  y=1995,
+  n=6,
+  s={ISO~9},
+]
+\editor[]{}[]{}{{ISO International Organization for Standardization}}
+\pubyear{1995}
+\title{Information and documentation -- Transliteration of Cyrillic characters into Latin characters -- Slavic and non-Slavic languages}
+\edition{2.}
+\crossref{dintb}
+\pages{230--245}
 \stoppublication
 
 %==============================================================================
+\setupframed[%
+  location=top,%
+  align={normal,verytolerant},%
+  frame=off,%
+]
+
+
+\definenumber[excnt]
+\setnumber[excnt]{1}
+
+% \trlex: example figure showing an original text next to its transliteration.
+% Args: 1: mode; 2: language of original; 3: hyphenation of transliteration;
+% 4: body font for original; 5: caption; 6: original text. (TODO: key-value args.)
+\def\trlex#1#2#3#4#5#6{%
+  \setupinterlinespace[line=8pt]%
+  {\tfx%
+    \placefigure [force] [#1] {%
+      \type{[mode=#1,hyphenate=#3]}%
+      \hskip 1em
+      {\it #5}%
+    } {%
+      \framed{%
+        \framed[%
+          offset=1ex,%
+          width=.47\textwidth,%
+        ]{%
+          \setupbodyfont[#4]%
+          \unskip\language[#2]#6\par
+        }%
+        \framed[%
+          offset=1ex,%
+          width=.47\textwidth,%
+        ]{%
+          \transliterate[mode=#1,hyphenate=#3]{#6\par}%
+        }%
+      }%
+    }%
+    \incrementnumber[excnt]%
+  }
+}
 
 \usemodule[int-load]
 \loadsetups[t-transliterator.xml] 
 
+
+\setupwhitespace[big]
+\language[en]
 \starttext
+\completecontent
 \chapter{Usage and Functionality}
+\section{Overview}
+Basically the Transliterator provides two commands: \type{\setupTranslit}
+preferably goes into the preamble and allows for global configuration.
+The Transliterator is invoked locally by \type{\transliterate} which does the
+actual transliteration of text passages.
+
 \setup{setupTranslit}
 
+\setup{transliterate}
+
+\section{Module loading and configuration}
+In order to use the Transliterator in a document we put the following somewhere before
+\type{\starttext}.
+\starttyping
+\usemodule[transliterator]
+\stoptyping
+Although it has some defaults already set at this point they will most likely
+not correspond to what is needed in the document.
+To override the presets we use the command \type{\setupTranslit[#1]}.
+It takes a comma separated list of two key-value pairs: \type{mode} and
+\type{hyphenate}.
+Through {\em mode} we specify the transliteration method.
+By the time of this writing this can be one of the following set:
+\setupTABLE[c][each]    [frame=off]
+\setupTABLE[r][first]   [style=bold,topframe=on,bottomframe=on]
+\setupTABLE[r][last]    [topframe=on,bottomframe=on]
+\bTABLE[split=yes,stretch=yes]
+  \bTABLEhead
+    \bTR\bTH mode \eTH\bTH description \eTH\eTR
+  \eTABLEhead
+  \bTABLEbody
+      \bTR\bTC \type{ru}               \eTC\bTC ISO~9 Russian \eTC\eTR
+      \bTR\bTC \type{ru_old}           \eTC\bTC ISO~9 Russian plus pre-1918 chars (the default)\eTC\eTR
+      \bTR\bTC \type{all}              \eTC\bTC ISO~9 complete \eTC\eTR
+      \bTR\bTC \type{ru_transcript_de} \eTC\bTC German transcription for Russian \eTC\eTR
+      \bTR\bTC \type{ru_transcript_en} \eTC\bTC English transcription for Russian \eTC\eTR
+      \bTR\bTC \type{iso9_ocs}         \eTC\bTC == \type{all} plus non-ISO additions for Old (Church) Slavonic \eTC\eTR
+      \bTR\bTC \type{ocs}              \eTC\bTC so-called “scientific” transliteration for Old (Church) Slavonic\eTC\eTR
+      \bTR\bTC \type{ocs_gla}          \eTC\bTC so-called “scientific” transliteration for Old (Church) Slavonic / Glagolitic alphabet\eTC\eTR
+      \bTR\bTC \type{ru_cz}            \eTC\bTC Czech transcription for Russian\eTC\eTR
+      \bTR\bTC \type{ocs_cz}           \eTC\bTC Czech transcription for Old (Church) Slavonic\eTC\eTR
+      \bTR\bTC \type{gr}               \eTC\bTC transliteration for Greek \eTC\eTR
+      \bTR\bTC \type{gr_n}             \eTC\bTC transliteration for Greek obeying nasalizations \eTC\eTR
+  \eTABLEbody
+  \bTABLEfoot
+    \bTR\bTH mode \eTH\bTH description \eTH\eTR
+  \eTABLEfoot
+\eTABLE
+
+
+{\em Nota bene}: The description at this point only serves as a placeholder as the
+transliteration modes are discussed in detail later in this document.
+
+Through the \type{hyphenate} argument it is possible to adjust the language
+that is used for hyphenation.
+Specifying \type{\setupTranslit[hyphenate=nl]} will let every transliterated
+part of the document be processed according to Dutch rules, leaving the overall
+\type{\language[#1]} configuration unchanged for the rest of the content.
+
+The actual transliteration is done using the macro
+\type{\transliterate[#1]{#2}}.
+The second argument takes the raw string in the original language that we want
+to process, while the first, optional argument accepts local adjustments for
+\type{mode} and \type{hyphenate}.
+Thus, we would typeset one of Epicuros' sayings like this:
+{\setuptolerance[verytolerant]
+\starttyping
+\transliterate[mode=gr]{κακὸν ἀνάγκη, ἀλλ' οὐδεμία ἀνάγκη ζῆν μετὰ ἀνάγκης}
+\stoptyping
+which yields \quotation{\transliterate[mode=gr]{κακὸν ἀνάγκη, ἀλλ' οὐδεμία ἀνάγκη ζῆν
+μετὰ ἀνάγκης}} in the pdf output.
+}
+
 
 \chapter{Introduction}
 
+\hfil\framed[width=\hsize,align=left]{%
+  \inframed[bottomframe=on]{\it What's all this, then?}
+  \blank[medium]
+  {\sc Graham Chapman}
+}
+
+\noindentation  At first glance, {\em transliteration} -- the accurate representation of letters from one
+alphabet in another -- seems obsolete after the advent of Unicode
+which made its way even into \TeX\ lately.
+Why not just go on and write down everything in the original script?
+But still there are lots of situations where transliteration is desirable,
+e.~g. some scholarly habits might prescribe it in the main text with citations in
+footnotes left in the original alphabet; or transliteration might alleviate
+comparison within one language that happens to be written in different scripts;
+finally, including text in a foreign script might be impossible if there is no
+appropriate font that fits the main text.
+However, it is still most convenient for the writer to keep the
+untransliterated original in the document source as this allows for reusing it in
+another context where different transliteration rules might apply.
+The Transliterator module is meant to provide both: have the original in the
+source and a transliteration only in the final document.
+
+Another way of handling foreign languages is {\em transcription}.
+It aims at producing some representation that does not rely on symbolisms
+alien to the language and thus to be at least \quotation{pronounceable}
+without further knowledge.
+As transcription methods are language specific and highly idiosyncratic they
+complicate the restoration of the original phrase and information may be lost.
+The Transliterator provides means of transcription as well but in most cases
+you should refrain from using them (\type{[mode=ru_transcript_en]},
+\type{[mode=ru_transcript_de]}). 
+
+For Cyrillic scripts the best quality is achieved using the standardized
+transliteration according to {\em ISO~9}.\footnote{\cite[authoryear][iso].}
+This method not only covers all contemporary languages that are written in
+a variety of Cyrillic but provides a bijective mapping on Latin characters as
+well.
+Consequently, you can unambiguously revert the transliteration into
+its original form which was impossible with previous versions of ISO~9 because
+they contained several exceptions depending on the original language.
+Although fifteen years old it has not yet made its way into scholarly
+publications at large so it might not immediately look familiar.
+The diacritics are not identical to the so-called \quotation{scientific}
+transliteration used in Slavic studies but as long as your journal does not
+enforce its traditional method you should always prefer ISO~9
+(\type{[mode=ru]}, \type{[mode=ru_old]}, \type{[mode=all]}).
+
+But ISO~9, too, has its shortcomings.
+It has no definitions for historical forms of the Cyrillic script like
+pre-XVIII-century Russian and Old (Church) Slavonic while those are covered by
+the scholarly transliterations.
+To amend the situation the Transliterator provides an extension to ISO~9 for
+Old Slavonic containing the glyphs 
+\startluacode
+local cnt, len = 0, 0 -- Wishing for a len() function that works on dictionaries as in python…
+for i,j in pairs(translit.ocs_add_low) do
+  len = len + 1
+end
+
+for k,v in pairs(translit.ocs_add_low) do
+  cnt = cnt + 1
+  context.bgroup() 
+    context.setupbodyfont({"cyrilice"})
+    context(k)
+  context.egroup() 
+  if cnt < len -1 then
+    context(", ") 
+  elseif cnt < len then
+    context("\\ and ")
+  end
+end
+\stopluacode
+\ taken from the scientific transliteration (\type{[mode=iso9_ocs]}).
+If you prefer more coherency you might want to use pure \quotation{scientific}
+transliteration (\type{[mode=ocs]}).
+This method is complemented by \type{[mode=ocs_gla]}, the only option the
+Transliterator offers for the Glagolitic alphabet; they can be used consistently
+alongside each other as they were taken from the same
+book.\footnote{\cite[authoryear][aks] p.~77 \cite[url][aks].}
+
+As far as I know there is no standardized transliteration for Greek so I had to
+resort to the one that is used in scholarly literature.
+Its main drawback is that it has no representation for diacritics apart from
+(rough) breathing, but it respects specific rules for diphthongs and vowels in
+initial positions (\type{[mode=gr]}).
+There is one alternative mode for those who prefer their {\em γ} phonetically
+resolved to /{\em n}/ before velars ({\em γ}, {\em κ}, {\em χ} and {\em ξ};
+\type{[mode=gr_n]}).
+
+Concerning the hyphenation within transliterated passages the default is set to
+\type{[hyphenate=cs]} (Czech) which produces reasonable results when using
+\type{all}, \type{iso9_ocs} or \type{ru_cz}.
+For stuff like the English and German transcription use their respective native
+hyphenation.\footnote{%
+  You'll have to specify this through \type{\setupTranslit}
+  or locally because the default hyphenation is {\em not} the same as your
+  document's.
+}
+However, as there is no hyphenation pattern I know of that closely resembles the
+transliteration of Greek you might have to resort to putting \type{\discretionary}
+hyphens when line breaking does not satisfy.
+
+
+\chapter[ex]{Examples}
+\section{Cyrillic scripts}
+\subsection{ISO~9 and derivatives}
+Several transliteration rules are either strictly ISO~9 compliant (\type{ru}, \type{ru_old}, \type{all}) 
+or contain ISO~9 as a subset (\type{iso9_ocs}).\footnote{%
+  Unfortunately there are not yet any language files for some of them so please
+  excuse the inadequate hyphenation in these cases.%
+}
+
+\trlex{ru}{ru}{cz}{computer-modern-unicode}{%
+  Transliteration rules for the contemporary Russian alphabet.%
+}{%
+  В~ворота гостиницы губернского города NN въехала довольно красивая рессорная
+  небольшая бричка, в~какой ездят холостяки: отставные подполковники,
+  штабс-капитаны, помещики, имеющие около сотни душ крестьян, — словом, все те, 
+  которых называют господами средней руки. 
+  В~бричке сидел господин, не красавец, но и~не дурной наружности, ни слишком
+  толст, ни слишком тонок; нельзя сказать, чтобы стар, однако ж~и~не так чтобы
+  слишком молод.
+}
+
+\trlex{ru_old}{ru}{cz}{computer-modern-unicode}{%
+  With additional characters for pre-1918 Russian orthography (100~per cent ISO~9).%
+}{%
+  А~сведется віра, убьютъ сотцкого в~селѣ, ино тебѣ взяти полтіна, а~не
+  сотцкого,
+  ино четырѣ гривны, а~намъ віръ не таити в~Новѣгородѣ; а~о~убіствѣ віръ нѣтъ.
+  А~что волости, честны король, новгородцкіе, ино тебѣ не держати своими мужи,
+  а~держати мужми новогородцкими.
+  А~что пошлина в~Торжку и~на Волоцѣ, тівунъ свои держати на своеи чясті,
+  а~Новугороду на своеи чясти посадника держаті.
+  А~се волости новогородцкіе: Волокъ со всѣми волостми, Торжокъ, Бѣжіці,
+  Городець
+  Палець, Шіпинъ, Мелеця, Егна, Заволочье, Тиръ, Пермь, Печера, Югра, Вологда
+  с~волостмі.
+}
+
+\trlex{all}{uk}{cz}{computer-modern-unicode}{%
+  The complete Cyrillic mapping from ISO~9; transliterating Belarusian.%
+}{%
+  Беларуская мова, мова беларусаў, уваход\-зіць у~сям’ю індаеўрапейскіх моў, яе
+  славянскай групы і~ўсходнеславянскіх моваў падгрупы, на якой размаўляюць
+  у~Беларусі і~па ўсім свеце, галоўным чынам у~Расіі, Украіне, Польшчы.
+  Б.~м. пад\-зяляе шмат граматычных і~лексічных уласцівасцяў з~іншымі
+  ўсходнеславянскімі мовамі (гл. таксама: Іншыя назвы беларускай мовы і~Узаемныя
+  ўплывы усходнеславянскіх моваў).
+}
+
+\trlex{all}{uk}{cz}{computer-modern-unicode}{%
+  The complete Cyrillic mapping from ISO~9; transliterating Ukrainian.%
+}{%
+  Украї́нська мова (застарілі назви -- руська мова, проста мова […]) --
+  слов'янсь\-ка мова, державна в~Україні та одна з~трьох «офіційних мов на рівних
+  засадах» у~неви\-знаній Придністровській Молдавсь\-кій Республіці.
+  За різними оцінками загалом у~світі українською мовою гово\-рить від 41~млн.
+  до 45~млн. осіб, вона входить до третього десятка найпоши\-ре\-ні\-ших мов
+  світу.
+}
+
+\trlex{all}{ru}{cz}{computer-modern-unicode}{%
+  The complete Cyrillic mapping from ISO~9; transliterating Serbian.%
+}{%
+  Српски језик је један од словенских језика из породице индоевропских језика.
+  Први писани споменици у~српској редакцији старословенског језика потичу из XI
+  и~XII века.
+  Српски језик је стандардни језик у~службеној употреби у~Србији, Босни
+  и~Херцеговини и~Црној Гори, а~у~употреби је и~у другим земљама гдје живе
+  Срби, међу осталима и~у~Хрватској.
+}
+
+\trlex{iso9_ocs}{ru}{cz}{cyrilice}{%
+  Transliteration rules according to ISO~9 with additions for Old (Church)
+  Slavonic.%
+}{%
+  Что сѧ дѣѥтѣ по вѣремьнемь~: то ѿидето по вѣрьмьнемь~: приказано бѹдѣте
+  добрымъ людѣмъ~: а любо грамотою ѹтвѣрдѧть~: како то бѹдѣте всемъ вѣдомъ~:
+  или кто посль живыи ѡстанѣть сѧ~: того лѣт͠ коли алъбрахтъ~: влд͠ка ризкии
+  ѹмьрлъ~: ѹздѹмалъ кнѧзѣ смольнескыи~: мьстиславъ~: двд͠въ сн͠ъ~: прислалъ въ
+  ригѹ своѥго лѹчьшего попа~: ѥрьмея~: и съ нимь ѹмьна мѹжа пантелья~:
+  исвоѥго горда смольнеска~: та два была послъмь ѹ ризѣ~: из ригы ѥхали на
+  гочкыи берьго~: тамо твердити миръ~:
+}
+
+\subsection{“Scientific” transliteration}
+These transliterations are widely used among scholars, mainly linguists and, to
+a lesser extent, historians.
+They comprise large character sets in order to represent the original text
+adequately and facilitate comparison of texts of the same language written in
+different scripts; they are not, however, as easily reversible as ISO~9.
+
+\trlex{ocs}{ru}{cs}{cyrilice}{%
+  Transliteration for Old Slavonic used in Slavic studies, taken from the
+  excellent book of \cite [authoryear][aks].%
+}{%
+  Се начнемъ повѣсть сию. 
+  По потопѣ . первиє снве Ноєви . раздѣлиша землю . Симъ . Хамъ . Афетъ . и~ꙗсѧ
+  въстокъ . Симови Персида . Ватрь . тоже  и~до Индикиꙗ в~долготу и~в~ширину [и
+  до Нирокоуриа] ꙗкоже рещи ѿ въстока и~до полуденьꙗ . и~Суриꙗ .
+  и~Индиа по Єфратъ рѣку . Вавилонъ . Кордуна . Асурѧне . Мисопотамира .
+  Аравиꙗ . старѣишаꙗ . Єлмаисъ . Инди . Равиꙗ . на всѧ  Д.
+}
+
+\trlex{ru_cz}{ru}{cs}{computer-modern-unicode}{%
+  Czech phonetic transcription for contemporary Russian.%
+}{%
+  Прошло семь лет после 12-го года. Взволнованное историческое море Европы
+  улеглось в свои берега. Оно казалось затихшим; но таинственные силы,
+  двигающие человечество (таинственные потому, что законы, определяющие их
+  движение, неизвестны нам), продолжали свое действие.
+  Несмотря на то, что поверхность исторического моря казалась неподвижною, так
+  же непрерывно, как движение времени, двигалось человечество. Слагались,
+  разлагались различные группы людских сцеплений; подготовлялись причины
+  образования и~разложения государств, перемещений народов.%
+}
+
+\trlex{ocs_cz}{ru}{cs}{cyrilice}{%
+  Czech phonetic transcription for Old Slavonic (superset of the corresponding
+  Russian transcription).
+}{%
+  Убьеть мужь мужа, то мьстить брату брата, или сынови отца, любо отцю сына,
+  или братучаду, любо сестрину сынови; аще не будеть кто мьстіѧ, то 40 гривенъ
+  ꙁа голову; аще будеть русинъ, любо гридинъ, любо купчина, любо іѧбетник, любо
+  мечникъ, аще иꙁъгои будеть, любо словенинъ, то 40 гривенъ положити ꙁа нь.
+}
+
+
+\subsection{Legacy national transcriptions}
+At the moment there are tables for old school transcription into three
+languages: English (via \type{ru_transcript_en}), German
+(\type{ru_transcript_de}) and Czech (\type{ocs_cz}).
+Only one of them (Czech) is recommendable as the others are to a~large extent
+irreversible and lack efficiency;
+at least the German one is almost unreadable if used with
+strings longer than two words.
+As we have the bijective ISO~9 mapping at hand there should be no reason at all to
+use any of them unless when threatened by ignorants.
+
+\trlex{ru_transcript_en}{ru}{en}{computer-modern-unicode}{%
+  English transcription for contemporary Russian.%
+}{%
+  Прошло семь лет после 12-го года. Взволнованное историческое море Европы
+  улеглось в свои берега. Оно казалось затихшим; но таинственные силы,
+  двигающие человечество (таинственные потому, что законы, определяющие их
+  движение, неизвестны нам), продолжали свое действие.
+  Несмотря на то, что поверхность исторического моря казалась неподвижною, так
+  же непрерывно, как движение времени, двигалось человечество. Слагались,
+  разлагались различные группы людских сцеплений; подготовлялись причины
+  образования и~разложения государств, перемещений народов.%
+}
+
+\trlex{ru_transcript_de}{ru}{deo}{computer-modern-unicode}{%
+  German transcription for contemporary Russian.\footnote{%
+    Following \cite[authoryear][duden] p.~82; all the canonical rules are
+    implemented save one: {\em -его} and {\em -ого} should resolve to {\em
+    -ewo} and {\em -owo} respectively iff genitive endings.
+    As this is a grammatical rather than graphetical criterion writing  a
+    substitution algorithm would amount to doing natural language parsing.
+    To make things worse this rule is phonetically confused as it would not
+    take care of other contexts where {\em г} in those patterns is articulated
+    as /{\em v}/ like for instance in {\em сегодня} (which is a historical
+    genitive, though …).
+    So even if this could be implemented it would not be advisable to use such
+    a rule.%
+  }%
+}{%
+  Прошло семь лет после 12-го года. Взволнованное историческое море Европы
+  улеглось в свои берега. Оно казалось затихшим; но таинственные силы,
+  двигающие человечество (таинственные потому, что законы, определяющие их
+  движение, неизвестны нам), продолжали свое действие.
+  Несмотря на то, что поверхность исторического моря казалась неподвижною, так
+  же непрерывно, как движение времени, двигалось человечество. Слагались,
+  разлагались различные группы людских сцеплений; подготовлялись причины
+  образования и~разложения государств, перемещений народов.%
+}
+
+\section{Glagolitic}
+\trlex{ocs_gla}{ru}{cs}{hlaholice}{%
+  “Scientific” transliteration for Old Slavonic written in the Glagolitic
+  alphabet as used in \cite[authoryear][aks].%
+}{%
+  [ⰲⰾ] 
+  ⰰⰴⱏⰻⰽⱁ ⱍⰽ҃ⱏ ⱄⰻ ⱈⱁⱋⰵⱅⱏ ⱃⰰⰸ[ⱁⱃⰻⱅ] 
+  ⰻ ⰸⰰⰽⱁⱀⱏ ⰿⰰⱀⰰⱄⱅⱏⰻⱃⱏⱄⰽⰻ: [ⰻⰶⰵ] 
+  ⱅⱏⰻ ⱆⱄⱅⰰⰲⰻ჻ Ⱃⰵⱍⰵ ⰶⰵ ⰻⰳⱆⰿ[ⱏ] [ⱀⱏ] 
+  ⰽⰰⰽⱁ ⱈⱁⱋⰵⱅⱏ ⱃⰰⰸⱁⱃⰻⱅⰻ ⰸⰰⰽ[ⱁⱀⱏ] 
+  [.] [ⰰ] ⰵⱄⱅⱏ· ⱍⱃⱏⰲⰻ⁖ ⰻ [ⰿ] [..........] 
+  [..] ⰿⱏ ⱀⰵ ⰿⱁⰶⰵⰿⱏ ⱄⰵⰳⱁ ⱅⱃⱏⱂⱑⱅ[ⰻ] 
+  [ⰴⰰ] ⰾⱆⰱⱁ ⱄⰵⰳⱁ ⰻⰿⱑⰻ ⱄⱏⰴⱑ჻ ⰰ ⰿⱏⰻ ⱁ 
+  [ⱅⰻ]ⰴⰵⰿⱏ: ⰾⱆⰱⱁ ⱄⰵⰳⱁ ⱂⱆⱄⱅⰻ: ⰴⰰ ⱁⱅ 
+  [ⰻⰴ]ⰵⱅⱏ ⰻⰶⰵ ⰵⱄⱅⱏ ⱂⱃⰻⱎⱏⰾⱏ: ⱄ[ⰵ] 
+}
+
+\section{Greek}
+The Transliterator offers two modes for handling Greek: \type{gr} and
+\type{gr_n}.
+They differ only in one aspect.
+\type{gr} basically transliterates the canonical Greek alphabet as well as the
+special glyphs Digamma, Qoppa and Sampi.
+\type{gr_n} behaves exactly the same way except that nasalization is observed
+such that \type{γ+[γ|κ]} yields \type{n+[g|k]}.
+
+\trlex{gr}{agr}{de}{computer-modern-unicode}{%
+  Transliteration for Greek -- standard.
+}{%
+  οἴνῳ δὲ κάρτα προσκέαται, καί σφι οὐκ ἐμέσαι ἔξεστι, οὐκὶ οὐρῆσαι ἀντίον
+  ἄλλου.
+  ταῦτα μέν νυν οὕτω φυλάσσεται, μεθυσκόμενοι δὲ ἐώθασι βουλεύεσθαι τὰ
+  σπουδαιέστατα τῶν πρηγμάτων: τὸ δ᾽ ἂν ἅδῃ σφι βουλευομένοισι, τοῦτο τῇ
+  ὑστεραίῃ νήφουσι προτιθεῖ ὁ στέγαρχος, ἐν τοῦ ἂν ἐόντες βουλεύωνται, καὶ ἢν
+  μὲν
+  ἅδῃ καὶ νήφουσι, χρέωνται αὐτῷ, ἢν δὲμὴ ἅδῃ, μετιεῖσι. τὰ δ᾽ ἂν νήφοντες
+  προβουλεύσωνται, μεθυσκόμενοι ἐπιδιαγινώσκουσι.
+}%
+
+\trlex{gr_n}{agr}{de}{computer-modern-unicode}{%
+  Transliteration for Greek -- alternative respecting nasalization.
+}{%
+  ταῦτα καὶ νεωτέρῳ καὶ πρεσβυτέρῳ ὅτῳ ἂν ἐντυγχάνω ποιήσω, καὶ ξένῳ καὶ ἀστῷ,
+  μᾶλλον δὲ τοῖς ἀστοῖς, ὅσῳ μου ἐγγυτέρω ἐστὲ γένει.
+}%
+   
+
 \chapter{References}
+%\cite[authoryear][iso]
+\nocite[duden]
+\nocite[bornemann]
+\nocite[kirschbaum]
+\nocite[iso]
+\nocite[aks]
+\nocite[dintb]
+\placepublications [criterium=all]
 
 \stoptext
 %   vim:ft=context
diff --git a/tex/context/third/transliterator/t-transliterator.mkiv b/tex/context/third/transliterator/t-transliterator.mkiv
index fc07c1d..8f8cadf 100644
--- a/tex/context/third/transliterator/t-transliterator.mkiv
+++ b/tex/context/third/transliterator/t-transliterator.mkiv
@@ -11,7 +11,7 @@
 %D          email={pgesang at ix dot urz dot uni-heidelberg dot de}]
 %D This module is licensed under the conditions of the BSD license with 
 %D two clauses: http://www.freebsd.org/copyright/freebsd-license.html.
-%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/.\newpage
+%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/.
 
 \writestatus{loading}{Transliteration from non-Latin scripts}
 
@@ -766,6 +766,8 @@ translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp
 
 translit.ru_trsc_cz_add_low = {
   ["ѕ"] = "dz",
+  ["з"] = "z",
+  ["ꙁ"] = "z",
   ["і"] = "ï",
   ["ѹ"] = "u",
   ["ѡ"] = "ō",
@@ -790,6 +792,8 @@ translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit
 
 translit.ru_trsc_cz_add_upp = {
   ["Ѕ"] = "Dz",
+  ["З"] = "Z",
+  ["Ꙁ"] = "Z",
   ["І"] = "Ï",
   ["Ѹ"] = "U",
   ["Ѡ"] = "Ō",
@@ -824,6 +828,15 @@ translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit
 -- Source p. 77 of
 -- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf
 
+-----------------------------------------------------------------------
+-- Lowercase and uppercase letter Uk -- “scientific transliteration” --
+-----------------------------------------------------------------------
+
+translit.ocs_uk = {
+  ["oу"] = "u",
+  ["оу"] = "u",
+  ["Оу"] = "U",
+}
 -----------------------------------------------------------------------------
 -- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” --
 -----------------------------------------------------------------------------
@@ -853,7 +866,6 @@ translit.ocs_low = {
   ["р"] = "r",
   ["с"] = "s",
   ["т"] = "t",
-  ["оу"] = "u",
   ["ѹ"] = "u",
   ["ꙋ"] = "u",
   ["ф"] = "f",
@@ -915,7 +927,6 @@ translit.ocs_upp = {
   ["Р"] = "R",
   ["С"] = "S",
   ["Т"] = "T",
-  ["Оу"] = "U",
   ["Ѹ"] = "U",
   ["ꙋ"] = "U",
   ["Ф"] = "F",
@@ -1803,61 +1814,64 @@ end
 function translit.show_tab (tab)
   -- Output a transliteration table, nicely formatted with natural tables.
   local cnt = 0
-  context ("\\setupTABLE[r][each]   [style=\\tfx,align=center] ")
-  context ("\\setupTABLE[c][each]   [frame=off]")
-  context ("\\setupTABLE[r][each]   [frame=off]")
-  context ("\\setupTABLE[c][first]  [style=italic]")
-  context ("\\setupTABLE[r][first]  [style=bold,topframe=on,bottomframe=on]")
-  context ("\\setupTABLE[r][last]   [style=bold,topframe=on,bottomframe=on]")
-  context ("\\bTABLE [split=yes,option=stretch]")
-  context ("\\bTABLEhead\\bTR"..
-    "\\bTH Number\\eTH" ..
-    "\\bTH letters\\eTH" .. 
-    "\\bTH n\\eTH" .. 
-    "\\bTH replacement\\eTH"..
-    "\\bTH n\\eTH" .. 
-    "\\bTH bytes\\eTH"..
-    "\\bTH repl. bytes"..
-    "\\eTH\\eTR\\eTABLEhead ")
-  context("\\bTABLEbody")
-  for key, val in pairs(tab) do
-    local strempty = function (s) 
-      -- Some characters might not be replaced but removed, others might be
-      -- multi-char sequences.
-      if #s == 0 then return "nil"
-      else 
-        local i = 0
-        local r = ""
-        -- The following loop could be replaced by checking the string length with utf.len(s) …
-        repeat
-          i = i + 1
-          if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. " "  end
-        until (false)
-        return r
+  context.setupTABLE({"r"}, {"each"},     {style="\\tfx", align="center"})
+  context.setupTABLE({"c"}, {"each"},     {frame="off"})
+  context.setupTABLE({"r"}, {"each"},     {frame="off"})
+  context.setupTABLE({"c"}, {"first"},    {style="italic"})
+  context.setupTABLE({"r"}, {"first"},    {style="bold", topframe="on", bottomframe="on"})
+  context.setupTABLE({"r"}, {"last"},     {style="bold", topframe="on", bottomframe="on"})
+  context.bTABLE({split="yes", option="stretch"})
+    context.bTABLEhead()
+      context.bTR()
+        context.bTH() context("number")         context.eTH()
+        context.bTH() context("letters")        context.eTH()
+        context.bTH() context("n")              context.eTH()
+        context.bTH() context("replacement")    context.eTH()
+        context.bTH() context("n")              context.eTH()
+        context.bTH() context("bytes")          context.eTH()
+        context.bTH() context("repl. bytes")    context.eTH()
+      context.eTR()
+    context.eTABLEhead()
+    context.bTABLEbody()
+      for key, val in pairs(tab) do
+        local strempty = function (s) 
+          -- Some characters might not be replaced but removed, others might be
+          -- multi-char sequences.
+          if #s == 0 then return "nil"
+          else 
+            local i = 0
+            local r = ""
+            -- The following loop could be replaced by checking the string length with utf.len(s) …
+            repeat
+              i = i + 1
+              if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. " "  end
+            until (false)
+            return r
+          end
+        end
+        cnt = cnt + 1
+        context.bTR()
+          context.bTC() context(cnt)           context.eTC()
+          context.bTC() context(key)           context.eTC()
+          context.bTC() context(utf.len(key))  context.eTC()
+          context.bTC() context(val)           context.eTC()
+          context.bTC() context(utf.len(val))  context.eTC()
+          context.bTC() context(strempty(key)) context.eTC()
+          context.bTC() context(strempty(val)) context.eTC()
+        context.eTR()
       end
-    end
-    cnt = cnt + 1
-    context ("\\bTR\\bTC " .. cnt .. "\\eTC")
-    context ("\\bTC " .. 
-      key .. "\\eTC\\bTC " .. 
-      utf.len(key) .. "\\eTC\\bTC " .. 
-      val .. "\\eTC\\bTC " ..  
-      utf.len(val) .. "\\eTC\\bTC " ..  
-      strempty(key) .. "\\eTC\\bTC " ..  
-      strempty(val) .. "\\eTC")
-    context ("\\eTR ")
-  end
-  context("\\eTABLEbody")
-  context ("\\bTABLEfoot\\bTR"..
-    "\\bTC Number\\eTC" ..
-    "\\bTC letters\\eTC" .. 
-    "\\bTC n\\eTC" .. 
-    "\\bTC replacement\\eTC"..
-    "\\bTC n\\eTC" .. 
-    "\\bTC bytes\\eTC"..
-    "\\bTC repl. bytes"..
-    "\\eTC\\eTR\\eTABLEfoot ")
-  context ("\\eTABLE ")
+    context.eTABLEbody()
+    context.bTABLEfoot() context.bTR()
+      context.bTC() context("number")       context.eTC()
+      context.bTC() context("letters")      context.eTC()
+      context.bTC() context("n")            context.eTC()
+      context.bTC() context("replacement")  context.eTC()
+      context.bTC() context("n")            context.eTC()
+      context.bTC() context("bytes")        context.eTC()
+      context.bTC() context("repl. bytes")  context.eTC()
+      context.eTR()
+    context.eTABLEfoot()
+  context.eTABLE()
 end
 
 \stopluacode
@@ -1891,6 +1905,18 @@ function translit.transliterate (method, text)
   if method == "ru" then
     translit.add_table(repl_tab, translit.ru_upp)
     translit.add_table(repl_tab, translit.ru_low)
+  elseif method == "ru_old" then
+    translit.add_table(repl_tab, translit.ru_upp)
+    translit.add_table(repl_tab, translit.ru_low) 
+    translit.add_table(repl_tab, translit.ru_old_upp) 
+    translit.add_table(repl_tab, translit.ru_old_low) 
+  elseif method == "all" then
+    translit.add_table(repl_tab, translit.ru_upp)
+    translit.add_table(repl_tab, translit.ru_low) 
+    translit.add_table(repl_tab, translit.ru_old_upp) 
+    translit.add_table(repl_tab, translit.ru_old_low) 
+    translit.add_table(repl_tab, translit.non_ru_upp)
+    translit.add_table(repl_tab, translit.non_ru_low) 
   elseif method == "ru_transcript_de" then
     text = translit.subst (text, translit.ru_trsc_jrule)
     text = translit.subst (text, translit.ru_trsc_irule)
@@ -1910,18 +1936,6 @@ function translit.transliterate (method, text)
     translit.add_table(repl_tab, translit.ru_trsc_en_low_first)
     translit.add_table(repl_tab, translit.ru_trsc_en_upp)
     translit.add_table(repl_tab, translit.ru_trsc_en_low)
-  elseif method == "ru_old" then
-    translit.add_table(repl_tab, translit.ru_upp)
-    translit.add_table(repl_tab, translit.ru_low) 
-    translit.add_table(repl_tab, translit.ru_old_upp) 
-    translit.add_table(repl_tab, translit.ru_old_low) 
-  elseif method == "all" then
-    translit.add_table(repl_tab, translit.ru_upp)
-    translit.add_table(repl_tab, translit.ru_low) 
-    translit.add_table(repl_tab, translit.ru_old_upp) 
-    translit.add_table(repl_tab, translit.ru_old_low) 
-    translit.add_table(repl_tab, translit.non_ru_upp)
-    translit.add_table(repl_tab, translit.non_ru_low) 
   elseif method == "iso9_ocs" then
     translit.add_table(repl_tab, translit.ru_upp)
     translit.add_table(repl_tab, translit.ru_low) 
@@ -1930,6 +1944,9 @@ function translit.transliterate (method, text)
     translit.add_table(repl_tab, translit.ocs_add_upp)
     translit.add_table(repl_tab, translit.ocs_add_low) 
   elseif method == "ocs" then
+    translit.add_table(repl_tab, translit.ocs_uk) 
+    text = translit.subst (text, repl_tab)
+    repl_tab = {}
     translit.add_table(repl_tab, translit.ocs_low) 
     translit.add_table(repl_tab, translit.ocs_upp) 
   elseif method == "ocs_gla" then
author	Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de>	2010-03-02 14:06:04 +0100
committer	Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de>	2010-03-02 14:06:04 +0100
commit	f0765800a77932422dcdb02766a06ddd6c7541d1 (patch)
tree	d7b6b908284d6d65bc276eb8370c6e45031c6eaa
parent	fea0bcccf81692ab4e7bcb2a2e60793a2247daff (diff)
download	transliterator-f0765800a77932422dcdb02766a06ddd6c7541d1.tar.gz