diff options
author | Philipp Gesang <phg@phi-gamma.net> | 2013-03-27 21:20:18 +0100 |
---|---|---|
committer | Philipp Gesang <phg@phi-gamma.net> | 2013-03-27 21:20:18 +0100 |
commit | 0a227783234edc4f963b852e09a029f5ea3d87a8 (patch) | |
tree | 5c65d96358f87261e9b2c23b3c91f607745be2eb /doc | |
parent | dcb4e3daf95132d661aadf34e61ff1f62ab39eb3 (diff) | |
download | transliterator-release_2013-1.tar.gz |
reform manualrelease_2013-1
Diffstat (limited to 'doc')
-rw-r--r-- | doc/context/third/transliterator/transliterator.tex | 249 |
1 files changed, 129 insertions, 120 deletions
diff --git a/doc/context/third/transliterator/transliterator.tex b/doc/context/third/transliterator/transliterator.tex index a430ceb..d8e392c 100644 --- a/doc/context/third/transliterator/transliterator.tex +++ b/doc/context/third/transliterator/transliterator.tex @@ -37,6 +37,27 @@ after={\blank[line]} ] +\setuplist[chapter][ + alternative=c, + interaction=text, + style={\word\sc}, +] +\setuplist[section,subsection][ + alternative=a, + style=\tfx\italic, + interaction=text, + margin=2em, + numberstyle=, + textstyle=, + numberstyle=\tfx, +] + +\setuplist[subsection][ + margin=4em, +] + +\setuplistalternative + \definecharacterkerning [capitals] [factor=.05] \definefontfeature [default][default][ @@ -192,7 +213,7 @@ n=6, s={ISO~9}, ] -\editor[]{}[]{}{{ISO International Organization for Standardization}} +\editor[]{}[]{}{{{\sc iso} International Organization for Standardization}} \pubyear{1995} \title{Information and documentation -- Transliteration of Cyrillic characters into Latin characters -- Slavic and non-Slavic languages} \edition{2.} @@ -201,12 +222,23 @@ \stoppublication %============================================================================== -\setupframed[% - location=top,% - align={normal,verytolerant},% - frame=off,% + +\setupframed[ + frame=off, + align=normal, + location=top, ] +\defineframed[displayouter][ + location=top, + align={normal,verytolerant}, + frame=off, + style=\tfx, +] +\defineframed[displayinner][displayouter][ + offset=1ex, + width=.47\textwidth, +] \definenumber[excnt] \setnumber[excnt][1] @@ -215,33 +247,26 @@ % 1: mode; 2: hyphenate original; 3: hyphenate transliteration; % 4: font for original; 5: caption; 6: original text. \def\trlex#1#2#3#4#5#6{% - \setupinterlinespace[line=8pt]% - {\tfx% - \placefigure [force] [#1] {% - \type{[mode=#1,hyphenate=#3]}% - \hskip 1em - {\it #5}% - } {% - \framed{% - \framed[% - offset=1ex,% - width=.47\textwidth,% - ]{% - \setupbodyfont[#4]% - %\setuptolerance[verytolerant, stretch] - \setuptolerance[verytolerant] - \unskip\language[#2]#6\par - }% - \framed[% - offset=1ex,% - width=.47\textwidth,% - ]{% - \transliterate[mode=#1,hyphenate=#3]{#6\par}% - }% + \setuplocalinterlinespace[line=8pt]% + \startplacefigure [ + location=force, + title={\type{[mode=#1,hyphenate=#3]}\hskip 1em{\italic #5}} + ]% + \displayouter{% + \displayinner{% + \setupbodyfont[#4]% + \tfx + %\setuptolerance[verytolerant, stretch] + \setuptolerance[verytolerant] + \unskip\language[#2]#6\par }% - }% - \incrementnumber[excnt]% - } + \displayinner{% + \tfx + \transliterate[mode=#1,hyphenate=#3]{#6\par}% + } + } + \stopplacefigure + \incrementnumber[excnt]% } \defineframedtext[CenteredText][width=fit,frame=off,align=middle] @@ -269,11 +294,11 @@ {\setupbodyfont[19pt] {\em The} \blank [2*big] - {\tfc\sc Transliterator} + {\tfc\sc transliterator} \blank [2*big] {\em for \CONTEXT} - \blank [5*big] - {\tfc\sc Manual} + \blank [9*big] + {\tfa\sc manual} } \vfill \stopstandardmakeup @@ -284,7 +309,7 @@ \tfxx\ss\setupinterlinespace[small]% \startlines The {\em Transliterator} module and mini-manual, -by Philipp Gesang, Heidelberg. +by Philipp Gesang, Radebeul. Mail any patches or suggestions to {\tt philipp -dot- gesang -at- alumni -dot- uni-heidelberg -dot- de} @@ -326,34 +351,31 @@ It takes a comma separated list of two key-value pairs: \type{mode} and \type{hyphenate}. Through {\em mode} we specify the transliteration method. By the time of this writing this can be one of the following set: -\setupTABLE[c][each] [frame=off] -\setupTABLE[r][first] [style=bold,topframe=on,bottomframe=on] -\setupTABLE[r][last] [topframe=on,bottomframe=on] -\bTABLE[split=yes,stretch=yes] - \bTABLEhead - \bTR\bTH mode \eTH\bTH description \eTH\eTR - \eTABLEhead - \bTABLEbody - \bTR\bTC \type{all} \eTC\bTC ISO~9 complete \eTC\eTR - \bTR\bTC \type{bg_de} \eTC\bTC Bulgarian, German „scientific“ transliteration\eTC\eTR - \bTR\bTC \type{gr} \eTC\bTC transliteration for Greek \eTC\eTR - \bTR\bTC \type{gr_n} \eTC\bTC transliteration for Greek obeying nasalizations \eTC\eTR - \bTR\bTC \type{iso9_ocs} \eTC\bTC == \type{all} plus non-ISO additions for Old (Church) Slavonic \eTC\eTR - \bTR\bTC \type{ocs} \eTC\bTC “scientific” transliteration for Old (Church) Slavonic\eTC\eTR - \bTR\bTC \type{ocs_cz} \eTC\bTC Czech transcription for Old (Church) Slavonic\eTC\eTR - \bTR\bTC \type{ocs_gla} \eTC\bTC “scientific” transliteration for Old (Church) Slavonic / Glagolitic alphabet\eTC\eTR - \bTR\bTC \type{ru} \eTC\bTC ISO~9 Russian \eTC\eTR - \bTR\bTC \type{ru_cz} \eTC\bTC Czech transcription for Russian\eTC\eTR - \bTR\bTC \type{ru_old} \eTC\bTC ISO~9 Russian plus pre-1918 chars (the default)\eTC\eTR - \bTR\bTC \type{ru_transcript_de} \eTC\bTC German transcription for Russian \eTC\eTR - \bTR\bTC \type{ru_transcript_en} \eTC\bTC English transcription for Russian \eTC\eTR - \bTR\bTC \type{sr_tocy} \eTC\bTC Serbian, Latin to Cyrillic \eTC\eTR - \bTR\bTC \type{sr_tolt} \eTC\bTC Serbian, Cyrillic to Latin \eTC\eTR - \eTABLEbody - \bTABLEfoot - \bTR\bTH mode \eTH\bTH description \eTH\eTR - \eTABLEfoot -\eTABLE + +\startplacetable[location=top,title=Transliteration modes.] + \tfx + \starttabulate[|l|p|] + \HL + \NC mode \NC description \NC\NR + \HL + \NC \type{all} \NC {\sc iso}~9 complete \NC\NR + \NC \type{bg_de} \NC Bulgarian, German „scientific“ transliteration\NC\NR + \NC \type{gr} \NC transliteration for Greek \NC\NR + \NC \type{gr_n} \NC transliteration for Greek obeying nasalizations \NC\NR + \NC \type{iso9_ocs} \NC == \type{all} plus non-{\sc iso} additions for Old (Church) Slavonic \NC\NR + \NC \type{ocs} \NC “scientific” transliteration for Old (Church) Slavonic\NC\NR + \NC \type{ocs_cz} \NC Czech transcription for Old (Church) Slavonic\NC\NR + \NC \type{ocs_gla} \NC “scientific” transliteration for Old (Church) Slavonic / Glagolitic alphabet\NC\NR + \NC \type{ru} \NC {\sc iso}~9 Russian \NC\NR + \NC \type{ru_cz} \NC Czech transcription for Russian\NC\NR + \NC \type{ru_old} \NC {\sc iso}~9 Russian plus pre-1918 chars (the default)\NC\NR + \NC \type{ru_transcript_de} \NC German transcription for Russian \NC\NR + \NC \type{ru_transcript_en} \NC English transcription for Russian \NC\NR + \NC \type{sr_tocy} \NC Serbian, Latin to Cyrillic \NC\NR + \NC \type{sr_tolt} \NC Serbian, Cyrillic to Latin \NC\NR + \HL + \stoptabulate +\stopplacetable {\em Nota bene}: The description at this point only serves as a placeholder as the @@ -368,7 +390,7 @@ part of the document be processed according to dutch rules, leaving the overall Another argument, \type{deficient_font} can be used in combination with the modes \type{all}, \type{ru_old} and \type{iso9_ocs}. It lets you circumvent the deficiency that some -fonts show concerning the characters that ISO~9 assigns to +fonts show concerning the characters that {\sc iso}~9 assigns to cyrillic “ь” and “ъ”. Set it to {\em true} to enable it. The actual transliteration is done using the macro @@ -383,7 +405,7 @@ Thus, we would typeset one of Epicuros' sayings like this: μετὰ ἀνάγκης} \stoptyping \noindentation which yields \quotation{\transliterate[mode=gr]{κακὸν ἀνάγκη, ἀλλ' οὐδεμία ἀνάγκη ζῆν -μετὰ ἀνάγκης}} in the pdf output. +μετὰ ἀνάγκης}} in the {\sc pdf} output. } Alternatively there is an environment, \type{\starttransliterate[#1]}, as well, that takes the same arguments. @@ -453,31 +475,33 @@ you should refrain from using them (\type{[mode=ru_transcript_en]}, \type{[mode=ru_transcript_de]}). For Cyrillic scripts the best quality is achieved using the standardized -transliteration according to {\em ISO~9}.\footnote{\cite[authoryear][iso].} +transliteration according to {\sc iso~9}.\footnote{\cite[authoryear][iso].} This method not only covers all contemporary languages that are written in a variety of Cyrillic but provides a bijective mapping on latin characters as well. Consequently, you can unambiguously revert the transliteration into -its original form which was impossible with previous versions of ISO~9 because +its original form which was impossible with previous versions of {\sc +iso}~9 because they contained several exceptions depending on the original language. Although fifteen years old it has not yet made its way into scholarly publications at large so it might not immediately look familiar.\footnote{ A hasty glance at the latest issues of around 20~journals in a local library - revealed that 2~of them actually are using ISO~9, these are {\em Przegląd + revealed that 2~of them actually are using {\sc iso}~9, these are {\em Przegląd wschodni} as of Nr. X, 3 (2008) and {\em Kwartalnik historyczny} as of CXVI, 3 (2009); the latter even contains a table on p.~218 showing a subset of the - ISO~9 transliteration rules. + {\sc iso}~9 transliteration rules. } The diacritics are not identical to the \quotation{scientific} transliteration used in Slavic studies but as long as your editor does not -enforce its traditional method you should always prefer ISO~9 +enforce its traditional method you should always prefer {\sc iso}~9 (\type{[mode=ru]}, \type{[mode=ru_old]}, \type{[mode=all]}). -But ISO~9, too, has its shortcomings. +But {\sc iso}~9, too, has its shortcomings. It has no definitions for historical forms of the cyrillic script like pre-XVIII-century Russian and Old (Church) Slavonic while those are covered by the scholarly transliterations. -To amend the situation the Transliterator provides an extension to ISO~9 for +To amend the situation the Transliterator provides an extension to {\sc +iso}~9 for Old Slavonic containing the glyphs \startluacode local translit = thirddata.translit @@ -540,41 +564,21 @@ are processed first. \setupfloats[spacebefore=small,spaceafter=small] -\placetable[left][none]{% - Processing time for corpus Evgenij Onegin according - to GNU time(1) and the \CONTEXT\ stats. -}{ - \setupTABLE[c][each] [frame=off] - \setupTABLE[r][first] [style=bold,topframe=on,bottomframe=on] - \setupTABLE[r][each] [frame=off,topframe=off,bottomframe=off] - \setupTABLE[r][last] [frame=off,topframe=off,bottomframe=on] - \setupTABLE[c][each] [align=middle] - \setupTABLE[c][first] [align=left] - \setupTABLE[c][2] [alignmentcharacter={.},aligncharacter=yes,align=middle] - \setupTABLE[c][3] [alignmentcharacter={.},aligncharacter=yes,align=middle] - \bTABLE[split=no,stretch=yes] - \bTABLEhead - \bTR - \bTH mode \eTH\bTH time(1) in $s$ \eTH\bTH \CONTEXT \eTH - - \eTR - \eTABLEhead - \bTABLEbody - \tfx - \bTR - \bTC <none> \eTC\bTC 8.98 \eTC\bTC 8.82 \eTC - \eTR\bTR - \bTC \type{all} \eTC\bTC 8.37 \eTC\bTC 8.25 \eTC - \eTR\bTR - \bTC \type{ru_cz} \eTC\bTC 8.61 \eTC\bTC 8.48 \eTC - \eTR\bTR - \bTC \type{ru_transcript_en} \eTC\bTC 9.26 \eTC\bTC 9.10 \eTC - \eTR\bTR - \bTC \type{ru_transcript_de} \eTC\bTC 14.83 \eTC\bTC 14.71 \eTC - \eTR - \eTABLEbody - \eTABLE -} +\startplacetable[location=left,title={ + Processing time for corpus {\language[cs]Evgenij Onegin} according to + GNU time(1) and the \CONTEXT\ stats. +}] + \starttabulate[|l|cg(.)|cg(.)|] + \HL%····················································% + \NC mode \NC time(1) in $s$ \NC \CONTEXT \NC \NR + \NC <none> \NC 8.98 \NC 8.82 \NC \NR + \NC \type{all} \NC 8.37 \NC 8.25 \NC \NR + \NC \type{ru_cz} \NC 8.61 \NC 8.48 \NC \NR + \NC \type{ru_transcript_en} \NC 9.26 \NC 9.10 \NC \NR + \NC \type{ru_transcript_de} \NC 14.83 \NC 14.71 \NC \NR + \HL%····················································% + \stoptabulate +\stopplacetable \setuptolerance[tolerant] Following suggestions from the mailing list, the Transliterator uses {\em LPeg} when substituting. @@ -605,11 +609,13 @@ memory.\footnote{% \chapter[ex]{Examples} \section{Cyrillic scripts} -\subsection{ISO~9 and derivatives} -Several transliteration rules are either strictly ISO~9 compliant (\type{ru}, \type{ru_old}, \type{all}) -or contain ISO~9 as a subset (\type{iso9_ocs}).\footnote{% - Unfortunately there are not yet any language files for some of them so please - excuse the inadequate hyphenation in these cases.% +\subsection{{\sc iso}~9 and derivatives} + +Several transliteration rules are either strictly {\sc iso}~9 compliant +(\type{ru}, \type{ru_old}, \type{all}) or contain {\sc iso}~9 as a +subset (\type{iso9_ocs}).\footnote{% + Unfortunately \CONTEXT\ still lacks language files for some of them + so please excuse the inadequate hyphenation in these cases.% } \trlex{ru}{ru}{cs}{computer-modern-unicode}{% @@ -625,7 +631,8 @@ or contain ISO~9 as a subset (\type{iso9_ocs}).\footnote{% } \trlex{ru_old}{ru}{cs}{computer-modern-unicode}{% - With aditional characters for pre-1981 Russian orthography (100~per cent ISO~9).% + With aditional characters for pre-1981 Russian orthography (100~per + cent {\sc iso}~9).% }{% А~сведется віра, убьютъ сотцкого в~селѣ, ино тебѣ взяти полтіна, а~не сотцкого, @@ -641,7 +648,7 @@ or contain ISO~9 as a subset (\type{iso9_ocs}).\footnote{% } \trlex{all}{ru}{cs}{computer-modern-unicode}{% - The complete cyrillic mapping from ISO~9; transliterating Belarusian.% + The complete cyrillic mapping from {\sc iso}~9; transliterating Belarusian.% }{% Беларуская мова, мова беларусаў, уваходзіць у~сям’ю індаеўрапейскіх моў, яе славянскай групы і~ўсходнеславянскіх моваў падгрупы, на якой размаўляюць @@ -652,7 +659,7 @@ or contain ISO~9 as a subset (\type{iso9_ocs}).\footnote{% } \trlex{all}{uk}{cs}{computer-modern-unicode}{% - The complete cyrillic mapping from ISO~9; transliterating Ukrainian.% + The complete cyrillic mapping from {\sc iso}~9; transliterating Ukrainian.% }{% Украї́нська мова (застарілі назви -- руська мова, проста мова […]) -- слов'янська мова, державна в~Україні та одна з~трьох «офіційних мов на рівних @@ -663,7 +670,7 @@ or contain ISO~9 as a subset (\type{iso9_ocs}).\footnote{% } \trlex{all}{ru}{cs}{computer-modern-unicode}{% - The complete cyrillic mapping from ISO~9; transliterating Serbian.% + The complete cyrillic mapping from {\sc iso}~9; transliterating Serbian.% }{% Српски језик је један од словенских језика из породице индоевропских језика. Први писани споменици у~српској редакцији старословенског језика потичу из XI @@ -674,7 +681,7 @@ or contain ISO~9 as a subset (\type{iso9_ocs}).\footnote{% } \trlex{iso9_ocs}{ru}{cs}{cyrilice}{% - Transliteration rules according to ISO~9 with additions for Old (Church) + Transliteration rules according to {\sc iso}~9 with additions for Old (Church) Slavonic.% }{% Что сѧ дѣѥтѣ по вѣремьнемь~: то ѿидето по вѣрьмьнемь~: приказано бѹдѣте @@ -691,7 +698,8 @@ These transliterations are widely used among scholars, mainly linguists and, to a lesser extent, historians. They comprise large character sets in order to represent the original text adequately and facilitate comparison of texts of the same language written in -different scripts; they are not, however, as easily reversible as ISO~9. +different scripts; they are not, however, as easily reversible as {\sc +iso}~9. \trlex{ocs}{ru}{cs}{cyrilice}{% Transliteration for Old Slavonic used in Slavic studies, taken from the @@ -773,7 +781,8 @@ alphabets are \type{sr_tolt} and \type{sr_tocy}. \subsection{Bulgarian} \trlex{bg_de}{bg}{cs}{computer-modern-unicode}{% - German scientific transliteration for Bulgarian (based on old ISO~9 standard).% + German scientific transliteration for Bulgarian (based on old {\sc + iso}~9 standard).% }{% Българският език е индоевропейски език от групата на южнославянските езици. Той е официалният език на Република @@ -786,7 +795,7 @@ languages: English (via \type{ru_transcript_en}), German (\type{ru_transcript_de}) and Czech (\type{ocs_cz}). At least the German one is almost unreadable if used with strings longer than two words. -As we have the bijective ISO~9 mapping at hand there should be no reason at all +As we have the bijective {\sc iso}~9 mapping at hand there should be no reason at all to use any of them. \trlex{ru_transcript_en}{ru}{en}{computer-modern-unicode}{% |