summary | refs | log | tree | commit | diff
path: root/tex
diff options
context:
space:
mode:
author Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> 2010-03-02 14:06:04 +0100
committer Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> 2010-03-02 14:06:04 +0100
commit f0765800a77932422dcdb02766a06ddd6c7541d1 (patch)
tree d7b6b908284d6d65bc276eb8370c6e45031c6eaa /tex
parent fea0bcccf81692ab4e7bcb2a2e60793a2247daff (diff)
download transliterator-f0765800a77932422dcdb02766a06ddd6c7541d1.tar.gz
Continued documentation
Diffstat (limited to 'tex')
-rw-r--r-- tex/context/third/transliterator/t-transliterator.mkiv | 155
1 files changed, 86 insertions, 69 deletions
diff --git a/tex/context/third/transliterator/t-transliterator.mkiv b/tex/context/third/transliterator/t-transliterator.mkiv
index fc07c1d..8f8cadf 100644
--- a/tex/context/third/transliterator/t-transliterator.mkiv
+++ b/tex/context/third/transliterator/t-transliterator.mkiv
@@ -11,7 +11,7 @@
%D email={pgesang at ix dot urz dot uni-heidelberg dot de}]
%D This module is licensed under the conditions of the BSD license with
%D two clauses: http://www.freebsd.org/copyright/freebsd-license.html.
-%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/.\newpage
+%D Substitute /OWNER/Philipp Gesang/; /YEAR/2010/.
\writestatus{loading}{Transliteration from non-Latin scripts}
@@ -766,6 +766,8 @@ translit.tables["Czech transcription uppercase"] = translit.ru_trsc_cz_upp
translit.ru_trsc_cz_add_low = {
["ѕ"] = "dz",
+ ["з"] = "z",
+ ["ꙁ"] = "z",
["і"] = "ï",
["ѹ"] = "u",
["ѡ"] = "ō",
@@ -790,6 +792,8 @@ translit.tables["Czech transcription for OCS and pre-1918 lowercase"] = translit
translit.ru_trsc_cz_add_upp = {
["Ѕ"] = "Dz",
+ ["З"] = "Z",
+ ["Ꙁ"] = "Z",
["І"] = "Ï",
["Ѹ"] = "U",
["Ѡ"] = "Ō",
@@ -824,6 +828,15 @@ translit.tables["Czech transcription for OCS and pre-1918 uppercase"] = translit
-- Source p. 77 of
-- http://www.schaeken.nl/lu/research/online/publications/akslstud/as2_03_kapitel_c.pdf
+-----------------------------------------------------------------------
+-- Lowercase and uppercase letter Uk -- “scientific transliteration” --
+-----------------------------------------------------------------------
+
+translit.ocs_uk = {
+ ["oу"] = "u",
+ ["оу"] = "u",
+ ["Оу"] = "U",
+}
-----------------------------------------------------------------------------
-- Lowercase pre-Peter cyrillic characters -- “scientific transliteration” --
-----------------------------------------------------------------------------
@@ -853,7 +866,6 @@ translit.ocs_low = {
["р"] = "r",
["с"] = "s",
["т"] = "t",
- ["оу"] = "u",
["ѹ"] = "u",
["ꙋ"] = "u",
["ф"] = "f",
@@ -915,7 +927,6 @@ translit.ocs_upp = {
["Р"] = "R",
["С"] = "S",
["Т"] = "T",
- ["Оу"] = "U",
["Ѹ"] = "U",
["ꙋ"] = "U",
["Ф"] = "F",
@@ -1803,61 +1814,64 @@ end
function translit.show_tab (tab)
-- Output a transliteration table, nicely formatted with natural tables.
local cnt = 0
- context ("\\setupTABLE[r][each] [style=\\tfx,align=center] ")
- context ("\\setupTABLE[c][each] [frame=off]")
- context ("\\setupTABLE[r][each] [frame=off]")
- context ("\\setupTABLE[c][first] [style=italic]")
- context ("\\setupTABLE[r][first] [style=bold,topframe=on,bottomframe=on]")
- context ("\\setupTABLE[r][last] [style=bold,topframe=on,bottomframe=on]")
- context ("\\bTABLE [split=yes,option=stretch]")
- context ("\\bTABLEhead\\bTR"..
- "\\bTH Number\\eTH" ..
- "\\bTH letters\\eTH" ..
- "\\bTH n\\eTH" ..
- "\\bTH replacement\\eTH"..
- "\\bTH n\\eTH" ..
- "\\bTH bytes\\eTH"..
- "\\bTH repl. bytes"..
- "\\eTH\\eTR\\eTABLEhead ")
- context("\\bTABLEbody")
- for key, val in pairs(tab) do
- local strempty = function (s)
- -- Some characters might not be replaced but removed, others might be
- -- multi-char sequences.
- if #s == 0 then return "nil"
- else
- local i = 0
- local r = ""
- -- The following loop could be replaced by checking the string length with utf.len(s) …
- repeat
- i = i + 1
- if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. " " end
- until (false)
- return r
+ context.setupTABLE({"r"}, {"each"}, {style="\\tfx", align="center"})
+ context.setupTABLE({"c"}, {"each"}, {frame="off"})
+ context.setupTABLE({"r"}, {"each"}, {frame="off"})
+ context.setupTABLE({"c"}, {"first"}, {style="italic"})
+ context.setupTABLE({"r"}, {"first"}, {style="bold", topframe="on", bottomframe="on"})
+ context.setupTABLE({"r"}, {"last"}, {style="bold", topframe="on", bottomframe="on"})
+ context.bTABLE({split="yes", option="stretch"})
+ context.bTABLEhead()
+ context.bTR()
+ context.bTH() context("number") context.eTH()
+ context.bTH() context("letters") context.eTH()
+ context.bTH() context("n") context.eTH()
+ context.bTH() context("replacement") context.eTH()
+ context.bTH() context("n") context.eTH()
+ context.bTH() context("bytes") context.eTH()
+ context.bTH() context("repl. bytes") context.eTH()
+ context.eTR()
+ context.eTABLEhead()
+ context.bTABLEbody()
+ for key, val in pairs(tab) do
+ local strempty = function (s)
+ -- Some characters might not be replaced but removed, others might be
+ -- multi-char sequences.
+ if #s == 0 then return "nil"
+ else
+ local i = 0
+ local r = ""
+ -- The following loop could be replaced by checking the string length with utf.len(s) …
+ repeat
+ i = i + 1
+ if utf.byte(s,i) == nil then break else r = r .. utf.byte(s,i) .. " " end
+ until (false)
+ return r
+ end
+ end
+ cnt = cnt + 1
+ context.bTR()
+ context.bTC() context(cnt) context.eTC()
+ context.bTC() context(key) context.eTC()
+ context.bTC() context(utf.len(key)) context.eTC()
+ context.bTC() context(val) context.eTC()
+ context.bTC() context(utf.len(val)) context.eTC()
+ context.bTC() context(strempty(key)) context.eTC()
+ context.bTC() context(strempty(val)) context.eTC()
+ context.eTR()
end
- end
- cnt = cnt + 1
- context ("\\bTR\\bTC " .. cnt .. "\\eTC")
- context ("\\bTC " ..
- key .. "\\eTC\\bTC " ..
- utf.len(key) .. "\\eTC\\bTC " ..
- val .. "\\eTC\\bTC " ..
- utf.len(val) .. "\\eTC\\bTC " ..
- strempty(key) .. "\\eTC\\bTC " ..
- strempty(val) .. "\\eTC")
- context ("\\eTR ")
- end
- context("\\eTABLEbody")
- context ("\\bTABLEfoot\\bTR"..
- "\\bTC Number\\eTC" ..
- "\\bTC letters\\eTC" ..
- "\\bTC n\\eTC" ..
- "\\bTC replacement\\eTC"..
- "\\bTC n\\eTC" ..
- "\\bTC bytes\\eTC"..
- "\\bTC repl. bytes"..
- "\\eTC\\eTR\\eTABLEfoot ")
- context ("\\eTABLE ")
+ context.eTABLEbody()
+ context.bTABLEfoot() context.bTR()
+ context.bTC() context("number") context.eTC()
+ context.bTC() context("letters") context.eTC()
+ context.bTC() context("n") context.eTC()
+ context.bTC() context("replacement") context.eTC()
+ context.bTC() context("n") context.eTC()
+ context.bTC() context("bytes") context.eTC()
+ context.bTC() context("repl. bytes") context.eTC()
+ context.eTR()
+ context.eTABLEfoot()
+ context.eTABLE()
end
\stopluacode
@@ -1891,6 +1905,18 @@ function translit.transliterate (method, text)
if method == "ru" then
translit.add_table(repl_tab, translit.ru_upp)
translit.add_table(repl_tab, translit.ru_low)
+ elseif method == "ru_old" then
+ translit.add_table(repl_tab, translit.ru_upp)
+ translit.add_table(repl_tab, translit.ru_low)
+ translit.add_table(repl_tab, translit.ru_old_upp)
+ translit.add_table(repl_tab, translit.ru_old_low)
+ elseif method == "all" then
+ translit.add_table(repl_tab, translit.ru_upp)
+ translit.add_table(repl_tab, translit.ru_low)
+ translit.add_table(repl_tab, translit.ru_old_upp)
+ translit.add_table(repl_tab, translit.ru_old_low)
+ translit.add_table(repl_tab, translit.non_ru_upp)
+ translit.add_table(repl_tab, translit.non_ru_low)
elseif method == "ru_transcript_de" then
text = translit.subst (text, translit.ru_trsc_jrule)
text = translit.subst (text, translit.ru_trsc_irule)
@@ -1910,18 +1936,6 @@ function translit.transliterate (method, text)
translit.add_table(repl_tab, translit.ru_trsc_en_low_first)
translit.add_table(repl_tab, translit.ru_trsc_en_upp)
translit.add_table(repl_tab, translit.ru_trsc_en_low)
- elseif method == "ru_old" then
- translit.add_table(repl_tab, translit.ru_upp)
- translit.add_table(repl_tab, translit.ru_low)
- translit.add_table(repl_tab, translit.ru_old_upp)
- translit.add_table(repl_tab, translit.ru_old_low)
- elseif method == "all" then
- translit.add_table(repl_tab, translit.ru_upp)
- translit.add_table(repl_tab, translit.ru_low)
- translit.add_table(repl_tab, translit.ru_old_upp)
- translit.add_table(repl_tab, translit.ru_old_low)
- translit.add_table(repl_tab, translit.non_ru_upp)
- translit.add_table(repl_tab, translit.non_ru_low)
elseif method == "iso9_ocs" then
translit.add_table(repl_tab, translit.ru_upp)
translit.add_table(repl_tab, translit.ru_low)
@@ -1930,6 +1944,9 @@ function translit.transliterate (method, text)
translit.add_table(repl_tab, translit.ocs_add_upp)
translit.add_table(repl_tab, translit.ocs_add_low)
elseif method == "ocs" then
+ translit.add_table(repl_tab, translit.ocs_uk)
+ text = translit.subst (text, repl_tab)
+ repl_tab = {}
translit.add_table(repl_tab, translit.ocs_low)
translit.add_table(repl_tab, translit.ocs_upp)
elseif method == "ocs_gla" then