From 07ded7bc27e19ad200c5e14fd632a7ae8a36ac82 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 24 Dec 2010 00:31:40 +0100 Subject: moved lua stuff from main file to separate file --- .../third/transliterator/transliterator.lua | 220 +++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 tex/context/third/transliterator/transliterator.lua (limited to 'tex/context/third/transliterator/transliterator.lua') diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua new file mode 100644 index 0000000..1bcbb32 --- /dev/null +++ b/tex/context/third/transliterator/transliterator.lua @@ -0,0 +1,220 @@ +#!/usr/bin/env texlua +-------------------------------------------------------------------------------- +-- FILE: transliterator.lua +-- USAGE: to be called by t-transliterator.mkiv +-- DESCRIPTION: basic lua environment for the Transliterator module +-- REQUIREMENTS: latest ConTeXt MkIV +-- AUTHOR: Philipp Gesang (Phg), +-- CREATED: 2010-12-23 22:12:31+0100 +-------------------------------------------------------------------------------- +-- + +translit = translit or {} +translit.tables = translit.tables or {} + +-------------------------------------------------------------------------------- +-- Predefining vowel lists +-------------------------------------------------------------------------------- +-- If you haven't heard of cyrillic scripts until now you might want to read +-- at least the first 15 pages of +-- http://www.uni-giessen.de/partosch/eurotex99/berdnikov2.pdf +-- before you continue reading this file. 
translit.ru_vowels = {"а", "е", "ё", "и", "й", "о", "у", "ы", "э", "ю", "я",
                      "А", "Е", "Ё", "И", "Й", "О", "У", "Ы", "Э", "Ю", "Я"}
translit.ru_consonants = {"б", "в", "г", "д", "ж", "з", "к", "л", "м", "н",
                          "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш", "щ",
                          "Б", "В", "Г", "Д", "Ж", "З", "К", "Л", "М", "Н",
                          "П", "Р", "С", "Т", "Ф", "Х", "Ц", "Ч", "Ш", "Щ"}

-- Substitution tables are the very heart of the Transliterator. Due to the
-- nature of languages and scripts exhaustive substitution is the simplest
-- method for transliterations and transcriptions unless they are one-to-one
-- mappings like those defined in ISO~9.
--
-- To achieve better reusability we split the tables into segments, the most
-- obvious being the \type{*_low} and \type{*_upp} variants for sets of lowercase
-- and uppercase characters. Another set is constituted by e.~g. the
-- \type{ru_old*} tables that allow adding transcription of historical
-- characters if needed; by the way those are included in the default
-- transliteration mode \type{ru_old}.

-- Tables can be found in separate Lua files.
-- See {\tt
--   trans_tables_glag.lua
--   trans_tables_gr.lua
--   trans_tables_iso9.lua
--   trans_tables_scntfc.lua
-- and
--   trans_tables_trsc.lua.}

-- Metatables allow for lazy concatenation.

do
  -- Metatable shared by all ``addable'' dictionaries. The table has to exist
  -- *before* the \type{__add} function is created: a local is not visible
  -- inside its own initialiser, so referring to \type{Dict_add} from within
  -- \type{local Dict_add = { ... }} would silently pick up a (nil) global and
  -- the result of an addition would end up without any metatable.
  local Dict_add = {}

  -- Implements the “+” operator: returns a new dictionary holding the union
  -- of both key sets. Values of the first operand are updated (read:
  -- overridden) by those given in the second. Neither operand is modified.
  -- The result carries the same metatable, so sums can be chained.
  Dict_add.__add = function (dict_a, dict_b)
    assert (type(dict_a) == "table" and type(dict_b) == "table")
    local dict_result = setmetatable({}, Dict_add)

    for key, val in pairs(dict_a) do
      dict_result[key] = val
    end

    for key, val in pairs(dict_b) do
      dict_result[key] = val
    end
    return dict_result
  end

  -- Turns \type{dict} into an addable dictionary by attaching the metatable
  -- above. The table is modified in place and returned.
  function translit.make_add_dict(dict)
    return setmetatable(dict, Dict_add)
  end
end

-- We might want to have all the table data nicely formatted by \CONTEXT\
-- itself, here's how we'll do it. \type{translit.show_tab(t)} handles a
-- single table \type{t}, builds a Natural TABLE out of its content and
-- hands it down to the machine for typesetting. For debugging purposes it
-- does not only print the replacement pairs but shows their code points as
-- well.

-- Handle the input chars and replacement values: returns the string "nil"
-- for the empty string, otherwise the values \type{utf.byte} yields for the
-- positions 1, 2, ... of \type{s} (the unicode positions; can be more than
-- one with composed diacritics), each followed by a single space.
local strempty = function (s)
  if s == "" then return "nil" end

  local codepoints = {}
  local i = 1
  local cp = utf.byte(s, i)
  while cp ~= nil do
    codepoints[i] = cp
    i = i + 1
    cp = utf.byte(s, i)
  end
  -- every code point is followed by a blank, including the last one
  return table.concat(codepoints, " ") .. " "
end

-- Output a transliteration table, nicely formatted with natural tables.
-- \type{tab} is the table itself (not its name); it is traversed with
-- \type{next}, so the row order is unspecified.
-- Every key/value pair becomes one row holding: a running number, the key,
-- \type{string.len} of the key, the value, \type{string.len} of the value
-- and the \type{strempty} renderings of key and value.
-- Lots of calls to context() but as it’s only a goodie this doesn’t
-- really matter.
-- NOTE(review): the columns titled “bytes” / “repl. bytes” are filled with
-- the \type{utf.byte} values, the “n” columns with the byte lengths.
function translit.show_tab (tab)
  -- Titles shared by the head and the foot row of the table.
  local column_titles = {
    "number", "letters", "n", "replacement", "n", "bytes", "repl. bytes",
  }
  local cnt = 0

  context.setupTABLE({"r"}, {"each"},  {style="\\tfx", align="center"})
  context.setupTABLE({"c"}, {"each"},  {frame="off"})
  context.setupTABLE({"r"}, {"each"},  {frame="off"})
  context.setupTABLE({"c"}, {"first"}, {style="italic"})
  context.setupTABLE({"r"}, {"first"}, {style="bold", topframe="on", bottomframe="on"})
  context.setupTABLE({"r"}, {"last"},  {style="bold", topframe="on", bottomframe="on"})
  context.bTABLE({split="yes", option="stretch"})

  -- head row, typeset with TH cells
  context.bTABLEhead()
  context.bTR()
  for _, title in ipairs(column_titles) do
    context.bTH() context(title) context.eTH()
  end
  context.eTR()
  context.eTABLEhead()

  -- one body row per substitution pair
  context.bTABLEbody()
  for key, val in next, tab do
    cnt = cnt + 1
    context.bTR()
    context.bTC() context(cnt)             context.eTC()
    context.bTC() context(key)             context.eTC()
    context.bTC() context(string.len(key)) context.eTC()
    context.bTC() context(val)             context.eTC()
    context.bTC() context(string.len(val)) context.eTC()
    context.bTC() context(strempty(key))   context.eTC()
    context.bTC() context(strempty(val))   context.eTC()
    context.eTR()
  end
  context.eTABLEbody()

  -- foot row repeats the titles, typeset with ordinary TC cells
  context.bTABLEfoot()
  context.bTR()
  for _, title in ipairs(column_titles) do
    context.bTC() context(title) context.eTC()
  end
  context.eTR()
  context.eTABLEfoot()
  context.eTABLE()
end

-- Having to pick out single tables for printing can be tedious, therefore we
-- let Lua do the job in our stead.
-- \type{translit.show_all_tabs()} calls
-- \type{translit.show_tab} on every table that is registered with
-- \type{translit.tables} -- and uses its registered key as table heading.

-- Loads all table files, runs the rule generators and typesets one chapter
-- with one section per entry of \type{translit.tables}.
function translit.show_all_tabs ()
  -- Every table file is loaded exactly once ("trans_tables_trsc" used to be
  -- listed twice, which merely executed the same file a second time).
  local table_files = {
    "trans_tables_iso9",
    "trans_tables_trsc",
    "trans_tables_scntfc",
    "trans_tables_glag",
    "trans_tables_gr",
  }
  for i = 1, #table_files do
    environment.loadluafile (table_files[i])
  end

  -- NOTE(review): both generators are not defined in this file; presumably
  -- they come from one of the files loaded above -- confirm.
  translit.gen_rules_en()
  translit.gen_rules_de()

  -- pairs() visits the registered tables in unspecified order. Collect the
  -- keys and sort them so that the generated document is reproducible.
  -- Comparing via tostring() keeps the sort safe for non-string keys.
  local names = {}
  for name in pairs(translit.tables) do
    names[#names + 1] = name
  end
  table.sort(names, function (a, b) return tostring(a) < tostring(b) end)

  -- Output all translation tables that are registered within translit.tables.
  context.chapter("Transliterator Showing All Tables")
  for i = 1, #names do
    local name = names[i]
    context.section(name)
    translit.show_tab (translit.tables[name])
  end
end


-- \type{translit.transliterate(m, t)} constitutes the
-- metafunction that is called by the \type{\transliterate} command.
-- It loads the transliteration tables according to \type{method} and calls the
-- corresponding function.

-- Lookup table: transliteration method -> table file to load plus the name
-- of the field of \type{translit} that is called afterwards. Only the name
-- is stored, the function itself is fetched after the file has been loaded.
local method_handlers = {}

do
  local register = function (file, handler, methods)
    for _, method in ipairs(methods) do
      method_handlers[method] = { file = file, handler = handler }
    end
  end

  register ("trans_tables_iso9", "iso9", {
    "ru",
    "ru_old",
    "ru_old_jer_hack",
    "all",
  })
  register ("trans_tables_trsc", "transcript", {
    "ru_transcript_de",
    "ru_transcript_de_exp", -- experimental lpeg
    "ru_transcript_en",
    "ru_transcript_en_sub", -- old multiple substitution
    "ru_cz",
    "ocs_cz",
  })
  register ("trans_tables_scntfc", "scientific", {
    "iso9_ocs",
    "iso9_ocs_hack",
    "ocs",
    "ocs_gla",
  })
  register ("trans_tables_gr", "dogreek", {
    "gr",
    "gr_n",
  })
end

-- Entry point: picks the handler registered for \type{method}, loads its
-- table file, lets the handler process \type{text} and passes the result to
-- \type{context}. For a method without registered handler the text is
-- passed to \type{context} unchanged.
function translit.transliterate (method, text)
  local entry = method_handlers[method]
  if entry then
    environment.loadluafile (entry.file)
    text = translit[entry.handler] (method, text)
  end
  context (text)
end

-- Running number of the debug messages emitted so far.
translit.debug_count = 0

-- for internal use only: increments the counter and typesets a small
-- numbered debug marker.
function translit.debug_next ()
  local nr = translit.debug_count + 1
  translit.debug_count = nr
  context("\\tfxx{\\bf translit debug msg. nr.~" .. nr .. "}")
end