From 814b93d12bc9a0792b150527495ece0847a343fc Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 21 Nov 2021 19:22:18 +0100 Subject: reorganize source tree --- .../third/transliterator/transliterator.lua | 283 --------------------- 1 file changed, 283 deletions(-) delete mode 100644 tex/context/third/transliterator/transliterator.lua (limited to 'tex/context/third/transliterator/transliterator.lua') diff --git a/tex/context/third/transliterator/transliterator.lua b/tex/context/third/transliterator/transliterator.lua deleted file mode 100644 index c101ec5..0000000 --- a/tex/context/third/transliterator/transliterator.lua +++ /dev/null @@ -1,283 +0,0 @@ -#!/usr/bin/env texlua --------------------------------------------------------------------------------- --- FILE: transliterator.lua --- USAGE: to be called by t-transliterator.mkiv --- DESCRIPTION: basic lua environment for the Transliterator module --- REQUIREMENTS: latest ConTeXt MkIV --- AUTHOR: Philipp Gesang (Phg), --- CREATED: 2010-12-23 22:12:31+0100 --------------------------------------------------------------------------------- --- - -thirddata = thirddata or { } -thirddata.translit = thirddata.translit or { } -local translit = thirddata.translit -translit.tables = translit.tables or { } -translit.methods = translit.methods or { } -translit.deficient_font = "no" -translit.parser_cache = { } - -local utf8 = unicode and unicode.utf8 or utf8 -local utf8byte = utf8.byte -local utf8len = utf8.len - --------------------------------------------------------------------------------- --- Predefining vowel lists --------------------------------------------------------------------------------- --- If you haven't heard of cyrillic scripts until now you might want to read --- at least the first 15 pages of --- http://www.uni-giessen.de/partosch/eurotex99/berdnikov2.pdf --- before you continue reading this file. 
translit.ru_vowels = {"а", "е", "ё", "и", "й", "о", "у", "ы", "э", "ю", "я",
                      "А", "Е", "Ё", "И", "Й", "О", "У", "Ы", "Э", "Ю", "Я"}
translit.ru_consonants = {"б", "в", "г", "д", "ж", "з", "к", "л", "м", "н",
                          "п", "р", "с", "т", "ф", "х", "ц", "ч", "ш", "щ",
                          "Б", "В", "Г", "Д", "Ж", "З", "К", "Л", "М", "Н",
                          "П", "Р", "С", "Т", "Ф", "Х", "Ц", "Ч", "Ш", "Щ"}

-- Substitution tables are the very heart of the Transliterator. Due to the
-- nature of languages and scripts exhaustive substitution is the simplest
-- method for transliterations and transcriptions unless they are one-to-one
-- mappings like those defined in ISO~9.
--
-- To achieve better reusability we split the tables into segments, the most
-- obvious being the \type{*_low} and \type{*_upp} variants for sets of lowercase
-- and uppercase characters. Another set is constituted by e.~g. the
-- \type{ru_old*} tables that allow adding transcription of historical
-- characters if needed; by the way those are included in the default
-- transliteration mode \type{ru_old}.

-- Tables can be found in separate Lua files.
-- See {\tt
-- trans_tables_glag.lua
-- trans_tables_gr.lua
-- trans_tables_iso9.lua
-- trans_tables_scntfc.lua
-- and
-- trans_tables_trsc.lua.}

--------------------------------------------------------------------------------
-- Metatables allow for lazy concatenation.
--------------------------------------------------------------------------------

do
    -- This returns the Union of both key sets for the “+” operator.
    -- The values of the first table will be updated (read: overridden) by
    -- those given in the second.
    --
    -- The metatable is forward-declared on purpose: the scope of a local
    -- only begins after its declaration statement, so writing
    -- `local Dict_add = { ... Dict_add ... }` would make the inner reference
    -- resolve to a global (nil) and the result of “+” would be returned
    -- without any metatable.
    local Dict_add
    Dict_add = {
        __add = function (dict_a, dict_b)
            assert (type(dict_a) == "table" and type(dict_b) == "table",
                    "translit: both operands of '+' must be tables")
            -- The result carries the same metatable, so it can take part
            -- in further additions.
            local dict_result = setmetatable({}, Dict_add)

            for key, val in pairs(dict_a) do
                dict_result[key] = val
            end

            -- Second operand last: its values win on key collisions.
            for key, val in pairs(dict_b) do
                dict_result[key] = val
            end
            return dict_result
        end
    }

    --- Make a table usable with the “+” (union) operator.
    -- @param dict table to equip; its metatable is replaced in place
    -- @return the very same table `dict`
    translit.make_add_dict = function (dict)
        return setmetatable(dict, Dict_add)
    end
end

--------------------------------------------------------------------------------
-- Auxiliary Functions
--------------------------------------------------------------------------------

-- Generate a rule pattern from hash table.
do
    local P, R, V = lpeg.P, lpeg.R, lpeg.V

    --- Extend an lpeg pattern by one literal alternative per key of `dict`.
    -- Keys are grouped by the length that `utf8len` reports for them and
    -- the groups are appended longest first (multi-char rules first), so
    -- that in the resulting choice a longer key is tried before a shorter
    -- one. The order of keys of equal length is unspecified.
    -- @param dict  table whose keys are the strings to match; the values
    --              are not looked at
    -- @param rules pattern to extend, or nil to start a fresh one
    -- @return the combined pattern; `rules` itself if `dict` has no keys
    translit.addrules = function (dict, rules)
        local by_length, occurring_lengths = { }, { }
        for chr, _ in next, dict do
            local l = utf8len(chr)
            local bucket = by_length[l]
            if not bucket then
                bucket = { }
                by_length[l] = bucket
                occurring_lengths[#occurring_lengths+1] = l
            end
            bucket[#bucket+1] = chr
        end
        table.sort(occurring_lengths)
        -- Walk the lengths in descending order.
        for i=#occurring_lengths, 1, -1 do
            local bucket = by_length[occurring_lengths[i]]
            for j=1, #bucket do
                local literal = P(bucket[j])
                if rules then
                    rules = rules + literal
                else
                    rules = literal
                end
            end
        end
        return rules
    end

    -- Modified version of Hans’s utf pattern (l-lpeg.lua).
    -- Grammar matching a single one- to four-byte sequence: a byte in
    -- 0–127, or a lead byte (194–223, 224–239, 240–244) followed by one,
    -- two or three bytes in 128–191 respectively.
    translit.utfchar = P{
        V"utf8one" + V"utf8two" + V"utf8three" + V"utf8four",

        utf8next  = R("\128\191"),
        utf8one   = R("\000\127"),
        utf8two   = R("\194\223") * V"utf8next",
        utf8three = R("\224\239") * V"utf8next" * V"utf8next",
        utf8four  = R("\240\244") * V"utf8next" * V"utf8next" * V"utf8next",
    }
end

-- We might want to have all the table data nicely formatted by \CONTEXT\
-- itself, here's how we'll do it. \type{translit.show_tab(t)} handles a
-- single table \type{t}, builds a Natural TABLE out of its content and
-- hands it down to the machine for typesetting.
-- For debugging purposes it does not only print the replacement pairs but
-- shows their code points as well.

-- handle the input chars and replacement values
--- Turn a string into a printable list of the numbers `utf8byte` yields.
-- @param s string (a table key or a replacement value)
-- @return the literal string "nil" for the empty string; otherwise the
--         values of utf8byte(s, i) for i = 1, 2, … up to the first nil,
--         each followed by a single space
local strempty = function (s)
    if s == "" then
        return "nil"
    end
    -- add the unicode positions of the replacements (can be more
    -- than one with composed diacritics)
    local codes = { }
    local i = 1
    local code = utf8byte(s, i)
    while code ~= nil do
        codes[#codes+1] = code
        i = i + 1
        code = utf8byte(s, i)
    end
    -- Collected in a table and joined once instead of growing a string
    -- inside the loop; the trailing space of the old output is kept.
    return table.concat(codes, " ") .. " "
end

--- Typeset one transliteration table as a natural table.
-- Every key/value pair of `tab` becomes one row showing a running number,
-- the key, its string.len, the value, its string.len, and the code lists
-- of both as produced by strempty. Rows appear in the (unspecified) order
-- of `next`.
-- Lots of calls to context() but as it’s only a goodie this doesn’t
-- really matter.
-- @param tab table mapping input strings to replacement strings
function translit.show_tab (tab)
    -- Column captions, shared by the head and the foot of the table.
    local columns = {
        "number", "letters", "n", "replacement", "n", "bytes", "repl. bytes",
    }

    context.setupTABLE({"r"}, {"each"},  {style="\\tfx", align="center"})
    context.setupTABLE({"c"}, {"each"},  {frame="off"})
    context.setupTABLE({"r"}, {"each"},  {frame="off"})
    context.setupTABLE({"c"}, {"first"}, {style="italic"})
    context.setupTABLE({"r"}, {"first"}, {style="bold", topframe="on", bottomframe="on"})
    context.setupTABLE({"r"}, {"last"},  {style="bold", topframe="on", bottomframe="on"})
    context.bTABLE({split="yes", option="stretch"})

    context.bTABLEhead()
    context.bTR()
    for _, caption in ipairs(columns) do
        context.bTH() context(caption) context.eTH()
    end
    context.eTR()
    context.eTABLEhead()

    context.bTABLEbody()
    local cnt = 0
    for key, val in next, tab do
        cnt = cnt + 1
        context.bTR()
        context.bTC() context(cnt)             context.eTC()
        context.bTC() context(key)             context.eTC()
        context.bTC() context(string.len(key)) context.eTC()
        context.bTC() context(val)             context.eTC()
        context.bTC() context(string.len(val)) context.eTC()
        context.bTC() context(strempty(key))   context.eTC()
        context.bTC() context(strempty(val))   context.eTC()
        context.eTR()
    end
    context.eTABLEbody()

    context.bTABLEfoot() context.bTR()
    for _, caption in ipairs(columns) do
        context.bTC() context(caption) context.eTC()
    end
    context.eTR()
    context.eTABLEfoot()
    context.eTABLE()
end

-- Having to pick out single tables for printing can be tedious, therefore we
-- let Lua do the job in our stead. \type{translit.show_all_tabs()} calls
-- \type{translit.show_tab} on every table that is registered with
-- \type{translit.tables} -- and uses its registered key as table heading.

--- Load the table files, then typeset every entry of translit.tables.
-- Each entry gets its own section, titled with the key it is registered
-- under. Sections are emitted sorted by that key, so the output no longer
-- depends on the traversal order of pairs().
function translit.show_all_tabs ()
    -- Each file is loaded exactly once. The original list loaded
    -- "trans_tables_trsc" twice and left out "trans_tables_bg", which
    -- translit.transliterate loads for the bg_* methods.
    -- NOTE(review): assumes the duplicate was a slip for the bg tables.
    local table_files = {
        "trans_tables_iso9",
        "trans_tables_trsc",
        "trans_tables_scntfc",
        "trans_tables_sr",
        "trans_tables_bg",
        "trans_tables_glag",
        "trans_tables_gr",
    }
    for i = 1, #table_files do
        environment.loadluafile (table_files[i])
    end
    translit.gen_rules_en()
    translit.gen_rules_de()

    -- Output all translation tables that are registered within
    -- translit.tables.
    context.chapter("Transliterator Showing All Tables")
    local names = { }
    for name in pairs(translit.tables) do
        names[#names+1] = name
    end
    -- Compare via tostring so that a non-string key cannot break the sort.
    table.sort(names, function (a, b) return tostring(a) < tostring(b) end)
    for i = 1, #names do
        local name = names[i]
        context.section(name)
        translit.show_tab (translit.tables[name])
    end
end

-- for internal use only

translit.debug_count = 0

--- Typeset a numbered debug marker and advance translit.debug_count.
function translit.debug_next ()
    translit.debug_count = translit.debug_count + 1
    context("\\tfxx{\\bf translit debug msg. nr.~" .. translit.debug_count .. "}")
end

--------------------------------------------------------------------------------
-- User-level Function
--------------------------------------------------------------------------------

-- \type{translit.transliterate(m, t)} constitutes the
-- metafunction that is called by the \type{\transliterate} command.
-- It loads the transliteration tables according to \type{method} and calls the
-- corresponding function.

-- Those supposedly are the most frequently used so it won’t hurt to preload
-- them. The rest will be loaded on request.
environment.loadluafile ("trans_tables_iso9")

-- Methods that are looked up by their exact name, and the table file that
-- is loaded on first use of each.
local table_file_for_method = {
    ru_transcript_de     = "trans_tables_trsc",
    ru_transcript_de_exp = "trans_tables_trsc",   -- experimental lpeg
    ru_transcript_en     = "trans_tables_trsc",
    ru_transcript_en_exp = "trans_tables_trsc",
    ru_cz                = "trans_tables_trsc",
    ocs_cz               = "trans_tables_trsc",
    iso9_ocs             = "trans_tables_scntfc",
    iso9_ocs_hack        = "trans_tables_scntfc",
    ocs                  = "trans_tables_scntfc",
    ocs_gla              = "trans_tables_scntfc",
    gr                   = "trans_tables_gr",
    gr_n                 = "trans_tables_gr",
}

--- Transliterate `text` with the method called `method`.
-- If no function is registered under translit.methods[method] yet, the
-- table file belonging to the method is loaded first.
-- @param method name of the transliteration method
-- @param text   the text that is passed on to the method
-- @return the result of the method if translit.__script is set; otherwise
--         nothing, the result is handed to context() instead
-- Raises an error if no function is registered for `method` even after
-- the loading attempt.
function translit.transliterate (method, text)
    local methods = translit.methods
    if not methods[method] then -- register tables and method
        local file = table_file_for_method[method]
        if not file and type(method) == "string" then
            if method:match("^sr_") then
                file = "trans_tables_sr"
            elseif method:match("^bg_") then -- only bg_de for now
                file = "trans_tables_bg"
            end
        end
        if file then
            environment.loadluafile (file)
        end
    end

    local handler = methods[method]
    if not handler then
        -- Fail with a message that names the method instead of the bare
        -- "attempt to call a nil value".
        error("translit: unknown transliteration method '"
              .. tostring(method) .. "'", 2)
    end

    if translit.__script then
        return handler(text)
    end
    context ( handler(text) )
end

-- vim:sw=4:ts=4:expandtab:ft=lua
--
cgit v1.2.3