diff options
author | Marius <mariausol@gmail.com> | 2010-07-04 15:32:09 +0300 |
---|---|---|
committer | Marius <mariausol@gmail.com> | 2010-07-04 15:32:09 +0300 |
commit | 85b7bc695629926641c7cb752fd478adfdf374f3 (patch) | |
tree | 80293f5aaa7b95a500a78392c39688d8ee7a32fc /scripts/context/lua/mtx-babel.lua | |
download | context-85b7bc695629926641c7cb752fd478adfdf374f3.tar.gz |
stable 2010-05-24 13:10
Diffstat (limited to 'scripts/context/lua/mtx-babel.lua')
-rw-r--r-- | scripts/context/lua/mtx-babel.lua | 430 |
1 files changed, 430 insertions, 0 deletions
if not modules then modules = { } end modules ['mtx-babel'] = {
    version   = 1.002,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- data tables by Thomas A. Schmitz

-- This script rewrites babel-style ASCII input for polytonic Greek into
-- UTF-8. It is driven by the command line: --convert selects the action,
-- --language picks an entry of the converters table below and --structure
-- picks one of the parsers registered for that language.
--
-- NOTE(review): lpeg, io.loaddata, io.savedata, environment, logs and
-- messages are not defined in this file; they are assumed to be provided
-- by the mtxrun runtime that loads this script.

scripts       = scripts       or { }
scripts.babel = scripts.babel or { }

do

    -- language name -> { structure name -> lpeg pattern }
    local converters = { }

    -- greek
    --
    -- Each replace_NN table maps one input letter to its replacement. The
    -- comment after the opening brace shows the input shape handled by the
    -- matching greek_NN pattern further down: the literal prefix, "*" for
    -- the letter that is looked up in the table and, where present, the
    -- literal "|" that has to follow the letter.

    local replace_01 = { -- <' * |
        a = "ᾅ",
        h = "ᾕ",
        w = "ᾥ",
    }

    local replace_02 = { -- >' * |
        a = "ᾄ",
        h = "ᾔ",
        w = "ᾤ",
    }

    local replace_03 = { -- <` * |
        a = "ᾃ",
        h = "ᾓ",
        w = "ᾣ",
    }

    local replace_04 = { -- >` * |
        a = "ᾂ",
        h = "ᾒ",
        w = "ᾢ",
    }

    local replace_05 = { -- <~ * |
        a = "ᾇ",
        h = "ᾗ",
        w = "ᾧ",
    }

    local replace_06 = { -- >~ * |
        a = "ᾆ",
        h = "ᾖ",
        w = "ᾦ"
    }

    local replace_07 = { -- "' *
        i = "ΐ",
        u = "ΰ",
    }

    local replace_08 = { -- "` *
        i = "ῒ",
        u = "ῢ",
    }

    local replace_09 = { -- "~ *
        i = "ῗ",
        u = "ῧ",
    }

    local replace_10 = { -- <' *
        a = "ἅ",
        e = "ἕ",
        h = "ἥ",
        i = "ἵ",
        o = "ὅ",
        u = "ὕ",
        w = "ὥ",
        A = "Ἅ",
        E = "Ἕ",
        H = "Ἥ",
        I = "Ἵ",
        O = "Ὅ",
        U = "Ὕ",
        W = "Ὥ",
    }

    local replace_11 = { -- >' *
        a = "ἄ",
        e = "ἔ",
        h = "ἤ",
        i = "ἴ",
        o = "ὄ",
        u = "ὔ",
        w = "ὤ",
        A = "Ἄ",
        E = "Ἔ",
        H = "Ἤ",
        I = "Ἴ",
        O = "Ὄ",
        U = "῎Υ", -- written as a separate accent character followed by Υ
        W = "Ὤ",
    }

    local replace_12 = { -- <` *
        a = "ἃ",
        e = "ἓ",
        h = "ἣ",
        i = "ἳ",
        o = "ὃ",
        u = "ὓ",
        w = "ὣ",
        A = "Ἃ",
        E = "Ἓ", -- was the same glyph as replace_13.E (the >` form)
        H = "Ἣ",
        I = "Ἳ",
        O = "Ὃ",
        U = "Ὓ",
        W = "Ὣ",
    }

    local replace_13 = { -- >` *
        a = "ἂ",
        e = "ἒ",
        h = "ἢ",
        i = "ἲ",
        o = "ὂ",
        u = "ὒ",
        w = "ὢ",
        A = "Ἂ",
        E = "Ἒ",
        H = "Ἢ",
        I = "Ἲ",
        O = "Ὂ",
        U = "῍Υ", -- written as a separate accent character followed by Υ
        W = "Ὢ",
    }

    local replace_14 = { -- <~ *
        a = "ἇ",
        h = "ἧ",
        i = "ἷ",
        u = "ὗ",
        w = "ὧ",
        A = "Ἇ",
        H = "Ἧ",
        I = "Ἷ",
        U = "Ὗ",
        W = "Ὧ",
    }

    local replace_15 = { -- >~ *
        a = "ἆ",
        h = "ἦ",
        i = "ἶ",
        u = "ὖ",
        w = "ὦ",
        A = "Ἆ",
        H = "Ἦ",
        I = "Ἶ",
        U = "῏Υ", -- written as a separate accent character followed by Υ
        W = "Ὦ",
    }

    local replace_16 = { -- ' * |
        a = "ᾴ",
        h = "ῄ",
        w = "ῴ",
    }

    local replace_17 = { -- ` * |
        a = "ᾲ",
        h = "ῂ",
        w = "ῲ",
    }

    local replace_18 = { -- ~ * |
        a = "ᾷ",
        h = "ῇ",
        w = "ῷ"
    }

    local replace_19 = { -- ' *
        a = "ά",
        e = "έ",
        h = "ή",
        i = "ί",
        o = "ό",
        u = "ύ",
        w = "ώ",
        ["'"] = "’", -- two quotes in a row become one typographic quote
    }

    local replace_20 = { -- ` *
        a = "ὰ",
        e = "ὲ",
        h = "ὴ",
        i = "ὶ",
        o = "ὸ",
        u = "ὺ",
        w = "ὼ",
    }

    local replace_21 = { -- ~ *
        a = "ᾶ",
        h = "ῆ",
        i = "ῖ",
        u = "ῦ",
        w = "ῶ",
    }

    local replace_22 = { -- < *
        a = "ἁ",
        e = "ἑ",
        h = "ἡ",
        i = "ἱ",
        o = "ὁ",
        u = "ὑ",
        w = "ὡ",
        r = "ῥ",
        A = "Ἁ",
        E = "Ἑ",
        H = "Ἡ",
        I = "Ἱ",
        O = "Ὁ",
        U = "Ὑ",
        W = "Ὡ",
        R = "Ῥ",
    }

    local replace_23 = { -- > *
        a = "ἀ",
        e = "ἐ",
        h = "ἠ",
        i = "ἰ",
        o = "ὀ",
        u = "ὐ",
        w = "ὠ",
        A = "Ἀ",
        E = "Ἐ",
        H = "Ἠ",
        I = "Ἰ",
        O = "Ὀ",
        U = "᾿Υ", -- written as a separate breathing character followed by Υ
        W = "Ὠ",
    }

    local replace_24 = { -- * |
        a = "ᾳ",
        h = "ῃ",
        w = "ῳ",
    }

    local replace_25 = { -- " *
        i = "ϊ",
        u = "ϋ",
    }

    local replace_26 = { -- *
        a = "α",
        b = "β",
        g = "γ",
        d = "δ",
        e = "ε",
        z = "ζ",
        h = "η",
        j = "θ",
        i = "ι",
        k = "κ",
        l = "λ",
        m = "μ",
        n = "ν",
        x = "ξ",
        o = "ο",
        p = "π",
        r = "ρ",
        s = "σ",
        c = "ς",
        t = "τ",
        u = "υ",
        f = "φ",
        q = "χ",
        y = "ψ",
        w = "ω",
        A = "Α",
        B = "Β",
        G = "Γ",
        D = "Δ",
        E = "Ε",
        Z = "Ζ",
        H = "Η",
        J = "Θ",
        I = "Ι",
        K = "Κ",
        L = "Λ",
        M = "Μ",
        N = "Ν",
        X = "Ξ",
        O = "Ο",
        P = "Π",
        R = "Ρ",
        S = "Σ",
        T = "Τ",
        U = "Υ",
        F = "Φ",
        Q = "Χ",
        Y = "Ψ",
        W = "Ω",
        [";"] = "·",
        ["?"] = ";",
    }

    local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs

    -- Input that is passed through untouched: a backslash followed by
    -- letters, and a bracketed run that contains no nested brackets.

    local skips_01 = P("\\") * R("az", "AZ")^1
    local skips_02 = P("[") * (1- S("[]"))^1 * P("]")

    -- One pattern per replacement table. Cs(1) captures a single character
    -- and that capture is the key used for the lookup in the table.

    local greek_01 = (P("<'")  * Cs(1) * P('|')) / replace_01
    local greek_02 = (P(">'")  * Cs(1) * P('|')) / replace_02
    local greek_03 = (P("<`")  * Cs(1) * P('|')) / replace_03
    local greek_04 = (P(">`")  * Cs(1) * P('|')) / replace_04
    local greek_05 = (P("<~")  * Cs(1) * P('|')) / replace_05
    local greek_06 = (P(">~")  * Cs(1) * P('|')) / replace_06
    local greek_07 = (P('"\'') * Cs(1)         ) / replace_07
    local greek_08 = (P('"`')  * Cs(1)         ) / replace_08
    local greek_09 = (P('"~')  * Cs(1)         ) / replace_09
    local greek_10 = (P("<'")  * Cs(1)         ) / replace_10
    local greek_11 = (P(">'")  * Cs(1)         ) / replace_11
    local greek_12 = (P("<`")  * Cs(1)         ) / replace_12
    local greek_13 = (P(">`")  * Cs(1)         ) / replace_13
    local greek_14 = (P("<~")  * Cs(1)         ) / replace_14
    local greek_15 = (P(">~")  * Cs(1)         ) / replace_15
    local greek_16 = (P("'")   * Cs(1) * P('|')) / replace_16
    local greek_17 = (P("`")   * Cs(1) * P('|')) / replace_17
    local greek_18 = (P("~")   * Cs(1) * P('|')) / replace_18
    local greek_19 = (P("'")   * Cs(1)         ) / replace_19
    local greek_20 = (P("`")   * Cs(1)         ) / replace_20
    local greek_21 = (P("~")   * Cs(1)         ) / replace_21
    local greek_22 = (P("<")   * Cs(1)         ) / replace_22
    local greek_23 = (P(">")   * Cs(1)         ) / replace_23
    local greek_24 = (Cs(1)            * P('|')) / replace_24
    local greek_25 = (P('"')   * Cs(1)         ) / replace_25
    local greek_26 = (Cs(1)                    ) / replace_26

    local skips =
        skips_01 + skips_02

    -- The order matters: this is an ordered choice, so the patterns with
    -- the longest prefixes (and those requiring a trailing "|") are tried
    -- before the shorter ones that would otherwise match first.

    local greek =
        greek_01 + greek_02 + greek_03 + greek_04 + greek_05 +
        greek_06 + greek_07 + greek_08 + greek_09 + greek_10 +
        greek_11 + greek_12 + greek_13 + greek_14 + greek_15 +
        greek_16 + greek_17 + greek_18 + greek_19 + greek_20 +
        greek_21 + greek_22 + greek_23 + greek_24 + greek_25 +
        greek_26

    local spacing    = S(" \n\r\t")
    local startgreek = P("\\startgreek")
    local stopgreek  = P("\\stopgreek")
    local localgreek = P("\\localgreek")
    local lbrace     = P("{")
    local rbrace     = P("}")

    -- structure "document": the whole input is treated as greek

    local documentparser = Cs((skips + greek + 1)^0)

    -- structure "context": only text after \startgreek (up to, but not
    -- including, \stopgreek) and the braced argument of \localgreek is
    -- converted; nested brace groups inside that argument are followed.

    local contextgrammar = Cs ( P { "scan",
        ["scan"]    = (V("global") + V("local") + skips + 1)^0,
        ["global"]  = startgreek * ((skips + greek + 1)-stopgreek )^0 ,
        ["local"]   = localgreek * V("grouped"),
        ["grouped"] = spacing^0 * lbrace * (V("grouped") + skips + (greek - rbrace))^0 * rbrace,
    } )

    converters['greek'] = {
        document = documentparser,
        context  = contextgrammar,
    }

    -- lpeg.print(parser): 254 lines

    -- Convert the babel codes in the given file and save the result next
    -- to it as "<filename>.utf".
    --
    -- filename : path of the file to convert
    --
    -- The language is taken from --language (required) and the structure
    -- from --structure (defaults to "document"). Nothing is returned; each
    -- reason for not converting is reported with logs.simple.

    function scripts.babel.convert(filename)
        -- the original compared against an undefined global named "empty"
        -- (nil), so an empty filename was never caught here
        if not filename or filename == "" then
            logs.simple("provide filename")
            return
        end
        local data = io.loaddata(filename) or ""
        if data == "" then
            logs.simple("no data in '%s'",filename)
            return
        end
        local language = environment.argument("language") or ""
        if language == "" then
            logs.simple("provide language")
            return
        end
        local converter = converters[language]
        if not converter then
            logs.simple("no converter for language '%s'", language)
            return
        end
        local structure = environment.argument("structure") or "document"
        local parser = converter[structure]
        if not parser then
            logs.simple("unknown structure '%s' language '%s'", structure, language)
            return
        end
        logs.simple("converting '%s' using language '%s' with structure '%s'", filename, language, structure)
        data = parser:match(data)
        local newfilename = filename .. ".utf"
        io.savedata(newfilename, data)
        logs.simple("converted data saved in '%s'", newfilename)
    end

    --~ print(contextgrammar:match [[
    --~ oeps abg \localgreek{a}
    --~ \startgreek abg \stopgreek \oeps
    --~ oeps abg \localgreek{a{b}\oeps g}
    --~ ]])

end

logs.extendbanner("Babel Input To UTF Conversion 1.20",true)

-- the default mentioned here has to agree with scripts.babel.convert

messages.help = [[
--language=string   conversion language (e.g. greek)
--structure=string  obey given structure (e.g. 'context', default: 'document')
--convert           convert babel codes into utf
]]

if environment.argument("convert") then
    scripts.babel.convert(environment.files[1] or "")
else
    logs.help(messages.help)
end