-- Register this script's metadata (version, author, license, ...) under the key
-- 'mtx-chars' in the global 'modules' table; the table is created first when
-- it does not exist yet.
if not modules then modules = { } end modules ['mtx-chars'] = {
version = 1.001,
comment = "companion to mtxrun.lua",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
copyright = "PRAGMA ADE / ConTeXt Development Team",
license = "see context related readme files"
}
-- obsolete: --stix convert stix table to math table
-- Help text of this script; it is handed to logs.application below through the
-- 'helpinfo' field. The lines give the script name, a short description, a
-- version string and one descriptive line for each of the three generators.
local helpinfo = [[
mtx-chars
MkII Character Table Generators
0.10
generate xetx-*.tex (used by xetex)
generate pdfr-def.tex (used by pdftex)
generate entities table
]]
-- Application object of this script: it supplies the reporter used by the
-- generators and the help/export functions called by the command line
-- dispatch at the end of the file.
local application = logs.application {
    name     = "mtx-chars",
    banner   = "MkII Character Table Generators 0.10",
    helpinfo = helpinfo,
}

local report = application.report

-- Local aliases for the library functions used in this file.
local format   = string.format
local gmatch   = string.gmatch
local upper    = string.upper
local lower    = string.lower
local tonumber = tonumber
local concat   = table.concat
local utfchar  = utf.char

-- Namespace that holds the generators; existing tables are kept.
scripts       = scripts or { }
scripts.chars = scripts.chars or { }
--~ local banner = [[
--~ -- filename : char-mth.lua
--~ -- comment : companion to char-mth.tex (in ConTeXt)
--~ -- author : Hans Hagen, PRAGMA-ADE, Hasselt NL
--~ -- license : see context related readme files
--~ -- comment : generated from data file downloaded from STIX website
--~
--~ if not versions then versions = { } end versions['char-mth'] = 1.001
--~ if not characters then characters = { } end
--~ ]]
--~
--~ function scripts.chars.stixtomkiv(inname,outname)
--~ if inname == "" then
--~ report("aquiring math data, invalid datafilename")
--~ end
--~ local f = io.open(inname)
--~ if not f then
--~ report("aquiring math data, invalid datafile")
--~ else
--~ report("aquiring math data, processing %s",inname)
--~ if not outname or outname == "" then
--~ outname = "char-mth.lua"
--~ end
--~ local classes = {
--~ N = "normal",
--~ A = "alphabetic",
--~ D = "diacritic",
--~ P = "punctuation",
--~ B = "binary",
--~ R = "relation",
--~ L = "large",
--~ O = "opening",
--~ C = "closing",
--~ F = "fence"
--~ }
--~ local valid, done = false, { }
--~ local g = io.open(outname,'w')
--~ g:write(banner)
--~ g:write(format("\ncharacters.math = {\n"))
--~ for l in f:lines() do
--~ if not valid then
--~ valid = l:find("AMS/TeX name")
--~ end
--~ if valid then
--~ local unicode = l:sub(2,6)
--~ if unicode:sub(1,1) ~= " " and unicode ~= "" and not done[unicode] then
--~ local mathclass, adobename, texname = l:sub(57,57) or "", l:sub(13,36) or "", l:sub(84,109) or ""
--~ texname, adobename = texname:gsub("[\\ ]",""), adobename:gsub("[\\ ]","")
--~ local t = { }
--~ if mathclass ~= "" then t[#t+1] = format("mathclass='%s'", classes[mathclass] or "unknown") end
--~ if adobename ~= "" then t[#t+1] = format("adobename='%s'", adobename ) end
--~ if texname ~= "" then t[#t+1] = format("texname='%s'" , texname ) end
--~ if #t > 0 then
--~ g:write(format("\t[0x%s] = { %s },\n",unicode, concat(t,", ")))
--~ end
--~ done[unicode] = true
--~ end
--~ end
--~ end
--~ if not valid then
--~ g:write("\t-- The data file is corrupt, invalid or maybe the format has changed.\n")
--~ report("aquiring math data, problems with data table")
--~ else
--~ report("aquiring math data, table saved in %s",outname)
--~ end
--~ g:write("}\n")
--~ g:close()
--~ f:close()
--~ end
--~ end
-- Former entry point of the (obsolete) --stix option. Nothing is converted
-- any longer: both arguments are ignored and only a message is reported.
--
-- inname  : name of the input file (unused)
-- outname : name of the output file (unused)
function scripts.chars.stixtomkiv(inname,outname)
    local message = "we no longer use this options but use our own tables instead"
    report(message)
end
-- Header that scripts.chars.makepdfr writes at the top of pdfr-def.tex.
local banner_pdf_1 = [[
% filename : pdfr-def.tex
% comment : generated by mtxrun --script chars --pdf
% author : Hans Hagen, PRAGMA-ADE, Hasselt NL
% copyright: PRAGMA ADE / ConTeXt Development Team
% license : see context related readme files
%
]]
-- Trailer that scripts.chars.makepdfr writes at the end of pdfr-def.tex.
local banner_pdf_2 = [[
%
\endinput
]]
-- Generates pdfr-def.tex in the current directory: one \pdfglyphtounicode line
-- for every entry of characters.data that has an adobename, in ascending key
-- order, wrapped in the pdf banners.
--
-- The character table is loaded by running char-def.lua (located with
-- resolvers.findfile). The function returns silently when that file is not
-- found, when it does not define characters.data, or when the output file
-- cannot be opened.
function scripts.chars.makepdfr()
    local filename = resolvers.findfile("char-def.lua") or ""
    if filename == "" then
        return
    end
    dofile(filename)
    local data = characters and characters.data
    if not data then
        return
    end
    local handle = io.open("pdfr-def.tex", 'w')
    if not handle then
        return
    end
    handle:write(banner_pdf_1)
    for _, slot in ipairs(table.sortedkeys(data)) do
        local entry = data[slot]
        local name = entry.adobename
        if name then
            handle:write(format("\\pdfglyphtounicode{%s}{%04X}%%\n",name,entry.unicodeslot))
        end
    end
    handle:write(banner_pdf_2)
    handle:close()
end
-- Common header of the generated xetx-*.tex files. It is used as a format
-- template (hence the doubled percent signs); %s receives the file name.
local banner_utf_module = [[
%% filename : %s
%% comment : generated by mtxrun --script chars --xtx
%% author : Hans Hagen, PRAGMA-ADE, Hasselt NL
%% copyright: PRAGMA ADE / ConTeXt Development Team
%% license : see context related readme files
]]
-- Banner of xetx-utf.tex; written as is (not passed through format).
local banner_utf_mappings = [[
% lc/uc/catcode mappings
]]
-- Fixed lines appended to xetx-utf.tex after the generated mappings.
local banner_utf_patch = [[
% patch needed for turkish
\setXTXcharcodes "201C "201C "201C
\setXTXcharcodes "201D "201D "201D
% patch needed for french
\setXTXcharcodes "2019 "2019 "2019
]]
-- Banner of xetx-chr.tex; written as is (not passed through format).
local banner_utf_names = [[
% named characters mapped onto utf (\\char is needed for accents)
]]
-- Banner of xetx-cls.tex; written as is (not passed through format).
local banner_utf_classes = [[
% some character classes for xetex; seems to be rather hard coded, these numbers
% and also a mix of several classes; here we do linebreaks
]]
-- Trailer written at the end of every generated xetx-*.tex file.
local banner_utf_finish = [[
\endinput
]]
-- Linebreak classes that end up in xetx-cls.tex. The keys are compared with
-- the 'linebreak' field of the character entries; in this file the numeric
-- values are only tested for presence and never written to the output.
local xtxclasses = {
id = 1,
ex = 3,
is = 3,
cm = 256,
op = 2,
ns = 3,
cl = 3,
}
-- Opens one of the generated xetx-*.tex files for writing and emits the common
-- module header followed by the file specific banner. Returns the file handle,
-- or nothing (after reporting the problem) when the file cannot be created.
local function openxtxfile(name,banner)
    local f = io.open(name,'w')
    if f then
        report("writing '%s'",name)
        f:write(format(banner_utf_module,name))
        f:write(banner)
        return f
    end
    report("unable to open '%s' for writing",name)
end

-- Writes the common trailer and closes a file opened with openxtxfile.
local function closextxfile(f)
    f:write(banner_utf_finish)
    f:close()
end

-- Writes xetx-utf.tex: one \setXTXcharcodes line for each letter (category ll,
-- lu or lt) up to 0xFFFF, one \dofastrecurse line for each range entry with
-- category lo, and finally the fixed patches.
--
-- data : the character table (characters.data)
-- list : the sorted keys of data
local function writextxmappings(data,list)
    local f = openxtxfile("xetx-utf.tex",banner_utf_mappings)
    if not f then
        return
    end
    for i=1,#list do
        local code = list[i]
        if code <= 0xFFFF then
            local chr = data[code]
            local cc = chr.category
            if cc == 'll' or cc == 'lu' or cc == 'lt' then
                -- a missing case mapping falls back on the character itself;
                -- the shared character table is left untouched
                local lccode = chr.lccode or code
                local uccode = chr.uccode or code
                f:write(format('\\setXTXcharcodes "%05X "%05X "%05X %% %s\n',code,lccode,uccode,chr.description))
            end
        end
    end
    f:write("\n")
    for i=1,#list do
        local code = list[i]
        local chr = data[code]
        if chr and chr.range and chr.category == 'lo' then
            f:write(format('\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharcodes\\recurselevel\\recurselevel\\recurselevel}\n',code,chr.range))
        end
    end
    f:write(banner_utf_patch)
    closextxfile(f)
end

-- Writes xetx-chr.tex: a \def for every character above 0x5B and up to 0xFFFF
-- that has a contextname. The names are left aligned in a column that is as
-- wide as the longest name.
--
-- data : the character table (characters.data)
-- list : the sorted keys of data
local function writextxnames(data,list)
    local f = openxtxfile("xetx-chr.tex",banner_utf_names)
    if not f then
        return
    end
    -- first pass: determine the column width
    local length = 0
    for i=1,#list do
        local code = list[i]
        if code > 0x5B and code <= 0xFFFF then
            local chr = data[code]
            local name = chr and chr.contextname
            if name and #name > length then
                length = #name
            end
        end
    end
    local template = "\\def\\%-".. length .. "s{\\char\"%05X } %% %s: %s\n"
    -- second pass: write the definitions
    for i=1,#list do
        local code = list[i]
        if code > 0x5B and code <= 0xFFFF then
            local chr = data[code]
            if chr and chr.contextname then
                f:write(format(template, chr.contextname, code, chr.description, utfchar(code)))
            end
        end
    end
    closextxfile(f)
end

-- Writes xetx-cls.tex: the class definitions for the keys of xtxclasses, the
-- class assignments for runs of characters that share a linebreak class, and
-- the class assignments for the range entries.
--
-- data : the character table (characters.data)
-- list : the sorted keys of data
local function writextxclasses(data,list)
    local f = openxtxfile("xetx-cls.tex",banner_utf_classes)
    if not f then
        return
    end
    -- the keys are sorted because the traversal order of a hash is not
    -- specified; this way the generated file is the same on every run
    local classes = table.sortedkeys(xtxclasses)
    for i=1,#classes do
        f:write(format("\\defineXTXcharinjectionclass[lb:%s]\n",classes[i]))
    end
    f:write("\n")
    -- the pending run: first and last code and the class they share; the three
    -- are always set and reset together
    local i_first, i_last, i_clb = nil, nil, nil
    local function flush()
        if i_first then
            if i_first == i_last then
                f:write(format('\\dosetXTXcharacterclass{"%05X}{lb:%s}\n',i_first,i_clb))
            else
                f:write(format('\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharacterclass\\fastrecursecounter{lb:%s}}\n',i_first,i_last,i_clb))
            end
        end
        i_first, i_last, i_clb = nil, nil, nil
    end
    -- NOTE(review): a run is extended whenever the next entry in the sorted
    -- list has the same class, also when the two codes are not adjacent, so a
    -- written span can include codes that have no entry -- confirm that this
    -- is intended.
    for i=1,#list do
        local code = list[i]
        local chr = data[code]
        local clb = chr and chr.linebreak
        if not xtxclasses[clb] then
            -- a character without a known class ends the pending run
            flush()
        elseif clb == i_clb then
            -- same class as the pending run (which therefore exists)
            i_last = code
        else
            flush()
            i_first, i_last, i_clb = code, code, clb
        end
    end
    flush()
    f:write("\n")
    for i=1,#list do
        local code = list[i]
        local chr = data[code]
        if chr and chr.range then
            local lbc = chr.linebreak
            if xtxclasses[lbc] then
                f:write(format('\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharacterclass\\fastrecursecounter{lb:%s}}\n',code,chr.range,lbc))
            end
        end
    end
    closextxfile(f)
end

-- Generates the three files used by xetex (xetx-utf.tex, xetx-chr.tex and
-- xetx-cls.tex) in the current directory.
--
-- The character table is loaded by running char-def.lua (located with
-- resolvers.findfile). When that file cannot be found or does not define
-- characters.data this is reported and nothing is written. A file that cannot
-- be opened is reported and skipped; the other files are still generated.
function scripts.chars.makeencoutf()
    local chartable = resolvers.findfile("char-def.lua") or ""
    if chartable == "" then
        report("unable to locate '%s'","char-def.lua")
        return
    end
    dofile(chartable)
    local data = characters and characters.data
    if not data then
        report("no character data found in '%s'",chartable)
        return
    end
    local list = table.sortedkeys(data)
    writextxmappings(data,list)
    writextxnames(data,list)
    writextxclasses(data,list)
end
-- Locations of the entity definition files processed by
-- scripts.chars.xmlentities. Only the basename of each url is used there: the
-- file itself is looked up with resolvers.findfile.
local entityfiles = {
"http://www.w3.org/2003/entities/2007/w3centities-f.ent",
"http://www.w3.org/2003/entities/2007/htmlmathml-f.ent",
}
-- Generates "xmlentities.tmp": the source of a Lua table that maps entity names
-- onto utf strings, with the code points added as a comment on each line.
--
-- For every url in entityfiles the basename is looked up with
-- resolvers.findfile and the file is scanned for declarations of the form
--
--   <!ENTITY name "&#xHHHHH;" >     (one or more &#x...; or &#...; references)
--
-- The first declaration of a name wins. The entity "newline" is skipped and
-- "lt", "gt" and "amp" are set explicitly to "<", ">" and "&" instead of being
-- derived from their declared value. A file that cannot be found or loaded is
-- reported and skipped.
function scripts.chars.xmlentities()
    local done = { }
    local entities = { "local entities = utilities.storage.allocate {" }
    for i=1,#entityfiles do
        local s = url.hashed(entityfiles[i])
        local b = file.basename(s.path)
        local n = resolvers.findfile(b) or ""
        local data = n ~= "" and io.loaddata(n) or ""
        if data == "" then
            report("no entity data found in '%s'",b)
        end
        for name, value in gmatch(data,'<!ENTITY%s+(%S+)%s+"(.-)"') do
            if not done[name] then
                done[name] = true
                local str, hex
                if name == "newline" then
                    -- let's forget about that one
                elseif name == "lt" then
                    str, hex = "<", "U+0003C"
                elseif name == "gt" then
                    str, hex = ">", "U+0003E"
                elseif name == "amp" then
                    str, hex = "&", "U+00026"
                else
                    -- t is "x" for a hexadecimal reference and empty for a decimal one
                    for t, c in gmatch(value,"&#(x?)([^;]+);") do
                        local u = tonumber(c,t == "x" and 16 or 10)
                        if not u then
                            -- not a valid number, so this reference is ignored
                        elseif str then
                            str, hex = str .. utfchar(u), format("%s %05X",hex,u)
                        else
                            str, hex = utfchar(u), format("U+%05X",u)
                        end
                    end
                end
                if str and hex then
                    entities[#entities+1] = format(' ["%s"] = %q, -- %s',name,str,hex)
                end
            end
        end
    end
    entities[#entities+1] = "}"
    io.savedata("xmlentities.tmp",concat(entities,"\n"))
end
-- Command line dispatch. The first option that is set, tested in the order
-- stix, entities, xtx, pdf, exporthelp, selects the action; without any of
-- them the help information is shown.
do
    local argument = environment.argument
    if argument("stix") then
        local files = environment.files
        scripts.chars.stixtomkiv(files[1] or "",files[2] or "")
    elseif argument("entities") then
        scripts.chars.xmlentities()
    elseif argument("xtx") then
        scripts.chars.makeencoutf()
    elseif argument("pdf") then
        scripts.chars.makepdfr()
    else
        local target = argument("exporthelp")
        if target then
            application.export(target,environment.files[1])
        else
            application.help()
        end
    end
end
-- local http = require("socket.http")
-- local ltn12 = require("ltn12")
--
-- local t = { }
-- local status, message = http.request {
-- url = f,
-- sink = ltn12.sink.table(t)
-- }
--
-- local template = [[
--
--
--
--
-- This is just a placeholder.
-- ]]
--
-- local e = string.format(template,io.loaddata(n))
-- local x = xml.convert(e, { utfize_entities = true } )
-- local entities = x.entities