if not modules then modules = { } end modules ['mtx-chars'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- obsolete: --stix convert stix table to math table

-- Help text handed to logs.application below.
-- NOTE(review): this text looks like it lost its original markup (there are
-- no flag names left); the flags handled at the end of this file are --xtx,
-- --pdf, --entities, --stix and --exporthelp. Confirm the intended format.
local helpinfo = [[
mtx-chars
MkII Character Table Generators
0.10
generate xetx-*.tex (used by xetex)
generate pdfr-def.tex (used by pdftex)
generate entities table
]]

local application = logs.application {
    name     = "mtx-chars",
    banner   = "MkII Character Table Generators 0.10",
    helpinfo = helpinfo,
}

local report = application.report

local format, gmatch, upper, lower = string.format, string.gmatch, string.upper, string.lower
local tonumber = tonumber
local concat = table.concat
local utfchar = utf.char

scripts       = scripts       or { }
scripts.chars = scripts.chars or { }

--~ local banner = [[
--~ -- filename : char-mth.lua
--~ -- comment  : companion to char-mth.tex (in ConTeXt)
--~ -- author   : Hans Hagen, PRAGMA-ADE, Hasselt NL
--~ -- license  : see context related readme files
--~ -- comment  : generated from data file downloaded from STIX website
--~
--~ if not versions then versions = { } end versions['char-mth'] = 1.001
--~ if not characters then characters = { } end
--~ ]]
--~
--~ function scripts.chars.stixtomkiv(inname,outname)
--~     if inname == "" then
--~         report("aquiring math data, invalid datafilename")
--~     end
--~     local f = io.open(inname)
--~     if not f then
--~         report("aquiring math data, invalid datafile")
--~     else
--~         report("aquiring math data, processing %s",inname)
--~         if not outname or outname == "" then
--~             outname = "char-mth.lua"
--~         end
--~         local classes = {
--~             N = "normal",
--~             A = "alphabetic",
--~             D = "diacritic",
--~             P = "punctuation",
--~             B = "binary",
--~             R = "relation",
--~             L = "large",
--~             O = "opening",
--~             C = "closing",
--~             F = "fence"
--~         }
--~         local valid, done = false, { }
--~         local g = io.open(outname,'w')
--~         g:write(banner)
--~         g:write(format("\ncharacters.math = {\n"))
--~         for l in f:lines() do
--~             if not valid then
--~                 valid = l:find("AMS/TeX name")
--~             end
--~             if valid then
--~                 local unicode = l:sub(2,6)
--~                 if unicode:sub(1,1) ~= " " and unicode ~= "" and not done[unicode] then
--~                     local mathclass, adobename, texname = l:sub(57,57) or "", l:sub(13,36) or "", l:sub(84,109) or ""
--~                     texname, adobename = texname:gsub("[\\ ]",""), adobename:gsub("[\\ ]","")
--~                     local t = { }
--~                     if mathclass ~= "" then t[#t+1] = format("mathclass='%s'", classes[mathclass] or "unknown") end
--~                     if adobename ~= "" then t[#t+1] = format("adobename='%s'", adobename                      ) end
--~                     if texname   ~= "" then t[#t+1] = format("texname='%s'"  , texname                        ) end
--~                     if #t > 0 then
--~                         g:write(format("\t[0x%s] = { %s },\n",unicode, concat(t,", ")))
--~                     end
--~                     done[unicode] = true
--~                 end
--~             end
--~         end
--~         if not valid then
--~             g:write("\t-- The data file is corrupt, invalid or maybe the format has changed.\n")
--~             report("aquiring math data, problems with data table")
--~         else
--~             report("aquiring math data, table saved in %s",outname)
--~         end
--~         g:write("}\n")
--~         g:close()
--~         f:close()
--~     end
--~ end

-- Stub kept for the --stix flag: it only reports that the option is no longer
-- used; both arguments are ignored.
function scripts.chars.stixtomkiv(inname,outname)
    report("we no longer use this options but use our own tables instead")
end

-- Header and footer written around the body of pdfr-def.tex. Each TeX comment
-- must sit on its own line, otherwise the first "%" hides the rest.
local banner_pdf_1 = [[
% filename : pdfr-def.tex
% comment  : generated by mtxrun --script chars --pdf
% author   : Hans Hagen, PRAGMA-ADE, Hasselt NL
% copyright: PRAGMA ADE / ConTeXt Development Team
% license  : see context related readme files
%
]]

local banner_pdf_2 = [[
%
\endinput
]]

-- Writes pdfr-def.tex in the current directory: one \pdfglyphtounicode line
-- for every entry of characters.data that has an adobename, in sorted key
-- order. Does nothing when char-def.lua cannot be located, when loading it
-- does not define characters.data, or when the output file cannot be opened.
function scripts.chars.makepdfr()
    local chartable = resolvers.findfile("char-def.lua") or ""
    if chartable ~= "" then
        dofile(chartable) -- expected to define the global characters.data
        if characters and characters.data then
            local f = io.open("pdfr-def.tex", 'w')
            if f then
                f:write(banner_pdf_1)
                local cd = characters.data
                local sd = table.sortedkeys(cd)
                for i=1,#sd do
                    local char = cd[sd[i]]
                    if char.adobename then
                        f:write(format("\\pdfglyphtounicode{%s}{%04X}%%\n",char.adobename,char.unicodeslot))
                    end
                end
                f:write(banner_pdf_2)
                f:close()
            end
        end
    end
end

-- Shared header of the xetx-*.tex files; it is passed through format() with
-- the file name, hence the doubled percent signs.
local banner_utf_module = [[
%% filename : %s
%% comment  : generated by mtxrun --script chars --xtx
%% author   : Hans Hagen, PRAGMA-ADE, Hasselt NL
%% copyright: PRAGMA ADE / ConTeXt Development Team
%% license  : see context related readme files

]]

local banner_utf_mappings = [[
% lc/uc/catcode mappings

]]

local banner_utf_patch = [[

% patch needed for turkish

\setXTXcharcodes "201C "201C "201C
\setXTXcharcodes "201D "201D "201D

% patch needed for french

\setXTXcharcodes "2019 "2019 "2019
]]

local banner_utf_names = [[
% named characters mapped onto utf (\\char is needed for accents)

]]

local banner_utf_classes = [[
% some character classes for xetex; seems to be rather hard coded, these numbers
% and also a mix of several classes; here we do linebreaks

]]

local banner_utf_finish = [[

\endinput
]]

-- Linebreak property names (the "linebreak" field of a character entry) for
-- which a class is defined and assigned in xetx-cls.tex. Only the keys are
-- used by the code in this file.
local xtxclasses = {
    id = 1,
    ex = 3,
    is = 3,
    cm = 256,
    op = 2,
    ns = 3,
    cl = 3,
}

-- Generates three files in the current directory from characters.data:
--   xetx-utf.tex : \setXTXcharcodes lines for cased letters (ll, lu, lt)
--                  up to 0xFFFF plus recursion lines for 'lo' ranges
--   xetx-chr.tex : \def lines for entries with a contextname in the range
--                  0x5C .. 0xFFFF
--   xetx-cls.tex : linebreak class definitions and assignments
-- Does nothing when char-def.lua cannot be located or defines no data.
function scripts.chars.makeencoutf()
    local chartable = resolvers.findfile("char-def.lua") or ""
    if chartable ~= "" then
        dofile(chartable) -- expected to define the global characters.data

        -- opens an output file and writes the common header plus the
        -- file specific banner; returns nil when the file cannot be opened
        local function open(name,banner)
            local f = io.open(name,'w')
            if f then
                report("writing '%s'",name)
                f:write(format(banner_utf_module,name))
                f:write(banner)
                return f
            end
        end

        -- writes the common trailer and closes the file
        local function close(f)
            f:write(banner_utf_finish)
            f:close()
        end

        local data = characters and characters.data
        if data then
            local list = table.sortedkeys(data)

            local f = open("xetx-utf.tex",banner_utf_mappings)
            if f then
                for i=1,#list do
                    local code = list[i]
                    if code <= 0xFFFF then
                        local chr = data[code]
                        local cc = chr.category
                        if cc == 'll' or cc == 'lu' or cc == 'lt' then
                            -- a missing case mapping maps onto the character itself
                            if not chr.lccode then chr.lccode = code end
                            if not chr.uccode then chr.uccode = code end
                            f:write(format('\\setXTXcharcodes "%05X "%05X "%05X %% %s\n',code,chr.lccode,chr.uccode,chr.description))
                        end
                    end
                end
                f:write("\n")
                for i=1,#list do
                    local code = list[i]
                    local chr = data[code]
                    if chr and chr.range then
                        local cc = chr.category
                        if cc == 'lo' then
                            f:write(format('\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharcodes\\recurselevel\\recurselevel\\recurselevel}\n',code,chr.range))
                        end
                    end
                end
                f:write(banner_utf_patch)
                close(f)
            end

            local f = open("xetx-chr.tex",banner_utf_names)
            if f then
                -- first pass: longest contextname, used to align the definitions
                local length = 0
                for i=1,#list do
                    local code = list[i]
                    if code > 0x5B and code <= 0xFFFF then
                        local chr = data[code]
                        if chr and #(chr.contextname or "") > length then
                            length = #chr.contextname
                        end
                    end
                end
                local template = "\\def\\%-".. length .. "s{\\char\"%05X } %% %s: %s\n"
                for i=1,#list do
                    local code = list[i]
                    if code > 0x5B and code <= 0xFFFF then
                        local chr = data[code]
                        if chr and chr.contextname then
                            local ch = utfchar(code)
                            f:write(format(template, chr.contextname, code, chr.description, ch))
                        end
                    end
                end
                close(f)
            end

            local f = open("xetx-cls.tex",banner_utf_classes)
            if f then
                for k, v in next, xtxclasses do
                    f:write(format("\\defineXTXcharinjectionclass[lb:%s]\n",k))
                end
                f:write("\n")
                -- pending run of successive list entries sharing one linebreak class
                local i_first, i_last, i_clb = nil, nil, nil
                -- writes the pending run (single assignment or a recursion) and resets it
                local function flush()
                    if i_first then
                        if i_first == i_last then
                            f:write(format('\\dosetXTXcharacterclass{"%05X}{lb:%s}\n',i_first,i_clb))
                        else
                            f:write(format('\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharacterclass\\fastrecursecounter{lb:%s}}\n',i_first,i_last,i_clb))
                        end
                    end
                    i_first, i_last, i_clb = nil, nil, nil
                end
                for i=1,#list do
                    local code = list[i]
                    local chr = data[code]
                    local clb = chr and chr.linebreak
                    local lbc = xtxclasses[clb]
                    if not lbc then
                        -- not a class we handle: end the pending run
                        flush()
                    elseif clb == i_clb then
                        if i_first then
                            i_last = code
                        else
                            i_first, i_last, i_clb = code, code, clb
                        end
                    else
                        -- another class starts: write the pending run, begin a new one
                        flush()
                        i_first, i_last, i_clb = code, code, clb
                    end
                end
                flush()
                f:write("\n")
                for i=1,#list do
                    local code = list[i]
                    local chr = data[code]
                    if chr and chr.range then
                        local lbc = chr.linebreak
                        if xtxclasses[lbc] then
                            f:write(format('\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharacterclass\\fastrecursecounter{lb:%s}}\n',code,chr.range,lbc))
                        end
                    end
                end
                close(f)
            end
        end
    end
end

-- Only the base name of each url is used: the files are looked up locally
-- with resolvers.findfile, nothing is downloaded here.
local entityfiles = {
    "http://www.w3.org/2003/entities/2007/w3centities-f.ent",
    "http://www.w3.org/2003/entities/2007/htmlmathml-f.ent",
}

-- Entities that get a fixed code point instead of having their declared
-- value parsed.
local specialentities = {
    lt  = 0x3C, -- <
    gt  = 0x3E, -- >
    amp = 0x26, -- &
}

-- Collects the entity declarations of the files in entityfiles and saves
-- them as Lua table source in xmlentities.tmp, one line per entity:
--   ["name"] = "string", -- U+XXXXX [XXXXX ...]
-- The first declaration of a name wins, "newline" is skipped, and files that
-- cannot be found or loaded are reported and skipped.
function scripts.chars.xmlentities()
    local done     = { }
    local entities = { "local entities = utilities.storage.allocate {" }
    for i=1,#entityfiles do
        local f = entityfiles[i]
        local s = url.hashed(f)
        local b = file.basename(s.path)
        local n = resolvers.findfile(b) or ""
        local data = n ~= "" and io.loaddata(n) or ""
        if data == "" then
            report("unable to load entity file '%s'",b)
        else
            -- NOTE(review): the pattern was an empty string in the reviewed
            -- source (apparently lost markup); this reconstruction assumes
            -- declarations of the form <!ENTITY name "value"> -- confirm
            -- against the entity files.
            for name, value in gmatch(data,'<!ENTITY%s+(%S+)%s+"(.-)"') do
                if not done[name] then
                    done[name] = true
                    local str, hex
                    local special = specialentities[name]
                    if name == "newline" then
                        -- let's forget about that one
                    elseif special then
                        str, hex = utfchar(special), format("U+%05X",special)
                    else
                        -- a value can hold several numeric character references
                        for t, c in gmatch(value,"&#([x]*)([^;]+);") do
                            if t == "x" then
                                c = tonumber(c,16)
                            else
                                c = tonumber(c)
                            end
                            if not c then
                                -- not a number: ignore this reference
                            elseif str then
                                str, hex = str .. utfchar(c), format("%s %05X",hex,c)
                            else
                                str, hex = utfchar(c), format("U+%05X",c)
                            end
                        end
                    end
                    if str and hex then
                        entities[#entities+1] = format('    ["%s"] = %q, -- %s',name,str,hex)
                    end
                end
            end
        end
    end
    entities[#entities+1] = "}"
    io.savedata("xmlentities.tmp",concat(entities,"\n"))
end

-- Command line dispatch: the first matching flag wins, without a known flag
-- the help text is shown.
if environment.argument("stix") then
    local inname  = environment.files[1] or ""
    local outname = environment.files[2] or ""
    scripts.chars.stixtomkiv(inname,outname)
elseif environment.argument("entities") then
    scripts.chars.xmlentities()
elseif environment.argument("xtx") then
    scripts.chars.makeencoutf()
elseif environment.argument("pdf") then
    scripts.chars.makepdfr()
elseif environment.argument("exporthelp") then
    application.export(environment.argument("exporthelp"),environment.files[1])
else
    application.help()
end

-- Leftover notes (the content of the template appears to be incomplete):
--
-- local http  = require("socket.http")
-- local ltn12 = require("ltn12")
--
-- local t = { }
-- local status, message = http.request {
--     url  = f,
--     sink = ltn12.sink.table(t)
-- }
--
-- local template = [[
--
--
--
-- This is just a placeholder.
-- ]]
--
-- local e = string.format(template,io.loaddata(n))
-- local x = xml.convert(e, { utfize_entities = true } )
-- local entities = x.entities