From c36e19abdfd15bf6cae6fa379c6ce51f3ef5332d Mon Sep 17 00:00:00 2001 From: Marius Date: Fri, 19 Oct 2012 01:21:22 +0300 Subject: beta 2012.10.19 00:06 --- scripts/context/lua/mtx-context.lua | 1518 ++++++--------- scripts/context/lua/mtx-convert.lua | 4 +- scripts/context/lua/mtx-epub.lua | 113 +- scripts/context/lua/mtx-fcd.lua | 366 ++++ scripts/context/lua/mtx-flac.lua | 6 - scripts/context/lua/mtx-fonts.lua | 4 +- scripts/context/lua/mtx-grep.lua | 4 +- scripts/context/lua/mtx-metapost.lua | 2 +- scripts/context/lua/mtx-pdf.lua | 142 +- scripts/context/lua/mtx-scite.lua | 25 +- scripts/context/lua/mtx-server-ctx-help.lua | 215 ++- scripts/context/lua/mtx-server.lua | 41 +- scripts/context/lua/mtx-tools.lua | 2 +- scripts/context/lua/mtx-update.lua | 4 +- scripts/context/lua/mtx-watch.lua | 11 - scripts/context/lua/mtxrun.lua | 2679 +++++++++++++++++++-------- scripts/context/ruby/fcd_start.rb | 472 ----- scripts/context/stubs/mswin/mtxrun.lua | 2679 +++++++++++++++++++-------- scripts/context/stubs/unix/mtxrun | 2679 +++++++++++++++++++-------- 19 files changed, 7001 insertions(+), 3965 deletions(-) create mode 100644 scripts/context/lua/mtx-fcd.lua delete mode 100644 scripts/context/ruby/fcd_start.rb (limited to 'scripts') diff --git a/scripts/context/lua/mtx-context.lua b/scripts/context/lua/mtx-context.lua index e07ecdfc7..f33363a0d 100644 --- a/scripts/context/lua/mtx-context.lua +++ b/scripts/context/lua/mtx-context.lua @@ -6,11 +6,18 @@ if not modules then modules = { } end modules ['mtx-context'] = { license = "see context related readme files" } +-- todo: more local functions +-- todo: pass jobticket/ctxdata table around + local format, gmatch, match, gsub, find = string.format, string.gmatch, string.match, string.gsub, string.find -local quote = string.quote +local quote, validstring = string.quote, string.valid local concat = table.concat +local settings_to_array = utilities.parsers.settings_to_array +local appendtable = table.append +local lpegpatterns, lpegmatch, Cs, P = lpeg.patterns, lpeg.match, lpeg.Cs, lpeg.P -local getargument = environment.argument +local getargument = environment.getargument or environment.argument +local setargument = environment.setargument local basicinfo = [[ --run process (one or more) files (default action) @@ -35,8 +42,10 @@ local basicinfo = [[ --noconsole disable logging to the console (logfile only) --purgeresult purge result file before run ---forcexml force xml stub (optional flag: --mkii) +--forcexml force xml stub --forcecld force cld (context lua document) stub +--forcelua force lua stub (like texlua) +--forcemp force mp stub --arrange run extra imposition pass, given that the style sets up imposition --noarrange ignore imposition specifications in the style @@ -50,23 +59,16 @@ local basicinfo = [[ --version report installed context version --global assume given file present elsewhere +--nofile use dummy file as jobname --expert expert options ]] --- filter=list is kind of obsolete --- color is obsolete for mkiv, always on --- separation is obsolete for mkiv, no longer available --- output is currently obsolete for mkiv --- setuppath=list must check --- modefile=name must check --- input=name load the given inputfile (must check) - local expertinfo = [[ expert options: --touch update context version number (remake needed afterwards, also provide --expert) ---nostats omit runtime statistics at the end of the run +--nostatistics omit runtime statistics at the end of the run --update update context from website (not to be confused with contextgarden) --profile profile job (use: mtxrun --script profile --analyze) --timing generate timing and statistics overview @@ -80,13 +82,14 @@ special options: --pdftex process file with texexec using pdftex --xetex process file with texexec using xetex +--mkii process file with texexec --pipe don't check for file and enter scroll mode (--dummyfile=whatever.tmp) ]] local application = logs.application { name = "mtx-context", - banner = "ConTeXt Process Management 0.52", + banner = "ConTeXt Process Management 0.60", helpinfo = { basic = basicinfo, extra = extrainfo, @@ -94,159 +97,105 @@ local application = logs.application { } } +-- local luatexflags = { +-- ["8bit"] = true, -- ignored, input is assumed to be in UTF-8 encoding +-- ["default-translate-file"] = true, -- ignored, input is assumed to be in UTF-8 encoding +-- ["translate-file"] = true, -- ignored, input is assumed to be in UTF-8 encoding +-- ["etex"] = true, -- ignored, the etex extensions are always active +-- +-- ["credits"] = true, -- display credits and exit +-- ["debug-format"] = true, -- enable format debugging +-- ["disable-write18"] = true, -- disable \write18{SHELL COMMAND} +-- ["draftmode"] = true, -- switch on draft mode (generates no output PDF) +-- ["enable-write18"] = true, -- enable \write18{SHELL COMMAND} +-- ["file-line-error"] = true, -- enable file:line:error style messages +-- ["file-line-error-style"] = true, -- aliases of --file-line-error +-- ["no-file-line-error"] = true, -- disable file:line:error style messages +-- ["no-file-line-error-style"] = true, -- aliases of --no-file-line-error +-- ["fmt"] = true, -- load the format file FORMAT +-- ["halt-on-error"] = true, -- stop processing at the first error +-- ["help"] = true, -- display help and exit +-- ["ini"] = true, -- be iniluatex, for dumping formats +-- ["interaction"] = true, -- set interaction mode (STRING=batchmode/nonstopmode/scrollmode/errorstopmode) +-- ["jobname"] = true, -- set the job name to STRING +-- ["kpathsea-debug"] = true, -- set path searching debugging flags according to the bits of NUMBER +-- ["lua"] = true, -- load and execute a lua initialization script +-- ["mktex"] = true, -- enable mktexFMT generation (FMT=tex/tfm) +-- ["no-mktex"] = true, -- disable mktexFMT generation (FMT=tex/tfm) +-- ["nosocket"] = true, -- disable the lua socket library +-- ["output-comment"] = true, -- use STRING for DVI file comment instead of date (no effect for PDF) +-- ["output-directory"] = true, -- use existing DIR as the directory to write files in +-- ["output-format"] = true, -- use FORMAT for job output; FORMAT is 'dvi' or 'pdf' +-- ["parse-first-line"] = true, -- enable parsing of the first line of the input file +-- ["no-parse-first-line"] = true, -- disable parsing of the first line of the input file +-- ["progname"] = true, -- set the program name to STRING +-- ["recorder"] = true, -- enable filename recorder +-- ["safer"] = true, -- disable easily exploitable lua commands +-- ["shell-escape"] = true, -- enable \write18{SHELL COMMAND} +-- ["no-shell-escape"] = true, -- disable \write18{SHELL COMMAND} +-- ["shell-restricted"] = true, -- restrict \write18 to a list of commands given in texmf.cnf +-- ["synctex"] = true, -- enable synctex +-- ["version"] = true, -- display version and exit +-- ["luaonly"] = true, -- run a lua file, then exit +-- ["luaconly"] = true, -- byte-compile a lua file, then exit +-- } + local report = application.report scripts = scripts or { } scripts.context = scripts.context or { } --- a demo cld file: --- --- context.starttext() --- context.chapter("Hello There") --- context.readfile("tufte","","not found") --- context.stoptext() - --- l-file / todo +-- constants -function file.needsupdate(oldfile,newfile) - return true -end -function file.syncmtimes(oldfile,newfile) -end +local usedfiles = { + nop = "cont-nop.mkiv", + yes = "cont-yes.mkiv", +} --- l-io +local usedsuffixes = { + before = { + "tuc" + }, + after = { + "pdf", "tuc", "log" + }, + keep = { + "log" + }, +} -function io.copydata(fromfile,tofile) - io.savedata(tofile,io.loaddata(fromfile) or "") -end +local formatofinterface = { + en = "cont-en", + uk = "cont-uk", + de = "cont-de", + fr = "cont-fr", + nl = "cont-nl", + cs = "cont-cs", + it = "cont-it", + ro = "cont-ro", + pe = "cont-pe", +} --- ctx (will become util-ctx) - -local ctxrunner = { } - -function ctxrunner.filtered(str,method) - str = tostring(str) - if method == 'name' then str = file.removesuffix(file.basename(str)) - elseif method == 'path' then str = file.dirname(str) - elseif method == 'suffix' then str = file.extname(str) - elseif method == 'nosuffix' then str = file.removesuffix(str) - elseif method == 'nopath' then str = file.basename(str) - elseif method == 'base' then str = file.basename(str) --- elseif method == 'full' then --- elseif method == 'complete' then --- elseif method == 'expand' then -- str = file.expandpath(str) - end - return str:gsub("\\","/") -end +local defaultformats = { + "cont-en", + "cont-nl", +} -function ctxrunner.substitute(e,str) - local attributes = e.at - if str and attributes then - if attributes['method'] then - str = ctxrunner.filtered(str,attributes['method']) - end - if str == "" and attributes['default'] then - str = attributes['default'] - end - end - return str -end +-- process information -function ctxrunner.reflag(flags) - local t = { } - for _, flag in next, flags do - local key, value = match(flag,"^(.-)=(.+)$") - if key and value then - t[key] = value - else - t[flag] = true - end - end - return t -end +local ctxrunner = { } -- namespace will go -function ctxrunner.substitute(str) - return str -end - -function ctxrunner.justtext(str) - str = xml.unescaped(tostring(str)) - str = xml.cleansed(str) - str = str:gsub("\\+",'/') - str = str:gsub("%s+",' ') - return str -end +local ctx_locations = { '..', '../..' } function ctxrunner.new() return { - ctxname = "", - jobname = "", - xmldata = nil, - suffix = "prep", - locations = { '..', '../..' }, - variables = { }, - messages = { }, - environments = { }, - modules = { }, - filters = { }, - flags = { }, - modes = { }, - prepfiles = { }, - paths = { }, + ctxname = "", + jobname = "", + flags = { }, } end -function ctxrunner.savelog(ctxdata,ctlname) - local function yn(b) - if b then return 'yes' else return 'no' end - end - if not ctlname or ctlname == "" or ctlname == ctxdata.jobname then - if ctxdata.jobname then - ctlname = file.replacesuffix(ctxdata.jobname,'ctl') - elseif ctxdata.ctxname then - ctlname = file.replacesuffix(ctxdata.ctxname,'ctl') - else - report("invalid ctl name: %s",ctlname or "?") - return - end - end - local prepfiles = ctxdata.prepfiles - if prepfiles and next(prepfiles) then - report("saving logdata in: %s",ctlname) - f = io.open(ctlname,'w') - if f then - f:write("\n\n") - f:write(format("\n",yn(ctxdata.runlocal))) - local sorted = table.sortedkeys(prepfiles) - for i=1,#sorted do - local name = sorted[i] - f:write(format("\t%s\n",yn(prepfiles[name]),name)) - end - f:write("\n") - f:close() - end - else - report("nothing prepared, no ctl file saved") - os.remove(ctlname) - end -end - -function ctxrunner.register_path(ctxdata,path) - -- test if exists - ctxdata.paths[ctxdata.paths+1] = path -end - -function ctxrunner.trace(ctxdata) - print(table.serialize(ctxdata.messages)) - print(table.serialize(ctxdata.flags)) - print(table.serialize(ctxdata.environments)) - print(table.serialize(ctxdata.modules)) - print(table.serialize(ctxdata.filters)) - print(table.serialize(ctxdata.modes)) - print(xml.tostring(ctxdata.xmldata)) -end - -function ctxrunner.manipulate(ctxdata,ctxname,defaultname) +function ctxrunner.checkfile(ctxdata,ctxname,defaultname) if not ctxdata.jobname or ctxdata.jobname == "" then return @@ -269,13 +218,14 @@ function ctxrunner.manipulate(ctxdata,ctxname,defaultname) local usedname = ctxdata.ctxname local found = lfs.isfile(usedname) - -- no futher test if qualified path + -- no further test if qualified path if not found then - for _, path in next, ctxdata.locations do + for _, path in next, ctx_locations do local fullname = file.join(path,ctxdata.ctxname) if lfs.isfile(fullname) then - usedname, found = fullname, true + usedname = fullname + found = true break end end @@ -283,194 +233,70 @@ function ctxrunner.manipulate(ctxdata,ctxname,defaultname) if not found then usedname = resolvers.findfile(ctxdata.ctxname,"tex") - found = usedname ~= "" + found = usedname ~= "" end if not found and defaultname and defaultname ~= "" and lfs.isfile(defaultname) then - usedname, found = defaultname, true + usedname = defaultname + found = true end if not found then return end - ctxdata.xmldata = xml.load(usedname) + local xmldata = xml.load(usedname) - if not ctxdata.xmldata then + if not xmldata then return else -- test for valid, can be text file end - xml.include(ctxdata.xmldata,'ctx:include','name', table.append({'.', file.dirname(ctxdata.ctxname)},ctxdata.locations)) - - ctxdata.variables['job'] = ctxdata.jobname - - ctxdata.flags = xml.collect_texts(ctxdata.xmldata,"/ctx:job/ctx:flags/ctx:flag",true) - ctxdata.environments = xml.collect_texts(ctxdata.xmldata,"/ctx:job/ctx:process/ctx:resources/ctx:environment",true) - ctxdata.modules = xml.collect_texts(ctxdata.xmldata,"/ctx:job/ctx:process/ctx:resources/ctx:module",true) - ctxdata.filters = xml.collect_texts(ctxdata.xmldata,"/ctx:job/ctx:process/ctx:resources/ctx:filter",true) - ctxdata.modes = xml.collect_texts(ctxdata.xmldata,"/ctx:job/ctx:process/ctx:resources/ctx:mode",true) - ctxdata.messages = xml.collect_texts(ctxdata.xmldata,"ctx:message",true) - - ctxdata.flags = ctxrunner.reflag(ctxdata.flags) - - local messages = ctxdata.messages - for i=1,#messages do - report("ctx comment: %s", xml.tostring(messages[i])) - end - - for r, d, k in xml.elements(ctxdata.xmldata,"ctx:value[@name='job']") do - d[k] = ctxdata.variables['job'] or "" - end - - local commands = { } - for e in xml.collected(ctxdata.xmldata,"/ctx:job/ctx:preprocess/ctx:processors/ctx:processor") do - commands[e.at and e.at['name'] or "unknown"] = e - end - - local suffix = xml.filter(ctxdata.xmldata,"/ctx:job/ctx:preprocess/attribute('suffix')") or ctxdata.suffix - local runlocal = xml.filter(ctxdata.xmldata,"/ctx:job/ctx:preprocess/ctx:processors/attribute('local')") - - runlocal = toboolean(runlocal) - - for files in xml.collected(ctxdata.xmldata,"/ctx:job/ctx:preprocess/ctx:files") do - for pattern in xml.collected(files,"ctx:file") do + local ctxpaths = table.append({'.', file.dirname(ctxdata.ctxname)}, ctx_locations) - preprocessor = pattern.at['processor'] or "" + xml.include(xmldata,'ctx:include','name', ctxpaths) - if preprocessor ~= "" then + local flags = ctxdata.flags - ctxdata.variables['old'] = ctxdata.jobname - for r, d, k in xml.elements(ctxdata.xmldata,"ctx:value") do - local ek = d[k] - local ekat = ek.at['name'] - if ekat == 'old' then - d[k] = ctxrunner.substitute(ctxdata.variables[ekat] or "") - end - end - - pattern = ctxrunner.justtext(xml.tostring(pattern)) - - local oldfiles = dir.glob(pattern) - - local pluspath = false - if #oldfiles == 0 then - -- message: no files match pattern - local paths = ctxdata.paths - for i=1,#paths do - local p = paths[i] - local oldfiles = dir.glob(path.join(p,pattern)) - if #oldfiles > 0 then - pluspath = true - break - end - end - end - if #oldfiles == 0 then - -- message: no old files - else - for i=1,#oldfiles do - local oldfile = oldfiles[i] - local newfile = oldfile .. "." .. suffix -- addsuffix will add one only - if ctxdata.runlocal then - newfile = file.basename(newfile) - end - if oldfile ~= newfile and file.needsupdate(oldfile,newfile) then - -- message: oldfile needs preprocessing - -- os.remove(newfile) - local splitted = preprocessor:split(',') - for i=1,#splitted do - local pp = splitted[i] - local command = commands[pp] - if command then - command = xml.copy(command) - local suf = (command.at and command.at['suffix']) or ctxdata.suffix - if suf then - newfile = oldfile .. "." .. suf - end - if ctxdata.runlocal then - newfile = file.basename(newfile) - end - for r, d, k in xml.elements(command,"ctx:old") do - d[k] = ctxrunner.substitute(oldfile) - end - for r, d, k in xml.elements(command,"ctx:new") do - d[k] = ctxrunner.substitute(newfile) - end - ctxdata.variables['old'] = oldfile - ctxdata.variables['new'] = newfile - for r, d, k in xml.elements(command,"ctx:value") do - local ek = d[k] - local ekat = ek.at and ek.at['name'] - if ekat then - d[k] = ctxrunner.substitute(ctxdata.variables[ekat] or "") - end - end - -- potential optimization: when mtxrun run internal - command = xml.content(command) - command = ctxrunner.justtext(command) - report("command: %s",command) - local result = os.spawn(command) or 0 - -- somehow we get the wrong return value - if result > 0 then - report("error, return code: %s",result) - end - if ctxdata.runlocal then - oldfile = file.basename(oldfile) - end - end - end - if lfs.isfile(newfile) then - file.syncmtimes(oldfile,newfile) - ctxdata.prepfiles[oldfile] = true - else - report("error, check target location of new file: %s", newfile) - ctxdata.prepfiles[oldfile] = false - end - else - report("old file needs no preprocessing") - ctxdata.prepfiles[oldfile] = lfs.isfile(newfile) - end - end - end - end + for e in xml.collected(xmldata,"/ctx:job/ctx:flags/ctx:flag") do + local flag = xml.text(e) or "" + local key, value = match(flag,"^(.-)=(.+)$") + if key and value then + flags[key] = value + else + flags[flag] = true end end - ctxrunner.savelog(ctxdata) - end -function ctxrunner.preppedfile(ctxdata,filename) - if ctxdata.prepfiles[file.basename(filename)] then - return filename .. ".prep" - else - return filename +function ctxrunner.checkflags(ctxdata) + if ctxdata then + for k,v in next, ctxdata.flags do + if getargument(k) == nil then + setargument(k,v) + end + end end end --- rest +-- multipass control -scripts.context.multipass = { --- suffixes = { ".tuo", ".tuc" }, - suffixes = { ".tuc" }, - nofruns = 8, --- nofruns = 7, -- test oscillation -} +local multipass_suffixes = { ".tuc" } +local multipass_nofruns = 8 -- or 7 to test oscillation -function scripts.context.multipass.hashfiles(jobname) +local function multipass_hashfiles(jobname) local hash = { } - local suffixes = scripts.context.multipass.suffixes - for i=1,#suffixes do - local suffix = suffixes[i] + for i=1,#multipass_suffixes do + local suffix = multipass_suffixes[i] local full = jobname .. suffix hash[full] = md5.hex(io.loaddata(full) or "unknown") end return hash end -function scripts.context.multipass.changed(oldhash, newhash) +local function multipass_changed(oldhash, newhash) for k,v in next, oldhash do if v ~= newhash[k] then return true @@ -479,126 +305,7 @@ function scripts.context.multipass.changed(oldhash, newhash) return false end -function scripts.context.multipass.makeoptionfile(jobname,ctxdata,kindofrun,currentrun,finalrun,once) - -- take jobname from ctx - jobname = file.removesuffix(jobname) - local f = io.open(jobname..".top","w") - if f then - local function someflag(flag) - return (ctxdata and ctxdata.flags[flag]) or getargument(flag) - end - local function setvalue(flag,template,hash,default) - local a = someflag(flag) or default - if a and a ~= "" then - if hash then - if hash[a] then - f:write(format(template,a),"\n") - end - else - f:write(format(template,a),"\n") - end - end - end - local function setvalues(flag,template,plural) - if type(flag) == "table" then - for k, v in next, flag do - f:write(format(template,v),"\n") - end - else - local a = someflag(flag) or (plural and someflag(flag.."s")) - if a and a ~= "" then - for v in gmatch(a,"%s*([^,]+)") do - f:write(format(template,v),"\n") - end - end - end - end - local function setfixed(flag,template,...) - if someflag(flag) then - f:write(format(template,...),"\n") - end - end - local function setalways(template,...) - f:write(format(template,...),"\n") - end - -- - -- This might change ... we can just pass the relevant flags directly. - -- - setalways("%% runtime options files (command line driven)") - -- - setalways("\\unprotect") - -- - setalways("%% feedback and basic job control") - -- - -- Option file, we can pass more on the commandline some day soon. Actually we - -- should use directives and trackers. - -- - setfixed ("timing" , "\\usemodule[timing]") - setfixed ("batchmode" , "\\batchmode") - setfixed ("batch" , "\\batchmode") - setfixed ("nonstopmode" , "\\nonstopmode") - setfixed ("nonstop" , "\\nonstopmode") - -- setfixed ("tracefiles" , "\\tracefilestrue") - setfixed ("nostats" , "\\nomkivstatistics") - setfixed ("paranoid" , "\\def\\maxreadlevel{1}") - -- - setalways("%% handy for special styles") - -- - setalways("\\startluacode") - setalways("document = document or { }") - setalways(table.serialize(environment.arguments, "document.arguments")) - setalways(table.serialize(environment.files, "document.files")) - setalways("\\stopluacode") - -- - setalways("%% process info") - -- - setalways( "\\setupsystem[inputfile=%s]",getargument("input") or environment.files[1] or "\\jobname") - setvalue ("result" , "\\setupsystem[file=%s]") - setalways( "\\setupsystem[\\c!n=%s,\\c!m=%s]", kindofrun or 0, currentrun or 0) - setvalues("path" , "\\usepath[%s]") - setvalue ("setuppath" , "\\setupsystem[\\c!directory={%s}]") - setvalue ("randomseed" , "\\setupsystem[\\c!random=%s]") - setvalue ("arguments" , "\\setupenv[%s]") - if once then - setalways("\\enabledirectives[system.runonce]") - end - setalways("%% modes") - setvalues("modefile" , "\\readlocfile{%s}{}{}") - setvalues("mode" , "\\enablemode[%s]", true) - if ctxdata then - setvalues(ctxdata.modes, "\\enablemode[%s]") - end - -- - setalways("%% options (not that important)") - -- - setalways("\\startsetups *runtime:options") - setfixed ("color" , "\\setupcolors[\\c!state=\\v!start]") - setvalue ("separation" , "\\setupcolors[\\c!split=%s]") - setfixed ("noarrange" , "\\setuparranging[\\v!disable]") - if getargument('arrange') and not finalrun then - setalways( "\\setuparranging[\\v!disable]") - end - setalways("\\stopsetups") - -- - setalways("%% styles and modules") - -- - setalways("\\startsetups *runtime:modules") - setvalues("usemodule" , "\\usemodule[%s]", true) - setvalues("environment" , "\\environment %s ", true) - if ctxdata then - setvalues(ctxdata.modules, "\\usemodule[%s]") - setvalues(ctxdata.environments, "\\environment %s ") - end - setalways("\\stopsetups") - -- - setalways("%% done") - -- - setalways("\\protect \\endinput") - f:close() - end -end - -function scripts.context.multipass.copyluafile(jobname) -- obsolete +local function multipass_copyluafile(jobname) local tuaname, tucname = jobname..".tua", jobname..".tuc" if lfs.isfile(tuaname) then os.remove(tucname) @@ -606,120 +313,54 @@ function scripts.context.multipass.copyluafile(jobname) -- obsolete end end -scripts.context.cldsuffixes = table.tohash { - "cld", -} - -scripts.context.xmlsuffixes = table.tohash { - "xml", -} - -scripts.context.luasuffixes = table.tohash { - "lua", -} - -scripts.context.beforesuffixes = { - "tuo", "tuc" -} -scripts.context.aftersuffixes = { - "pdf", "tuo", "tuc", "log" -} - -scripts.context.errorsuffixes = { - "log" -} - -scripts.context.interfaces = { - en = "cont-en", - uk = "cont-uk", - de = "cont-de", - fr = "cont-fr", - nl = "cont-nl", - cs = "cont-cs", - it = "cont-it", - ro = "cont-ro", - pe = "cont-pe", -} - -scripts.context.defaultformats = { - "cont-en", - "cont-nl", --- "mptopdf", -- todo: mak emkiv variant --- "metatex", -- will show up soon --- "metafun", -- todo: mp formats --- "plain" -} - -local lpegpatterns, Cs, P = lpeg.patterns, lpeg.Cs, lpeg.P +-- local pattern = lpegpatterns.utfbom^-1 * (P("%% ") + P("% ")) * Cs((1-lpegpatterns.newline)^1) -local function analyze(filename) -- only files on current path - local f = io.open(file.addsuffix(filename,"tex")) - if f then - local t = { } - local line = f:read("*line") or "" - local preamble = lpeg.match(pattern,line) +local function preamble_analyze(filename) -- only files on current path + local t = { } + local line = io.loadlines(file.addsuffix(filename,"tex")) + if line then + local preamble = lpegmatch(pattern,line) if preamble then for key, value in gmatch(preamble,"(%S+)%s*=%s*(%S+)") do t[key] = value end t.type = "tex" - elseif line:find("^&1', file.replacesuffix(name,"pdf"))) ---~ end ---~ function scripts.context.closepdf(name) ---~ os.spawn(format('pdfclose --file "%s" 2>&1', file.replacesuffix(name,"pdf"))) ---~ end +-- automatically opening and closing pdf files -local pdfview -- delayed loading +local pdfview -- delayed -function scripts.context.openpdf(name,method) +local function pdf_open(name,method) pdfview = pdfview or dofile(resolvers.findfile("l-pdfview.lua","tex")) pdfview.setmethod(method) report(pdfview.status()) pdfview.open(file.replacesuffix(name,"pdf")) end -function scripts.context.closepdf(name,method) +local function pdf_close(name,method) pdfview = pdfview or dofile(resolvers.findfile("l-pdfview.lua","tex")) pdfview.setmethod(method) pdfview.close(file.replacesuffix(name,"pdf")) end -local function push_result_purge(oldbase,newbase) - for _, suffix in next, scripts.context.aftersuffixes do +-- result file handling + +local function result_push_purge(oldbase,newbase) + for _, suffix in next, usedsuffixes.after do local oldname = file.addsuffix(oldbase,suffix) local newname = file.addsuffix(newbase,suffix) os.remove(newname) @@ -727,8 +368,8 @@ local function push_result_purge(oldbase,newbase) end end -local function push_result_keep(oldbase,newbase) - for _, suffix in next, scripts.context.beforesuffixes do +local function result_push_keep(oldbase,newbase) + for _, suffix in next, usedsuffixes.before do local oldname = file.addsuffix(oldbase,suffix) local newname = file.addsuffix(newbase,suffix) local tmpname = "keep-"..oldname @@ -739,8 +380,8 @@ local function push_result_keep(oldbase,newbase) end end -local function save_result_error(oldbase,newbase) - for _, suffix in next, scripts.context.errorsuffixes do +local function result_save_error(oldbase,newbase) + for _, suffix in next, usedsuffixes.keep do local oldname = file.addsuffix(oldbase,suffix) local newname = file.addsuffix(newbase,suffix) os.remove(newname) -- to be sure @@ -748,8 +389,8 @@ local function save_result_error(oldbase,newbase) end end -local function save_result_purge(oldbase,newbase) - for _, suffix in next, scripts.context.aftersuffixes do +local function result_save_purge(oldbase,newbase) + for _, suffix in next, usedsuffixes.after do local oldname = file.addsuffix(oldbase,suffix) local newname = file.addsuffix(newbase,suffix) os.remove(newname) -- to be sure @@ -757,8 +398,8 @@ local function save_result_purge(oldbase,newbase) end end -local function save_result_keep(oldbase,newbase) - for _, suffix in next, scripts.context.aftersuffixes do +local function result_save_keep(oldbase,newbase) + for _, suffix in next, usedsuffixes.after do local oldname = file.addsuffix(oldbase,suffix) local newname = file.addsuffix(newbase,suffix) local tmpname = "keep-"..oldname @@ -768,313 +409,339 @@ local function save_result_keep(oldbase,newbase) end end -function scripts.context.run(ctxdata,filename) - -- filename overloads environment.files - local files = (filename and { filename }) or environment.files - if ctxdata then - -- todo: interface - for k,v in next, ctxdata.flags do - environment.setargument(k,v) +-- executing luatex + +local function flags_to_string(flags,prefix) -- context flags get prepended by c: + local t = { } + for k, v in table.sortedhash(flags) do + if prefix then + k = format("c:%s",k) + end + if not v or v == "" or v == '""' then + -- no need to flag false + elseif v == true then + t[#t+1] = format('--%s',k) + elseif type(v) == "string" then + t[#t+1] = format('--%s=%s',k,quote(v)) + else + t[#t+1] = format('--%s=%s',k,tostring(v)) end end - if #files > 0 then + return concat(t," ") +end + +local function luatex_command(l_flags,c_flags,filename) + return format('luatex %s %s "%s"', + flags_to_string(l_flags), + flags_to_string(c_flags,true), + filename + ) +end + +local function run_texexec(filename,a_purge,a_purgeall) + if false then + -- we need to write a top etc too and run mp etc so it's not worth the + -- trouble, so it will take a while before the next is finished -- - local interface = getargument("interface") - -- todo: getargument("interface","en") - interface = (type(interface) == "string" and interface) or "en" + -- context --extra=texutil --convert myfile + else + local texexec = resolvers.findfile("texexec.rb") or "" + if texexec ~= "" then + os.setenv("RUBYOPT","") + local options = environment.reconstructcommandline(environment.arguments_after) + options = gsub(options,"--purge","") + options = gsub(options,"--purgeall","") + local command = format("ruby %s %s",texexec,options) + if a_purge then + os.execute(command) + scripts.context.purge_job(filename,false,true) + elseif a_purgeall then + os.execute(command) + scripts.context.purge_job(filename,true,true) + else + os.exec(command) + end + end + end +end + +-- + +function scripts.context.run(ctxdata,filename) + -- + local a_nofile = getargument("nofile") + -- + local files = environment.files or { } + -- + local filelist, mainfile + -- + if filename then + -- the given forced name is processed, the filelist is passed to context + mainfile = filename + filelist = { filename } + -- files = files + elseif a_nofile then + -- the list of given files is processed using the dummy file + mainfile = usedfiles.nop + filelist = { usedfiles.nop } + -- files = { } + elseif #files > 0 then + -- the list of given files is processed using the stub file + mainfile = usedfiles.yes + filelist = files + files = { } + else + return + end + -- + local interface = validstring(getargument("interface")) or "en" + local formatname = formatofinterface[interface] or "cont-en" + local formatfile, scriptfile = resolvers.locateformat(formatname) + if not formatfile or not scriptfile then + report("warning: no format found, forcing remake (commandline driven)") + scripts.context.make(formatname) + formatfile, scriptfile = resolvers.locateformat(formatname) + end + if formatfile and scriptfile then + -- okay + elseif formatname then + report("error, no format found with name: %s, aborting",formatname) + return + else + report("error, no format found (provide formatname or interface)") + return + end + -- + local a_mkii = getargument("mkii") or getargument("pdftex") or getargument("xetex") + local a_purge = getargument("purge") + local a_purgeall = getargument("purgeall") + local a_purgeresult = getargument("purgeresult") + local a_global = getargument("global") + local a_timing = getargument("timing") + local a_batchmode = getargument("batchmode") + local a_nonstopmode = getargument("nonstopmode") + local a_once = getargument("once") + local a_synctex = getargument("synctex") + local a_backend = getargument("backend") + local a_arrange = getargument("arrange") + local a_noarrange = getargument("noarrange") + -- + for i=1,#filelist do -- - local formatname = scripts.context.interfaces[interface] or "cont-en" - local formatfile, scriptfile = resolvers.locateformat(formatname) - -- this catches the command line - if not formatfile or not scriptfile then - report("warning: no format found, forcing remake (commandline driven)") - scripts.context.make(formatname) - formatfile, scriptfile = resolvers.locateformat(formatname) + local filename = filelist[i] + local basename = file.basename(filename) + local pathname = file.dirname(filename) + local jobname = file.removesuffix(basename) + local ctxname = ctxdata and ctxdata.ctxname + -- + if pathname == "" and not a_global and filename ~= usedfiles.nop then + filename = "./" .. filename end -- - if formatfile and scriptfile then - for i=1,#files do - local filename = files[i] - local basename, pathname = file.basename(filename), file.dirname(filename) - local jobname = file.removesuffix(basename) - if pathname == "" and not getargument("global") then - filename = "./" .. filename + local analysis = preamble_analyze(filename) + -- + if a_mkii or analysis.engine == 'pdftex' or analysis.engine == 'xetex' then + run_texexec(filename,a_purge,a_purgeall) + else + if analysis.interface and analysis.interface ~= interface then + formatname = formatofinterface[analysis.interface] or formatname + formatfile, scriptfile = resolvers.locateformat(formatname) + end + if not formatfile or not scriptfile then + report("warning: no format found, forcing remake (source driven)") + scripts.context.make(formatname) + formatfile, scriptfile = resolvers.locateformat(formatname) + end + if formatfile and scriptfile then + -- + local suffix = validstring(getargument("suffix")) + local resultname = validstring(getargument("result")) + if suffix then + resultname = file.removesuffix(jobname) .. suffix end - -- look at the first line - local a = analyze(filename) - if a and (a.engine == 'pdftex' or a.engine == 'xetex' or getargument("pdftex") or getargument("xetex")) then - if false then - -- we need to write a top etc too and run mp etc so it's not worth the - -- trouble, so it will take a while before the next is finished - -- - -- context --extra=texutil --convert myfile - else - local texexec = resolvers.findfile("texexec.rb") or "" - if texexec ~= "" then - os.setenv("RUBYOPT","") - local options = environment.reconstructcommandline(environment.arguments_after) - options = gsub(options,"--purge","") - options = gsub(options,"--purgeall","") - local command = format("ruby %s %s",texexec,options) - if getargument("purge") then - os.execute(command) - scripts.context.purge_job(filename,false,true) - elseif getargument("purgeall") then - os.execute(command) - scripts.context.purge_job(filename,true,true) - else - os.exec(command) - end + local oldbase = "" + local newbase = "" + if resultname then + oldbase = file.removesuffix(jobname) + newbase = file.removesuffix(resultname) + if oldbase ~= newbase then + if a_purgeresult then + result_push_purge(oldbase,newbase) + else + result_push_keep(oldbase,newbase) end + else + resultname = nil end - else - if a and a.interface and a.interface ~= interface then - formatname = scripts.context.interfaces[a.interface] or formatname - formatfile, scriptfile = resolvers.locateformat(formatname) + end + -- + local pdfview = getargument("autopdf") or getargument("closepdf") + if pdfview then + pdf_close(filename,pdfview) + if resultname then + pdf_close(resultname,pdfview) end - -- this catches the command line - if not formatfile or not scriptfile then - report("warning: no format found, forcing remake (source driven)") - scripts.context.make(formatname) - formatfile, scriptfile = resolvers.locateformat(formatname) + end + -- + local okay = statistics.checkfmtstatus(formatfile) + if okay ~= true then + report("warning: %s, forcing remake",tostring(okay)) + scripts.context.make(formatname) + end + -- + local oldhash = multipass_hashfiles(jobname) + local newhash = { } + local maxnofruns = once and 1 or multipass_nofruns + -- + local c_flags = { + directives = validstring(environment.directives), -- gets passed via mtxrun + trackers = validstring(environment.trackers), -- gets passed via mtxrun + experiments = validstring(environment.experiments), -- gets passed via mtxrun + -- + result = validstring(resultname), + input = validstring(getargument("input") or filename), -- alternative input + fulljobname = validstring(filename), + files = concat(files,","), + ctx = validstring(ctxname), + } + -- + for k, v in next, environment.arguments do + if c_flags[k] == nil then + c_flags[k] = v end - if formatfile and scriptfile then - -- we default to mkiv xml ! - -- the --prep argument might become automatic (and noprep) - local suffix = file.extname(filename) or "?" - if scripts.context.xmlsuffixes[suffix] or getargument("forcexml") then - if getargument("mkii") then - filename = makestub(true,"\\processXMLfilegrouped{%s}",filename) - else - filename = makestub(true,"\\xmlprocess{\\xmldocument}{%s}{}",filename) - end - elseif scripts.context.cldsuffixes[suffix] or getargument("forcecld") then - -- self contained cld files need to have a starttext/stoptext (less fontloading) - filename = makestub(false,"\\ctxlua{context.runfile('%s')}",filename) - elseif scripts.context.luasuffixes[suffix] or getargument("forcelua") then - filename = makestub(true,"\\ctxlua{dofile('%s')}",filename) - elseif getargument("prep") then - -- we need to keep the original jobname - filename = makestub(true,"\\readfile{%s}{}{}",filename,ctxrunner.preppedfile(ctxdata,filename)) - end - -- - -- todo: also other stubs - -- - local suffix, resultname = getargument("suffix"), getargument("result") - if type(suffix) == "string" then - resultname = file.removesuffix(jobname) .. suffix - end - local oldbase, newbase = "", "" - if type(resultname) == "string" then - oldbase = file.removesuffix(jobname) - newbase = file.removesuffix(resultname) - if oldbase ~= newbase then - if getargument("purgeresult") then - push_result_purge(oldbase,newbase) - else - push_result_keep(oldbase,newbase) - end - else - resultname = nil - end - else - resultname = nil - end - -- - local pdfview = getargument("autopdf") or getargument("closepdf") - if pdfview then - scripts.context.closepdf(filename,pdfview) - if resultname then - scripts.context.closepdf(resultname,pdfview) - end - end - -- - local okay = statistics.checkfmtstatus(formatfile) - if okay ~= true then - report("warning: %s, forcing remake",tostring(okay)) - scripts.context.make(formatname) - end - -- - local flags = { } - if getargument("batchmode") or getargument("batch") then - flags[#flags+1] = "--interaction=batchmode" - end - if getargument("synctex") then - -- this should become a directive - report("warning: synctex is enabled") -- can add upto 5% runtime - flags[#flags+1] = "--synctex=1" - end - flags[#flags+1] = "--fmt=" .. quote(formatfile) - flags[#flags+1] = "--lua=" .. quote(scriptfile) - -- - -- We pass these directly. - -- - ---~ local silent = getargument("silent") ---~ local noconsole = getargument("noconsole") ---~ local directives = getargument("directives") ---~ local trackers = getargument("trackers") ---~ if silent == true then ---~ silent = "*" ---~ end ---~ if type(silent) == "string" then ---~ if type(directives) == "string" then ---~ directives = format("%s,logs.blocked={%s}",directives,silent) ---~ else ---~ directives = format("logs.blocked={%s}",silent) ---~ end ---~ end ---~ if noconsole then ---~ if type(directives) == "string" then ---~ directives = format("%s,logs.target=file",directives) ---~ else ---~ directives = format("logs.target=file") ---~ end ---~ end - - local directives = environment.directives - local trackers = environment.trackers - local experiments = environment.experiments - - -- - if type(directives) == "string" then - flags[#flags+1] = format('--directives="%s"',directives) - end - if type(trackers) == "string" then - flags[#flags+1] = format('--trackers="%s"',trackers) - end - -- - local backend = getargument("backend") - if type(backend) ~= "string" then - backend = "pdf" - end - flags[#flags+1] = format('--backend="%s"',backend) - -- - local command = format("luatex %s %s \\stoptext", concat(flags," "), quote(filename)) - local oldhash, newhash = scripts.context.multipass.hashfiles(jobname), { } - local once = getargument("once") - local maxnofruns = (once and 1) or scripts.context.multipass.nofruns - local arrange = getargument("arrange") - for i=1,maxnofruns do - -- 1:first run, 2:successive run, 3:once, 4:last of maxruns - local kindofrun = (once and 3) or (i==1 and 1) or (i==maxnofruns and 4) or 2 - scripts.context.multipass.makeoptionfile(jobname,ctxdata,kindofrun,i,false,once) -- kindofrun, currentrun, final - report("run %s: %s",i,command) ---~ print("\n") -- cleaner, else continuation on same line - print("") -- cleaner, else continuation on same line - local returncode, errorstring = os.spawn(command) - --~ if returncode == 3 then - --~ scripts.context.make(formatname) - --~ returncode, errorstring = os.spawn(command) - --~ if returncode == 3 then - --~ report("ks: return code 3, message: %s",errorstring or "?") - --~ os.exit(1) - --~ end - --~ end - if not returncode then - report("fatal error: no return code, message: %s",errorstring or "?") - if resultname then - save_result_error(oldbase,newbase) - end - os.exit(1) - break - elseif returncode > 0 then - report("fatal error: return code: %s",returncode or "?") - if resultname then - save_result_error(oldbase,newbase) - end - os.exit(returncode) - break - else - scripts.context.multipass.copyluafile(jobname) - -- scripts.context.multipass.copytuifile(jobname) - newhash = scripts.context.multipass.hashfiles(jobname) - if scripts.context.multipass.changed(oldhash,newhash) then - oldhash = newhash - else - break - end - end - end - -- - if arrange then - local kindofrun = 3 - scripts.context.multipass.makeoptionfile(jobname,ctxdata,kindofrun,i,true) -- kindofrun, currentrun, final - report("arrange run: %s",command) - local returncode, errorstring = os.spawn(command) - if not returncode then - report("fatal error: no return code, message: %s",errorstring or "?") - os.exit(1) - elseif returncode > 0 then - report("fatal error: return code: %s",returncode or "?") - os.exit(returncode) - end - end - -- - if getargument("purge") then - scripts.context.purge_job(jobname) - elseif getargument("purgeall") then - scripts.context.purge_job(jobname,true) - end - -- - os.remove(jobname..".top") - -- + end + -- + local l_flags = { + ["interaction"] = (a_batchmode and "batchmode") or (a_nonstopmode and "nonstopmode") or nil, + ["synctex"] = a_synctex and 1 or nil, + ["no-parse-first-line"] = true, + -- ["no-mktex"] = true, + -- ["file-line-error-style"] = true, + ["fmt"] = formatfile, + ["lua"] = scriptfile, + ["jobname"] = jobname, + } + -- + if a_synctex then + report("warning: synctex is enabled") -- can add upto 5% runtime + end + -- + -- kindofrun: 1:first run, 2:successive run, 3:once, 4:last of maxruns + -- + for currentrun=1,maxnofruns do + -- + c_flags.final = false + c_flags.kindofrun = (a_once and 3) or (currentrun==1 and 1) or (currentrun==maxnofruns and 4) or 2 + c_flags.currentrun = currentrun + c_flags.noarrange = a_noarrange or a_arrange or nil + -- + local command = luatex_command(l_flags,c_flags,mainfile) + -- + report("run %s: %s",i,command) + print("") -- cleaner, else continuation on same line + local returncode, errorstring = os.spawn(command) + if not returncode then + report("fatal error: no return code, message: %s",errorstring or "?") if resultname then - if getargument("purgeresult") then - -- so, if there is no result then we don't get the old one, but - -- related files (log etc) are still there for tracing purposes - save_result_purge(oldbase,newbase) - else - save_result_keep(oldbase,newbase) - end - report("result renamed to: %s",newbase) - end - -- - if getargument("purge") then - scripts.context.purge_job(resultname) - elseif getargument("purgeall") then - scripts.context.purge_job(resultname,true) - end - -- - local pdfview = getargument("autopdf") - if pdfview then - scripts.context.openpdf(resultname or filename,pdfview) + result_save_error(oldbase,newbase) end - -- - if getargument("timing") then - report() - report("you can process (timing) statistics with:",jobname) - report() - report("context --extra=timing '%s'",jobname) - report("mtxrun --script timing --xhtml [--launch --remove] '%s'",jobname) - report() + os.exit(1) + break + elseif returncode == 0 then + multipass_copyluafile(jobname) + newhash = multipass_hashfiles(jobname) + if multipass_changed(oldhash,newhash) then + oldhash = newhash + else + break end else - if formatname then - report("error, no format found with name: %s, skipping",formatname) - else - report("error, no format found (provide formatname or interface)") + report("fatal error: return code: %s",returncode or "?") + if resultname then + result_save_error(oldbase,newbase) end + os.exit(1) -- (returncode) break end + -- + end + -- + if a_arrange then + -- + c_flags.final = true + c_flags.kindofrun = 3 + c_flags.currentrun = c_flags.currentrun + 1 + c_flags.noarrange = nil + -- + local command = luatex_command(l_flags,c_flags,mainfile) + -- + report("arrange run: %s",command) + local returncode, errorstring = os.spawn(command) + if not returncode then + report("fatal error: no return code, message: %s",errorstring or "?") + os.exit(1) + elseif returncode > 0 then + report("fatal error: return code: %s",returncode or "?") + os.exit(returncode) + end + -- + end + -- + if a_purge then + scripts.context.purge_job(jobname) + elseif a_purgeall then + scripts.context.purge_job(jobname,true) + end + -- + if resultname then + if a_purgeresult then + -- so, if there is no result then we don't get the old one, but + -- related files (log etc) are still there for tracing purposes + result_save_purge(oldbase,newbase) + else + result_save_keep(oldbase,newbase) + end + report("result renamed to: %s",newbase) + end + -- + if purge then + scripts.context.purge_job(resultname) + elseif purgeall then + scripts.context.purge_job(resultname,true) + end + -- + local pdfview = getargument("autopdf") + if pdfview then + pdf_open(resultname or jobname,pdfview) + end + -- + if a_timing then + report() + report("you can process (timing) statistics with:",jobname) + report() + report("context --extra=timing '%s'",jobname) + report("mtxrun --script timing --xhtml [--launch --remove] '%s'",jobname) + report() end - end - else - if formatname then - report("error, no format found with name: %s, aborting",formatname) else - report("error, no format found (provide formatname or interface)") + if formatname then + report("error, no format found with name: %s, skipping",formatname) + else + report("error, no format found (provide formatname or interface)") + end + break end end end + -- end -function scripts.context.pipe() +function scripts.context.pipe() -- still used? -- context --pipe -- context --pipe --purge --dummyfile=whatever.tmp local interface = getargument("interface") interface = (type(interface) == "string" and interface) or "en" - local formatname = scripts.context.interfaces[interface] or "cont-en" + local formatname = formatofinterface[interface] or "cont-en" local formatfile, scriptfile = resolvers.locateformat(formatname) if not formatfile or not scriptfile then report("warning: no format found, forcing remake (commandline driven)") @@ -1087,11 +754,16 @@ function scripts.context.pipe() report("warning: %s, forcing remake",tostring(okay)) scripts.context.make(formatname) end - local flags = { - "--interaction=scrollmode", - "--fmt=" .. quote(formatfile), - "--lua=" .. quote(scriptfile), - "--backend=pdf", + local l_flags = { + interaction = "scrollmode", + fmt = formatfile, + lua = scriptfile, + } + local c_flags = { + backend = "pdf", + final = false, + kindofrun = 3, + currentrun = 1, } local filename = getargument("dummyfile") or "" if filename == "" then @@ -1100,10 +772,9 @@ function scripts.context.pipe() else filename = file.addsuffix(filename,"tmp") io.savedata(filename,"\\relax") - scripts.context.multipass.makeoptionfile(filename,{ flags = flags },3,1,false) -- kindofrun, currentrun, final report("entering scrollmode using '%s' with optionfile, end job with \\end",filename) end - local command = format("luatex %s %s", concat(flags," "), quote(filename)) + local command = luatex_command(l_flags,c_flags,filename) os.spawn(command) if getargument("purge") then scripts.context.purge_job(filename) @@ -1123,11 +794,9 @@ end local make_mkiv_format = environment.make_format local function make_mkii_format(name,engine) - if getargument(engine) then - local command = format("mtxrun texexec.rb --make --%s %s",name,engine) - report("running command: %s",command) - os.spawn(command) - end + local command = format("mtxrun texexec.rb --make --%s %s",name,engine) + report("running command: %s",command) + os.spawn(command) end function scripts.context.generate() @@ -1140,14 +809,17 @@ function scripts.context.make(name) if not getargument("fast") then -- as in texexec scripts.context.generate() end - local list = (name and { name }) or (environment.files[1] and environment.files) or scripts.context.defaultformats + local list = (name and { name }) or (environment.files[1] and environment.files) or defaultformats + local engine = getargument("engine") or "luatex" for i=1,#list do local name = list[i] - name = scripts.context.interfaces[name] or name or "" - if name ~= "" then + name = formatofinterface[name] or name or "" + if name == "" then + -- nothing + elseif engine == "luatex" then make_mkiv_format(name) - make_mkii_format(name,"pdftex") - make_mkii_format(name,"xetex") + elseif engine == "pdftex" or engine == "xetex" then + make_mkii_format(name,engine) end end end @@ -1155,68 +827,77 @@ end function scripts.context.ctx() local ctxdata = ctxrunner.new() ctxdata.jobname = environment.files[1] - ctxrunner.manipulate(ctxdata,getargument("ctx")) + ctxrunner.checkfile(ctxdata,getargument("ctx")) + ctxrunner.checkflags(ctxdata) scripts.context.run(ctxdata) end function scripts.context.autoctx() local ctxdata = nil - local files = (filename and { filename }) or environment.files + local files = environment.files local firstfile = #files > 0 and files[1] - if firstfile and file.extname(firstfile) == "xml" then - local f = io.open(firstfile) - if f then - local chunk = f:read(512) or "" - f:close() - local ctxname = match(chunk,"<%?context%-directive%s+job%s+ctxfile%s+([^ ]-)%s*?>") - if ctxname then - ctxdata = ctxrunner.new() - ctxdata.jobname = firstfile - ctxrunner.manipulate(ctxdata,ctxname) + if firstfile then + local suffix = file.suffix(firstfile) + if suffix == "xml" then + local chunk = io.loadchunk(firstfile) -- 1024 + if chunk then + local ctxname = match(chunk,"<%?context%-directive%s+job%s+ctxfile%s+([^ ]-)%s*?>") + if ctxname then + ctxdata = ctxrunner.new() + ctxdata.jobname = firstfile + ctxrunner.checkfile(ctxdata,ctxname) + ctxrunner.checkflags(ctxdata) + end end + elseif suffix == "tex" then + -- maybe but we scan the preamble later too end end scripts.context.run(ctxdata) end -local template = [[ -\starttext - \directMPgraphic{%s}{input "%s"} -\stoptext -]] - -local loaded = false +-- no longer ok as mlib-run misses something: -function scripts.context.metapost() - local filename = environment.files[1] or "" - if not loaded then - dofile(resolvers.findfile("mlib-run.lua")) - loaded = true - commands = commands or { } - commands.writestatus = report -- no longer needed - end - local formatname = getargument("format") or "metafun" - if formatname == "" or type(formatname) == "boolean" then - formatname = "metafun" - end - if getargument("pdf") then - local basename = file.removesuffix(filename) - local resultname = getargument("result") or basename - local jobname = "mtx-context-metapost" - local tempname = file.addsuffix(jobname,"tex") - io.savedata(tempname,format(template,"metafun",filename)) - environment.files[1] = tempname - environment.setargument("result",resultname) - environment.setargument("once",true) - scripts.context.run() - scripts.context.purge_job(jobname,true) - scripts.context.purge_job(resultname,true) - elseif getargument("svg") then - metapost.directrun(formatname,filename,"svg") - else - metapost.directrun(formatname,filename,"mps") - end -end +-- local template = [[ +-- \starttext +-- \directMPgraphic{%s}{input "%s"} +-- \stoptext +-- ]] +-- +-- local loaded = false +-- +-- function scripts.context.metapost() +-- local filename = environment.files[1] or "" +-- if not loaded then +-- dofile(resolvers.findfile("mlib-run.lua")) +-- loaded = true +-- commands = commands or { } +-- commands.writestatus = report -- no longer needed +-- end +-- local formatname = getargument("format") or "metafun" +-- if formatname == "" or type(formatname) == "boolean" then +-- formatname = "metafun" +-- end +-- if getargument("pdf") then +-- local basename = file.removesuffix(filename) +-- local resultname = getargument("result") or basename +-- local jobname = "mtx-context-metapost" +-- local tempname = file.addsuffix(jobname,"tex") +-- io.savedata(tempname,format(template,"metafun",filename)) +-- environment.files[1] = tempname +-- setargument("result",resultname) +-- setargument("once",true) +-- scripts.context.run() +-- scripts.context.purge_job(jobname,true) +-- scripts.context.purge_job(resultname,true) +-- elseif getargument("svg") then +-- metapost.directrun(formatname,filename,"svg") +-- else +-- metapost.directrun(formatname,filename,"mps") +-- end +-- end + +-- -- function scripts.context.version() local name = resolvers.findfile("context.mkiv") @@ -1238,6 +919,8 @@ function scripts.context.version() end end +-- purging files + local generic_files = { "texexec.tex", "texexec.tui", "texexec.tuo", "texexec.tuc", "texexec.tua", @@ -1262,7 +945,6 @@ local persistent_runfiles = { } local special_runfiles = { ---~ "-mpgraph*", "-mprun*", "-temp-*" -- hm, wasn't this escaped? "-mpgraph", "-mprun", "-temp-" } @@ -1278,9 +960,6 @@ local function purge_file(dfile,cfile) end end -local function remove_special_files(pattern) -end - function scripts.context.purge_job(jobname,all,mkiitoo) if jobname and jobname ~= "" then jobname = file.basename(jobname) @@ -1318,7 +997,7 @@ function scripts.context.purge(all,pattern,mkiitoo) local deleted = { } for i=1,#files do local name = files[i] - local suffix = file.extname(name) + local suffix = file.suffix(name) local basename = file.basename(name) if obsolete[suffix] or temporary[suffix] or persistent[suffix] or generic[basename] then deleted[#deleted+1] = purge_file(name) @@ -1335,12 +1014,14 @@ function scripts.context.purge(all,pattern,mkiitoo) end end +-- touching files (signals regeneration of formats) + local function touch(name,pattern) local name = resolvers.findfile(name) local olddata = io.loaddata(name) if olddata then local oldversion, newversion = "", os.date("%Y.%m.%d %H:%M") - local newdata, ok = olddata:gsub(pattern,function(pre,mid,post) + local newdata, ok = gsub(olddata,pattern,function(pre,mid,post) oldversion = mid return pre .. newversion .. post end) @@ -1374,25 +1055,29 @@ function scripts.context.touch() touchfiles("mkii") touchfiles("mkiv") touchfiles("mkvi") + touchfiles("mkix") + touchfiles("mkxi") + else + report("touching needs --expert") end end -- modules local labels = { "title", "comment", "status" } -local cards = { "*.mkvi", "*.mkiv", "*.tex" } +local cards = { "*.mkvi", "*.mkiv", "*.mkxi", "*.mkix", "*.tex" } function scripts.context.modules(pattern) local list = { } local found = resolvers.findfile("context.mkiv") if not pattern or pattern == "" then -- official files in the tree - for _, card in ipairs(cards) do - resolvers.findwildcardfiles(card,list) + for i=1,#cards do + resolvers.findwildcardfiles(cards[i],list) end -- my dev path - for _, card in ipairs(cards) do - dir.glob(file.join(file.dirname(found),card),list) + for i=1,#cards do + dir.glob(file.join(file.dirname(found),cards[i]),list) end else resolvers.findwildcardfiles(pattern,list) @@ -1405,7 +1090,7 @@ function scripts.context.modules(pattern) if not done[base] then done[base] = true local suffix = file.suffix(base) - if suffix == "tex" or suffix == "mkiv" or suffix == "mkvi" then + if suffix == "tex" or suffix == "mkiv" or suffix == "mkvi" or suffix == "mkix" or suffix == "mkxi" then local prefix = match(base,"^([xmst])%-") if prefix then v = resolvers.findfile(base) -- so that files on my dev path are seen @@ -1462,30 +1147,28 @@ end function scripts.context.extra() local extra = getargument("extra") - if type(extra) == "string" then - if getargument("help") then - scripts.context.extras(extra) + if type(extra) ~= "string" then + scripts.context.extras() + elseif getargument("help") then + scripts.context.extras(extra) + else + local fullextra = extra + if not find(fullextra,"mtx%-context%-") then + fullextra = "mtx-context-" .. extra + end + local foundextra = resolvers.findfile(fullextra) + if foundextra == "" then + scripts.context.extras() + return else - local fullextra = extra - if not find(fullextra,"mtx%-context%-") then - fullextra = "mtx-context-" .. extra - end - local foundextra = resolvers.findfile(fullextra) - if foundextra == "" then - scripts.context.extras() - return - else - report("processing extra: %s", foundextra) - end - environment.setargument("purgeall",true) - local result = environment.setargument("result") or "" - if result == "" then - environment.setargument("result","context-extra") - end - scripts.context.run(nil,foundextra) + report("processing extra: %s", foundextra) end - else - scripts.context.extras() + setargument("purgeall",true) + local result = getargument("result") or "" + if result == "" then + setargument("result","context-extra") + end + scripts.context.run(nil,foundextra) end end @@ -1493,25 +1176,27 @@ end function scripts.context.trackers() environment.files = { resolvers.findfile("m-trackers.mkiv") } - scripts.context.multipass.nofruns = 1 - environment.setargument("purgeall",true) + multipass_nofruns = 1 + setargument("purgeall",true) scripts.context.run() end function scripts.context.directives() environment.files = { resolvers.findfile("m-directives.mkiv") } - scripts.context.multipass.nofruns = 1 - environment.setargument("purgeall",true) + multipass_nofruns = 1 + setargument("purgeall",true) scripts.context.run() end function scripts.context.logcategories() environment.files = { resolvers.findfile("m-logcategories.mkiv") } - scripts.context.multipass.nofruns = 1 - environment.setargument("purgeall",true) + multipass_nofruns = 1 + setargument("purgeall",true) scripts.context.run() end +-- updating (often one will use mtx-update instead) + function scripts.context.timed(action) statistics.timed(action) end @@ -1548,7 +1233,7 @@ function scripts.context.update() local function is_okay(basetree) for _, tree in next, validtrees do local pattern = gsub(tree,"%-","%%-") - if basetree:find(pattern) then + if find(basetree,pattern) then return tree end end @@ -1614,7 +1299,7 @@ function scripts.context.update() end for k in zipfile:files() do local filename = k.filename - if filename:find("/$") then + if find(filename,"/$") then lfs.mkdir(filename) else local data = zip.loaddata(zipfile,filename) @@ -1652,6 +1337,23 @@ function scripts.context.update() end end +-- getting it done + +if getargument("nostats") then + setargument("nostatistics",true) + setargument("nostat",nil) +end + +if getargument("batch") then + setargument("batchmode",true) + setargument("batch",nil) +end + +if getargument("nonstop") then + setargument("nonstopmode",true) + setargument("nonstop",nil) +end + do local silent = getargument("silent") @@ -1664,9 +1366,9 @@ do end if getargument("once") then - scripts.context.multipass.nofruns = 1 + multipass_nofruns = 1 elseif getargument("runs") then - scripts.context.multipass.nofruns = tonumber(getargument("runs")) or nil + multipass_nofruns = tonumber(getargument("runs")) or nil end if getargument("profile") then @@ -1674,7 +1376,6 @@ if getargument("profile") then end if getargument("run") then --- scripts.context.timed(scripts.context.run) scripts.context.timed(scripts.context.autoctx) elseif getargument("make") then scripts.context.timed(function() scripts.context.make() end) @@ -1682,8 +1383,8 @@ elseif getargument("generate") then scripts.context.timed(function() scripts.context.generate() end) elseif getargument("ctx") then scripts.context.timed(scripts.context.ctx) -elseif getargument("mp") or getargument("metapost") then - scripts.context.timed(scripts.context.metapost) +-- elseif getargument("mp") or getargument("metapost") then +-- scripts.context.timed(scripts.context.metapost) elseif getargument("version") then application.identify() scripts.context.version() @@ -1711,10 +1412,7 @@ elseif getargument("showdirectives") or getargument("directives") == true then scripts.context.directives() elseif getargument("showlogcategories") then scripts.context.logcategories() -elseif getargument("track") and type(getargument("track")) == "boolean" then -- for old times sake, will go - scripts.context.trackers() -elseif environment.files[1] then --- scripts.context.timed(scripts.context.run) +elseif environment.files[1] or getargument("nofile") then scripts.context.timed(scripts.context.autoctx) elseif getargument("pipe") then scripts.context.timed(scripts.context.pipe) diff --git a/scripts/context/lua/mtx-convert.lua b/scripts/context/lua/mtx-convert.lua index b4e6e010b..04ff38aad 100644 --- a/scripts/context/lua/mtx-convert.lua +++ b/scripts/context/lua/mtx-convert.lua @@ -83,7 +83,7 @@ function converters.convertpath(inputpath,outputpath) inputpath = inputpath or "." outputpath = outputpath or "." for name in lfs.dir(inputpath) do - local suffix = file.extname(name) + local suffix = file.suffix(name) if find(name,"%.$") then -- skip . and .. elseif converters[suffix] then @@ -102,7 +102,7 @@ function converters.convertpath(inputpath,outputpath) end function converters.convertfile(oldname) - local suffix = file.extname(oldname) + local suffix = file.suffix(oldname) if converters[suffix] then local newname = file.replacesuffix(oldname,"pdf") if oldname == newname then diff --git a/scripts/context/lua/mtx-epub.lua b/scripts/context/lua/mtx-epub.lua index 7d1c15774..28a37fec2 100644 --- a/scripts/context/lua/mtx-epub.lua +++ b/scripts/context/lua/mtx-epub.lua @@ -11,8 +11,8 @@ if not modules then modules = { } end modules ['mtx-epub'] = { -- really an id but has some special property). Then there is this ncx suffix -- thing. Somehow it give the impression of a reversed engineered application -- format so it will probably take a few cycles to let it become a real --- clean standard. Thanks to Adam Reviczky for helping to figure out all these --- puzzling details. +-- clean standard. Thanks to Adam Reviczky, Luigi Scarso and Andy Thomas for +-- helping to figure out all the puzzling details. -- This is preliminary code. At some point we will deal with images as well but -- first we need a decent strategy to export them. More information will be @@ -31,7 +31,7 @@ mtxrun --script epub --make mydocument local application = logs.application { name = "mtx-epub", - banner = "ConTeXt EPUB Helpers 0.11", + banner = "ConTeXt EPUB Helpers 0.12", helpinfo = helpinfo, } @@ -43,26 +43,27 @@ scripts.epub = scripts.epub or { } local mimetype = "application/epub+zip" local container = [[ - + - + ]] local package = [[ - + - My Title - en - urn:uuid:%s - MySelf + %s + %s + urn:uuid:%s + %s %s + @@ -70,13 +71,14 @@ local package = [[ + ]] -local item = [[ ]] +local item = [[ ]] local toc = [[ @@ -108,6 +110,23 @@ local toc = [[ ]] +local coverxhtml = [[ + + + + + + + cover.xhtml + + +
+ The cover image +
+ + +]] + -- We need to figure out what is permitted. Numbers only seem to give -- problems is some applications as do names with dashes. Also the -- optional toc is supposed to be there and although id's are by @@ -117,7 +136,7 @@ local toc = [[ local function dumbid(filename) -- return (string.gsub(os.uuid(),"%-%","")) -- to be tested - return file.nameonly(filename) .. "-" .. file.extname(filename) + return file.nameonly(filename) .. "-" .. file.suffix(filename) end local mimetypes = { @@ -128,6 +147,7 @@ local mimetypes = { png = "image/png", jpg = "image/jpeg", ncx = "application/x-dtbncx+xml", + gif = "image/gif", -- default = "text/plain", } @@ -194,9 +214,21 @@ function scripts.epub.make() local files = specification.files or { file.addsuffix(filename,"xhtml") } local images = specification.images or { } local root = specification.root or files[1] + local language = specification.language or "en" + local creator = specification.author or "My Self" + local title = specification.title or "My Title" + local firstpage = specification.firstpage or "" + local lastpage = specification.lastpage or "" -- identifier = gsub(identifier,"[^a-zA-z0-9]","") + if firstpage ~= "" then + images[firstpage] = firstpage + end + if lastpage ~= "" then + images[lastpage] = lastpage + end + identifier = "BookId" -- weird requirement local epubname = name @@ -204,11 +236,12 @@ function scripts.epub.make() local epubfile = file.replacesuffix(name,"epub") local epubroot = file.replacesuffix(name,"opf") local epubtoc = "toc.ncx" + local epubcover = "cover.xhtml" application.report("creating paths in tree %s",epubpath) lfs.mkdir(epubpath) lfs.mkdir(file.join(epubpath,"META-INF")) - lfs.mkdir(file.join(epubpath,"OPS")) + lfs.mkdir(file.join(epubpath,"OEBPS")) local used = { } @@ -217,13 +250,14 @@ function scripts.epub.make() local mime = mimetypes[suffix] if mime then local idmaker = idmakers[suffix] or idmakers.default - local target = file.join(epubpath,"OPS",filename) + local target = file.join(epubpath,"OEBPS",filename) file.copy(filename,target) application.report("copying %s to %s",filename,target) used[#used+1] = format(item,idmaker(filename),filename,mime) end end + copyone("cover.xhtml") copyone("toc.ncx") local function copythem(files) @@ -241,7 +275,7 @@ function scripts.epub.make() for k, v in table.sortedpairs(images) do theimages[#theimages+1] = k - if not lfs.isfile(k) and file.extname(k) == "svg" and file.extname(v) == "pdf" then + if not lfs.isfile(k) and file.suffix(k) == "svg" and file.suffix(v) == "pdf" then local command = format("inkscape --export-plain-svg=%s %s",k,v) application.report("running command '%s'\n\n",command) os.execute(command) @@ -250,33 +284,52 @@ function scripts.epub.make() copythem(theimages) - local idmaker = idmakers[file.extname(root)] or idmakers.default - - container = format(container,epubroot) - package = format(package,identifier,identifier,os.uuid(),os.date("!%Y-%m-%dT%H:%M:%SZ"),concat(used,"\n"),idmaker(root)) - toc = format(toc,identifier,"title",root) + local idmaker = idmakers[file.suffix(root)] or idmakers.default + + container = format(container, + epubroot + ) + package = format(package, + identifier, + title, + language, + identifier, + os.uuid(), + creator, + os.date("!%Y-%m-%dT%H:%M:%SZ"), + idmaker(firstpage), + concat(used,"\n"), + idmaker(root) + ) + toc = format(toc, + identifier, + title, + root + ) + coverxhtml = format(coverxhtml, + firstpage + ) io.savedata(file.join(epubpath,"mimetype"),mimetype) io.savedata(file.join(epubpath,"META-INF","container.xml"),container) - io.savedata(file.join(epubpath,"OPS",epubroot),package) - io.savedata(file.join(epubpath,"OPS",epubtoc),toc) + io.savedata(file.join(epubpath,"OEBPS",epubroot),package) + io.savedata(file.join(epubpath,"OEBPS",epubtoc),toc) + io.savedata(file.join(epubpath,"OEBPS",epubcover),coverxhtml) application.report("creating archive\n\n") - local done = false - local list = { } - lfs.chdir(epubpath) os.remove(epubfile) + local done = false + for i=1,#zippers do local zipper = zippers[i] if os.execute(format(zipper.uncompressed,epubfile,"mimetype")) then os.execute(format(zipper.compressed,epubfile,"META-INF")) - os.execute(format(zipper.compressed,epubfile,"OPS")) + os.execute(format(zipper.compressed,epubfile,"OEBPS")) done = zipper.name - else - list[#list+1] = zipper.name + break end end @@ -285,6 +338,10 @@ function scripts.epub.make() if done then application.report("epub archive made using %s: %s",done,file.join(epubpath,epubfile)) else + local list = { } + for i=1,#zippers do + list[#list+1] = zipper.name + end application.report("no epub archive made, install one of: %s",concat(list," ")) end diff --git a/scripts/context/lua/mtx-fcd.lua b/scripts/context/lua/mtx-fcd.lua new file mode 100644 index 000000000..d7e1d17a7 --- /dev/null +++ b/scripts/context/lua/mtx-fcd.lua @@ -0,0 +1,366 @@ +if not modules then modules = { } end modules ['mtx-fcd'] = { + version = 1.002, + comment = "companion to mtxrun.lua", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", + comment = "based on the ruby version from 2005", +} + +-- This is a kind of variant of the good old ncd (norton change directory) program. This +-- script uses the same indirect cmd trick as Erwin Waterlander's wcd program. +-- +-- The program is called via the stubs fcd.cmd or fcd.sh. On unix one should probably source +-- the file: ". fcd args" in order to make the chdir persistent. +-- +-- You need to create a stub with: +-- +-- mtxrun --script fcd --stub > fcd.cmd +-- mtxrun --script fcd --stub > fcd.sh +-- +-- The stub starts this script and afterwards runs the created directory change script as +-- part if the same run, so that indeed we change. + +local helpinfo = [[ +--clear clear the cache +--clear --history [entry] clear the history +--scan clear the cache and add given path(s) +--add add given path(s) +--find file given path (can be substring) +--find --nohistory file given path (can be substring) but don't use history +--stub print platform stub file +--list show roots of cached dirs +--list --history show history of chosen dirs +--help show this help + +usage: + + fcd --scan t:\ + fcd --add f:\project + fcd [--find] whatever + fcd --list +]] + +local application = logs.application { + name = "mtx-fcd", + banner = "Fast Directory Change", + helpinfo = helpinfo, +} + +local report = application.report +local writeln = print -- texio.write_nl + +local find, char, byte, lower, gsub, format = string.find, string.char, string.byte, string.lower, string.gsub, string.format + +local mswinstub = [[@echo off + +rem this is: fcd.cmd + +@echo off + +if not exist "%HOME%" goto homepath + +:home + +mtxrun --script mtx-fcd.lua %1 %2 %3 %4 %5 %6 %7 %8 %9 + +if exist "%HOME%\mtx-fcd-goto.cmd" call "%HOME%\mtx-fcd-goto.cmd" + +goto end + +:homepath + +if not exist "%HOMEDRIVE%\%HOMEPATH%" goto end + +mtxrun --script mtx-fcd.lua %1 %2 %3 %4 %5 %6 %7 %8 %9 + +if exist "%HOMEDRIVE%\%HOMEPATH%\mtx-fcd-goto.cmd" call "%HOMEDRIVE%\%HOMEPATH%\mtx-fcd-goto.cmd" + +goto end + +:end +]] + +local unixstub = [[#!/usr/bin/env sh + +# this is: fcd.sh + +# mv fcd.sh fcd +# chmod fcd 755 +# . fcd [args] + +ruby -S fcd_start.rb $1 $2 $3 $4 $5 $6 $7 $8 $9 + +if test -f "$HOME/fcd_stage.sh" ; then + . $HOME/fcd_stage.sh ; +fi; + +]] + +local gotofile +local datafile +local stubfile +local stubdata +local stubdummy +local stubchdir + +if os.platform == 'mswin' then + gotofile = 'mtx-fcd-goto.cmd' + datafile = 'mtx-fcd-data.lua' + stubfile = 'fcd.cmd' + stubdata = mswinstub + stubdummy = 'rem no dir to change to' + stubchdir = 'cd /d "%s"' +else + gotofile = 'mtx-fcd-goto.sh' + datafile = 'mtx-fcd-data.lua' + stubfile = 'fcd.sh' + stubdata = unixstub + stubdummy = '# no dir to change to' + stubchdir = '# cd "%s"' +end + +local homedir = os.env["HOME"] or "" -- no longer TMP etc + +if homedir == "" then + homedir = format("%s/%s",os.env["HOMEDRIVE"] or "",os.env["HOMEPATH"] or "") +end + +if homedir == "/" or not lfs.isdir(homedir) then + os.exit() +end + +local datafile = file.join(homedir,datafile) +local gotofile = file.join(homedir,gotofile) +local hash = nil +local found = { } +local pattern = "" +local version = modules['mtx-fcd'].version + +io.savedata(gotofile,stubdummy) + +if not lfs.isfile(gotofile) then + -- write error + os.exit() +end + +local function fcd_clear(onlyhistory,what) + if onlyhistory and hash and hash.history then + if what and what ~= "" then + hash.history[what] = nil + else + hash.history = { } + end + else + hash = { + name = "fcd cache", + comment = "generated by mtx-fcd.lua", + created = os.date(), + version = version, + paths = { }, + history = { }, + } + end +end + +local function fcd_changeto(dir) + if dir and dir ~= "" then + io.savedata(gotofile,format(stubchdir,dir)) + end +end + +local function fcd_load(forcecreate) + if lfs.isfile(datafile) then + hash = dofile(datafile) + end + if not hash or hash.version ~= version then + if forcecache then + fcd_clear() + else + writeln("empty dir cache") + fcd_clear() + os.exit() + end + end +end + +local function fcd_save() + if hash then + io.savedata(datafile,table.serialize(hash,true)) + end +end + +local function fcd_list(onlyhistory) + if hash then + writeln("") + if onlyhistory then + if next(hash.history) then + for k, v in table.sortedhash(hash.history) do + writeln(format("%s => %s",k,v)) + end + else + writeln("no history") + end + else + local paths = hash.paths + if #paths > 0 then + for i=1,#paths do + local path = paths[i] + writeln(format("%4i %s",#path[2],path[1])) + end + else + writeln("empty cache") + end + end + end +end + +local function fcd_find() + found = { } + pattern = environment.files[1] or "" + if pattern ~= "" then + pattern = string.escapedpattern(pattern) + local paths = hash.paths + for i=1,#paths do + local paths = paths[i][2] + for i=1,#paths do + local path = paths[i] + if find(path,pattern) then + found[#found+1] = path + end + end + end + end +end + +local function fcd_choose(new) + if pattern == "" then + writeln(format("staying in dir %q",(gsub(lfs.currentdir(),"\\","/")))) + return + end + if #found == 0 then + writeln(format("dir %q not found",pattern)) + return + end + local okay = #found == 1 and found[1] or (not new and hash.history[pattern]) + if okay then + writeln(format("changing to %q",okay)) + fcd_changeto(okay) + return + end + local offset = 0 + while true do + if not found[offset] then + offset = 0 + end + io.write("\n") + for i=1,26 do + local v = found[i+offset] + if v then + writeln(format("%s %3i %s",char(i+96),offset+i,v)) + else + break + end + end + offset = offset + 26 + if found[offset+1] then + io.write("\n[press enter for more or select letter]\n\n>> ") + else + io.write("\n[select letter]\n\n>> ") + end + local answer = lower(io.read() or "") + if not answer or answer == 'quit' then + break + elseif #answer > 0 then + local choice = tonumber(answer) + if not choice then + if answer >= "a" and answer <= "z" then + choice = byte(answer) - 96 + offset - 26 + end + end + local newdir = found[choice] + if newdir then + hash.history[pattern] = newdir + writeln(format("changing to %q",newdir)) + fcd_changeto(newdir) + fcd_save() + return + end + else + -- try again + end + end +end + +local function globdirs(path,dirs) + local dirs = dirs or { } + for name in lfs.dir(path) do + if not find(name,"%.$") then + local fullname = path .. "/" .. name + if lfs.isdir(fullname) and not find(fullname,"/%.") then + dirs[#dirs+1] = fullname + globdirs(fullname,dirs) + end + end + end + return dirs +end + +local function fcd_scan() + if hash then + local paths = hash.paths + for i=1,#environment.files do + local name = environment.files[i] + local name = gsub(name,"\\","/") + local name = gsub(name,"/$","") + local list = globdirs(name) + local done = false + for i=1,#paths do + if paths[i][1] == name then + paths[i][2] = list + done = true + break + end + end + if not done then + paths[#paths+1] = { name, list } + end + end + end +end + +local argument = environment.argument + +if argument("clear") then + if argument("history") then + fcd_load() + fcd_clear(true) + else + fcd_clear() + end + fcd_save() +elseif argument("scan") then + fcd_clear() + fcd_scan() + fcd_save() +elseif argument("add") then + fcd_load(true) + fcd_scan() + fcd_save() +elseif argument("stub") then + writeln(stubdata) +elseif argument("list") then + fcd_load() + if argument("history") then + fcd_list(true) + else + fcd_list() + end +elseif argument("help") then + application.help() +else -- also argument("find") + fcd_load() + fcd_find() + fcd_choose(argument("nohistory")) +end + diff --git a/scripts/context/lua/mtx-flac.lua b/scripts/context/lua/mtx-flac.lua index 37f985654..cb73a6592 100644 --- a/scripts/context/lua/mtx-flac.lua +++ b/scripts/context/lua/mtx-flac.lua @@ -6,12 +6,6 @@ if not modules then modules = { } end modules ['mtx-flac'] = { license = "see context related readme files" } --- Written with Within Temptation's "The Unforgiven" in loopmode on --- the speakers. The following code is also used for my occasional music --- repository cleanup session using the code below. - --- this can become l-flac.lua - local sub, match, byte, lower = string.sub, string.match, string.byte, string.lower local readstring, readnumber = io.readstring, io.readnumber local concat = table.concat diff --git a/scripts/context/lua/mtx-fonts.lua b/scripts/context/lua/mtx-fonts.lua index c5b458c14..31ee18ce9 100644 --- a/scripts/context/lua/mtx-fonts.lua +++ b/scripts/context/lua/mtx-fonts.lua @@ -126,7 +126,7 @@ function fonts.names.simple() end report("saving names in '%s'",name) io.savedata(name,table.serialize(simplified,true)) - local data = io.loaddata(resolvers.findfile("font-dum.lua","tex")) + local data = io.loaddata(resolvers.findfile("luatex-fonts-syn.lua","tex")) or "" local dummy = string.match(data,"fonts%.names%.version%s*=%s*([%d%.]+)") if tonumber(dummy) ~= simpleversion then report("warning: version number %s in 'font-dum' does not match database version number %s",dummy or "?",simpleversion) @@ -367,7 +367,7 @@ function scripts.fonts.save() if name and name ~= "" then local filename = resolvers.findfile(name) -- maybe also search for opentype if filename and filename ~= "" then - local suffix = string.lower(file.extname(filename)) + local suffix = string.lower(file.suffix(filename)) if suffix == 'ttf' or suffix == 'otf' or suffix == 'ttc' or suffix == "dfont" then local fontinfo = fontloader.info(filename) if fontinfo then diff --git a/scripts/context/lua/mtx-grep.lua b/scripts/context/lua/mtx-grep.lua index 3cbc1421a..98a97279d 100644 --- a/scripts/context/lua/mtx-grep.lua +++ b/scripts/context/lua/mtx-grep.lua @@ -60,7 +60,7 @@ function scripts.grep.find(pattern, files, offset) if m > 0 then nofmatches = nofmatches + m nofmatchedfiles = nofmatchedfiles + 1 - write_nl(format("%s: %s",name,m)) + write_nl(format("%5i %s",m,name)) io.flush() end else @@ -127,7 +127,7 @@ function scripts.grep.find(pattern, files, offset) if count and m > 0 then nofmatches = nofmatches + m nofmatchedfiles = nofmatchedfiles + 1 - write_nl(format("%s: %s",name,m)) + write_nl(format("%5i %s",m,name)) io.flush() end end diff --git a/scripts/context/lua/mtx-metapost.lua b/scripts/context/lua/mtx-metapost.lua index 3b9ed6ff1..44cf8205d 100644 --- a/scripts/context/lua/mtx-metapost.lua +++ b/scripts/context/lua/mtx-metapost.lua @@ -49,7 +49,7 @@ local tempname = "mptopdf-temp.tex" local function do_convert(filename) if find(filename,".%d+$") or find(filename,"%.mps$") then io.savedata(tempname,format(template,filename)) - local resultname = format("%s-%s.pdf",file.nameonly(filename),file.extname(filename)) + local resultname = format("%s-%s.pdf",file.nameonly(filename),file.suffix(filename)) local result = os.execute(format([[context --once --batch --purge --result=%s "%s"]],resultname,tempname)) return lfs.isfile(resultname) and resultname end diff --git a/scripts/context/lua/mtx-pdf.lua b/scripts/context/lua/mtx-pdf.lua index 5654b8bc4..f37ee006a 100644 --- a/scripts/context/lua/mtx-pdf.lua +++ b/scripts/context/lua/mtx-pdf.lua @@ -6,14 +6,21 @@ if not modules then modules = { } end modules ['mtx-pdf'] = { license = "see context related readme files" } +local tonumber = tonumber +local format, gmatch = string.format, string.gmatch +local utfchar = utf.char +local concat = table.concat +local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys + local helpinfo = [[ --info show some info about the given file --metadata show metadata xml blob +--fonts show used fonts (--detail) ]] local application = logs.application { name = "mtx-pdf", - banner = "ConTeXt PDF Helpers 0.01", + banner = "ConTeXt PDF Helpers 0.10", helpinfo = helpinfo, } @@ -39,9 +46,8 @@ local function loadpdffile(filename) end end -function scripts.pdf.info() - local filename = environment.files[1] - local pdffile = loadpdffile(filename) +function scripts.pdf.info(filename) + local pdffile = loadpdffile(filename) if pdffile then local catalog = pdffile.Catalog local info = pdffile.Info @@ -73,9 +79,8 @@ function scripts.pdf.info() end end -function scripts.pdf.metadata() - local filename = environment.files[1] - local pdffile = loadpdffile(filename) +function scripts.pdf.metadata(filename) + local pdffile = loadpdffile(filename) if pdffile then local catalog = pdffile.Catalog local metadata = catalog.Metadata @@ -87,10 +92,127 @@ function scripts.pdf.metadata() end end -if environment.argument("info") then - scripts.pdf.info() +local function getfonts(pdffile) + local usedfonts = { } + for i=1,pdffile.pages.n do + local page = pdffile.pages[i] + local fontlist = page.Resources.Font + for k, v in next, lpdf.epdf.expand(fontlist) do + usedfonts[k] = lpdf.epdf.expand(v) + end + end + return usedfonts +end + +local function getunicodes(font) + local cid = font.ToUnicode + if cid then + cid = cid() + local counts = { } + -- for s in gmatch(cid,"begincodespacerange%s*(.-)%s*endcodespacerange") do + -- for a, b in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do + -- print(a,b) + -- end + -- end + setmetatableindex(counts, function(t,k) t[k] = 0 return 0 end) + for s in gmatch(cid,"beginbfrange%s*(.-)%s*endbfrange") do + for first, last, offset in gmatch(s,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do + first = tonumber(first,16) + last = tonumber(last,16) + offset = tonumber(offset,16) + offset = offset - first + for i=first,last do + local c = i + offset + counts[c] = counts[c] + 1 + end + end + end + for s in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do + for old, new in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do + for n in gmatch(new,"....") do + local c = tonumber(n,16) + counts[c] = counts[c] + 1 + end + end + end + return counts + end +end + +function scripts.pdf.fonts(filename) + local pdffile = loadpdffile(filename) + if pdffile then + local usedfonts = getfonts(pdffile) + local found = { } + for k, v in table.sortedhash(usedfonts) do + local counts = getunicodes(v) + local codes = { } + local chars = { } + local freqs = { } + if counts then + codes = sortedkeys(counts) + for i=1,#codes do + local k = codes[i] + local c = utfchar(k) + chars[i] = c + freqs[i] = format("U+%05X %s %s",k,counts[k] > 1 and "+" or " ", c) + end + for i=1,#codes do + codes[i] = format("U+%05X",codes[i]) + end + end + found[k] = { + basefont = v.BaseFont or "no basefont", + encoding = v.Encoding or "no encoding", + subtype = v.Subtype or "no subtype", + unicode = v.ToUnicode and "unicode" or "no unicode", + chars = chars, + codes = codes, + freqs = freqs, + } + end + + if environment.argument("detail") then + for k, v in sortedhash(found) do + report("id : %s",k) + report("basefont : %s",v.basefont) + report("encoding : %s",v.encoding) + report("subtype : %s",v.subtype) + report("unicode : %s",v.unicode) + report("characters : %s", concat(v.chars," ")) + report("codepoints : %s", concat(v.codes," ")) + report("") + end + else + local results = { { "id", "basefont", "encoding", "subtype", "unicode", "characters" } } + for k, v in sortedhash(found) do + results[#results+1] = { k, v.basefont, v.encoding, v.subtype, v.unicode, concat(v.chars," ") } + end + utilities.formatters.formatcolumns(results) + report(results[1]) + report("") + for i=2,#results do + report(results[i]) + end + report("") + end + end +end + +-- scripts.pdf.info("e:/tmp/oeps.pdf") +-- scripts.pdf.metadata("e:/tmp/oeps.pdf") +-- scripts.pdf.fonts("e:/tmp/oeps.pdf") + +local filename = environment.files[1] or "" + +if filename == "" then + application.help() +elseif environment.argument("info") then + scripts.pdf.info(filename) elseif environment.argument("metadata") then - scripts.pdf.metadata() + scripts.pdf.metadata(filename) +elseif environment.argument("fonts") then + scripts.pdf.fonts(filename) else application.help() end diff --git a/scripts/context/lua/mtx-scite.lua b/scripts/context/lua/mtx-scite.lua index 116555e79..3369c5f3b 100644 --- a/scripts/context/lua/mtx-scite.lua +++ b/scripts/context/lua/mtx-scite.lua @@ -199,14 +199,31 @@ function scripts.scite.words() if lfs.isfile(txtname) then report("loading %s",txtname) local olddata = io.loaddata(txtname) or "" + local words = splitwords(olddata) + local min, max, n = 100, 1, 0 + for k, v in next, words do + local l = #k + if l < min then + min = l + end + if l > max then + max = l + end + n = n + 1 + end + if min > max then + min = max + end local newdata = { - words = splitwords(olddata), - -- words = olddata, + words = words, source = oldname, + min = min, + max = max, + n = n, } - report("saving %s",luaname) + report("saving %q, %s words, %s shortest, %s longest",luaname,n,min,max) io.savedata(luaname,table.serialize(newdata,true)) - report("compiling %s",lucname) + report("compiling %q",lucname) os.execute(format("luac -s -o %s %s",lucname,luaname)) else report("no data file %s",txtname) diff --git a/scripts/context/lua/mtx-server-ctx-help.lua b/scripts/context/lua/mtx-server-ctx-help.lua index a212e1369..39a73dc4e 100644 --- a/scripts/context/lua/mtx-server-ctx-help.lua +++ b/scripts/context/lua/mtx-server-ctx-help.lua @@ -15,7 +15,7 @@ dofile(resolvers.findfile("trac-lmx.lua","tex")) -- problem ... serialize parent stack -local format = string.format +local format, match, gsub, find = string.format, string.match, string.gsub, string.find local concat = table.concat local report = logs.reporter("ctx-help") @@ -282,18 +282,48 @@ document.setups.translations = document.setups.translations or { } document.setups.formats = { - open_command = { [[\%s]], [[context.%s (]] }, - close_command = { [[]], [[ )]] }, - connector = { [[]], [[, ]] }, - href_in_list = { [[%s]], [[%s]] }, - href_as_command = { [[\%s]], [[context.%s]] }, + open_command = { + tex = [[\%s]], + lua = [[context.%s (]], + }, + close_command = { + tex = [[]], + lua = [[ )]], + }, + connector = { + tex = [[]], + lua = [[, ]], + }, + href_in_list = { + tex = [[%s]], + lua = [[%s]], + }, + href_as_command = { + tex = [[\%s]], + lua = [[context.%s]], + }, + modes = { + tex = [[lua mode]], + lua = [[tex mode]], + }, + optional_single = { + tex = "[optional string %s]", + lua = "{optional string %s}", + }, + optional_list = { + tex = "[optional list %s]", + lua = "{optional table %s}" , + } , + mandate_single = { + tex = "[mandate string %s]", + lua = "{mandate string %s}", + }, + mandate_list = { + tex = "[mandate list %s]", + lua = "{mandate list %s}", + }, interface = [[%s]], source = [[%s]], - modes = { [[lua mode]], [[tex mode]] }, - optional_single = { "[optional string %s]", "{optional string %s}" }, - optional_list = { "[optional list %s]", "{optional table %s}" } , - mandate_single = { "[mandate string %s]", "{mandate string %s}" }, - mandate_list = { "[mandate list %s]", "{mandate list %s}" }, parameter = [[%s%s%s]], parameters = [[%s
]], listing = [[
%s]],
@@ -315,7 +345,7 @@ end
 local function translated(e,int)
     local attributes = e.at
     local s = attributes.type or "?"
-    local tag = s:match("^cd:(.*)$")
+    local tag = match(s,"^cd:(.*)$")
     if attributes.default == "yes" then
         return format(document.setups.formats.default,tag or "?")
     elseif tag then
@@ -329,7 +359,7 @@ document.setups.loaded = document.setups.loaded or { }
 
 document.setups.current = { }
 document.setups.showsources = true
-document.setups.mode = 1
+document.setups.mode = "tex"
 
 function document.setups.load(filename)
     filename = resolvers.findfile(filename) or ""
@@ -402,7 +432,7 @@ end
 function document.setups.show(name)
     local current = document.setups.current
     if current.root then
-        local name = name:gsub("[<>]","")
+        local name = gsub(name,"[<>]","")
         local setup = xml.first(current.root,"cd:command[@name='" .. name .. "']")
         current.used[#current.used+1] = setup
         xml.sprint(setup)
@@ -452,12 +482,12 @@ function document.setups.collect(name,int,lastmode)
             category = attributes.category or "",
         }
         if document.setups.showsources then
-            data.source = (attributes.file and formats.source:format(attributes.file,lastmode,attributes.file)) or ""
+            data.source = (attributes.file and format(formats.source,attributes.file,lastmode,attributes.file)) or ""
         else
             data.source = attributes.file or ""
         end
         local n, sequence, tags = 0, { }, { }
-        sequence[#sequence+1] = formats.open_command[lastmode]:format(document.setups.csname(command,int))
+        sequence[#sequence+1] = format(formats.open_command[lastmode],document.setups.csname(command,int))
         local arguments, tag = { }, ""
         for r, d, k in xml.elements(command,"(cd:keywords|cd:assignments)") do
             n = n + 1
@@ -470,15 +500,15 @@ function document.setups.collect(name,int,lastmode)
             end
             if attributes.optional == 'yes' then
                 if attributes.list == 'yes' then
-                    tag = formats.optional_list[lastmode]:format(n)
+                    tag = format(formats.optional_list[lastmode],n)
                 else
-                    tag = formats.optional_single[lastmode]:format(n)
+                    tag = format(formats.optional_single[lastmode],n)
                 end
             else
                 if attributes.list == 'yes' then
-                    tag = formats.mandate_list[lastmode]:format(n)
+                    tag = format(formats.mandate_list[lastmode],n)
                 else
-                    tag = formats.mandate_single[lastmode]:format(n)
+                    tag = format(formats.mandate_single[lastmode],n)
                 end
             end
             sequence[#sequence+1] = tag
@@ -506,7 +536,7 @@ function document.setups.collect(name,int,lastmode)
                         right[#right+1] = translated(d[k],int)
                     end
                 end
-                parameters[#parameters+1] = formats.parameter:format(left,"",concat(right, ", "))
+                parameters[#parameters+1] = format(formats.parameter,left,"",concat(right, ", "))
             else
                 local what = tags[n]
                 for r, d, k in xml.elements(d[k],"(cd:parameter|cd:inherit)") do
@@ -514,11 +544,11 @@ function document.setups.collect(name,int,lastmode)
                     local left, right = d[k].at.name or "?", { }
                     if tag == "inherit" then
                         local name = d[k].at.name or "?"
-                        local goto = document.setups.formats.href_as_command[lastmode]:format(name,lastmode,name)
-                        if #parameters > 0 and not parameters[#parameters]:find("
") then - parameters[#parameters+1] = formats.parameter:format("
","","") + local goto = format(document.setups.formats.href_as_command[lastmode],name,lastmode,name) + if #parameters > 0 and not find(parameters[#parameters],"
") then + parameters[#parameters+1] = format(formats.parameter,"
","","") end - parameters[#parameters+1] = formats.parameter:format(what,formats.special:format(translate("inherits",int)),goto) + parameters[#parameters+1] = format(formats.parameter,what,format(formats.special,translate("inherits",int)),goto) else for r, d, k in xml.elements(d[k],"(cd:constant|cd:resolve)") do local tag = d[k].tg @@ -534,15 +564,15 @@ function document.setups.collect(name,int,lastmode) right[#right+1] = translated(d[k],int) end end - parameters[#parameters+1] = formats.parameter:format(what,left,concat(right, ", ")) + parameters[#parameters+1] = format(formats.parameter,what,left,concat(right, ", ")) end what = "" end end - parameters[#parameters+1] = formats.parameter:format("
","","") + parameters[#parameters+1] = format(formats.parameter,"
","","") end data.parameters = parameters or { } - data.mode = formats.modes[lastmode or 1] + data.mode = formats.modes[lastmode or "tex"] return data else return nil @@ -566,7 +596,7 @@ local interfaces = { romanian = 'ro', } -local lastinterface, lastcommand, lastsource, lastmode = "en", "", "", 1 +local lastinterface, lastcommand, lastsource, lastmode = "en", "", "", "tex" local variables = { ['color-background-main-left'] = '#3F3F3F', @@ -584,78 +614,87 @@ local function doit(configuration,filename,hashed) local formats = document.setups.formats - local start = os.clock() + local start = os.clock() + local detail = hashed.queries - local detail = url.query(hashed.query or "") + if detail then - lastinterface = detail.interface or lastinterface - lastcommand = detail.command or lastcommand - lastsource = detail.source or lastsource - lastmode = tonumber(detail.mode or lastmode) or 1 + lastinterface = detail.interface or lastinterface + lastcommand = detail.command or lastcommand + lastsource = detail.source or lastsource + lastmode = detail.mode or lastmode or "tex" - if lastinterface then - report("checking interface: %s",lastinterface) - document.setups.load(format("cont-%s.xml",lastinterface)) - end + lastcommand = gsub(lastcommand,"%s*^\\*(.+)%s*","%1") - local div = document.setups.div[lastinterface] - local span = document.setups.span[lastinterface] + if lastinterface then + report("checking interface: %s",lastinterface) + document.setups.load(format("cont-%s.xml",lastinterface)) + end - local result = { content = "error" } + local div = document.setups.div [lastinterface] + local span = document.setups.span[lastinterface] - local names, refs, ints = document.setups.names(lastinterface), { }, { } - for k=1,#names do - local v = names[k] - refs[k] = formats.href_in_list[lastmode]:format(v[1],lastmode,v[2]) - end - if lastmode ~= 2 then - local sorted = table.sortedkeys(interfaces) - for k=1,#sorted do - local v = sorted[k] - ints[k] = formats.interface:format(interfaces[v],lastmode,v) + local names, refs, ints = document.setups.names(lastinterface), { }, { } + for k=1,#names do + local v = names[k] + refs[k] = format(formats.href_in_list[lastmode],v[1],lastmode,v[2]) + end + if lastmode ~= "lua" then + local sorted = table.sortedkeys(interfaces) + for k=1,#sorted do + local v = sorted[k] + ints[k] = format(formats.interface,interfaces[v],lastmode,v) + end end - end - local n = concat(refs,"
") - local i = concat(ints,"

") + local n = concat(refs,"
") + local i = concat(ints,"

") - if div then - variables.names = div:format(n) - variables.interfaces = div:format(i) - else - variables.names = n - variables.interfaces = i - end + if div then + variables.names = format(div,n) + variables.interfaces = format(div,i) + else + variables.names = n + variables.interfaces = i + end - -- first we need to add information about mkii/mkiv - - variables.maintitle = "no definition" - variables.maintext = "" - variables.extra = "" - - if document.setups.showsources and lastsource and lastsource ~= "" then - -- todo: mkii, mkiv, tex (can be different) - local data = io.loaddata(resolvers.findfile(lastsource)) - variables.maintitle = lastsource - variables.maintext = formats.listing:format(data) - lastsource = "" - elseif lastcommand and lastcommand ~= "" then - local data = document.setups.collect(lastcommand,lastinterface,lastmode) - if data then - local what, extra = { "environment", "category", "source", "mode" }, { } - for k=1,#what do - local v = what[k] - if data[v] and data[v] ~= "" then - lmx.set(v, data[v]) - extra[#extra+1] = v .. ": " .. data[v] + -- first we need to add information about mkii/mkiv + + variables.maintitle = "no definition" + variables.maintext = "" + variables.extra = "" + + if document.setups.showsources and lastsource and lastsource ~= "" then + -- todo: mkii, mkiv, tex (can be different) + local data = io.loaddata(resolvers.findfile(lastsource)) + variables.maintitle = lastsource + variables.maintext = format(formats.listing,data) + lastsource = "" + elseif lastcommand and lastcommand ~= "" then + local data = document.setups.collect(lastcommand,lastinterface,lastmode) + if data then + local what, extra = { "environment", "category", "source", "mode" }, { } + for k=1,#what do + local v = what[k] + if data[v] and data[v] ~= "" then + lmx.set(v, data[v]) + extra[#extra+1] = v .. ": " .. data[v] + end end + variables.maintitle = data.sequence + variables.maintext = format(formats.parameters,concat(data.parameters)) + variables.extra = concat(extra,"   ") + else + variables.maintext = "select command" end - variables.maintitle = data.sequence - variables.maintext = formats.parameters:format(concat(data.parameters)) - variables.extra = concat(extra,"   ") - else - variables.maintext = "select command" end + + else + + variables.maintitle = "no definition" + variables.maintext = "some error" + variables.extra = "" + end local content = lmx.convert('context-help.lmx',false,variables) diff --git a/scripts/context/lua/mtx-server.lua b/scripts/context/lua/mtx-server.lua index 068d51111..d6e8ac902 100644 --- a/scripts/context/lua/mtx-server.lua +++ b/scripts/context/lua/mtx-server.lua @@ -30,7 +30,7 @@ dofile(resolvers.findfile("l-url.lua","tex")) dofile(resolvers.findfile("luat-soc.lua","tex")) local socket = socket or require("socket") -local http = socket or require("socket.http") +local http = http or require("socket.http") -- not needed local format = string.format -- The following two lists are taken from webrick (ruby) and @@ -231,6 +231,7 @@ function handlers.lua(client,configuration,filename,suffix,iscontent,hashed) -- end if result then if type(result) == "function" then + report("running script: %s",filename) result = result(configuration,filename,hashed) -- second argument will become query end if result and type(result) == "string" then @@ -242,7 +243,7 @@ function handlers.lua(client,configuration,filename,suffix,iscontent,hashed) -- local action = handlers[suffix] or handlers.generic action(client,configuration,result.content,suffix,true) -- content elseif result.filename then - local suffix = file.extname(result.filename) or "text/html" + local suffix = file.suffix(result.filename) or "text/html" local action = handlers[suffix] or handlers.generic action(client,configuration,result.filename,suffix,false) -- filename else @@ -301,40 +302,50 @@ function scripts.webserver.run(configuration) report("scripts subpath: %s",configuration.scripts) report("context services: http://localhost:%s/mtx-server-ctx-startup.lua",configuration.port) local server = assert(socket.bind("*", configuration.port)) --- local reading = { server } - while true do -- no multiple clients + local script = configuration.script + while true do -- blocking local start = os.clock() --- local input = socket.select(reading) --- local client = input:accept() local client = server:accept() client:settimeout(configuration.timeout or 60) local request, e = client:receive() --- local request, e = client:receive("*a") -- doesn't work well (so no post) if e then errormessage(client,configuration,404) else local from = client:getpeername() report("request from: %s",tostring(from)) - local fullurl = request:match("GET (.+) HTTP/.*$") or "" -- todo: more clever / post + report("request data: %s",tostring(request)) + local fullurl = string.match(request,"GET (.+) HTTP/.*$") or "" -- todo: more clever / post if fullurl == "" then + report("no url") errormessage(client,configuration,404) else - fullurl = socket.url.unescape(fullurl) + report("requested url: %s",fullurl) + fullurl = socket.url.unescape(fullurl) -- still needed? local hashed = url.hashed(fullurl) local query = url.query(hashed.query) - local filename = hashed.path --- table.print(hashed) - if filename then + local filename = hashed.path -- hm, not query? + if script then + filename = script + report("forced script: %s",filename) + local suffix = file.suffix(filename) + local action = handlers[suffix] or handlers.generic + if action then + report("performing action: %s",filename) + action(client,configuration,filename,suffix,false,hashed) -- filename and no content + else + errormessage(client,configuration,404) + end + elseif filename then filename = socket.url.unescape(filename) report("requested action: %s",filename) - if filename:find("%.%.") then + if string.find(filename,"%.%.") then filename = nil -- invalid path end if filename == nil or filename == "" or filename == "/" then filename = configuration.index report("invalid filename, forcing: %s",filename) end - local suffix = file.extname(filename) + local suffix = file.suffix(filename) local action = handlers[suffix] or handlers.generic if action then report("performing action: %s",filename) @@ -358,6 +369,7 @@ if environment.argument("auto") then port = environment.argument("port"), root = environment.argument("root") or file.dirname(path) or ".", scripts = environment.argument("scripts") or file.dirname(path) or ".", + script = environment.argument("script"), } elseif environment.argument("start") then scripts.webserver.run { @@ -365,6 +377,7 @@ elseif environment.argument("start") then root = environment.argument("root") or ".", -- "e:/websites/www.pragma-ade.com", index = environment.argument("index"), scripts = environment.argument("scripts"), + script = environment.argument("script"), } else application.help() diff --git a/scripts/context/lua/mtx-tools.lua b/scripts/context/lua/mtx-tools.lua index 45961a639..c1aaf9e5d 100644 --- a/scripts/context/lua/mtx-tools.lua +++ b/scripts/context/lua/mtx-tools.lua @@ -102,7 +102,7 @@ end function scripts.tools.dirtoxml() - local join, removesuffix, extname, date = file.join, file.removesuffix, file.extname, os.date + local join, removesuffix, suffixonly, date = file.join, file.removesuffix, file.suffixonly, os.date local xmlns = "http://www.pragma-ade.com/rlg/xmldir.rng" local timestamp = "%Y-%m-%d %H:%M" diff --git a/scripts/context/lua/mtx-update.lua b/scripts/context/lua/mtx-update.lua index 037de8650..b5f34d615 100644 --- a/scripts/context/lua/mtx-update.lua +++ b/scripts/context/lua/mtx-update.lua @@ -421,9 +421,9 @@ function scripts.update.synchronize() if platform == 'mswin' then bin = gsub(bin,"([a-zA-Z]):/", "/cygdrive/%1/") texroot = gsub(texroot,"([a-zA-Z]):/", "/cygdrive/%1/") - command = format("%s -t %s/texmf-context/scripts/context/lua/%s.lua %s/texmf-mswin/bin/", bin, texroot, script, texroot) + command = format([[%s -t "%s/texmf-context/scripts/context/lua/%s.lua" "%s/texmf-mswin/bin/"]], bin, texroot, script, texroot) else - command = format("%s -tgo --chmod=a+x %s/texmf-context/scripts/context/lua/%s.lua %s/texmf-%s/bin/%s", bin, texroot, script, texroot, platform, script) + command = format([[%s -tgo --chmod=a+x '%s/texmf-context/scripts/context/lua/%s.lua' '%s/texmf-%s/bin/%s']], bin, texroot, script, texroot, platform, script) end report("updating %s for %s: %s", script, platform, command) scripts.update.run(command) diff --git a/scripts/context/lua/mtx-watch.lua b/scripts/context/lua/mtx-watch.lua index 36a3176c4..31ed95f7b 100644 --- a/scripts/context/lua/mtx-watch.lua +++ b/scripts/context/lua/mtx-watch.lua @@ -227,17 +227,6 @@ function scripts.watch.watch() end end local n, start = 0, time() ---~ local function wait() ---~ io.flush() ---~ if not done then ---~ n = n + 1 ---~ if n >= 10 then ---~ report("run time: %i seconds, memory usage: %0.3g MB", difftime(time(),start), (status.luastate_bytes/1024)/1000) ---~ n = 0 ---~ end ---~ os.sleep(delay) ---~ end ---~ end local wtime = 0 local function wait() io.flush() diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index 108f2a8a1..e6bbbe2b5 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -8,6 +8,11 @@ if not modules then modules = { } end modules ['mtxrun'] = { license = "see context related readme files" } +-- if not lpeg then require("lpeg") end +-- if not md5 then require("md5") end +-- if not lfs then require("lfs") end +-- if not texconfig then texconfig = { } end + -- one can make a stub: -- -- #!/bin/sh @@ -150,11 +155,28 @@ function string.topattern(str,lowercase,strict) end end + +function string.valid(str,default) + return (type(str) == "string" and str ~= "" and str) or default or nil +end + -- obsolete names: string.quote = string.quoted string.unquote = string.unquoted +-- handy fallback + +string.itself = function(s) return s end + +-- also handy (see utf variant) + +local pattern = Ct(C(1)^0) + +function string.totable(str) + return lpegmatch(pattern,str) +end + end -- of closure @@ -168,7 +190,8 @@ if not modules then modules = { } end modules ['l-table'] = { license = "see context related readme files" } -local type, next, tostring, tonumber, ipairs, table, string = type, next, tostring, tonumber, ipairs, table, string +local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs +local table, string = table, string local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match local getmetatable, setmetatable = getmetatable, setmetatable @@ -179,6 +202,8 @@ local getinfo = debug.getinfo -- impact on ConTeXt was not that large; the remaining ipairs already -- have been replaced. In a similar fashion we also hardly used pairs. -- +-- Hm, actually ipairs was retained, but we no longer use it anyway. +-- -- Just in case, we provide the fallbacks as discussed in Programming -- in Lua (http://www.lua.org/pil/7.3.html): @@ -238,12 +263,16 @@ function table.strip(tab) end function table.keys(t) - local keys, k = { }, 0 - for key, _ in next, t do - k = k + 1 - keys[k] = key + if t then + local keys, k = { }, 0 + for key, _ in next, t do + k = k + 1 + keys[k] = key + end + return keys + else + return { } end - return keys end local function compare(a,b) @@ -256,41 +285,49 @@ local function compare(a,b) end local function sortedkeys(tab) - local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed - for key,_ in next, tab do - s = s + 1 - srt[s] = key - if category == 3 then - -- no further check - else - local tkey = type(key) - if tkey == "string" then - category = (category == 2 and 3) or 1 - elseif tkey == "number" then - category = (category == 1 and 3) or 2 + if tab then + local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed + for key,_ in next, tab do + s = s + 1 + srt[s] = key + if category == 3 then + -- no further check else - category = 3 + local tkey = type(key) + if tkey == "string" then + category = (category == 2 and 3) or 1 + elseif tkey == "number" then + category = (category == 1 and 3) or 2 + else + category = 3 + end end end - end - if category == 0 or category == 3 then - sort(srt,compare) + if category == 0 or category == 3 then + sort(srt,compare) + else + sort(srt) + end + return srt else - sort(srt) + return { } end - return srt end local function sortedhashkeys(tab) -- fast one - local srt, s = { }, 0 - for key,_ in next, tab do - if key then - s= s + 1 - srt[s] = key + if tab then + local srt, s = { }, 0 + for key,_ in next, tab do + if key then + s= s + 1 + srt[s] = key + end end + sort(srt) + return srt + else + return { } end - sort(srt) - return srt end table.sortedkeys = sortedkeys @@ -315,7 +352,7 @@ end table.sortedhash = sortedhash table.sortedpairs = sortedhash -function table.append(t, list) +function table.append(t,list) local n = #t for i=1,#list do n = n + 1 @@ -550,12 +587,26 @@ local function do_serialize(root,name,depth,level,indexed) end -- we could check for k (index) being number (cardinal) if root and next(root) then - local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) + -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) + -- if compact then + -- -- NOT: for k=1,#root do (we need to quit at nil) + -- for k,v in ipairs(root) do -- can we use next? + -- if not first then first = k end + -- last = last + 1 + -- end + -- end + local first, last = nil, 0 if compact then - -- NOT: for k=1,#root do (we need to quit at nil) - for k,v in ipairs(root) do -- can we use next? - if not first then first = k end - last = last + 1 + last = #root + for k=1,last do +-- if not root[k] then + if root[k] == nil then + last = k - 1 + break + end + end + if last > 0 then + first = 1 end end local sk = sortedkeys(root) @@ -1027,23 +1078,27 @@ function table.reversed(t) end end -function table.sequenced(t,sep,simple) -- hash only - local s, n = { }, 0 - for k, v in sortedhash(t) do - if simple then - if v == true then - n = n + 1 - s[n] = k - elseif v and v~= "" then +function table.sequenced(t,sep) -- hash only + if t then + local s, n = { }, 0 + for k, v in sortedhash(t) do + if simple then + if v == true then + n = n + 1 + s[n] = k + elseif v and v~= "" then + n = n + 1 + s[n] = k .. "=" .. tostring(v) + end + else n = n + 1 s[n] = k .. "=" .. tostring(v) end - else - n = n + 1 - s[n] = k .. "=" .. tostring(v) end + return concat(s, sep or " | ") + else + return "" end - return concat(s, sep or " | ") end function table.print(t,...) @@ -1124,6 +1179,8 @@ local lpeg = require("lpeg") -- tracing (only used when we encounter a problem in integration of lpeg in luatex) +-- some code will move to unicode and string + local report = texio and texio.write_nl or print -- local lpmatch = lpeg.match @@ -1160,8 +1217,8 @@ local report = texio and texio.write_nl or print -- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end -- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end -local type = type -local byte, char, gmatch = string.byte, string.char, string.gmatch +local type, next = type, next +local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format -- Beware, we predefine a bunch of patterns here and one reason for doing so -- is that we get consistent behaviour in some of the visualizers. @@ -1169,9 +1226,8 @@ local byte, char, gmatch = string.byte, string.char, string.gmatch lpeg.patterns = lpeg.patterns or { } -- so that we can share local patterns = lpeg.patterns -local P, R, S, V, match = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match -local Ct, C, Cs, Cc = lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc -local lpegtype = lpeg.type +local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp +local lpegtype, lpegmatch = lpeg.type, lpeg.match local utfcharacters = string.utfcharacters local utfgmatch = unicode and unicode.utf8.gmatch @@ -1222,6 +1278,10 @@ patterns.utf8char = utf8char patterns.validutf8 = validutf8char patterns.validutf8char = validutf8char +local eol = S("\n\r") +local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace = eol + spacer + patterns.digit = digit patterns.sign = sign patterns.cardinal = sign^0 * digit^1 @@ -1241,16 +1301,16 @@ patterns.letter = patterns.lowercase + patterns.uppercase patterns.space = space patterns.tab = P("\t") patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = S("\n\r") -patterns.spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace patterns.newline = newline patterns.emptyline = newline^1 -patterns.nonspacer = 1 - patterns.spacer -patterns.whitespace = patterns.eol + patterns.spacer -patterns.nonwhitespace = 1 - patterns.whitespace +patterns.nonspacer = 1 - spacer +patterns.nonwhitespace = 1 - whitespace patterns.equal = P("=") patterns.comma = P(",") -patterns.commaspacer = P(",") * patterns.spacer^0 +patterns.commaspacer = P(",") * spacer^0 patterns.period = P(".") patterns.colon = P(":") patterns.semicolon = P(";") @@ -1265,6 +1325,10 @@ patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble patterns.unspacer = ((patterns.spacer^1)/"")^0 +patterns.singlequoted = squote * patterns.nosquote * squote +patterns.doublequoted = dquote * patterns.nodquote * dquote +patterns.quoted = patterns.doublequoted + patterns.singlequoted + patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 patterns.beginline = #(1-newline) @@ -1275,8 +1339,17 @@ patterns.beginline = #(1-newline) -- print(string.unquoted('"test"')) -- print(string.unquoted('"test"')) -function lpeg.anywhere(pattern) --slightly adapted from website - return P { P(pattern) + 1 * V(1) } -- why so complex? +local function anywhere(pattern) --slightly adapted from website + return P { P(pattern) + 1 * V(1) } +end + +lpeg.anywhere = anywhere + +function lpeg.instringchecker(p) + p = anywhere(p) + return function(str) + return lpegmatch(p,str) and true or false + end end function lpeg.splitter(pattern, action) @@ -1325,7 +1398,7 @@ function string.splitup(str,separator) if not separator then separator = "," end - return match(splitters_m[separator] or splitat(separator),str) + return lpegmatch(splitters_m[separator] or splitat(separator),str) end @@ -1337,16 +1410,20 @@ function lpeg.split(separator,str) c = tsplitat(separator) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end function string.split(str,separator) - local c = cache[separator] - if not c then - c = tsplitat(separator) - cache[separator] = c + if separator then + local c = cache[separator] + if not c then + c = tsplitat(separator) + cache[separator] = c + end + return lpegmatch(c,str) + else + return { str } end - return match(c,str) end local spacing = patterns.spacer^0 * newline -- sort of strip @@ -1362,7 +1439,7 @@ local linesplitter = tsplitat(newline) patterns.linesplitter = linesplitter function string.splitlines(str) - return match(linesplitter,str) + return lpegmatch(linesplitter,str) end local utflinesplitter = utfbom^-1 * tsplitat(newline) @@ -1370,7 +1447,58 @@ local utflinesplitter = utfbom^-1 * tsplitat(newline) patterns.utflinesplitter = utflinesplitter function string.utfsplitlines(str) - return match(utflinesplitter,str or "") + return lpegmatch(utflinesplitter,str or "") +end + +local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0) +local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0) + +function string.utfsplit(str,ignorewhitespace) -- new + if ignorewhitespace then + return lpegmatch(utfcharsplitter_iws,str or "") + else + return lpegmatch(utfcharsplitter_ows,str or "") + end +end + +-- inspect(string.utfsplit("a b c d")) +-- inspect(string.utfsplit("a b c d",true)) + +-- -- alternative 1: 0.77 +-- +-- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0) +-- +-- function string.utflength(str) +-- return #lpegmatch(utfcharcounter,str or "") +-- end +-- +-- -- alternative 2: 1.70 +-- +-- local n = 0 +-- +-- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow +-- +-- function string.utflength(str) +-- n = 0 +-- lpegmatch(utfcharcounter,str or "") +-- return n +-- end +-- +-- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) + +local n = 0 + +local utfcharcounter = utfbom^-1 * Cs ( ( + Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end + + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end + + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end + + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end +)^0 ) + +function string.utflength(str) + n = 0 + lpegmatch(utfcharcounter,str or "") + return n end @@ -1384,7 +1512,7 @@ function lpeg.checkedsplit(separator,str) c = Ct(separator^0 * other * (separator^1 * other)^0) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end function string.checkedsplit(str,separator) @@ -1395,7 +1523,7 @@ function string.checkedsplit(str,separator) c = Ct(separator^0 * other * (separator^1 * other)^0) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end @@ -1440,11 +1568,11 @@ function lpeg.keeper(str) end function lpeg.frontstripper(str) -- or pattern (yet undocumented) - return (P(str) + P(true)) * Cs(P(1)^0) + return (P(str) + P(true)) * Cs(anything^0) end function lpeg.endstripper(str) -- or pattern (yet undocumented) - return Cs((1 - P(str) * P(-1))^0) + return Cs((1 - P(str) * endofstring)^0) end -- Just for fun I looked at the used bytecode and @@ -1453,8 +1581,22 @@ end function lpeg.replacer(one,two) if type(one) == "table" then local no = #one - if no > 0 then - local p + local p + if no == 0 then + for k, v in next, one do + local pp = P(k) / v + if p then + p = p + pp + else + p = pp + end + end + return Cs((p + 1)^0) + elseif no == 1 then + local o = one[1] + one, two = P(o[1]), o[2] + return Cs(((1-one)^1 + one/two)^0) + else for i=1,no do local o = one[i] local pp = P(o[1]) / o[2] @@ -1467,11 +1609,16 @@ function lpeg.replacer(one,two) return Cs((p + 1)^0) end else + one = P(one) two = two or "" - return Cs((P(one)/two + 1)^0) + return Cs(((1-one)^1 + one/two)^0) end end +-- print(lpeg.match(lpeg.replacer("e","a"),"test test")) +-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) +-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) + local splitters_f, splitters_s = { }, { } function lpeg.firstofsplit(separator) -- always return value @@ -1506,7 +1653,7 @@ local nany = utf8char/"" function lpeg.counter(pattern) pattern = Cs((P(pattern)/" " + nany)^0) return function(str) - return #match(pattern,str) + return #lpegmatch(pattern,str) end end @@ -1520,7 +1667,7 @@ if utfgmatch then end return n else -- 4 times slower but still faster than / function - return #match(Cs((P(what)/" " + nany)^0),str) + return #lpegmatch(Cs((P(what)/" " + nany)^0),str) end end @@ -1535,9 +1682,9 @@ else p = Cs((P(what)/" " + nany)^0) cache[p] = p end - return #match(p,str) + return #lpegmatch(p,str) else -- 4 times slower but still faster than / function - return #match(Cs((P(what)/" " + nany)^0),str) + return #lpegmatch(Cs((P(what)/" " + nany)^0),str) end end @@ -1564,7 +1711,7 @@ local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0) local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0) function string.escapedpattern(str,simple) - return match(simple and s or p,str) + return lpegmatch(simple and s or p,str) end -- utf extensies @@ -1611,7 +1758,7 @@ else p = P(uc) end end - match((utf8char/f)^0,str) + lpegmatch((utf8char/f)^0,str) return p end @@ -1627,7 +1774,7 @@ function lpeg.UR(str,more) first = str last = more or first else - first, last = match(range,str) + first, last = lpegmatch(range,str) if not last then return P(str) end @@ -1654,11 +1801,15 @@ end -function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") +function lpeg.is_lpeg(p) + return p and lpegtype(p) == "pattern" +end + +function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order if type(list) ~= "table" then list = { list, ... } end - -- sort(list) -- longest match first + -- table.sort(list) -- longest match first local p = P(list[1]) for l=2,#list do p = p + P(list[l]) @@ -1666,10 +1817,6 @@ function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") return p end -function lpeg.is_lpeg(p) - return p and lpegtype(p) == "pattern" -end - -- For the moment here, but it might move to utilities. Beware, we need to -- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we -- loop back from the end cq. prepend. @@ -1827,6 +1974,24 @@ end -- utfchar(0x205F), -- math thinspace -- } ) +-- handy from within tex: + +local lpegmatch = lpeg.match + +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! + +function string.tformat(fmt,...) + return format(lpegmatch(replacer,fmt),...) +end + +-- strips leading and trailing spaces and collapsed all other spaces + +local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) + +function string.collapsespaces(str) + return lpegmatch(pattern,str) +end + end -- of closure @@ -1851,14 +2016,14 @@ else io.fileseparator, io.pathseparator = "/" , ":" end -function io.loaddata(filename,textmode) +function io.loaddata(filename,textmode) -- return nil if empty local f = io.open(filename,(textmode and 'r') or 'rb') if f then local data = f:read('*all') f:close() - return data - else - return nil + if #data > 0 then + return data + end end end @@ -1880,6 +2045,45 @@ function io.savedata(filename,data,joiner) end end +function io.loadlines(filename,n) -- return nil if empty + local f = io.open(filename,'r') + if f then + if n then + local lines = { } + for i=1,n do + local line = f:read("*lines") + if line then + lines[#lines+1] = line + else + break + end + end + f:close() + lines = concat(lines,"\n") + if #lines > 0 then + return lines + end + else + local line = f:read("*line") or "" + assert(f:close()) + if #line > 0 then + return line + end + end + end +end + +function io.loadchunk(filename,n) + local f = io.open(filename,'rb') + if f then + local data = f:read(n or 1024) + f:close() + if #data > 0 then + return data + end + end +end + function io.exists(filename) local f = io.open(filename) if f == nil then @@ -2107,7 +2311,7 @@ if not modules then modules = { } end modules ['l-number'] = { -- this module will be replaced when we have the bit library -local tostring = tostring +local tostring, tonumber = tostring, tonumber local format, floor, match, rep = string.format, math.floor, string.match, string.rep local concat, insert = table.concat, table.insert local lpegmatch = lpeg.match @@ -2170,11 +2374,11 @@ function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ... end function number.setbit(x, p) - return hasbit(x, p) and x or x + p + return (x % (p + p) >= p) and x or x + p end function number.clearbit(x, p) - return hasbit(x, p) and x - p or x + return (x % (p + p) >= p) and x - p or x end @@ -2208,6 +2412,10 @@ function number.tobitstring(n,m) end +function number.valid(str,default) + return tonumber(str) or default or nil +end + end -- of closure @@ -2319,17 +2527,28 @@ if not modules then modules = { } end modules ['l-os'] = { -- os.name : windows | msdos | linux | macosx | solaris | .. | generic (new) -- os.platform : extended os.name with architecture +-- os.sleep() => socket.sleep() +-- math.randomseed(tonumber(string.sub(string.reverse(tostring(math.floor(socket.gettime()*10000))),1,6))) + -- maybe build io.flush in os.execute local os = os +local date, time = os.date, os.time local find, format, gsub, upper, gmatch = string.find, string.format, string.gsub, string.upper, string.gmatch local concat = table.concat -local random, ceil = math.random, math.ceil -local rawget, rawset, type, getmetatable, setmetatable, tonumber = rawget, rawset, type, getmetatable, setmetatable, tonumber +local random, ceil, randomseed = math.random, math.ceil, math.randomseed +local rawget, rawset, type, getmetatable, setmetatable, tonumber, tostring = rawget, rawset, type, getmetatable, setmetatable, tonumber, tostring -- The following code permits traversing the environment table, at least -- in luatex. Internally all environment names are uppercase. +-- The randomseed in Lua is not that random, although this depends on the operating system as well +-- as the binary (Luatex is normally okay). But to be sure we set the seed anyway. + +math.initialseed = tonumber(string.sub(string.reverse(tostring(ceil(socket and socket.gettime()*10000 or time()))),1,6)) + +randomseed(math.initialseed) + if not os.__getenv__ then os.__getenv__ = os.getenv @@ -2433,12 +2652,14 @@ else os.libsuffix, os.binsuffix, os.binsuffixes = 'so', '', { '' } end +local launchers = { + windows = "start %s", + macosx = "open %s", + unix = "$BROWSER %s &> /dev/null &", +} + function os.launch(str) - if os.type == "windows" then - os.execute("start " .. str) -- os.spawn ? - else - os.execute(str .. " &") -- os.spawn ? - end + os.execute(format(launchers[os.name] or launchers.unix,str)) end if not os.times then @@ -2649,7 +2870,7 @@ end local d function os.timezone(delta) - d = d or tonumber(tonumber(os.date("%H")-os.date("!%H"))) + d = d or tonumber(tonumber(date("%H")-date("!%H"))) if delta then if d > 0 then return format("+%02i:00",d) @@ -2661,6 +2882,44 @@ function os.timezone(delta) end end +local timeformat = format("%%s%s",os.timezone(true)) +local dateformat = "!%Y-%m-%d %H:%M:%S" + +function os.fulltime(t,default) + t = tonumber(t) or 0 + if t > 0 then + -- valid time + elseif default then + return default + else + t = nil + end + return format(timeformat,date(dateformat,t)) +end + +local dateformat = "%Y-%m-%d %H:%M:%S" + +function os.localtime(t,default) + t = tonumber(t) or 0 + if t > 0 then + -- valid time + elseif default then + return default + else + t = nil + end + return date(dateformat,t) +end + +function os.converttime(t,default) + local t = tonumber(t) + if t and t > 0 then + return date(dateformat,t) + else + return default or "-" + end +end + local memory = { } local function which(filename) @@ -2735,7 +2994,7 @@ local function nameonly(name) return (gsub(match(name,"^.+[/\\](.-)$") or name,"%.[%a%d]+$","")) end -local function extname(name,default) +local function suffixonly(name,default) return match(name,"^.+%.([^/\\]-)$") or default or "" end @@ -2744,11 +3003,16 @@ local function splitname(name) return n or name, s or "" end -file.basename = basename -file.dirname = dirname -file.nameonly = nameonly -file.extname = extname -file.suffix = extname +file.basename = basename + +file.pathpart = dirname +file.dirname = dirname + +file.nameonly = nameonly + +file.suffixonly = suffixonly +file.extname = suffixonly -- obsolete +file.suffix = suffixonly function file.removesuffix(filename) return (gsub(filename,"%.[%a%d]+$","")) @@ -2864,6 +3128,11 @@ end file.isreadable = file.is_readable -- depricated file.iswritable = file.is_writable -- depricated +function file.size(name) + local a = attributes(name) + return a and a.size or 0 +end + -- todo: lpeg \\ / .. does not save much local checkedsplit = string.checkedsplit @@ -3001,6 +3270,7 @@ local drive = C(R("az","AZ")) * P(":") local path = C(((1-slash)^0 * slash)^0) local suffix = period * C(P(1-period)^0 * P(-1)) local base = C((1-suffix)^0) +local rest = C(P(1)^0) drive = drive + Cc("") path = path + Cc("") @@ -3009,7 +3279,8 @@ suffix = suffix + Cc("") local pattern_a = drive * path * base * suffix local pattern_b = path * base * suffix -local pattern_c = C(drive * path) * C(base * suffix) +local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra captures +local pattern_d = path * rest function file.splitname(str,splitdrive) if splitdrive then @@ -3019,6 +3290,10 @@ function file.splitname(str,splitdrive) end end +function file.splitbase(str) + return lpegmatch(pattern_d,str) -- returns path, base+suffix +end + function file.nametotable(str,splitdrive) -- returns table local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) if splitdrive then @@ -3040,6 +3315,8 @@ function file.nametotable(str,splitdrive) -- returns table end end +-- print(file.splitbase("a/b/c.txt")) + -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end -- -- test { "c:", "c:/aa", "c:/aa/bb", "c:/aa/bb/cc", "c:/aa/bb/cc.dd", "c:/aa/bb/cc.dd.ee" } @@ -3081,15 +3358,30 @@ if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end -function file.needs_updating(oldname,newname,threshold) -- size modification access change - local oldtime = lfs.attributes(oldname, modification) - local newtime = lfs.attributes(newname, modification) - if newtime >= oldtime then - return false - elseif oldtime - newtime < (threshold or 1) then - return false +function file.needsupdating(oldname,newname,threshold) -- size modification access change + local oldtime = lfs.attributes(oldname,"modification") + if oldtime then + local newtime = lfs.attributes(newname,"modification") + if not newtime then + return true -- no new file, so no updating needed + elseif newtime >= oldtime then + return false -- new file definitely needs updating + elseif oldtime - newtime < (threshold or 1) then + return false -- new file is probably still okay + else + return true -- new file has to be updated + end else - return true + return false -- no old file, so no updating needed + end +end + +file.needs_updating = file.needsupdating + +function file.syncmtimes(oldname,newname) + local oldtime = lfs.attributes(oldname,"modification") + if oldtime and lfs.isfile(newname) then + lfs.touch(newname,oldtime,oldtime) end end @@ -3111,7 +3403,7 @@ function file.loadchecksum(name) return nil end -function file.savechecksum(name, checksum) +function file.savechecksum(name,checksum) if not checksum then checksum = file.checksum(name) end if checksum then io.savedata(name .. ".md5",checksum) @@ -3136,7 +3428,7 @@ if not modules then modules = { } end modules ['l-url'] = { local char, gmatch, gsub, format, byte, find = string.char, string.gmatch, string.gsub, string.format, string.byte, string.find local concat = table.concat local tonumber, type = tonumber, type -local P, C, R, S, Cs, Cc, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct +local P, C, R, S, Cs, Cc, Ct, Cf, Cg, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cg, lpeg.V local lpegmatch, lpegpatterns, replacer = lpeg.match, lpeg.patterns, lpeg.replacer -- from wikipedia: @@ -3169,15 +3461,19 @@ local endofstring = P(-1) local hexdigit = R("09","AF","af") local plus = P("+") local nothing = Cc("") -local escaped = (plus / " ") + (percent * C(hexdigit * hexdigit) / tochar) +local escapedchar = (percent * C(hexdigit * hexdigit)) / tochar +local escaped = (plus / " ") + escapedchar -- we assume schemes with more than 1 character (in order to avoid problems with windows disks) -- we also assume that when we have a scheme, we also have an authority +-- +-- maybe we should already split the query (better for unescaping as = & can be part of a value local schemestr = Cs((escaped+(1-colon-slash-qmark-hash))^2) local authoritystr = Cs((escaped+(1- slash-qmark-hash))^0) local pathstr = Cs((escaped+(1- qmark-hash))^0) -local querystr = Cs((escaped+(1- hash))^0) +----- querystr = Cs((escaped+(1- hash))^0) +local querystr = Cs(( (1- hash))^0) local fragmentstr = Cs((escaped+(1- endofstring))^0) local scheme = schemestr * colon + nothing @@ -3192,11 +3488,20 @@ local parser = Ct(validurl) lpegpatterns.url = validurl lpegpatterns.urlsplitter = parser -local escapes = { } ; for i=0,255 do escapes[i] = format("%%%02X",i) end +local escapes = { } -local escaper = Cs((R("09","AZ","az") + S("-./_") + P(1) / escapes)^0) +setmetatable(escapes, { __index = function(t,k) + local v = format("%%%02X",byte(k)) + t[k] = v + return v +end }) -lpegpatterns.urlescaper = escaper +local escaper = Cs((R("09","AZ","az")^1 + P(" ")/"%%20" + S("-./_")^1 + P(1) / escapes)^0) -- space happens most +local unescaper = Cs((escapedchar + 1)^0) + +lpegpatterns.urlunescaped = escapedchar +lpegpatterns.urlescaper = escaper +lpegpatterns.urlunescaper = unescaper -- todo: reconsider Ct as we can as well have five return values (saves a table) -- so we can have two parsers, one with and one without @@ -3208,8 +3513,12 @@ end local isscheme = schemestr * colon * slash * slash -- this test also assumes authority local function hasscheme(str) - local scheme = lpegmatch(isscheme,str) -- at least one character - return scheme ~= "" and scheme or false + if str then + local scheme = lpegmatch(isscheme,str) -- at least one character + return scheme ~= "" and scheme or false + else + return false + end end @@ -3228,10 +3537,32 @@ local rootbased = P("/") local barswapper = replacer("|",":") local backslashswapper = replacer("\\","/") +-- queries: + +local equal = P("=") +local amp = P("&") +local key = Cs(((escapedchar+1)-equal )^0) +local value = Cs(((escapedchar+1)-amp -endofstring)^0) + +local splitquery = Cf ( Ct("") * P { "sequence", + sequence = V("pair") * (amp * V("pair"))^0, + pair = Cg(key * equal * value), +}, rawset) + +-- hasher + local function hashed(str) -- not yet ok (/test?test) + if str == "" then + return { + scheme = "invalid", + original = str, + } + end local s = split(str) - local somescheme = s[1] ~= "" - local somequery = s[4] ~= "" + local rawscheme = s[1] + local rawquery = s[4] + local somescheme = rawscheme ~= "" + local somequery = rawquery ~= "" if not somescheme and not somequery then s = { scheme = "file", @@ -3247,14 +3578,17 @@ local function hashed(str) -- not yet ok (/test?test) local authority, path, filename = s[2], s[3] if authority == "" then filename = path + elseif path == "" then + filename = "" else filename = authority .. "/" .. path end s = { - scheme = s[1], + scheme = rawscheme, authority = authority, path = path, - query = s[4], + query = lpegmatch(unescaper,rawquery), -- unescaped, but possible conflict with & and = + queries = lpegmatch(splitquery,rawquery), -- split first and then unescaped fragment = s[5], original = str, noscheme = false, @@ -3264,6 +3598,8 @@ local function hashed(str) -- not yet ok (/test?test) return s end +-- inspect(hashed("template://test")) + -- Here we assume: -- -- files: /// = relative @@ -3306,23 +3642,65 @@ function url.construct(hash) -- dodo: we need to escape ! return lpegmatch(escaper,concat(fullurl)) end -function url.filename(filename) +function url.filename(filename) -- why no lpeg here ? local t = hashed(filename) return (t.scheme == "file" and (gsub(t.path,"^/([a-zA-Z])([:|])/)","%1:"))) or filename end +local function escapestring(str) + return lpegmatch(escaper,str) +end + +url.escape = escapestring + +-- function url.query(str) -- separator could be an option +-- if type(str) == "string" then +-- local t = { } +-- for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do +-- t[k] = v +-- end +-- return t +-- else +-- return str +-- end +-- end + function url.query(str) if type(str) == "string" then - local t = { } - for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do - t[k] = v - end - return t + return lpegmatch(splitquery,str) or "" else return str end end +function url.toquery(data) + local td = type(data) + if td == "string" then + return #str and escape(data) or nil -- beware of double escaping + elseif td == "table" then + if next(data) then + local t = { } + for k, v in next, data do + t[#t+1] = format("%s=%s",k,escapestring(v)) + end + return concat(t,"&") + end + else + -- nil is a signal that no query + end +end + +-- /test/ | /test | test/ | test => test + +function url.barepath(path) + if not path or path == "" then + return "" + else + return (gsub(path,"^/?(.-)/?$","%1")) + end +end + + @@ -3363,6 +3741,24 @@ local isdir = lfs.isdir local isfile = lfs.isfile local currentdir = lfs.currentdir +-- in case we load outside luatex + +if not isdir then + function isdir(name) + local a = attributes(name) + return a and a.mode == "directory" + end + lfs.isdir = isdir +end + +if not isfile then + function isfile(name) + local a = attributes(name) + return a and a.mode == "file" + end + lfs.isfile = isfile +end + -- handy function dir.current() @@ -3738,28 +4134,49 @@ function boolean.tonumber(b) end function toboolean(str,tolerant) - if tolerant then - local tstr = type(str) - if tstr == "string" then - return str == "true" or str == "yes" or str == "on" or str == "1" or str == "t" - elseif tstr == "number" then - return tonumber(str) ~= 0 - elseif tstr == "nil" then - return false - else - return str - end + if str == nil then + return false + elseif str == false then + return false + elseif str == true then + return true elseif str == "true" then return true elseif str == "false" then return false + elseif not tolerant then + return false + elseif str == 0 then + return false + elseif (tonumber(str) or 0) > 0 then + return true else - return str + return str == "yes" or str == "on" or str == "t" end end string.toboolean = toboolean +function string.booleanstring(str) + if str == nil then + return false + elseif str == false then + return false + elseif str == true then + return true + elseif str == "true" then + return true + elseif str == "false" then + return false + elseif str == 0 then + return false + elseif (tonumber(str) or 0) > 0 then + return true + else + return str == "yes" or str == "on" or str == "t" + end +end + function string.is_boolean(str,default) if type(str) == "string" then if str == "true" or str == "yes" or str == "on" or str == "t" then @@ -3784,57 +4201,229 @@ if not modules then modules = { } end modules ['l-unicode'] = { license = "see context related readme files" } +-- this module will be reorganized + +-- todo: utf.sub replacement (used in syst-aux) + +local concat = table.concat +local type = type +local P, C, R, Cs, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local utftype = patterns.utftype +local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format +local utfsplitlines = string.utfsplitlines + if not unicode then - unicode = { utf8 = { } } + unicode = { } + +end + +local unicode = unicode + +utf = utf or unicode.utf8 + +if not utf then + + utf8 = { } + unicode.utf8 = utf8 + utf = utf8 + +end + +if not utf.char then local floor, char = math.floor, string.char - function unicode.utf8.utfchar(n) + function utf.char(n) if n < 0x80 then + -- 0aaaaaaa : 0x80 return char(n) elseif n < 0x800 then + -- 110bbbaa : 0xC0 : n >> 6 + -- 10aaaaaa : 0x80 : n & 0x3F return char( 0xC0 + floor(n/0x40), 0x80 + (n % 0x40) ) elseif n < 0x10000 then + -- 1110bbbb : 0xE0 : n >> 12 + -- 10bbbbaa : 0x80 : (n >> 6) & 0x3F + -- 10aaaaaa : 0x80 : n & 0x3F return char( 0xE0 + floor(n/0x1000), 0x80 + (floor(n/0x40) % 0x40), 0x80 + (n % 0x40) ) - elseif n < 0x40000 then + elseif n < 0x200000 then + -- 11110ccc : 0xF0 : n >> 18 + -- 10ccbbbb : 0x80 : (n >> 12) & 0x3F + -- 10bbbbaa : 0x80 : (n >> 6) & 0x3F + -- 10aaaaaa : 0x80 : n & 0x3F + -- dddd : ccccc - 1 return char( - 0xF0 + floor(n/0x40000), - 0x80 + floor(n/0x1000), + 0xF0 + floor(n/0x40000), + 0x80 + (floor(n/0x1000) % 0x40), 0x80 + (floor(n/0x40) % 0x40), 0x80 + (n % 0x40) ) else - -- return char( - -- 0xF1 + floor(n/0x1000000), - -- 0x80 + floor(n/0x40000), - -- 0x80 + floor(n/0x1000), - -- 0x80 + (floor(n/0x40) % 0x40), - -- 0x80 + (n % 0x40) - -- ) - return "?" + return "" end end end -local unicode = unicode +if not utf.byte then -utf = utf or unicode.utf8 + local utf8byte = patterns.utf8byte -local concat = table.concat -local utfchar, utfbyte, utfgsub = utf.char, utf.byte, utf.gsub -local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format -local type = type + function utf.byte(c) + return lpegmatch(utf8byte,c) + end -local utfsplitlines = string.utfsplitlines +end + +local utfchar, utfbyte = utf.char, utf.byte + +-- As we want to get rid of the (unmaintained) utf library we implement our own +-- variants (in due time an independent module): + +function unicode.filetype(data) + return data and lpegmatch(utftype,data) or "unknown" +end + +local toentities = Cs ( + ( + patterns.utf8one + + ( + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + ) / function(s) local b = utfbyte(s) if b < 127 then return s else return format("&#%X;",b) end end + )^0 +) + +patterns.toentities = toentities + +function utf.toentities(str) + return lpegmatch(toentities,str) +end + + + + +local one = P(1) +local two = C(1) * C(1) +local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) + +-- actually one of them is already utf ... sort of useless this one + +-- function utf.char(n) +-- if n < 0x80 then +-- return char(n) +-- elseif n < 0x800 then +-- return char( +-- 0xC0 + floor(n/0x40), +-- 0x80 + (n % 0x40) +-- ) +-- elseif n < 0x10000 then +-- return char( +-- 0xE0 + floor(n/0x1000), +-- 0x80 + (floor(n/0x40) % 0x40), +-- 0x80 + (n % 0x40) +-- ) +-- elseif n < 0x40000 then +-- return char( +-- 0xF0 + floor(n/0x40000), +-- 0x80 + floor(n/0x1000), +-- 0x80 + (floor(n/0x40) % 0x40), +-- 0x80 + (n % 0x40) +-- ) +-- else +-- -- return char( +-- -- 0xF1 + floor(n/0x1000000), +-- -- 0x80 + floor(n/0x40000), +-- -- 0x80 + floor(n/0x1000), +-- -- 0x80 + (floor(n/0x40) % 0x40), +-- -- 0x80 + (n % 0x40) +-- -- ) +-- return "?" +-- end +-- end +-- +-- merge into: + +local pattern = P("\254\255") * Cs( ( + four / function(a,b,c,d) + local ab = 0xFF * byte(a) + byte(b) + local cd = 0xFF * byte(c) + byte(d) + return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) + end + + two / function(a,b) + return utfchar(byte(a)*256 + byte(b)) + end + + one + )^1 ) + + P("\255\254") * Cs( ( + four / function(b,a,d,c) + local ab = 0xFF * byte(a) + byte(b) + local cd = 0xFF * byte(c) + byte(d) + return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) + end + + two / function(b,a) + return utfchar(byte(a)*256 + byte(b)) + end + + one + )^1 ) + +function string.toutf(s) + return lpegmatch(pattern,s) or s -- todo: utf32 +end + +local validatedutf = Cs ( + ( + patterns.utf8one + + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + + P(1) / "�" + )^0 +) + +patterns.validatedutf = validatedutf + +function string.validutf(str) + return lpegmatch(validatedutf,str) +end + + +utf.length = string.utflength +utf.split = string.utfsplit +utf.splitines = string.utfsplitlines +utf.valid = string.validutf + +if not utf.len then + utf.len = utf.length +end + +-- a replacement for simple gsubs: + +local utf8char = patterns.utf8char + +function utf.remapper(mapping) + local pattern = Cs((utf8char/mapping)^0) + return function(str) + if not str or str == "" then + return "" + else + return lpegmatch(pattern,str) + end + end, pattern +end + +-- local remap = utf.remapper { a = 'd', b = "c", c = "b", d = "a" } +-- print(remap("abcd 1234 abcd")) -- 0 EF BB BF UTF-8 -- 1 FF FE UTF-16-little-endian @@ -4027,11 +4616,22 @@ local function big(c) end end +-- function unicode.utf8_to_utf16(str,littleendian) +-- if littleendian then +-- return char(255,254) .. utfgsub(str,".",little) +-- else +-- return char(254,255) .. utfgsub(str,".",big) +-- end +-- end + +local _, l_remap = utf.remapper(little) +local _, b_remap = utf.remapper(big) + function unicode.utf8_to_utf16(str,littleendian) if littleendian then - return char(255,254) .. utfgsub(str,".",little) + return char(255,254) .. lpegmatch(l_remap,str) else - return char(254,255) .. utfgsub(str,".",big) + return char(254,255) .. lpegmatch(b_remap,str) end end @@ -4052,84 +4652,12 @@ function unicode.xstring(s) return format("0x%05X",type(s) == "number" and s or utfbyte(s)) end +-- -local lpegmatch = lpeg.match -local patterns = lpeg.patterns -local utftype = patterns.utftype - -function unicode.filetype(data) - return data and lpegmatch(utftype,data) or "unknown" -end - -local toentities = lpeg.Cs ( - ( - patterns.utf8one - + ( - patterns.utf8two - + patterns.utf8three - + patterns.utf8four - ) / function(s) local b = utfbyte(s) if b < 127 then return s else return format("&#%X;",b) end end - )^0 -) - -patterns.toentities = toentities - -function utf.toentities(str) - return lpegmatch(toentities,str) -end - - - - -local P, C, R, Cs = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs - -local one = P(1) -local two = C(1) * C(1) -local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) - --- actually one of them is already utf ... sort of useless this one - -local pattern = P("\254\255") * Cs( ( - four / function(a,b,c,d) - local ab = 0xFF * byte(a) + byte(b) - local cd = 0xFF * byte(c) + byte(d) - return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) - end - + two / function(a,b) - return utfchar(byte(a)*256 + byte(b)) - end - + one - )^1 ) - + P("\255\254") * Cs( ( - four / function(b,a,d,c) - local ab = 0xFF * byte(a) + byte(b) - local cd = 0xFF * byte(c) + byte(d) - return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) - end - + two / function(b,a) - return utfchar(byte(a)*256 + byte(b)) - end - + one - )^1 ) - -function string.toutf(s) - return lpegmatch(pattern,s) or s -- todo: utf32 -end - -local validatedutf = Cs ( - ( - patterns.utf8one - + patterns.utf8two - + patterns.utf8three - + patterns.utf8four - + P(1) / "�" - )^0 -) - -patterns.validatedutf = validatedutf +local pattern = Ct(C(patterns.utf8char)^0) -function string.validutf(str) - return lpegmatch(validatedutf,str) +function utf.totable(str) + return lpegmatch(pattern,str) end @@ -4189,10 +4717,11 @@ utilities = utilities or {} utilities.tables = utilities.tables or { } local tables = utilities.tables -local format, gmatch, rep = string.format, string.gmatch, string.rep +local format, gmatch, rep, gsub = string.format, string.gmatch, string.rep, string.gsub local concat, insert, remove = table.concat, table.insert, table.remove local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring -local type, next, rawset, tonumber = type, next, rawset, tonumber +local type, next, rawset, tonumber, loadstring = type, next, rawset, tonumber, loadstring +local lpegmatch, P, Cs = lpeg.match, lpeg.P, lpeg.Cs function tables.definetable(target) -- defines undefined tables local composed, t, n = nil, { }, 0 @@ -4346,6 +4875,121 @@ function tables.encapsulate(core,capsule,protect) end end +local function serialize(t,r,outer) -- no mixes + r[#r+1] = "{" + local n = #t + if n > 0 then + for i=1,n do + local v = t[i] + local tv = type(v) + if tv == "string" then + r[#r+1] = format("%q,",v) + elseif tv == "number" then + r[#r+1] = format("%s,",v) + elseif tv == "table" then + serialize(v,r) + elseif tv == "boolean" then + r[#r+1] = format("%s,",tostring(v)) + end + end + else + for k, v in next, t do + local tv = type(v) + if tv == "string" then + r[#r+1] = format("[%q]=%q,",k,v) + elseif tv == "number" then + r[#r+1] = format("[%q]=%s,",k,v) + elseif tv == "table" then + r[#r+1] = format("[%q]=",k) + serialize(v,r) + elseif tv == "boolean" then + r[#r+1] = format("[%q]=%s,",k,tostring(v)) + end + end + end + if outer then + r[#r+1] = "}" + else + r[#r+1] = "}," + end + return r +end + +function table.fastserialize(t,prefix) + return concat(serialize(t,{ prefix or "return" },true)) +end + +function table.deserialize(str) + if not str or str == "" then + return + end + local code = loadstring(str) + if not code then + return + end + code = code() + if not code then + return + end + return code +end + +-- inspect(table.fastserialize { a = 1, b = { 4, { 5, 6 } }, c = { d = 7, e = 'f"g\nh' } }) + +function table.load(filename) + if filename then + local t = io.loaddata(filename) + if t and t ~= "" then + t = loadstring(t) + if type(t) == "function" then + t = t() + if type(t) == "table" then + return t + end + end + end + end +end + +local function slowdrop(t) + local r = { } + local l = { } + for i=1,#t do + local ti = t[i] + local j = 0 + for k, v in next, ti do + j = j + 1 + l[j] = format("%s=%q",k,v) + end + r[i] = format(" {%s},\n",concat(l)) + end + return format("return {\n%s}",concat(r)) +end + +local function fastdrop(t) + local r = { "return {\n" } + for i=1,#t do + local ti = t[i] + r[#r+1] = " {" + for k, v in next, ti do + r[#r+1] = format("%s=%q",k,v) + end + r[#r+1] = "},\n" + end + r[#r+1] = "}" + return concat(r) +end + +function table.drop(t,slow) + if #t == 0 then + return "return { }" + elseif slow == true then + return slowdrop(t) -- less memory + else + return fastdrop(t) -- some 15% faster + end +end + end -- of closure @@ -4520,11 +5164,10 @@ local concat = table.concat local type, next = type, next utilities = utilities or {} -utilities.merger = utilities.merger or { } -- maybe mergers +local merger = utilities.merger or { } +utilities.merger = merger utilities.report = logs and logs.reporter("system") or print -local merger = utilities.merger - merger.strip_comment = true local m_begin_merge = "begin library merge" @@ -4570,9 +5213,11 @@ end local function self_save(name, data) if data ~= "" then if merger.strip_comment then - -- saves some 20K local n = #data + -- saves some 20K .. scite comments data = gsub(data,"%-%-~[^\n\r]*[\r\n]","") + -- saves some 20K .. ldx comments + data = gsub(data,"%-%-%[%[ldx%-%-.-%-%-ldx%]%]%-%-","") utilities.report("merge: %s bytes of comment stripped, %s bytes of code left",n-#data,#data) end io.savedata(name,data) @@ -4653,36 +5298,208 @@ if not modules then modules = { } end modules ['util-lua'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + comment = "the strip code is written by Peter Cawley", copyright = "PRAGMA ADE / ConTeXt Development Team", license = "see context related readme files" } -utilities = utilities or {} -utilities.lua = utilities.lua or { } -utilities.report = logs and logs.reporter("system") or print +local rep, sub, byte, dump, format = string.rep, string.sub, string.byte, string.dump, string.format +local loadstring, loadfile, type = loadstring, loadfile, type + +utilities = utilities or {} +utilities.lua = utilities.lua or { } +local luautilities = utilities.lua + +utilities.report = logs and logs.reporter("system") or print -- can be overloaded later + +local tracestripping = false +local forcestupidcompile = true -- use internal bytecode compiler +luautilities.stripcode = true -- support stripping when asked for +luautilities.alwaysstripcode = false -- saves 1 meg on 7 meg compressed format file (2012.08.12) +luautilities.nofstrippedchunks = 0 +luautilities.nofstrippedbytes = 0 + +-- The next function was posted by Peter Cawley on the lua list and strips line +-- number information etc. from the bytecode data blob. We only apply this trick +-- when we store data tables. Stripping makes the compressed format file about +-- 1MB smaller (and uncompressed we save at least 6MB). +-- +-- You can consider this feature an experiment, so it might disappear. There is +-- no noticeable gain in runtime although the memory footprint should be somewhat +-- smaller (and the file system has a bit less to deal with). +-- +-- Begin of borrowed code ... works for Lua 5.1 which LuaTeX currently uses ... + +local function strip_code_pc(dump,name) + local before = #dump + local version, format, endian, int, size, ins, num = byte(dump,5,11) + local subint + if endian == 1 then + subint = function(dump, i, l) + local val = 0 + for n = l, 1, -1 do + val = val * 256 + byte(dump,i + n - 1) + end + return val, i + l + end + else + subint = function(dump, i, l) + local val = 0 + for n = 1, l, 1 do + val = val * 256 + byte(dump,i + n - 1) + end + return val, i + l + end + end + local strip_function + strip_function = function(dump) + local count, offset = subint(dump, 1, size) + local stripped, dirty = rep("\0", size), offset + count + offset = offset + count + int * 2 + 4 + offset = offset + int + subint(dump, offset, int) * ins + count, offset = subint(dump, offset, int) + for n = 1, count do + local t + t, offset = subint(dump, offset, 1) + if t == 1 then + offset = offset + 1 + elseif t == 4 then + offset = offset + size + subint(dump, offset, size) + elseif t == 3 then + offset = offset + num + end + end + count, offset = subint(dump, offset, int) + stripped = stripped .. sub(dump,dirty, offset - 1) + for n = 1, count do + local proto, off = strip_function(sub(dump,offset, -1)) + stripped, offset = stripped .. proto, offset + off - 1 + end + offset = offset + subint(dump, offset, int) * int + int + count, offset = subint(dump, offset, int) + for n = 1, count do + offset = offset + subint(dump, offset, size) + size + int * 2 + end + count, offset = subint(dump, offset, int) + for n = 1, count do + offset = offset + subint(dump, offset, size) + size + end + stripped = stripped .. rep("\0", int * 3) + return stripped, offset + end + dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1)) + local after = #dump + local delta = before-after + if tracestripping then + utilities.report("stripped bytecode: %s, before %s, after %s, delta %s",name or "unknown",before,after,delta) + end + luautilities.nofstrippedchunks = luautilities.nofstrippedchunks + 1 + luautilities.nofstrippedbytes = luautilities.nofstrippedbytes + delta + return dump, delta +end + +-- ... end of borrowed code. + +local function strippedbytecode(code,forcestrip,name) + if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then + return strip_code_pc(code,name) + else + return code, 0 + end +end + +luautilities.stripbytecode = strip_code_pc +luautilities.strippedbytecode = strippedbytecode + +local function fatalerror(name) + utilities.report(format("fatal error in %q",name or "unknown")) +end + +-- quite subtle ... doing this wrong incidentally can give more bytes + + +function luautilities.loadedluacode(fullname,forcestrip,name) + -- quite subtle ... doing this wrong incidentally can give more bytes + name = name or fullname + local code = loadfile(fullname) + if code then + code() + end + if forcestrip and luautilities.stripcode then + if type(forcestrip) == "function" then + forcestrip = forcestrip(fullname) + end + if forcestrip then + local code, n = strip_code_pc(dump(code,name)) + return loadstring(code), n + elseif luautilities.alwaysstripcode then + return loadstring(strip_code_pc(dump(code),name)) + else + return code, 0 + end + elseif luautilities.alwaysstripcode then + return loadstring(strip_code_pc(dump(code),name)) + else + return code, 0 + end +end + +function luautilities.strippedloadstring(code,forcestrip,name) -- not executed + local n = 0 + if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then + code = loadstring(code) + if not code then + fatalerror(name) + end + code, n = strip_code_pc(dump(code),name) + end + return loadstring(code), n +end -local function stupidcompile(luafile,lucfile) - local data = io.loaddata(luafile) - if data and data ~= "" then - data = string.dump(data) - if data and data ~= "" then - io.savedata(lucfile,data) +local function stupidcompile(luafile,lucfile,strip) + local code = io.loaddata(luafile) + local n = 0 + if code and code ~= "" then + code = loadstring(code) + if not code then + fatalerror() + end + code = dump(code) + if strip then + code, n = strippedbytecode(code,true,luafile) -- last one is reported + end + if code and code ~= "" then + io.savedata(lucfile,code) end end + return n end -function utilities.lua.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true +local luac_normal = "texluac -o %q %q" +local luac_strip = "texluac -s -o %q %q" + +function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true utilities.report("lua: compiling %s into %s",luafile,lucfile) os.remove(lucfile) - local command = "-o " .. string.quoted(lucfile) .. " " .. string.quoted(luafile) + local done = false if strip ~= false then - command = "-s " .. command + strip = true + end + if forcestupidcompile then + fallback = true + elseif strip then + done = os.spawn(format(luac_strip, lucfile,luafile)) == 0 + else + done = os.spawn(format(luac_normal,lucfile,luafile)) == 0 end - local done = os.spawn("texluac " .. command) == 0 -- or os.spawn("luac " .. command) == 0 if not done and fallback then - utilities.report("lua: dumping %s into %s (unstripped)",luafile,lucfile) - stupidcompile(luafile,lucfile) -- maybe use the stripper we have elsewhere - cleanup = false -- better see how worse it is + local n = stupidcompile(luafile,lucfile,strip) + if n > 0 then + utilities.report("lua: %s dumped into %s (%i bytes stripped)",luafile,lucfile,n) + else + utilities.report("lua: %s dumped into %s (unstripped)",luafile,lucfile) + end + cleanup = false -- better see how bad it is end if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then utilities.report("lua: removing %s",luafile) @@ -4697,7 +5514,6 @@ end - end -- of closure do -- create closure to overcome 200 locals limit @@ -4710,8 +5526,10 @@ if not modules then modules = { } end modules ['util-prs'] = { license = "see context related readme files" } -local P, R, V, C, Ct, Cs, Carg = lpeg.P, lpeg.R, lpeg.V, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg -local lpegmatch = lpeg.match +local lpeg, table, string = lpeg, table, string + +local P, R, V, S, C, Ct, Cs, Carg, Cc = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc +local lpegmatch, patterns = lpeg.match, lpeg.patterns local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find local tostring, type, next = tostring, type, next @@ -4723,29 +5541,39 @@ parsers.patterns = parsers.patterns or { } local setmetatableindex = table.setmetatableindex local sortedhash = table.sortedhash +-- we share some patterns + +local space = P(' ') +local equal = P("=") +local comma = P(",") +local lbrace = P("{") +local rbrace = P("}") +local period = S(".") +local punctuation = S(".,:;") +local spacer = patterns.spacer +local whitespace = patterns.whitespace +local newline = patterns.newline +local anything = patterns.anything +local endofstring = patterns.endofstring + -- we could use a Cf Cg construct local escape, left, right = P("\\"), P('{'), P('}') -lpeg.patterns.balanced = P { +patterns.balanced = P { [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0, [2] = left * V(1) * right } -local space = P(' ') -local equal = P("=") -local comma = P(",") -local lbrace = P("{") -local rbrace = P("}") local nobrace = 1 - (lbrace+rbrace) local nested = P { lbrace * (nobrace + V(1))^0 * rbrace } local spaces = space^0 local argument = Cs((lbrace/"") * ((nobrace + nested)^0) * (rbrace/"")) -local content = (1-P(-1))^0 +local content = (1-endofstring)^0 -lpeg.patterns.nested = nested -- no capture -lpeg.patterns.argument = argument -- argument after e.g. = -lpeg.patterns.content = content -- rest after e.g = +patterns.nested = nested -- no capture +patterns.argument = argument -- argument after e.g. = +patterns.content = content -- rest after e.g = local value = P(lbrace * C((nobrace + nested)^0) * rbrace) + C((nested + (1-comma))^0) @@ -4764,10 +5592,6 @@ local function set(key,value) hash[key] = value end -local function set(key,value) - hash[key] = value -end - local pattern_a_s = (pattern_a/set)^1 local pattern_b_s = (pattern_b/set)^1 local pattern_c_s = (pattern_c/set)^1 @@ -4818,7 +5642,7 @@ end local separator = comma * space^0 local value = P(lbrace * C((nobrace + nested)^0) * rbrace) + C((nested + (1-comma))^0) -local pattern = Ct(value*(separator*value)^0) +local pattern = spaces * Ct(value*(separator*value)^0) -- "aap, {noot}, mies" : outer {} removes, leading spaces ignored @@ -4942,6 +5766,37 @@ function parsers.listitem(str) return gmatch(str,"[^, ]+") end +-- +local digit = R("09") + +local pattern = Cs { "start", + start = V("one") + V("two") + V("three"), + rest = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0, + thousand = digit * digit * digit, + one = digit * V("rest"), + two = digit * digit * V("rest"), + three = V("thousand") * V("rest"), +} + +patterns.splitthousands = pattern -- maybe better in the parsers namespace ? + +function parsers.splitthousands(str) + return lpegmatch(pattern,str) or str +end + +-- print(parsers.splitthousands("11111111111.11")) + +local optionalwhitespace = whitespace^0 + +patterns.words = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1) +patterns.sentences = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1) +patterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1) + +-- local str = " Word1 word2. \n Word3 word4. \n\n Word5 word6.\n " +-- inspect(lpegmatch(patterns.paragraphs,str)) +-- inspect(lpegmatch(patterns.sentences,str)) +-- inspect(lpegmatch(patterns.words,str)) + end -- of closure @@ -5043,7 +5898,7 @@ end -- of closure do -- create closure to overcome 200 locals limit -if not modules then modules = { } end modules ['util.deb'] = { +if not modules then modules = { } end modules ['util-deb'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -5155,6 +6010,7 @@ function inspect(i) -- global function else print(tostring(i)) end + return i -- so that we can inline the inspect end -- from the lua book: @@ -5194,7 +6050,7 @@ if not modules then modules = { } end modules ['trac-inf'] = { local format, lower = string.format, string.lower local clock = os.gettimeofday or os.clock -- should go in environment -local write_nl = texio.write_nl +local write_nl = texio and texio.write_nl or print statistics = statistics or { } local statistics = statistics @@ -5277,7 +6133,7 @@ statistics.elapsedtime = elapsedtime statistics.elapsedindeed = elapsedindeed statistics.elapsedseconds = elapsedseconds --- general function +-- general function .. we might split this module function statistics.register(tag,fnc) if statistics.enable and type(fnc) == "function" then @@ -5387,6 +6243,8 @@ if not modules then modules = { } end modules ['trac-set'] = { -- might become u license = "see context related readme files" } +-- maybe this should be util-set.lua + local type, next, tostring = type, next, tostring local concat = table.concat local format, find, lower, gsub, escapedpattern = string.format, string.find, string.lower, string.gsub, string.escapedpattern @@ -5586,7 +6444,7 @@ function setters.show(t) local value, default, modules = functions.value, functions.default, #functions value = value == nil and "unset" or tostring(value) default = default == nil and "unset" or tostring(default) - t.report("%-30s modules: %2i default: %6s value: %6s",name,modules,default,value) + t.report("%-50s modules: %2i default: %6s value: %6s",name,modules,default,value) end end t.report() @@ -5678,17 +6536,31 @@ end) -- experiment -local flags = environment and environment.engineflags +if environment then -if flags then - if trackers and flags.trackers then - setters.initialize("flags","trackers", settings_to_hash(flags.trackers)) - -- t_enable(flags.trackers) - end - if directives and flags.directives then - setters.initialize("flags","directives", settings_to_hash(flags.directives)) - -- d_enable(flags.directives) + -- The engineflags are known earlier than environment.arguments but maybe we + -- need to handle them both as the later are parsed differently. The c: prefix + -- is used by mtx-context to isolate the flags from those that concern luatex. + + local engineflags = environment.engineflags + + if engineflags then + if trackers then + local list = engineflags["c:trackers"] or engineflags["trackers"] + if type(list) == "string" then + setters.initialize("flags","trackers",settings_to_hash(list)) + -- t_enable(list) + end + end + if directives then + local list = engineflags["c:directives"] or engineflags["directives"] + if type(list) == "string" then + setters.initialize("flags","directives", settings_to_hash(list)) + -- d_enable(list) + end + end end + end -- here @@ -5741,10 +6613,7 @@ local next, type = next, type local setmetatableindex = table.setmetatableindex ---[[ldx-- -

This is a prelude to a more extensive logging module. We no longer -provide based logging a sparsing is relatively easy anyway.

---ldx]]-- + logs = logs or { } local logs = logs @@ -6560,7 +7429,8 @@ local allocate, mark = utilities.storage.allocate, utilities.storage.mark local format, sub, match, gsub, find = string.format, string.sub, string.match, string.gsub, string.find local unquoted, quoted = string.unquoted, string.quoted -local concat = table.concat +local concat, insert, remove = table.concat, table.insert, table.remove +local loadedluacode = utilities.lua.loadedluacode -- precautions @@ -6578,8 +7448,28 @@ if arg and (arg[0] == 'luatex' or arg[0] == 'luatex.exe') and arg[1] == "--luaon for k=3,#arg do arg[k-2] = arg[k] end - arg[#arg] = nil -- last - arg[#arg] = nil -- pre-last + remove(arg) -- last + remove(arg) -- pre-last +end + +-- This is an ugly hack but it permits symlinking a script (say 'context') to 'mtxrun' as in: +-- +-- ln -s /opt/minimals/tex/texmf-linux-64/bin/mtxrun context +-- +-- The special mapping hack is needed because 'luatools' boils down to 'mtxrun --script base' +-- but it's unlikely that there will be more of this + +do + + local originalzero = file.basename(arg[0]) + local specialmapping = { luatools == "base" } + + if originalzero ~= "mtxrun" and originalzero ~= "mtxrun.lua" then + arg[0] = specialmapping[originalzero] or originalzero + insert(arg,0,"--script") + insert(arg,0,"mtxrun") + end + end -- environment @@ -6619,6 +7509,8 @@ local mt = { setmetatable(environment,mt) +-- context specific arguments (in order not to confuse the engine) + function environment.initializearguments(arg) local arguments, files = { }, { } environment.arguments, environment.files, environment.sortedflags = arguments, files, nil @@ -6627,10 +7519,12 @@ function environment.initializearguments(arg) if index > 0 then local flag, value = match(argument,"^%-+(.-)=(.-)$") if flag then + flag = gsub(flag,"^c:","") arguments[flag] = unquoted(value or "") else flag = match(argument,"^%-+(.+)") if flag then + flag = gsub(flag,"^c:","") arguments[flag] = true else files[#files+1] = argument @@ -6650,7 +7544,7 @@ end -- tricky: too many hits when we support partials unless we add -- a registration of arguments so from now on we have 'partial' -function environment.argument(name,partial) +function environment.getargument(name,partial) local arguments, sortedflags = environment.arguments, environment.sortedflags if arguments[name] then return arguments[name] @@ -6673,6 +7567,8 @@ function environment.argument(name,partial) return nil end +environment.argument = environment.getargument + function environment.splitarguments(separator) -- rather special, cut-off before separator local done, before, after = false, { }, { } local originalarguments = environment.originalarguments @@ -6758,7 +7654,7 @@ function environment.texfile(filename) return resolvers.findfile(filename,'tex') end -function environment.luafile(filename) +function environment.luafile(filename) -- needs checking local resolved = resolvers.findfile(filename,'tex') or "" if resolved ~= "" then return resolved @@ -6770,13 +7666,16 @@ function environment.luafile(filename) return resolvers.findfile(filename,'luatexlibs') or "" end -environment.loadedluacode = loadfile -- can be overloaded +local function checkstrip(filename) + local modu = modules[file.nameonly(filename)] + return modu and modu.dataonly +end function environment.luafilechunk(filename,silent) -- used for loading lua bytecode in the format filename = file.replacesuffix(filename, "lua") local fullname = environment.luafile(filename) if fullname and fullname ~= "" then - local data = environment.loadedluacode(fullname) + local data = loadedluacode(fullname,checkstrip,filename) if trace_locating then report_lua("loading file %s%s", fullname, not data and " failed" or "") elseif not silent then @@ -6874,21 +7773,7 @@ local trace_entities = false trackers.register("xml.entities", function(v) trac local report_xml = logs and logs.reporter("xml","core") or function(...) print(format(...)) end ---[[ldx-- -

The parser used here is inspired by the variant discussed in the lua book, but -handles comment and processing instructions, has a different structure, provides -parent access; a first version used different trickery but was less optimized to we -went this route. First we had a find based parser, now we have an based one. -The find based parser can be found in l-xml-edu.lua along with other older code.

-

Beware, the interface may change. For instance at, ns, tg, dt may get more -verbose names. Once the code is stable we will also remove some tracing and -optimize the code.

- -

I might even decide to reimplement the parser using the latest trickery -as the current variant was written when showed up and it's easier now to -build tables in one go.

---ldx]]-- xml = xml or { } local xml = xml @@ -6898,46 +7783,25 @@ local utf = unicode.utf8 local concat, remove, insert = table.concat, table.remove, table.insert local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber local format, lower, find, match, gsub = string.format, string.lower, string.find, string.match, string.gsub -local utfchar, utffind, utfgsub = utf.char, utf.find, utf.gsub +local utfchar = utf.char local lpegmatch = lpeg.match local P, S, R, C, V, C, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.C, lpeg.Cs ---[[ldx-- -

First a hack to enable namespace resolving. A namespace is characterized by -a . The following function associates a namespace prefix with a -pattern. We use , which in this case is more than twice as fast as a -find based solution where we loop over an array of patterns. Less code and -much cleaner.

---ldx]]-- + xml.xmlns = xml.xmlns or { } local check = P(false) local parse = check ---[[ldx-- -

The next function associates a namespace prefix with an . This -normally happens independent of parsing.

- -xml.registerns("mml","mathml") - ---ldx]]-- function xml.registerns(namespace, pattern) -- pattern can be an lpeg check = check + C(P(lower(pattern))) / namespace parse = P { P(check) + 1 * V(1) } end ---[[ldx-- -

The next function also registers a namespace, but this time we map a -given namespace prefix onto a registered one, using the given -. This used for attributes like xmlns:m.

- -xml.checkns("m","http://www.w3.org/mathml") - ---ldx]]-- function xml.checkns(namespace,url) local ns = lpegmatch(parse,lower(url)) @@ -6946,66 +7810,15 @@ function xml.checkns(namespace,url) end end ---[[ldx-- -

Next we provide a way to turn an into a registered -namespace. This used for the xmlns attribute.

- -resolvedns = xml.resolvens("http://www.w3.org/mathml") - - -This returns mml. ---ldx]]-- function xml.resolvens(url) return lpegmatch(parse,lower(url)) or "" end ---[[ldx-- -

A namespace in an element can be remapped onto the registered -one efficiently by using the xml.xmlns table.

---ldx]]-- - ---[[ldx-- -

This version uses . We follow the same approach as before, stack and top and -such. This version is about twice as fast which is mostly due to the fact that -we don't have to prepare the stream for cdata, doctype etc etc. This variant is -is dedicated to Luigi Scarso, who challenged me with 40 megabyte files that -took 12.5 seconds to load (1.5 for file io and the rest for tree building). With -the implementation we got that down to less 7.3 seconds. Loading the 14 - interface definition files (2.6 meg) went down from 1.05 seconds to 0.55.

- -

Next comes the parser. The rather messy doctype definition comes in many -disguises so it is no surprice that later on have to dedicate quite some - code to it.

- - - - - - - - - - -

The code may look a bit complex but this is mostly due to the fact that we -resolve namespaces and attach metatables. There is only one public function:

- - -local x = xml.convert(somestring) - - -

An optional second boolean argument tells this function not to create a root -element.

- -

Valid entities are:

- - - - - - ---ldx]]-- + + + -- not just one big nested table capture (lpeg overflow) @@ -7220,15 +8033,7 @@ local privates_n = { -- keeps track of defined ones } -local function escaped(s) - if s == "" then - return "" - else -- if utffind(s,privates_u) then - return (utfgsub(s,".",privates_u)) - -- else - -- return s - end -end +local escaped = utf.remapper(privates_u) local function unescaped(s) local p = privates_n[s] @@ -7243,13 +8048,7 @@ local function unescaped(s) return p end -local function unprivatized(s,resolve) - if s == "" then - return "" - else - return (utfgsub(s,".",privates_p)) - end -end +local unprivatized = utf.remapper(privates_p) xml.privatetoken = unescaped xml.unprivatized = unprivatized @@ -7589,7 +8388,12 @@ local function _xmlconvert_(data, settings) else errorhandler = errorhandler or xml.errorhandler if errorhandler then - xml.errorhandler(format("load error: %s",errorstr)) + local currentresource = settings.currentresource + if currentresource and currentresource ~= "" then + xml.errorhandler(format("load error in [%s]: %s",currentresource,errorstr)) + else + xml.errorhandler(format("load error: %s",errorstr)) + end end end else @@ -7634,7 +8438,7 @@ function xmlconvert(data,settings) if ok then return result else - return _xmlconvert_("") + return _xmlconvert_("",settings) end end @@ -7655,10 +8459,7 @@ function xml.inheritedconvert(data,xmldata) -- xmldata is parent return xc end ---[[ldx-- -

Packaging data in an xml like table is done with the following -function. Maybe it will go away (when not used).

---ldx]]-- + function xml.is_valid(root) return root and root.dt and root.dt[1] and type(root.dt[1]) == "table" and not root.dt[1].er @@ -7677,11 +8478,7 @@ end xml.errorhandler = report_xml ---[[ldx-- -

We cannot load an from a filehandle so we need to load -the whole file first. The function accepts a string representing -a filename or a file handle.

---ldx]]-- + function xml.load(filename,settings) local data = "" @@ -7695,13 +8492,17 @@ function xml.load(filename,settings) elseif filename then -- filehandle data = filename:read("*all") end - return xmlconvert(data,settings) + if settings then + settings.currentresource = filename + local result = xmlconvert(data,settings) + settings.currentresource = nil + return result + else + return xmlconvert(data,{ currentresource = filename }) + end end ---[[ldx-- -

When we inject new elements, we need to convert strings to -valid trees, which is what the next function does.

---ldx]]-- + local no_root = { no_root = true } @@ -7714,11 +8515,7 @@ function xml.toxml(data) end end ---[[ldx-- -

For copying a tree we use a dedicated function instead of the -generic table copier. Since we know what we're dealing with we -can speed up things a bit. The second argument is not to be used!

---ldx]]-- + local function copy(old,tables) if old then @@ -7742,13 +8539,7 @@ end xml.copy = copy ---[[ldx-- -

In serializing the tree or parts of the tree is a major -actitivity which is why the following function is pretty optimized resulting -in a few more lines of code than needed. The variant that uses the formatting -function for all components is about 15% slower than the concatinating -alternative.

---ldx]]-- + -- todo: add when not present @@ -7761,15 +8552,12 @@ function xml.checkbom(root) -- can be made faster return end end - insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } ) + insert(dt, 1, { special = true, ns = "", tg = "@pi@", dt = { "xml version='1.0' standalone='yes'" } } ) insert(dt, 2, "\n" ) end end ---[[ldx-- -

At the cost of some 25% runtime overhead you can first convert the tree to a string -and then handle the lot.

---ldx]]-- + -- new experimental reorganized serialize @@ -7962,21 +8750,7 @@ newhandlers { } } ---[[ldx-- -

How you deal with saving data depends on your preferences. For a 40 MB database -file the timing on a 2.3 Core Duo are as follows (time in seconds):

- - -1.3 : load data from file to string -6.1 : convert string into tree -5.3 : saving in file using xmlsave -6.8 : converting to string using xml.tostring -3.6 : saving converted string in file - -

Beware, these were timing with the old routine but measurements will not be that -much different I guess.

---ldx]]-- -- maybe this will move to lxml-xml @@ -8054,10 +8828,7 @@ xml.newhandlers = newhandlers xml.serialize = serialize xml.tostring = xmltostring ---[[ldx-- -

The next function operated on the content only and needs a handle function -that accepts a string.

---ldx]]-- + local function xmlstring(e,handle) if not handle or (e.special and e.tg ~= "@rt@") then @@ -8076,9 +8847,7 @@ end xml.string = xmlstring ---[[ldx-- -

A few helpers:

---ldx]]-- + function xml.settings(e) @@ -8122,11 +8891,7 @@ function xml.name(root) end end ---[[ldx-- -

The next helper erases an element but keeps the table as it is, -and since empty strings are not serialized (effectively) it does -not harm. Copying the table would take more time. Usage:

---ldx]]-- + function xml.erase(dt,k) if dt then @@ -8138,13 +8903,7 @@ function xml.erase(dt,k) end end ---[[ldx-- -

The next helper assigns a tree (or string). Usage:

- -dt[k] = xml.assign(root) or xml.assign(dt,k,root) - ---ldx]]-- function xml.assign(dt,k,root) if dt and k then @@ -8157,20 +8916,14 @@ end -- the following helpers may move ---[[ldx-- -

The next helper assigns a tree (or string). Usage:

- -xml.tocdata(e) -xml.tocdata(e,"error") - ---ldx]]-- + function xml.tocdata(e,wrapper) -- a few more in the aux module local whatever = type(e) == "table" and xmltostring(e.dt) or e or "" if wrapper then whatever = format("<%s>%s",wrapper,whatever,wrapper) end - local t = { special = true, ns = "", tg = "@cd@", at = {}, rn = "", dt = { whatever }, __p__ = e } + local t = { special = true, ns = "", tg = "@cd@", at = { }, rn = "", dt = { whatever }, __p__ = e } setmetatable(t,getmetatable(e)) e.dt = { t } end @@ -8225,7 +8978,7 @@ end -- of closure do -- create closure to overcome 200 locals limit -if not modules then modules = { } end modules ['lxml-pth'] = { +if not modules then modules = { } end modules ['lxml-lpt'] = { version = 1.001, comment = "this module is the basis for the lxml-* ones", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -8246,28 +8999,9 @@ local setmetatableindex = table.setmetatableindex -- beware, this is not xpath ... e.g. position is different (currently) and -- we have reverse-sibling as reversed preceding sibling ---[[ldx-- -

This module can be used stand alone but also inside in -which case it hooks into the tracker code. Therefore we provide a few -functions that set the tracers. Here we overload a previously defined -function.

-

If I can get in the mood I will make a variant that is XSLT compliant -but I wonder if it makes sense.

---ldx]]-- - ---[[ldx-- -

Expecially the lpath code is experimental, we will support some of xpath, but -only things that make sense for us; as compensation it is possible to hook in your -own functions. Apart from preprocessing content for we also need -this module for process management, like handling and -files.

- - -a/b/c /*/c -a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n) -a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n) - ---ldx]]-- + + + local trace_lpath = false if trackers then trackers.register("xml.path", function(v) trace_lpath = v end) end local trace_lparse = false if trackers then trackers.register("xml.parse", function(v) trace_lparse = v end) end @@ -8275,11 +9009,7 @@ local trace_lprofile = false if trackers then trackers.register("xml.profile", local report_lpath = logs.reporter("xml","lpath") ---[[ldx-- -

We've now arrived at an interesting part: accessing the tree using a subset -of and since we're not compatible we call it . We -will explain more about its usage in other documents.

---ldx]]-- + local xml = xml @@ -8731,14 +9461,23 @@ local lp_builtin = P ( -- for the moment we keep namespaces with attributes local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * ((R("az","AZ") + S("-_:"))^1) * Cc("'])") -local lp_fastpos_p = ((P("+")^0 * R("09")^1 * P(-1)) / function(s) return "l==" .. s end) -local lp_fastpos_n = ((P("-") * R("09")^1 * P(-1)) / function(s) return "(" .. s .. "<0 and (#list+".. s .. "==l))" end) + +-- lp_fastpos_p = (P("+")^0 * R("09")^1 * P(-1)) / function(s) return "l==" .. s end +-- lp_fastpos_n = (P("-") * R("09")^1 * P(-1)) / function(s) return "(" .. s .. "<0 and (#list+".. s .. "==l))" end + +lp_fastpos_p = P("+")^0 * R("09")^1 * P(-1) / "l==%0" +lp_fastpos_n = P("-") * R("09")^1 * P(-1) / "(%0<0 and (#list+%0==l))" + local lp_fastpos = lp_fastpos_n + lp_fastpos_p + local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false") -local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling - return t .. "(" -end +-- local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling +-- return t .. "(" +-- end + +-- local lp_lua_function = (R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / "%0(" +local lp_lua_function = Cs((R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(")) / "%0" local lp_function = C(R("az","AZ","__")^1) * P("(") / function(t) -- todo: better . handling if expressions[t] then @@ -9254,9 +9993,7 @@ end xml.applylpath = applylpath -- takes a table as first argment, which is what xml.filter will do ---[[ldx-- -

This is the main filter function. It returns whatever is asked for.

---ldx]]-- + function xml.filter(root,pattern) -- no longer funny attribute handling here return applylpath(root,pattern) @@ -9354,12 +10091,12 @@ xml.selection = selection -- new method, simple handle -- generic function finalizer (independant namespace) -local function dofunction(collected,fnc) +local function dofunction(collected,fnc,...) if collected then local f = functions[fnc] if f then for c=1,#collected do - f(collected[c]) + f(collected[c],...) end else report_lpath("unknown function '%s'",fnc) @@ -9460,21 +10197,7 @@ expressions.tag = function(e,n) -- only tg end end ---[[ldx-- -

Often using an iterators looks nicer in the code than passing handler -functions. The book describes how to use coroutines for that -purpose (). This permits -code like:

- -for r, d, k in xml.elements(xml.load('text.xml'),"title") do - print(d[k]) -- old method -end -for e in xml.collected(xml.load('text.xml'),"title") do - print(e) -- new one -end - ---ldx]]-- local wrap, yield = coroutine.wrap, coroutine.yield @@ -9515,6 +10238,32 @@ function xml.inspect(collection,pattern) end end +-- texy (see xfdf): + +local function split(e) + local dt = e.dt + if dt then + for i=1,#dt do + local dti = dt[i] + if type(dti) == "string" then + dti = gsub(dti,"^[\n\r]*(.-)[\n\r]*","%1") + dti = gsub(dti,"[\n\r]+","\n\n") + dt[i] = dti + else + split(dti) + end + end + end + return e +end + +function xml.finalizers.paragraphs(c) + for i=1,#c do + split(c[i]) + end + return c +end + end -- of closure @@ -9539,13 +10288,7 @@ local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, l lpegpatterns.xml = lpegpatterns.xml or { } local xmlpatterns = lpegpatterns.xml ---[[ldx-- -

The following helper functions best belong to the lxml-ini -module. Some are here because we need then in the mk -document and other manuals, others came up when playing with -this module. Since this module is also used in we've -put them here instead of loading mode modules there then needed.

---ldx]]-- + local function xmlgsub(t,old,new) -- will be replaced local dt = t.dt @@ -9731,9 +10474,7 @@ function xml.processattributes(root,pattern,handle) return collected end ---[[ldx-- -

The following functions collect elements and texts.

---ldx]]-- + -- are these still needed -> lxml-cmp.lua @@ -9772,9 +10513,7 @@ function xml.collect_tags(root, pattern, nonamespace) end end ---[[ldx-- -

We've now arrived at the functions that manipulate the tree.

---ldx]]-- + local no_root = { no_root = true } @@ -10160,9 +10899,7 @@ function xml.remapname(root, pattern, newtg, newns, newrn) end end ---[[ldx-- -

Helper (for q2p).

---ldx]]-- + function xml.cdatatotext(e) local dt = e.dt @@ -10259,9 +10996,7 @@ end -- xml.addentitiesdoctype(x,"hexadecimal") -- print(x) ---[[ldx-- -

Here are a few synonyms.

---ldx]]-- + xml.all = xml.each xml.insert = xml.insertafter @@ -10852,7 +11587,7 @@ local gsub, find, gmatch, char = string.gsub, string.find, string.gmatch, string local concat = table.concat local next, type = next, type -local filedirname, filebasename, fileextname, filejoin = file.dirname, file.basename, file.extname, file.join +local filedirname, filebasename, filejoin = file.dirname, file.basename, file.join local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end) local trace_detail = false trackers.register("resolvers.details", function(v) trace_detail = v end) @@ -11202,12 +11937,14 @@ local function splitpathexpr(str, newlist, validate) -- I couldn't resist lpeggi for s in gmatch(str,"[^,]+") do s = validate(s) if s then - n = n + 1 ; t[n] = s + n = n + 1 + t[n] = s end end else for s in gmatch(str,"[^,]+") do - n = n + 1 ; t[n] = s + n = n + 1 + t[n] = s end end if trace_expansions then @@ -11221,7 +11958,7 @@ end -- We could make the previous one public. local function validate(s) - s = collapsepath(s) -- already keeps the // + s = collapsepath(s) -- already keeps the trailing / and // return s ~= "" and not find(s,"^!*unset/*$") and s end @@ -11559,7 +12296,7 @@ local resolvers = resolvers local allocate = utilities.storage.allocate local setmetatableindex = table.setmetatableindex -local fileextname = file.extname +local suffixonly = file.suffixonly local formats = allocate() local suffixes = allocate() @@ -11814,7 +12551,7 @@ function resolvers.formatofvariable(str) end function resolvers.formatofsuffix(str) -- of file - return suffixmap[fileextname(str)] or 'tex' -- so many map onto tex (like mkiv, cld etc) + return suffixmap[suffixonly(str)] or 'tex' -- so many map onto tex (like mkiv, cld etc) end function resolvers.variableofformat(str) @@ -11826,7 +12563,7 @@ function resolvers.variableofformatorsuffix(str) if v then return v end - v = suffixmap[fileextname(str)] + v = suffixmap[suffixonly(str)] if v then return formats[v] end @@ -11847,21 +12584,7 @@ if not modules then modules = { } end modules ['data-tmp'] = { license = "see context related readme files" } ---[[ldx-- -

This module deals with caching data. It sets up the paths and -implements loaders and savers for tables. Best is to set the -following variable. When not set, the usual paths will be -checked. Personally I prefer the (users) temporary path.

- - -TEXMFCACHE=$TMP;$TEMP;$TMPDIR;$TEMPDIR;$HOME;$TEXMFVAR;$VARTEXMF;. - -

Currently we do no locking when we write files. This is no real -problem because most caching involves fonts and the chance of them -being written at the same time is small. We also need to extend -luatools with a recache feature.

---ldx]]-- local format, lower, gsub, concat = string.format, string.lower, string.gsub, table.concat local serialize, serializetofile = table.serialize, table.tofile @@ -12396,11 +13119,12 @@ local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns local filedirname = file.dirname local filebasename = file.basename -local fileextname = file.extname +local suffixonly = file.suffixonly local filejoin = file.join local collapsepath = file.collapsepath local joinpath = file.joinpath local allocate = utilities.storage.allocate +local settings_to_array = utilities.parsers.settings_to_array local setmetatableindex = table.setmetatableindex local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end) @@ -12424,7 +13148,7 @@ resolvers.cacheversion = '1.0.1' resolvers.configbanner = '' resolvers.homedir = environment.homedir resolvers.criticalvars = allocate { "SELFAUTOLOC", "SELFAUTODIR", "SELFAUTOPARENT", "TEXMFCNF", "TEXMF", "TEXOS" } -resolvers.luacnfname = 'texmfcnf.lua' +resolvers.luacnfname = "texmfcnf.lua" resolvers.luacnfstate = "unknown" -- The web2c tex binaries as well as kpse have built in paths for the configuration @@ -12696,7 +13420,7 @@ end local function identify_configuration_files() local specification = instance.specification if #specification == 0 then - local cnfspec = getenv('TEXMFCNF') + local cnfspec = getenv("TEXMFCNF") if cnfspec == "" then cnfspec = resolvers.luacnfspec resolvers.luacnfstate = "default" @@ -12784,7 +13508,7 @@ local function load_configuration_files() -- we push the value into the main environment (osenv) so -- that it takes precedence over the default one and therefore -- also over following definitions - resolvers.setenv('TEXMFCNF',cnfspec) -- resolves prefixes + resolvers.setenv("TEXMFCNF",cnfspec) -- resolves prefixes -- we now identify and load the specified configuration files instance.specification = { } identify_configuration_files() @@ -12832,10 +13556,11 @@ end local function locate_file_databases() -- todo: cache:// and tree:// (runtime) - local texmfpaths = resolvers.expandedpathlist('TEXMF') + local texmfpaths = resolvers.expandedpathlist("TEXMF") if #texmfpaths > 0 then for i=1,#texmfpaths do local path = collapsepath(texmfpaths[i]) + path = gsub(path,"/+$","") -- in case $HOME expands to something with a trailing / local stripped = lpegmatch(inhibitstripper,path) -- the !! thing if stripped ~= "" then local runtime = stripped == path @@ -12964,9 +13689,9 @@ function resolvers.prependhash(type,name,cache) end function resolvers.extendtexmfvariable(specification) -- crap, we could better prepend the hash - local t = resolvers.splitpath(getenv('TEXMF')) + local t = resolvers.splitpath(getenv("TEXMF")) -- okay? insert(t,1,specification) - local newspec = concat(t,";") + local newspec = concat(t,",") -- not ; if instance.environment["TEXMF"] then instance.environment["TEXMF"] = newspec elseif instance.variables["TEXMF"] then @@ -13041,14 +13766,19 @@ function resolvers.resetextrapath() end function resolvers.registerextrapath(paths,subpaths) + paths = settings_to_array(paths) + subpaths = settings_to_array(subpaths) local ep = instance.extra_paths or { } local oldn = #ep local newn = oldn - if paths and paths ~= "" then - if subpaths and subpaths ~= "" then - for p in gmatch(paths,"[^,]+") do - -- we gmatch each step again, not that fast, but used seldom - for s in gmatch(subpaths,"[^,]+") do + local nofpaths = #paths + local nofsubpaths = #subpaths + if nofpaths > 0 then + if nofsubpaths > 0 then + for i=1,nofpaths do + local p = paths[i] + for j=1,nofsubpaths do + local s = subpaths[j] local ps = p .. "/" .. s if not done[ps] then newn = newn + 1 @@ -13058,7 +13788,8 @@ function resolvers.registerextrapath(paths,subpaths) end end else - for p in gmatch(paths,"[^,]+") do + for i=1,nofpaths do + local p = paths[i] if not done[p] then newn = newn + 1 ep[newn] = resolvers.cleanpath(p) @@ -13066,10 +13797,10 @@ function resolvers.registerextrapath(paths,subpaths) end end end - elseif subpaths and subpaths ~= "" then + elseif nofsubpaths > 0 then for i=1,oldn do - -- we gmatch each step again, not that fast, but used seldom - for s in gmatch(subpaths,"[^,]+") do + for j=1,nofsubpaths do + local s = subpaths[j] local ps = ep[i] .. "/" .. s if not done[ps] then newn = newn + 1 @@ -13147,18 +13878,21 @@ function resolvers.expandedpathlist(str) return { } elseif instance.savelists then str = lpegmatch(dollarstripper,str) - if not instance.lists[str] then -- cached - local lst = made_list(instance,resolvers.splitpath(resolvers.expansion(str))) - instance.lists[str] = expandedpathfromlist(lst) - end - return instance.lists[str] + local lists = instance.lists + local lst = lists[str] + if not lst then + local l = made_list(instance,resolvers.splitpath(resolvers.expansion(str))) + lst = expandedpathfromlist(l) + lists[str] = lst + end + return lst else local lst = resolvers.splitpath(resolvers.expansion(str)) return made_list(instance,expandedpathfromlist(lst)) end end -function resolvers.expandedpathlistfromvariable(str) -- brrr +function resolvers.expandedpathlistfromvariable(str) -- brrr / could also have cleaner ^!! /$ // str = lpegmatch(dollarstripper,str) local tmp = resolvers.variableofformatorsuffix(str) return resolvers.expandedpathlist(tmp ~= "" and tmp or str) @@ -13315,7 +14049,7 @@ local preparetreepattern = Cs((P(".")/"%%." + P("-")/"%%-" + P(1))^0 * Cc("$")) local collect_instance_files local function find_analyze(filename,askedformat,allresults) - local filetype, wantedfiles, ext = '', { }, fileextname(filename) + local filetype, wantedfiles, ext = '', { }, suffixonly(filename) -- too tricky as filename can be bla.1.2.3: -- -- if not suffixmap[ext] then @@ -13393,7 +14127,7 @@ local function find_qualified(filename,allresults) -- this one will be split too if trace_detail then report_resolving("locating qualified file '%s'", filename) end - local forcedname, suffix = "", fileextname(filename) + local forcedname, suffix = "", suffixonly(filename) if suffix == "" then -- why local format_suffixes = askedformat == "" and resolvers.defaultsuffixes or suffixes[askedformat] if format_suffixes then @@ -14063,6 +14797,8 @@ local gsub = string.gsub local cleanpath, findgivenfile, expansion = resolvers.cleanpath, resolvers.findgivenfile, resolvers.expansion local getenv = resolvers.getenv -- we can probably also use resolvers.expansion local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match +local joinpath, basename, dirname = file.join, file.basename, file.dirname +local getmetatable, rawset, type = getmetatable, rawset, type -- getenv = function(...) return resolvers.getenv(...) end -- needs checking (definitions changes later on) @@ -14104,28 +14840,43 @@ end prefixes.filename = function(str) local fullname = findgivenfile(str) or "" - return cleanpath(file.basename((fullname ~= "" and fullname) or str)) -- no cleanpath needed here + return cleanpath(basename((fullname ~= "" and fullname) or str)) -- no cleanpath needed here end prefixes.pathname = function(str) local fullname = findgivenfile(str) or "" - return cleanpath(file.dirname((fullname ~= "" and fullname) or str)) + return cleanpath(dirname((fullname ~= "" and fullname) or str)) end prefixes.selfautoloc = function(str) - return cleanpath(file.join(getenv('SELFAUTOLOC'),str)) + return cleanpath(joinpath(getenv('SELFAUTOLOC'),str)) end prefixes.selfautoparent = function(str) - return cleanpath(file.join(getenv('SELFAUTOPARENT'),str)) + return cleanpath(joinpath(getenv('SELFAUTOPARENT'),str)) end prefixes.selfautodir = function(str) - return cleanpath(file.join(getenv('SELFAUTODIR'),str)) + return cleanpath(joinpath(getenv('SELFAUTODIR'),str)) end prefixes.home = function(str) - return cleanpath(file.join(getenv('HOME'),str)) + return cleanpath(joinpath(getenv('HOME'),str)) +end + +local function toppath() + local pathname = dirname(inputstack[#inputstack] or "") + if pathname == "" then + return "." + else + return pathname + end +end + +resolvers.toppath = toppath + +prefixes.toppath = function(str) + return cleanpath(joinpath(toppath(),str)) end prefixes.env = prefixes.environment @@ -14161,6 +14912,8 @@ function resolvers.resetresolve(str) resolved, abstract = { }, { } end +-- todo: use an lpeg (see data-lua for !! / stripper) + local function resolve(str) -- use schemes, this one is then for the commandline only if type(str) == "table" then local t = { } @@ -14186,7 +14939,7 @@ end resolvers.resolve = resolve resolvers.unresolve = unresolve -if os.uname then +if type(os.uname) == "function" then for k, v in next, os.uname() do if not prefixes[k] then @@ -14198,11 +14951,17 @@ end if os.type == "unix" then + -- We need to distringuish between a prefix and something else : so we + -- have a special repath variant for linux. Also, when a new prefix is + -- defined, we need to remake the matcher. + local pattern local function makepattern(t,k,v) + if t then + rawset(t,k,v) + end local colon = P(":") - local p for k, v in table.sortedpairs(prefixes) do if p then p = P(k) + p @@ -14211,9 +14970,6 @@ if os.type == "unix" then end end pattern = Cs((p * colon + colon/";" + P(1))^0) - if t then - t[k] = v - end end makepattern() @@ -14424,18 +15180,7 @@ local trace_cache = false trackers.register("resolvers.cache", functi local trace_containers = false trackers.register("resolvers.containers", function(v) trace_containers = v end) local trace_storage = false trackers.register("resolvers.storage", function(v) trace_storage = v end) ---[[ldx-- -

Once we found ourselves defining similar cache constructs -several times, containers were introduced. Containers are used -to collect tables in memory and reuse them when possible based -on (unique) hashes (to be provided by the calling function).

- -

Caching to disk is disabled by default. Version numbers are -stored in the saved table which makes it possible to change the -table structures without bothering about the disk cache.

-

Examples of usage can be found in the font related code.

---ldx]]-- containers = containers or { } local containers = containers @@ -14670,11 +15415,7 @@ local trace_locating = false trackers.register("resolvers.locating", function(v local report_zip = logs.reporter("resolvers","zip") --- zip:///oeps.zip?name=bla/bla.tex --- zip:///oeps.zip?tree=tex/texmf-local --- zip:///texmf.zip?tree=/tex/texmf --- zip:///texmf.zip?tree=/tex/texmf-local --- zip:///texmf-mine.zip?tree=/tex/texmf-projects + local resolvers = resolvers @@ -14999,7 +15740,7 @@ end -- of closure do -- create closure to overcome 200 locals limit -if not modules then modules = { } end modules ['data-crl'] = { +if not modules then modules = { } end modules ['data-sch'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -15007,60 +15748,199 @@ if not modules then modules = { } end modules ['data-crl'] = { license = "see context related readme files" } --- this one is replaced by data-sch.lua -- +local loadstring = loadstring +local gsub, concat, format = string.gsub, table.concat, string.format +local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders -local gsub = string.gsub +local trace_schemes = false trackers.register("resolvers.schemes",function(v) trace_schemes = v end) +local report_schemes = logs.reporter("resolvers","schemes") -local resolvers = resolvers +local http = require("socket.http") +local ltn12 = require("ltn12") -local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders +local resolvers = resolvers +local schemes = resolvers.schemes or { } +resolvers.schemes = schemes + +local cleaners = { } +schemes.cleaners = cleaners + +local threshold = 24 * 60 * 60 + +directives.register("schemes.threshold", function(v) threshold = tonumber(v) or threshold end) + +function cleaners.none(specification) + return specification.original +end + +function cleaners.strip(specification) + return (gsub(specification.original,"[^%a%d%.]+","-")) -- so we keep periods +end + +function cleaners.md5(specification) + return file.addsuffix(md5.hex(specification.original),file.suffix(specification.path)) +end + +local cleaner = cleaners.strip + +directives.register("schemes.cleanmethod", function(v) cleaner = cleaners[v] or cleaners.strip end) + +function resolvers.schemes.cleanname(specification) + local hash = cleaner(specification) + if trace_schemes then + report_schemes("hashing %s to %s",specification.original,hash) + end + return hash +end -resolvers.curl = resolvers.curl or { } -local curl = resolvers.curl +local cached, loaded, reused, thresholds, handlers = { }, { }, { }, { }, { } -local cached = { } +local function runcurl(name,cachename) -- we use sockets instead or the curl library when possible + local command = "curl --silent --create-dirs --output " .. cachename .. " " .. name + os.spawn(command) +end -local function runcurl(specification) +local function fetch(specification) local original = specification.original - -- local scheme = specification.scheme - local cleanname = gsub(original,"[^%a%d%.]+","-") - local cachename = caches.setfirstwritablefile(cleanname,"curl") + local scheme = specification.scheme + local cleanname = schemes.cleanname(specification) + local cachename = caches.setfirstwritablefile(cleanname,"schemes") if not cached[original] then - if not io.exists(cachename) then + statistics.starttiming(schemes) + if not io.exists(cachename) or (os.difftime(os.time(),lfs.attributes(cachename).modification) > (thresholds[protocol] or threshold)) then cached[original] = cachename - local command = "curl --silent --create-dirs --output " .. cachename .. " " .. original - os.spawn(command) + local handler = handlers[scheme] + if handler then + if trace_schemes then + report_schemes("fetching '%s', protocol '%s', method 'built-in'",original,scheme) + end + logs.flush() + handler(specification,cachename) + else + if trace_schemes then + report_schemes("fetching '%s', protocol '%s', method 'curl'",original,scheme) + end + logs.flush() + runcurl(original,cachename) + end end if io.exists(cachename) then cached[original] = cachename + if trace_schemes then + report_schemes("using cached '%s', protocol '%s', cachename '%s'",original,scheme,cachename) + end else cached[original] = "" + if trace_schemes then + report_schemes("using missing '%s', protocol '%s'",original,scheme) + end end + loaded[scheme] = loaded[scheme] + 1 + statistics.stoptiming(schemes) + else + if trace_schemes then + report_schemes("reusing '%s', protocol '%s'",original,scheme) + end + reused[scheme] = reused[scheme] + 1 end return cached[original] end --- old code: we could be cleaner using specification (see schemes) - local function finder(specification,filetype) - return resolvers.methodhandler("finders",runcurl(specification),filetype) + return resolvers.methodhandler("finders",fetch(specification),filetype) end local opener = openers.file local loader = loaders.file -local function install(scheme) - finders[scheme] = finder - openers[scheme] = opener - loaders[scheme] = loader +local function install(scheme,handler,newthreshold) + handlers [scheme] = handler + loaded [scheme] = 0 + reused [scheme] = 0 + finders [scheme] = finder + openers [scheme] = opener + loaders [scheme] = loader + thresholds[scheme] = newthreshold or threshold end -resolvers.curl.install = install +schemes.install = install + +local function http_handler(specification,cachename) + local tempname = cachename .. ".tmp" + local f = io.open(tempname,"wb") + local status, message = http.request { + url = specification.original, + sink = ltn12.sink.file(f) + } + if not status then + os.remove(tempname) + else + os.remove(cachename) + os.rename(tempname,cachename) + end + return cachename +end -install('http') -install('https') +install('http',http_handler) +install('https') -- see pod install('ftp') +statistics.register("scheme handling time", function() + local l, r, nl, nr = { }, { }, 0, 0 + for k, v in table.sortedhash(loaded) do + if v > 0 then + nl = nl + 1 + l[nl] = k .. ":" .. v + end + end + for k, v in table.sortedhash(reused) do + if v > 0 then + nr = nr + 1 + r[nr] = k .. ":" .. v + end + end + local n = nl + nr + if n > 0 then + l = nl > 0 and concat(l) or "none" + r = nr > 0 and concat(r) or "none" + return format("%s seconds, %s processed, threshold %s seconds, loaded: %s, reused: %s", + statistics.elapsedtime(schemes), n, threshold, l, r) + else + return nil + end +end) + +-- We provide a few more helpers: + +----- http = require("socket.http") +local httprequest = http.request +local toquery = url.toquery + +-- local function httprequest(url) +-- return os.resultof(format("curl --silent %q", url)) +-- end + +local function fetchstring(url,data) + local q = data and toquery(data) + if q then + url = url .. "?" .. q + end + local reply = httprequest(url) + return reply -- just one argument +end + +schemes.fetchstring = fetchstring + +function schemes.fetchtable(url,data) + local reply = fetchstring(url,data) + if reply then + local s = loadstring("return " .. reply) + if s then + return s() + end + end +end + end -- of closure @@ -15074,170 +15954,199 @@ if not modules then modules = { } end modules ['data-lua'] = { license = "see context related readme files" } --- some loading stuff ... we might move this one to slot 2 depending --- on the developments (the loaders must not trigger kpse); we could --- of course use a more extensive lib path spec +-- We overload the regular loader. We do so because we operate mostly in +-- tds and use our own loader code. Alternatively we could use a more +-- extensive definition of package.path and package.cpath but even then +-- we're not done. Also, we now have better tracing. +-- +-- -- local mylib = require("libtest") +-- -- local mysql = require("luasql.mysql") -local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end) +local concat = table.concat + +local trace_libraries = false + +trackers.register("resolvers.libraries", function(v) trace_libraries = v end) +trackers.register("resolvers.locating", function(v) trace_libraries = v end) local report_libraries = logs.reporter("resolvers","libraries") local gsub, insert = string.gsub, table.insert +local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match local unpack = unpack or table.unpack +local is_readable = file.is_readable local resolvers, package = resolvers, package -local libformats = { 'luatexlibs', 'tex', 'texmfscripts', 'othertextfiles' } -- 'luainputs' -local clibformats = { 'lib' } - -local _path_, libpaths, _cpath_, clibpaths - -function package.libpaths() - if not _path_ or package.path ~= _path_ then - _path_ = package.path - libpaths = file.splitpath(_path_,";") +local libsuffixes = { 'tex', 'lua' } +local clibsuffixes = { 'lib' } +local libformats = { 'TEXINPUTS', 'LUAINPUTS' } +local clibformats = { 'CLUAINPUTS' } + +local libpaths = nil +local clibpaths = nil +local libhash = { } +local clibhash = { } +local libextras = { } +local clibextras = { } + +local pattern = Cs(P("!")^0 / "" * (P("/") * P(-1) / "/" + P("/")^1 / "/" + 1)^0) + +local function cleanpath(path) --hm, don't we have a helper for this? + return resolvers.resolve(lpegmatch(pattern,path)) +end + +local function getlibpaths() + if not libpaths then + libpaths = { } + for i=1,#libformats do + local paths = resolvers.expandedpathlistfromvariable(libformats[i]) + for i=1,#paths do + local path = cleanpath(paths[i]) + if not libhash[path] then + libpaths[#libpaths+1] = path + libhash[path] = true + end + end + end end return libpaths end -function package.clibpaths() - if not _cpath_ or package.cpath ~= _cpath_ then - _cpath_ = package.cpath - clibpaths = file.splitpath(_cpath_,";") +local function getclibpaths() + if not clibpaths then + clibpaths = { } + for i=1,#clibformats do + local paths = resolvers.expandedpathlistfromvariable(clibformats[i]) + for i=1,#paths do + local path = cleanpath(paths[i]) + if not clibhash[path] then + clibpaths[#clibpaths+1] = path + clibhash[path] = true + end + end + end end return clibpaths end -local function thepath(...) - local t = { ... } t[#t+1] = "?.lua" - local path = file.join(unpack(t)) - if trace_locating then - report_libraries("! appending '%s' to 'package.path'",path) +package.libpaths = getlibpaths +package.clibpaths = getclibpaths + +function package.extralibpath(...) + local paths = { ... } + for i=1,#paths do + local path = cleanpath(paths[i]) + if not libhash[path] then + if trace_libraries then + report_libraries("! extra lua path '%s'",path) + end + libextras[#libextras+1] = path + libpaths[#libpaths +1] = path + end end - return path end -local p_libpaths, a_libpaths = { }, { } - -function package.appendtolibpath(...) - insert(a_libpath,thepath(...)) +function package.extraclibpath(...) + local paths = { ... } + for i=1,#paths do + local path = cleanpath(paths[i]) + if not clibhash[path] then + if trace_libraries then + report_libraries("! extra lib path '%s'",path) + end + clibextras[#clibextras+1] = path + clibpaths[#clibpaths +1] = path + end + end end -function package.prependtolibpath(...) - insert(p_libpaths,1,thepath(...)) +if not package.loaders[-2] then + -- use package-path and package-cpath + package.loaders[-2] = package.loaders[2] end --- beware, we need to return a loadfile result ! +local function loadedaslib(resolved,rawname) + return package.loadlib(resolved,"luaopen_" .. gsub(rawname,"%.","_")) +end -local function loaded(libpaths,name,simple) - for i=1,#libpaths do -- package.path, might become option - local libpath = libpaths[i] - local resolved = gsub(libpath,"%?",simple) - if trace_locating then -- more detail - report_libraries("! checking for '%s' on 'package.path': '%s' => '%s'",simple,libpath,resolved) - end - if file.is_readable(resolved) then - if trace_locating then - report_libraries("! lib '%s' located via 'package.path': '%s'",name,resolved) - end - return loadfile(resolved) - end +local function loadedbylua(name) + if trace_libraries then + report_libraries("! locating %q using normal loader",name) end + local resolved = package.loaders[-2](name) end -package.loaders[2] = function(name) -- was [#package.loaders+1] - if file.suffix(name) == "" then - name = file.addsuffix(name,"lua") -- maybe a list - if trace_locating then -- mode detail - report_libraries("! locating '%s' with forced suffix",name) - end - else - if trace_locating then -- mode detail - report_libraries("! locating '%s'",name) - end +local function loadedbyformat(name,rawname,suffixes,islib) + if trace_libraries then + report_libraries("! locating %q as %q using formats %q",rawname,name,concat(suffixes)) end - for i=1,#libformats do - local format = libformats[i] + for i=1,#suffixes do -- so we use findfile and not a lookup loop + local format = suffixes[i] local resolved = resolvers.findfile(name,format) or "" - if trace_locating then -- mode detail - report_libraries("! checking for '%s' using 'libformat path': '%s'",name,format) + if trace_libraries then + report_libraries("! checking for %q' using format %q",name,format) end if resolved ~= "" then - if trace_locating then - report_libraries("! lib '%s' located via environment: '%s'",name,resolved) + if trace_libraries then + report_libraries("! lib %q located on %q",name,resolved) end - return loadfile(resolved) - end - end - -- libpaths - local libpaths, clibpaths = package.libpaths(), package.clibpaths() - local simple = gsub(name,"%.lua$","") - local simple = gsub(simple,"%.","/") - local resolved = loaded(p_libpaths,name,simple) or loaded(libpaths,name,simple) or loaded(a_libpaths,name,simple) - if resolved then - return resolved - end - -- - local libname = file.addsuffix(simple,os.libsuffix) - for i=1,#clibformats do - -- better have a dedicated loop - local format = clibformats[i] - local paths = resolvers.expandedpathlistfromvariable(format) - for p=1,#paths do - local path = paths[p] - local resolved = file.join(path,libname) - if trace_locating then -- mode detail - report_libraries("! checking for '%s' using 'clibformat path': '%s'",libname,path) - end - if file.is_readable(resolved) then - if trace_locating then - report_libraries("! lib '%s' located via 'clibformat': '%s'",libname,resolved) - end - return package.loadlib(resolved,name) + if islib then + return loadedaslib(resolved,rawname) + else + return loadfile(resolved) end end end - for i=1,#clibpaths do -- package.path, might become option - local libpath = clibpaths[i] - local resolved = gsub(libpath,"?",simple) - if trace_locating then -- more detail - report_libraries("! checking for '%s' on 'package.cpath': '%s'",simple,libpath) +end + +local function loadedbypath(name,rawname,paths,islib,what) + if trace_libraries then + report_libraries("! locating %q as %q on %q paths",rawname,name,what) + end + for p=1,#paths do + local path = paths[p] + local resolved = file.join(path,name) + if trace_libraries then -- mode detail + report_libraries("! checking for %q using %q path %q",name,what,path) end - if file.is_readable(resolved) then - if trace_locating then - report_libraries("! lib '%s' located via 'package.cpath': '%s'",name,resolved) + if is_readable(resolved) then + if trace_libraries then + report_libraries("! lib %q located on %q",name,resolved) + end + if islib then + return loadedaslib(resolved,rawname) + else + return loadfile(resolved) end - return package.loadlib(resolved,name) - end - end - -- just in case the distribution is messed up - if trace_loading then -- more detail - report_libraries("! checking for '%s' using 'luatexlibs': '%s'",name) - end - local resolved = resolvers.findfile(file.basename(name),'luatexlibs') or "" - if resolved ~= "" then - if trace_locating then - report_libraries("! lib '%s' located by basename via environment: '%s'",name,resolved) end - return loadfile(resolved) end - if trace_locating then - report_libraries('? unable to locate lib: %s',name) - end --- return "unable to locate " .. name end -resolvers.loadlualib = require - --- -- -- -- +local function notloaded(name) + if trace_libraries then + report_libraries("? unable to locate library %q",name) + end +end -package.obsolete = package.obsolete or { } +package.loaders[2] = function(name) + local thename = gsub(name,"%.","/") + local luaname = file.addsuffix(thename,"lua") + local libname = file.addsuffix(thename,os.libsuffix) + return + loadedbyformat(luaname,name,libsuffixes, false) + or loadedbyformat(libname,name,clibsuffixes, true) + or loadedbypath (luaname,name,getlibpaths (),false,"lua") + or loadedbypath (luaname,name,getclibpaths(),false,"lua") + or loadedbypath (libname,name,getclibpaths(),true, "lib") + or loadedbylua (name) + or notloaded (name) +end -package.append_libpath = appendtolibpath -- will become obsolete -package.prepend_libpath = prependtolibpath -- will become obsolete +-- package.loaders[3] = nil +-- package.loaders[4] = nil -package.obsolete.append_libpath = appendtolibpath -- will become obsolete -package.obsolete.prepend_libpath = prependtolibpath -- will become obsolete +resolvers.loadlualib = require end -- of closure @@ -15707,7 +16616,6 @@ function environment.make_format(name) end function environment.run_format(name,data,more) - -- hm, rather old code here; we can now use the file.whatever functions if name and name ~= "" then local barename = file.removesuffix(name) local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats") @@ -15736,6 +16644,129 @@ function environment.run_format(name,data,more) end +end -- of closure + +do -- create closure to overcome 200 locals limit + +if not modules then modules = { } end modules ['util-tpl'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- This is experimental code. Coming from dos and windows, I've always used %whatever% +-- as template variables so let's stick to it. After all, it's easy to parse and stands +-- out well. A double %% is turned into a regular %. + +utilities.templates = utilities.templates or { } +local templates = utilities.templates + +local trace_template = false trackers.register("templates.trace",function(v) trace_template = v end) +local report_template = logs.reporter("template") + +local format = string.format +local P, C, Cs, Carg, lpegmatch = lpeg.P, lpeg.C, lpeg.Cs, lpeg.Carg, lpeg.match + +-- todo: make installable template.new + +local replacer + +local function replacekey(k,t,recursive) + local v = t[k] + if not v then + if trace_template then + report_template("unknown key %q",k) + end + return "" + else + if trace_template then + report_template("setting key %q to value %q",k,v) + end + if recursive then + return lpegmatch(replacer,v,1,t) + else + return v + end + end +end + +local sqlescape = lpeg.replacer { + { "'", "''" }, + { "\\", "\\\\" }, + { "\r\n", "\\n" }, + { "\r", "\\n" }, + -- { "\t", "\\t" }, +} + +local escapers = { + lua = function(s) + return format("%q",s) + end, + sql = function(s) + return lpegmatch(sqlescape,s) + end, +} + +local function replacekeyunquoted(s,t,how,recurse) -- ".. \" " + local escaper = how and escapers[how] or escapers.lua + return escaper(replacekey(s,t,recurse)) +end + +local single = P("%") -- test %test% test : resolves test +local double = P("%%") -- test 10%% test : %% becomes % +local lquoted = P("%[") -- test %[test]" test : resolves test with escaped "'s +local rquoted = P("]%") -- + +local escape = double / '%%' +local nosingle = single / '' +local nodouble = double / '' +local nolquoted = lquoted / '' +local norquoted = rquoted / '' + +local key = nosingle * (C((1-nosingle)^1 * Carg(1) * Carg(2) * Carg(3))/replacekey) * nosingle +local unquoted = nolquoted * ((C((1 - norquoted)^1) * Carg(1) * Carg(2) * Carg(3))/replacekeyunquoted) * norquoted +local any = P(1) + + replacer = Cs((unquoted + escape + key + any)^0) + +local function replace(str,mapping,how,recurse) + if mapping then + return lpegmatch(replacer,str,1,mapping,how or "lua",recurse or false) or str + else + return str + end +end + +-- print(replace("test '%[x]%' test",{ x = [[a 'x'  a]] })) +-- print(replace("test '%[x]%' test",{ x = [[a 'x'  a]] },'sql')) + +templates.replace = replace + +function templates.load(filename,mapping,how,recurse) + local data = io.loaddata(filename) or "" + if mapping and next(mapping) then + return replace(data,mapping,how,recurse) + else + return data + end +end + +function templates.resolve(t,mapping,how,recurse) + if not mapping then + mapping = t + end + for k, v in next, t do + t[k] = replace(v,mapping,how,recurse) + end + return t +end + +-- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" })) +-- inspect(utilities.templates.resolve({ one = "%two%", two = "two", three = "%three%" })) + + end -- of closure -- end library merge @@ -15796,7 +16827,7 @@ own.libs = { -- order can be made better -- 'data-bin.lua', 'data-zip.lua', 'data-tre.lua', - 'data-crl.lua', + 'data-sch.lua', 'data-lua.lua', 'data-aux.lua', -- updater 'data-tmf.lua', @@ -15804,6 +16835,8 @@ own.libs = { -- order can be made better 'luat-sta.lua', 'luat-fmt.lua', + + 'util-tpl.lua', } -- We need this hack till luatex is fixed. @@ -15824,7 +16857,7 @@ own.path = gsub(match(own.name,"^(.+)[\\/].-$") or ".","\\","/") local ownpath, owntree = own.path, environment and environment.ownpath or own.path -own.list = { +own.list = { -- predictable paths '.', ownpath , ownpath .. "/../sources", -- HH's development path @@ -15848,7 +16881,7 @@ local function locate_libs() local filename = pth .. "/" .. lib local found = lfs.isfile(filename) if found then - package.path = package.path .. ";" .. pth .. "/?.lua" -- in case l-* does a require + package.path = package.path .. ";" .. pth .. "/?.lua" -- in case l-* does a require (probably obsolete) return pth end end @@ -15980,6 +17013,7 @@ local helpinfo = [[ --var-value report value of variable --find-file report file location --find-path report path of file +--show-package-path report package paths --pattern=str filter variables ]] @@ -16093,7 +17127,8 @@ function runners.execute_script(fullname,internal,nosplit) elseif state == 'skip' then return true elseif state == "run" then - local path, name, suffix, result = file.dirname(fullname), file.basename(fullname), file.extname(fullname), "" + local path, name, suffix = file.splitname(fullname) + local result = "" if path ~= "" then result = fullname elseif name then @@ -16104,7 +17139,7 @@ function runners.execute_script(fullname,internal,nosplit) name = gsub(name,"^script:","") if suffix == "" and runners.registered[name] and runners.registered[name][1] then name = runners.registered[name][1] - suffix = file.extname(name) + suffix = file.suffix(name) end if suffix == "" then -- loop over known suffixes @@ -16131,7 +17166,7 @@ function runners.execute_script(fullname,internal,nosplit) environment.ownscript = result dofile(result) else - local binary = runners.applications[file.extname(result)] + local binary = runners.applications[file.suffix(result)] result = string.quoted(string.unquoted(result)) -- if string.match(result,' ') and not string.match(result,"^\".*\"$") then -- result = '"' .. result .. '"' @@ -16324,7 +17359,7 @@ function resolvers.launch(str) -- maybe we also need to test on mtxrun.launcher.suffix environment -- variable or on windows consult the assoc and ftype vars and such local launchers = runners.launchers[os.platform] if launchers then - local suffix = file.extname(str) if suffix then + local suffix = file.suffix(str) if suffix then local runner = launchers[suffix] if runner then str = runner .. " " .. str end @@ -16383,7 +17418,7 @@ function runners.find_mtx_script(filename) end filename = file.addsuffix(filename,"lua") local basename = file.removesuffix(file.basename(filename)) - local suffix = file.extname(filename) + local suffix = file.suffix(filename) -- qualified path, raw name local fullname = file.is_qualified_path(filename) and io.exists(filename) and filename if fullname and fullname ~= "" then @@ -16438,7 +17473,7 @@ function runners.execute_ctx_script(filename,...) runners.register_arguments(...) local arguments = environment.arguments_after local fullname = runners.find_mtx_script(filename) or "" - if file.extname(fullname) == "cld" then + if file.suffix(fullname) == "cld" then -- handy in editors where we force --autopdf report("running cld script: %s",filename) table.insert(arguments,1,fullname) @@ -16546,6 +17581,21 @@ function runners.timed(action) statistics.timed(action) end +function runners.associate(filename) + os.launch(filename) +end + +function runners.gethelp(filename) + local url = environment.argument("url") + if url and url ~= "" then + local command = string.gsub(environment.argument("command") or "unknown","^%s*\\*(.-)%s*$","%1") + url = utilities.templates.replace(url,{ command = command }) + os.launch(url) + else + report("no --url given") + end +end + -- this is a bit dirty ... first we store the first filename and next we -- split the arguments so that we only see the ones meant for this script -- ... later we will use the second half @@ -16648,7 +17698,18 @@ else end -if e_argument("selfmerge") then +if e_argument("script") or e_argument("scripts") then + + -- run a script by loading it (using libs), pass args + + runners.loadbase() + if is_mkii_stub then + ok = runners.execute_script(filename,false,true) + else + ok = runners.execute_ctx_script(filename) + end + +elseif e_argument("selfmerge") then -- embed used libraries @@ -16671,23 +17732,25 @@ elseif e_argument("selfupdate") then trackers.enable("resolvers.locating") resolvers.updatescript(own.name,"mtxrun") -elseif e_argument("ctxlua") or e_argument("internal") then +elseif e_argument("show-package-path") or e_argument("show-package-paths") then - -- run a script by loading it (using libs) + local l = package.libpaths() + local c = package.clibpaths() - runners.loadbase() - ok = runners.execute_script(filename,true) + for i=1,#l do + report("package lib path %s: %s",i,l[i]) + end -elseif e_argument("script") or e_argument("scripts") then + for i=1,#c do + report("package clib path %s: %s",i,c[i]) + end - -- run a script by loading it (using libs), pass args +elseif e_argument("ctxlua") or e_argument("internal") then + + -- run a script by loading it (using libs) runners.loadbase() - if is_mkii_stub then - ok = runners.execute_script(filename,false,true) - else - ok = runners.execute_ctx_script(filename) - end + ok = runners.execute_script(filename,true) elseif e_argument("execute") then @@ -16715,6 +17778,14 @@ elseif e_argument("launch") then runners.loadbase() runners.launch_file(filename) +elseif e_argument("associate") then + + runners.associate(filename) + +elseif e_argument("gethelp") then + + runners.gethelp() + elseif e_argument("makestubs") then -- make stubs (depricated) @@ -16806,7 +17877,7 @@ elseif e_argument("find-path") then elseif e_argument("expand-braces") then - -- luatools: runners.execute_ctx_script("mtx-base","--expand-braces",filename + -- luatools: runners.execute_ctx_script("mtx-base","--expand-braces",filename) resolvers.load("nofiles") runners.register_arguments(filename) diff --git a/scripts/context/ruby/fcd_start.rb b/scripts/context/ruby/fcd_start.rb deleted file mode 100644 index b1fa42a2a..000000000 --- a/scripts/context/ruby/fcd_start.rb +++ /dev/null @@ -1,472 +0,0 @@ -# Hans Hagen / PRAGMA ADE / 2005 / www.pragma-ade.com -# -# Fast Change Dir -# -# This is a kind of variant of the good old ncd -# program. This script uses the same indirect cmd -# trick as Erwin Waterlander's wcd program. -# -# === windows: fcd.cmd === -# -# @echo off -# ruby -S fcd_start.rb %1 %2 %3 %4 %5 %6 %7 %8 %9 -# if exist "%HOME%/fcd_stage.cmd" call %HOME%/fcd_stage.cmd -# -# === linux: fcd (fcd.sh) === -# -# !/usr/bin/env sh -# ruby -S fcd_start.rb $1 $2 $3 $4 $5 $6 $7 $8 $9 -# if test -f "$HOME/fcd_stage.sh" ; then -# . $HOME/fcd_stage.sh ; -# fi; -# -# === -# -# On linux, one should source the file: ". fcd args" in order -# to make the chdir persistent. -# -# You can create a stub with: -# -# ruby fcd_start.rb --stub --verbose -# -# usage: -# -# fcd --make t:\ -# fcd --add f:\project -# fcd [--find] whatever -# fcd [--find] whatever c (c being a list entry) -# fcd [--find] whatever . (last choice with this pattern) -# fcd --list - -# todo: HOMEDRIVE\HOMEPATH - -require 'rbconfig' - -class FastCD - - @@rootpath = nil - - ['HOME','TEMP','TMP','TMPDIR'].each do |key| - if ENV[key] then - if FileTest.directory?(ENV[key]) then - @@rootpath = ENV[key] - break - end - end - end - - exit unless @@rootpath - - @@mswindows = Config::CONFIG['host_os'] =~ /mswin/ - @@maxlength = 26 - - require 'Win32API' if @@mswindows - - if @@mswindows then - @@stubcode = [ - '@echo off', - '', - 'if not exist "%HOME%" goto temp', - '', - ':home', - '', - 'ruby -S fcd_start.rb %1 %2 %3 %4 %5 %6 %7 %8 %9', - '', - 'if exist "%HOME%\fcd_stage.cmd" call %HOME%\fcd_stage.cmd', - 'goto end', - '', - ':temp', - '', - 'ruby -S fcd_start.rb %1 %2 %3 %4 %5 %6 %7 %8 %9', - '', - 'if exist "%TEMP%\fcd_stage.cmd" call %TEMP%\fcd_stage.cmd', - 'goto end', - '', - ':end' - ].join("\n") - else - @@stubcode = [ - '#!/usr/bin/env sh', - '', - 'ruby -S fcd_start.rb $1 $2 $3 $4 $5 $6 $7 $8 $9', - '', - 'if test -f "$HOME/fcd_stage.sh" ; then', - ' . $HOME/fcd_stage.sh ;', - 'fi;' - ].join("\n") - end - - @@selfpath = File.dirname($0) - @@datafile = File.join(@@rootpath,'fcd_state.dat') - @@histfile = File.join(@@rootpath,'fcd_state.his') - @@cdirfile = File.join(@@rootpath,if @@mswindows then 'fcd_stage.cmd' else 'fcd_stage.sh' end) - @@stubfile = File.join(@@selfpath,if @@mswindows then 'fcd.cmd' else 'fcd' end) - - def initialize(verbose=false) - @list = Array.new - @hist = Hash.new - @result = Array.new - @pattern = '' - @result = '' - @verbose = verbose - if f = File.open(@@cdirfile,'w') then - f << "#{if @@mswindows then 'rem' else '#' end} no dir to change to" - f.close - else - report("unable to create stub #{@@cdirfile}") - end - end - - def filename(name) - File.join(@@root,name) - end - - def report(str,verbose=@verbose) - puts(">> #{str}") if verbose - end - - def flush(str,verbose=@verbose) - print(str) if verbose - end - - def clear - if FileTest.file?(@@histfile) - begin - File.delete(@@histfile) - rescue - report("error in deleting history file '#{@histfile}'") - else - report("history file '#{@histfile}' is deleted") - end - else - report("no history file '#{@histfile}'") - end - end - - def scan(dir='.') - begin - [dir].flatten.sort.uniq.each do |dir| - begin - Dir.chdir(dir) - report("scanning '#{dir}'") - # flush(">> ") - Dir.glob("**/*").each do |d| - if FileTest.directory?(d) then - @list << File.expand_path(d) - # flush(".") - end - end - # flush("\n") - @list = @list.sort.uniq - report("#{@list.size} entries found") - rescue - report("unknown directory '#{dir}'") - end - end - rescue - report("invalid dir specification ") - end - end - - def save - begin - if f = File.open(@@datafile,'w') then - @list.each do |l| - f.puts(l) - end - f.close - report("#{@list.size} status bytes saved in #{@@datafile}") - else - report("unable to save status in #{@@datafile}") - end - rescue - report("error in saving status in #{@@datafile}") - end - end - - def remember - if @hist[@pattern] == @result then - # no need to save result - else - begin - if f = File.open(@@histfile,'w') then - @hist[@pattern] = @result - @hist.keys.each do |k| - f.puts("#{k} #{@hist[k]}") - end - f.close - report("#{@hist.size} history entries saved in #{@@histfile}") - else - report("unable to save history in #{@@histfile}") - end - rescue - report("error in saving history in #{@@histfile}") - end - end - end - - def load - begin - @list = IO.read(@@datafile).split("\n") - report("#{@list.length} status bytes loaded from #{@@datafile}") - rescue - report("error in loading status from #{@@datafile}") - end - begin - IO.readlines(@@histfile).each do |line| - if line =~ /^(.*?)\s+(.*)$/i then - @hist[$1] = $2 - end - end - report("#{@hist.length} history entries loaded from #{@@histfile}") - rescue - report("error in loading history from #{@@histfile}") - end - end - - def show - begin - puts("directories:") - puts("\n") - if @list.length > 0 then - @list.each do |l| - puts(l) - end - else - puts("no entries") - end - puts("\n") - puts("history:") - puts("\n") - if @hist.length > 0 then - @hist.keys.sort.each do |h| - puts("#{h} >> #{@hist[h]}") - end - else - puts("no entries") - end - rescue - end - end - - def find(pattern=nil) - begin - if pattern = [pattern].flatten.first then - if pattern.length > 0 and @pattern = pattern then - @result = @list.grep(/\/#{@pattern}$/i) - if @result.length == 0 then - @result = @list.grep(/\/#{@pattern}[^\/]*$/i) - end - end - else - puts(Dir.pwd.gsub(/\\/o, '/')) - end - rescue - puts("some error") - end - end - - def chdir(dir) - begin - if dir then - if f = File.open(@@cdirfile,'w') then - if @@mswindows then - f.puts("cd /d #{dir.gsub('/','\\')}") - else - f.puts("cd #{dir.gsub("\\",'/')}") - end - f.close - end - @result = dir - report("changing to #{dir}",true) - else - report("not changing dir") - end - rescue - end - end - - def choose(args=[]) - offset = 97 - unless @pattern.empty? then - begin - case @result.size - when 0 then - report("dir '#{@pattern}' not found",true) - when 1 then - chdir(@result[0]) - else - list = @result.dup - begin - if answer = args[1] then # assignment & test - if answer == '.' and @hist.key?(@pattern) then - if FileTest.directory?(@hist[@pattern]) then - print("last choice ") - chdir(@hist[@pattern]) - return - end - else - index = answer[0] - offset - if dir = list[index] then - chdir(dir) - return - end - end - end - rescue - puts("some error") - end - loop do - print("\n") - list.each_index do |i| -begin - if i < @@maxlength then - # puts("#{(i+?a).chr} #{list[i]}") - puts("#{(i+offset).chr} #{list[i]}") - else - puts("\n there are #{list.length-@@maxlength} entries more") - break - end -rescue - puts("some error") -end - end - print("\n>> ") - if answer = wait then - if answer >= offset and answer <= offset+25 then - index = answer - offset - if dir = list[index] then - print("#{answer.chr} ") - chdir(dir) - elsif @hist.key?(@pattern) and FileTest.directory?(@hist[@pattern]) then - print("last choice ") - chdir(@hist[@pattern]) - else - print("quit\n") - end - break - elsif list.length >= @@maxlength then - @@maxlength.times do |i| list.shift end - print("next set") - print("\n") - elsif @hist.key?(@pattern) and FileTest.directory?(@hist[@pattern]) then - print("last choice ") - chdir(@hist[@pattern]) - break - else - print("quit\n") - break - end - end - end - end - rescue - report($!) - end - end - end - - def wait - begin - $stdout.flush - return getc - rescue - return nil - end - end - - def getc - begin - if @@mswindows then - ch = Win32API.new('crtdll','_getch',[],'L').call - else - system('stty raw -echo') - ch = $stdin.getc - system('stty -raw echo') - end - rescue - ch = nil - end - return ch - end - - def check - unless FileTest.file?(@@stubfile) then - report("creating stub #{@@stubfile}") - begin - if f = File.open(@@stubfile,'w') then - f.puts(@@stubcode) - f.close - end - rescue - report("unable to create stub #{@@stubfile}") - else - unless @mswindows then - begin - File.chmod(0755,@@stubfile) - rescue - report("unable to change protections on #{@@stubfile}") - end - end - end - else - report("stub #{@@stubfile} already present") - end - end - -end - -$stdout.sync = true - -verbose, action, args = false, :find, Array.new - -usage = "fcd [--add|clear|find|list|make|show|stub] [--verbose] [pattern]" -version = "1.0.2" - -def quit(message) - puts(message) - exit -end - -ARGV.each do |a| - case a - when '-a', '--add' then action = :add - when '-c', '--clear' then action = :clear - when '-f', '--find' then action = :find - when '-l', '--list' then action = :show - when '-m', '--make' then action = :make - when '-s', '--show' then action = :show - when '--stub' then action = :stub - when '-v', '--verbose' then verbose = true - when '--version' then quit("version: #{version}") - when '-h', '--help' then quit("usage: #{usage}") - when /^\-\-.*/ then quit("error: unknown switch #{a}, try --help") - else args << a - end -end - -fcd = FastCD.new(verbose) -fcd.report("Fast Change Dir / version #{version}") - -case action - when :make then - fcd.clear - fcd.scan(args) - fcd.save - when :clear then - fcd.clear - when :add then - fcd.load - fcd.scan(args) - fcd.save - when :show then - fcd.load - fcd.show - when :find then - fcd.load - fcd.find(args) - fcd.choose(args) - fcd.remember - when :stub - fcd.check -end diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua index 108f2a8a1..e6bbbe2b5 100644 --- a/scripts/context/stubs/mswin/mtxrun.lua +++ b/scripts/context/stubs/mswin/mtxrun.lua @@ -8,6 +8,11 @@ if not modules then modules = { } end modules ['mtxrun'] = { license = "see context related readme files" } +-- if not lpeg then require("lpeg") end +-- if not md5 then require("md5") end +-- if not lfs then require("lfs") end +-- if not texconfig then texconfig = { } end + -- one can make a stub: -- -- #!/bin/sh @@ -150,11 +155,28 @@ function string.topattern(str,lowercase,strict) end end + +function string.valid(str,default) + return (type(str) == "string" and str ~= "" and str) or default or nil +end + -- obsolete names: string.quote = string.quoted string.unquote = string.unquoted +-- handy fallback + +string.itself = function(s) return s end + +-- also handy (see utf variant) + +local pattern = Ct(C(1)^0) + +function string.totable(str) + return lpegmatch(pattern,str) +end + end -- of closure @@ -168,7 +190,8 @@ if not modules then modules = { } end modules ['l-table'] = { license = "see context related readme files" } -local type, next, tostring, tonumber, ipairs, table, string = type, next, tostring, tonumber, ipairs, table, string +local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs +local table, string = table, string local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match local getmetatable, setmetatable = getmetatable, setmetatable @@ -179,6 +202,8 @@ local getinfo = debug.getinfo -- impact on ConTeXt was not that large; the remaining ipairs already -- have been replaced. In a similar fashion we also hardly used pairs. -- +-- Hm, actually ipairs was retained, but we no longer use it anyway. +-- -- Just in case, we provide the fallbacks as discussed in Programming -- in Lua (http://www.lua.org/pil/7.3.html): @@ -238,12 +263,16 @@ function table.strip(tab) end function table.keys(t) - local keys, k = { }, 0 - for key, _ in next, t do - k = k + 1 - keys[k] = key + if t then + local keys, k = { }, 0 + for key, _ in next, t do + k = k + 1 + keys[k] = key + end + return keys + else + return { } end - return keys end local function compare(a,b) @@ -256,41 +285,49 @@ local function compare(a,b) end local function sortedkeys(tab) - local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed - for key,_ in next, tab do - s = s + 1 - srt[s] = key - if category == 3 then - -- no further check - else - local tkey = type(key) - if tkey == "string" then - category = (category == 2 and 3) or 1 - elseif tkey == "number" then - category = (category == 1 and 3) or 2 + if tab then + local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed + for key,_ in next, tab do + s = s + 1 + srt[s] = key + if category == 3 then + -- no further check else - category = 3 + local tkey = type(key) + if tkey == "string" then + category = (category == 2 and 3) or 1 + elseif tkey == "number" then + category = (category == 1 and 3) or 2 + else + category = 3 + end end end - end - if category == 0 or category == 3 then - sort(srt,compare) + if category == 0 or category == 3 then + sort(srt,compare) + else + sort(srt) + end + return srt else - sort(srt) + return { } end - return srt end local function sortedhashkeys(tab) -- fast one - local srt, s = { }, 0 - for key,_ in next, tab do - if key then - s= s + 1 - srt[s] = key + if tab then + local srt, s = { }, 0 + for key,_ in next, tab do + if key then + s= s + 1 + srt[s] = key + end end + sort(srt) + return srt + else + return { } end - sort(srt) - return srt end table.sortedkeys = sortedkeys @@ -315,7 +352,7 @@ end table.sortedhash = sortedhash table.sortedpairs = sortedhash -function table.append(t, list) +function table.append(t,list) local n = #t for i=1,#list do n = n + 1 @@ -550,12 +587,26 @@ local function do_serialize(root,name,depth,level,indexed) end -- we could check for k (index) being number (cardinal) if root and next(root) then - local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) + -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) + -- if compact then + -- -- NOT: for k=1,#root do (we need to quit at nil) + -- for k,v in ipairs(root) do -- can we use next? + -- if not first then first = k end + -- last = last + 1 + -- end + -- end + local first, last = nil, 0 if compact then - -- NOT: for k=1,#root do (we need to quit at nil) - for k,v in ipairs(root) do -- can we use next? - if not first then first = k end - last = last + 1 + last = #root + for k=1,last do +-- if not root[k] then + if root[k] == nil then + last = k - 1 + break + end + end + if last > 0 then + first = 1 end end local sk = sortedkeys(root) @@ -1027,23 +1078,27 @@ function table.reversed(t) end end -function table.sequenced(t,sep,simple) -- hash only - local s, n = { }, 0 - for k, v in sortedhash(t) do - if simple then - if v == true then - n = n + 1 - s[n] = k - elseif v and v~= "" then +function table.sequenced(t,sep) -- hash only + if t then + local s, n = { }, 0 + for k, v in sortedhash(t) do + if simple then + if v == true then + n = n + 1 + s[n] = k + elseif v and v~= "" then + n = n + 1 + s[n] = k .. "=" .. tostring(v) + end + else n = n + 1 s[n] = k .. "=" .. tostring(v) end - else - n = n + 1 - s[n] = k .. "=" .. tostring(v) end + return concat(s, sep or " | ") + else + return "" end - return concat(s, sep or " | ") end function table.print(t,...) @@ -1124,6 +1179,8 @@ local lpeg = require("lpeg") -- tracing (only used when we encounter a problem in integration of lpeg in luatex) +-- some code will move to unicode and string + local report = texio and texio.write_nl or print -- local lpmatch = lpeg.match @@ -1160,8 +1217,8 @@ local report = texio and texio.write_nl or print -- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end -- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end -local type = type -local byte, char, gmatch = string.byte, string.char, string.gmatch +local type, next = type, next +local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format -- Beware, we predefine a bunch of patterns here and one reason for doing so -- is that we get consistent behaviour in some of the visualizers. @@ -1169,9 +1226,8 @@ local byte, char, gmatch = string.byte, string.char, string.gmatch lpeg.patterns = lpeg.patterns or { } -- so that we can share local patterns = lpeg.patterns -local P, R, S, V, match = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match -local Ct, C, Cs, Cc = lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc -local lpegtype = lpeg.type +local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp +local lpegtype, lpegmatch = lpeg.type, lpeg.match local utfcharacters = string.utfcharacters local utfgmatch = unicode and unicode.utf8.gmatch @@ -1222,6 +1278,10 @@ patterns.utf8char = utf8char patterns.validutf8 = validutf8char patterns.validutf8char = validutf8char +local eol = S("\n\r") +local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace = eol + spacer + patterns.digit = digit patterns.sign = sign patterns.cardinal = sign^0 * digit^1 @@ -1241,16 +1301,16 @@ patterns.letter = patterns.lowercase + patterns.uppercase patterns.space = space patterns.tab = P("\t") patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = S("\n\r") -patterns.spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace patterns.newline = newline patterns.emptyline = newline^1 -patterns.nonspacer = 1 - patterns.spacer -patterns.whitespace = patterns.eol + patterns.spacer -patterns.nonwhitespace = 1 - patterns.whitespace +patterns.nonspacer = 1 - spacer +patterns.nonwhitespace = 1 - whitespace patterns.equal = P("=") patterns.comma = P(",") -patterns.commaspacer = P(",") * patterns.spacer^0 +patterns.commaspacer = P(",") * spacer^0 patterns.period = P(".") patterns.colon = P(":") patterns.semicolon = P(";") @@ -1265,6 +1325,10 @@ patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble patterns.unspacer = ((patterns.spacer^1)/"")^0 +patterns.singlequoted = squote * patterns.nosquote * squote +patterns.doublequoted = dquote * patterns.nodquote * dquote +patterns.quoted = patterns.doublequoted + patterns.singlequoted + patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 patterns.beginline = #(1-newline) @@ -1275,8 +1339,17 @@ patterns.beginline = #(1-newline) -- print(string.unquoted('"test"')) -- print(string.unquoted('"test"')) -function lpeg.anywhere(pattern) --slightly adapted from website - return P { P(pattern) + 1 * V(1) } -- why so complex? +local function anywhere(pattern) --slightly adapted from website + return P { P(pattern) + 1 * V(1) } +end + +lpeg.anywhere = anywhere + +function lpeg.instringchecker(p) + p = anywhere(p) + return function(str) + return lpegmatch(p,str) and true or false + end end function lpeg.splitter(pattern, action) @@ -1325,7 +1398,7 @@ function string.splitup(str,separator) if not separator then separator = "," end - return match(splitters_m[separator] or splitat(separator),str) + return lpegmatch(splitters_m[separator] or splitat(separator),str) end @@ -1337,16 +1410,20 @@ function lpeg.split(separator,str) c = tsplitat(separator) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end function string.split(str,separator) - local c = cache[separator] - if not c then - c = tsplitat(separator) - cache[separator] = c + if separator then + local c = cache[separator] + if not c then + c = tsplitat(separator) + cache[separator] = c + end + return lpegmatch(c,str) + else + return { str } end - return match(c,str) end local spacing = patterns.spacer^0 * newline -- sort of strip @@ -1362,7 +1439,7 @@ local linesplitter = tsplitat(newline) patterns.linesplitter = linesplitter function string.splitlines(str) - return match(linesplitter,str) + return lpegmatch(linesplitter,str) end local utflinesplitter = utfbom^-1 * tsplitat(newline) @@ -1370,7 +1447,58 @@ local utflinesplitter = utfbom^-1 * tsplitat(newline) patterns.utflinesplitter = utflinesplitter function string.utfsplitlines(str) - return match(utflinesplitter,str or "") + return lpegmatch(utflinesplitter,str or "") +end + +local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0) +local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0) + +function string.utfsplit(str,ignorewhitespace) -- new + if ignorewhitespace then + return lpegmatch(utfcharsplitter_iws,str or "") + else + return lpegmatch(utfcharsplitter_ows,str or "") + end +end + +-- inspect(string.utfsplit("a b c d")) +-- inspect(string.utfsplit("a b c d",true)) + +-- -- alternative 1: 0.77 +-- +-- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0) +-- +-- function string.utflength(str) +-- return #lpegmatch(utfcharcounter,str or "") +-- end +-- +-- -- alternative 2: 1.70 +-- +-- local n = 0 +-- +-- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow +-- +-- function string.utflength(str) +-- n = 0 +-- lpegmatch(utfcharcounter,str or "") +-- return n +-- end +-- +-- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) + +local n = 0 + +local utfcharcounter = utfbom^-1 * Cs ( ( + Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end + + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end + + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end + + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end +)^0 ) + +function string.utflength(str) + n = 0 + lpegmatch(utfcharcounter,str or "") + return n end @@ -1384,7 +1512,7 @@ function lpeg.checkedsplit(separator,str) c = Ct(separator^0 * other * (separator^1 * other)^0) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end function string.checkedsplit(str,separator) @@ -1395,7 +1523,7 @@ function string.checkedsplit(str,separator) c = Ct(separator^0 * other * (separator^1 * other)^0) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end @@ -1440,11 +1568,11 @@ function lpeg.keeper(str) end function lpeg.frontstripper(str) -- or pattern (yet undocumented) - return (P(str) + P(true)) * Cs(P(1)^0) + return (P(str) + P(true)) * Cs(anything^0) end function lpeg.endstripper(str) -- or pattern (yet undocumented) - return Cs((1 - P(str) * P(-1))^0) + return Cs((1 - P(str) * endofstring)^0) end -- Just for fun I looked at the used bytecode and @@ -1453,8 +1581,22 @@ end function lpeg.replacer(one,two) if type(one) == "table" then local no = #one - if no > 0 then - local p + local p + if no == 0 then + for k, v in next, one do + local pp = P(k) / v + if p then + p = p + pp + else + p = pp + end + end + return Cs((p + 1)^0) + elseif no == 1 then + local o = one[1] + one, two = P(o[1]), o[2] + return Cs(((1-one)^1 + one/two)^0) + else for i=1,no do local o = one[i] local pp = P(o[1]) / o[2] @@ -1467,11 +1609,16 @@ function lpeg.replacer(one,two) return Cs((p + 1)^0) end else + one = P(one) two = two or "" - return Cs((P(one)/two + 1)^0) + return Cs(((1-one)^1 + one/two)^0) end end +-- print(lpeg.match(lpeg.replacer("e","a"),"test test")) +-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) +-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) + local splitters_f, splitters_s = { }, { } function lpeg.firstofsplit(separator) -- always return value @@ -1506,7 +1653,7 @@ local nany = utf8char/"" function lpeg.counter(pattern) pattern = Cs((P(pattern)/" " + nany)^0) return function(str) - return #match(pattern,str) + return #lpegmatch(pattern,str) end end @@ -1520,7 +1667,7 @@ if utfgmatch then end return n else -- 4 times slower but still faster than / function - return #match(Cs((P(what)/" " + nany)^0),str) + return #lpegmatch(Cs((P(what)/" " + nany)^0),str) end end @@ -1535,9 +1682,9 @@ else p = Cs((P(what)/" " + nany)^0) cache[p] = p end - return #match(p,str) + return #lpegmatch(p,str) else -- 4 times slower but still faster than / function - return #match(Cs((P(what)/" " + nany)^0),str) + return #lpegmatch(Cs((P(what)/" " + nany)^0),str) end end @@ -1564,7 +1711,7 @@ local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0) local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0) function string.escapedpattern(str,simple) - return match(simple and s or p,str) + return lpegmatch(simple and s or p,str) end -- utf extensies @@ -1611,7 +1758,7 @@ else p = P(uc) end end - match((utf8char/f)^0,str) + lpegmatch((utf8char/f)^0,str) return p end @@ -1627,7 +1774,7 @@ function lpeg.UR(str,more) first = str last = more or first else - first, last = match(range,str) + first, last = lpegmatch(range,str) if not last then return P(str) end @@ -1654,11 +1801,15 @@ end -function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") +function lpeg.is_lpeg(p) + return p and lpegtype(p) == "pattern" +end + +function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order if type(list) ~= "table" then list = { list, ... } end - -- sort(list) -- longest match first + -- table.sort(list) -- longest match first local p = P(list[1]) for l=2,#list do p = p + P(list[l]) @@ -1666,10 +1817,6 @@ function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") return p end -function lpeg.is_lpeg(p) - return p and lpegtype(p) == "pattern" -end - -- For the moment here, but it might move to utilities. Beware, we need to -- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we -- loop back from the end cq. prepend. @@ -1827,6 +1974,24 @@ end -- utfchar(0x205F), -- math thinspace -- } ) +-- handy from within tex: + +local lpegmatch = lpeg.match + +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! + +function string.tformat(fmt,...) + return format(lpegmatch(replacer,fmt),...) +end + +-- strips leading and trailing spaces and collapsed all other spaces + +local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) + +function string.collapsespaces(str) + return lpegmatch(pattern,str) +end + end -- of closure @@ -1851,14 +2016,14 @@ else io.fileseparator, io.pathseparator = "/" , ":" end -function io.loaddata(filename,textmode) +function io.loaddata(filename,textmode) -- return nil if empty local f = io.open(filename,(textmode and 'r') or 'rb') if f then local data = f:read('*all') f:close() - return data - else - return nil + if #data > 0 then + return data + end end end @@ -1880,6 +2045,45 @@ function io.savedata(filename,data,joiner) end end +function io.loadlines(filename,n) -- return nil if empty + local f = io.open(filename,'r') + if f then + if n then + local lines = { } + for i=1,n do + local line = f:read("*lines") + if line then + lines[#lines+1] = line + else + break + end + end + f:close() + lines = concat(lines,"\n") + if #lines > 0 then + return lines + end + else + local line = f:read("*line") or "" + assert(f:close()) + if #line > 0 then + return line + end + end + end +end + +function io.loadchunk(filename,n) + local f = io.open(filename,'rb') + if f then + local data = f:read(n or 1024) + f:close() + if #data > 0 then + return data + end + end +end + function io.exists(filename) local f = io.open(filename) if f == nil then @@ -2107,7 +2311,7 @@ if not modules then modules = { } end modules ['l-number'] = { -- this module will be replaced when we have the bit library -local tostring = tostring +local tostring, tonumber = tostring, tonumber local format, floor, match, rep = string.format, math.floor, string.match, string.rep local concat, insert = table.concat, table.insert local lpegmatch = lpeg.match @@ -2170,11 +2374,11 @@ function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ... end function number.setbit(x, p) - return hasbit(x, p) and x or x + p + return (x % (p + p) >= p) and x or x + p end function number.clearbit(x, p) - return hasbit(x, p) and x - p or x + return (x % (p + p) >= p) and x - p or x end @@ -2208,6 +2412,10 @@ function number.tobitstring(n,m) end +function number.valid(str,default) + return tonumber(str) or default or nil +end + end -- of closure @@ -2319,17 +2527,28 @@ if not modules then modules = { } end modules ['l-os'] = { -- os.name : windows | msdos | linux | macosx | solaris | .. | generic (new) -- os.platform : extended os.name with architecture +-- os.sleep() => socket.sleep() +-- math.randomseed(tonumber(string.sub(string.reverse(tostring(math.floor(socket.gettime()*10000))),1,6))) + -- maybe build io.flush in os.execute local os = os +local date, time = os.date, os.time local find, format, gsub, upper, gmatch = string.find, string.format, string.gsub, string.upper, string.gmatch local concat = table.concat -local random, ceil = math.random, math.ceil -local rawget, rawset, type, getmetatable, setmetatable, tonumber = rawget, rawset, type, getmetatable, setmetatable, tonumber +local random, ceil, randomseed = math.random, math.ceil, math.randomseed +local rawget, rawset, type, getmetatable, setmetatable, tonumber, tostring = rawget, rawset, type, getmetatable, setmetatable, tonumber, tostring -- The following code permits traversing the environment table, at least -- in luatex. Internally all environment names are uppercase. +-- The randomseed in Lua is not that random, although this depends on the operating system as well +-- as the binary (Luatex is normally okay). But to be sure we set the seed anyway. + +math.initialseed = tonumber(string.sub(string.reverse(tostring(ceil(socket and socket.gettime()*10000 or time()))),1,6)) + +randomseed(math.initialseed) + if not os.__getenv__ then os.__getenv__ = os.getenv @@ -2433,12 +2652,14 @@ else os.libsuffix, os.binsuffix, os.binsuffixes = 'so', '', { '' } end +local launchers = { + windows = "start %s", + macosx = "open %s", + unix = "$BROWSER %s &> /dev/null &", +} + function os.launch(str) - if os.type == "windows" then - os.execute("start " .. str) -- os.spawn ? - else - os.execute(str .. " &") -- os.spawn ? - end + os.execute(format(launchers[os.name] or launchers.unix,str)) end if not os.times then @@ -2649,7 +2870,7 @@ end local d function os.timezone(delta) - d = d or tonumber(tonumber(os.date("%H")-os.date("!%H"))) + d = d or tonumber(tonumber(date("%H")-date("!%H"))) if delta then if d > 0 then return format("+%02i:00",d) @@ -2661,6 +2882,44 @@ function os.timezone(delta) end end +local timeformat = format("%%s%s",os.timezone(true)) +local dateformat = "!%Y-%m-%d %H:%M:%S" + +function os.fulltime(t,default) + t = tonumber(t) or 0 + if t > 0 then + -- valid time + elseif default then + return default + else + t = nil + end + return format(timeformat,date(dateformat,t)) +end + +local dateformat = "%Y-%m-%d %H:%M:%S" + +function os.localtime(t,default) + t = tonumber(t) or 0 + if t > 0 then + -- valid time + elseif default then + return default + else + t = nil + end + return date(dateformat,t) +end + +function os.converttime(t,default) + local t = tonumber(t) + if t and t > 0 then + return date(dateformat,t) + else + return default or "-" + end +end + local memory = { } local function which(filename) @@ -2735,7 +2994,7 @@ local function nameonly(name) return (gsub(match(name,"^.+[/\\](.-)$") or name,"%.[%a%d]+$","")) end -local function extname(name,default) +local function suffixonly(name,default) return match(name,"^.+%.([^/\\]-)$") or default or "" end @@ -2744,11 +3003,16 @@ local function splitname(name) return n or name, s or "" end -file.basename = basename -file.dirname = dirname -file.nameonly = nameonly -file.extname = extname -file.suffix = extname +file.basename = basename + +file.pathpart = dirname +file.dirname = dirname + +file.nameonly = nameonly + +file.suffixonly = suffixonly +file.extname = suffixonly -- obsolete +file.suffix = suffixonly function file.removesuffix(filename) return (gsub(filename,"%.[%a%d]+$","")) @@ -2864,6 +3128,11 @@ end file.isreadable = file.is_readable -- depricated file.iswritable = file.is_writable -- depricated +function file.size(name) + local a = attributes(name) + return a and a.size or 0 +end + -- todo: lpeg \\ / .. does not save much local checkedsplit = string.checkedsplit @@ -3001,6 +3270,7 @@ local drive = C(R("az","AZ")) * P(":") local path = C(((1-slash)^0 * slash)^0) local suffix = period * C(P(1-period)^0 * P(-1)) local base = C((1-suffix)^0) +local rest = C(P(1)^0) drive = drive + Cc("") path = path + Cc("") @@ -3009,7 +3279,8 @@ suffix = suffix + Cc("") local pattern_a = drive * path * base * suffix local pattern_b = path * base * suffix -local pattern_c = C(drive * path) * C(base * suffix) +local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra captures +local pattern_d = path * rest function file.splitname(str,splitdrive) if splitdrive then @@ -3019,6 +3290,10 @@ function file.splitname(str,splitdrive) end end +function file.splitbase(str) + return lpegmatch(pattern_d,str) -- returns path, base+suffix +end + function file.nametotable(str,splitdrive) -- returns table local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) if splitdrive then @@ -3040,6 +3315,8 @@ function file.nametotable(str,splitdrive) -- returns table end end +-- print(file.splitbase("a/b/c.txt")) + -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end -- -- test { "c:", "c:/aa", "c:/aa/bb", "c:/aa/bb/cc", "c:/aa/bb/cc.dd", "c:/aa/bb/cc.dd.ee" } @@ -3081,15 +3358,30 @@ if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end -function file.needs_updating(oldname,newname,threshold) -- size modification access change - local oldtime = lfs.attributes(oldname, modification) - local newtime = lfs.attributes(newname, modification) - if newtime >= oldtime then - return false - elseif oldtime - newtime < (threshold or 1) then - return false +function file.needsupdating(oldname,newname,threshold) -- size modification access change + local oldtime = lfs.attributes(oldname,"modification") + if oldtime then + local newtime = lfs.attributes(newname,"modification") + if not newtime then + return true -- no new file, so no updating needed + elseif newtime >= oldtime then + return false -- new file definitely needs updating + elseif oldtime - newtime < (threshold or 1) then + return false -- new file is probably still okay + else + return true -- new file has to be updated + end else - return true + return false -- no old file, so no updating needed + end +end + +file.needs_updating = file.needsupdating + +function file.syncmtimes(oldname,newname) + local oldtime = lfs.attributes(oldname,"modification") + if oldtime and lfs.isfile(newname) then + lfs.touch(newname,oldtime,oldtime) end end @@ -3111,7 +3403,7 @@ function file.loadchecksum(name) return nil end -function file.savechecksum(name, checksum) +function file.savechecksum(name,checksum) if not checksum then checksum = file.checksum(name) end if checksum then io.savedata(name .. ".md5",checksum) @@ -3136,7 +3428,7 @@ if not modules then modules = { } end modules ['l-url'] = { local char, gmatch, gsub, format, byte, find = string.char, string.gmatch, string.gsub, string.format, string.byte, string.find local concat = table.concat local tonumber, type = tonumber, type -local P, C, R, S, Cs, Cc, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct +local P, C, R, S, Cs, Cc, Ct, Cf, Cg, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cg, lpeg.V local lpegmatch, lpegpatterns, replacer = lpeg.match, lpeg.patterns, lpeg.replacer -- from wikipedia: @@ -3169,15 +3461,19 @@ local endofstring = P(-1) local hexdigit = R("09","AF","af") local plus = P("+") local nothing = Cc("") -local escaped = (plus / " ") + (percent * C(hexdigit * hexdigit) / tochar) +local escapedchar = (percent * C(hexdigit * hexdigit)) / tochar +local escaped = (plus / " ") + escapedchar -- we assume schemes with more than 1 character (in order to avoid problems with windows disks) -- we also assume that when we have a scheme, we also have an authority +-- +-- maybe we should already split the query (better for unescaping as = & can be part of a value local schemestr = Cs((escaped+(1-colon-slash-qmark-hash))^2) local authoritystr = Cs((escaped+(1- slash-qmark-hash))^0) local pathstr = Cs((escaped+(1- qmark-hash))^0) -local querystr = Cs((escaped+(1- hash))^0) +----- querystr = Cs((escaped+(1- hash))^0) +local querystr = Cs(( (1- hash))^0) local fragmentstr = Cs((escaped+(1- endofstring))^0) local scheme = schemestr * colon + nothing @@ -3192,11 +3488,20 @@ local parser = Ct(validurl) lpegpatterns.url = validurl lpegpatterns.urlsplitter = parser -local escapes = { } ; for i=0,255 do escapes[i] = format("%%%02X",i) end +local escapes = { } -local escaper = Cs((R("09","AZ","az") + S("-./_") + P(1) / escapes)^0) +setmetatable(escapes, { __index = function(t,k) + local v = format("%%%02X",byte(k)) + t[k] = v + return v +end }) -lpegpatterns.urlescaper = escaper +local escaper = Cs((R("09","AZ","az")^1 + P(" ")/"%%20" + S("-./_")^1 + P(1) / escapes)^0) -- space happens most +local unescaper = Cs((escapedchar + 1)^0) + +lpegpatterns.urlunescaped = escapedchar +lpegpatterns.urlescaper = escaper +lpegpatterns.urlunescaper = unescaper -- todo: reconsider Ct as we can as well have five return values (saves a table) -- so we can have two parsers, one with and one without @@ -3208,8 +3513,12 @@ end local isscheme = schemestr * colon * slash * slash -- this test also assumes authority local function hasscheme(str) - local scheme = lpegmatch(isscheme,str) -- at least one character - return scheme ~= "" and scheme or false + if str then + local scheme = lpegmatch(isscheme,str) -- at least one character + return scheme ~= "" and scheme or false + else + return false + end end @@ -3228,10 +3537,32 @@ local rootbased = P("/") local barswapper = replacer("|",":") local backslashswapper = replacer("\\","/") +-- queries: + +local equal = P("=") +local amp = P("&") +local key = Cs(((escapedchar+1)-equal )^0) +local value = Cs(((escapedchar+1)-amp -endofstring)^0) + +local splitquery = Cf ( Ct("") * P { "sequence", + sequence = V("pair") * (amp * V("pair"))^0, + pair = Cg(key * equal * value), +}, rawset) + +-- hasher + local function hashed(str) -- not yet ok (/test?test) + if str == "" then + return { + scheme = "invalid", + original = str, + } + end local s = split(str) - local somescheme = s[1] ~= "" - local somequery = s[4] ~= "" + local rawscheme = s[1] + local rawquery = s[4] + local somescheme = rawscheme ~= "" + local somequery = rawquery ~= "" if not somescheme and not somequery then s = { scheme = "file", @@ -3247,14 +3578,17 @@ local function hashed(str) -- not yet ok (/test?test) local authority, path, filename = s[2], s[3] if authority == "" then filename = path + elseif path == "" then + filename = "" else filename = authority .. "/" .. path end s = { - scheme = s[1], + scheme = rawscheme, authority = authority, path = path, - query = s[4], + query = lpegmatch(unescaper,rawquery), -- unescaped, but possible conflict with & and = + queries = lpegmatch(splitquery,rawquery), -- split first and then unescaped fragment = s[5], original = str, noscheme = false, @@ -3264,6 +3598,8 @@ local function hashed(str) -- not yet ok (/test?test) return s end +-- inspect(hashed("template://test")) + -- Here we assume: -- -- files: /// = relative @@ -3306,23 +3642,65 @@ function url.construct(hash) -- dodo: we need to escape ! return lpegmatch(escaper,concat(fullurl)) end -function url.filename(filename) +function url.filename(filename) -- why no lpeg here ? local t = hashed(filename) return (t.scheme == "file" and (gsub(t.path,"^/([a-zA-Z])([:|])/)","%1:"))) or filename end +local function escapestring(str) + return lpegmatch(escaper,str) +end + +url.escape = escapestring + +-- function url.query(str) -- separator could be an option +-- if type(str) == "string" then +-- local t = { } +-- for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do +-- t[k] = v +-- end +-- return t +-- else +-- return str +-- end +-- end + function url.query(str) if type(str) == "string" then - local t = { } - for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do - t[k] = v - end - return t + return lpegmatch(splitquery,str) or "" else return str end end +function url.toquery(data) + local td = type(data) + if td == "string" then + return #str and escape(data) or nil -- beware of double escaping + elseif td == "table" then + if next(data) then + local t = { } + for k, v in next, data do + t[#t+1] = format("%s=%s",k,escapestring(v)) + end + return concat(t,"&") + end + else + -- nil is a signal that no query + end +end + +-- /test/ | /test | test/ | test => test + +function url.barepath(path) + if not path or path == "" then + return "" + else + return (gsub(path,"^/?(.-)/?$","%1")) + end +end + + @@ -3363,6 +3741,24 @@ local isdir = lfs.isdir local isfile = lfs.isfile local currentdir = lfs.currentdir +-- in case we load outside luatex + +if not isdir then + function isdir(name) + local a = attributes(name) + return a and a.mode == "directory" + end + lfs.isdir = isdir +end + +if not isfile then + function isfile(name) + local a = attributes(name) + return a and a.mode == "file" + end + lfs.isfile = isfile +end + -- handy function dir.current() @@ -3738,28 +4134,49 @@ function boolean.tonumber(b) end function toboolean(str,tolerant) - if tolerant then - local tstr = type(str) - if tstr == "string" then - return str == "true" or str == "yes" or str == "on" or str == "1" or str == "t" - elseif tstr == "number" then - return tonumber(str) ~= 0 - elseif tstr == "nil" then - return false - else - return str - end + if str == nil then + return false + elseif str == false then + return false + elseif str == true then + return true elseif str == "true" then return true elseif str == "false" then return false + elseif not tolerant then + return false + elseif str == 0 then + return false + elseif (tonumber(str) or 0) > 0 then + return true else - return str + return str == "yes" or str == "on" or str == "t" end end string.toboolean = toboolean +function string.booleanstring(str) + if str == nil then + return false + elseif str == false then + return false + elseif str == true then + return true + elseif str == "true" then + return true + elseif str == "false" then + return false + elseif str == 0 then + return false + elseif (tonumber(str) or 0) > 0 then + return true + else + return str == "yes" or str == "on" or str == "t" + end +end + function string.is_boolean(str,default) if type(str) == "string" then if str == "true" or str == "yes" or str == "on" or str == "t" then @@ -3784,57 +4201,229 @@ if not modules then modules = { } end modules ['l-unicode'] = { license = "see context related readme files" } +-- this module will be reorganized + +-- todo: utf.sub replacement (used in syst-aux) + +local concat = table.concat +local type = type +local P, C, R, Cs, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local utftype = patterns.utftype +local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format +local utfsplitlines = string.utfsplitlines + if not unicode then - unicode = { utf8 = { } } + unicode = { } + +end + +local unicode = unicode + +utf = utf or unicode.utf8 + +if not utf then + + utf8 = { } + unicode.utf8 = utf8 + utf = utf8 + +end + +if not utf.char then local floor, char = math.floor, string.char - function unicode.utf8.utfchar(n) + function utf.char(n) if n < 0x80 then + -- 0aaaaaaa : 0x80 return char(n) elseif n < 0x800 then + -- 110bbbaa : 0xC0 : n >> 6 + -- 10aaaaaa : 0x80 : n & 0x3F return char( 0xC0 + floor(n/0x40), 0x80 + (n % 0x40) ) elseif n < 0x10000 then + -- 1110bbbb : 0xE0 : n >> 12 + -- 10bbbbaa : 0x80 : (n >> 6) & 0x3F + -- 10aaaaaa : 0x80 : n & 0x3F return char( 0xE0 + floor(n/0x1000), 0x80 + (floor(n/0x40) % 0x40), 0x80 + (n % 0x40) ) - elseif n < 0x40000 then + elseif n < 0x200000 then + -- 11110ccc : 0xF0 : n >> 18 + -- 10ccbbbb : 0x80 : (n >> 12) & 0x3F + -- 10bbbbaa : 0x80 : (n >> 6) & 0x3F + -- 10aaaaaa : 0x80 : n & 0x3F + -- dddd : ccccc - 1 return char( - 0xF0 + floor(n/0x40000), - 0x80 + floor(n/0x1000), + 0xF0 + floor(n/0x40000), + 0x80 + (floor(n/0x1000) % 0x40), 0x80 + (floor(n/0x40) % 0x40), 0x80 + (n % 0x40) ) else - -- return char( - -- 0xF1 + floor(n/0x1000000), - -- 0x80 + floor(n/0x40000), - -- 0x80 + floor(n/0x1000), - -- 0x80 + (floor(n/0x40) % 0x40), - -- 0x80 + (n % 0x40) - -- ) - return "?" + return "" end end end -local unicode = unicode +if not utf.byte then -utf = utf or unicode.utf8 + local utf8byte = patterns.utf8byte -local concat = table.concat -local utfchar, utfbyte, utfgsub = utf.char, utf.byte, utf.gsub -local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format -local type = type + function utf.byte(c) + return lpegmatch(utf8byte,c) + end -local utfsplitlines = string.utfsplitlines +end + +local utfchar, utfbyte = utf.char, utf.byte + +-- As we want to get rid of the (unmaintained) utf library we implement our own +-- variants (in due time an independent module): + +function unicode.filetype(data) + return data and lpegmatch(utftype,data) or "unknown" +end + +local toentities = Cs ( + ( + patterns.utf8one + + ( + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + ) / function(s) local b = utfbyte(s) if b < 127 then return s else return format("&#%X;",b) end end + )^0 +) + +patterns.toentities = toentities + +function utf.toentities(str) + return lpegmatch(toentities,str) +end + + + + +local one = P(1) +local two = C(1) * C(1) +local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) + +-- actually one of them is already utf ... sort of useless this one + +-- function utf.char(n) +-- if n < 0x80 then +-- return char(n) +-- elseif n < 0x800 then +-- return char( +-- 0xC0 + floor(n/0x40), +-- 0x80 + (n % 0x40) +-- ) +-- elseif n < 0x10000 then +-- return char( +-- 0xE0 + floor(n/0x1000), +-- 0x80 + (floor(n/0x40) % 0x40), +-- 0x80 + (n % 0x40) +-- ) +-- elseif n < 0x40000 then +-- return char( +-- 0xF0 + floor(n/0x40000), +-- 0x80 + floor(n/0x1000), +-- 0x80 + (floor(n/0x40) % 0x40), +-- 0x80 + (n % 0x40) +-- ) +-- else +-- -- return char( +-- -- 0xF1 + floor(n/0x1000000), +-- -- 0x80 + floor(n/0x40000), +-- -- 0x80 + floor(n/0x1000), +-- -- 0x80 + (floor(n/0x40) % 0x40), +-- -- 0x80 + (n % 0x40) +-- -- ) +-- return "?" +-- end +-- end +-- +-- merge into: + +local pattern = P("\254\255") * Cs( ( + four / function(a,b,c,d) + local ab = 0xFF * byte(a) + byte(b) + local cd = 0xFF * byte(c) + byte(d) + return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) + end + + two / function(a,b) + return utfchar(byte(a)*256 + byte(b)) + end + + one + )^1 ) + + P("\255\254") * Cs( ( + four / function(b,a,d,c) + local ab = 0xFF * byte(a) + byte(b) + local cd = 0xFF * byte(c) + byte(d) + return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) + end + + two / function(b,a) + return utfchar(byte(a)*256 + byte(b)) + end + + one + )^1 ) + +function string.toutf(s) + return lpegmatch(pattern,s) or s -- todo: utf32 +end + +local validatedutf = Cs ( + ( + patterns.utf8one + + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + + P(1) / "�" + )^0 +) + +patterns.validatedutf = validatedutf + +function string.validutf(str) + return lpegmatch(validatedutf,str) +end + + +utf.length = string.utflength +utf.split = string.utfsplit +utf.splitines = string.utfsplitlines +utf.valid = string.validutf + +if not utf.len then + utf.len = utf.length +end + +-- a replacement for simple gsubs: + +local utf8char = patterns.utf8char + +function utf.remapper(mapping) + local pattern = Cs((utf8char/mapping)^0) + return function(str) + if not str or str == "" then + return "" + else + return lpegmatch(pattern,str) + end + end, pattern +end + +-- local remap = utf.remapper { a = 'd', b = "c", c = "b", d = "a" } +-- print(remap("abcd 1234 abcd")) -- 0 EF BB BF UTF-8 -- 1 FF FE UTF-16-little-endian @@ -4027,11 +4616,22 @@ local function big(c) end end +-- function unicode.utf8_to_utf16(str,littleendian) +-- if littleendian then +-- return char(255,254) .. utfgsub(str,".",little) +-- else +-- return char(254,255) .. utfgsub(str,".",big) +-- end +-- end + +local _, l_remap = utf.remapper(little) +local _, b_remap = utf.remapper(big) + function unicode.utf8_to_utf16(str,littleendian) if littleendian then - return char(255,254) .. utfgsub(str,".",little) + return char(255,254) .. lpegmatch(l_remap,str) else - return char(254,255) .. utfgsub(str,".",big) + return char(254,255) .. lpegmatch(b_remap,str) end end @@ -4052,84 +4652,12 @@ function unicode.xstring(s) return format("0x%05X",type(s) == "number" and s or utfbyte(s)) end +-- -local lpegmatch = lpeg.match -local patterns = lpeg.patterns -local utftype = patterns.utftype - -function unicode.filetype(data) - return data and lpegmatch(utftype,data) or "unknown" -end - -local toentities = lpeg.Cs ( - ( - patterns.utf8one - + ( - patterns.utf8two - + patterns.utf8three - + patterns.utf8four - ) / function(s) local b = utfbyte(s) if b < 127 then return s else return format("&#%X;",b) end end - )^0 -) - -patterns.toentities = toentities - -function utf.toentities(str) - return lpegmatch(toentities,str) -end - - - - -local P, C, R, Cs = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs - -local one = P(1) -local two = C(1) * C(1) -local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) - --- actually one of them is already utf ... sort of useless this one - -local pattern = P("\254\255") * Cs( ( - four / function(a,b,c,d) - local ab = 0xFF * byte(a) + byte(b) - local cd = 0xFF * byte(c) + byte(d) - return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) - end - + two / function(a,b) - return utfchar(byte(a)*256 + byte(b)) - end - + one - )^1 ) - + P("\255\254") * Cs( ( - four / function(b,a,d,c) - local ab = 0xFF * byte(a) + byte(b) - local cd = 0xFF * byte(c) + byte(d) - return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) - end - + two / function(b,a) - return utfchar(byte(a)*256 + byte(b)) - end - + one - )^1 ) - -function string.toutf(s) - return lpegmatch(pattern,s) or s -- todo: utf32 -end - -local validatedutf = Cs ( - ( - patterns.utf8one - + patterns.utf8two - + patterns.utf8three - + patterns.utf8four - + P(1) / "�" - )^0 -) - -patterns.validatedutf = validatedutf +local pattern = Ct(C(patterns.utf8char)^0) -function string.validutf(str) - return lpegmatch(validatedutf,str) +function utf.totable(str) + return lpegmatch(pattern,str) end @@ -4189,10 +4717,11 @@ utilities = utilities or {} utilities.tables = utilities.tables or { } local tables = utilities.tables -local format, gmatch, rep = string.format, string.gmatch, string.rep +local format, gmatch, rep, gsub = string.format, string.gmatch, string.rep, string.gsub local concat, insert, remove = table.concat, table.insert, table.remove local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring -local type, next, rawset, tonumber = type, next, rawset, tonumber +local type, next, rawset, tonumber, loadstring = type, next, rawset, tonumber, loadstring +local lpegmatch, P, Cs = lpeg.match, lpeg.P, lpeg.Cs function tables.definetable(target) -- defines undefined tables local composed, t, n = nil, { }, 0 @@ -4346,6 +4875,121 @@ function tables.encapsulate(core,capsule,protect) end end +local function serialize(t,r,outer) -- no mixes + r[#r+1] = "{" + local n = #t + if n > 0 then + for i=1,n do + local v = t[i] + local tv = type(v) + if tv == "string" then + r[#r+1] = format("%q,",v) + elseif tv == "number" then + r[#r+1] = format("%s,",v) + elseif tv == "table" then + serialize(v,r) + elseif tv == "boolean" then + r[#r+1] = format("%s,",tostring(v)) + end + end + else + for k, v in next, t do + local tv = type(v) + if tv == "string" then + r[#r+1] = format("[%q]=%q,",k,v) + elseif tv == "number" then + r[#r+1] = format("[%q]=%s,",k,v) + elseif tv == "table" then + r[#r+1] = format("[%q]=",k) + serialize(v,r) + elseif tv == "boolean" then + r[#r+1] = format("[%q]=%s,",k,tostring(v)) + end + end + end + if outer then + r[#r+1] = "}" + else + r[#r+1] = "}," + end + return r +end + +function table.fastserialize(t,prefix) + return concat(serialize(t,{ prefix or "return" },true)) +end + +function table.deserialize(str) + if not str or str == "" then + return + end + local code = loadstring(str) + if not code then + return + end + code = code() + if not code then + return + end + return code +end + +-- inspect(table.fastserialize { a = 1, b = { 4, { 5, 6 } }, c = { d = 7, e = 'f"g\nh' } }) + +function table.load(filename) + if filename then + local t = io.loaddata(filename) + if t and t ~= "" then + t = loadstring(t) + if type(t) == "function" then + t = t() + if type(t) == "table" then + return t + end + end + end + end +end + +local function slowdrop(t) + local r = { } + local l = { } + for i=1,#t do + local ti = t[i] + local j = 0 + for k, v in next, ti do + j = j + 1 + l[j] = format("%s=%q",k,v) + end + r[i] = format(" {%s},\n",concat(l)) + end + return format("return {\n%s}",concat(r)) +end + +local function fastdrop(t) + local r = { "return {\n" } + for i=1,#t do + local ti = t[i] + r[#r+1] = " {" + for k, v in next, ti do + r[#r+1] = format("%s=%q",k,v) + end + r[#r+1] = "},\n" + end + r[#r+1] = "}" + return concat(r) +end + +function table.drop(t,slow) + if #t == 0 then + return "return { }" + elseif slow == true then + return slowdrop(t) -- less memory + else + return fastdrop(t) -- some 15% faster + end +end + end -- of closure @@ -4520,11 +5164,10 @@ local concat = table.concat local type, next = type, next utilities = utilities or {} -utilities.merger = utilities.merger or { } -- maybe mergers +local merger = utilities.merger or { } +utilities.merger = merger utilities.report = logs and logs.reporter("system") or print -local merger = utilities.merger - merger.strip_comment = true local m_begin_merge = "begin library merge" @@ -4570,9 +5213,11 @@ end local function self_save(name, data) if data ~= "" then if merger.strip_comment then - -- saves some 20K local n = #data + -- saves some 20K .. scite comments data = gsub(data,"%-%-~[^\n\r]*[\r\n]","") + -- saves some 20K .. ldx comments + data = gsub(data,"%-%-%[%[ldx%-%-.-%-%-ldx%]%]%-%-","") utilities.report("merge: %s bytes of comment stripped, %s bytes of code left",n-#data,#data) end io.savedata(name,data) @@ -4653,36 +5298,208 @@ if not modules then modules = { } end modules ['util-lua'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + comment = "the strip code is written by Peter Cawley", copyright = "PRAGMA ADE / ConTeXt Development Team", license = "see context related readme files" } -utilities = utilities or {} -utilities.lua = utilities.lua or { } -utilities.report = logs and logs.reporter("system") or print +local rep, sub, byte, dump, format = string.rep, string.sub, string.byte, string.dump, string.format +local loadstring, loadfile, type = loadstring, loadfile, type + +utilities = utilities or {} +utilities.lua = utilities.lua or { } +local luautilities = utilities.lua + +utilities.report = logs and logs.reporter("system") or print -- can be overloaded later + +local tracestripping = false +local forcestupidcompile = true -- use internal bytecode compiler +luautilities.stripcode = true -- support stripping when asked for +luautilities.alwaysstripcode = false -- saves 1 meg on 7 meg compressed format file (2012.08.12) +luautilities.nofstrippedchunks = 0 +luautilities.nofstrippedbytes = 0 + +-- The next function was posted by Peter Cawley on the lua list and strips line +-- number information etc. from the bytecode data blob. We only apply this trick +-- when we store data tables. Stripping makes the compressed format file about +-- 1MB smaller (and uncompressed we save at least 6MB). +-- +-- You can consider this feature an experiment, so it might disappear. There is +-- no noticeable gain in runtime although the memory footprint should be somewhat +-- smaller (and the file system has a bit less to deal with). +-- +-- Begin of borrowed code ... works for Lua 5.1 which LuaTeX currently uses ... + +local function strip_code_pc(dump,name) + local before = #dump + local version, format, endian, int, size, ins, num = byte(dump,5,11) + local subint + if endian == 1 then + subint = function(dump, i, l) + local val = 0 + for n = l, 1, -1 do + val = val * 256 + byte(dump,i + n - 1) + end + return val, i + l + end + else + subint = function(dump, i, l) + local val = 0 + for n = 1, l, 1 do + val = val * 256 + byte(dump,i + n - 1) + end + return val, i + l + end + end + local strip_function + strip_function = function(dump) + local count, offset = subint(dump, 1, size) + local stripped, dirty = rep("\0", size), offset + count + offset = offset + count + int * 2 + 4 + offset = offset + int + subint(dump, offset, int) * ins + count, offset = subint(dump, offset, int) + for n = 1, count do + local t + t, offset = subint(dump, offset, 1) + if t == 1 then + offset = offset + 1 + elseif t == 4 then + offset = offset + size + subint(dump, offset, size) + elseif t == 3 then + offset = offset + num + end + end + count, offset = subint(dump, offset, int) + stripped = stripped .. sub(dump,dirty, offset - 1) + for n = 1, count do + local proto, off = strip_function(sub(dump,offset, -1)) + stripped, offset = stripped .. proto, offset + off - 1 + end + offset = offset + subint(dump, offset, int) * int + int + count, offset = subint(dump, offset, int) + for n = 1, count do + offset = offset + subint(dump, offset, size) + size + int * 2 + end + count, offset = subint(dump, offset, int) + for n = 1, count do + offset = offset + subint(dump, offset, size) + size + end + stripped = stripped .. rep("\0", int * 3) + return stripped, offset + end + dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1)) + local after = #dump + local delta = before-after + if tracestripping then + utilities.report("stripped bytecode: %s, before %s, after %s, delta %s",name or "unknown",before,after,delta) + end + luautilities.nofstrippedchunks = luautilities.nofstrippedchunks + 1 + luautilities.nofstrippedbytes = luautilities.nofstrippedbytes + delta + return dump, delta +end + +-- ... end of borrowed code. + +local function strippedbytecode(code,forcestrip,name) + if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then + return strip_code_pc(code,name) + else + return code, 0 + end +end + +luautilities.stripbytecode = strip_code_pc +luautilities.strippedbytecode = strippedbytecode + +local function fatalerror(name) + utilities.report(format("fatal error in %q",name or "unknown")) +end + +-- quite subtle ... doing this wrong incidentally can give more bytes + + +function luautilities.loadedluacode(fullname,forcestrip,name) + -- quite subtle ... doing this wrong incidentally can give more bytes + name = name or fullname + local code = loadfile(fullname) + if code then + code() + end + if forcestrip and luautilities.stripcode then + if type(forcestrip) == "function" then + forcestrip = forcestrip(fullname) + end + if forcestrip then + local code, n = strip_code_pc(dump(code,name)) + return loadstring(code), n + elseif luautilities.alwaysstripcode then + return loadstring(strip_code_pc(dump(code),name)) + else + return code, 0 + end + elseif luautilities.alwaysstripcode then + return loadstring(strip_code_pc(dump(code),name)) + else + return code, 0 + end +end + +function luautilities.strippedloadstring(code,forcestrip,name) -- not executed + local n = 0 + if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then + code = loadstring(code) + if not code then + fatalerror(name) + end + code, n = strip_code_pc(dump(code),name) + end + return loadstring(code), n +end -local function stupidcompile(luafile,lucfile) - local data = io.loaddata(luafile) - if data and data ~= "" then - data = string.dump(data) - if data and data ~= "" then - io.savedata(lucfile,data) +local function stupidcompile(luafile,lucfile,strip) + local code = io.loaddata(luafile) + local n = 0 + if code and code ~= "" then + code = loadstring(code) + if not code then + fatalerror() + end + code = dump(code) + if strip then + code, n = strippedbytecode(code,true,luafile) -- last one is reported + end + if code and code ~= "" then + io.savedata(lucfile,code) end end + return n end -function utilities.lua.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true +local luac_normal = "texluac -o %q %q" +local luac_strip = "texluac -s -o %q %q" + +function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true utilities.report("lua: compiling %s into %s",luafile,lucfile) os.remove(lucfile) - local command = "-o " .. string.quoted(lucfile) .. " " .. string.quoted(luafile) + local done = false if strip ~= false then - command = "-s " .. command + strip = true + end + if forcestupidcompile then + fallback = true + elseif strip then + done = os.spawn(format(luac_strip, lucfile,luafile)) == 0 + else + done = os.spawn(format(luac_normal,lucfile,luafile)) == 0 end - local done = os.spawn("texluac " .. command) == 0 -- or os.spawn("luac " .. command) == 0 if not done and fallback then - utilities.report("lua: dumping %s into %s (unstripped)",luafile,lucfile) - stupidcompile(luafile,lucfile) -- maybe use the stripper we have elsewhere - cleanup = false -- better see how worse it is + local n = stupidcompile(luafile,lucfile,strip) + if n > 0 then + utilities.report("lua: %s dumped into %s (%i bytes stripped)",luafile,lucfile,n) + else + utilities.report("lua: %s dumped into %s (unstripped)",luafile,lucfile) + end + cleanup = false -- better see how bad it is end if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then utilities.report("lua: removing %s",luafile) @@ -4697,7 +5514,6 @@ end - end -- of closure do -- create closure to overcome 200 locals limit @@ -4710,8 +5526,10 @@ if not modules then modules = { } end modules ['util-prs'] = { license = "see context related readme files" } -local P, R, V, C, Ct, Cs, Carg = lpeg.P, lpeg.R, lpeg.V, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg -local lpegmatch = lpeg.match +local lpeg, table, string = lpeg, table, string + +local P, R, V, S, C, Ct, Cs, Carg, Cc = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc +local lpegmatch, patterns = lpeg.match, lpeg.patterns local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find local tostring, type, next = tostring, type, next @@ -4723,29 +5541,39 @@ parsers.patterns = parsers.patterns or { } local setmetatableindex = table.setmetatableindex local sortedhash = table.sortedhash +-- we share some patterns + +local space = P(' ') +local equal = P("=") +local comma = P(",") +local lbrace = P("{") +local rbrace = P("}") +local period = S(".") +local punctuation = S(".,:;") +local spacer = patterns.spacer +local whitespace = patterns.whitespace +local newline = patterns.newline +local anything = patterns.anything +local endofstring = patterns.endofstring + -- we could use a Cf Cg construct local escape, left, right = P("\\"), P('{'), P('}') -lpeg.patterns.balanced = P { +patterns.balanced = P { [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0, [2] = left * V(1) * right } -local space = P(' ') -local equal = P("=") -local comma = P(",") -local lbrace = P("{") -local rbrace = P("}") local nobrace = 1 - (lbrace+rbrace) local nested = P { lbrace * (nobrace + V(1))^0 * rbrace } local spaces = space^0 local argument = Cs((lbrace/"") * ((nobrace + nested)^0) * (rbrace/"")) -local content = (1-P(-1))^0 +local content = (1-endofstring)^0 -lpeg.patterns.nested = nested -- no capture -lpeg.patterns.argument = argument -- argument after e.g. = -lpeg.patterns.content = content -- rest after e.g = +patterns.nested = nested -- no capture +patterns.argument = argument -- argument after e.g. = +patterns.content = content -- rest after e.g = local value = P(lbrace * C((nobrace + nested)^0) * rbrace) + C((nested + (1-comma))^0) @@ -4764,10 +5592,6 @@ local function set(key,value) hash[key] = value end -local function set(key,value) - hash[key] = value -end - local pattern_a_s = (pattern_a/set)^1 local pattern_b_s = (pattern_b/set)^1 local pattern_c_s = (pattern_c/set)^1 @@ -4818,7 +5642,7 @@ end local separator = comma * space^0 local value = P(lbrace * C((nobrace + nested)^0) * rbrace) + C((nested + (1-comma))^0) -local pattern = Ct(value*(separator*value)^0) +local pattern = spaces * Ct(value*(separator*value)^0) -- "aap, {noot}, mies" : outer {} removes, leading spaces ignored @@ -4942,6 +5766,37 @@ function parsers.listitem(str) return gmatch(str,"[^, ]+") end +-- +local digit = R("09") + +local pattern = Cs { "start", + start = V("one") + V("two") + V("three"), + rest = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0, + thousand = digit * digit * digit, + one = digit * V("rest"), + two = digit * digit * V("rest"), + three = V("thousand") * V("rest"), +} + +patterns.splitthousands = pattern -- maybe better in the parsers namespace ? + +function parsers.splitthousands(str) + return lpegmatch(pattern,str) or str +end + +-- print(parsers.splitthousands("11111111111.11")) + +local optionalwhitespace = whitespace^0 + +patterns.words = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1) +patterns.sentences = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1) +patterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1) + +-- local str = " Word1 word2. \n Word3 word4. \n\n Word5 word6.\n " +-- inspect(lpegmatch(patterns.paragraphs,str)) +-- inspect(lpegmatch(patterns.sentences,str)) +-- inspect(lpegmatch(patterns.words,str)) + end -- of closure @@ -5043,7 +5898,7 @@ end -- of closure do -- create closure to overcome 200 locals limit -if not modules then modules = { } end modules ['util.deb'] = { +if not modules then modules = { } end modules ['util-deb'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -5155,6 +6010,7 @@ function inspect(i) -- global function else print(tostring(i)) end + return i -- so that we can inline the inspect end -- from the lua book: @@ -5194,7 +6050,7 @@ if not modules then modules = { } end modules ['trac-inf'] = { local format, lower = string.format, string.lower local clock = os.gettimeofday or os.clock -- should go in environment -local write_nl = texio.write_nl +local write_nl = texio and texio.write_nl or print statistics = statistics or { } local statistics = statistics @@ -5277,7 +6133,7 @@ statistics.elapsedtime = elapsedtime statistics.elapsedindeed = elapsedindeed statistics.elapsedseconds = elapsedseconds --- general function +-- general function .. we might split this module function statistics.register(tag,fnc) if statistics.enable and type(fnc) == "function" then @@ -5387,6 +6243,8 @@ if not modules then modules = { } end modules ['trac-set'] = { -- might become u license = "see context related readme files" } +-- maybe this should be util-set.lua + local type, next, tostring = type, next, tostring local concat = table.concat local format, find, lower, gsub, escapedpattern = string.format, string.find, string.lower, string.gsub, string.escapedpattern @@ -5586,7 +6444,7 @@ function setters.show(t) local value, default, modules = functions.value, functions.default, #functions value = value == nil and "unset" or tostring(value) default = default == nil and "unset" or tostring(default) - t.report("%-30s modules: %2i default: %6s value: %6s",name,modules,default,value) + t.report("%-50s modules: %2i default: %6s value: %6s",name,modules,default,value) end end t.report() @@ -5678,17 +6536,31 @@ end) -- experiment -local flags = environment and environment.engineflags +if environment then -if flags then - if trackers and flags.trackers then - setters.initialize("flags","trackers", settings_to_hash(flags.trackers)) - -- t_enable(flags.trackers) - end - if directives and flags.directives then - setters.initialize("flags","directives", settings_to_hash(flags.directives)) - -- d_enable(flags.directives) + -- The engineflags are known earlier than environment.arguments but maybe we + -- need to handle them both as the later are parsed differently. The c: prefix + -- is used by mtx-context to isolate the flags from those that concern luatex. + + local engineflags = environment.engineflags + + if engineflags then + if trackers then + local list = engineflags["c:trackers"] or engineflags["trackers"] + if type(list) == "string" then + setters.initialize("flags","trackers",settings_to_hash(list)) + -- t_enable(list) + end + end + if directives then + local list = engineflags["c:directives"] or engineflags["directives"] + if type(list) == "string" then + setters.initialize("flags","directives", settings_to_hash(list)) + -- d_enable(list) + end + end end + end -- here @@ -5741,10 +6613,7 @@ local next, type = next, type local setmetatableindex = table.setmetatableindex ---[[ldx-- -

This is a prelude to a more extensive logging module. We no longer -provide based logging a sparsing is relatively easy anyway.

---ldx]]-- + logs = logs or { } local logs = logs @@ -6560,7 +7429,8 @@ local allocate, mark = utilities.storage.allocate, utilities.storage.mark local format, sub, match, gsub, find = string.format, string.sub, string.match, string.gsub, string.find local unquoted, quoted = string.unquoted, string.quoted -local concat = table.concat +local concat, insert, remove = table.concat, table.insert, table.remove +local loadedluacode = utilities.lua.loadedluacode -- precautions @@ -6578,8 +7448,28 @@ if arg and (arg[0] == 'luatex' or arg[0] == 'luatex.exe') and arg[1] == "--luaon for k=3,#arg do arg[k-2] = arg[k] end - arg[#arg] = nil -- last - arg[#arg] = nil -- pre-last + remove(arg) -- last + remove(arg) -- pre-last +end + +-- This is an ugly hack but it permits symlinking a script (say 'context') to 'mtxrun' as in: +-- +-- ln -s /opt/minimals/tex/texmf-linux-64/bin/mtxrun context +-- +-- The special mapping hack is needed because 'luatools' boils down to 'mtxrun --script base' +-- but it's unlikely that there will be more of this + +do + + local originalzero = file.basename(arg[0]) + local specialmapping = { luatools == "base" } + + if originalzero ~= "mtxrun" and originalzero ~= "mtxrun.lua" then + arg[0] = specialmapping[originalzero] or originalzero + insert(arg,0,"--script") + insert(arg,0,"mtxrun") + end + end -- environment @@ -6619,6 +7509,8 @@ local mt = { setmetatable(environment,mt) +-- context specific arguments (in order not to confuse the engine) + function environment.initializearguments(arg) local arguments, files = { }, { } environment.arguments, environment.files, environment.sortedflags = arguments, files, nil @@ -6627,10 +7519,12 @@ function environment.initializearguments(arg) if index > 0 then local flag, value = match(argument,"^%-+(.-)=(.-)$") if flag then + flag = gsub(flag,"^c:","") arguments[flag] = unquoted(value or "") else flag = match(argument,"^%-+(.+)") if flag then + flag = gsub(flag,"^c:","") arguments[flag] = true else files[#files+1] = argument @@ -6650,7 +7544,7 @@ end -- tricky: too many hits when we support partials unless we add -- a registration of arguments so from now on we have 'partial' -function environment.argument(name,partial) +function environment.getargument(name,partial) local arguments, sortedflags = environment.arguments, environment.sortedflags if arguments[name] then return arguments[name] @@ -6673,6 +7567,8 @@ function environment.argument(name,partial) return nil end +environment.argument = environment.getargument + function environment.splitarguments(separator) -- rather special, cut-off before separator local done, before, after = false, { }, { } local originalarguments = environment.originalarguments @@ -6758,7 +7654,7 @@ function environment.texfile(filename) return resolvers.findfile(filename,'tex') end -function environment.luafile(filename) +function environment.luafile(filename) -- needs checking local resolved = resolvers.findfile(filename,'tex') or "" if resolved ~= "" then return resolved @@ -6770,13 +7666,16 @@ function environment.luafile(filename) return resolvers.findfile(filename,'luatexlibs') or "" end -environment.loadedluacode = loadfile -- can be overloaded +local function checkstrip(filename) + local modu = modules[file.nameonly(filename)] + return modu and modu.dataonly +end function environment.luafilechunk(filename,silent) -- used for loading lua bytecode in the format filename = file.replacesuffix(filename, "lua") local fullname = environment.luafile(filename) if fullname and fullname ~= "" then - local data = environment.loadedluacode(fullname) + local data = loadedluacode(fullname,checkstrip,filename) if trace_locating then report_lua("loading file %s%s", fullname, not data and " failed" or "") elseif not silent then @@ -6874,21 +7773,7 @@ local trace_entities = false trackers.register("xml.entities", function(v) trac local report_xml = logs and logs.reporter("xml","core") or function(...) print(format(...)) end ---[[ldx-- -

The parser used here is inspired by the variant discussed in the lua book, but -handles comment and processing instructions, has a different structure, provides -parent access; a first version used different trickery but was less optimized to we -went this route. First we had a find based parser, now we have an based one. -The find based parser can be found in l-xml-edu.lua along with other older code.

-

Beware, the interface may change. For instance at, ns, tg, dt may get more -verbose names. Once the code is stable we will also remove some tracing and -optimize the code.

- -

I might even decide to reimplement the parser using the latest trickery -as the current variant was written when showed up and it's easier now to -build tables in one go.

---ldx]]-- xml = xml or { } local xml = xml @@ -6898,46 +7783,25 @@ local utf = unicode.utf8 local concat, remove, insert = table.concat, table.remove, table.insert local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber local format, lower, find, match, gsub = string.format, string.lower, string.find, string.match, string.gsub -local utfchar, utffind, utfgsub = utf.char, utf.find, utf.gsub +local utfchar = utf.char local lpegmatch = lpeg.match local P, S, R, C, V, C, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.C, lpeg.Cs ---[[ldx-- -

First a hack to enable namespace resolving. A namespace is characterized by -a . The following function associates a namespace prefix with a -pattern. We use , which in this case is more than twice as fast as a -find based solution where we loop over an array of patterns. Less code and -much cleaner.

---ldx]]-- + xml.xmlns = xml.xmlns or { } local check = P(false) local parse = check ---[[ldx-- -

The next function associates a namespace prefix with an . This -normally happens independent of parsing.

- -xml.registerns("mml","mathml") - ---ldx]]-- function xml.registerns(namespace, pattern) -- pattern can be an lpeg check = check + C(P(lower(pattern))) / namespace parse = P { P(check) + 1 * V(1) } end ---[[ldx-- -

The next function also registers a namespace, but this time we map a -given namespace prefix onto a registered one, using the given -. This used for attributes like xmlns:m.

- -xml.checkns("m","http://www.w3.org/mathml") - ---ldx]]-- function xml.checkns(namespace,url) local ns = lpegmatch(parse,lower(url)) @@ -6946,66 +7810,15 @@ function xml.checkns(namespace,url) end end ---[[ldx-- -

Next we provide a way to turn an into a registered -namespace. This used for the xmlns attribute.

- -resolvedns = xml.resolvens("http://www.w3.org/mathml") - - -This returns mml. ---ldx]]-- function xml.resolvens(url) return lpegmatch(parse,lower(url)) or "" end ---[[ldx-- -

A namespace in an element can be remapped onto the registered -one efficiently by using the xml.xmlns table.

---ldx]]-- - ---[[ldx-- -

This version uses . We follow the same approach as before, stack and top and -such. This version is about twice as fast which is mostly due to the fact that -we don't have to prepare the stream for cdata, doctype etc etc. This variant is -is dedicated to Luigi Scarso, who challenged me with 40 megabyte files that -took 12.5 seconds to load (1.5 for file io and the rest for tree building). With -the implementation we got that down to less 7.3 seconds. Loading the 14 - interface definition files (2.6 meg) went down from 1.05 seconds to 0.55.

- -

Next comes the parser. The rather messy doctype definition comes in many -disguises so it is no surprice that later on have to dedicate quite some - code to it.

- - - - - - - - - - -

The code may look a bit complex but this is mostly due to the fact that we -resolve namespaces and attach metatables. There is only one public function:

- - -local x = xml.convert(somestring) - - -

An optional second boolean argument tells this function not to create a root -element.

- -

Valid entities are:

- - - - - - ---ldx]]-- + + + -- not just one big nested table capture (lpeg overflow) @@ -7220,15 +8033,7 @@ local privates_n = { -- keeps track of defined ones } -local function escaped(s) - if s == "" then - return "" - else -- if utffind(s,privates_u) then - return (utfgsub(s,".",privates_u)) - -- else - -- return s - end -end +local escaped = utf.remapper(privates_u) local function unescaped(s) local p = privates_n[s] @@ -7243,13 +8048,7 @@ local function unescaped(s) return p end -local function unprivatized(s,resolve) - if s == "" then - return "" - else - return (utfgsub(s,".",privates_p)) - end -end +local unprivatized = utf.remapper(privates_p) xml.privatetoken = unescaped xml.unprivatized = unprivatized @@ -7589,7 +8388,12 @@ local function _xmlconvert_(data, settings) else errorhandler = errorhandler or xml.errorhandler if errorhandler then - xml.errorhandler(format("load error: %s",errorstr)) + local currentresource = settings.currentresource + if currentresource and currentresource ~= "" then + xml.errorhandler(format("load error in [%s]: %s",currentresource,errorstr)) + else + xml.errorhandler(format("load error: %s",errorstr)) + end end end else @@ -7634,7 +8438,7 @@ function xmlconvert(data,settings) if ok then return result else - return _xmlconvert_("") + return _xmlconvert_("",settings) end end @@ -7655,10 +8459,7 @@ function xml.inheritedconvert(data,xmldata) -- xmldata is parent return xc end ---[[ldx-- -

Packaging data in an xml like table is done with the following -function. Maybe it will go away (when not used).

---ldx]]-- + function xml.is_valid(root) return root and root.dt and root.dt[1] and type(root.dt[1]) == "table" and not root.dt[1].er @@ -7677,11 +8478,7 @@ end xml.errorhandler = report_xml ---[[ldx-- -

We cannot load an from a filehandle so we need to load -the whole file first. The function accepts a string representing -a filename or a file handle.

---ldx]]-- + function xml.load(filename,settings) local data = "" @@ -7695,13 +8492,17 @@ function xml.load(filename,settings) elseif filename then -- filehandle data = filename:read("*all") end - return xmlconvert(data,settings) + if settings then + settings.currentresource = filename + local result = xmlconvert(data,settings) + settings.currentresource = nil + return result + else + return xmlconvert(data,{ currentresource = filename }) + end end ---[[ldx-- -

When we inject new elements, we need to convert strings to -valid trees, which is what the next function does.

---ldx]]-- + local no_root = { no_root = true } @@ -7714,11 +8515,7 @@ function xml.toxml(data) end end ---[[ldx-- -

For copying a tree we use a dedicated function instead of the -generic table copier. Since we know what we're dealing with we -can speed up things a bit. The second argument is not to be used!

---ldx]]-- + local function copy(old,tables) if old then @@ -7742,13 +8539,7 @@ end xml.copy = copy ---[[ldx-- -

In serializing the tree or parts of the tree is a major -actitivity which is why the following function is pretty optimized resulting -in a few more lines of code than needed. The variant that uses the formatting -function for all components is about 15% slower than the concatinating -alternative.

---ldx]]-- + -- todo: add when not present @@ -7761,15 +8552,12 @@ function xml.checkbom(root) -- can be made faster return end end - insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } ) + insert(dt, 1, { special = true, ns = "", tg = "@pi@", dt = { "xml version='1.0' standalone='yes'" } } ) insert(dt, 2, "\n" ) end end ---[[ldx-- -

At the cost of some 25% runtime overhead you can first convert the tree to a string -and then handle the lot.

---ldx]]-- + -- new experimental reorganized serialize @@ -7962,21 +8750,7 @@ newhandlers { } } ---[[ldx-- -

How you deal with saving data depends on your preferences. For a 40 MB database -file the timing on a 2.3 Core Duo are as follows (time in seconds):

- - -1.3 : load data from file to string -6.1 : convert string into tree -5.3 : saving in file using xmlsave -6.8 : converting to string using xml.tostring -3.6 : saving converted string in file - -

Beware, these were timing with the old routine but measurements will not be that -much different I guess.

---ldx]]-- -- maybe this will move to lxml-xml @@ -8054,10 +8828,7 @@ xml.newhandlers = newhandlers xml.serialize = serialize xml.tostring = xmltostring ---[[ldx-- -

The next function operated on the content only and needs a handle function -that accepts a string.

---ldx]]-- + local function xmlstring(e,handle) if not handle or (e.special and e.tg ~= "@rt@") then @@ -8076,9 +8847,7 @@ end xml.string = xmlstring ---[[ldx-- -

A few helpers:

---ldx]]-- + function xml.settings(e) @@ -8122,11 +8891,7 @@ function xml.name(root) end end ---[[ldx-- -

The next helper erases an element but keeps the table as it is, -and since empty strings are not serialized (effectively) it does -not harm. Copying the table would take more time. Usage:

---ldx]]-- + function xml.erase(dt,k) if dt then @@ -8138,13 +8903,7 @@ function xml.erase(dt,k) end end ---[[ldx-- -

The next helper assigns a tree (or string). Usage:

- -dt[k] = xml.assign(root) or xml.assign(dt,k,root) - ---ldx]]-- function xml.assign(dt,k,root) if dt and k then @@ -8157,20 +8916,14 @@ end -- the following helpers may move ---[[ldx-- -

The next helper assigns a tree (or string). Usage:

- -xml.tocdata(e) -xml.tocdata(e,"error") - ---ldx]]-- + function xml.tocdata(e,wrapper) -- a few more in the aux module local whatever = type(e) == "table" and xmltostring(e.dt) or e or "" if wrapper then whatever = format("<%s>%s",wrapper,whatever,wrapper) end - local t = { special = true, ns = "", tg = "@cd@", at = {}, rn = "", dt = { whatever }, __p__ = e } + local t = { special = true, ns = "", tg = "@cd@", at = { }, rn = "", dt = { whatever }, __p__ = e } setmetatable(t,getmetatable(e)) e.dt = { t } end @@ -8225,7 +8978,7 @@ end -- of closure do -- create closure to overcome 200 locals limit -if not modules then modules = { } end modules ['lxml-pth'] = { +if not modules then modules = { } end modules ['lxml-lpt'] = { version = 1.001, comment = "this module is the basis for the lxml-* ones", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -8246,28 +8999,9 @@ local setmetatableindex = table.setmetatableindex -- beware, this is not xpath ... e.g. position is different (currently) and -- we have reverse-sibling as reversed preceding sibling ---[[ldx-- -

This module can be used stand alone but also inside in -which case it hooks into the tracker code. Therefore we provide a few -functions that set the tracers. Here we overload a previously defined -function.

-

If I can get in the mood I will make a variant that is XSLT compliant -but I wonder if it makes sense.

---ldx]]-- - ---[[ldx-- -

Expecially the lpath code is experimental, we will support some of xpath, but -only things that make sense for us; as compensation it is possible to hook in your -own functions. Apart from preprocessing content for we also need -this module for process management, like handling and -files.

- - -a/b/c /*/c -a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n) -a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n) - ---ldx]]-- + + + local trace_lpath = false if trackers then trackers.register("xml.path", function(v) trace_lpath = v end) end local trace_lparse = false if trackers then trackers.register("xml.parse", function(v) trace_lparse = v end) end @@ -8275,11 +9009,7 @@ local trace_lprofile = false if trackers then trackers.register("xml.profile", local report_lpath = logs.reporter("xml","lpath") ---[[ldx-- -

We've now arrived at an interesting part: accessing the tree using a subset -of and since we're not compatible we call it . We -will explain more about its usage in other documents.

---ldx]]-- + local xml = xml @@ -8731,14 +9461,23 @@ local lp_builtin = P ( -- for the moment we keep namespaces with attributes local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * ((R("az","AZ") + S("-_:"))^1) * Cc("'])") -local lp_fastpos_p = ((P("+")^0 * R("09")^1 * P(-1)) / function(s) return "l==" .. s end) -local lp_fastpos_n = ((P("-") * R("09")^1 * P(-1)) / function(s) return "(" .. s .. "<0 and (#list+".. s .. "==l))" end) + +-- lp_fastpos_p = (P("+")^0 * R("09")^1 * P(-1)) / function(s) return "l==" .. s end +-- lp_fastpos_n = (P("-") * R("09")^1 * P(-1)) / function(s) return "(" .. s .. "<0 and (#list+".. s .. "==l))" end + +lp_fastpos_p = P("+")^0 * R("09")^1 * P(-1) / "l==%0" +lp_fastpos_n = P("-") * R("09")^1 * P(-1) / "(%0<0 and (#list+%0==l))" + local lp_fastpos = lp_fastpos_n + lp_fastpos_p + local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false") -local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling - return t .. "(" -end +-- local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling +-- return t .. "(" +-- end + +-- local lp_lua_function = (R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / "%0(" +local lp_lua_function = Cs((R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(")) / "%0" local lp_function = C(R("az","AZ","__")^1) * P("(") / function(t) -- todo: better . handling if expressions[t] then @@ -9254,9 +9993,7 @@ end xml.applylpath = applylpath -- takes a table as first argment, which is what xml.filter will do ---[[ldx-- -

This is the main filter function. It returns whatever is asked for.

---ldx]]-- + function xml.filter(root,pattern) -- no longer funny attribute handling here return applylpath(root,pattern) @@ -9354,12 +10091,12 @@ xml.selection = selection -- new method, simple handle -- generic function finalizer (independant namespace) -local function dofunction(collected,fnc) +local function dofunction(collected,fnc,...) if collected then local f = functions[fnc] if f then for c=1,#collected do - f(collected[c]) + f(collected[c],...) end else report_lpath("unknown function '%s'",fnc) @@ -9460,21 +10197,7 @@ expressions.tag = function(e,n) -- only tg end end ---[[ldx-- -

Often using an iterators looks nicer in the code than passing handler -functions. The book describes how to use coroutines for that -purpose (). This permits -code like:

- -for r, d, k in xml.elements(xml.load('text.xml'),"title") do - print(d[k]) -- old method -end -for e in xml.collected(xml.load('text.xml'),"title") do - print(e) -- new one -end - ---ldx]]-- local wrap, yield = coroutine.wrap, coroutine.yield @@ -9515,6 +10238,32 @@ function xml.inspect(collection,pattern) end end +-- texy (see xfdf): + +local function split(e) + local dt = e.dt + if dt then + for i=1,#dt do + local dti = dt[i] + if type(dti) == "string" then + dti = gsub(dti,"^[\n\r]*(.-)[\n\r]*","%1") + dti = gsub(dti,"[\n\r]+","\n\n") + dt[i] = dti + else + split(dti) + end + end + end + return e +end + +function xml.finalizers.paragraphs(c) + for i=1,#c do + split(c[i]) + end + return c +end + end -- of closure @@ -9539,13 +10288,7 @@ local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, l lpegpatterns.xml = lpegpatterns.xml or { } local xmlpatterns = lpegpatterns.xml ---[[ldx-- -

The following helper functions best belong to the lxml-ini -module. Some are here because we need then in the mk -document and other manuals, others came up when playing with -this module. Since this module is also used in we've -put them here instead of loading mode modules there then needed.

---ldx]]-- + local function xmlgsub(t,old,new) -- will be replaced local dt = t.dt @@ -9731,9 +10474,7 @@ function xml.processattributes(root,pattern,handle) return collected end ---[[ldx-- -

The following functions collect elements and texts.

---ldx]]-- + -- are these still needed -> lxml-cmp.lua @@ -9772,9 +10513,7 @@ function xml.collect_tags(root, pattern, nonamespace) end end ---[[ldx-- -

We've now arrived at the functions that manipulate the tree.

---ldx]]-- + local no_root = { no_root = true } @@ -10160,9 +10899,7 @@ function xml.remapname(root, pattern, newtg, newns, newrn) end end ---[[ldx-- -

Helper (for q2p).

---ldx]]-- + function xml.cdatatotext(e) local dt = e.dt @@ -10259,9 +10996,7 @@ end -- xml.addentitiesdoctype(x,"hexadecimal") -- print(x) ---[[ldx-- -

Here are a few synonyms.

---ldx]]-- + xml.all = xml.each xml.insert = xml.insertafter @@ -10852,7 +11587,7 @@ local gsub, find, gmatch, char = string.gsub, string.find, string.gmatch, string local concat = table.concat local next, type = next, type -local filedirname, filebasename, fileextname, filejoin = file.dirname, file.basename, file.extname, file.join +local filedirname, filebasename, filejoin = file.dirname, file.basename, file.join local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end) local trace_detail = false trackers.register("resolvers.details", function(v) trace_detail = v end) @@ -11202,12 +11937,14 @@ local function splitpathexpr(str, newlist, validate) -- I couldn't resist lpeggi for s in gmatch(str,"[^,]+") do s = validate(s) if s then - n = n + 1 ; t[n] = s + n = n + 1 + t[n] = s end end else for s in gmatch(str,"[^,]+") do - n = n + 1 ; t[n] = s + n = n + 1 + t[n] = s end end if trace_expansions then @@ -11221,7 +11958,7 @@ end -- We could make the previous one public. local function validate(s) - s = collapsepath(s) -- already keeps the // + s = collapsepath(s) -- already keeps the trailing / and // return s ~= "" and not find(s,"^!*unset/*$") and s end @@ -11559,7 +12296,7 @@ local resolvers = resolvers local allocate = utilities.storage.allocate local setmetatableindex = table.setmetatableindex -local fileextname = file.extname +local suffixonly = file.suffixonly local formats = allocate() local suffixes = allocate() @@ -11814,7 +12551,7 @@ function resolvers.formatofvariable(str) end function resolvers.formatofsuffix(str) -- of file - return suffixmap[fileextname(str)] or 'tex' -- so many map onto tex (like mkiv, cld etc) + return suffixmap[suffixonly(str)] or 'tex' -- so many map onto tex (like mkiv, cld etc) end function resolvers.variableofformat(str) @@ -11826,7 +12563,7 @@ function resolvers.variableofformatorsuffix(str) if v then return v end - v = suffixmap[fileextname(str)] + v = suffixmap[suffixonly(str)] if v then return formats[v] end @@ -11847,21 +12584,7 @@ if not modules then modules = { } end modules ['data-tmp'] = { license = "see context related readme files" } ---[[ldx-- -

This module deals with caching data. It sets up the paths and -implements loaders and savers for tables. Best is to set the -following variable. When not set, the usual paths will be -checked. Personally I prefer the (users) temporary path.

- - -TEXMFCACHE=$TMP;$TEMP;$TMPDIR;$TEMPDIR;$HOME;$TEXMFVAR;$VARTEXMF;. - -

Currently we do no locking when we write files. This is no real -problem because most caching involves fonts and the chance of them -being written at the same time is small. We also need to extend -luatools with a recache feature.

---ldx]]-- local format, lower, gsub, concat = string.format, string.lower, string.gsub, table.concat local serialize, serializetofile = table.serialize, table.tofile @@ -12396,11 +13119,12 @@ local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns local filedirname = file.dirname local filebasename = file.basename -local fileextname = file.extname +local suffixonly = file.suffixonly local filejoin = file.join local collapsepath = file.collapsepath local joinpath = file.joinpath local allocate = utilities.storage.allocate +local settings_to_array = utilities.parsers.settings_to_array local setmetatableindex = table.setmetatableindex local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end) @@ -12424,7 +13148,7 @@ resolvers.cacheversion = '1.0.1' resolvers.configbanner = '' resolvers.homedir = environment.homedir resolvers.criticalvars = allocate { "SELFAUTOLOC", "SELFAUTODIR", "SELFAUTOPARENT", "TEXMFCNF", "TEXMF", "TEXOS" } -resolvers.luacnfname = 'texmfcnf.lua' +resolvers.luacnfname = "texmfcnf.lua" resolvers.luacnfstate = "unknown" -- The web2c tex binaries as well as kpse have built in paths for the configuration @@ -12696,7 +13420,7 @@ end local function identify_configuration_files() local specification = instance.specification if #specification == 0 then - local cnfspec = getenv('TEXMFCNF') + local cnfspec = getenv("TEXMFCNF") if cnfspec == "" then cnfspec = resolvers.luacnfspec resolvers.luacnfstate = "default" @@ -12784,7 +13508,7 @@ local function load_configuration_files() -- we push the value into the main environment (osenv) so -- that it takes precedence over the default one and therefore -- also over following definitions - resolvers.setenv('TEXMFCNF',cnfspec) -- resolves prefixes + resolvers.setenv("TEXMFCNF",cnfspec) -- resolves prefixes -- we now identify and load the specified configuration files instance.specification = { } identify_configuration_files() @@ -12832,10 +13556,11 @@ end local function locate_file_databases() -- todo: cache:// and tree:// (runtime) - local texmfpaths = resolvers.expandedpathlist('TEXMF') + local texmfpaths = resolvers.expandedpathlist("TEXMF") if #texmfpaths > 0 then for i=1,#texmfpaths do local path = collapsepath(texmfpaths[i]) + path = gsub(path,"/+$","") -- in case $HOME expands to something with a trailing / local stripped = lpegmatch(inhibitstripper,path) -- the !! thing if stripped ~= "" then local runtime = stripped == path @@ -12964,9 +13689,9 @@ function resolvers.prependhash(type,name,cache) end function resolvers.extendtexmfvariable(specification) -- crap, we could better prepend the hash - local t = resolvers.splitpath(getenv('TEXMF')) + local t = resolvers.splitpath(getenv("TEXMF")) -- okay? insert(t,1,specification) - local newspec = concat(t,";") + local newspec = concat(t,",") -- not ; if instance.environment["TEXMF"] then instance.environment["TEXMF"] = newspec elseif instance.variables["TEXMF"] then @@ -13041,14 +13766,19 @@ function resolvers.resetextrapath() end function resolvers.registerextrapath(paths,subpaths) + paths = settings_to_array(paths) + subpaths = settings_to_array(subpaths) local ep = instance.extra_paths or { } local oldn = #ep local newn = oldn - if paths and paths ~= "" then - if subpaths and subpaths ~= "" then - for p in gmatch(paths,"[^,]+") do - -- we gmatch each step again, not that fast, but used seldom - for s in gmatch(subpaths,"[^,]+") do + local nofpaths = #paths + local nofsubpaths = #subpaths + if nofpaths > 0 then + if nofsubpaths > 0 then + for i=1,nofpaths do + local p = paths[i] + for j=1,nofsubpaths do + local s = subpaths[j] local ps = p .. "/" .. s if not done[ps] then newn = newn + 1 @@ -13058,7 +13788,8 @@ function resolvers.registerextrapath(paths,subpaths) end end else - for p in gmatch(paths,"[^,]+") do + for i=1,nofpaths do + local p = paths[i] if not done[p] then newn = newn + 1 ep[newn] = resolvers.cleanpath(p) @@ -13066,10 +13797,10 @@ function resolvers.registerextrapath(paths,subpaths) end end end - elseif subpaths and subpaths ~= "" then + elseif nofsubpaths > 0 then for i=1,oldn do - -- we gmatch each step again, not that fast, but used seldom - for s in gmatch(subpaths,"[^,]+") do + for j=1,nofsubpaths do + local s = subpaths[j] local ps = ep[i] .. "/" .. s if not done[ps] then newn = newn + 1 @@ -13147,18 +13878,21 @@ function resolvers.expandedpathlist(str) return { } elseif instance.savelists then str = lpegmatch(dollarstripper,str) - if not instance.lists[str] then -- cached - local lst = made_list(instance,resolvers.splitpath(resolvers.expansion(str))) - instance.lists[str] = expandedpathfromlist(lst) - end - return instance.lists[str] + local lists = instance.lists + local lst = lists[str] + if not lst then + local l = made_list(instance,resolvers.splitpath(resolvers.expansion(str))) + lst = expandedpathfromlist(l) + lists[str] = lst + end + return lst else local lst = resolvers.splitpath(resolvers.expansion(str)) return made_list(instance,expandedpathfromlist(lst)) end end -function resolvers.expandedpathlistfromvariable(str) -- brrr +function resolvers.expandedpathlistfromvariable(str) -- brrr / could also have cleaner ^!! /$ // str = lpegmatch(dollarstripper,str) local tmp = resolvers.variableofformatorsuffix(str) return resolvers.expandedpathlist(tmp ~= "" and tmp or str) @@ -13315,7 +14049,7 @@ local preparetreepattern = Cs((P(".")/"%%." + P("-")/"%%-" + P(1))^0 * Cc("$")) local collect_instance_files local function find_analyze(filename,askedformat,allresults) - local filetype, wantedfiles, ext = '', { }, fileextname(filename) + local filetype, wantedfiles, ext = '', { }, suffixonly(filename) -- too tricky as filename can be bla.1.2.3: -- -- if not suffixmap[ext] then @@ -13393,7 +14127,7 @@ local function find_qualified(filename,allresults) -- this one will be split too if trace_detail then report_resolving("locating qualified file '%s'", filename) end - local forcedname, suffix = "", fileextname(filename) + local forcedname, suffix = "", suffixonly(filename) if suffix == "" then -- why local format_suffixes = askedformat == "" and resolvers.defaultsuffixes or suffixes[askedformat] if format_suffixes then @@ -14063,6 +14797,8 @@ local gsub = string.gsub local cleanpath, findgivenfile, expansion = resolvers.cleanpath, resolvers.findgivenfile, resolvers.expansion local getenv = resolvers.getenv -- we can probably also use resolvers.expansion local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match +local joinpath, basename, dirname = file.join, file.basename, file.dirname +local getmetatable, rawset, type = getmetatable, rawset, type -- getenv = function(...) return resolvers.getenv(...) end -- needs checking (definitions changes later on) @@ -14104,28 +14840,43 @@ end prefixes.filename = function(str) local fullname = findgivenfile(str) or "" - return cleanpath(file.basename((fullname ~= "" and fullname) or str)) -- no cleanpath needed here + return cleanpath(basename((fullname ~= "" and fullname) or str)) -- no cleanpath needed here end prefixes.pathname = function(str) local fullname = findgivenfile(str) or "" - return cleanpath(file.dirname((fullname ~= "" and fullname) or str)) + return cleanpath(dirname((fullname ~= "" and fullname) or str)) end prefixes.selfautoloc = function(str) - return cleanpath(file.join(getenv('SELFAUTOLOC'),str)) + return cleanpath(joinpath(getenv('SELFAUTOLOC'),str)) end prefixes.selfautoparent = function(str) - return cleanpath(file.join(getenv('SELFAUTOPARENT'),str)) + return cleanpath(joinpath(getenv('SELFAUTOPARENT'),str)) end prefixes.selfautodir = function(str) - return cleanpath(file.join(getenv('SELFAUTODIR'),str)) + return cleanpath(joinpath(getenv('SELFAUTODIR'),str)) end prefixes.home = function(str) - return cleanpath(file.join(getenv('HOME'),str)) + return cleanpath(joinpath(getenv('HOME'),str)) +end + +local function toppath() + local pathname = dirname(inputstack[#inputstack] or "") + if pathname == "" then + return "." + else + return pathname + end +end + +resolvers.toppath = toppath + +prefixes.toppath = function(str) + return cleanpath(joinpath(toppath(),str)) end prefixes.env = prefixes.environment @@ -14161,6 +14912,8 @@ function resolvers.resetresolve(str) resolved, abstract = { }, { } end +-- todo: use an lpeg (see data-lua for !! / stripper) + local function resolve(str) -- use schemes, this one is then for the commandline only if type(str) == "table" then local t = { } @@ -14186,7 +14939,7 @@ end resolvers.resolve = resolve resolvers.unresolve = unresolve -if os.uname then +if type(os.uname) == "function" then for k, v in next, os.uname() do if not prefixes[k] then @@ -14198,11 +14951,17 @@ end if os.type == "unix" then + -- We need to distringuish between a prefix and something else : so we + -- have a special repath variant for linux. Also, when a new prefix is + -- defined, we need to remake the matcher. + local pattern local function makepattern(t,k,v) + if t then + rawset(t,k,v) + end local colon = P(":") - local p for k, v in table.sortedpairs(prefixes) do if p then p = P(k) + p @@ -14211,9 +14970,6 @@ if os.type == "unix" then end end pattern = Cs((p * colon + colon/";" + P(1))^0) - if t then - t[k] = v - end end makepattern() @@ -14424,18 +15180,7 @@ local trace_cache = false trackers.register("resolvers.cache", functi local trace_containers = false trackers.register("resolvers.containers", function(v) trace_containers = v end) local trace_storage = false trackers.register("resolvers.storage", function(v) trace_storage = v end) ---[[ldx-- -

Once we found ourselves defining similar cache constructs -several times, containers were introduced. Containers are used -to collect tables in memory and reuse them when possible based -on (unique) hashes (to be provided by the calling function).

- -

Caching to disk is disabled by default. Version numbers are -stored in the saved table which makes it possible to change the -table structures without bothering about the disk cache.

-

Examples of usage can be found in the font related code.

---ldx]]-- containers = containers or { } local containers = containers @@ -14670,11 +15415,7 @@ local trace_locating = false trackers.register("resolvers.locating", function(v local report_zip = logs.reporter("resolvers","zip") --- zip:///oeps.zip?name=bla/bla.tex --- zip:///oeps.zip?tree=tex/texmf-local --- zip:///texmf.zip?tree=/tex/texmf --- zip:///texmf.zip?tree=/tex/texmf-local --- zip:///texmf-mine.zip?tree=/tex/texmf-projects + local resolvers = resolvers @@ -14999,7 +15740,7 @@ end -- of closure do -- create closure to overcome 200 locals limit -if not modules then modules = { } end modules ['data-crl'] = { +if not modules then modules = { } end modules ['data-sch'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -15007,60 +15748,199 @@ if not modules then modules = { } end modules ['data-crl'] = { license = "see context related readme files" } --- this one is replaced by data-sch.lua -- +local loadstring = loadstring +local gsub, concat, format = string.gsub, table.concat, string.format +local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders -local gsub = string.gsub +local trace_schemes = false trackers.register("resolvers.schemes",function(v) trace_schemes = v end) +local report_schemes = logs.reporter("resolvers","schemes") -local resolvers = resolvers +local http = require("socket.http") +local ltn12 = require("ltn12") -local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders +local resolvers = resolvers +local schemes = resolvers.schemes or { } +resolvers.schemes = schemes + +local cleaners = { } +schemes.cleaners = cleaners + +local threshold = 24 * 60 * 60 + +directives.register("schemes.threshold", function(v) threshold = tonumber(v) or threshold end) + +function cleaners.none(specification) + return specification.original +end + +function cleaners.strip(specification) + return (gsub(specification.original,"[^%a%d%.]+","-")) -- so we keep periods +end + +function cleaners.md5(specification) + return file.addsuffix(md5.hex(specification.original),file.suffix(specification.path)) +end + +local cleaner = cleaners.strip + +directives.register("schemes.cleanmethod", function(v) cleaner = cleaners[v] or cleaners.strip end) + +function resolvers.schemes.cleanname(specification) + local hash = cleaner(specification) + if trace_schemes then + report_schemes("hashing %s to %s",specification.original,hash) + end + return hash +end -resolvers.curl = resolvers.curl or { } -local curl = resolvers.curl +local cached, loaded, reused, thresholds, handlers = { }, { }, { }, { }, { } -local cached = { } +local function runcurl(name,cachename) -- we use sockets instead or the curl library when possible + local command = "curl --silent --create-dirs --output " .. cachename .. " " .. name + os.spawn(command) +end -local function runcurl(specification) +local function fetch(specification) local original = specification.original - -- local scheme = specification.scheme - local cleanname = gsub(original,"[^%a%d%.]+","-") - local cachename = caches.setfirstwritablefile(cleanname,"curl") + local scheme = specification.scheme + local cleanname = schemes.cleanname(specification) + local cachename = caches.setfirstwritablefile(cleanname,"schemes") if not cached[original] then - if not io.exists(cachename) then + statistics.starttiming(schemes) + if not io.exists(cachename) or (os.difftime(os.time(),lfs.attributes(cachename).modification) > (thresholds[protocol] or threshold)) then cached[original] = cachename - local command = "curl --silent --create-dirs --output " .. cachename .. " " .. original - os.spawn(command) + local handler = handlers[scheme] + if handler then + if trace_schemes then + report_schemes("fetching '%s', protocol '%s', method 'built-in'",original,scheme) + end + logs.flush() + handler(specification,cachename) + else + if trace_schemes then + report_schemes("fetching '%s', protocol '%s', method 'curl'",original,scheme) + end + logs.flush() + runcurl(original,cachename) + end end if io.exists(cachename) then cached[original] = cachename + if trace_schemes then + report_schemes("using cached '%s', protocol '%s', cachename '%s'",original,scheme,cachename) + end else cached[original] = "" + if trace_schemes then + report_schemes("using missing '%s', protocol '%s'",original,scheme) + end end + loaded[scheme] = loaded[scheme] + 1 + statistics.stoptiming(schemes) + else + if trace_schemes then + report_schemes("reusing '%s', protocol '%s'",original,scheme) + end + reused[scheme] = reused[scheme] + 1 end return cached[original] end --- old code: we could be cleaner using specification (see schemes) - local function finder(specification,filetype) - return resolvers.methodhandler("finders",runcurl(specification),filetype) + return resolvers.methodhandler("finders",fetch(specification),filetype) end local opener = openers.file local loader = loaders.file -local function install(scheme) - finders[scheme] = finder - openers[scheme] = opener - loaders[scheme] = loader +local function install(scheme,handler,newthreshold) + handlers [scheme] = handler + loaded [scheme] = 0 + reused [scheme] = 0 + finders [scheme] = finder + openers [scheme] = opener + loaders [scheme] = loader + thresholds[scheme] = newthreshold or threshold end -resolvers.curl.install = install +schemes.install = install + +local function http_handler(specification,cachename) + local tempname = cachename .. ".tmp" + local f = io.open(tempname,"wb") + local status, message = http.request { + url = specification.original, + sink = ltn12.sink.file(f) + } + if not status then + os.remove(tempname) + else + os.remove(cachename) + os.rename(tempname,cachename) + end + return cachename +end -install('http') -install('https') +install('http',http_handler) +install('https') -- see pod install('ftp') +statistics.register("scheme handling time", function() + local l, r, nl, nr = { }, { }, 0, 0 + for k, v in table.sortedhash(loaded) do + if v > 0 then + nl = nl + 1 + l[nl] = k .. ":" .. v + end + end + for k, v in table.sortedhash(reused) do + if v > 0 then + nr = nr + 1 + r[nr] = k .. ":" .. v + end + end + local n = nl + nr + if n > 0 then + l = nl > 0 and concat(l) or "none" + r = nr > 0 and concat(r) or "none" + return format("%s seconds, %s processed, threshold %s seconds, loaded: %s, reused: %s", + statistics.elapsedtime(schemes), n, threshold, l, r) + else + return nil + end +end) + +-- We provide a few more helpers: + +----- http = require("socket.http") +local httprequest = http.request +local toquery = url.toquery + +-- local function httprequest(url) +-- return os.resultof(format("curl --silent %q", url)) +-- end + +local function fetchstring(url,data) + local q = data and toquery(data) + if q then + url = url .. "?" .. q + end + local reply = httprequest(url) + return reply -- just one argument +end + +schemes.fetchstring = fetchstring + +function schemes.fetchtable(url,data) + local reply = fetchstring(url,data) + if reply then + local s = loadstring("return " .. reply) + if s then + return s() + end + end +end + end -- of closure @@ -15074,170 +15954,199 @@ if not modules then modules = { } end modules ['data-lua'] = { license = "see context related readme files" } --- some loading stuff ... we might move this one to slot 2 depending --- on the developments (the loaders must not trigger kpse); we could --- of course use a more extensive lib path spec +-- We overload the regular loader. We do so because we operate mostly in +-- tds and use our own loader code. Alternatively we could use a more +-- extensive definition of package.path and package.cpath but even then +-- we're not done. Also, we now have better tracing. +-- +-- -- local mylib = require("libtest") +-- -- local mysql = require("luasql.mysql") -local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end) +local concat = table.concat + +local trace_libraries = false + +trackers.register("resolvers.libraries", function(v) trace_libraries = v end) +trackers.register("resolvers.locating", function(v) trace_libraries = v end) local report_libraries = logs.reporter("resolvers","libraries") local gsub, insert = string.gsub, table.insert +local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match local unpack = unpack or table.unpack +local is_readable = file.is_readable local resolvers, package = resolvers, package -local libformats = { 'luatexlibs', 'tex', 'texmfscripts', 'othertextfiles' } -- 'luainputs' -local clibformats = { 'lib' } - -local _path_, libpaths, _cpath_, clibpaths - -function package.libpaths() - if not _path_ or package.path ~= _path_ then - _path_ = package.path - libpaths = file.splitpath(_path_,";") +local libsuffixes = { 'tex', 'lua' } +local clibsuffixes = { 'lib' } +local libformats = { 'TEXINPUTS', 'LUAINPUTS' } +local clibformats = { 'CLUAINPUTS' } + +local libpaths = nil +local clibpaths = nil +local libhash = { } +local clibhash = { } +local libextras = { } +local clibextras = { } + +local pattern = Cs(P("!")^0 / "" * (P("/") * P(-1) / "/" + P("/")^1 / "/" + 1)^0) + +local function cleanpath(path) --hm, don't we have a helper for this? + return resolvers.resolve(lpegmatch(pattern,path)) +end + +local function getlibpaths() + if not libpaths then + libpaths = { } + for i=1,#libformats do + local paths = resolvers.expandedpathlistfromvariable(libformats[i]) + for i=1,#paths do + local path = cleanpath(paths[i]) + if not libhash[path] then + libpaths[#libpaths+1] = path + libhash[path] = true + end + end + end end return libpaths end -function package.clibpaths() - if not _cpath_ or package.cpath ~= _cpath_ then - _cpath_ = package.cpath - clibpaths = file.splitpath(_cpath_,";") +local function getclibpaths() + if not clibpaths then + clibpaths = { } + for i=1,#clibformats do + local paths = resolvers.expandedpathlistfromvariable(clibformats[i]) + for i=1,#paths do + local path = cleanpath(paths[i]) + if not clibhash[path] then + clibpaths[#clibpaths+1] = path + clibhash[path] = true + end + end + end end return clibpaths end -local function thepath(...) - local t = { ... } t[#t+1] = "?.lua" - local path = file.join(unpack(t)) - if trace_locating then - report_libraries("! appending '%s' to 'package.path'",path) +package.libpaths = getlibpaths +package.clibpaths = getclibpaths + +function package.extralibpath(...) + local paths = { ... } + for i=1,#paths do + local path = cleanpath(paths[i]) + if not libhash[path] then + if trace_libraries then + report_libraries("! extra lua path '%s'",path) + end + libextras[#libextras+1] = path + libpaths[#libpaths +1] = path + end end - return path end -local p_libpaths, a_libpaths = { }, { } - -function package.appendtolibpath(...) - insert(a_libpath,thepath(...)) +function package.extraclibpath(...) + local paths = { ... } + for i=1,#paths do + local path = cleanpath(paths[i]) + if not clibhash[path] then + if trace_libraries then + report_libraries("! extra lib path '%s'",path) + end + clibextras[#clibextras+1] = path + clibpaths[#clibpaths +1] = path + end + end end -function package.prependtolibpath(...) - insert(p_libpaths,1,thepath(...)) +if not package.loaders[-2] then + -- use package-path and package-cpath + package.loaders[-2] = package.loaders[2] end --- beware, we need to return a loadfile result ! +local function loadedaslib(resolved,rawname) + return package.loadlib(resolved,"luaopen_" .. gsub(rawname,"%.","_")) +end -local function loaded(libpaths,name,simple) - for i=1,#libpaths do -- package.path, might become option - local libpath = libpaths[i] - local resolved = gsub(libpath,"%?",simple) - if trace_locating then -- more detail - report_libraries("! checking for '%s' on 'package.path': '%s' => '%s'",simple,libpath,resolved) - end - if file.is_readable(resolved) then - if trace_locating then - report_libraries("! lib '%s' located via 'package.path': '%s'",name,resolved) - end - return loadfile(resolved) - end +local function loadedbylua(name) + if trace_libraries then + report_libraries("! locating %q using normal loader",name) end + local resolved = package.loaders[-2](name) end -package.loaders[2] = function(name) -- was [#package.loaders+1] - if file.suffix(name) == "" then - name = file.addsuffix(name,"lua") -- maybe a list - if trace_locating then -- mode detail - report_libraries("! locating '%s' with forced suffix",name) - end - else - if trace_locating then -- mode detail - report_libraries("! locating '%s'",name) - end +local function loadedbyformat(name,rawname,suffixes,islib) + if trace_libraries then + report_libraries("! locating %q as %q using formats %q",rawname,name,concat(suffixes)) end - for i=1,#libformats do - local format = libformats[i] + for i=1,#suffixes do -- so we use findfile and not a lookup loop + local format = suffixes[i] local resolved = resolvers.findfile(name,format) or "" - if trace_locating then -- mode detail - report_libraries("! checking for '%s' using 'libformat path': '%s'",name,format) + if trace_libraries then + report_libraries("! checking for %q' using format %q",name,format) end if resolved ~= "" then - if trace_locating then - report_libraries("! lib '%s' located via environment: '%s'",name,resolved) + if trace_libraries then + report_libraries("! lib %q located on %q",name,resolved) end - return loadfile(resolved) - end - end - -- libpaths - local libpaths, clibpaths = package.libpaths(), package.clibpaths() - local simple = gsub(name,"%.lua$","") - local simple = gsub(simple,"%.","/") - local resolved = loaded(p_libpaths,name,simple) or loaded(libpaths,name,simple) or loaded(a_libpaths,name,simple) - if resolved then - return resolved - end - -- - local libname = file.addsuffix(simple,os.libsuffix) - for i=1,#clibformats do - -- better have a dedicated loop - local format = clibformats[i] - local paths = resolvers.expandedpathlistfromvariable(format) - for p=1,#paths do - local path = paths[p] - local resolved = file.join(path,libname) - if trace_locating then -- mode detail - report_libraries("! checking for '%s' using 'clibformat path': '%s'",libname,path) - end - if file.is_readable(resolved) then - if trace_locating then - report_libraries("! lib '%s' located via 'clibformat': '%s'",libname,resolved) - end - return package.loadlib(resolved,name) + if islib then + return loadedaslib(resolved,rawname) + else + return loadfile(resolved) end end end - for i=1,#clibpaths do -- package.path, might become option - local libpath = clibpaths[i] - local resolved = gsub(libpath,"?",simple) - if trace_locating then -- more detail - report_libraries("! checking for '%s' on 'package.cpath': '%s'",simple,libpath) +end + +local function loadedbypath(name,rawname,paths,islib,what) + if trace_libraries then + report_libraries("! locating %q as %q on %q paths",rawname,name,what) + end + for p=1,#paths do + local path = paths[p] + local resolved = file.join(path,name) + if trace_libraries then -- mode detail + report_libraries("! checking for %q using %q path %q",name,what,path) end - if file.is_readable(resolved) then - if trace_locating then - report_libraries("! lib '%s' located via 'package.cpath': '%s'",name,resolved) + if is_readable(resolved) then + if trace_libraries then + report_libraries("! lib %q located on %q",name,resolved) + end + if islib then + return loadedaslib(resolved,rawname) + else + return loadfile(resolved) end - return package.loadlib(resolved,name) - end - end - -- just in case the distribution is messed up - if trace_loading then -- more detail - report_libraries("! checking for '%s' using 'luatexlibs': '%s'",name) - end - local resolved = resolvers.findfile(file.basename(name),'luatexlibs') or "" - if resolved ~= "" then - if trace_locating then - report_libraries("! lib '%s' located by basename via environment: '%s'",name,resolved) end - return loadfile(resolved) end - if trace_locating then - report_libraries('? unable to locate lib: %s',name) - end --- return "unable to locate " .. name end -resolvers.loadlualib = require - --- -- -- -- +local function notloaded(name) + if trace_libraries then + report_libraries("? unable to locate library %q",name) + end +end -package.obsolete = package.obsolete or { } +package.loaders[2] = function(name) + local thename = gsub(name,"%.","/") + local luaname = file.addsuffix(thename,"lua") + local libname = file.addsuffix(thename,os.libsuffix) + return + loadedbyformat(luaname,name,libsuffixes, false) + or loadedbyformat(libname,name,clibsuffixes, true) + or loadedbypath (luaname,name,getlibpaths (),false,"lua") + or loadedbypath (luaname,name,getclibpaths(),false,"lua") + or loadedbypath (libname,name,getclibpaths(),true, "lib") + or loadedbylua (name) + or notloaded (name) +end -package.append_libpath = appendtolibpath -- will become obsolete -package.prepend_libpath = prependtolibpath -- will become obsolete +-- package.loaders[3] = nil +-- package.loaders[4] = nil -package.obsolete.append_libpath = appendtolibpath -- will become obsolete -package.obsolete.prepend_libpath = prependtolibpath -- will become obsolete +resolvers.loadlualib = require end -- of closure @@ -15707,7 +16616,6 @@ function environment.make_format(name) end function environment.run_format(name,data,more) - -- hm, rather old code here; we can now use the file.whatever functions if name and name ~= "" then local barename = file.removesuffix(name) local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats") @@ -15736,6 +16644,129 @@ function environment.run_format(name,data,more) end +end -- of closure + +do -- create closure to overcome 200 locals limit + +if not modules then modules = { } end modules ['util-tpl'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- This is experimental code. Coming from dos and windows, I've always used %whatever% +-- as template variables so let's stick to it. After all, it's easy to parse and stands +-- out well. A double %% is turned into a regular %. + +utilities.templates = utilities.templates or { } +local templates = utilities.templates + +local trace_template = false trackers.register("templates.trace",function(v) trace_template = v end) +local report_template = logs.reporter("template") + +local format = string.format +local P, C, Cs, Carg, lpegmatch = lpeg.P, lpeg.C, lpeg.Cs, lpeg.Carg, lpeg.match + +-- todo: make installable template.new + +local replacer + +local function replacekey(k,t,recursive) + local v = t[k] + if not v then + if trace_template then + report_template("unknown key %q",k) + end + return "" + else + if trace_template then + report_template("setting key %q to value %q",k,v) + end + if recursive then + return lpegmatch(replacer,v,1,t) + else + return v + end + end +end + +local sqlescape = lpeg.replacer { + { "'", "''" }, + { "\\", "\\\\" }, + { "\r\n", "\\n" }, + { "\r", "\\n" }, + -- { "\t", "\\t" }, +} + +local escapers = { + lua = function(s) + return format("%q",s) + end, + sql = function(s) + return lpegmatch(sqlescape,s) + end, +} + +local function replacekeyunquoted(s,t,how,recurse) -- ".. \" " + local escaper = how and escapers[how] or escapers.lua + return escaper(replacekey(s,t,recurse)) +end + +local single = P("%") -- test %test% test : resolves test +local double = P("%%") -- test 10%% test : %% becomes % +local lquoted = P("%[") -- test %[test]" test : resolves test with escaped "'s +local rquoted = P("]%") -- + +local escape = double / '%%' +local nosingle = single / '' +local nodouble = double / '' +local nolquoted = lquoted / '' +local norquoted = rquoted / '' + +local key = nosingle * (C((1-nosingle)^1 * Carg(1) * Carg(2) * Carg(3))/replacekey) * nosingle +local unquoted = nolquoted * ((C((1 - norquoted)^1) * Carg(1) * Carg(2) * Carg(3))/replacekeyunquoted) * norquoted +local any = P(1) + + replacer = Cs((unquoted + escape + key + any)^0) + +local function replace(str,mapping,how,recurse) + if mapping then + return lpegmatch(replacer,str,1,mapping,how or "lua",recurse or false) or str + else + return str + end +end + +-- print(replace("test '%[x]%' test",{ x = [[a 'x'  a]] })) +-- print(replace("test '%[x]%' test",{ x = [[a 'x'  a]] },'sql')) + +templates.replace = replace + +function templates.load(filename,mapping,how,recurse) + local data = io.loaddata(filename) or "" + if mapping and next(mapping) then + return replace(data,mapping,how,recurse) + else + return data + end +end + +function templates.resolve(t,mapping,how,recurse) + if not mapping then + mapping = t + end + for k, v in next, t do + t[k] = replace(v,mapping,how,recurse) + end + return t +end + +-- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" })) +-- inspect(utilities.templates.resolve({ one = "%two%", two = "two", three = "%three%" })) + + end -- of closure -- end library merge @@ -15796,7 +16827,7 @@ own.libs = { -- order can be made better -- 'data-bin.lua', 'data-zip.lua', 'data-tre.lua', - 'data-crl.lua', + 'data-sch.lua', 'data-lua.lua', 'data-aux.lua', -- updater 'data-tmf.lua', @@ -15804,6 +16835,8 @@ own.libs = { -- order can be made better 'luat-sta.lua', 'luat-fmt.lua', + + 'util-tpl.lua', } -- We need this hack till luatex is fixed. @@ -15824,7 +16857,7 @@ own.path = gsub(match(own.name,"^(.+)[\\/].-$") or ".","\\","/") local ownpath, owntree = own.path, environment and environment.ownpath or own.path -own.list = { +own.list = { -- predictable paths '.', ownpath , ownpath .. "/../sources", -- HH's development path @@ -15848,7 +16881,7 @@ local function locate_libs() local filename = pth .. "/" .. lib local found = lfs.isfile(filename) if found then - package.path = package.path .. ";" .. pth .. "/?.lua" -- in case l-* does a require + package.path = package.path .. ";" .. pth .. "/?.lua" -- in case l-* does a require (probably obsolete) return pth end end @@ -15980,6 +17013,7 @@ local helpinfo = [[ --var-value report value of variable --find-file report file location --find-path report path of file +--show-package-path report package paths --pattern=str filter variables ]] @@ -16093,7 +17127,8 @@ function runners.execute_script(fullname,internal,nosplit) elseif state == 'skip' then return true elseif state == "run" then - local path, name, suffix, result = file.dirname(fullname), file.basename(fullname), file.extname(fullname), "" + local path, name, suffix = file.splitname(fullname) + local result = "" if path ~= "" then result = fullname elseif name then @@ -16104,7 +17139,7 @@ function runners.execute_script(fullname,internal,nosplit) name = gsub(name,"^script:","") if suffix == "" and runners.registered[name] and runners.registered[name][1] then name = runners.registered[name][1] - suffix = file.extname(name) + suffix = file.suffix(name) end if suffix == "" then -- loop over known suffixes @@ -16131,7 +17166,7 @@ function runners.execute_script(fullname,internal,nosplit) environment.ownscript = result dofile(result) else - local binary = runners.applications[file.extname(result)] + local binary = runners.applications[file.suffix(result)] result = string.quoted(string.unquoted(result)) -- if string.match(result,' ') and not string.match(result,"^\".*\"$") then -- result = '"' .. result .. '"' @@ -16324,7 +17359,7 @@ function resolvers.launch(str) -- maybe we also need to test on mtxrun.launcher.suffix environment -- variable or on windows consult the assoc and ftype vars and such local launchers = runners.launchers[os.platform] if launchers then - local suffix = file.extname(str) if suffix then + local suffix = file.suffix(str) if suffix then local runner = launchers[suffix] if runner then str = runner .. " " .. str end @@ -16383,7 +17418,7 @@ function runners.find_mtx_script(filename) end filename = file.addsuffix(filename,"lua") local basename = file.removesuffix(file.basename(filename)) - local suffix = file.extname(filename) + local suffix = file.suffix(filename) -- qualified path, raw name local fullname = file.is_qualified_path(filename) and io.exists(filename) and filename if fullname and fullname ~= "" then @@ -16438,7 +17473,7 @@ function runners.execute_ctx_script(filename,...) runners.register_arguments(...) local arguments = environment.arguments_after local fullname = runners.find_mtx_script(filename) or "" - if file.extname(fullname) == "cld" then + if file.suffix(fullname) == "cld" then -- handy in editors where we force --autopdf report("running cld script: %s",filename) table.insert(arguments,1,fullname) @@ -16546,6 +17581,21 @@ function runners.timed(action) statistics.timed(action) end +function runners.associate(filename) + os.launch(filename) +end + +function runners.gethelp(filename) + local url = environment.argument("url") + if url and url ~= "" then + local command = string.gsub(environment.argument("command") or "unknown","^%s*\\*(.-)%s*$","%1") + url = utilities.templates.replace(url,{ command = command }) + os.launch(url) + else + report("no --url given") + end +end + -- this is a bit dirty ... first we store the first filename and next we -- split the arguments so that we only see the ones meant for this script -- ... later we will use the second half @@ -16648,7 +17698,18 @@ else end -if e_argument("selfmerge") then +if e_argument("script") or e_argument("scripts") then + + -- run a script by loading it (using libs), pass args + + runners.loadbase() + if is_mkii_stub then + ok = runners.execute_script(filename,false,true) + else + ok = runners.execute_ctx_script(filename) + end + +elseif e_argument("selfmerge") then -- embed used libraries @@ -16671,23 +17732,25 @@ elseif e_argument("selfupdate") then trackers.enable("resolvers.locating") resolvers.updatescript(own.name,"mtxrun") -elseif e_argument("ctxlua") or e_argument("internal") then +elseif e_argument("show-package-path") or e_argument("show-package-paths") then - -- run a script by loading it (using libs) + local l = package.libpaths() + local c = package.clibpaths() - runners.loadbase() - ok = runners.execute_script(filename,true) + for i=1,#l do + report("package lib path %s: %s",i,l[i]) + end -elseif e_argument("script") or e_argument("scripts") then + for i=1,#c do + report("package clib path %s: %s",i,c[i]) + end - -- run a script by loading it (using libs), pass args +elseif e_argument("ctxlua") or e_argument("internal") then + + -- run a script by loading it (using libs) runners.loadbase() - if is_mkii_stub then - ok = runners.execute_script(filename,false,true) - else - ok = runners.execute_ctx_script(filename) - end + ok = runners.execute_script(filename,true) elseif e_argument("execute") then @@ -16715,6 +17778,14 @@ elseif e_argument("launch") then runners.loadbase() runners.launch_file(filename) +elseif e_argument("associate") then + + runners.associate(filename) + +elseif e_argument("gethelp") then + + runners.gethelp() + elseif e_argument("makestubs") then -- make stubs (depricated) @@ -16806,7 +17877,7 @@ elseif e_argument("find-path") then elseif e_argument("expand-braces") then - -- luatools: runners.execute_ctx_script("mtx-base","--expand-braces",filename + -- luatools: runners.execute_ctx_script("mtx-base","--expand-braces",filename) resolvers.load("nofiles") runners.register_arguments(filename) diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun index 108f2a8a1..e6bbbe2b5 100644 --- a/scripts/context/stubs/unix/mtxrun +++ b/scripts/context/stubs/unix/mtxrun @@ -8,6 +8,11 @@ if not modules then modules = { } end modules ['mtxrun'] = { license = "see context related readme files" } +-- if not lpeg then require("lpeg") end +-- if not md5 then require("md5") end +-- if not lfs then require("lfs") end +-- if not texconfig then texconfig = { } end + -- one can make a stub: -- -- #!/bin/sh @@ -150,11 +155,28 @@ function string.topattern(str,lowercase,strict) end end + +function string.valid(str,default) + return (type(str) == "string" and str ~= "" and str) or default or nil +end + -- obsolete names: string.quote = string.quoted string.unquote = string.unquoted +-- handy fallback + +string.itself = function(s) return s end + +-- also handy (see utf variant) + +local pattern = Ct(C(1)^0) + +function string.totable(str) + return lpegmatch(pattern,str) +end + end -- of closure @@ -168,7 +190,8 @@ if not modules then modules = { } end modules ['l-table'] = { license = "see context related readme files" } -local type, next, tostring, tonumber, ipairs, table, string = type, next, tostring, tonumber, ipairs, table, string +local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs +local table, string = table, string local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match local getmetatable, setmetatable = getmetatable, setmetatable @@ -179,6 +202,8 @@ local getinfo = debug.getinfo -- impact on ConTeXt was not that large; the remaining ipairs already -- have been replaced. In a similar fashion we also hardly used pairs. -- +-- Hm, actually ipairs was retained, but we no longer use it anyway. +-- -- Just in case, we provide the fallbacks as discussed in Programming -- in Lua (http://www.lua.org/pil/7.3.html): @@ -238,12 +263,16 @@ function table.strip(tab) end function table.keys(t) - local keys, k = { }, 0 - for key, _ in next, t do - k = k + 1 - keys[k] = key + if t then + local keys, k = { }, 0 + for key, _ in next, t do + k = k + 1 + keys[k] = key + end + return keys + else + return { } end - return keys end local function compare(a,b) @@ -256,41 +285,49 @@ local function compare(a,b) end local function sortedkeys(tab) - local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed - for key,_ in next, tab do - s = s + 1 - srt[s] = key - if category == 3 then - -- no further check - else - local tkey = type(key) - if tkey == "string" then - category = (category == 2 and 3) or 1 - elseif tkey == "number" then - category = (category == 1 and 3) or 2 + if tab then + local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed + for key,_ in next, tab do + s = s + 1 + srt[s] = key + if category == 3 then + -- no further check else - category = 3 + local tkey = type(key) + if tkey == "string" then + category = (category == 2 and 3) or 1 + elseif tkey == "number" then + category = (category == 1 and 3) or 2 + else + category = 3 + end end end - end - if category == 0 or category == 3 then - sort(srt,compare) + if category == 0 or category == 3 then + sort(srt,compare) + else + sort(srt) + end + return srt else - sort(srt) + return { } end - return srt end local function sortedhashkeys(tab) -- fast one - local srt, s = { }, 0 - for key,_ in next, tab do - if key then - s= s + 1 - srt[s] = key + if tab then + local srt, s = { }, 0 + for key,_ in next, tab do + if key then + s= s + 1 + srt[s] = key + end end + sort(srt) + return srt + else + return { } end - sort(srt) - return srt end table.sortedkeys = sortedkeys @@ -315,7 +352,7 @@ end table.sortedhash = sortedhash table.sortedpairs = sortedhash -function table.append(t, list) +function table.append(t,list) local n = #t for i=1,#list do n = n + 1 @@ -550,12 +587,26 @@ local function do_serialize(root,name,depth,level,indexed) end -- we could check for k (index) being number (cardinal) if root and next(root) then - local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) + -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) + -- if compact then + -- -- NOT: for k=1,#root do (we need to quit at nil) + -- for k,v in ipairs(root) do -- can we use next? + -- if not first then first = k end + -- last = last + 1 + -- end + -- end + local first, last = nil, 0 if compact then - -- NOT: for k=1,#root do (we need to quit at nil) - for k,v in ipairs(root) do -- can we use next? - if not first then first = k end - last = last + 1 + last = #root + for k=1,last do +-- if not root[k] then + if root[k] == nil then + last = k - 1 + break + end + end + if last > 0 then + first = 1 end end local sk = sortedkeys(root) @@ -1027,23 +1078,27 @@ function table.reversed(t) end end -function table.sequenced(t,sep,simple) -- hash only - local s, n = { }, 0 - for k, v in sortedhash(t) do - if simple then - if v == true then - n = n + 1 - s[n] = k - elseif v and v~= "" then +function table.sequenced(t,sep) -- hash only + if t then + local s, n = { }, 0 + for k, v in sortedhash(t) do + if simple then + if v == true then + n = n + 1 + s[n] = k + elseif v and v~= "" then + n = n + 1 + s[n] = k .. "=" .. tostring(v) + end + else n = n + 1 s[n] = k .. "=" .. tostring(v) end - else - n = n + 1 - s[n] = k .. "=" .. tostring(v) end + return concat(s, sep or " | ") + else + return "" end - return concat(s, sep or " | ") end function table.print(t,...) @@ -1124,6 +1179,8 @@ local lpeg = require("lpeg") -- tracing (only used when we encounter a problem in integration of lpeg in luatex) +-- some code will move to unicode and string + local report = texio and texio.write_nl or print -- local lpmatch = lpeg.match @@ -1160,8 +1217,8 @@ local report = texio and texio.write_nl or print -- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end -- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end -local type = type -local byte, char, gmatch = string.byte, string.char, string.gmatch +local type, next = type, next +local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format -- Beware, we predefine a bunch of patterns here and one reason for doing so -- is that we get consistent behaviour in some of the visualizers. @@ -1169,9 +1226,8 @@ local byte, char, gmatch = string.byte, string.char, string.gmatch lpeg.patterns = lpeg.patterns or { } -- so that we can share local patterns = lpeg.patterns -local P, R, S, V, match = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match -local Ct, C, Cs, Cc = lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc -local lpegtype = lpeg.type +local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp +local lpegtype, lpegmatch = lpeg.type, lpeg.match local utfcharacters = string.utfcharacters local utfgmatch = unicode and unicode.utf8.gmatch @@ -1222,6 +1278,10 @@ patterns.utf8char = utf8char patterns.validutf8 = validutf8char patterns.validutf8char = validutf8char +local eol = S("\n\r") +local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace = eol + spacer + patterns.digit = digit patterns.sign = sign patterns.cardinal = sign^0 * digit^1 @@ -1241,16 +1301,16 @@ patterns.letter = patterns.lowercase + patterns.uppercase patterns.space = space patterns.tab = P("\t") patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = S("\n\r") -patterns.spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace patterns.newline = newline patterns.emptyline = newline^1 -patterns.nonspacer = 1 - patterns.spacer -patterns.whitespace = patterns.eol + patterns.spacer -patterns.nonwhitespace = 1 - patterns.whitespace +patterns.nonspacer = 1 - spacer +patterns.nonwhitespace = 1 - whitespace patterns.equal = P("=") patterns.comma = P(",") -patterns.commaspacer = P(",") * patterns.spacer^0 +patterns.commaspacer = P(",") * spacer^0 patterns.period = P(".") patterns.colon = P(":") patterns.semicolon = P(";") @@ -1265,6 +1325,10 @@ patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble patterns.unspacer = ((patterns.spacer^1)/"")^0 +patterns.singlequoted = squote * patterns.nosquote * squote +patterns.doublequoted = dquote * patterns.nodquote * dquote +patterns.quoted = patterns.doublequoted + patterns.singlequoted + patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 patterns.beginline = #(1-newline) @@ -1275,8 +1339,17 @@ patterns.beginline = #(1-newline) -- print(string.unquoted('"test"')) -- print(string.unquoted('"test"')) -function lpeg.anywhere(pattern) --slightly adapted from website - return P { P(pattern) + 1 * V(1) } -- why so complex? +local function anywhere(pattern) --slightly adapted from website + return P { P(pattern) + 1 * V(1) } +end + +lpeg.anywhere = anywhere + +function lpeg.instringchecker(p) + p = anywhere(p) + return function(str) + return lpegmatch(p,str) and true or false + end end function lpeg.splitter(pattern, action) @@ -1325,7 +1398,7 @@ function string.splitup(str,separator) if not separator then separator = "," end - return match(splitters_m[separator] or splitat(separator),str) + return lpegmatch(splitters_m[separator] or splitat(separator),str) end @@ -1337,16 +1410,20 @@ function lpeg.split(separator,str) c = tsplitat(separator) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end function string.split(str,separator) - local c = cache[separator] - if not c then - c = tsplitat(separator) - cache[separator] = c + if separator then + local c = cache[separator] + if not c then + c = tsplitat(separator) + cache[separator] = c + end + return lpegmatch(c,str) + else + return { str } end - return match(c,str) end local spacing = patterns.spacer^0 * newline -- sort of strip @@ -1362,7 +1439,7 @@ local linesplitter = tsplitat(newline) patterns.linesplitter = linesplitter function string.splitlines(str) - return match(linesplitter,str) + return lpegmatch(linesplitter,str) end local utflinesplitter = utfbom^-1 * tsplitat(newline) @@ -1370,7 +1447,58 @@ local utflinesplitter = utfbom^-1 * tsplitat(newline) patterns.utflinesplitter = utflinesplitter function string.utfsplitlines(str) - return match(utflinesplitter,str or "") + return lpegmatch(utflinesplitter,str or "") +end + +local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0) +local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0) + +function string.utfsplit(str,ignorewhitespace) -- new + if ignorewhitespace then + return lpegmatch(utfcharsplitter_iws,str or "") + else + return lpegmatch(utfcharsplitter_ows,str or "") + end +end + +-- inspect(string.utfsplit("a b c d")) +-- inspect(string.utfsplit("a b c d",true)) + +-- -- alternative 1: 0.77 +-- +-- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0) +-- +-- function string.utflength(str) +-- return #lpegmatch(utfcharcounter,str or "") +-- end +-- +-- -- alternative 2: 1.70 +-- +-- local n = 0 +-- +-- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow +-- +-- function string.utflength(str) +-- n = 0 +-- lpegmatch(utfcharcounter,str or "") +-- return n +-- end +-- +-- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) + +local n = 0 + +local utfcharcounter = utfbom^-1 * Cs ( ( + Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end + + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end + + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end + + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end +)^0 ) + +function string.utflength(str) + n = 0 + lpegmatch(utfcharcounter,str or "") + return n end @@ -1384,7 +1512,7 @@ function lpeg.checkedsplit(separator,str) c = Ct(separator^0 * other * (separator^1 * other)^0) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end function string.checkedsplit(str,separator) @@ -1395,7 +1523,7 @@ function string.checkedsplit(str,separator) c = Ct(separator^0 * other * (separator^1 * other)^0) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end @@ -1440,11 +1568,11 @@ function lpeg.keeper(str) end function lpeg.frontstripper(str) -- or pattern (yet undocumented) - return (P(str) + P(true)) * Cs(P(1)^0) + return (P(str) + P(true)) * Cs(anything^0) end function lpeg.endstripper(str) -- or pattern (yet undocumented) - return Cs((1 - P(str) * P(-1))^0) + return Cs((1 - P(str) * endofstring)^0) end -- Just for fun I looked at the used bytecode and @@ -1453,8 +1581,22 @@ end function lpeg.replacer(one,two) if type(one) == "table" then local no = #one - if no > 0 then - local p + local p + if no == 0 then + for k, v in next, one do + local pp = P(k) / v + if p then + p = p + pp + else + p = pp + end + end + return Cs((p + 1)^0) + elseif no == 1 then + local o = one[1] + one, two = P(o[1]), o[2] + return Cs(((1-one)^1 + one/two)^0) + else for i=1,no do local o = one[i] local pp = P(o[1]) / o[2] @@ -1467,11 +1609,16 @@ function lpeg.replacer(one,two) return Cs((p + 1)^0) end else + one = P(one) two = two or "" - return Cs((P(one)/two + 1)^0) + return Cs(((1-one)^1 + one/two)^0) end end +-- print(lpeg.match(lpeg.replacer("e","a"),"test test")) +-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) +-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) + local splitters_f, splitters_s = { }, { } function lpeg.firstofsplit(separator) -- always return value @@ -1506,7 +1653,7 @@ local nany = utf8char/"" function lpeg.counter(pattern) pattern = Cs((P(pattern)/" " + nany)^0) return function(str) - return #match(pattern,str) + return #lpegmatch(pattern,str) end end @@ -1520,7 +1667,7 @@ if utfgmatch then end return n else -- 4 times slower but still faster than / function - return #match(Cs((P(what)/" " + nany)^0),str) + return #lpegmatch(Cs((P(what)/" " + nany)^0),str) end end @@ -1535,9 +1682,9 @@ else p = Cs((P(what)/" " + nany)^0) cache[p] = p end - return #match(p,str) + return #lpegmatch(p,str) else -- 4 times slower but still faster than / function - return #match(Cs((P(what)/" " + nany)^0),str) + return #lpegmatch(Cs((P(what)/" " + nany)^0),str) end end @@ -1564,7 +1711,7 @@ local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0) local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0) function string.escapedpattern(str,simple) - return match(simple and s or p,str) + return lpegmatch(simple and s or p,str) end -- utf extensies @@ -1611,7 +1758,7 @@ else p = P(uc) end end - match((utf8char/f)^0,str) + lpegmatch((utf8char/f)^0,str) return p end @@ -1627,7 +1774,7 @@ function lpeg.UR(str,more) first = str last = more or first else - first, last = match(range,str) + first, last = lpegmatch(range,str) if not last then return P(str) end @@ -1654,11 +1801,15 @@ end -function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") +function lpeg.is_lpeg(p) + return p and lpegtype(p) == "pattern" +end + +function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order if type(list) ~= "table" then list = { list, ... } end - -- sort(list) -- longest match first + -- table.sort(list) -- longest match first local p = P(list[1]) for l=2,#list do p = p + P(list[l]) @@ -1666,10 +1817,6 @@ function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") return p end -function lpeg.is_lpeg(p) - return p and lpegtype(p) == "pattern" -end - -- For the moment here, but it might move to utilities. Beware, we need to -- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we -- loop back from the end cq. prepend. @@ -1827,6 +1974,24 @@ end -- utfchar(0x205F), -- math thinspace -- } ) +-- handy from within tex: + +local lpegmatch = lpeg.match + +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! + +function string.tformat(fmt,...) + return format(lpegmatch(replacer,fmt),...) +end + +-- strips leading and trailing spaces and collapsed all other spaces + +local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) + +function string.collapsespaces(str) + return lpegmatch(pattern,str) +end + end -- of closure @@ -1851,14 +2016,14 @@ else io.fileseparator, io.pathseparator = "/" , ":" end -function io.loaddata(filename,textmode) +function io.loaddata(filename,textmode) -- return nil if empty local f = io.open(filename,(textmode and 'r') or 'rb') if f then local data = f:read('*all') f:close() - return data - else - return nil + if #data > 0 then + return data + end end end @@ -1880,6 +2045,45 @@ function io.savedata(filename,data,joiner) end end +function io.loadlines(filename,n) -- return nil if empty + local f = io.open(filename,'r') + if f then + if n then + local lines = { } + for i=1,n do + local line = f:read("*lines") + if line then + lines[#lines+1] = line + else + break + end + end + f:close() + lines = concat(lines,"\n") + if #lines > 0 then + return lines + end + else + local line = f:read("*line") or "" + assert(f:close()) + if #line > 0 then + return line + end + end + end +end + +function io.loadchunk(filename,n) + local f = io.open(filename,'rb') + if f then + local data = f:read(n or 1024) + f:close() + if #data > 0 then + return data + end + end +end + function io.exists(filename) local f = io.open(filename) if f == nil then @@ -2107,7 +2311,7 @@ if not modules then modules = { } end modules ['l-number'] = { -- this module will be replaced when we have the bit library -local tostring = tostring +local tostring, tonumber = tostring, tonumber local format, floor, match, rep = string.format, math.floor, string.match, string.rep local concat, insert = table.concat, table.insert local lpegmatch = lpeg.match @@ -2170,11 +2374,11 @@ function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ... end function number.setbit(x, p) - return hasbit(x, p) and x or x + p + return (x % (p + p) >= p) and x or x + p end function number.clearbit(x, p) - return hasbit(x, p) and x - p or x + return (x % (p + p) >= p) and x - p or x end @@ -2208,6 +2412,10 @@ function number.tobitstring(n,m) end +function number.valid(str,default) + return tonumber(str) or default or nil +end + end -- of closure @@ -2319,17 +2527,28 @@ if not modules then modules = { } end modules ['l-os'] = { -- os.name : windows | msdos | linux | macosx | solaris | .. | generic (new) -- os.platform : extended os.name with architecture +-- os.sleep() => socket.sleep() +-- math.randomseed(tonumber(string.sub(string.reverse(tostring(math.floor(socket.gettime()*10000))),1,6))) + -- maybe build io.flush in os.execute local os = os +local date, time = os.date, os.time local find, format, gsub, upper, gmatch = string.find, string.format, string.gsub, string.upper, string.gmatch local concat = table.concat -local random, ceil = math.random, math.ceil -local rawget, rawset, type, getmetatable, setmetatable, tonumber = rawget, rawset, type, getmetatable, setmetatable, tonumber +local random, ceil, randomseed = math.random, math.ceil, math.randomseed +local rawget, rawset, type, getmetatable, setmetatable, tonumber, tostring = rawget, rawset, type, getmetatable, setmetatable, tonumber, tostring -- The following code permits traversing the environment table, at least -- in luatex. Internally all environment names are uppercase. +-- The randomseed in Lua is not that random, although this depends on the operating system as well +-- as the binary (Luatex is normally okay). But to be sure we set the seed anyway. + +math.initialseed = tonumber(string.sub(string.reverse(tostring(ceil(socket and socket.gettime()*10000 or time()))),1,6)) + +randomseed(math.initialseed) + if not os.__getenv__ then os.__getenv__ = os.getenv @@ -2433,12 +2652,14 @@ else os.libsuffix, os.binsuffix, os.binsuffixes = 'so', '', { '' } end +local launchers = { + windows = "start %s", + macosx = "open %s", + unix = "$BROWSER %s &> /dev/null &", +} + function os.launch(str) - if os.type == "windows" then - os.execute("start " .. str) -- os.spawn ? - else - os.execute(str .. " &") -- os.spawn ? - end + os.execute(format(launchers[os.name] or launchers.unix,str)) end if not os.times then @@ -2649,7 +2870,7 @@ end local d function os.timezone(delta) - d = d or tonumber(tonumber(os.date("%H")-os.date("!%H"))) + d = d or tonumber(tonumber(date("%H")-date("!%H"))) if delta then if d > 0 then return format("+%02i:00",d) @@ -2661,6 +2882,44 @@ function os.timezone(delta) end end +local timeformat = format("%%s%s",os.timezone(true)) +local dateformat = "!%Y-%m-%d %H:%M:%S" + +function os.fulltime(t,default) + t = tonumber(t) or 0 + if t > 0 then + -- valid time + elseif default then + return default + else + t = nil + end + return format(timeformat,date(dateformat,t)) +end + +local dateformat = "%Y-%m-%d %H:%M:%S" + +function os.localtime(t,default) + t = tonumber(t) or 0 + if t > 0 then + -- valid time + elseif default then + return default + else + t = nil + end + return date(dateformat,t) +end + +function os.converttime(t,default) + local t = tonumber(t) + if t and t > 0 then + return date(dateformat,t) + else + return default or "-" + end +end + local memory = { } local function which(filename) @@ -2735,7 +2994,7 @@ local function nameonly(name) return (gsub(match(name,"^.+[/\\](.-)$") or name,"%.[%a%d]+$","")) end -local function extname(name,default) +local function suffixonly(name,default) return match(name,"^.+%.([^/\\]-)$") or default or "" end @@ -2744,11 +3003,16 @@ local function splitname(name) return n or name, s or "" end -file.basename = basename -file.dirname = dirname -file.nameonly = nameonly -file.extname = extname -file.suffix = extname +file.basename = basename + +file.pathpart = dirname +file.dirname = dirname + +file.nameonly = nameonly + +file.suffixonly = suffixonly +file.extname = suffixonly -- obsolete +file.suffix = suffixonly function file.removesuffix(filename) return (gsub(filename,"%.[%a%d]+$","")) @@ -2864,6 +3128,11 @@ end file.isreadable = file.is_readable -- depricated file.iswritable = file.is_writable -- depricated +function file.size(name) + local a = attributes(name) + return a and a.size or 0 +end + -- todo: lpeg \\ / .. does not save much local checkedsplit = string.checkedsplit @@ -3001,6 +3270,7 @@ local drive = C(R("az","AZ")) * P(":") local path = C(((1-slash)^0 * slash)^0) local suffix = period * C(P(1-period)^0 * P(-1)) local base = C((1-suffix)^0) +local rest = C(P(1)^0) drive = drive + Cc("") path = path + Cc("") @@ -3009,7 +3279,8 @@ suffix = suffix + Cc("") local pattern_a = drive * path * base * suffix local pattern_b = path * base * suffix -local pattern_c = C(drive * path) * C(base * suffix) +local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra captures +local pattern_d = path * rest function file.splitname(str,splitdrive) if splitdrive then @@ -3019,6 +3290,10 @@ function file.splitname(str,splitdrive) end end +function file.splitbase(str) + return lpegmatch(pattern_d,str) -- returns path, base+suffix +end + function file.nametotable(str,splitdrive) -- returns table local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) if splitdrive then @@ -3040,6 +3315,8 @@ function file.nametotable(str,splitdrive) -- returns table end end +-- print(file.splitbase("a/b/c.txt")) + -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end -- -- test { "c:", "c:/aa", "c:/aa/bb", "c:/aa/bb/cc", "c:/aa/bb/cc.dd", "c:/aa/bb/cc.dd.ee" } @@ -3081,15 +3358,30 @@ if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end -function file.needs_updating(oldname,newname,threshold) -- size modification access change - local oldtime = lfs.attributes(oldname, modification) - local newtime = lfs.attributes(newname, modification) - if newtime >= oldtime then - return false - elseif oldtime - newtime < (threshold or 1) then - return false +function file.needsupdating(oldname,newname,threshold) -- size modification access change + local oldtime = lfs.attributes(oldname,"modification") + if oldtime then + local newtime = lfs.attributes(newname,"modification") + if not newtime then + return true -- no new file, so no updating needed + elseif newtime >= oldtime then + return false -- new file definitely needs updating + elseif oldtime - newtime < (threshold or 1) then + return false -- new file is probably still okay + else + return true -- new file has to be updated + end else - return true + return false -- no old file, so no updating needed + end +end + +file.needs_updating = file.needsupdating + +function file.syncmtimes(oldname,newname) + local oldtime = lfs.attributes(oldname,"modification") + if oldtime and lfs.isfile(newname) then + lfs.touch(newname,oldtime,oldtime) end end @@ -3111,7 +3403,7 @@ function file.loadchecksum(name) return nil end -function file.savechecksum(name, checksum) +function file.savechecksum(name,checksum) if not checksum then checksum = file.checksum(name) end if checksum then io.savedata(name .. ".md5",checksum) @@ -3136,7 +3428,7 @@ if not modules then modules = { } end modules ['l-url'] = { local char, gmatch, gsub, format, byte, find = string.char, string.gmatch, string.gsub, string.format, string.byte, string.find local concat = table.concat local tonumber, type = tonumber, type -local P, C, R, S, Cs, Cc, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct +local P, C, R, S, Cs, Cc, Ct, Cf, Cg, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cg, lpeg.V local lpegmatch, lpegpatterns, replacer = lpeg.match, lpeg.patterns, lpeg.replacer -- from wikipedia: @@ -3169,15 +3461,19 @@ local endofstring = P(-1) local hexdigit = R("09","AF","af") local plus = P("+") local nothing = Cc("") -local escaped = (plus / " ") + (percent * C(hexdigit * hexdigit) / tochar) +local escapedchar = (percent * C(hexdigit * hexdigit)) / tochar +local escaped = (plus / " ") + escapedchar -- we assume schemes with more than 1 character (in order to avoid problems with windows disks) -- we also assume that when we have a scheme, we also have an authority +-- +-- maybe we should already split the query (better for unescaping as = & can be part of a value local schemestr = Cs((escaped+(1-colon-slash-qmark-hash))^2) local authoritystr = Cs((escaped+(1- slash-qmark-hash))^0) local pathstr = Cs((escaped+(1- qmark-hash))^0) -local querystr = Cs((escaped+(1- hash))^0) +----- querystr = Cs((escaped+(1- hash))^0) +local querystr = Cs(( (1- hash))^0) local fragmentstr = Cs((escaped+(1- endofstring))^0) local scheme = schemestr * colon + nothing @@ -3192,11 +3488,20 @@ local parser = Ct(validurl) lpegpatterns.url = validurl lpegpatterns.urlsplitter = parser -local escapes = { } ; for i=0,255 do escapes[i] = format("%%%02X",i) end +local escapes = { } -local escaper = Cs((R("09","AZ","az") + S("-./_") + P(1) / escapes)^0) +setmetatable(escapes, { __index = function(t,k) + local v = format("%%%02X",byte(k)) + t[k] = v + return v +end }) -lpegpatterns.urlescaper = escaper +local escaper = Cs((R("09","AZ","az")^1 + P(" ")/"%%20" + S("-./_")^1 + P(1) / escapes)^0) -- space happens most +local unescaper = Cs((escapedchar + 1)^0) + +lpegpatterns.urlunescaped = escapedchar +lpegpatterns.urlescaper = escaper +lpegpatterns.urlunescaper = unescaper -- todo: reconsider Ct as we can as well have five return values (saves a table) -- so we can have two parsers, one with and one without @@ -3208,8 +3513,12 @@ end local isscheme = schemestr * colon * slash * slash -- this test also assumes authority local function hasscheme(str) - local scheme = lpegmatch(isscheme,str) -- at least one character - return scheme ~= "" and scheme or false + if str then + local scheme = lpegmatch(isscheme,str) -- at least one character + return scheme ~= "" and scheme or false + else + return false + end end @@ -3228,10 +3537,32 @@ local rootbased = P("/") local barswapper = replacer("|",":") local backslashswapper = replacer("\\","/") +-- queries: + +local equal = P("=") +local amp = P("&") +local key = Cs(((escapedchar+1)-equal )^0) +local value = Cs(((escapedchar+1)-amp -endofstring)^0) + +local splitquery = Cf ( Ct("") * P { "sequence", + sequence = V("pair") * (amp * V("pair"))^0, + pair = Cg(key * equal * value), +}, rawset) + +-- hasher + local function hashed(str) -- not yet ok (/test?test) + if str == "" then + return { + scheme = "invalid", + original = str, + } + end local s = split(str) - local somescheme = s[1] ~= "" - local somequery = s[4] ~= "" + local rawscheme = s[1] + local rawquery = s[4] + local somescheme = rawscheme ~= "" + local somequery = rawquery ~= "" if not somescheme and not somequery then s = { scheme = "file", @@ -3247,14 +3578,17 @@ local function hashed(str) -- not yet ok (/test?test) local authority, path, filename = s[2], s[3] if authority == "" then filename = path + elseif path == "" then + filename = "" else filename = authority .. "/" .. path end s = { - scheme = s[1], + scheme = rawscheme, authority = authority, path = path, - query = s[4], + query = lpegmatch(unescaper,rawquery), -- unescaped, but possible conflict with & and = + queries = lpegmatch(splitquery,rawquery), -- split first and then unescaped fragment = s[5], original = str, noscheme = false, @@ -3264,6 +3598,8 @@ local function hashed(str) -- not yet ok (/test?test) return s end +-- inspect(hashed("template://test")) + -- Here we assume: -- -- files: /// = relative @@ -3306,23 +3642,65 @@ function url.construct(hash) -- dodo: we need to escape ! return lpegmatch(escaper,concat(fullurl)) end -function url.filename(filename) +function url.filename(filename) -- why no lpeg here ? local t = hashed(filename) return (t.scheme == "file" and (gsub(t.path,"^/([a-zA-Z])([:|])/)","%1:"))) or filename end +local function escapestring(str) + return lpegmatch(escaper,str) +end + +url.escape = escapestring + +-- function url.query(str) -- separator could be an option +-- if type(str) == "string" then +-- local t = { } +-- for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do +-- t[k] = v +-- end +-- return t +-- else +-- return str +-- end +-- end + function url.query(str) if type(str) == "string" then - local t = { } - for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do - t[k] = v - end - return t + return lpegmatch(splitquery,str) or "" else return str end end +function url.toquery(data) + local td = type(data) + if td == "string" then + return #str and escape(data) or nil -- beware of double escaping + elseif td == "table" then + if next(data) then + local t = { } + for k, v in next, data do + t[#t+1] = format("%s=%s",k,escapestring(v)) + end + return concat(t,"&") + end + else + -- nil is a signal that no query + end +end + +-- /test/ | /test | test/ | test => test + +function url.barepath(path) + if not path or path == "" then + return "" + else + return (gsub(path,"^/?(.-)/?$","%1")) + end +end + + @@ -3363,6 +3741,24 @@ local isdir = lfs.isdir local isfile = lfs.isfile local currentdir = lfs.currentdir +-- in case we load outside luatex + +if not isdir then + function isdir(name) + local a = attributes(name) + return a and a.mode == "directory" + end + lfs.isdir = isdir +end + +if not isfile then + function isfile(name) + local a = attributes(name) + return a and a.mode == "file" + end + lfs.isfile = isfile +end + -- handy function dir.current() @@ -3738,28 +4134,49 @@ function boolean.tonumber(b) end function toboolean(str,tolerant) - if tolerant then - local tstr = type(str) - if tstr == "string" then - return str == "true" or str == "yes" or str == "on" or str == "1" or str == "t" - elseif tstr == "number" then - return tonumber(str) ~= 0 - elseif tstr == "nil" then - return false - else - return str - end + if str == nil then + return false + elseif str == false then + return false + elseif str == true then + return true elseif str == "true" then return true elseif str == "false" then return false + elseif not tolerant then + return false + elseif str == 0 then + return false + elseif (tonumber(str) or 0) > 0 then + return true else - return str + return str == "yes" or str == "on" or str == "t" end end string.toboolean = toboolean +function string.booleanstring(str) + if str == nil then + return false + elseif str == false then + return false + elseif str == true then + return true + elseif str == "true" then + return true + elseif str == "false" then + return false + elseif str == 0 then + return false + elseif (tonumber(str) or 0) > 0 then + return true + else + return str == "yes" or str == "on" or str == "t" + end +end + function string.is_boolean(str,default) if type(str) == "string" then if str == "true" or str == "yes" or str == "on" or str == "t" then @@ -3784,57 +4201,229 @@ if not modules then modules = { } end modules ['l-unicode'] = { license = "see context related readme files" } +-- this module will be reorganized + +-- todo: utf.sub replacement (used in syst-aux) + +local concat = table.concat +local type = type +local P, C, R, Cs, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local utftype = patterns.utftype +local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format +local utfsplitlines = string.utfsplitlines + if not unicode then - unicode = { utf8 = { } } + unicode = { } + +end + +local unicode = unicode + +utf = utf or unicode.utf8 + +if not utf then + + utf8 = { } + unicode.utf8 = utf8 + utf = utf8 + +end + +if not utf.char then local floor, char = math.floor, string.char - function unicode.utf8.utfchar(n) + function utf.char(n) if n < 0x80 then + -- 0aaaaaaa : 0x80 return char(n) elseif n < 0x800 then + -- 110bbbaa : 0xC0 : n >> 6 + -- 10aaaaaa : 0x80 : n & 0x3F return char( 0xC0 + floor(n/0x40), 0x80 + (n % 0x40) ) elseif n < 0x10000 then + -- 1110bbbb : 0xE0 : n >> 12 + -- 10bbbbaa : 0x80 : (n >> 6) & 0x3F + -- 10aaaaaa : 0x80 : n & 0x3F return char( 0xE0 + floor(n/0x1000), 0x80 + (floor(n/0x40) % 0x40), 0x80 + (n % 0x40) ) - elseif n < 0x40000 then + elseif n < 0x200000 then + -- 11110ccc : 0xF0 : n >> 18 + -- 10ccbbbb : 0x80 : (n >> 12) & 0x3F + -- 10bbbbaa : 0x80 : (n >> 6) & 0x3F + -- 10aaaaaa : 0x80 : n & 0x3F + -- dddd : ccccc - 1 return char( - 0xF0 + floor(n/0x40000), - 0x80 + floor(n/0x1000), + 0xF0 + floor(n/0x40000), + 0x80 + (floor(n/0x1000) % 0x40), 0x80 + (floor(n/0x40) % 0x40), 0x80 + (n % 0x40) ) else - -- return char( - -- 0xF1 + floor(n/0x1000000), - -- 0x80 + floor(n/0x40000), - -- 0x80 + floor(n/0x1000), - -- 0x80 + (floor(n/0x40) % 0x40), - -- 0x80 + (n % 0x40) - -- ) - return "?" + return "" end end end -local unicode = unicode +if not utf.byte then -utf = utf or unicode.utf8 + local utf8byte = patterns.utf8byte -local concat = table.concat -local utfchar, utfbyte, utfgsub = utf.char, utf.byte, utf.gsub -local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format -local type = type + function utf.byte(c) + return lpegmatch(utf8byte,c) + end -local utfsplitlines = string.utfsplitlines +end + +local utfchar, utfbyte = utf.char, utf.byte + +-- As we want to get rid of the (unmaintained) utf library we implement our own +-- variants (in due time an independent module): + +function unicode.filetype(data) + return data and lpegmatch(utftype,data) or "unknown" +end + +local toentities = Cs ( + ( + patterns.utf8one + + ( + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + ) / function(s) local b = utfbyte(s) if b < 127 then return s else return format("&#%X;",b) end end + )^0 +) + +patterns.toentities = toentities + +function utf.toentities(str) + return lpegmatch(toentities,str) +end + + + + +local one = P(1) +local two = C(1) * C(1) +local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) + +-- actually one of them is already utf ... sort of useless this one + +-- function utf.char(n) +-- if n < 0x80 then +-- return char(n) +-- elseif n < 0x800 then +-- return char( +-- 0xC0 + floor(n/0x40), +-- 0x80 + (n % 0x40) +-- ) +-- elseif n < 0x10000 then +-- return char( +-- 0xE0 + floor(n/0x1000), +-- 0x80 + (floor(n/0x40) % 0x40), +-- 0x80 + (n % 0x40) +-- ) +-- elseif n < 0x40000 then +-- return char( +-- 0xF0 + floor(n/0x40000), +-- 0x80 + floor(n/0x1000), +-- 0x80 + (floor(n/0x40) % 0x40), +-- 0x80 + (n % 0x40) +-- ) +-- else +-- -- return char( +-- -- 0xF1 + floor(n/0x1000000), +-- -- 0x80 + floor(n/0x40000), +-- -- 0x80 + floor(n/0x1000), +-- -- 0x80 + (floor(n/0x40) % 0x40), +-- -- 0x80 + (n % 0x40) +-- -- ) +-- return "?" +-- end +-- end +-- +-- merge into: + +local pattern = P("\254\255") * Cs( ( + four / function(a,b,c,d) + local ab = 0xFF * byte(a) + byte(b) + local cd = 0xFF * byte(c) + byte(d) + return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) + end + + two / function(a,b) + return utfchar(byte(a)*256 + byte(b)) + end + + one + )^1 ) + + P("\255\254") * Cs( ( + four / function(b,a,d,c) + local ab = 0xFF * byte(a) + byte(b) + local cd = 0xFF * byte(c) + byte(d) + return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) + end + + two / function(b,a) + return utfchar(byte(a)*256 + byte(b)) + end + + one + )^1 ) + +function string.toutf(s) + return lpegmatch(pattern,s) or s -- todo: utf32 +end + +local validatedutf = Cs ( + ( + patterns.utf8one + + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + + P(1) / "�" + )^0 +) + +patterns.validatedutf = validatedutf + +function string.validutf(str) + return lpegmatch(validatedutf,str) +end + + +utf.length = string.utflength +utf.split = string.utfsplit +utf.splitines = string.utfsplitlines +utf.valid = string.validutf + +if not utf.len then + utf.len = utf.length +end + +-- a replacement for simple gsubs: + +local utf8char = patterns.utf8char + +function utf.remapper(mapping) + local pattern = Cs((utf8char/mapping)^0) + return function(str) + if not str or str == "" then + return "" + else + return lpegmatch(pattern,str) + end + end, pattern +end + +-- local remap = utf.remapper { a = 'd', b = "c", c = "b", d = "a" } +-- print(remap("abcd 1234 abcd")) -- 0 EF BB BF UTF-8 -- 1 FF FE UTF-16-little-endian @@ -4027,11 +4616,22 @@ local function big(c) end end +-- function unicode.utf8_to_utf16(str,littleendian) +-- if littleendian then +-- return char(255,254) .. utfgsub(str,".",little) +-- else +-- return char(254,255) .. utfgsub(str,".",big) +-- end +-- end + +local _, l_remap = utf.remapper(little) +local _, b_remap = utf.remapper(big) + function unicode.utf8_to_utf16(str,littleendian) if littleendian then - return char(255,254) .. utfgsub(str,".",little) + return char(255,254) .. lpegmatch(l_remap,str) else - return char(254,255) .. utfgsub(str,".",big) + return char(254,255) .. lpegmatch(b_remap,str) end end @@ -4052,84 +4652,12 @@ function unicode.xstring(s) return format("0x%05X",type(s) == "number" and s or utfbyte(s)) end +-- -local lpegmatch = lpeg.match -local patterns = lpeg.patterns -local utftype = patterns.utftype - -function unicode.filetype(data) - return data and lpegmatch(utftype,data) or "unknown" -end - -local toentities = lpeg.Cs ( - ( - patterns.utf8one - + ( - patterns.utf8two - + patterns.utf8three - + patterns.utf8four - ) / function(s) local b = utfbyte(s) if b < 127 then return s else return format("&#%X;",b) end end - )^0 -) - -patterns.toentities = toentities - -function utf.toentities(str) - return lpegmatch(toentities,str) -end - - - - -local P, C, R, Cs = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs - -local one = P(1) -local two = C(1) * C(1) -local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) - --- actually one of them is already utf ... sort of useless this one - -local pattern = P("\254\255") * Cs( ( - four / function(a,b,c,d) - local ab = 0xFF * byte(a) + byte(b) - local cd = 0xFF * byte(c) + byte(d) - return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) - end - + two / function(a,b) - return utfchar(byte(a)*256 + byte(b)) - end - + one - )^1 ) - + P("\255\254") * Cs( ( - four / function(b,a,d,c) - local ab = 0xFF * byte(a) + byte(b) - local cd = 0xFF * byte(c) + byte(d) - return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) - end - + two / function(b,a) - return utfchar(byte(a)*256 + byte(b)) - end - + one - )^1 ) - -function string.toutf(s) - return lpegmatch(pattern,s) or s -- todo: utf32 -end - -local validatedutf = Cs ( - ( - patterns.utf8one - + patterns.utf8two - + patterns.utf8three - + patterns.utf8four - + P(1) / "�" - )^0 -) - -patterns.validatedutf = validatedutf +local pattern = Ct(C(patterns.utf8char)^0) -function string.validutf(str) - return lpegmatch(validatedutf,str) +function utf.totable(str) + return lpegmatch(pattern,str) end @@ -4189,10 +4717,11 @@ utilities = utilities or {} utilities.tables = utilities.tables or { } local tables = utilities.tables -local format, gmatch, rep = string.format, string.gmatch, string.rep +local format, gmatch, rep, gsub = string.format, string.gmatch, string.rep, string.gsub local concat, insert, remove = table.concat, table.insert, table.remove local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring -local type, next, rawset, tonumber = type, next, rawset, tonumber +local type, next, rawset, tonumber, loadstring = type, next, rawset, tonumber, loadstring +local lpegmatch, P, Cs = lpeg.match, lpeg.P, lpeg.Cs function tables.definetable(target) -- defines undefined tables local composed, t, n = nil, { }, 0 @@ -4346,6 +4875,121 @@ function tables.encapsulate(core,capsule,protect) end end +local function serialize(t,r,outer) -- no mixes + r[#r+1] = "{" + local n = #t + if n > 0 then + for i=1,n do + local v = t[i] + local tv = type(v) + if tv == "string" then + r[#r+1] = format("%q,",v) + elseif tv == "number" then + r[#r+1] = format("%s,",v) + elseif tv == "table" then + serialize(v,r) + elseif tv == "boolean" then + r[#r+1] = format("%s,",tostring(v)) + end + end + else + for k, v in next, t do + local tv = type(v) + if tv == "string" then + r[#r+1] = format("[%q]=%q,",k,v) + elseif tv == "number" then + r[#r+1] = format("[%q]=%s,",k,v) + elseif tv == "table" then + r[#r+1] = format("[%q]=",k) + serialize(v,r) + elseif tv == "boolean" then + r[#r+1] = format("[%q]=%s,",k,tostring(v)) + end + end + end + if outer then + r[#r+1] = "}" + else + r[#r+1] = "}," + end + return r +end + +function table.fastserialize(t,prefix) + return concat(serialize(t,{ prefix or "return" },true)) +end + +function table.deserialize(str) + if not str or str == "" then + return + end + local code = loadstring(str) + if not code then + return + end + code = code() + if not code then + return + end + return code +end + +-- inspect(table.fastserialize { a = 1, b = { 4, { 5, 6 } }, c = { d = 7, e = 'f"g\nh' } }) + +function table.load(filename) + if filename then + local t = io.loaddata(filename) + if t and t ~= "" then + t = loadstring(t) + if type(t) == "function" then + t = t() + if type(t) == "table" then + return t + end + end + end + end +end + +local function slowdrop(t) + local r = { } + local l = { } + for i=1,#t do + local ti = t[i] + local j = 0 + for k, v in next, ti do + j = j + 1 + l[j] = format("%s=%q",k,v) + end + r[i] = format(" {%s},\n",concat(l)) + end + return format("return {\n%s}",concat(r)) +end + +local function fastdrop(t) + local r = { "return {\n" } + for i=1,#t do + local ti = t[i] + r[#r+1] = " {" + for k, v in next, ti do + r[#r+1] = format("%s=%q",k,v) + end + r[#r+1] = "},\n" + end + r[#r+1] = "}" + return concat(r) +end + +function table.drop(t,slow) + if #t == 0 then + return "return { }" + elseif slow == true then + return slowdrop(t) -- less memory + else + return fastdrop(t) -- some 15% faster + end +end + end -- of closure @@ -4520,11 +5164,10 @@ local concat = table.concat local type, next = type, next utilities = utilities or {} -utilities.merger = utilities.merger or { } -- maybe mergers +local merger = utilities.merger or { } +utilities.merger = merger utilities.report = logs and logs.reporter("system") or print -local merger = utilities.merger - merger.strip_comment = true local m_begin_merge = "begin library merge" @@ -4570,9 +5213,11 @@ end local function self_save(name, data) if data ~= "" then if merger.strip_comment then - -- saves some 20K local n = #data + -- saves some 20K .. scite comments data = gsub(data,"%-%-~[^\n\r]*[\r\n]","") + -- saves some 20K .. ldx comments + data = gsub(data,"%-%-%[%[ldx%-%-.-%-%-ldx%]%]%-%-","") utilities.report("merge: %s bytes of comment stripped, %s bytes of code left",n-#data,#data) end io.savedata(name,data) @@ -4653,36 +5298,208 @@ if not modules then modules = { } end modules ['util-lua'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + comment = "the strip code is written by Peter Cawley", copyright = "PRAGMA ADE / ConTeXt Development Team", license = "see context related readme files" } -utilities = utilities or {} -utilities.lua = utilities.lua or { } -utilities.report = logs and logs.reporter("system") or print +local rep, sub, byte, dump, format = string.rep, string.sub, string.byte, string.dump, string.format +local loadstring, loadfile, type = loadstring, loadfile, type + +utilities = utilities or {} +utilities.lua = utilities.lua or { } +local luautilities = utilities.lua + +utilities.report = logs and logs.reporter("system") or print -- can be overloaded later + +local tracestripping = false +local forcestupidcompile = true -- use internal bytecode compiler +luautilities.stripcode = true -- support stripping when asked for +luautilities.alwaysstripcode = false -- saves 1 meg on 7 meg compressed format file (2012.08.12) +luautilities.nofstrippedchunks = 0 +luautilities.nofstrippedbytes = 0 + +-- The next function was posted by Peter Cawley on the lua list and strips line +-- number information etc. from the bytecode data blob. We only apply this trick +-- when we store data tables. Stripping makes the compressed format file about +-- 1MB smaller (and uncompressed we save at least 6MB). +-- +-- You can consider this feature an experiment, so it might disappear. There is +-- no noticeable gain in runtime although the memory footprint should be somewhat +-- smaller (and the file system has a bit less to deal with). +-- +-- Begin of borrowed code ... works for Lua 5.1 which LuaTeX currently uses ... + +local function strip_code_pc(dump,name) + local before = #dump + local version, format, endian, int, size, ins, num = byte(dump,5,11) + local subint + if endian == 1 then + subint = function(dump, i, l) + local val = 0 + for n = l, 1, -1 do + val = val * 256 + byte(dump,i + n - 1) + end + return val, i + l + end + else + subint = function(dump, i, l) + local val = 0 + for n = 1, l, 1 do + val = val * 256 + byte(dump,i + n - 1) + end + return val, i + l + end + end + local strip_function + strip_function = function(dump) + local count, offset = subint(dump, 1, size) + local stripped, dirty = rep("\0", size), offset + count + offset = offset + count + int * 2 + 4 + offset = offset + int + subint(dump, offset, int) * ins + count, offset = subint(dump, offset, int) + for n = 1, count do + local t + t, offset = subint(dump, offset, 1) + if t == 1 then + offset = offset + 1 + elseif t == 4 then + offset = offset + size + subint(dump, offset, size) + elseif t == 3 then + offset = offset + num + end + end + count, offset = subint(dump, offset, int) + stripped = stripped .. sub(dump,dirty, offset - 1) + for n = 1, count do + local proto, off = strip_function(sub(dump,offset, -1)) + stripped, offset = stripped .. proto, offset + off - 1 + end + offset = offset + subint(dump, offset, int) * int + int + count, offset = subint(dump, offset, int) + for n = 1, count do + offset = offset + subint(dump, offset, size) + size + int * 2 + end + count, offset = subint(dump, offset, int) + for n = 1, count do + offset = offset + subint(dump, offset, size) + size + end + stripped = stripped .. rep("\0", int * 3) + return stripped, offset + end + dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1)) + local after = #dump + local delta = before-after + if tracestripping then + utilities.report("stripped bytecode: %s, before %s, after %s, delta %s",name or "unknown",before,after,delta) + end + luautilities.nofstrippedchunks = luautilities.nofstrippedchunks + 1 + luautilities.nofstrippedbytes = luautilities.nofstrippedbytes + delta + return dump, delta +end + +-- ... end of borrowed code. + +local function strippedbytecode(code,forcestrip,name) + if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then + return strip_code_pc(code,name) + else + return code, 0 + end +end + +luautilities.stripbytecode = strip_code_pc +luautilities.strippedbytecode = strippedbytecode + +local function fatalerror(name) + utilities.report(format("fatal error in %q",name or "unknown")) +end + +-- quite subtle ... doing this wrong incidentally can give more bytes + + +function luautilities.loadedluacode(fullname,forcestrip,name) + -- quite subtle ... doing this wrong incidentally can give more bytes + name = name or fullname + local code = loadfile(fullname) + if code then + code() + end + if forcestrip and luautilities.stripcode then + if type(forcestrip) == "function" then + forcestrip = forcestrip(fullname) + end + if forcestrip then + local code, n = strip_code_pc(dump(code,name)) + return loadstring(code), n + elseif luautilities.alwaysstripcode then + return loadstring(strip_code_pc(dump(code),name)) + else + return code, 0 + end + elseif luautilities.alwaysstripcode then + return loadstring(strip_code_pc(dump(code),name)) + else + return code, 0 + end +end + +function luautilities.strippedloadstring(code,forcestrip,name) -- not executed + local n = 0 + if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then + code = loadstring(code) + if not code then + fatalerror(name) + end + code, n = strip_code_pc(dump(code),name) + end + return loadstring(code), n +end -local function stupidcompile(luafile,lucfile) - local data = io.loaddata(luafile) - if data and data ~= "" then - data = string.dump(data) - if data and data ~= "" then - io.savedata(lucfile,data) +local function stupidcompile(luafile,lucfile,strip) + local code = io.loaddata(luafile) + local n = 0 + if code and code ~= "" then + code = loadstring(code) + if not code then + fatalerror() + end + code = dump(code) + if strip then + code, n = strippedbytecode(code,true,luafile) -- last one is reported + end + if code and code ~= "" then + io.savedata(lucfile,code) end end + return n end -function utilities.lua.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true +local luac_normal = "texluac -o %q %q" +local luac_strip = "texluac -s -o %q %q" + +function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true utilities.report("lua: compiling %s into %s",luafile,lucfile) os.remove(lucfile) - local command = "-o " .. string.quoted(lucfile) .. " " .. string.quoted(luafile) + local done = false if strip ~= false then - command = "-s " .. command + strip = true + end + if forcestupidcompile then + fallback = true + elseif strip then + done = os.spawn(format(luac_strip, lucfile,luafile)) == 0 + else + done = os.spawn(format(luac_normal,lucfile,luafile)) == 0 end - local done = os.spawn("texluac " .. command) == 0 -- or os.spawn("luac " .. command) == 0 if not done and fallback then - utilities.report("lua: dumping %s into %s (unstripped)",luafile,lucfile) - stupidcompile(luafile,lucfile) -- maybe use the stripper we have elsewhere - cleanup = false -- better see how worse it is + local n = stupidcompile(luafile,lucfile,strip) + if n > 0 then + utilities.report("lua: %s dumped into %s (%i bytes stripped)",luafile,lucfile,n) + else + utilities.report("lua: %s dumped into %s (unstripped)",luafile,lucfile) + end + cleanup = false -- better see how bad it is end if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then utilities.report("lua: removing %s",luafile) @@ -4697,7 +5514,6 @@ end - end -- of closure do -- create closure to overcome 200 locals limit @@ -4710,8 +5526,10 @@ if not modules then modules = { } end modules ['util-prs'] = { license = "see context related readme files" } -local P, R, V, C, Ct, Cs, Carg = lpeg.P, lpeg.R, lpeg.V, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg -local lpegmatch = lpeg.match +local lpeg, table, string = lpeg, table, string + +local P, R, V, S, C, Ct, Cs, Carg, Cc = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc +local lpegmatch, patterns = lpeg.match, lpeg.patterns local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find local tostring, type, next = tostring, type, next @@ -4723,29 +5541,39 @@ parsers.patterns = parsers.patterns or { } local setmetatableindex = table.setmetatableindex local sortedhash = table.sortedhash +-- we share some patterns + +local space = P(' ') +local equal = P("=") +local comma = P(",") +local lbrace = P("{") +local rbrace = P("}") +local period = S(".") +local punctuation = S(".,:;") +local spacer = patterns.spacer +local whitespace = patterns.whitespace +local newline = patterns.newline +local anything = patterns.anything +local endofstring = patterns.endofstring + -- we could use a Cf Cg construct local escape, left, right = P("\\"), P('{'), P('}') -lpeg.patterns.balanced = P { +patterns.balanced = P { [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0, [2] = left * V(1) * right } -local space = P(' ') -local equal = P("=") -local comma = P(",") -local lbrace = P("{") -local rbrace = P("}") local nobrace = 1 - (lbrace+rbrace) local nested = P { lbrace * (nobrace + V(1))^0 * rbrace } local spaces = space^0 local argument = Cs((lbrace/"") * ((nobrace + nested)^0) * (rbrace/"")) -local content = (1-P(-1))^0 +local content = (1-endofstring)^0 -lpeg.patterns.nested = nested -- no capture -lpeg.patterns.argument = argument -- argument after e.g. = -lpeg.patterns.content = content -- rest after e.g = +patterns.nested = nested -- no capture +patterns.argument = argument -- argument after e.g. = +patterns.content = content -- rest after e.g = local value = P(lbrace * C((nobrace + nested)^0) * rbrace) + C((nested + (1-comma))^0) @@ -4764,10 +5592,6 @@ local function set(key,value) hash[key] = value end -local function set(key,value) - hash[key] = value -end - local pattern_a_s = (pattern_a/set)^1 local pattern_b_s = (pattern_b/set)^1 local pattern_c_s = (pattern_c/set)^1 @@ -4818,7 +5642,7 @@ end local separator = comma * space^0 local value = P(lbrace * C((nobrace + nested)^0) * rbrace) + C((nested + (1-comma))^0) -local pattern = Ct(value*(separator*value)^0) +local pattern = spaces * Ct(value*(separator*value)^0) -- "aap, {noot}, mies" : outer {} removes, leading spaces ignored @@ -4942,6 +5766,37 @@ function parsers.listitem(str) return gmatch(str,"[^, ]+") end +-- +local digit = R("09") + +local pattern = Cs { "start", + start = V("one") + V("two") + V("three"), + rest = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0, + thousand = digit * digit * digit, + one = digit * V("rest"), + two = digit * digit * V("rest"), + three = V("thousand") * V("rest"), +} + +patterns.splitthousands = pattern -- maybe better in the parsers namespace ? + +function parsers.splitthousands(str) + return lpegmatch(pattern,str) or str +end + +-- print(parsers.splitthousands("11111111111.11")) + +local optionalwhitespace = whitespace^0 + +patterns.words = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1) +patterns.sentences = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1) +patterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1) + +-- local str = " Word1 word2. \n Word3 word4. \n\n Word5 word6.\n " +-- inspect(lpegmatch(patterns.paragraphs,str)) +-- inspect(lpegmatch(patterns.sentences,str)) +-- inspect(lpegmatch(patterns.words,str)) + end -- of closure @@ -5043,7 +5898,7 @@ end -- of closure do -- create closure to overcome 200 locals limit -if not modules then modules = { } end modules ['util.deb'] = { +if not modules then modules = { } end modules ['util-deb'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -5155,6 +6010,7 @@ function inspect(i) -- global function else print(tostring(i)) end + return i -- so that we can inline the inspect end -- from the lua book: @@ -5194,7 +6050,7 @@ if not modules then modules = { } end modules ['trac-inf'] = { local format, lower = string.format, string.lower local clock = os.gettimeofday or os.clock -- should go in environment -local write_nl = texio.write_nl +local write_nl = texio and texio.write_nl or print statistics = statistics or { } local statistics = statistics @@ -5277,7 +6133,7 @@ statistics.elapsedtime = elapsedtime statistics.elapsedindeed = elapsedindeed statistics.elapsedseconds = elapsedseconds --- general function +-- general function .. we might split this module function statistics.register(tag,fnc) if statistics.enable and type(fnc) == "function" then @@ -5387,6 +6243,8 @@ if not modules then modules = { } end modules ['trac-set'] = { -- might become u license = "see context related readme files" } +-- maybe this should be util-set.lua + local type, next, tostring = type, next, tostring local concat = table.concat local format, find, lower, gsub, escapedpattern = string.format, string.find, string.lower, string.gsub, string.escapedpattern @@ -5586,7 +6444,7 @@ function setters.show(t) local value, default, modules = functions.value, functions.default, #functions value = value == nil and "unset" or tostring(value) default = default == nil and "unset" or tostring(default) - t.report("%-30s modules: %2i default: %6s value: %6s",name,modules,default,value) + t.report("%-50s modules: %2i default: %6s value: %6s",name,modules,default,value) end end t.report() @@ -5678,17 +6536,31 @@ end) -- experiment -local flags = environment and environment.engineflags +if environment then -if flags then - if trackers and flags.trackers then - setters.initialize("flags","trackers", settings_to_hash(flags.trackers)) - -- t_enable(flags.trackers) - end - if directives and flags.directives then - setters.initialize("flags","directives", settings_to_hash(flags.directives)) - -- d_enable(flags.directives) + -- The engineflags are known earlier than environment.arguments but maybe we + -- need to handle them both as the later are parsed differently. The c: prefix + -- is used by mtx-context to isolate the flags from those that concern luatex. + + local engineflags = environment.engineflags + + if engineflags then + if trackers then + local list = engineflags["c:trackers"] or engineflags["trackers"] + if type(list) == "string" then + setters.initialize("flags","trackers",settings_to_hash(list)) + -- t_enable(list) + end + end + if directives then + local list = engineflags["c:directives"] or engineflags["directives"] + if type(list) == "string" then + setters.initialize("flags","directives", settings_to_hash(list)) + -- d_enable(list) + end + end end + end -- here @@ -5741,10 +6613,7 @@ local next, type = next, type local setmetatableindex = table.setmetatableindex ---[[ldx-- -

This is a prelude to a more extensive logging module. We no longer -provide based logging a sparsing is relatively easy anyway.

---ldx]]-- + logs = logs or { } local logs = logs @@ -6560,7 +7429,8 @@ local allocate, mark = utilities.storage.allocate, utilities.storage.mark local format, sub, match, gsub, find = string.format, string.sub, string.match, string.gsub, string.find local unquoted, quoted = string.unquoted, string.quoted -local concat = table.concat +local concat, insert, remove = table.concat, table.insert, table.remove +local loadedluacode = utilities.lua.loadedluacode -- precautions @@ -6578,8 +7448,28 @@ if arg and (arg[0] == 'luatex' or arg[0] == 'luatex.exe') and arg[1] == "--luaon for k=3,#arg do arg[k-2] = arg[k] end - arg[#arg] = nil -- last - arg[#arg] = nil -- pre-last + remove(arg) -- last + remove(arg) -- pre-last +end + +-- This is an ugly hack but it permits symlinking a script (say 'context') to 'mtxrun' as in: +-- +-- ln -s /opt/minimals/tex/texmf-linux-64/bin/mtxrun context +-- +-- The special mapping hack is needed because 'luatools' boils down to 'mtxrun --script base' +-- but it's unlikely that there will be more of this + +do + + local originalzero = file.basename(arg[0]) + local specialmapping = { luatools == "base" } + + if originalzero ~= "mtxrun" and originalzero ~= "mtxrun.lua" then + arg[0] = specialmapping[originalzero] or originalzero + insert(arg,0,"--script") + insert(arg,0,"mtxrun") + end + end -- environment @@ -6619,6 +7509,8 @@ local mt = { setmetatable(environment,mt) +-- context specific arguments (in order not to confuse the engine) + function environment.initializearguments(arg) local arguments, files = { }, { } environment.arguments, environment.files, environment.sortedflags = arguments, files, nil @@ -6627,10 +7519,12 @@ function environment.initializearguments(arg) if index > 0 then local flag, value = match(argument,"^%-+(.-)=(.-)$") if flag then + flag = gsub(flag,"^c:","") arguments[flag] = unquoted(value or "") else flag = match(argument,"^%-+(.+)") if flag then + flag = gsub(flag,"^c:","") arguments[flag] = true else files[#files+1] = argument @@ -6650,7 +7544,7 @@ end -- tricky: too many hits when we support partials unless we add -- a registration of arguments so from now on we have 'partial' -function environment.argument(name,partial) +function environment.getargument(name,partial) local arguments, sortedflags = environment.arguments, environment.sortedflags if arguments[name] then return arguments[name] @@ -6673,6 +7567,8 @@ function environment.argument(name,partial) return nil end +environment.argument = environment.getargument + function environment.splitarguments(separator) -- rather special, cut-off before separator local done, before, after = false, { }, { } local originalarguments = environment.originalarguments @@ -6758,7 +7654,7 @@ function environment.texfile(filename) return resolvers.findfile(filename,'tex') end -function environment.luafile(filename) +function environment.luafile(filename) -- needs checking local resolved = resolvers.findfile(filename,'tex') or "" if resolved ~= "" then return resolved @@ -6770,13 +7666,16 @@ function environment.luafile(filename) return resolvers.findfile(filename,'luatexlibs') or "" end -environment.loadedluacode = loadfile -- can be overloaded +local function checkstrip(filename) + local modu = modules[file.nameonly(filename)] + return modu and modu.dataonly +end function environment.luafilechunk(filename,silent) -- used for loading lua bytecode in the format filename = file.replacesuffix(filename, "lua") local fullname = environment.luafile(filename) if fullname and fullname ~= "" then - local data = environment.loadedluacode(fullname) + local data = loadedluacode(fullname,checkstrip,filename) if trace_locating then report_lua("loading file %s%s", fullname, not data and " failed" or "") elseif not silent then @@ -6874,21 +7773,7 @@ local trace_entities = false trackers.register("xml.entities", function(v) trac local report_xml = logs and logs.reporter("xml","core") or function(...) print(format(...)) end ---[[ldx-- -

The parser used here is inspired by the variant discussed in the lua book, but -handles comment and processing instructions, has a different structure, provides -parent access; a first version used different trickery but was less optimized to we -went this route. First we had a find based parser, now we have an based one. -The find based parser can be found in l-xml-edu.lua along with other older code.

-

Beware, the interface may change. For instance at, ns, tg, dt may get more -verbose names. Once the code is stable we will also remove some tracing and -optimize the code.

- -

I might even decide to reimplement the parser using the latest trickery -as the current variant was written when showed up and it's easier now to -build tables in one go.

---ldx]]-- xml = xml or { } local xml = xml @@ -6898,46 +7783,25 @@ local utf = unicode.utf8 local concat, remove, insert = table.concat, table.remove, table.insert local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber local format, lower, find, match, gsub = string.format, string.lower, string.find, string.match, string.gsub -local utfchar, utffind, utfgsub = utf.char, utf.find, utf.gsub +local utfchar = utf.char local lpegmatch = lpeg.match local P, S, R, C, V, C, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.C, lpeg.Cs ---[[ldx-- -

First a hack to enable namespace resolving. A namespace is characterized by -a . The following function associates a namespace prefix with a -pattern. We use , which in this case is more than twice as fast as a -find based solution where we loop over an array of patterns. Less code and -much cleaner.

---ldx]]-- + xml.xmlns = xml.xmlns or { } local check = P(false) local parse = check ---[[ldx-- -

The next function associates a namespace prefix with an . This -normally happens independent of parsing.

- -xml.registerns("mml","mathml") - ---ldx]]-- function xml.registerns(namespace, pattern) -- pattern can be an lpeg check = check + C(P(lower(pattern))) / namespace parse = P { P(check) + 1 * V(1) } end ---[[ldx-- -

The next function also registers a namespace, but this time we map a -given namespace prefix onto a registered one, using the given -. This used for attributes like xmlns:m.

- -xml.checkns("m","http://www.w3.org/mathml") - ---ldx]]-- function xml.checkns(namespace,url) local ns = lpegmatch(parse,lower(url)) @@ -6946,66 +7810,15 @@ function xml.checkns(namespace,url) end end ---[[ldx-- -

Next we provide a way to turn an into a registered -namespace. This used for the xmlns attribute.

- -resolvedns = xml.resolvens("http://www.w3.org/mathml") - - -This returns mml. ---ldx]]-- function xml.resolvens(url) return lpegmatch(parse,lower(url)) or "" end ---[[ldx-- -

A namespace in an element can be remapped onto the registered -one efficiently by using the xml.xmlns table.

---ldx]]-- - ---[[ldx-- -

This version uses . We follow the same approach as before, stack and top and -such. This version is about twice as fast which is mostly due to the fact that -we don't have to prepare the stream for cdata, doctype etc etc. This variant is -is dedicated to Luigi Scarso, who challenged me with 40 megabyte files that -took 12.5 seconds to load (1.5 for file io and the rest for tree building). With -the implementation we got that down to less 7.3 seconds. Loading the 14 - interface definition files (2.6 meg) went down from 1.05 seconds to 0.55.

- -

Next comes the parser. The rather messy doctype definition comes in many -disguises so it is no surprice that later on have to dedicate quite some - code to it.

- - - - - - - - - - -

The code may look a bit complex but this is mostly due to the fact that we -resolve namespaces and attach metatables. There is only one public function:

- - -local x = xml.convert(somestring) - - -

An optional second boolean argument tells this function not to create a root -element.

- -

Valid entities are:

- - - - - - ---ldx]]-- + + + -- not just one big nested table capture (lpeg overflow) @@ -7220,15 +8033,7 @@ local privates_n = { -- keeps track of defined ones } -local function escaped(s) - if s == "" then - return "" - else -- if utffind(s,privates_u) then - return (utfgsub(s,".",privates_u)) - -- else - -- return s - end -end +local escaped = utf.remapper(privates_u) local function unescaped(s) local p = privates_n[s] @@ -7243,13 +8048,7 @@ local function unescaped(s) return p end -local function unprivatized(s,resolve) - if s == "" then - return "" - else - return (utfgsub(s,".",privates_p)) - end -end +local unprivatized = utf.remapper(privates_p) xml.privatetoken = unescaped xml.unprivatized = unprivatized @@ -7589,7 +8388,12 @@ local function _xmlconvert_(data, settings) else errorhandler = errorhandler or xml.errorhandler if errorhandler then - xml.errorhandler(format("load error: %s",errorstr)) + local currentresource = settings.currentresource + if currentresource and currentresource ~= "" then + xml.errorhandler(format("load error in [%s]: %s",currentresource,errorstr)) + else + xml.errorhandler(format("load error: %s",errorstr)) + end end end else @@ -7634,7 +8438,7 @@ function xmlconvert(data,settings) if ok then return result else - return _xmlconvert_("") + return _xmlconvert_("",settings) end end @@ -7655,10 +8459,7 @@ function xml.inheritedconvert(data,xmldata) -- xmldata is parent return xc end ---[[ldx-- -

Packaging data in an xml like table is done with the following -function. Maybe it will go away (when not used).

---ldx]]-- + function xml.is_valid(root) return root and root.dt and root.dt[1] and type(root.dt[1]) == "table" and not root.dt[1].er @@ -7677,11 +8478,7 @@ end xml.errorhandler = report_xml ---[[ldx-- -

We cannot load an from a filehandle so we need to load -the whole file first. The function accepts a string representing -a filename or a file handle.

---ldx]]-- + function xml.load(filename,settings) local data = "" @@ -7695,13 +8492,17 @@ function xml.load(filename,settings) elseif filename then -- filehandle data = filename:read("*all") end - return xmlconvert(data,settings) + if settings then + settings.currentresource = filename + local result = xmlconvert(data,settings) + settings.currentresource = nil + return result + else + return xmlconvert(data,{ currentresource = filename }) + end end ---[[ldx-- -

When we inject new elements, we need to convert strings to -valid trees, which is what the next function does.

---ldx]]-- + local no_root = { no_root = true } @@ -7714,11 +8515,7 @@ function xml.toxml(data) end end ---[[ldx-- -

For copying a tree we use a dedicated function instead of the -generic table copier. Since we know what we're dealing with we -can speed up things a bit. The second argument is not to be used!

---ldx]]-- + local function copy(old,tables) if old then @@ -7742,13 +8539,7 @@ end xml.copy = copy ---[[ldx-- -

In serializing the tree or parts of the tree is a major -actitivity which is why the following function is pretty optimized resulting -in a few more lines of code than needed. The variant that uses the formatting -function for all components is about 15% slower than the concatinating -alternative.

---ldx]]-- + -- todo: add when not present @@ -7761,15 +8552,12 @@ function xml.checkbom(root) -- can be made faster return end end - insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } ) + insert(dt, 1, { special = true, ns = "", tg = "@pi@", dt = { "xml version='1.0' standalone='yes'" } } ) insert(dt, 2, "\n" ) end end ---[[ldx-- -

At the cost of some 25% runtime overhead you can first convert the tree to a string -and then handle the lot.

---ldx]]-- + -- new experimental reorganized serialize @@ -7962,21 +8750,7 @@ newhandlers { } } ---[[ldx-- -

How you deal with saving data depends on your preferences. For a 40 MB database -file the timing on a 2.3 Core Duo are as follows (time in seconds):

- - -1.3 : load data from file to string -6.1 : convert string into tree -5.3 : saving in file using xmlsave -6.8 : converting to string using xml.tostring -3.6 : saving converted string in file - -

Beware, these were timing with the old routine but measurements will not be that -much different I guess.

---ldx]]-- -- maybe this will move to lxml-xml @@ -8054,10 +8828,7 @@ xml.newhandlers = newhandlers xml.serialize = serialize xml.tostring = xmltostring ---[[ldx-- -

The next function operated on the content only and needs a handle function -that accepts a string.

---ldx]]-- + local function xmlstring(e,handle) if not handle or (e.special and e.tg ~= "@rt@") then @@ -8076,9 +8847,7 @@ end xml.string = xmlstring ---[[ldx-- -

A few helpers:

---ldx]]-- + function xml.settings(e) @@ -8122,11 +8891,7 @@ function xml.name(root) end end ---[[ldx-- -

The next helper erases an element but keeps the table as it is, -and since empty strings are not serialized (effectively) it does -not harm. Copying the table would take more time. Usage:

---ldx]]-- + function xml.erase(dt,k) if dt then @@ -8138,13 +8903,7 @@ function xml.erase(dt,k) end end ---[[ldx-- -

The next helper assigns a tree (or string). Usage:

- -dt[k] = xml.assign(root) or xml.assign(dt,k,root) - ---ldx]]-- function xml.assign(dt,k,root) if dt and k then @@ -8157,20 +8916,14 @@ end -- the following helpers may move ---[[ldx-- -

The next helper assigns a tree (or string). Usage:

- -xml.tocdata(e) -xml.tocdata(e,"error") - ---ldx]]-- + function xml.tocdata(e,wrapper) -- a few more in the aux module local whatever = type(e) == "table" and xmltostring(e.dt) or e or "" if wrapper then whatever = format("<%s>%s",wrapper,whatever,wrapper) end - local t = { special = true, ns = "", tg = "@cd@", at = {}, rn = "", dt = { whatever }, __p__ = e } + local t = { special = true, ns = "", tg = "@cd@", at = { }, rn = "", dt = { whatever }, __p__ = e } setmetatable(t,getmetatable(e)) e.dt = { t } end @@ -8225,7 +8978,7 @@ end -- of closure do -- create closure to overcome 200 locals limit -if not modules then modules = { } end modules ['lxml-pth'] = { +if not modules then modules = { } end modules ['lxml-lpt'] = { version = 1.001, comment = "this module is the basis for the lxml-* ones", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -8246,28 +8999,9 @@ local setmetatableindex = table.setmetatableindex -- beware, this is not xpath ... e.g. position is different (currently) and -- we have reverse-sibling as reversed preceding sibling ---[[ldx-- -

This module can be used stand alone but also inside in -which case it hooks into the tracker code. Therefore we provide a few -functions that set the tracers. Here we overload a previously defined -function.

-

If I can get in the mood I will make a variant that is XSLT compliant -but I wonder if it makes sense.

---ldx]]-- - ---[[ldx-- -

Expecially the lpath code is experimental, we will support some of xpath, but -only things that make sense for us; as compensation it is possible to hook in your -own functions. Apart from preprocessing content for we also need -this module for process management, like handling and -files.

- - -a/b/c /*/c -a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n) -a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n) - ---ldx]]-- + + + local trace_lpath = false if trackers then trackers.register("xml.path", function(v) trace_lpath = v end) end local trace_lparse = false if trackers then trackers.register("xml.parse", function(v) trace_lparse = v end) end @@ -8275,11 +9009,7 @@ local trace_lprofile = false if trackers then trackers.register("xml.profile", local report_lpath = logs.reporter("xml","lpath") ---[[ldx-- -

We've now arrived at an interesting part: accessing the tree using a subset -of and since we're not compatible we call it . We -will explain more about its usage in other documents.

---ldx]]-- + local xml = xml @@ -8731,14 +9461,23 @@ local lp_builtin = P ( -- for the moment we keep namespaces with attributes local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * ((R("az","AZ") + S("-_:"))^1) * Cc("'])") -local lp_fastpos_p = ((P("+")^0 * R("09")^1 * P(-1)) / function(s) return "l==" .. s end) -local lp_fastpos_n = ((P("-") * R("09")^1 * P(-1)) / function(s) return "(" .. s .. "<0 and (#list+".. s .. "==l))" end) + +-- lp_fastpos_p = (P("+")^0 * R("09")^1 * P(-1)) / function(s) return "l==" .. s end +-- lp_fastpos_n = (P("-") * R("09")^1 * P(-1)) / function(s) return "(" .. s .. "<0 and (#list+".. s .. "==l))" end + +lp_fastpos_p = P("+")^0 * R("09")^1 * P(-1) / "l==%0" +lp_fastpos_n = P("-") * R("09")^1 * P(-1) / "(%0<0 and (#list+%0==l))" + local lp_fastpos = lp_fastpos_n + lp_fastpos_p + local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false") -local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling - return t .. "(" -end +-- local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling +-- return t .. "(" +-- end + +-- local lp_lua_function = (R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / "%0(" +local lp_lua_function = Cs((R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(")) / "%0" local lp_function = C(R("az","AZ","__")^1) * P("(") / function(t) -- todo: better . handling if expressions[t] then @@ -9254,9 +9993,7 @@ end xml.applylpath = applylpath -- takes a table as first argment, which is what xml.filter will do ---[[ldx-- -

This is the main filter function. It returns whatever is asked for.

---ldx]]-- + function xml.filter(root,pattern) -- no longer funny attribute handling here return applylpath(root,pattern) @@ -9354,12 +10091,12 @@ xml.selection = selection -- new method, simple handle -- generic function finalizer (independant namespace) -local function dofunction(collected,fnc) +local function dofunction(collected,fnc,...) if collected then local f = functions[fnc] if f then for c=1,#collected do - f(collected[c]) + f(collected[c],...) end else report_lpath("unknown function '%s'",fnc) @@ -9460,21 +10197,7 @@ expressions.tag = function(e,n) -- only tg end end ---[[ldx-- -

Often using an iterators looks nicer in the code than passing handler -functions. The book describes how to use coroutines for that -purpose (). This permits -code like:

- -for r, d, k in xml.elements(xml.load('text.xml'),"title") do - print(d[k]) -- old method -end -for e in xml.collected(xml.load('text.xml'),"title") do - print(e) -- new one -end - ---ldx]]-- local wrap, yield = coroutine.wrap, coroutine.yield @@ -9515,6 +10238,32 @@ function xml.inspect(collection,pattern) end end +-- texy (see xfdf): + +local function split(e) + local dt = e.dt + if dt then + for i=1,#dt do + local dti = dt[i] + if type(dti) == "string" then + dti = gsub(dti,"^[\n\r]*(.-)[\n\r]*","%1") + dti = gsub(dti,"[\n\r]+","\n\n") + dt[i] = dti + else + split(dti) + end + end + end + return e +end + +function xml.finalizers.paragraphs(c) + for i=1,#c do + split(c[i]) + end + return c +end + end -- of closure @@ -9539,13 +10288,7 @@ local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, l lpegpatterns.xml = lpegpatterns.xml or { } local xmlpatterns = lpegpatterns.xml ---[[ldx-- -

The following helper functions best belong to the lxml-ini -module. Some are here because we need then in the mk -document and other manuals, others came up when playing with -this module. Since this module is also used in we've -put them here instead of loading mode modules there then needed.

---ldx]]-- + local function xmlgsub(t,old,new) -- will be replaced local dt = t.dt @@ -9731,9 +10474,7 @@ function xml.processattributes(root,pattern,handle) return collected end ---[[ldx-- -

The following functions collect elements and texts.

---ldx]]-- + -- are these still needed -> lxml-cmp.lua @@ -9772,9 +10513,7 @@ function xml.collect_tags(root, pattern, nonamespace) end end ---[[ldx-- -

We've now arrived at the functions that manipulate the tree.

---ldx]]-- + local no_root = { no_root = true } @@ -10160,9 +10899,7 @@ function xml.remapname(root, pattern, newtg, newns, newrn) end end ---[[ldx-- -

Helper (for q2p).

---ldx]]-- + function xml.cdatatotext(e) local dt = e.dt @@ -10259,9 +10996,7 @@ end -- xml.addentitiesdoctype(x,"hexadecimal") -- print(x) ---[[ldx-- -

Here are a few synonyms.

---ldx]]-- + xml.all = xml.each xml.insert = xml.insertafter @@ -10852,7 +11587,7 @@ local gsub, find, gmatch, char = string.gsub, string.find, string.gmatch, string local concat = table.concat local next, type = next, type -local filedirname, filebasename, fileextname, filejoin = file.dirname, file.basename, file.extname, file.join +local filedirname, filebasename, filejoin = file.dirname, file.basename, file.join local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end) local trace_detail = false trackers.register("resolvers.details", function(v) trace_detail = v end) @@ -11202,12 +11937,14 @@ local function splitpathexpr(str, newlist, validate) -- I couldn't resist lpeggi for s in gmatch(str,"[^,]+") do s = validate(s) if s then - n = n + 1 ; t[n] = s + n = n + 1 + t[n] = s end end else for s in gmatch(str,"[^,]+") do - n = n + 1 ; t[n] = s + n = n + 1 + t[n] = s end end if trace_expansions then @@ -11221,7 +11958,7 @@ end -- We could make the previous one public. local function validate(s) - s = collapsepath(s) -- already keeps the // + s = collapsepath(s) -- already keeps the trailing / and // return s ~= "" and not find(s,"^!*unset/*$") and s end @@ -11559,7 +12296,7 @@ local resolvers = resolvers local allocate = utilities.storage.allocate local setmetatableindex = table.setmetatableindex -local fileextname = file.extname +local suffixonly = file.suffixonly local formats = allocate() local suffixes = allocate() @@ -11814,7 +12551,7 @@ function resolvers.formatofvariable(str) end function resolvers.formatofsuffix(str) -- of file - return suffixmap[fileextname(str)] or 'tex' -- so many map onto tex (like mkiv, cld etc) + return suffixmap[suffixonly(str)] or 'tex' -- so many map onto tex (like mkiv, cld etc) end function resolvers.variableofformat(str) @@ -11826,7 +12563,7 @@ function resolvers.variableofformatorsuffix(str) if v then return v end - v = suffixmap[fileextname(str)] + v = suffixmap[suffixonly(str)] if v then return formats[v] end @@ -11847,21 +12584,7 @@ if not modules then modules = { } end modules ['data-tmp'] = { license = "see context related readme files" } ---[[ldx-- -

This module deals with caching data. It sets up the paths and -implements loaders and savers for tables. Best is to set the -following variable. When not set, the usual paths will be -checked. Personally I prefer the (users) temporary path.

- - -TEXMFCACHE=$TMP;$TEMP;$TMPDIR;$TEMPDIR;$HOME;$TEXMFVAR;$VARTEXMF;. - -

Currently we do no locking when we write files. This is no real -problem because most caching involves fonts and the chance of them -being written at the same time is small. We also need to extend -luatools with a recache feature.

---ldx]]-- local format, lower, gsub, concat = string.format, string.lower, string.gsub, table.concat local serialize, serializetofile = table.serialize, table.tofile @@ -12396,11 +13119,12 @@ local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns local filedirname = file.dirname local filebasename = file.basename -local fileextname = file.extname +local suffixonly = file.suffixonly local filejoin = file.join local collapsepath = file.collapsepath local joinpath = file.joinpath local allocate = utilities.storage.allocate +local settings_to_array = utilities.parsers.settings_to_array local setmetatableindex = table.setmetatableindex local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end) @@ -12424,7 +13148,7 @@ resolvers.cacheversion = '1.0.1' resolvers.configbanner = '' resolvers.homedir = environment.homedir resolvers.criticalvars = allocate { "SELFAUTOLOC", "SELFAUTODIR", "SELFAUTOPARENT", "TEXMFCNF", "TEXMF", "TEXOS" } -resolvers.luacnfname = 'texmfcnf.lua' +resolvers.luacnfname = "texmfcnf.lua" resolvers.luacnfstate = "unknown" -- The web2c tex binaries as well as kpse have built in paths for the configuration @@ -12696,7 +13420,7 @@ end local function identify_configuration_files() local specification = instance.specification if #specification == 0 then - local cnfspec = getenv('TEXMFCNF') + local cnfspec = getenv("TEXMFCNF") if cnfspec == "" then cnfspec = resolvers.luacnfspec resolvers.luacnfstate = "default" @@ -12784,7 +13508,7 @@ local function load_configuration_files() -- we push the value into the main environment (osenv) so -- that it takes precedence over the default one and therefore -- also over following definitions - resolvers.setenv('TEXMFCNF',cnfspec) -- resolves prefixes + resolvers.setenv("TEXMFCNF",cnfspec) -- resolves prefixes -- we now identify and load the specified configuration files instance.specification = { } identify_configuration_files() @@ -12832,10 +13556,11 @@ end local function locate_file_databases() -- todo: cache:// and tree:// (runtime) - local texmfpaths = resolvers.expandedpathlist('TEXMF') + local texmfpaths = resolvers.expandedpathlist("TEXMF") if #texmfpaths > 0 then for i=1,#texmfpaths do local path = collapsepath(texmfpaths[i]) + path = gsub(path,"/+$","") -- in case $HOME expands to something with a trailing / local stripped = lpegmatch(inhibitstripper,path) -- the !! thing if stripped ~= "" then local runtime = stripped == path @@ -12964,9 +13689,9 @@ function resolvers.prependhash(type,name,cache) end function resolvers.extendtexmfvariable(specification) -- crap, we could better prepend the hash - local t = resolvers.splitpath(getenv('TEXMF')) + local t = resolvers.splitpath(getenv("TEXMF")) -- okay? insert(t,1,specification) - local newspec = concat(t,";") + local newspec = concat(t,",") -- not ; if instance.environment["TEXMF"] then instance.environment["TEXMF"] = newspec elseif instance.variables["TEXMF"] then @@ -13041,14 +13766,19 @@ function resolvers.resetextrapath() end function resolvers.registerextrapath(paths,subpaths) + paths = settings_to_array(paths) + subpaths = settings_to_array(subpaths) local ep = instance.extra_paths or { } local oldn = #ep local newn = oldn - if paths and paths ~= "" then - if subpaths and subpaths ~= "" then - for p in gmatch(paths,"[^,]+") do - -- we gmatch each step again, not that fast, but used seldom - for s in gmatch(subpaths,"[^,]+") do + local nofpaths = #paths + local nofsubpaths = #subpaths + if nofpaths > 0 then + if nofsubpaths > 0 then + for i=1,nofpaths do + local p = paths[i] + for j=1,nofsubpaths do + local s = subpaths[j] local ps = p .. "/" .. s if not done[ps] then newn = newn + 1 @@ -13058,7 +13788,8 @@ function resolvers.registerextrapath(paths,subpaths) end end else - for p in gmatch(paths,"[^,]+") do + for i=1,nofpaths do + local p = paths[i] if not done[p] then newn = newn + 1 ep[newn] = resolvers.cleanpath(p) @@ -13066,10 +13797,10 @@ function resolvers.registerextrapath(paths,subpaths) end end end - elseif subpaths and subpaths ~= "" then + elseif nofsubpaths > 0 then for i=1,oldn do - -- we gmatch each step again, not that fast, but used seldom - for s in gmatch(subpaths,"[^,]+") do + for j=1,nofsubpaths do + local s = subpaths[j] local ps = ep[i] .. "/" .. s if not done[ps] then newn = newn + 1 @@ -13147,18 +13878,21 @@ function resolvers.expandedpathlist(str) return { } elseif instance.savelists then str = lpegmatch(dollarstripper,str) - if not instance.lists[str] then -- cached - local lst = made_list(instance,resolvers.splitpath(resolvers.expansion(str))) - instance.lists[str] = expandedpathfromlist(lst) - end - return instance.lists[str] + local lists = instance.lists + local lst = lists[str] + if not lst then + local l = made_list(instance,resolvers.splitpath(resolvers.expansion(str))) + lst = expandedpathfromlist(l) + lists[str] = lst + end + return lst else local lst = resolvers.splitpath(resolvers.expansion(str)) return made_list(instance,expandedpathfromlist(lst)) end end -function resolvers.expandedpathlistfromvariable(str) -- brrr +function resolvers.expandedpathlistfromvariable(str) -- brrr / could also have cleaner ^!! /$ // str = lpegmatch(dollarstripper,str) local tmp = resolvers.variableofformatorsuffix(str) return resolvers.expandedpathlist(tmp ~= "" and tmp or str) @@ -13315,7 +14049,7 @@ local preparetreepattern = Cs((P(".")/"%%." + P("-")/"%%-" + P(1))^0 * Cc("$")) local collect_instance_files local function find_analyze(filename,askedformat,allresults) - local filetype, wantedfiles, ext = '', { }, fileextname(filename) + local filetype, wantedfiles, ext = '', { }, suffixonly(filename) -- too tricky as filename can be bla.1.2.3: -- -- if not suffixmap[ext] then @@ -13393,7 +14127,7 @@ local function find_qualified(filename,allresults) -- this one will be split too if trace_detail then report_resolving("locating qualified file '%s'", filename) end - local forcedname, suffix = "", fileextname(filename) + local forcedname, suffix = "", suffixonly(filename) if suffix == "" then -- why local format_suffixes = askedformat == "" and resolvers.defaultsuffixes or suffixes[askedformat] if format_suffixes then @@ -14063,6 +14797,8 @@ local gsub = string.gsub local cleanpath, findgivenfile, expansion = resolvers.cleanpath, resolvers.findgivenfile, resolvers.expansion local getenv = resolvers.getenv -- we can probably also use resolvers.expansion local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match +local joinpath, basename, dirname = file.join, file.basename, file.dirname +local getmetatable, rawset, type = getmetatable, rawset, type -- getenv = function(...) return resolvers.getenv(...) end -- needs checking (definitions changes later on) @@ -14104,28 +14840,43 @@ end prefixes.filename = function(str) local fullname = findgivenfile(str) or "" - return cleanpath(file.basename((fullname ~= "" and fullname) or str)) -- no cleanpath needed here + return cleanpath(basename((fullname ~= "" and fullname) or str)) -- no cleanpath needed here end prefixes.pathname = function(str) local fullname = findgivenfile(str) or "" - return cleanpath(file.dirname((fullname ~= "" and fullname) or str)) + return cleanpath(dirname((fullname ~= "" and fullname) or str)) end prefixes.selfautoloc = function(str) - return cleanpath(file.join(getenv('SELFAUTOLOC'),str)) + return cleanpath(joinpath(getenv('SELFAUTOLOC'),str)) end prefixes.selfautoparent = function(str) - return cleanpath(file.join(getenv('SELFAUTOPARENT'),str)) + return cleanpath(joinpath(getenv('SELFAUTOPARENT'),str)) end prefixes.selfautodir = function(str) - return cleanpath(file.join(getenv('SELFAUTODIR'),str)) + return cleanpath(joinpath(getenv('SELFAUTODIR'),str)) end prefixes.home = function(str) - return cleanpath(file.join(getenv('HOME'),str)) + return cleanpath(joinpath(getenv('HOME'),str)) +end + +local function toppath() + local pathname = dirname(inputstack[#inputstack] or "") + if pathname == "" then + return "." + else + return pathname + end +end + +resolvers.toppath = toppath + +prefixes.toppath = function(str) + return cleanpath(joinpath(toppath(),str)) end prefixes.env = prefixes.environment @@ -14161,6 +14912,8 @@ function resolvers.resetresolve(str) resolved, abstract = { }, { } end +-- todo: use an lpeg (see data-lua for !! / stripper) + local function resolve(str) -- use schemes, this one is then for the commandline only if type(str) == "table" then local t = { } @@ -14186,7 +14939,7 @@ end resolvers.resolve = resolve resolvers.unresolve = unresolve -if os.uname then +if type(os.uname) == "function" then for k, v in next, os.uname() do if not prefixes[k] then @@ -14198,11 +14951,17 @@ end if os.type == "unix" then + -- We need to distringuish between a prefix and something else : so we + -- have a special repath variant for linux. Also, when a new prefix is + -- defined, we need to remake the matcher. + local pattern local function makepattern(t,k,v) + if t then + rawset(t,k,v) + end local colon = P(":") - local p for k, v in table.sortedpairs(prefixes) do if p then p = P(k) + p @@ -14211,9 +14970,6 @@ if os.type == "unix" then end end pattern = Cs((p * colon + colon/";" + P(1))^0) - if t then - t[k] = v - end end makepattern() @@ -14424,18 +15180,7 @@ local trace_cache = false trackers.register("resolvers.cache", functi local trace_containers = false trackers.register("resolvers.containers", function(v) trace_containers = v end) local trace_storage = false trackers.register("resolvers.storage", function(v) trace_storage = v end) ---[[ldx-- -

Once we found ourselves defining similar cache constructs -several times, containers were introduced. Containers are used -to collect tables in memory and reuse them when possible based -on (unique) hashes (to be provided by the calling function).

- -

Caching to disk is disabled by default. Version numbers are -stored in the saved table which makes it possible to change the -table structures without bothering about the disk cache.

-

Examples of usage can be found in the font related code.

---ldx]]-- containers = containers or { } local containers = containers @@ -14670,11 +15415,7 @@ local trace_locating = false trackers.register("resolvers.locating", function(v local report_zip = logs.reporter("resolvers","zip") --- zip:///oeps.zip?name=bla/bla.tex --- zip:///oeps.zip?tree=tex/texmf-local --- zip:///texmf.zip?tree=/tex/texmf --- zip:///texmf.zip?tree=/tex/texmf-local --- zip:///texmf-mine.zip?tree=/tex/texmf-projects + local resolvers = resolvers @@ -14999,7 +15740,7 @@ end -- of closure do -- create closure to overcome 200 locals limit -if not modules then modules = { } end modules ['data-crl'] = { +if not modules then modules = { } end modules ['data-sch'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -15007,60 +15748,199 @@ if not modules then modules = { } end modules ['data-crl'] = { license = "see context related readme files" } --- this one is replaced by data-sch.lua -- +local loadstring = loadstring +local gsub, concat, format = string.gsub, table.concat, string.format +local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders -local gsub = string.gsub +local trace_schemes = false trackers.register("resolvers.schemes",function(v) trace_schemes = v end) +local report_schemes = logs.reporter("resolvers","schemes") -local resolvers = resolvers +local http = require("socket.http") +local ltn12 = require("ltn12") -local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders +local resolvers = resolvers +local schemes = resolvers.schemes or { } +resolvers.schemes = schemes + +local cleaners = { } +schemes.cleaners = cleaners + +local threshold = 24 * 60 * 60 + +directives.register("schemes.threshold", function(v) threshold = tonumber(v) or threshold end) + +function cleaners.none(specification) + return specification.original +end + +function cleaners.strip(specification) + return (gsub(specification.original,"[^%a%d%.]+","-")) -- so we keep periods +end + +function cleaners.md5(specification) + return file.addsuffix(md5.hex(specification.original),file.suffix(specification.path)) +end + +local cleaner = cleaners.strip + +directives.register("schemes.cleanmethod", function(v) cleaner = cleaners[v] or cleaners.strip end) + +function resolvers.schemes.cleanname(specification) + local hash = cleaner(specification) + if trace_schemes then + report_schemes("hashing %s to %s",specification.original,hash) + end + return hash +end -resolvers.curl = resolvers.curl or { } -local curl = resolvers.curl +local cached, loaded, reused, thresholds, handlers = { }, { }, { }, { }, { } -local cached = { } +local function runcurl(name,cachename) -- we use sockets instead or the curl library when possible + local command = "curl --silent --create-dirs --output " .. cachename .. " " .. name + os.spawn(command) +end -local function runcurl(specification) +local function fetch(specification) local original = specification.original - -- local scheme = specification.scheme - local cleanname = gsub(original,"[^%a%d%.]+","-") - local cachename = caches.setfirstwritablefile(cleanname,"curl") + local scheme = specification.scheme + local cleanname = schemes.cleanname(specification) + local cachename = caches.setfirstwritablefile(cleanname,"schemes") if not cached[original] then - if not io.exists(cachename) then + statistics.starttiming(schemes) + if not io.exists(cachename) or (os.difftime(os.time(),lfs.attributes(cachename).modification) > (thresholds[protocol] or threshold)) then cached[original] = cachename - local command = "curl --silent --create-dirs --output " .. cachename .. " " .. original - os.spawn(command) + local handler = handlers[scheme] + if handler then + if trace_schemes then + report_schemes("fetching '%s', protocol '%s', method 'built-in'",original,scheme) + end + logs.flush() + handler(specification,cachename) + else + if trace_schemes then + report_schemes("fetching '%s', protocol '%s', method 'curl'",original,scheme) + end + logs.flush() + runcurl(original,cachename) + end end if io.exists(cachename) then cached[original] = cachename + if trace_schemes then + report_schemes("using cached '%s', protocol '%s', cachename '%s'",original,scheme,cachename) + end else cached[original] = "" + if trace_schemes then + report_schemes("using missing '%s', protocol '%s'",original,scheme) + end end + loaded[scheme] = loaded[scheme] + 1 + statistics.stoptiming(schemes) + else + if trace_schemes then + report_schemes("reusing '%s', protocol '%s'",original,scheme) + end + reused[scheme] = reused[scheme] + 1 end return cached[original] end --- old code: we could be cleaner using specification (see schemes) - local function finder(specification,filetype) - return resolvers.methodhandler("finders",runcurl(specification),filetype) + return resolvers.methodhandler("finders",fetch(specification),filetype) end local opener = openers.file local loader = loaders.file -local function install(scheme) - finders[scheme] = finder - openers[scheme] = opener - loaders[scheme] = loader +local function install(scheme,handler,newthreshold) + handlers [scheme] = handler + loaded [scheme] = 0 + reused [scheme] = 0 + finders [scheme] = finder + openers [scheme] = opener + loaders [scheme] = loader + thresholds[scheme] = newthreshold or threshold end -resolvers.curl.install = install +schemes.install = install + +local function http_handler(specification,cachename) + local tempname = cachename .. ".tmp" + local f = io.open(tempname,"wb") + local status, message = http.request { + url = specification.original, + sink = ltn12.sink.file(f) + } + if not status then + os.remove(tempname) + else + os.remove(cachename) + os.rename(tempname,cachename) + end + return cachename +end -install('http') -install('https') +install('http',http_handler) +install('https') -- see pod install('ftp') +statistics.register("scheme handling time", function() + local l, r, nl, nr = { }, { }, 0, 0 + for k, v in table.sortedhash(loaded) do + if v > 0 then + nl = nl + 1 + l[nl] = k .. ":" .. v + end + end + for k, v in table.sortedhash(reused) do + if v > 0 then + nr = nr + 1 + r[nr] = k .. ":" .. v + end + end + local n = nl + nr + if n > 0 then + l = nl > 0 and concat(l) or "none" + r = nr > 0 and concat(r) or "none" + return format("%s seconds, %s processed, threshold %s seconds, loaded: %s, reused: %s", + statistics.elapsedtime(schemes), n, threshold, l, r) + else + return nil + end +end) + +-- We provide a few more helpers: + +----- http = require("socket.http") +local httprequest = http.request +local toquery = url.toquery + +-- local function httprequest(url) +-- return os.resultof(format("curl --silent %q", url)) +-- end + +local function fetchstring(url,data) + local q = data and toquery(data) + if q then + url = url .. "?" .. q + end + local reply = httprequest(url) + return reply -- just one argument +end + +schemes.fetchstring = fetchstring + +function schemes.fetchtable(url,data) + local reply = fetchstring(url,data) + if reply then + local s = loadstring("return " .. reply) + if s then + return s() + end + end +end + end -- of closure @@ -15074,170 +15954,199 @@ if not modules then modules = { } end modules ['data-lua'] = { license = "see context related readme files" } --- some loading stuff ... we might move this one to slot 2 depending --- on the developments (the loaders must not trigger kpse); we could --- of course use a more extensive lib path spec +-- We overload the regular loader. We do so because we operate mostly in +-- tds and use our own loader code. Alternatively we could use a more +-- extensive definition of package.path and package.cpath but even then +-- we're not done. Also, we now have better tracing. +-- +-- -- local mylib = require("libtest") +-- -- local mysql = require("luasql.mysql") -local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end) +local concat = table.concat + +local trace_libraries = false + +trackers.register("resolvers.libraries", function(v) trace_libraries = v end) +trackers.register("resolvers.locating", function(v) trace_libraries = v end) local report_libraries = logs.reporter("resolvers","libraries") local gsub, insert = string.gsub, table.insert +local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match local unpack = unpack or table.unpack +local is_readable = file.is_readable local resolvers, package = resolvers, package -local libformats = { 'luatexlibs', 'tex', 'texmfscripts', 'othertextfiles' } -- 'luainputs' -local clibformats = { 'lib' } - -local _path_, libpaths, _cpath_, clibpaths - -function package.libpaths() - if not _path_ or package.path ~= _path_ then - _path_ = package.path - libpaths = file.splitpath(_path_,";") +local libsuffixes = { 'tex', 'lua' } +local clibsuffixes = { 'lib' } +local libformats = { 'TEXINPUTS', 'LUAINPUTS' } +local clibformats = { 'CLUAINPUTS' } + +local libpaths = nil +local clibpaths = nil +local libhash = { } +local clibhash = { } +local libextras = { } +local clibextras = { } + +local pattern = Cs(P("!")^0 / "" * (P("/") * P(-1) / "/" + P("/")^1 / "/" + 1)^0) + +local function cleanpath(path) --hm, don't we have a helper for this? + return resolvers.resolve(lpegmatch(pattern,path)) +end + +local function getlibpaths() + if not libpaths then + libpaths = { } + for i=1,#libformats do + local paths = resolvers.expandedpathlistfromvariable(libformats[i]) + for i=1,#paths do + local path = cleanpath(paths[i]) + if not libhash[path] then + libpaths[#libpaths+1] = path + libhash[path] = true + end + end + end end return libpaths end -function package.clibpaths() - if not _cpath_ or package.cpath ~= _cpath_ then - _cpath_ = package.cpath - clibpaths = file.splitpath(_cpath_,";") +local function getclibpaths() + if not clibpaths then + clibpaths = { } + for i=1,#clibformats do + local paths = resolvers.expandedpathlistfromvariable(clibformats[i]) + for i=1,#paths do + local path = cleanpath(paths[i]) + if not clibhash[path] then + clibpaths[#clibpaths+1] = path + clibhash[path] = true + end + end + end end return clibpaths end -local function thepath(...) - local t = { ... } t[#t+1] = "?.lua" - local path = file.join(unpack(t)) - if trace_locating then - report_libraries("! appending '%s' to 'package.path'",path) +package.libpaths = getlibpaths +package.clibpaths = getclibpaths + +function package.extralibpath(...) + local paths = { ... } + for i=1,#paths do + local path = cleanpath(paths[i]) + if not libhash[path] then + if trace_libraries then + report_libraries("! extra lua path '%s'",path) + end + libextras[#libextras+1] = path + libpaths[#libpaths +1] = path + end end - return path end -local p_libpaths, a_libpaths = { }, { } - -function package.appendtolibpath(...) - insert(a_libpath,thepath(...)) +function package.extraclibpath(...) + local paths = { ... } + for i=1,#paths do + local path = cleanpath(paths[i]) + if not clibhash[path] then + if trace_libraries then + report_libraries("! extra lib path '%s'",path) + end + clibextras[#clibextras+1] = path + clibpaths[#clibpaths +1] = path + end + end end -function package.prependtolibpath(...) - insert(p_libpaths,1,thepath(...)) +if not package.loaders[-2] then + -- use package-path and package-cpath + package.loaders[-2] = package.loaders[2] end --- beware, we need to return a loadfile result ! +local function loadedaslib(resolved,rawname) + return package.loadlib(resolved,"luaopen_" .. gsub(rawname,"%.","_")) +end -local function loaded(libpaths,name,simple) - for i=1,#libpaths do -- package.path, might become option - local libpath = libpaths[i] - local resolved = gsub(libpath,"%?",simple) - if trace_locating then -- more detail - report_libraries("! checking for '%s' on 'package.path': '%s' => '%s'",simple,libpath,resolved) - end - if file.is_readable(resolved) then - if trace_locating then - report_libraries("! lib '%s' located via 'package.path': '%s'",name,resolved) - end - return loadfile(resolved) - end +local function loadedbylua(name) + if trace_libraries then + report_libraries("! locating %q using normal loader",name) end + local resolved = package.loaders[-2](name) end -package.loaders[2] = function(name) -- was [#package.loaders+1] - if file.suffix(name) == "" then - name = file.addsuffix(name,"lua") -- maybe a list - if trace_locating then -- mode detail - report_libraries("! locating '%s' with forced suffix",name) - end - else - if trace_locating then -- mode detail - report_libraries("! locating '%s'",name) - end +local function loadedbyformat(name,rawname,suffixes,islib) + if trace_libraries then + report_libraries("! locating %q as %q using formats %q",rawname,name,concat(suffixes)) end - for i=1,#libformats do - local format = libformats[i] + for i=1,#suffixes do -- so we use findfile and not a lookup loop + local format = suffixes[i] local resolved = resolvers.findfile(name,format) or "" - if trace_locating then -- mode detail - report_libraries("! checking for '%s' using 'libformat path': '%s'",name,format) + if trace_libraries then + report_libraries("! checking for %q' using format %q",name,format) end if resolved ~= "" then - if trace_locating then - report_libraries("! lib '%s' located via environment: '%s'",name,resolved) + if trace_libraries then + report_libraries("! lib %q located on %q",name,resolved) end - return loadfile(resolved) - end - end - -- libpaths - local libpaths, clibpaths = package.libpaths(), package.clibpaths() - local simple = gsub(name,"%.lua$","") - local simple = gsub(simple,"%.","/") - local resolved = loaded(p_libpaths,name,simple) or loaded(libpaths,name,simple) or loaded(a_libpaths,name,simple) - if resolved then - return resolved - end - -- - local libname = file.addsuffix(simple,os.libsuffix) - for i=1,#clibformats do - -- better have a dedicated loop - local format = clibformats[i] - local paths = resolvers.expandedpathlistfromvariable(format) - for p=1,#paths do - local path = paths[p] - local resolved = file.join(path,libname) - if trace_locating then -- mode detail - report_libraries("! checking for '%s' using 'clibformat path': '%s'",libname,path) - end - if file.is_readable(resolved) then - if trace_locating then - report_libraries("! lib '%s' located via 'clibformat': '%s'",libname,resolved) - end - return package.loadlib(resolved,name) + if islib then + return loadedaslib(resolved,rawname) + else + return loadfile(resolved) end end end - for i=1,#clibpaths do -- package.path, might become option - local libpath = clibpaths[i] - local resolved = gsub(libpath,"?",simple) - if trace_locating then -- more detail - report_libraries("! checking for '%s' on 'package.cpath': '%s'",simple,libpath) +end + +local function loadedbypath(name,rawname,paths,islib,what) + if trace_libraries then + report_libraries("! locating %q as %q on %q paths",rawname,name,what) + end + for p=1,#paths do + local path = paths[p] + local resolved = file.join(path,name) + if trace_libraries then -- mode detail + report_libraries("! checking for %q using %q path %q",name,what,path) end - if file.is_readable(resolved) then - if trace_locating then - report_libraries("! lib '%s' located via 'package.cpath': '%s'",name,resolved) + if is_readable(resolved) then + if trace_libraries then + report_libraries("! lib %q located on %q",name,resolved) + end + if islib then + return loadedaslib(resolved,rawname) + else + return loadfile(resolved) end - return package.loadlib(resolved,name) - end - end - -- just in case the distribution is messed up - if trace_loading then -- more detail - report_libraries("! checking for '%s' using 'luatexlibs': '%s'",name) - end - local resolved = resolvers.findfile(file.basename(name),'luatexlibs') or "" - if resolved ~= "" then - if trace_locating then - report_libraries("! lib '%s' located by basename via environment: '%s'",name,resolved) end - return loadfile(resolved) end - if trace_locating then - report_libraries('? unable to locate lib: %s',name) - end --- return "unable to locate " .. name end -resolvers.loadlualib = require - --- -- -- -- +local function notloaded(name) + if trace_libraries then + report_libraries("? unable to locate library %q",name) + end +end -package.obsolete = package.obsolete or { } +package.loaders[2] = function(name) + local thename = gsub(name,"%.","/") + local luaname = file.addsuffix(thename,"lua") + local libname = file.addsuffix(thename,os.libsuffix) + return + loadedbyformat(luaname,name,libsuffixes, false) + or loadedbyformat(libname,name,clibsuffixes, true) + or loadedbypath (luaname,name,getlibpaths (),false,"lua") + or loadedbypath (luaname,name,getclibpaths(),false,"lua") + or loadedbypath (libname,name,getclibpaths(),true, "lib") + or loadedbylua (name) + or notloaded (name) +end -package.append_libpath = appendtolibpath -- will become obsolete -package.prepend_libpath = prependtolibpath -- will become obsolete +-- package.loaders[3] = nil +-- package.loaders[4] = nil -package.obsolete.append_libpath = appendtolibpath -- will become obsolete -package.obsolete.prepend_libpath = prependtolibpath -- will become obsolete +resolvers.loadlualib = require end -- of closure @@ -15707,7 +16616,6 @@ function environment.make_format(name) end function environment.run_format(name,data,more) - -- hm, rather old code here; we can now use the file.whatever functions if name and name ~= "" then local barename = file.removesuffix(name) local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats") @@ -15736,6 +16644,129 @@ function environment.run_format(name,data,more) end +end -- of closure + +do -- create closure to overcome 200 locals limit + +if not modules then modules = { } end modules ['util-tpl'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- This is experimental code. Coming from dos and windows, I've always used %whatever% +-- as template variables so let's stick to it. After all, it's easy to parse and stands +-- out well. A double %% is turned into a regular %. + +utilities.templates = utilities.templates or { } +local templates = utilities.templates + +local trace_template = false trackers.register("templates.trace",function(v) trace_template = v end) +local report_template = logs.reporter("template") + +local format = string.format +local P, C, Cs, Carg, lpegmatch = lpeg.P, lpeg.C, lpeg.Cs, lpeg.Carg, lpeg.match + +-- todo: make installable template.new + +local replacer + +local function replacekey(k,t,recursive) + local v = t[k] + if not v then + if trace_template then + report_template("unknown key %q",k) + end + return "" + else + if trace_template then + report_template("setting key %q to value %q",k,v) + end + if recursive then + return lpegmatch(replacer,v,1,t) + else + return v + end + end +end + +local sqlescape = lpeg.replacer { + { "'", "''" }, + { "\\", "\\\\" }, + { "\r\n", "\\n" }, + { "\r", "\\n" }, + -- { "\t", "\\t" }, +} + +local escapers = { + lua = function(s) + return format("%q",s) + end, + sql = function(s) + return lpegmatch(sqlescape,s) + end, +} + +local function replacekeyunquoted(s,t,how,recurse) -- ".. \" " + local escaper = how and escapers[how] or escapers.lua + return escaper(replacekey(s,t,recurse)) +end + +local single = P("%") -- test %test% test : resolves test +local double = P("%%") -- test 10%% test : %% becomes % +local lquoted = P("%[") -- test %[test]" test : resolves test with escaped "'s +local rquoted = P("]%") -- + +local escape = double / '%%' +local nosingle = single / '' +local nodouble = double / '' +local nolquoted = lquoted / '' +local norquoted = rquoted / '' + +local key = nosingle * (C((1-nosingle)^1 * Carg(1) * Carg(2) * Carg(3))/replacekey) * nosingle +local unquoted = nolquoted * ((C((1 - norquoted)^1) * Carg(1) * Carg(2) * Carg(3))/replacekeyunquoted) * norquoted +local any = P(1) + + replacer = Cs((unquoted + escape + key + any)^0) + +local function replace(str,mapping,how,recurse) + if mapping then + return lpegmatch(replacer,str,1,mapping,how or "lua",recurse or false) or str + else + return str + end +end + +-- print(replace("test '%[x]%' test",{ x = [[a 'x'  a]] })) +-- print(replace("test '%[x]%' test",{ x = [[a 'x'  a]] },'sql')) + +templates.replace = replace + +function templates.load(filename,mapping,how,recurse) + local data = io.loaddata(filename) or "" + if mapping and next(mapping) then + return replace(data,mapping,how,recurse) + else + return data + end +end + +function templates.resolve(t,mapping,how,recurse) + if not mapping then + mapping = t + end + for k, v in next, t do + t[k] = replace(v,mapping,how,recurse) + end + return t +end + +-- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" })) +-- inspect(utilities.templates.resolve({ one = "%two%", two = "two", three = "%three%" })) + + end -- of closure -- end library merge @@ -15796,7 +16827,7 @@ own.libs = { -- order can be made better -- 'data-bin.lua', 'data-zip.lua', 'data-tre.lua', - 'data-crl.lua', + 'data-sch.lua', 'data-lua.lua', 'data-aux.lua', -- updater 'data-tmf.lua', @@ -15804,6 +16835,8 @@ own.libs = { -- order can be made better 'luat-sta.lua', 'luat-fmt.lua', + + 'util-tpl.lua', } -- We need this hack till luatex is fixed. @@ -15824,7 +16857,7 @@ own.path = gsub(match(own.name,"^(.+)[\\/].-$") or ".","\\","/") local ownpath, owntree = own.path, environment and environment.ownpath or own.path -own.list = { +own.list = { -- predictable paths '.', ownpath , ownpath .. "/../sources", -- HH's development path @@ -15848,7 +16881,7 @@ local function locate_libs() local filename = pth .. "/" .. lib local found = lfs.isfile(filename) if found then - package.path = package.path .. ";" .. pth .. "/?.lua" -- in case l-* does a require + package.path = package.path .. ";" .. pth .. "/?.lua" -- in case l-* does a require (probably obsolete) return pth end end @@ -15980,6 +17013,7 @@ local helpinfo = [[ --var-value report value of variable --find-file report file location --find-path report path of file +--show-package-path report package paths --pattern=str filter variables ]] @@ -16093,7 +17127,8 @@ function runners.execute_script(fullname,internal,nosplit) elseif state == 'skip' then return true elseif state == "run" then - local path, name, suffix, result = file.dirname(fullname), file.basename(fullname), file.extname(fullname), "" + local path, name, suffix = file.splitname(fullname) + local result = "" if path ~= "" then result = fullname elseif name then @@ -16104,7 +17139,7 @@ function runners.execute_script(fullname,internal,nosplit) name = gsub(name,"^script:","") if suffix == "" and runners.registered[name] and runners.registered[name][1] then name = runners.registered[name][1] - suffix = file.extname(name) + suffix = file.suffix(name) end if suffix == "" then -- loop over known suffixes @@ -16131,7 +17166,7 @@ function runners.execute_script(fullname,internal,nosplit) environment.ownscript = result dofile(result) else - local binary = runners.applications[file.extname(result)] + local binary = runners.applications[file.suffix(result)] result = string.quoted(string.unquoted(result)) -- if string.match(result,' ') and not string.match(result,"^\".*\"$") then -- result = '"' .. result .. '"' @@ -16324,7 +17359,7 @@ function resolvers.launch(str) -- maybe we also need to test on mtxrun.launcher.suffix environment -- variable or on windows consult the assoc and ftype vars and such local launchers = runners.launchers[os.platform] if launchers then - local suffix = file.extname(str) if suffix then + local suffix = file.suffix(str) if suffix then local runner = launchers[suffix] if runner then str = runner .. " " .. str end @@ -16383,7 +17418,7 @@ function runners.find_mtx_script(filename) end filename = file.addsuffix(filename,"lua") local basename = file.removesuffix(file.basename(filename)) - local suffix = file.extname(filename) + local suffix = file.suffix(filename) -- qualified path, raw name local fullname = file.is_qualified_path(filename) and io.exists(filename) and filename if fullname and fullname ~= "" then @@ -16438,7 +17473,7 @@ function runners.execute_ctx_script(filename,...) runners.register_arguments(...) local arguments = environment.arguments_after local fullname = runners.find_mtx_script(filename) or "" - if file.extname(fullname) == "cld" then + if file.suffix(fullname) == "cld" then -- handy in editors where we force --autopdf report("running cld script: %s",filename) table.insert(arguments,1,fullname) @@ -16546,6 +17581,21 @@ function runners.timed(action) statistics.timed(action) end +function runners.associate(filename) + os.launch(filename) +end + +function runners.gethelp(filename) + local url = environment.argument("url") + if url and url ~= "" then + local command = string.gsub(environment.argument("command") or "unknown","^%s*\\*(.-)%s*$","%1") + url = utilities.templates.replace(url,{ command = command }) + os.launch(url) + else + report("no --url given") + end +end + -- this is a bit dirty ... first we store the first filename and next we -- split the arguments so that we only see the ones meant for this script -- ... later we will use the second half @@ -16648,7 +17698,18 @@ else end -if e_argument("selfmerge") then +if e_argument("script") or e_argument("scripts") then + + -- run a script by loading it (using libs), pass args + + runners.loadbase() + if is_mkii_stub then + ok = runners.execute_script(filename,false,true) + else + ok = runners.execute_ctx_script(filename) + end + +elseif e_argument("selfmerge") then -- embed used libraries @@ -16671,23 +17732,25 @@ elseif e_argument("selfupdate") then trackers.enable("resolvers.locating") resolvers.updatescript(own.name,"mtxrun") -elseif e_argument("ctxlua") or e_argument("internal") then +elseif e_argument("show-package-path") or e_argument("show-package-paths") then - -- run a script by loading it (using libs) + local l = package.libpaths() + local c = package.clibpaths() - runners.loadbase() - ok = runners.execute_script(filename,true) + for i=1,#l do + report("package lib path %s: %s",i,l[i]) + end -elseif e_argument("script") or e_argument("scripts") then + for i=1,#c do + report("package clib path %s: %s",i,c[i]) + end - -- run a script by loading it (using libs), pass args +elseif e_argument("ctxlua") or e_argument("internal") then + + -- run a script by loading it (using libs) runners.loadbase() - if is_mkii_stub then - ok = runners.execute_script(filename,false,true) - else - ok = runners.execute_ctx_script(filename) - end + ok = runners.execute_script(filename,true) elseif e_argument("execute") then @@ -16715,6 +17778,14 @@ elseif e_argument("launch") then runners.loadbase() runners.launch_file(filename) +elseif e_argument("associate") then + + runners.associate(filename) + +elseif e_argument("gethelp") then + + runners.gethelp() + elseif e_argument("makestubs") then -- make stubs (depricated) @@ -16806,7 +17877,7 @@ elseif e_argument("find-path") then elseif e_argument("expand-braces") then - -- luatools: runners.execute_ctx_script("mtx-base","--expand-braces",filename + -- luatools: runners.execute_ctx_script("mtx-base","--expand-braces",filename) resolvers.load("nofiles") runners.register_arguments(filename) -- cgit v1.2.3